From 220b0f5755f86745e4e16d001fe6f46b448565fa Mon Sep 17 00:00:00 2001 From: "Artem B. Bityutskiy" Date: Wed, 5 Jul 2006 11:04:02 +0400 Subject: [PATCH 0001/1063] [PATCH] [MTD] NAND: fix dead URL in Kconfig Signed-off-by: Artem B. Bityutskiy --- drivers/mtd/nand/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index 3db77eec0ed2..c99302ed3823 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -11,7 +11,7 @@ config MTD_NAND help This enables support for accessing all type of NAND flash devices. For further information see - . + . config MTD_NAND_VERIFY_WRITE bool "Verify NAND page writes" From 90a18fab4ae07b77bf053b75a4d1285cd94faa79 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 6 Jul 2006 22:37:43 +0200 Subject: [PATCH 0002/1063] make fs/jffs2/nodelist.c:jffs2_obsolete_node_frag() static This patch makes the needlessly global jffs2_obsolete_node_frag() static. Signed-off-by: Adrian Bunk Signed-off-by: David Woodhouse --- fs/jffs2/nodelist.c | 6 +++++- fs/jffs2/nodelist.h | 1 - 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c index 7675b33396c7..5a6b4d64206c 100644 --- a/fs/jffs2/nodelist.c +++ b/fs/jffs2/nodelist.c @@ -21,6 +21,9 @@ #include #include "nodelist.h" +static void jffs2_obsolete_node_frag(struct jffs2_sb_info *c, + struct jffs2_node_frag *this); + void jffs2_add_fd_to_list(struct jffs2_sb_info *c, struct jffs2_full_dirent *new, struct jffs2_full_dirent **list) { struct jffs2_full_dirent **prev = list; @@ -87,7 +90,8 @@ void jffs2_truncate_fragtree(struct jffs2_sb_info *c, struct rb_root *list, uint } } -void jffs2_obsolete_node_frag(struct jffs2_sb_info *c, struct jffs2_node_frag *this) +static void jffs2_obsolete_node_frag(struct jffs2_sb_info *c, + struct jffs2_node_frag *this) { if (this->node) { this->node->frags--; diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h index cae92c14116d..0ddfd70307fb 100644 --- a/fs/jffs2/nodelist.h +++ b/fs/jffs2/nodelist.h @@ -334,7 +334,6 @@ void jffs2_kill_fragtree(struct rb_root *root, struct jffs2_sb_info *c_delete); struct rb_node *rb_next(struct rb_node *); struct rb_node *rb_prev(struct rb_node *); void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root); -void jffs2_obsolete_node_frag(struct jffs2_sb_info *c, struct jffs2_node_frag *this); int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_full_dnode *fn); void jffs2_truncate_fragtree (struct jffs2_sb_info *c, struct rb_root *list, uint32_t size); int jffs2_add_older_frag_to_fragtree(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_tmp_dnode_info *tn); From fda7ffd25fc5bbe1b4209dfafb854c7ad7308c93 Mon Sep 17 00:00:00 2001 From: Niels Kristian Bech Jensen Date: Sun, 2 Jul 2006 13:02:27 +0200 Subject: [PATCH 0003/1063] [POWERPC] Add -fno-stack-protector to BOOTCFLAGS in arch/powerpc/boot/Makefile. I got some undefined references to __stack_chk_fail in arch/powerpc/boot/stdio.o and arch/powerpc/boot/prom.o when I was trying to build a kernel on Ubuntu Edgy Eft - which includes Stack Smashing Protection. This patch adds -fno-stack-protector to BOOTCFLAGS in arch/powerpc/boot/Makefile (why does BOOTCFLAGS depend on HOSTCFLAGS and not CFLAGS?). Regards, Niels Kristian Bech Jensen Signed-off-by: Paul Mackerras --- arch/powerpc/boot/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index d961bfeed05f..afc776f821e5 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -41,6 +41,10 @@ src-boot += $(zlib) src-boot := $(addprefix $(obj)/, $(src-boot)) obj-boot := $(addsuffix .o, $(basename $(src-boot))) +ifeq ($(call cc-option-yn, -fstack-protector),y) +BOOTCFLAGS += -fno-stack-protector +endif + BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj) quiet_cmd_copy_zlib = COPY $@ From 3a09aa4730f021ad917a66a0c6d2ff6d616a7e4f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 3 Jul 2006 14:28:14 +0200 Subject: [PATCH 0004/1063] [POWERPC] fix up front-LED Kconfig Rather long patch, apparently no one has updated the pmac32_defconfig in a while. Signed-off-by: Paul Mackerras --- arch/powerpc/configs/pmac32_defconfig | 112 ++++++++++++++++++++++---- drivers/ide/Kconfig | 14 ---- drivers/macintosh/Kconfig | 9 +++ drivers/macintosh/via-pmu-led.c | 2 +- 4 files changed, 106 insertions(+), 31 deletions(-) diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index addc79381c3b..3545af9896af 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -1,16 +1,18 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.17-rc5 -# Mon May 29 14:47:49 2006 +# Linux kernel version: 2.6.17 +# Mon Jul 3 14:20:49 2006 # # CONFIG_PPC64 is not set CONFIG_PPC32=y CONFIG_PPC_MERGE=y CONFIG_MMU=y CONFIG_GENERIC_HARDIRQS=y +CONFIG_IRQ_PER_CPU=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_GENERIC_FIND_NEXT_BIT=y CONFIG_PPC=y CONFIG_EARLY_PRINTK=y CONFIG_GENERIC_NVRAM=y @@ -29,6 +31,7 @@ CONFIG_CLASSIC32=y # CONFIG_PPC_82xx is not set # CONFIG_PPC_83xx is not set # CONFIG_PPC_85xx is not set +# CONFIG_PPC_86xx is not set # CONFIG_40x is not set # CONFIG_44x is not set # CONFIG_8xx is not set @@ -39,6 +42,7 @@ CONFIG_ALTIVEC=y CONFIG_PPC_STD_MMU=y CONFIG_PPC_STD_MMU_32=y # CONFIG_SMP is not set +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" # # Code maturity level options @@ -72,10 +76,12 @@ CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y CONFIG_BASE_FULL=y +CONFIG_RT_MUTEXES=y CONFIG_FUTEX=y CONFIG_EPOLL=y CONFIG_SHMEM=y CONFIG_SLAB=y +CONFIG_VM_EVENT_COUNTERS=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 # CONFIG_SLOB is not set @@ -119,6 +125,9 @@ CONFIG_PPC_MULTIPLATFORM=y # CONFIG_APUS is not set # CONFIG_PPC_CHRP is not set CONFIG_PPC_PMAC=y +# CONFIG_PPC_CELL is not set +# CONFIG_PPC_CELL_NATIVE is not set +# CONFIG_UDBG_RTAS_CONSOLE is not set CONFIG_MPIC=y # CONFIG_PPC_RTAS is not set # CONFIG_MMIO_NVRAM is not set @@ -154,6 +163,7 @@ CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT is not set CONFIG_BINFMT_ELF=y CONFIG_BINFMT_MISC=m +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y # CONFIG_KEXEC is not set CONFIG_ARCH_FLATMEM_ENABLE=y CONFIG_SELECT_MEMORY_MODEL=y @@ -164,6 +174,7 @@ CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y # CONFIG_SPARSEMEM_STATIC is not set CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_RESOURCES_64BIT is not set CONFIG_PROC_DEVICETREE=y # CONFIG_CMDLINE_BOOL is not set CONFIG_PM=y @@ -182,6 +193,7 @@ CONFIG_GENERIC_ISA_DMA=y CONFIG_PPC_INDIRECT_PCI=y CONFIG_PCI=y CONFIG_PCI_DOMAINS=y +# CONFIG_PCIEPORTBUS is not set # CONFIG_PCI_DEBUG is not set # @@ -256,6 +268,8 @@ CONFIG_INET_ESP=y # CONFIG_INET_IPCOMP is not set # CONFIG_INET_XFRM_TUNNEL is not set # CONFIG_INET_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set CONFIG_INET_DIAG=y CONFIG_INET_TCP_DIAG=y # CONFIG_TCP_CONG_ADVANCED is not set @@ -268,6 +282,7 @@ CONFIG_TCP_CONG_BIC=y # CONFIG_IPV6 is not set # CONFIG_INET6_XFRM_TUNNEL is not set # CONFIG_INET6_TUNNEL is not set +# CONFIG_NETWORK_SECMARK is not set CONFIG_NETFILTER=y # CONFIG_NETFILTER_DEBUG is not set @@ -292,9 +307,11 @@ CONFIG_NETFILTER_XT_MATCH_MARK=m CONFIG_NETFILTER_XT_MATCH_POLICY=m CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +# CONFIG_NETFILTER_XT_MATCH_QUOTA is not set CONFIG_NETFILTER_XT_MATCH_REALM=m CONFIG_NETFILTER_XT_MATCH_SCTP=m CONFIG_NETFILTER_XT_MATCH_STATE=m +# CONFIG_NETFILTER_XT_MATCH_STATISTIC is not set CONFIG_NETFILTER_XT_MATCH_STRING=m CONFIG_NETFILTER_XT_MATCH_TCPMSS=m @@ -313,6 +330,7 @@ CONFIG_IP_NF_TFTP=m CONFIG_IP_NF_AMANDA=m CONFIG_IP_NF_PPTP=m CONFIG_IP_NF_H323=m +# CONFIG_IP_NF_SIP is not set # CONFIG_IP_NF_QUEUE is not set CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_IPRANGE=m @@ -457,6 +475,7 @@ CONFIG_IRTTY_SIR=m # CONFIG_ALI_FIR is not set # CONFIG_VLSI_FIR is not set # CONFIG_VIA_FIR is not set +# CONFIG_MCS_FIR is not set CONFIG_BT=m CONFIG_BT_L2CAP=m CONFIG_BT_SCO=m @@ -500,6 +519,7 @@ CONFIG_WIRELESS_EXT=y CONFIG_PREVENT_FIRMWARE_BUILD=y CONFIG_FW_LOADER=y # CONFIG_DEBUG_DRIVER is not set +# CONFIG_SYS_HYPERVISOR is not set # # Connector - unified userspace <-> kernelspace linker @@ -600,7 +620,6 @@ CONFIG_BLK_DEV_PDC202XX_NEW=y CONFIG_BLK_DEV_IDE_PMAC=y CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y CONFIG_BLK_DEV_IDEDMA_PMAC=y -CONFIG_BLK_DEV_IDE_PMAC_BLINK=y # CONFIG_IDE_ARM is not set CONFIG_BLK_DEV_IDEDMA=y # CONFIG_IDEDMA_IVB is not set @@ -661,6 +680,7 @@ CONFIG_SCSI_AIC7XXX_OLD=m # CONFIG_MEGARAID_LEGACY is not set # CONFIG_MEGARAID_SAS is not set # CONFIG_SCSI_SATA is not set +# CONFIG_SCSI_HPTIOP is not set # CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_EATA is not set @@ -705,9 +725,7 @@ CONFIG_MD_LINEAR=m CONFIG_MD_RAID0=m CONFIG_MD_RAID1=m CONFIG_MD_RAID10=m -CONFIG_MD_RAID5=m -CONFIG_MD_RAID5_RESHAPE=y -CONFIG_MD_RAID6=m +# CONFIG_MD_RAID456 is not set CONFIG_MD_MULTIPATH=m CONFIG_MD_FAULTY=m CONFIG_BLK_DEV_DM=m @@ -750,7 +768,6 @@ CONFIG_IEEE1394_OHCI1394=m # CONFIG_IEEE1394_VIDEO1394=m CONFIG_IEEE1394_SBP2=m -# CONFIG_IEEE1394_SBP2_PHYS_DMA is not set # CONFIG_IEEE1394_ETH1394 is not set CONFIG_IEEE1394_DV1394=m CONFIG_IEEE1394_RAWIO=m @@ -766,9 +783,12 @@ CONFIG_IEEE1394_RAWIO=m CONFIG_ADB=y CONFIG_ADB_CUDA=y CONFIG_ADB_PMU=y +CONFIG_ADB_PMU_LED=y +CONFIG_ADB_PMU_LED_IDE=y CONFIG_PMAC_APM_EMU=m CONFIG_PMAC_MEDIABAY=y CONFIG_PMAC_BACKLIGHT=y +CONFIG_PMAC_BACKLIGHT_LEGACY=y CONFIG_INPUT_ADBHID=y CONFIG_MAC_EMUMOUSEBTN=y CONFIG_THERM_WINDTUNNEL=m @@ -858,6 +878,7 @@ CONFIG_PCNET32=y # CONFIG_CHELSIO_T1 is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set +# CONFIG_MYRI10GE is not set # # Token Ring devices @@ -908,6 +929,7 @@ CONFIG_APPLE_AIRPORT=m # Prism GT/Duette 802.11(a/b/g) PCI/Cardbus support # CONFIG_PRISM54=m +# CONFIG_USB_ZD1201 is not set # CONFIG_HOSTAP is not set CONFIG_NET_WIRELESS=y @@ -998,6 +1020,7 @@ CONFIG_SERIO=y CONFIG_VT=y CONFIG_VT_CONSOLE=y CONFIG_HW_CONSOLE=y +# CONFIG_VT_HW_CONSOLE_BINDING is not set # CONFIG_SERIAL_NONSTANDARD is not set # @@ -1029,6 +1052,7 @@ CONFIG_LEGACY_PTY_COUNT=256 # Watchdog Cards # # CONFIG_WATCHDOG is not set +# CONFIG_HW_RANDOM is not set CONFIG_NVRAM=y CONFIG_GEN_RTC=y # CONFIG_GEN_RTC_X is not set @@ -1040,6 +1064,7 @@ CONFIG_GEN_RTC=y # Ftape, the floppy tape device driver # CONFIG_AGP=m +# CONFIG_AGP_SIS is not set # CONFIG_AGP_VIA is not set CONFIG_AGP_UNINORTH=m CONFIG_DRM=m @@ -1092,6 +1117,7 @@ CONFIG_I2C_ALGOBIT=y CONFIG_I2C_POWERMAC=y # CONFIG_I2C_MPC is not set # CONFIG_I2C_NFORCE2 is not set +# CONFIG_I2C_OCORES is not set # CONFIG_I2C_PARPORT_LIGHT is not set # CONFIG_I2C_PROSAVAGE is not set # CONFIG_I2C_SAVAGE4 is not set @@ -1156,12 +1182,13 @@ CONFIG_VIDEO_V4L2=y # # Graphics support # +# CONFIG_FIRMWARE_EDID is not set CONFIG_FB=y CONFIG_FB_CFB_FILLRECT=y CONFIG_FB_CFB_COPYAREA=y CONFIG_FB_CFB_IMAGEBLIT=y CONFIG_FB_MACMODES=y -CONFIG_FB_FIRMWARE_EDID=y +CONFIG_FB_BACKLIGHT=y CONFIG_FB_MODE_HELPERS=y CONFIG_FB_TILEBLITTING=y # CONFIG_FB_CIRRUS is not set @@ -1178,6 +1205,7 @@ CONFIG_FB_IMSTT=y # CONFIG_FB_S1D13XXX is not set CONFIG_FB_NVIDIA=y CONFIG_FB_NVIDIA_I2C=y +CONFIG_FB_NVIDIA_BACKLIGHT=y # CONFIG_FB_RIVA is not set CONFIG_FB_MATROX=y CONFIG_FB_MATROX_MILLENIUM=y @@ -1187,12 +1215,15 @@ CONFIG_FB_MATROX_MYSTIQUE=y # CONFIG_FB_MATROX_MULTIHEAD is not set CONFIG_FB_RADEON=y CONFIG_FB_RADEON_I2C=y +CONFIG_FB_RADEON_BACKLIGHT=y # CONFIG_FB_RADEON_DEBUG is not set CONFIG_FB_ATY128=y +CONFIG_FB_ATY128_BACKLIGHT=y CONFIG_FB_ATY=y CONFIG_FB_ATY_CT=y # CONFIG_FB_ATY_GENERIC_LCD is not set CONFIG_FB_ATY_GX=y +CONFIG_FB_ATY_BACKLIGHT=y # CONFIG_FB_SAVAGE is not set # CONFIG_FB_SIS is not set # CONFIG_FB_NEOMAGIC is not set @@ -1221,7 +1252,11 @@ CONFIG_LOGO=y CONFIG_LOGO_LINUX_MONO=y CONFIG_LOGO_LINUX_VGA16=y CONFIG_LOGO_LINUX_CLUT224=y -# CONFIG_BACKLIGHT_LCD_SUPPORT is not set +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_BACKLIGHT_CLASS_DEVICE=y +CONFIG_BACKLIGHT_DEVICE=y +CONFIG_LCD_CLASS_DEVICE=m +CONFIG_LCD_DEVICE=y # # Sound @@ -1278,6 +1313,18 @@ CONFIG_SND_DUMMY=m # CONFIG_SND_CMIPCI is not set # CONFIG_SND_CS4281 is not set # CONFIG_SND_CS46XX is not set +# CONFIG_SND_DARLA20 is not set +# CONFIG_SND_GINA20 is not set +# CONFIG_SND_LAYLA20 is not set +# CONFIG_SND_DARLA24 is not set +# CONFIG_SND_GINA24 is not set +# CONFIG_SND_LAYLA24 is not set +# CONFIG_SND_MONA is not set +# CONFIG_SND_MIA is not set +# CONFIG_SND_ECHO3G is not set +# CONFIG_SND_INDIGO is not set +# CONFIG_SND_INDIGOIO is not set +# CONFIG_SND_INDIGODJ is not set # CONFIG_SND_EMU10K1 is not set # CONFIG_SND_EMU10K1X is not set # CONFIG_SND_ENS1370 is not set @@ -1314,6 +1361,17 @@ CONFIG_SND_DUMMY=m CONFIG_SND_POWERMAC=m CONFIG_SND_POWERMAC_AUTO_DRC=y +# +# Apple Onboard Audio driver +# +CONFIG_SND_AOA=m +CONFIG_SND_AOA_FABRIC_LAYOUT=m +CONFIG_SND_AOA_ONYX=m +CONFIG_SND_AOA_TAS=m +CONFIG_SND_AOA_TOONIE=m +CONFIG_SND_AOA_SOUNDBUS=m +CONFIG_SND_AOA_SOUNDBUS_I2S=m + # # USB devices # @@ -1355,6 +1413,7 @@ CONFIG_USB_DYNAMIC_MINORS=y CONFIG_USB_EHCI_HCD=m CONFIG_USB_EHCI_SPLIT_ISO=y CONFIG_USB_EHCI_ROOT_HUB_TT=y +# CONFIG_USB_EHCI_TT_NEWSCHED is not set # CONFIG_USB_ISP116X_HCD is not set CONFIG_USB_OHCI_HCD=y # CONFIG_USB_OHCI_BIG_ENDIAN is not set @@ -1431,7 +1490,6 @@ CONFIG_USB_NET_NET1080=m # CONFIG_USB_NET_RNDIS_HOST is not set # CONFIG_USB_NET_CDC_SUBSET is not set CONFIG_USB_NET_ZAURUS=m -# CONFIG_USB_ZD1201 is not set CONFIG_USB_MON=y # @@ -1499,10 +1557,12 @@ CONFIG_USB_EZUSB=y # CONFIG_USB_LEGOTOWER is not set # CONFIG_USB_LCD is not set # CONFIG_USB_LED is not set +# CONFIG_USB_CY7C63 is not set # CONFIG_USB_CYTHERM is not set # CONFIG_USB_PHIDGETKIT is not set # CONFIG_USB_PHIDGETSERVO is not set # CONFIG_USB_IDMOUSE is not set +CONFIG_USB_APPLEDISPLAY=m # CONFIG_USB_SISUSBVGA is not set # CONFIG_USB_LD is not set # CONFIG_USB_TEST is not set @@ -1524,7 +1584,8 @@ CONFIG_USB_EZUSB=y # # LED devices # -# CONFIG_NEW_LEDS is not set +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y # # LED drivers @@ -1533,6 +1594,10 @@ CONFIG_USB_EZUSB=y # # LED Triggers # +CONFIG_LEDS_TRIGGERS=y +# CONFIG_LEDS_TRIGGER_TIMER is not set +CONFIG_LEDS_TRIGGER_IDE_DISK=y +# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set # # InfiniBand support @@ -1548,6 +1613,19 @@ CONFIG_USB_EZUSB=y # # CONFIG_RTC_CLASS is not set +# +# DMA Engine support +# +# CONFIG_DMA_ENGINE is not set + +# +# DMA Clients +# + +# +# DMA Devices +# + # # File systems # @@ -1569,6 +1647,7 @@ CONFIG_FS_POSIX_ACL=y # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y # CONFIG_QUOTA is not set CONFIG_DNOTIFY=y # CONFIG_AUTOFS_FS is not set @@ -1649,6 +1728,7 @@ CONFIG_RPCSEC_GSS_KRB5=y CONFIG_SMB_FS=m # CONFIG_SMB_NLS_DEFAULT is not set # CONFIG_CIFS is not set +# CONFIG_CIFS_DEBUG2 is not set # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set @@ -1732,6 +1812,7 @@ CONFIG_TEXTSEARCH=y CONFIG_TEXTSEARCH_KMP=m CONFIG_TEXTSEARCH_BM=m CONFIG_TEXTSEARCH_FSM=m +CONFIG_PLIST=y # # Instrumentation Support @@ -1744,12 +1825,15 @@ CONFIG_OPROFILE=y # # CONFIG_PRINTK_TIME is not set # CONFIG_MAGIC_SYSRQ is not set +# CONFIG_UNUSED_SYMBOLS is not set CONFIG_DEBUG_KERNEL=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_SCHEDSTATS is not set # CONFIG_DEBUG_SLAB is not set # CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_RT_MUTEXES is not set +# CONFIG_RT_MUTEX_TESTER is not set # CONFIG_DEBUG_SPINLOCK is not set # CONFIG_DEBUG_SPINLOCK_SLEEP is not set # CONFIG_DEBUG_KOBJECT is not set @@ -1763,11 +1847,7 @@ CONFIG_XMON=y CONFIG_XMON_DEFAULT=y # CONFIG_BDI_SWITCH is not set CONFIG_BOOTX_TEXT=y -# CONFIG_PPC_EARLY_DEBUG_LPAR is not set -# CONFIG_PPC_EARLY_DEBUG_G5 is not set -# CONFIG_PPC_EARLY_DEBUG_RTAS is not set -# CONFIG_PPC_EARLY_DEBUG_MAPLE is not set -# CONFIG_PPC_EARLY_DEBUG_ISERIES is not set +# CONFIG_PPC_EARLY_DEBUG is not set # # Security options diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index d1266fe2d1ab..53bba41f29bc 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -773,20 +773,6 @@ config BLK_DEV_IDEDMA_PMAC to transfer data to and from memory. Saying Y is safe and improves performance. -config BLK_DEV_IDE_PMAC_BLINK - bool "Blink laptop LED on drive activity (DEPRECATED)" - depends on BLK_DEV_IDE_PMAC && ADB_PMU - select ADB_PMU_LED - select LEDS_TRIGGERS - select LEDS_TRIGGER_IDE_DISK - help - This option enables the use of the sleep LED as a hard drive - activity LED. - This option is deprecated, it only selects ADB_PMU_LED and - LEDS_TRIGGER_IDE_DISK and changes the code in the new led class - device to default to the ide-disk trigger (which should be set - from userspace via sysfs). - config BLK_DEV_IDE_SWARM tristate "IDE for Sibyte evaluation boards" depends on SIBYTE_SB1xxx_SOC diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig index 54f3f6b94efc..dc6003859e54 100644 --- a/drivers/macintosh/Kconfig +++ b/drivers/macintosh/Kconfig @@ -90,6 +90,15 @@ config ADB_PMU_LED and the ide-disk LED trigger and configure appropriately through sysfs. +config ADB_PMU_LED_IDE + bool "Use front LED as IDE LED by default" + depends on ADB_PMU_LED + select LEDS_TRIGGERS + select LEDS_TRIGGER_IDE_DISK + help + This option makes the front LED default to the IDE trigger + so that it blinks on IDE activity. + config PMAC_SMU bool "Support for SMU based PowerMacs" depends on PPC_PMAC64 diff --git a/drivers/macintosh/via-pmu-led.c b/drivers/macintosh/via-pmu-led.c index af8375ed0f5e..5189d5454b1f 100644 --- a/drivers/macintosh/via-pmu-led.c +++ b/drivers/macintosh/via-pmu-led.c @@ -74,7 +74,7 @@ static void pmu_led_set(struct led_classdev *led_cdev, static struct led_classdev pmu_led = { .name = "pmu-front-led", -#ifdef CONFIG_BLK_DEV_IDE_PMAC_BLINK +#ifdef CONFIG_ADB_PMU_LED_IDE .default_trigger = "ide-disk", #endif .brightness_set = pmu_led_set, From e8c0acf9a4fe3b2b6847541bf5cc3c86c18272ec Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 4 Jul 2006 14:06:29 +1000 Subject: [PATCH 0005/1063] [POWERPC] Workaround Pegasos incorrect ISA "ranges" The Pegasos firmware doesn't create a valid "ranges" property for the ISA bridge, thus causing translation of ISA addresses and IO ports to fail. This fixes it, thus re-enabling proper early serial console to work on Pegasos. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/prom_init.c | 34 +++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index ebd501a59abd..b6c3ac20c14c 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2030,6 +2030,39 @@ static void __init fixup_device_tree_maple(void) #define fixup_device_tree_maple() #endif +#ifdef CONFIG_PPC_CHRP +/* Pegasos lacks the "ranges" property in the isa node */ +static void __init fixup_device_tree_chrp(void) +{ + phandle isa; + u32 isa_ranges[6]; + char *name; + int rc; + + name = "/pci@80000000/isa@c"; + isa = call_prom("finddevice", 1, 1, ADDR(name)); + if (!PHANDLE_VALID(isa)) + return; + + rc = prom_getproplen(isa, "ranges"); + if (rc != 0 && rc != PROM_ERROR) + return; + + prom_printf("Fixing up missing ISA range on Pegasos...\n"); + + isa_ranges[0] = 0x1; + isa_ranges[1] = 0x0; + isa_ranges[2] = 0x01006000; + isa_ranges[3] = 0x0; + isa_ranges[4] = 0x0; + isa_ranges[5] = 0x00010000; + prom_setprop(isa, name, "ranges", + isa_ranges, sizeof(isa_ranges)); +} +#else +#define fixup_device_tree_chrp() +#endif + #if defined(CONFIG_PPC64) && defined(CONFIG_PPC_PMAC) static void __init fixup_device_tree_pmac(void) { @@ -2077,6 +2110,7 @@ static void __init fixup_device_tree_pmac(void) static void __init fixup_device_tree(void) { fixup_device_tree_maple(); + fixup_device_tree_chrp(); fixup_device_tree_pmac(); } From 470407a88e549135dce5fba7d86fb9910f500e56 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 4 Jul 2006 14:07:42 +1000 Subject: [PATCH 0006/1063] [POWERPC] Fix 32 bits warning in prom_init.c A warning is hurting my eyes when building 32 bits kernels Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/prom_init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index b6c3ac20c14c..462bced40c12 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -557,7 +557,9 @@ unsigned long prom_memparse(const char *ptr, const char **retptr) static void __init early_cmdline_parse(void) { struct prom_t *_prom = &RELOC(prom); +#ifdef CONFIG_PPC64 const char *opt; +#endif char *p; int l = 0; From 1e031d65b0cb5f882b20ebc356ea0345ff18dbf0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 4 Jul 2006 14:09:36 +1000 Subject: [PATCH 0007/1063] [POWERPC] Fix non-MPIC CHRPs with CONFIG_SMP set Pseudo-CHRP machines like Pegasos without an MPIC would crash at boot if CONFIG_SMP was set because the "smp_ops" pointer was set to MPIC related ops unconditionally. This patch makes it NULL on machines that don't support SMP and provides proper default behaviour in the callers when smp_ops is NULL. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/smp.c | 24 +++++++++++++++++------- arch/powerpc/platforms/chrp/setup.c | 12 ++++++++---- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 46c56cfd1b2f..6a9bc9ce54e0 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -144,13 +144,15 @@ void smp_message_recv(int msg, struct pt_regs *regs) void smp_send_reschedule(int cpu) { - smp_ops->message_pass(cpu, PPC_MSG_RESCHEDULE); + if (likely(smp_ops)) + smp_ops->message_pass(cpu, PPC_MSG_RESCHEDULE); } #ifdef CONFIG_DEBUGGER void smp_send_debugger_break(int cpu) { - smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK); + if (likely(smp_ops)) + smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK); } #endif @@ -158,7 +160,7 @@ void smp_send_debugger_break(int cpu) void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) { crash_ipi_function_ptr = crash_ipi_callback; - if (crash_ipi_callback) { + if (crash_ipi_callback && smp_ops) { mb(); smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_DEBUGGER_BREAK); } @@ -220,6 +222,9 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, /* Can deadlock when called with interrupts disabled */ WARN_ON(irqs_disabled()); + if (unlikely(smp_ops == NULL)) + return -1; + data.func = func; data.info = info; atomic_set(&data.started, 0); @@ -357,7 +362,10 @@ void __init smp_prepare_cpus(unsigned int max_cpus) smp_store_cpu_info(boot_cpuid); cpu_callin_map[boot_cpuid] = 1; - max_cpus = smp_ops->probe(); + if (smp_ops) + max_cpus = smp_ops->probe(); + else + max_cpus = 1; smp_space_timers(max_cpus); @@ -453,7 +461,7 @@ void generic_mach_cpu_die(void) static int __devinit cpu_enable(unsigned int cpu) { - if (smp_ops->cpu_enable) + if (smp_ops && smp_ops->cpu_enable) return smp_ops->cpu_enable(cpu); return -ENOSYS; @@ -467,7 +475,8 @@ int __devinit __cpu_up(unsigned int cpu) if (!cpu_enable(cpu)) return 0; - if (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)) + if (smp_ops == NULL || + (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu))) return -EINVAL; /* Make sure callin-map entry is 0 (can be leftover a CPU @@ -568,7 +577,8 @@ void __init smp_cpus_done(unsigned int max_cpus) old_mask = current->cpus_allowed; set_cpus_allowed(current, cpumask_of_cpu(boot_cpuid)); - smp_ops->setup_cpu(boot_cpuid); + if (smp_ops) + smp_ops->setup_cpu(boot_cpuid); set_cpus_allowed(current, old_mask); diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index 538e337d63e2..9c08ff322290 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -291,10 +291,6 @@ void __init chrp_setup_arch(void) pci_create_OF_bus_map(); -#ifdef CONFIG_SMP - smp_ops = &chrp_smp_ops; -#endif /* CONFIG_SMP */ - /* * Print the banner, then scroll down so boot progress * can be printed. -- Cort @@ -479,6 +475,14 @@ void __init chrp_init_IRQ(void) chrp_find_openpic(); chrp_find_8259(); +#ifdef CONFIG_SMP + /* Pegasos has no MPIC, those ops would make it crash. It might be an + * option to move setting them to after we probe the PIC though + */ + if (chrp_mpic != NULL) + smp_ops = &chrp_smp_ops; +#endif /* CONFIG_SMP */ + if (_chrp_type == _CHRP_Pegasos) ppc_md.get_irq = i8259_irq; From e70e943847bdae13175bf3a8bca6328e369de90a Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 4 Jul 2006 14:11:23 +1000 Subject: [PATCH 0008/1063] [POWERPC] Fix default clock for udbg_16550 This patch makes it possible to provide 0 as the clock value for udbg_16550, making it default to the standard 1.8432Mhz clock Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/udbg_16550.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 0835b4841dea..2d17f2b8eda7 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -81,10 +81,14 @@ static int udbg_550_getc(void) void udbg_init_uart(void __iomem *comport, unsigned int speed, unsigned int clock) { - unsigned int dll, base_bauds = clock / 16; + unsigned int dll, base_bauds; + if (clock == 0) + clock = 1843200; if (speed == 0) speed = 9600; + + base_bauds = clock / 16; dll = base_bauds / speed; if (comport) { From f704b8d1f080ee71b7a9a88bcf585e7dd4272f4b Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 4 Jul 2006 14:14:07 +1000 Subject: [PATCH 0009/1063] [POWERPC] Fix legacy_serial.c error handling on 32 bits The code in legacy_serial.c wouldn't properly compare OF translation results against OF_BAD_ADDR as it's using a phys_addr_t which is 32 bits on some 32-bit powerpc platforms. This fixes it by always using a u64 which is what is returned by the OF parsing routines. It also makes translation failure harmless for ISA serial ports. If they can't translate, we can't use the UART early, but we can still let the 8250 driver use it later on by using IO port accessors. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/legacy_serial.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 7e98e778b52f..359ab89748e0 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -112,7 +112,7 @@ static int __init add_legacy_port(struct device_node *np, int want_index, static int __init add_legacy_soc_port(struct device_node *np, struct device_node *soc_dev) { - phys_addr_t addr; + u64 addr; u32 *addrp; upf_t flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ; @@ -143,7 +143,7 @@ static int __init add_legacy_isa_port(struct device_node *np, u32 *reg; char *typep; int index = -1; - phys_addr_t taddr; + u64 taddr; DBG(" -> add_legacy_isa_port(%s)\n", np->full_name); @@ -165,10 +165,13 @@ static int __init add_legacy_isa_port(struct device_node *np, if (typep && *typep == 'S') index = simple_strtol(typep+1, NULL, 0) - 1; - /* Translate ISA address */ + /* Translate ISA address. If it fails, we still register the port + * with no translated address so that it can be picked up as an IO + * port later by the serial driver + */ taddr = of_translate_address(np, reg); if (taddr == OF_BAD_ADDR) - return -1; + taddr = 0; /* Add port, irq will be dealt with later */ return add_legacy_port(np, index, UPIO_PORT, reg[1], taddr, @@ -180,7 +183,7 @@ static int __init add_legacy_isa_port(struct device_node *np, static int __init add_legacy_pci_port(struct device_node *np, struct device_node *pci_dev) { - phys_addr_t addr, base; + u64 addr, base; u32 *addrp; unsigned int flags; int iotype, index = -1, lindex = 0; From 26c5032eaa64090b2a01973b0c6ea9e7f6a80fa7 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 4 Jul 2006 14:16:28 +1000 Subject: [PATCH 0010/1063] [POWERPC] Add briq support to CHRP The support for Briq machines has been floating around as patches for ages. This cleans it up and adds it once for all. Some of this is based on initial code provided by Karsten Jeppesen and mostly rewritten from scratch by me. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/prom_init.c | 10 +++++-- arch/powerpc/platforms/chrp/pci.c | 44 +++++++++++++++++++++++++---- arch/powerpc/platforms/chrp/setup.c | 27 +++++++++++++++++- include/asm-powerpc/processor.h | 1 + 4 files changed, 74 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 462bced40c12..90972ef6c471 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2033,16 +2033,22 @@ static void __init fixup_device_tree_maple(void) #endif #ifdef CONFIG_PPC_CHRP -/* Pegasos lacks the "ranges" property in the isa node */ +/* Pegasos and BriQ lacks the "ranges" property in the isa node */ static void __init fixup_device_tree_chrp(void) { phandle isa; u32 isa_ranges[6]; + u32 rloc = 0x01006000; /* IO space; PCI device = 12 */ char *name; int rc; name = "/pci@80000000/isa@c"; isa = call_prom("finddevice", 1, 1, ADDR(name)); + if (!PHANDLE_VALID(isa)) { + name = "/pci@ff500000/isa@6"; + isa = call_prom("finddevice", 1, 1, ADDR(name)); + rloc = 0x01003000; /* IO space; PCI device = 6 */ + } if (!PHANDLE_VALID(isa)) return; @@ -2054,7 +2060,7 @@ static void __init fixup_device_tree_chrp(void) isa_ranges[0] = 0x1; isa_ranges[1] = 0x0; - isa_ranges[2] = 0x01006000; + isa_ranges[2] = rloc; isa_ranges[3] = 0x0; isa_ranges[4] = 0x0; isa_ranges[5] = 0x00010000; diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c index 6802cdc3168a..6d7ac649b45e 100644 --- a/arch/powerpc/platforms/chrp/pci.c +++ b/arch/powerpc/platforms/chrp/pci.c @@ -257,7 +257,7 @@ chrp_find_bridges(void) else printk(KERN_INFO "PCI buses %d..%d", bus_range[0], bus_range[1]); - printk(" controlled by %s", dev->type); + printk(" controlled by %s", dev->full_name); if (!is_longtrail) printk(" at %llx", (unsigned long long)r.start); printk("\n"); @@ -289,6 +289,19 @@ chrp_find_bridges(void) setup_indirect_pci(hose, 0xfec00cf8, 0xfee00cfc); } else if (is_pegasos == 2) { setup_peg2(hose, dev); + } else if (!strncmp(model, "IBM,CPC710", 10)) { + setup_indirect_pci(hose, + r.start + 0x000f8000, + r.start + 0x000f8010); + if (index == 0) { + dma = get_property(dev, "system-dma-base",&len); + if (dma && len >= sizeof(*dma)) { + dma = (unsigned int *) + (((unsigned long)dma) + + len - sizeof(*dma)); + pci_dram_offset = *dma; + } + } } else { printk("No methods for %s (model %s), using RTAS\n", dev->full_name, model); @@ -306,8 +319,29 @@ chrp_find_bridges(void) printk("pci_dram_offset = %lx\n", pci_dram_offset); } } - - /* Do not fixup interrupts from OF tree on pegasos */ - if (is_pegasos) - ppc_md.pcibios_fixup = NULL; } + +/* SL82C105 IDE Control/Status Register */ +#define SL82C105_IDECSR 0x40 + +/* Fixup for Winbond ATA quirk, required for briq */ +void chrp_pci_fixup_winbond_ata(struct pci_dev *sl82c105) +{ + u8 progif; + + /* If non-briq machines need that fixup too, please speak up */ + if (!machine_is(chrp) || _chrp_type != _CHRP_briq) + return; + + if ((sl82c105->class & 5) != 5) { + printk("W83C553: Switching SL82C105 IDE to PCI native mode\n"); + /* Enable SL82C105 PCI native IDE mode */ + pci_read_config_byte(sl82c105, PCI_CLASS_PROG, &progif); + pci_write_config_byte(sl82c105, PCI_CLASS_PROG, progif | 0x05); + sl82c105->class |= 0x05; + /* Disable SL82C105 second port */ + pci_write_config_word(sl82c105, SL82C105_IDECSR, 0x0003); + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105, + chrp_pci_fixup_winbond_ata); diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index 9c08ff322290..be39742db809 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -74,6 +74,9 @@ extern irqreturn_t xmon_irq(int, void *, struct pt_regs *); extern unsigned long loops_per_jiffy; +/* To be replaced by RTAS when available */ +static unsigned int *briq_SPOR; + #ifdef CONFIG_SMP extern struct smp_ops_t chrp_smp_ops; #endif @@ -92,6 +95,15 @@ static const char *gg2_cachemodes[4] = { "Disabled", "Write-Through", "Copy-Back", "Transparent Mode" }; +static const char *chrp_names[] = { + "Unknown", + "","","", + "Motorola", + "IBM or Longtrail", + "Genesi Pegasos", + "Total Impact Briq" +}; + void chrp_show_cpuinfo(struct seq_file *m) { int i, sdramen; @@ -229,6 +241,14 @@ static void __init pegasos_set_l2cr(void) } } +static void briq_restart(char *cmd) +{ + local_irq_disable(); + if (briq_SPOR) + out_be32(briq_SPOR, 0); + for(;;); +} + void __init chrp_setup_arch(void) { struct device_node *root = find_path_device ("/"); @@ -245,11 +265,16 @@ void __init chrp_setup_arch(void) _chrp_type = _CHRP_IBM; } else if (machine && strncmp(machine, "MOT", 3) == 0) { _chrp_type = _CHRP_Motorola; + } else if (machine && strncmp(machine, "TotalImpact,BRIQ-1", 18) == 0) { + _chrp_type = _CHRP_briq; + /* Map the SPOR register on briq and change the restart hook */ + briq_SPOR = (unsigned int *)ioremap(0xff0000e8, 4); + ppc_md.restart = briq_restart; } else { /* Let's assume it is an IBM chrp if all else fails */ _chrp_type = _CHRP_IBM; } - printk("chrp type = %x\n", _chrp_type); + printk("chrp type = %x [%s]\n", _chrp_type, chrp_names[_chrp_type]); rtas_initialize(); if (rtas_token("display-character") >= 0) diff --git a/include/asm-powerpc/processor.h b/include/asm-powerpc/processor.h index 22e54a2a6604..6cb6fb19e57f 100644 --- a/include/asm-powerpc/processor.h +++ b/include/asm-powerpc/processor.h @@ -32,6 +32,7 @@ #define _CHRP_Motorola 0x04 /* motorola chrp, the cobra */ #define _CHRP_IBM 0x05 /* IBM chrp, the longtrail and longtrail 2 */ #define _CHRP_Pegasos 0x06 /* Genesi/bplan's Pegasos and Pegasos2 */ +#define _CHRP_briq 0x07 /* TotalImpact's briQ */ #if defined(__KERNEL__) && defined(CONFIG_PPC32) From a45b83957deabbdac9a3d908c6ca4c25f05ce1ad Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 4 Jul 2006 15:06:20 +1000 Subject: [PATCH 0011/1063] [POWERPC] Add support for briq front panel This adds the driver for the Briq front panel. This is a cleaned up version of a driver that has been floating around for some time now, initially written by Karsten Jeppesen and cleaned up by jk and myself. Signed-off-by: Jeremy Kerr Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- drivers/char/Kconfig | 14 ++ drivers/char/Makefile | 1 + drivers/char/briq_panel.c | 268 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 283 insertions(+) create mode 100644 drivers/char/briq_panel.c diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index c40e487d9f5c..11de59ff4229 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -495,6 +495,20 @@ config LEGACY_PTY_COUNT When not in use, each legacy PTY occupies 12 bytes on 32-bit architectures and 24 bytes on 64-bit architectures. +config BRIQ_PANEL + tristate 'Total Impact briQ front panel driver' + ---help--- + The briQ is a small footprint CHRP computer with a frontpanel VFD, a + tristate led and two switches. It is the size of a CDROM drive. + + If you have such one and want anything showing on the VFD then you + must answer Y here. + + To compile this driver as a module, choose M here: the + module will be called briq_panel. + + It's safe to say N here. + config PRINTER tristate "Parallel printer support" depends on PARPORT diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 6e0f4469d8bb..7a7ee5721279 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -51,6 +51,7 @@ obj-$(CONFIG_VIOCONS) += viocons.o obj-$(CONFIG_VIOTAPE) += viotape.o obj-$(CONFIG_HVCS) += hvcs.o obj-$(CONFIG_SGI_MBCS) += mbcs.o +obj-$(CONFIG_BRIQ_PANEL) += briq_panel.o obj-$(CONFIG_PRINTER) += lp.o obj-$(CONFIG_TIPAR) += tipar.o diff --git a/drivers/char/briq_panel.c b/drivers/char/briq_panel.c new file mode 100644 index 000000000000..a0e5eac5f33a --- /dev/null +++ b/drivers/char/briq_panel.c @@ -0,0 +1,268 @@ +/* + * Drivers for the Total Impact PPC based computer "BRIQ" + * by Dr. Karsten Jeppesen + * + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define BRIQ_PANEL_MINOR 156 +#define BRIQ_PANEL_VFD_IOPORT 0x0390 +#define BRIQ_PANEL_LED_IOPORT 0x0398 +#define BRIQ_PANEL_VER "1.1 (04/20/2002)" +#define BRIQ_PANEL_MSG0 "Loading Linux" + +static int vfd_is_open; +static unsigned char vfd[40]; +static int vfd_cursor; +static unsigned char ledpb, led; + +static void update_vfd(void) +{ + int i; + + /* cursor home */ + outb(0x02, BRIQ_PANEL_VFD_IOPORT); + for (i=0; i<20; i++) + outb(vfd[i], BRIQ_PANEL_VFD_IOPORT + 1); + + /* cursor to next line */ + outb(0xc0, BRIQ_PANEL_VFD_IOPORT); + for (i=20; i<40; i++) + outb(vfd[i], BRIQ_PANEL_VFD_IOPORT + 1); + +} + +static void set_led(char state) +{ + if (state == 'R') + led = 0x01; + else if (state == 'G') + led = 0x02; + else if (state == 'Y') + led = 0x03; + else if (state == 'X') + led = 0x00; + outb(led, BRIQ_PANEL_LED_IOPORT); +} + +static int briq_panel_open(struct inode *ino, struct file *filep) +{ + /* enforce single access */ + if (vfd_is_open) + return -EBUSY; + vfd_is_open = 1; + + return 0; +} + +static int briq_panel_release(struct inode *ino, struct file *filep) +{ + if (!vfd_is_open) + return -ENODEV; + + vfd_is_open = 0; + + return 0; +} + +static ssize_t briq_panel_read(struct file *file, char *buf, size_t count, + loff_t *ppos) +{ + unsigned short c; + unsigned char cp; + +#if 0 /* Can't seek (pread) on this device */ + if (ppos != &file->f_pos) + return -ESPIPE; +#endif + + if (!vfd_is_open) + return -ENODEV; + + c = (inb(BRIQ_PANEL_LED_IOPORT) & 0x000c) | (ledpb & 0x0003); + set_led(' '); + /* upper button released */ + if ((!(ledpb & 0x0004)) && (c & 0x0004)) { + cp = ' '; + ledpb = c; + if (copy_to_user(buf, &cp, 1)) + return -EFAULT; + return 1; + } + /* lower button released */ + else if ((!(ledpb & 0x0008)) && (c & 0x0008)) { + cp = '\r'; + ledpb = c; + if (copy_to_user(buf, &cp, 1)) + return -EFAULT; + return 1; + } else { + ledpb = c; + return 0; + } +} + +static void scroll_vfd( void ) +{ + int i; + + for (i=0; i<20; i++) { + vfd[i] = vfd[i+20]; + vfd[i+20] = ' '; + } + vfd_cursor = 20; +} + +static ssize_t briq_panel_write(struct file *file, const char *buf, size_t len, + loff_t *ppos) +{ + size_t indx = len; + int i, esc = 0; + +#if 0 /* Can't seek (pwrite) on this device */ + if (ppos != &file->f_pos) + return -ESPIPE; +#endif + + if (!vfd_is_open) + return -EBUSY; + + for (;;) { + if (!indx) + break; + if (esc) { + set_led(*buf); + esc = 0; + } else if (*buf == 27) { + esc = 1; + } else if (*buf == 12) { + /* do a form feed */ + for (i=0; i<40; i++) + vfd[i] = ' '; + vfd_cursor = 0; + } else if (*buf == 10) { + if (vfd_cursor < 20) + vfd_cursor = 20; + else if (vfd_cursor < 40) + vfd_cursor = 40; + else if (vfd_cursor < 60) + vfd_cursor = 60; + if (vfd_cursor > 59) + scroll_vfd(); + } else { + /* just a character */ + if (vfd_cursor > 39) + scroll_vfd(); + vfd[vfd_cursor++] = *buf; + } + indx--; + buf++; + } + update_vfd(); + + return len; +} + +static struct file_operations briq_panel_fops = { + .owner = THIS_MODULE, + .read = briq_panel_read, + .write = briq_panel_write, + .open = briq_panel_open, + .release = briq_panel_release, +}; + +static struct miscdevice briq_panel_miscdev = { + BRIQ_PANEL_MINOR, + "briq_panel", + &briq_panel_fops +}; + +static int __init briq_panel_init(void) +{ + struct device_node *root = find_path_device("/"); + char *machine; + int i; + + machine = get_property(root, "model", NULL); + if (!machine || strncmp(machine, "TotalImpact,BRIQ-1", 18) != 0) + return -ENODEV; + + printk(KERN_INFO + "briq_panel: v%s Dr. Karsten Jeppesen (kj@totalimpact.com)\n", + BRIQ_PANEL_VER); + + if (!request_region(BRIQ_PANEL_VFD_IOPORT, 4, "BRIQ Front Panel")) + return -EBUSY; + + if (!request_region(BRIQ_PANEL_LED_IOPORT, 2, "BRIQ Front Panel")) { + release_region(BRIQ_PANEL_VFD_IOPORT, 4); + return -EBUSY; + } + ledpb = inb(BRIQ_PANEL_LED_IOPORT) & 0x000c; + + if (misc_register(&briq_panel_miscdev) < 0) { + release_region(BRIQ_PANEL_VFD_IOPORT, 4); + release_region(BRIQ_PANEL_LED_IOPORT, 2); + return -EBUSY; + } + + outb(0x38, BRIQ_PANEL_VFD_IOPORT); /* Function set */ + outb(0x01, BRIQ_PANEL_VFD_IOPORT); /* Clear display */ + outb(0x0c, BRIQ_PANEL_VFD_IOPORT); /* Display on */ + outb(0x06, BRIQ_PANEL_VFD_IOPORT); /* Entry normal */ + for (i=0; i<40; i++) + vfd[i]=' '; +#ifndef MODULE + vfd[0] = 'L'; + vfd[1] = 'o'; + vfd[2] = 'a'; + vfd[3] = 'd'; + vfd[4] = 'i'; + vfd[5] = 'n'; + vfd[6] = 'g'; + vfd[7] = ' '; + vfd[8] = '.'; + vfd[9] = '.'; + vfd[10] = '.'; +#endif /* !MODULE */ + + update_vfd(); + + return 0; +} + +static void __exit briq_panel_exit(void) +{ + misc_deregister(&briq_panel_miscdev); + release_region(BRIQ_PANEL_VFD_IOPORT, 4); + release_region(BRIQ_PANEL_LED_IOPORT, 2); +} + +module_init(briq_panel_init); +module_exit(briq_panel_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Karsten Jeppesen "); +MODULE_DESCRIPTION("Driver for the Total Impact briQ front panel"); From 73ea6959b11821ba5ade77fb1d3d4aed52be3b67 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 4 Jul 2006 17:07:18 +1000 Subject: [PATCH 0012/1063] [POWERPC] More offb/bootx fixes There were still some issues with offb when BootX doesn't provide a proper display node, this fixes them. This also re-instates the palette hacks that were disabled a couple of kernel versions ago when I converted to the new OF parsing, and shuffles some functions around to avoid prototypes. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/powermac/bootx_init.c | 35 ++- drivers/video/offb.c | 307 ++++++++++--------- 2 files changed, 185 insertions(+), 157 deletions(-) diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c index 871b002c9f90..6a026c733f6a 100644 --- a/arch/powerpc/platforms/powermac/bootx_init.c +++ b/arch/powerpc/platforms/powermac/bootx_init.c @@ -181,13 +181,18 @@ static void __init bootx_add_chosen_props(unsigned long base, } static void __init bootx_add_display_props(unsigned long base, - unsigned long *mem_end) + unsigned long *mem_end, + int has_real_node) { boot_infos_t *bi = bootx_info; u32 tmp; - bootx_dt_add_prop("linux,boot-display", NULL, 0, mem_end); - bootx_dt_add_prop("linux,opened", NULL, 0, mem_end); + if (has_real_node) { + bootx_dt_add_prop("linux,boot-display", NULL, 0, mem_end); + bootx_dt_add_prop("linux,opened", NULL, 0, mem_end); + } else + bootx_dt_add_prop("linux,bootx-noscreen", NULL, 0, mem_end); + tmp = bi->dispDeviceDepth; bootx_dt_add_prop("linux,bootx-depth", &tmp, 4, mem_end); tmp = bi->dispDeviceRect[2] - bi->dispDeviceRect[0]; @@ -241,11 +246,6 @@ static void __init bootx_scan_dt_build_strings(unsigned long base, DBG(" detected display ! adding properties names !\n"); bootx_dt_add_string("linux,boot-display", mem_end); bootx_dt_add_string("linux,opened", mem_end); - bootx_dt_add_string("linux,bootx-depth", mem_end); - bootx_dt_add_string("linux,bootx-width", mem_end); - bootx_dt_add_string("linux,bootx-height", mem_end); - bootx_dt_add_string("linux,bootx-linebytes", mem_end); - bootx_dt_add_string("linux,bootx-addr", mem_end); strncpy(bootx_disp_path, namep, 255); } @@ -329,10 +329,13 @@ static void __init bootx_scan_dt_build_struct(unsigned long base, ppp = &pp->next; } - if (node == bootx_node_chosen) + if (node == bootx_node_chosen) { bootx_add_chosen_props(base, mem_end); - if (node == bootx_info->dispDeviceRegEntryOffset) - bootx_add_display_props(base, mem_end); + if (bootx_info->dispDeviceRegEntryOffset == 0) + bootx_add_display_props(base, mem_end, 0); + } + else if (node == bootx_info->dispDeviceRegEntryOffset) + bootx_add_display_props(base, mem_end, 1); /* do all our children */ cpp = &np->child; @@ -374,6 +377,14 @@ static unsigned long __init bootx_flatten_dt(unsigned long start) mem_end += 4; bootx_dt_strend = mem_end; bootx_scan_dt_build_strings(base, 4, &mem_end); + /* Add some strings */ + bootx_dt_add_string("linux,bootx-noscreen", &mem_end); + bootx_dt_add_string("linux,bootx-depth", &mem_end); + bootx_dt_add_string("linux,bootx-width", &mem_end); + bootx_dt_add_string("linux,bootx-height", &mem_end); + bootx_dt_add_string("linux,bootx-linebytes", &mem_end); + bootx_dt_add_string("linux,bootx-addr", &mem_end); + /* Wrap up strings */ hdr->off_dt_strings = bootx_dt_strbase - mem_start; hdr->dt_strings_size = bootx_dt_strend - bootx_dt_strbase; @@ -471,6 +482,7 @@ void __init bootx_init(unsigned long r3, unsigned long r4) if (bi->dispDeviceDepth == 16) bi->dispDeviceDepth = 15; + #ifdef CONFIG_BOOTX_TEXT ptr = (unsigned long)bi->logicalDisplayBase; ptr += bi->dispDeviceRect[1] * bi->dispDeviceRowBytes; @@ -508,6 +520,7 @@ void __init bootx_init(unsigned long r3, unsigned long r4) #ifdef CONFIG_BOOTX_TEXT btext_welcome(bi); #endif + /* New BootX enters kernel with MMU off, i/os are not allowed * here. This hack will have been done by the boostrap anyway. */ diff --git a/drivers/video/offb.c b/drivers/video/offb.c index 71ce1fa45cf4..faba67228526 100644 --- a/drivers/video/offb.c +++ b/drivers/video/offb.c @@ -63,8 +63,6 @@ struct offb_par default_par; * Interface used by the world */ -int offb_init(void); - static int offb_setcolreg(u_int regno, u_int red, u_int green, u_int blue, u_int transp, struct fb_info *info); static int offb_blank(int blank, struct fb_info *info); @@ -73,11 +71,6 @@ static int offb_blank(int blank, struct fb_info *info); extern boot_infos_t *boot_infos; #endif -static void offb_init_nodriver(struct device_node *); -static void offb_init_fb(const char *name, const char *full_name, - int width, int height, int depth, int pitch, - unsigned long address, struct device_node *dp); - static struct fb_ops offb_ops = { .owner = THIS_MODULE, .fb_setcolreg = offb_setcolreg, @@ -230,123 +223,17 @@ static int offb_blank(int blank, struct fb_info *info) return 0; } - /* - * Initialisation - */ -int __init offb_init(void) +static void __iomem *offb_map_reg(struct device_node *np, int index, + unsigned long offset, unsigned long size) { - struct device_node *dp = NULL, *boot_disp = NULL; + struct resource r; - if (fb_get_options("offb", NULL)) - return -ENODEV; - - for (dp = NULL; (dp = of_find_node_by_type(dp, "display"));) { - if (get_property(dp, "linux,opened", NULL) && - get_property(dp, "linux,boot-display", NULL)) { - boot_disp = dp; - offb_init_nodriver(dp); - } - } - for (dp = NULL; (dp = of_find_node_by_type(dp, "display"));) { - if (get_property(dp, "linux,opened", NULL) && - dp != boot_disp) - offb_init_nodriver(dp); - } - - return 0; -} - - -static void __init offb_init_nodriver(struct device_node *dp) -{ - unsigned int len; - int i, width = 640, height = 480, depth = 8, pitch = 640; - unsigned int flags, rsize, addr_prop = 0; - unsigned long max_size = 0; - u64 rstart, address = OF_BAD_ADDR; - u32 *pp, *addrp, *up; - u64 asize; - - pp = (u32 *)get_property(dp, "linux,bootx-depth", &len); - if (pp == NULL) - pp = (u32 *)get_property(dp, "depth", &len); - if (pp && len == sizeof(u32)) - depth = *pp; - - pp = (u32 *)get_property(dp, "linux,bootx-width", &len); - if (pp == NULL) - pp = (u32 *)get_property(dp, "width", &len); - if (pp && len == sizeof(u32)) - width = *pp; - - pp = (u32 *)get_property(dp, "linux,bootx-height", &len); - if (pp == NULL) - pp = (u32 *)get_property(dp, "height", &len); - if (pp && len == sizeof(u32)) - height = *pp; - - pp = (u32 *)get_property(dp, "linux,bootx-linebytes", &len); - if (pp == NULL) - pp = (u32 *)get_property(dp, "linebytes", &len); - if (pp && len == sizeof(u32)) - pitch = *pp; - else - pitch = width * ((depth + 7) / 8); - - rsize = (unsigned long)pitch * (unsigned long)height; - - /* Ok, now we try to figure out the address of the framebuffer. - * - * Unfortunately, Open Firmware doesn't provide a standard way to do - * so. All we can do is a dodgy heuristic that happens to work in - * practice. On most machines, the "address" property contains what - * we need, though not on Matrox cards found in IBM machines. What I've - * found that appears to give good results is to go through the PCI - * ranges and pick one that is both big enough and if possible encloses - * the "address" property. If none match, we pick the biggest - */ - up = (u32 *)get_property(dp, "linux,bootx-addr", &len); - if (up == NULL) - up = (u32 *)get_property(dp, "address", &len); - if (up && len == sizeof(u32)) - addr_prop = *up; - - for (i = 0; (addrp = of_get_address(dp, i, &asize, &flags)) - != NULL; i++) { - int match_addrp = 0; - - if (!(flags & IORESOURCE_MEM)) - continue; - if (asize < rsize) - continue; - rstart = of_translate_address(dp, addrp); - if (rstart == OF_BAD_ADDR) - continue; - if (addr_prop && (rstart <= addr_prop) && - ((rstart + asize) >= (addr_prop + rsize))) - match_addrp = 1; - if (match_addrp) { - address = addr_prop; - break; - } - if (rsize > max_size) { - max_size = rsize; - address = OF_BAD_ADDR; - } - - if (address == OF_BAD_ADDR) - address = rstart; - } - if (address == OF_BAD_ADDR && addr_prop) - address = (u64)addr_prop; - if (address != OF_BAD_ADDR) { - /* kludge for valkyrie */ - if (strcmp(dp->name, "valkyrie") == 0) - address += 0x1000; - offb_init_fb(dp->name, dp->full_name, width, height, depth, - pitch, address, dp); - } + if (of_address_to_resource(np, index, &r)) + return 0; + if ((r.start + offset + size) > r.end) + return 0; + return ioremap(r.start + offset, size); } static void __init offb_init_fb(const char *name, const char *full_name, @@ -403,45 +290,39 @@ static void __init offb_init_fb(const char *name, const char *full_name, par->cmap_type = cmap_unknown; if (depth == 8) { - /* Palette hacks disabled for now */ -#if 0 if (dp && !strncmp(name, "ATY,Rage128", 11)) { - unsigned long regbase = dp->addrs[2].address; - par->cmap_adr = ioremap(regbase, 0x1FFF); - par->cmap_type = cmap_r128; + par->cmap_adr = offb_map_reg(dp, 2, 0, 0x1fff); + if (par->cmap_adr) + par->cmap_type = cmap_r128; } else if (dp && (!strncmp(name, "ATY,RageM3pA", 12) || !strncmp(name, "ATY,RageM3p12A", 14))) { - unsigned long regbase = - dp->parent->addrs[2].address; - par->cmap_adr = ioremap(regbase, 0x1FFF); - par->cmap_type = cmap_M3A; + par->cmap_adr = offb_map_reg(dp, 2, 0, 0x1fff); + if (par->cmap_adr) + par->cmap_type = cmap_M3A; } else if (dp && !strncmp(name, "ATY,RageM3pB", 12)) { - unsigned long regbase = - dp->parent->addrs[2].address; - par->cmap_adr = ioremap(regbase, 0x1FFF); - par->cmap_type = cmap_M3B; + par->cmap_adr = offb_map_reg(dp, 2, 0, 0x1fff); + if (par->cmap_adr) + par->cmap_type = cmap_M3B; } else if (dp && !strncmp(name, "ATY,Rage6", 9)) { - unsigned long regbase = dp->addrs[1].address; - par->cmap_adr = ioremap(regbase, 0x1FFF); - par->cmap_type = cmap_radeon; + par->cmap_adr = offb_map_reg(dp, 1, 0, 0x1fff); + if (par->cmap_adr) + par->cmap_type = cmap_radeon; } else if (!strncmp(name, "ATY,", 4)) { unsigned long base = address & 0xff000000UL; par->cmap_adr = ioremap(base + 0x7ff000, 0x1000) + 0xcc0; par->cmap_data = par->cmap_adr + 1; par->cmap_type = cmap_m64; - } else if (device_is_compatible(dp, "pci1014,b7")) { - unsigned long regbase = dp->addrs[0].address; - par->cmap_adr = ioremap(regbase + 0x6000, 0x1000); - par->cmap_type = cmap_gxt2000; + } else if (dp && device_is_compatible(dp, "pci1014,b7")) { + par->cmap_adr = offb_map_reg(dp, 0, 0x6000, 0x1000); + if (par->cmap_adr) + par->cmap_type = cmap_gxt2000; } -#endif - fix->visual = par->cmap_adr ? FB_VISUAL_PSEUDOCOLOR - : FB_VISUAL_STATIC_PSEUDOCOLOR; + fix->visual = (par->cmap_type != cmap_unknown) ? + FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_STATIC_PSEUDOCOLOR; } else - fix->visual = /* par->cmap_adr ? FB_VISUAL_DIRECTCOLOR - : */ FB_VISUAL_TRUECOLOR; + fix->visual = FB_VISUAL_TRUECOLOR; var->xoffset = var->yoffset = 0; switch (depth) { @@ -521,5 +402,139 @@ static void __init offb_init_fb(const char *name, const char *full_name, info->node, full_name); } + +static void __init offb_init_nodriver(struct device_node *dp, int no_real_node) +{ + unsigned int len; + int i, width = 640, height = 480, depth = 8, pitch = 640; + unsigned int flags, rsize, addr_prop = 0; + unsigned long max_size = 0; + u64 rstart, address = OF_BAD_ADDR; + u32 *pp, *addrp, *up; + u64 asize; + + pp = (u32 *)get_property(dp, "linux,bootx-depth", &len); + if (pp == NULL) + pp = (u32 *)get_property(dp, "depth", &len); + if (pp && len == sizeof(u32)) + depth = *pp; + + pp = (u32 *)get_property(dp, "linux,bootx-width", &len); + if (pp == NULL) + pp = (u32 *)get_property(dp, "width", &len); + if (pp && len == sizeof(u32)) + width = *pp; + + pp = (u32 *)get_property(dp, "linux,bootx-height", &len); + if (pp == NULL) + pp = (u32 *)get_property(dp, "height", &len); + if (pp && len == sizeof(u32)) + height = *pp; + + pp = (u32 *)get_property(dp, "linux,bootx-linebytes", &len); + if (pp == NULL) + pp = (u32 *)get_property(dp, "linebytes", &len); + if (pp && len == sizeof(u32)) + pitch = *pp; + else + pitch = width * ((depth + 7) / 8); + + rsize = (unsigned long)pitch * (unsigned long)height; + + /* Ok, now we try to figure out the address of the framebuffer. + * + * Unfortunately, Open Firmware doesn't provide a standard way to do + * so. All we can do is a dodgy heuristic that happens to work in + * practice. On most machines, the "address" property contains what + * we need, though not on Matrox cards found in IBM machines. What I've + * found that appears to give good results is to go through the PCI + * ranges and pick one that is both big enough and if possible encloses + * the "address" property. If none match, we pick the biggest + */ + up = (u32 *)get_property(dp, "linux,bootx-addr", &len); + if (up == NULL) + up = (u32 *)get_property(dp, "address", &len); + if (up && len == sizeof(u32)) + addr_prop = *up; + + /* Hack for when BootX is passing us */ + if (no_real_node) + goto skip_addr; + + for (i = 0; (addrp = of_get_address(dp, i, &asize, &flags)) + != NULL; i++) { + int match_addrp = 0; + + if (!(flags & IORESOURCE_MEM)) + continue; + if (asize < rsize) + continue; + rstart = of_translate_address(dp, addrp); + if (rstart == OF_BAD_ADDR) + continue; + if (addr_prop && (rstart <= addr_prop) && + ((rstart + asize) >= (addr_prop + rsize))) + match_addrp = 1; + if (match_addrp) { + address = addr_prop; + break; + } + if (rsize > max_size) { + max_size = rsize; + address = OF_BAD_ADDR; + } + + if (address == OF_BAD_ADDR) + address = rstart; + } + skip_addr: + if (address == OF_BAD_ADDR && addr_prop) + address = (u64)addr_prop; + if (address != OF_BAD_ADDR) { + /* kludge for valkyrie */ + if (strcmp(dp->name, "valkyrie") == 0) + address += 0x1000; + offb_init_fb(no_real_node ? "bootx" : dp->name, + no_real_node ? "display" : dp->full_name, + width, height, depth, pitch, address, + no_real_node ? dp : NULL); + } +} + +static int __init offb_init(void) +{ + struct device_node *dp = NULL, *boot_disp = NULL; + + if (fb_get_options("offb", NULL)) + return -ENODEV; + + /* Check if we have a MacOS display without a node spec */ + if (get_property(of_chosen, "linux,bootx-noscreen", NULL) != NULL) { + /* The old code tried to work out which node was the MacOS + * display based on the address. I'm dropping that since the + * lack of a node spec only happens with old BootX versions + * (users can update) and with this code, they'll still get + * a display (just not the palette hacks). + */ + offb_init_nodriver(of_chosen, 1); + } + + for (dp = NULL; (dp = of_find_node_by_type(dp, "display"));) { + if (get_property(dp, "linux,opened", NULL) && + get_property(dp, "linux,boot-display", NULL)) { + boot_disp = dp; + offb_init_nodriver(dp, 0); + } + } + for (dp = NULL; (dp = of_find_node_by_type(dp, "display"));) { + if (get_property(dp, "linux,opened", NULL) && + dp != boot_disp) + offb_init_nodriver(dp, 0); + } + + return 0; +} + + module_init(offb_init); MODULE_LICENSE("GPL"); From e7c1f69d4fa4da47dc995b5de64b6cb76ae32081 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 4 Jul 2006 17:13:23 +1000 Subject: [PATCH 0013/1063] [POWERPC] Fix mem= handling when the memory limit is > RMO size There's a bug in my cleaned up mem= handling, if the memory limit is larger than the RMO size we'll erroneously enlarge the RMO size. Fix is to only change the RMO size if the memory limit is less than the current RMO value. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/mm/lmb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/lmb.c b/arch/powerpc/mm/lmb.c index 4b17a7359924..716a2906a24d 100644 --- a/arch/powerpc/mm/lmb.c +++ b/arch/powerpc/mm/lmb.c @@ -320,7 +320,8 @@ void __init lmb_enforce_memory_limit(unsigned long memory_limit) break; } - lmb.rmo_size = lmb.memory.region[0].size; + if (lmb.memory.region[0].size < lmb.rmo_size) + lmb.rmo_size = lmb.memory.region[0].size; /* And truncate any reserves above the limit also. */ for (i = 0; i < lmb.reserved.cnt; i++) { From 861fa7737db889ae1701ba58c083d4a7bd8705d3 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 6 Jul 2006 18:03:06 +1000 Subject: [PATCH 0014/1063] [POWERPC] Xserve G5 thermal control fixes The thermal control for the Xserve G5s had a few issues. For one, the way to program the RPM fans speeds into the FCU is different between it and the desktop models, which I didn't figure out until recently, and it was missing a control loop for the slots fan, running it too fast. Both of those problems were causing the machine to be much more noisy than necessary. This patch also changes the fixed value of the slots fan for desktop G5s to 40% instead of 50%. It seems to still have a pretty good airflow that way and is much less noisy. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- drivers/macintosh/therm_pm72.c | 218 ++++++++++++++++++++++++++++++--- drivers/macintosh/therm_pm72.h | 33 ++++- 2 files changed, 234 insertions(+), 17 deletions(-) diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c index c1fe0b368f76..20bf67244e2c 100644 --- a/drivers/macintosh/therm_pm72.c +++ b/drivers/macintosh/therm_pm72.c @@ -95,6 +95,17 @@ * - Use min/max macros here or there * - Latest darwin updated U3H min fan speed to 20% PWM * + * July. 06, 2006 : 1.3 + * - Fix setting of RPM fans on Xserve G5 (they were going too fast) + * - Add missing slots fan control loop for Xserve G5 + * - Lower fixed slots fan speed from 50% to 40% on desktop G5s. We + * still can't properly implement the control loop for these, so let's + * reduce the noise a little bit, it appears that 40% still gives us + * a pretty good air flow + * - Add code to "tickle" the FCU regulary so it doesn't think that + * we are gone while in fact, the machine just didn't need any fan + * speed change lately + * */ #include @@ -121,7 +132,7 @@ #include "therm_pm72.h" -#define VERSION "1.2b2" +#define VERSION "1.3" #undef DEBUG @@ -146,6 +157,7 @@ static struct basckside_pid_params backside_params; static struct backside_pid_state backside_state; static struct drives_pid_state drives_state; static struct dimm_pid_state dimms_state; +static struct slots_pid_state slots_state; static int state; static int cpu_count; static int cpu_pid_type; @@ -154,7 +166,8 @@ static struct completion ctrl_complete; static int critical_state; static int rackmac; static s32 dimm_output_clamp; - +static int fcu_rpm_shift; +static int fcu_tickle_ticks; static DECLARE_MUTEX(driver_lock); /* @@ -495,13 +508,20 @@ static int start_fcu(void) rc = fan_write_reg(0x2e, &buf, 1); if (rc < 0) return -EIO; + rc = fan_read_reg(0, &buf, 1); + if (rc < 0) + return -EIO; + fcu_rpm_shift = (buf == 1) ? 2 : 3; + printk(KERN_DEBUG "FCU Initialized, RPM fan shift is %d\n", + fcu_rpm_shift); + return 0; } static int set_rpm_fan(int fan_index, int rpm) { unsigned char buf[2]; - int rc, id; + int rc, id, min, max; if (fcu_fans[fan_index].type != FCU_FAN_RPM) return -EINVAL; @@ -509,12 +529,15 @@ static int set_rpm_fan(int fan_index, int rpm) if (id == FCU_FAN_ABSENT_ID) return -EINVAL; - if (rpm < 300) - rpm = 300; - else if (rpm > 8191) - rpm = 8191; - buf[0] = rpm >> 5; - buf[1] = rpm << 3; + min = 2400 >> fcu_rpm_shift; + max = 56000 >> fcu_rpm_shift; + + if (rpm < min) + rpm = min; + else if (rpm > max) + rpm = max; + buf[0] = rpm >> (8 - fcu_rpm_shift); + buf[1] = rpm << fcu_rpm_shift; rc = fan_write_reg(0x10 + (id * 2), buf, 2); if (rc < 0) return -EIO; @@ -551,7 +574,7 @@ static int get_rpm_fan(int fan_index, int programmed) if (rc != 2) return -EIO; - return (buf[0] << 5) | buf[1] >> 3; + return (buf[0] << (8 - fcu_rpm_shift)) | buf[1] >> fcu_rpm_shift; } static int set_pwm_fan(int fan_index, int pwm) @@ -609,6 +632,26 @@ static int get_pwm_fan(int fan_index) return (buf[0] * 1000) / 2559; } +static void tickle_fcu(void) +{ + int pwm; + + pwm = get_pwm_fan(SLOTS_FAN_PWM_INDEX); + + DBG("FCU Tickle, slots fan is: %d\n", pwm); + if (pwm < 0) + pwm = 100; + + if (!rackmac) { + pwm = SLOTS_FAN_DEFAULT_PWM; + } else if (pwm < SLOTS_PID_OUTPUT_MIN) + pwm = SLOTS_PID_OUTPUT_MIN; + + /* That is hopefully enough to make the FCU happy */ + set_pwm_fan(SLOTS_FAN_PWM_INDEX, pwm); +} + + /* * Utility routine to read the CPU calibration EEPROM data * from the device-tree @@ -715,6 +758,9 @@ BUILD_SHOW_FUNC_INT(backside_fan_pwm, backside_state.pwm) BUILD_SHOW_FUNC_FIX(drives_temperature, drives_state.last_temp) BUILD_SHOW_FUNC_INT(drives_fan_rpm, drives_state.rpm) +BUILD_SHOW_FUNC_FIX(slots_temperature, slots_state.last_temp) +BUILD_SHOW_FUNC_INT(slots_fan_pwm, slots_state.pwm) + BUILD_SHOW_FUNC_FIX(dimms_temperature, dimms_state.last_temp) static DEVICE_ATTR(cpu0_temperature,S_IRUGO,show_cpu0_temperature,NULL); @@ -735,6 +781,9 @@ static DEVICE_ATTR(backside_fan_pwm,S_IRUGO,show_backside_fan_pwm,NULL); static DEVICE_ATTR(drives_temperature,S_IRUGO,show_drives_temperature,NULL); static DEVICE_ATTR(drives_fan_rpm,S_IRUGO,show_drives_fan_rpm,NULL); +static DEVICE_ATTR(slots_temperature,S_IRUGO,show_slots_temperature,NULL); +static DEVICE_ATTR(slots_fan_pwm,S_IRUGO,show_slots_fan_pwm,NULL); + static DEVICE_ATTR(dimms_temperature,S_IRUGO,show_dimms_temperature,NULL); /* @@ -1076,6 +1125,9 @@ static void do_monitor_cpu_rack(struct cpu_pid_state *state) fan_min = dimm_output_clamp; fan_min = max(fan_min, (int)state->mpu.rminn_intake_fan); + DBG(" CPU min mpu = %d, min dimm = %d\n", + state->mpu.rminn_intake_fan, dimm_output_clamp); + state->rpm = max(state->rpm, (int)fan_min); state->rpm = min(state->rpm, (int)state->mpu.rmaxn_intake_fan); state->intake_rpm = state->rpm; @@ -1374,7 +1426,8 @@ static void do_monitor_drives(struct drives_pid_state *state) DBG(" current rpm: %d\n", state->rpm); /* Get some sensor readings */ - temp = le16_to_cpu(i2c_smbus_read_word_data(state->monitor, DS1775_TEMP)) << 8; + temp = le16_to_cpu(i2c_smbus_read_word_data(state->monitor, + DS1775_TEMP)) << 8; state->last_temp = temp; DBG(" temp: %d.%03d, target: %d.%03d\n", FIX32TOPRINT(temp), FIX32TOPRINT(DRIVES_PID_INPUT_TARGET)); @@ -1575,7 +1628,7 @@ static int init_dimms_state(struct dimm_pid_state *state) } /* - * Dispose of the state data for the drives control loop + * Dispose of the state data for the DIMM control loop */ static void dispose_dimms_state(struct dimm_pid_state *state) { @@ -1588,6 +1641,127 @@ static void dispose_dimms_state(struct dimm_pid_state *state) state->monitor = NULL; } +/* + * Slots fan control loop + */ +static void do_monitor_slots(struct slots_pid_state *state) +{ + s32 temp, integral, derivative; + s64 integ_p, deriv_p, prop_p, sum; + int i, rc; + + if (--state->ticks != 0) + return; + state->ticks = SLOTS_PID_INTERVAL; + + DBG("slots:\n"); + + /* Check fan status */ + rc = get_pwm_fan(SLOTS_FAN_PWM_INDEX); + if (rc < 0) { + printk(KERN_WARNING "Error %d reading slots fan !\n", rc); + /* XXX What do we do now ? */ + } else + state->pwm = rc; + DBG(" current pwm: %d\n", state->pwm); + + /* Get some sensor readings */ + temp = le16_to_cpu(i2c_smbus_read_word_data(state->monitor, + DS1775_TEMP)) << 8; + state->last_temp = temp; + DBG(" temp: %d.%03d, target: %d.%03d\n", FIX32TOPRINT(temp), + FIX32TOPRINT(SLOTS_PID_INPUT_TARGET)); + + /* Store temperature and error in history array */ + state->cur_sample = (state->cur_sample + 1) % SLOTS_PID_HISTORY_SIZE; + state->sample_history[state->cur_sample] = temp; + state->error_history[state->cur_sample] = temp - SLOTS_PID_INPUT_TARGET; + + /* If first loop, fill the history table */ + if (state->first) { + for (i = 0; i < (SLOTS_PID_HISTORY_SIZE - 1); i++) { + state->cur_sample = (state->cur_sample + 1) % + SLOTS_PID_HISTORY_SIZE; + state->sample_history[state->cur_sample] = temp; + state->error_history[state->cur_sample] = + temp - SLOTS_PID_INPUT_TARGET; + } + state->first = 0; + } + + /* Calculate the integral term */ + sum = 0; + integral = 0; + for (i = 0; i < SLOTS_PID_HISTORY_SIZE; i++) + integral += state->error_history[i]; + integral *= SLOTS_PID_INTERVAL; + DBG(" integral: %08x\n", integral); + integ_p = ((s64)SLOTS_PID_G_r) * (s64)integral; + DBG(" integ_p: %d\n", (int)(integ_p >> 36)); + sum += integ_p; + + /* Calculate the derivative term */ + derivative = state->error_history[state->cur_sample] - + state->error_history[(state->cur_sample + SLOTS_PID_HISTORY_SIZE - 1) + % SLOTS_PID_HISTORY_SIZE]; + derivative /= SLOTS_PID_INTERVAL; + deriv_p = ((s64)SLOTS_PID_G_d) * (s64)derivative; + DBG(" deriv_p: %d\n", (int)(deriv_p >> 36)); + sum += deriv_p; + + /* Calculate the proportional term */ + prop_p = ((s64)SLOTS_PID_G_p) * (s64)(state->error_history[state->cur_sample]); + DBG(" prop_p: %d\n", (int)(prop_p >> 36)); + sum += prop_p; + + /* Scale sum */ + sum >>= 36; + + DBG(" sum: %d\n", (int)sum); + state->pwm = (s32)sum; + + state->pwm = max(state->pwm, SLOTS_PID_OUTPUT_MIN); + state->pwm = min(state->pwm, SLOTS_PID_OUTPUT_MAX); + + DBG("** DRIVES PWM: %d\n", (int)state->pwm); + set_pwm_fan(SLOTS_FAN_PWM_INDEX, state->pwm); +} + +/* + * Initialize the state structure for the slots bay fan control loop + */ +static int init_slots_state(struct slots_pid_state *state) +{ + state->ticks = 1; + state->first = 1; + state->pwm = 50; + + state->monitor = attach_i2c_chip(XSERVE_SLOTS_LM75, "slots_temp"); + if (state->monitor == NULL) + return -ENODEV; + + device_create_file(&of_dev->dev, &dev_attr_slots_temperature); + device_create_file(&of_dev->dev, &dev_attr_slots_fan_pwm); + + return 0; +} + +/* + * Dispose of the state data for the slots control loop + */ +static void dispose_slots_state(struct slots_pid_state *state) +{ + if (state->monitor == NULL) + return; + + device_remove_file(&of_dev->dev, &dev_attr_slots_temperature); + device_remove_file(&of_dev->dev, &dev_attr_slots_fan_pwm); + + detach_i2c_chip(state->monitor); + state->monitor = NULL; +} + + static int call_critical_overtemp(void) { char *argv[] = { critical_overtemp_path, NULL }; @@ -1617,14 +1791,17 @@ static int main_control_loop(void *x) goto out; } - /* Set the PCI fan once for now */ - set_pwm_fan(SLOTS_FAN_PWM_INDEX, SLOTS_FAN_DEFAULT_PWM); + /* Set the PCI fan once for now on non-RackMac */ + if (!rackmac) + set_pwm_fan(SLOTS_FAN_PWM_INDEX, SLOTS_FAN_DEFAULT_PWM); /* Initialize ADCs */ initialize_adc(&cpu_state[0]); if (cpu_state[1].monitor != NULL) initialize_adc(&cpu_state[1]); + fcu_tickle_ticks = FCU_TICKLE_TICKS; + up(&driver_lock); while (state == state_attached) { @@ -1634,6 +1811,12 @@ static int main_control_loop(void *x) down(&driver_lock); + /* Tickle the FCU just in case */ + if (--fcu_tickle_ticks < 0) { + fcu_tickle_ticks = FCU_TICKLE_TICKS; + tickle_fcu(); + } + /* First, we always calculate the new DIMMs state on an Xserve */ if (rackmac) do_monitor_dimms(&dimms_state); @@ -1654,7 +1837,9 @@ static int main_control_loop(void *x) } /* Then, the rest */ do_monitor_backside(&backside_state); - if (!rackmac) + if (rackmac) + do_monitor_slots(&slots_state); + else do_monitor_drives(&drives_state); up(&driver_lock); @@ -1696,6 +1881,7 @@ static void dispose_control_loops(void) dispose_cpu_state(&cpu_state[1]); dispose_backside_state(&backside_state); dispose_drives_state(&drives_state); + dispose_slots_state(&slots_state); dispose_dimms_state(&dimms_state); } @@ -1745,6 +1931,8 @@ static int create_control_loops(void) goto fail; if (rackmac && init_dimms_state(&dimms_state)) goto fail; + if (rackmac && init_slots_state(&slots_state)) + goto fail; if (!rackmac && init_drives_state(&drives_state)) goto fail; diff --git a/drivers/macintosh/therm_pm72.h b/drivers/macintosh/therm_pm72.h index fc7e9b7ecaf2..393cc9df94e1 100644 --- a/drivers/macintosh/therm_pm72.h +++ b/drivers/macintosh/therm_pm72.h @@ -105,6 +105,7 @@ static char * critical_overtemp_path = "/sbin/critical_overtemp"; #define DRIVES_DALLAS_ID 0x94 #define BACKSIDE_MAX_ID 0x98 #define XSERVE_DIMMS_LM87 0x25a +#define XSERVE_SLOTS_LM75 0x290 /* * Some MAX6690, DS1775, LM87 register definitions @@ -198,7 +199,7 @@ struct drives_pid_state #define SLOTS_FAN_PWM_DEFAULT_ID 2 #define SLOTS_FAN_PWM_INDEX 2 -#define SLOTS_FAN_DEFAULT_PWM 50 /* Do better here ! */ +#define SLOTS_FAN_DEFAULT_PWM 40 /* Do better here ! */ /* @@ -206,7 +207,7 @@ struct drives_pid_state */ #define DIMM_PID_G_d 0 #define DIMM_PID_G_p 0 -#define DIMM_PID_G_r 0x6553600 +#define DIMM_PID_G_r 0x06553600 #define DIMM_PID_INPUT_TARGET 3276800 #define DIMM_PID_INTERVAL 1 #define DIMM_PID_OUTPUT_MAX 14000 @@ -226,6 +227,31 @@ struct dimm_pid_state }; +/* + * PID factors for the Xserve Slots control loop + */ +#define SLOTS_PID_G_d 0 +#define SLOTS_PID_G_p 0 +#define SLOTS_PID_G_r 0x00100000 +#define SLOTS_PID_INPUT_TARGET 3200000 +#define SLOTS_PID_INTERVAL 1 +#define SLOTS_PID_OUTPUT_MAX 100 +#define SLOTS_PID_OUTPUT_MIN 20 +#define SLOTS_PID_HISTORY_SIZE 20 + +struct slots_pid_state +{ + int ticks; + struct i2c_client * monitor; + s32 sample_history[SLOTS_PID_HISTORY_SIZE]; + s32 error_history[SLOTS_PID_HISTORY_SIZE]; + int cur_sample; + s32 last_temp; + int first; + int pwm; +}; + + /* Desktops */ @@ -283,6 +309,9 @@ struct cpu_pid_state s32 pump_max; }; +/* Tickle FCU every 10 seconds */ +#define FCU_TICKLE_TICKS 10 + /* * Driver state */ From 7ed14c2177694ce086180eb9ca9ca4c6cd72c7ef Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 6 Jul 2006 15:09:19 +1000 Subject: [PATCH 0015/1063] [POWERPC] Add cpufreq support for Xserve G5 The Xserve G5 are capable of frequency switching like other desktop G5s. This enables it. It also fix a Kconfig issue which prevented from building the G5 cpufreq support if CONFIG_PMAC_SMU was not set (the first version of that driver only worked with SMU based macs, but this isn't the case anymore). Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/Kconfig | 2 +- arch/powerpc/platforms/powermac/cpufreq_64.c | 78 ++++++++++++-------- 2 files changed, 50 insertions(+), 30 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 2643dbc3f289..13e583f16ede 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -504,7 +504,7 @@ config CPU_FREQ_PMAC config CPU_FREQ_PMAC64 bool "Support for some Apple G5s" - depends on CPU_FREQ && PMAC_SMU && PPC64 + depends on CPU_FREQ && PPC64 select CPU_FREQ_TABLE help This adds support for frequency switching on Apple iMac G5, diff --git a/arch/powerpc/platforms/powermac/cpufreq_64.c b/arch/powerpc/platforms/powermac/cpufreq_64.c index f08a14516139..a6a84ac5433e 100644 --- a/arch/powerpc/platforms/powermac/cpufreq_64.c +++ b/arch/powerpc/platforms/powermac/cpufreq_64.c @@ -10,6 +10,8 @@ * that is iMac G5 and latest single CPU desktop. */ +#undef DEBUG + #include #include #include @@ -30,13 +32,7 @@ #include #include -#undef DEBUG - -#ifdef DEBUG -#define DBG(fmt...) printk(fmt) -#else -#define DBG(fmt...) -#endif +#define DBG(fmt...) pr_debug(fmt) /* see 970FX user manual */ @@ -82,8 +78,6 @@ static struct freq_attr* g5_cpu_freqs_attr[] = { /* Power mode data is an array of the 32 bits PCR values to use for * the various frequencies, retrieved from the device-tree */ -static u32 *g5_pmode_data; -static int g5_pmode_max; static int g5_pmode_cur; static void (*g5_switch_volt)(int speed_mode); @@ -93,6 +87,11 @@ static int (*g5_query_freq)(void); static DEFINE_MUTEX(g5_switch_mutex); +#ifdef CONFIG_PPC_SMU + +static u32 *g5_pmode_data; +static int g5_pmode_max; + static struct smu_sdbp_fvt *g5_fvt_table; /* table of op. points */ static int g5_fvt_count; /* number of op. points */ static int g5_fvt_cur; /* current op. point */ @@ -209,6 +208,16 @@ static int g5_scom_query_freq(void) return i; } +/* + * Fake voltage switching for platforms with missing support + */ + +static void g5_dummy_switch_volt(int speed_mode) +{ +} + +#endif /* CONFIG_PPC_SMU */ + /* * Platform function based voltage switching for PowerMac7,2 & 7,3 */ @@ -248,6 +257,9 @@ static int g5_pfunc_switch_freq(int speed_mode) struct pmf_args args; u32 done = 0; unsigned long timeout; + int rc; + + DBG("g5_pfunc_switch_freq(%d)\n", speed_mode); /* If frequency is going up, first ramp up the voltage */ if (speed_mode < g5_pmode_cur) @@ -255,9 +267,12 @@ static int g5_pfunc_switch_freq(int speed_mode) /* Do it */ if (speed_mode == CPUFREQ_HIGH) - pmf_call_one(pfunc_cpu_setfreq_high, NULL); + rc = pmf_call_one(pfunc_cpu_setfreq_high, NULL); else - pmf_call_one(pfunc_cpu_setfreq_low, NULL); + rc = pmf_call_one(pfunc_cpu_setfreq_low, NULL); + + if (rc) + printk(KERN_WARNING "cpufreq: pfunc switch error %d\n", rc); /* It's an irq GPIO so we should be able to just block here, * I'll do that later after I've properly tested the IRQ code for @@ -296,13 +311,6 @@ static int g5_pfunc_query_freq(void) return val ? CPUFREQ_HIGH : CPUFREQ_LOW; } -/* - * Fake voltage switching for platforms with missing support - */ - -static void g5_dummy_switch_volt(int speed_mode) -{ -} /* * Common interface to the cpufreq core @@ -375,6 +383,8 @@ static struct cpufreq_driver g5_cpufreq_driver = { }; +#ifdef CONFIG_PPC_SMU + static int __init g5_neo2_cpufreq_init(struct device_node *cpus) { struct device_node *cpunode; @@ -525,6 +535,9 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpus) return rc; } +#endif /* CONFIG_PPC_SMU */ + + static int __init g5_pm72_cpufreq_init(struct device_node *cpus) { struct device_node *cpuid = NULL, *hwclock = NULL, *cpunode = NULL; @@ -533,6 +546,9 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpus) u64 max_freq, min_freq, ih, il; int has_volt = 1, rc = 0; + DBG("cpufreq: Initializing for PowerMac7,2, PowerMac7,3 and" + " RackMac3,1...\n"); + /* Get first CPU node */ for (cpunode = NULL; (cpunode = of_get_next_child(cpus, cpunode)) != NULL;) { @@ -636,6 +652,15 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpus) */ ih = *((u32 *)(eeprom + 0x10)); il = *((u32 *)(eeprom + 0x20)); + + /* Check for machines with no useful settings */ + if (il == ih) { + printk(KERN_WARNING "cpufreq: No low frequency mode available" + " on this model !\n"); + rc = -ENODEV; + goto bail; + } + min_freq = 0; if (ih != 0 && il != 0) min_freq = (max_freq * il) / ih; @@ -643,7 +668,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpus) /* Sanity check */ if (min_freq >= max_freq || min_freq < 1000) { printk(KERN_ERR "cpufreq: Can't calculate low frequency !\n"); - rc = -ENODEV; + rc = -ENXIO; goto bail; } g5_cpu_freqs[0].frequency = max_freq; @@ -690,16 +715,10 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpus) return rc; } -static int __init g5_rm31_cpufreq_init(struct device_node *cpus) -{ - /* NYI */ - return 0; -} - static int __init g5_cpufreq_init(void) { struct device_node *cpus; - int rc; + int rc = 0; cpus = of_find_node_by_path("/cpus"); if (cpus == NULL) { @@ -708,12 +727,13 @@ static int __init g5_cpufreq_init(void) } if (machine_is_compatible("PowerMac7,2") || - machine_is_compatible("PowerMac7,3")) + machine_is_compatible("PowerMac7,3") || + machine_is_compatible("RackMac3,1")) rc = g5_pm72_cpufreq_init(cpus); - else if (machine_is_compatible("RackMac3,1")) - rc = g5_rm31_cpufreq_init(cpus); +#ifdef CONFIG_PPC_SMU else rc = g5_neo2_cpufreq_init(cpus); +#endif /* CONFIG_PPC_SMU */ of_node_put(cpus); return rc; From 980ffd3258dbcdb011e929de5d658ec81febba8d Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Tue, 4 Jul 2006 16:44:46 +1000 Subject: [PATCH 0016/1063] [POWERPC] Remove linux,device properties The linux,device property isn't used anywhere within the kernel, and since it's a kernel pointer, it's a little useless for userspace. This change removes the code to create this property in of_device_register. Built for pmac32. Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/of_device.c | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/arch/powerpc/kernel/of_device.c b/arch/powerpc/kernel/of_device.c index 3262b73a3a68..397c83eda20e 100644 --- a/arch/powerpc/kernel/of_device.c +++ b/arch/powerpc/kernel/of_device.c @@ -189,27 +189,9 @@ void of_release_dev(struct device *dev) int of_device_register(struct of_device *ofdev) { int rc; - struct of_device **odprop; BUG_ON(ofdev->node == NULL); - odprop = (struct of_device **)get_property(ofdev->node, "linux,device", NULL); - if (!odprop) { - struct property *new_prop; - - new_prop = kmalloc(sizeof(struct property) + sizeof(struct of_device *), - GFP_KERNEL); - if (new_prop == NULL) - return -ENOMEM; - new_prop->name = "linux,device"; - new_prop->length = sizeof(sizeof(struct of_device *)); - new_prop->value = (unsigned char *)&new_prop[1]; - odprop = (struct of_device **)new_prop->value; - *odprop = NULL; - prom_add_property(ofdev->node, new_prop); - } - *odprop = ofdev; - rc = device_register(&ofdev->dev); if (rc) return rc; @@ -221,14 +203,8 @@ int of_device_register(struct of_device *ofdev) void of_device_unregister(struct of_device *ofdev) { - struct of_device **odprop; - device_remove_file(&ofdev->dev, &dev_attr_devspec); - odprop = (struct of_device **)get_property(ofdev->node, "linux,device", NULL); - if (odprop) - *odprop = NULL; - device_unregister(&ofdev->dev); } From b5a1a9abe1a54ba40a9612001920f98bbdd0c56f Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Tue, 4 Jul 2006 16:46:44 +1000 Subject: [PATCH 0017/1063] [POWERPC] Use const qualifiers for prom parsing utilites The of_bus callbacks map and get_flags can be constified, as they don't alter the range or addr arguments. of_dump_addr and of_read_addr can also be constified. Built for 32- and 64-bit powerpc Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/prom_parse.c | 22 ++++++++++++---------- include/asm-powerpc/prom.h | 2 +- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c index 21009b1f7869..e9960170667b 100644 --- a/arch/powerpc/kernel/prom_parse.c +++ b/arch/powerpc/kernel/prom_parse.c @@ -27,7 +27,7 @@ /* Debug utility */ #ifdef DEBUG -static void of_dump_addr(const char *s, u32 *addr, int na) +static void of_dump_addr(const char *s, const u32 *addr, int na) { printk("%s", s); while(na--) @@ -35,7 +35,7 @@ static void of_dump_addr(const char *s, u32 *addr, int na) printk("\n"); } #else -static void of_dump_addr(const char *s, u32 *addr, int na) { } +static void of_dump_addr(const char *s, const u32 *addr, int na) { } #endif @@ -46,9 +46,10 @@ struct of_bus { int (*match)(struct device_node *parent); void (*count_cells)(struct device_node *child, int *addrc, int *sizec); - u64 (*map)(u32 *addr, u32 *range, int na, int ns, int pna); + u64 (*map)(u32 *addr, const u32 *range, + int na, int ns, int pna); int (*translate)(u32 *addr, u64 offset, int na); - unsigned int (*get_flags)(u32 *addr); + unsigned int (*get_flags)(const u32 *addr); }; @@ -65,7 +66,8 @@ static void of_bus_default_count_cells(struct device_node *dev, *sizec = prom_n_size_cells(dev); } -static u64 of_bus_default_map(u32 *addr, u32 *range, int na, int ns, int pna) +static u64 of_bus_default_map(u32 *addr, const u32 *range, + int na, int ns, int pna) { u64 cp, s, da; @@ -93,7 +95,7 @@ static int of_bus_default_translate(u32 *addr, u64 offset, int na) return 0; } -static unsigned int of_bus_default_get_flags(u32 *addr) +static unsigned int of_bus_default_get_flags(const u32 *addr) { return IORESOURCE_MEM; } @@ -118,7 +120,7 @@ static void of_bus_pci_count_cells(struct device_node *np, *sizec = 2; } -static u64 of_bus_pci_map(u32 *addr, u32 *range, int na, int ns, int pna) +static u64 of_bus_pci_map(u32 *addr, const u32 *range, int na, int ns, int pna) { u64 cp, s, da; @@ -143,7 +145,7 @@ static int of_bus_pci_translate(u32 *addr, u64 offset, int na) return of_bus_default_translate(addr + 1, offset, na - 1); } -static unsigned int of_bus_pci_get_flags(u32 *addr) +static unsigned int of_bus_pci_get_flags(const u32 *addr) { unsigned int flags = 0; u32 w = addr[0]; @@ -178,7 +180,7 @@ static void of_bus_isa_count_cells(struct device_node *child, *sizec = 1; } -static u64 of_bus_isa_map(u32 *addr, u32 *range, int na, int ns, int pna) +static u64 of_bus_isa_map(u32 *addr, const u32 *range, int na, int ns, int pna) { u64 cp, s, da; @@ -203,7 +205,7 @@ static int of_bus_isa_translate(u32 *addr, u64 offset, int na) return of_bus_default_translate(addr + 1, offset, na - 1); } -static unsigned int of_bus_isa_get_flags(u32 *addr) +static unsigned int of_bus_isa_get_flags(const u32 *addr) { unsigned int flags = 0; u32 w = addr[0]; diff --git a/include/asm-powerpc/prom.h b/include/asm-powerpc/prom.h index b095a285c84b..56f6ea0c76de 100644 --- a/include/asm-powerpc/prom.h +++ b/include/asm-powerpc/prom.h @@ -198,7 +198,7 @@ extern int release_OF_resource(struct device_node* node, int index); /* Helper to read a big number */ -static inline u64 of_read_number(u32 *cell, int size) +static inline u64 of_read_number(const u32 *cell, int size) { u64 r = 0; while (size--) From 3da27289a8ecc688fc62c0961dfe89d392370480 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Tue, 4 Jul 2006 16:47:18 +1000 Subject: [PATCH 0018/1063] [POWERPC] Remove linux,pci-domain properties The linux,pci-domain property is no longer used by DLPAR/PCI Hotplug utilites, or LSVPD. This change removes it. Built for ppc64_defconfig. Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/pci_64.c | 39 +----------------------------------- 1 file changed, 1 insertion(+), 38 deletions(-) diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index efc0b5559ee0..1d85fcba51e4 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -185,34 +185,6 @@ static void __devinit pci_setup_pci_controller(struct pci_controller *hose) spin_unlock(&hose_spinlock); } -static void add_linux_pci_domain(struct device_node *dev, - struct pci_controller *phb) -{ - struct property *of_prop; - unsigned int size; - - of_prop = (struct property *) - get_property(dev, "linux,pci-domain", &size); - if (of_prop != NULL) - return; - WARN_ON(of_prop && size < sizeof(int)); - if (of_prop && size < sizeof(int)) - of_prop = NULL; - size = sizeof(struct property) + sizeof(int); - if (of_prop == NULL) { - if (mem_init_done) - of_prop = kmalloc(size, GFP_KERNEL); - else - of_prop = alloc_bootmem(size); - } - memset(of_prop, 0, sizeof(struct property)); - of_prop->name = "linux,pci-domain"; - of_prop->length = sizeof(int); - of_prop->value = (unsigned char *)&of_prop[1]; - *((int *)of_prop->value) = phb->global_number; - prom_add_property(dev, of_prop); -} - struct pci_controller * pcibios_alloc_controller(struct device_node *dev) { struct pci_controller *phb; @@ -226,22 +198,13 @@ struct pci_controller * pcibios_alloc_controller(struct device_node *dev) pci_setup_pci_controller(phb); phb->arch_data = dev; phb->is_dynamic = mem_init_done; - if (dev) { + if (dev) PHB_SET_NODE(phb, of_node_to_nid(dev)); - add_linux_pci_domain(dev, phb); - } return phb; } void pcibios_free_controller(struct pci_controller *phb) { - if (phb->arch_data) { - struct device_node *np = phb->arch_data; - int *domain = (int *)get_property(np, - "linux,pci-domain", NULL); - if (domain) - *domain = -1; - } if (phb->is_dynamic) kfree(phb); } From 8b0036eefd7a96f23244b969417684c8627f5ad6 Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Tue, 11 Jul 2006 09:11:25 +0200 Subject: [PATCH 0019/1063] [MTD] NAND: OOB buffer offset fixups In the case of data-pad-ecc-pad-data... layout the oob start position has to be sizeof(data) in nand_write_oob_syndrom(). In nand_fill_oob() we need to copy to buf + buffer offset instead of buf + write offset. From: Vitaly Wool Signed-off-by: Thomas Gleixner --- drivers/mtd/nand/nand_base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 62b861304e03..cffd66309ffa 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -1203,7 +1203,7 @@ static int nand_write_oob_syndrome(struct mtd_info *mtd, pos = steps * (eccsize + chunk); steps = 0; } else - pos = eccsize + chunk; + pos = eccsize; chip->cmdfunc(mtd, NAND_CMD_SEQIN, pos, page); for (i = 0; i < steps; i++) { @@ -1566,7 +1566,7 @@ static uint8_t *nand_fill_oob(struct nand_chip *chip, uint8_t *oob, bytes = min_t(size_t, len, free->length); boffs = free->offset; } - memcpy(chip->oob_poi + woffs, oob, bytes); + memcpy(chip->oob_poi + boffs, oob, bytes); oob += bytes; } return oob; From a749690ecf7ab55aa46df1698bcee3ec110612df Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 13 Jul 2006 17:52:01 +1000 Subject: [PATCH 0020/1063] [POWERPC] iseries: Use device tree /system-id in /proc/iSeries/config We export a bunch of info in /proc/iSeries/config. Currently we pull it directly out of some iSeries specific structs, but we could use the device tree instead, this saves decoding it twice and is a little neater. Signed-off-by: Michael Ellerman Signed-off-by: Stephen Rothwell --- arch/powerpc/platforms/iseries/viopath.c | 27 +++++++++++++++--------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/platforms/iseries/viopath.c b/arch/powerpc/platforms/iseries/viopath.c index 622a30149b48..efeb6ae9df64 100644 --- a/arch/powerpc/platforms/iseries/viopath.c +++ b/arch/powerpc/platforms/iseries/viopath.c @@ -41,8 +41,8 @@ #include #include +#include #include -#include #include #include #include @@ -116,6 +116,7 @@ static int proc_viopath_show(struct seq_file *m, void *v) dma_addr_t handle; HvLpEvent_Rc hvrc; DECLARE_MUTEX_LOCKED(Semaphore); + struct device_node *node; buf = kmalloc(HW_PAGE_SIZE, GFP_KERNEL); if (!buf) @@ -143,20 +144,26 @@ static int proc_viopath_show(struct seq_file *m, void *v) buf[HW_PAGE_SIZE-1] = '\0'; seq_printf(m, "%s", buf); - seq_printf(m, "AVAILABLE_VETH=%x\n", vlanMap); - seq_printf(m, "SRLNBR=%c%c%c%c%c%c%c\n", - e2a(xItExtVpdPanel.mfgID[2]), - e2a(xItExtVpdPanel.mfgID[3]), - e2a(xItExtVpdPanel.systemSerial[1]), - e2a(xItExtVpdPanel.systemSerial[2]), - e2a(xItExtVpdPanel.systemSerial[3]), - e2a(xItExtVpdPanel.systemSerial[4]), - e2a(xItExtVpdPanel.systemSerial[5])); dma_unmap_single(iSeries_vio_dev, handle, HW_PAGE_SIZE, DMA_FROM_DEVICE); kfree(buf); + seq_printf(m, "AVAILABLE_VETH=%x\n", vlanMap); + + node = of_find_node_by_path("/"); + buf = NULL; + if (node != NULL) + buf = get_property(node, "system-id", NULL); + + if (buf == NULL) + seq_printf(m, "SRLNBR=\n"); + else + /* Skip "IBM," on front of serial number, see dt.c */ + seq_printf(m, "SRLNBR=%s\n", buf + 4); + + of_node_put(node); + return 0; } From dac411e7aa92d23dadbcb8721845ab88577294c7 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 13 Jul 2006 17:52:04 +1000 Subject: [PATCH 0021/1063] [POWERPC] iseries: Move e2a()/strne2a() into their only caller The ASCII -> EBCDIC functions, e2a() and strne2a() are now only used in dt.c, so move them in there. Signed-off-by: Michael Ellerman Signed-off-by: Stephen Rothwell --- arch/powerpc/lib/Makefile | 1 - arch/powerpc/lib/e2a.c | 116 ---------------------------- arch/powerpc/platforms/iseries/dt.c | 98 ++++++++++++++++++++++- include/asm-powerpc/system.h | 5 -- 4 files changed, 97 insertions(+), 123 deletions(-) delete mode 100644 arch/powerpc/lib/e2a.c diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index ff7096458249..336dd191f768 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -14,7 +14,6 @@ endif obj-$(CONFIG_PPC64) += checksum_64.o copypage_64.o copyuser_64.o \ memcpy_64.o usercopy_64.o mem_64.o string.o \ strcase.o -obj-$(CONFIG_PPC_ISERIES) += e2a.o obj-$(CONFIG_XMON) += sstep.o ifeq ($(CONFIG_PPC64),y) diff --git a/arch/powerpc/lib/e2a.c b/arch/powerpc/lib/e2a.c deleted file mode 100644 index 4b72ed8fd50e..000000000000 --- a/arch/powerpc/lib/e2a.c +++ /dev/null @@ -1,116 +0,0 @@ -/* - * EBCDIC to ASCII conversion - * - * This function moved here from arch/powerpc/platforms/iseries/viopath.c - * - * (C) Copyright 2000-2004 IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) anyu later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include - -unsigned char e2a(unsigned char x) -{ - switch (x) { - case 0xF0: - return '0'; - case 0xF1: - return '1'; - case 0xF2: - return '2'; - case 0xF3: - return '3'; - case 0xF4: - return '4'; - case 0xF5: - return '5'; - case 0xF6: - return '6'; - case 0xF7: - return '7'; - case 0xF8: - return '8'; - case 0xF9: - return '9'; - case 0xC1: - return 'A'; - case 0xC2: - return 'B'; - case 0xC3: - return 'C'; - case 0xC4: - return 'D'; - case 0xC5: - return 'E'; - case 0xC6: - return 'F'; - case 0xC7: - return 'G'; - case 0xC8: - return 'H'; - case 0xC9: - return 'I'; - case 0xD1: - return 'J'; - case 0xD2: - return 'K'; - case 0xD3: - return 'L'; - case 0xD4: - return 'M'; - case 0xD5: - return 'N'; - case 0xD6: - return 'O'; - case 0xD7: - return 'P'; - case 0xD8: - return 'Q'; - case 0xD9: - return 'R'; - case 0xE2: - return 'S'; - case 0xE3: - return 'T'; - case 0xE4: - return 'U'; - case 0xE5: - return 'V'; - case 0xE6: - return 'W'; - case 0xE7: - return 'X'; - case 0xE8: - return 'Y'; - case 0xE9: - return 'Z'; - } - return ' '; -} -EXPORT_SYMBOL(e2a); - -unsigned char* strne2a(unsigned char *dest, const unsigned char *src, size_t n) -{ - int i; - - n = strnlen(src, n); - - for (i = 0; i < n; i++) - dest[i] = e2a(src[i]); - - return dest; -} diff --git a/arch/powerpc/platforms/iseries/dt.c b/arch/powerpc/platforms/iseries/dt.c index d194140c1ebf..39c676ab9d6d 100644 --- a/arch/powerpc/platforms/iseries/dt.c +++ b/arch/powerpc/platforms/iseries/dt.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2005-2006 Michael Ellerman, IBM Corporation + * Copyright (C) 2005-2006 Michael Ellerman, IBM Corporation + * Copyright (C) 2000-2004, IBM Corporation * * Description: * This file contains all the routines to build a flattened device @@ -76,6 +77,101 @@ static char __initdata device_type_pci[] = "pci"; static char __initdata device_type_vdevice[] = "vdevice"; static char __initdata device_type_vscsi[] = "vscsi"; + +/* EBCDIC to ASCII conversion routines */ + +unsigned char e2a(unsigned char x) +{ + switch (x) { + case 0xF0: + return '0'; + case 0xF1: + return '1'; + case 0xF2: + return '2'; + case 0xF3: + return '3'; + case 0xF4: + return '4'; + case 0xF5: + return '5'; + case 0xF6: + return '6'; + case 0xF7: + return '7'; + case 0xF8: + return '8'; + case 0xF9: + return '9'; + case 0xC1: + return 'A'; + case 0xC2: + return 'B'; + case 0xC3: + return 'C'; + case 0xC4: + return 'D'; + case 0xC5: + return 'E'; + case 0xC6: + return 'F'; + case 0xC7: + return 'G'; + case 0xC8: + return 'H'; + case 0xC9: + return 'I'; + case 0xD1: + return 'J'; + case 0xD2: + return 'K'; + case 0xD3: + return 'L'; + case 0xD4: + return 'M'; + case 0xD5: + return 'N'; + case 0xD6: + return 'O'; + case 0xD7: + return 'P'; + case 0xD8: + return 'Q'; + case 0xD9: + return 'R'; + case 0xE2: + return 'S'; + case 0xE3: + return 'T'; + case 0xE4: + return 'U'; + case 0xE5: + return 'V'; + case 0xE6: + return 'W'; + case 0xE7: + return 'X'; + case 0xE8: + return 'Y'; + case 0xE9: + return 'Z'; + } + return ' '; +} +EXPORT_SYMBOL(e2a); + +unsigned char* strne2a(unsigned char *dest, const unsigned char *src, size_t n) +{ + int i; + + n = strnlen(src, n); + + for (i = 0; i < n; i++) + dest[i] = e2a(src[i]); + + return dest; +} + static struct iseries_flat_dt * __init dt_init(void) { struct iseries_flat_dt *dt; diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h index d075725bf444..5deb7bc7bb1f 100644 --- a/include/asm-powerpc/system.h +++ b/include/asm-powerpc/system.h @@ -169,11 +169,6 @@ extern u32 booke_wdt_enabled; extern u32 booke_wdt_period; #endif /* CONFIG_BOOKE_WDT */ -/* EBCDIC -> ASCII conversion for [0-9A-Z] on iSeries */ -extern unsigned char e2a(unsigned char); -extern unsigned char* strne2a(unsigned char *dest, - const unsigned char *src, size_t n); - struct device_node; extern void note_scsi_host(struct device_node *, void *); From a892e5d7fa7fb893b5873f7150a83f6f1ee141b5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 13 Jul 2006 17:52:06 +1000 Subject: [PATCH 0022/1063] [POWERPC] iseries: Cleanup e2a() and strne2a() e2a() was formally used by lparcfg, and so had to be exported, but isn't anymore, so don't. e2a() and strne2a() can both be static, and __init. And e2a can be made much more concise if we use x ... y case labels, while we're there add support for lower case letters. Signed-off-by: Michael Ellerman Signed-off-by: Stephen Rothwell --- arch/powerpc/platforms/iseries/dt.c | 92 ++++++----------------------- 1 file changed, 17 insertions(+), 75 deletions(-) diff --git a/arch/powerpc/platforms/iseries/dt.c b/arch/powerpc/platforms/iseries/dt.c index 39c676ab9d6d..c5f59a8b9ef3 100644 --- a/arch/powerpc/platforms/iseries/dt.c +++ b/arch/powerpc/platforms/iseries/dt.c @@ -80,87 +80,29 @@ static char __initdata device_type_vscsi[] = "vscsi"; /* EBCDIC to ASCII conversion routines */ -unsigned char e2a(unsigned char x) +static unsigned char __init e2a(unsigned char x) { switch (x) { - case 0xF0: - return '0'; - case 0xF1: - return '1'; - case 0xF2: - return '2'; - case 0xF3: - return '3'; - case 0xF4: - return '4'; - case 0xF5: - return '5'; - case 0xF6: - return '6'; - case 0xF7: - return '7'; - case 0xF8: - return '8'; - case 0xF9: - return '9'; - case 0xC1: - return 'A'; - case 0xC2: - return 'B'; - case 0xC3: - return 'C'; - case 0xC4: - return 'D'; - case 0xC5: - return 'E'; - case 0xC6: - return 'F'; - case 0xC7: - return 'G'; - case 0xC8: - return 'H'; - case 0xC9: - return 'I'; - case 0xD1: - return 'J'; - case 0xD2: - return 'K'; - case 0xD3: - return 'L'; - case 0xD4: - return 'M'; - case 0xD5: - return 'N'; - case 0xD6: - return 'O'; - case 0xD7: - return 'P'; - case 0xD8: - return 'Q'; - case 0xD9: - return 'R'; - case 0xE2: - return 'S'; - case 0xE3: - return 'T'; - case 0xE4: - return 'U'; - case 0xE5: - return 'V'; - case 0xE6: - return 'W'; - case 0xE7: - return 'X'; - case 0xE8: - return 'Y'; - case 0xE9: - return 'Z'; + case 0x81 ... 0x89: + return x - 0x81 + 'a'; + case 0x91 ... 0x99: + return x - 0x91 + 'j'; + case 0xA2 ... 0xA9: + return x - 0xA2 + 's'; + case 0xC1 ... 0xC9: + return x - 0xC1 + 'A'; + case 0xD1 ... 0xD9: + return x - 0xD1 + 'J'; + case 0xE2 ... 0xE9: + return x - 0xE2 + 'S'; + case 0xF0 ... 0xF9: + return x - 0xF0 + '0'; } return ' '; } -EXPORT_SYMBOL(e2a); -unsigned char* strne2a(unsigned char *dest, const unsigned char *src, size_t n) +static unsigned char * __init strne2a(unsigned char *dest, + const unsigned char *src, size_t n) { int i; From c59acae85409fdf5d7574e90009c8410daf38938 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 13 Jul 2006 17:52:09 +1000 Subject: [PATCH 0023/1063] [POWERPC] iseries: Make ItExtVpdPanel private to iSeries No one outside platforms/iseries needs ItExtVpdPanel anymore, so move it in there. It used to be needed by lparcfg, and so was exported, but isn't needed anymore, so unexport it. Signed-off-by: Michael Ellerman Signed-off-by: Stephen Rothwell --- arch/powerpc/kernel/lparcfg.c | 1 - arch/powerpc/platforms/iseries/dt.c | 2 +- .../powerpc/platforms}/iseries/it_exp_vpd_panel.h | 6 +++--- arch/powerpc/platforms/iseries/lpardata.c | 3 +-- 4 files changed, 5 insertions(+), 7 deletions(-) rename {include/asm-powerpc => arch/powerpc/platforms}/iseries/it_exp_vpd_panel.h (89%) diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 23f34daa044a..2d94b372d49b 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/iseries/dt.c b/arch/powerpc/platforms/iseries/dt.c index c5f59a8b9ef3..aa582262aefd 100644 --- a/arch/powerpc/platforms/iseries/dt.c +++ b/arch/powerpc/platforms/iseries/dt.c @@ -34,13 +34,13 @@ #include #include #include -#include #include #include "processor_vpd.h" #include "call_hpt.h" #include "call_pci.h" #include "pci.h" +#include "it_exp_vpd_panel.h" #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) diff --git a/include/asm-powerpc/iseries/it_exp_vpd_panel.h b/arch/powerpc/platforms/iseries/it_exp_vpd_panel.h similarity index 89% rename from include/asm-powerpc/iseries/it_exp_vpd_panel.h rename to arch/powerpc/platforms/iseries/it_exp_vpd_panel.h index 304a609ae21a..6de9097b7f57 100644 --- a/include/asm-powerpc/iseries/it_exp_vpd_panel.h +++ b/arch/powerpc/platforms/iseries/it_exp_vpd_panel.h @@ -15,8 +15,8 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifndef _ASM_POWERPC_ISERIES_IT_EXT_VPD_PANEL_H -#define _ASM_POWERPC_ISERIES_IT_EXT_VPD_PANEL_H +#ifndef _PLATFORMS_ISERIES_IT_EXT_VPD_PANEL_H +#define _PLATFORMS_ISERIES_IT_EXT_VPD_PANEL_H /* * This struct maps the panel information @@ -48,4 +48,4 @@ struct ItExtVpdPanel { extern struct ItExtVpdPanel xItExtVpdPanel; -#endif /* _ASM_POWERPC_ISERIES_IT_EXT_VPD_PANEL_H */ +#endif /* _PLATFORMS_ISERIES_IT_EXT_VPD_PANEL_H */ diff --git a/arch/powerpc/platforms/iseries/lpardata.c b/arch/powerpc/platforms/iseries/lpardata.c index a7769445d6c7..13e9bd132254 100644 --- a/arch/powerpc/platforms/iseries/lpardata.c +++ b/arch/powerpc/platforms/iseries/lpardata.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include "naca.h" @@ -27,6 +26,7 @@ #include "ipl_parms.h" #include "processor_vpd.h" #include "release_data.h" +#include "it_exp_vpd_panel.h" /* The HvReleaseData is the root of the information shared between * the hypervisor and Linux. @@ -134,7 +134,6 @@ struct ItIplParmsReal xItIplParmsReal __attribute__((__section__(".data"))); /* May be filled in by the hypervisor so cannot end up in the BSS */ struct ItExtVpdPanel xItExtVpdPanel __attribute__((__section__(".data"))); -EXPORT_SYMBOL(xItExtVpdPanel); #define maxPhysicalProcessors 32 From a2ced11b6af59854cc2a2791dccd8b6c0da2f733 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 13 Jul 2006 17:52:12 +1000 Subject: [PATCH 0024/1063] [POWERPC] iseries: Make HvLpConfig_get(Primary)LpIndex functions HvLpConfig_get(Primary)LpIndex are currently static inlines that return fields from the itLpNaca, if we make them real functions we can make the itLpNaca private to iSeries. Signed-off-by: Michael Ellerman Signed-off-by: Stephen Rothwell --- arch/powerpc/platforms/iseries/hvlpconfig.c | 13 +++++++++++++ arch/powerpc/platforms/iseries/setup.c | 1 + include/asm-powerpc/iseries/hv_lp_config.h | 13 ++----------- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/platforms/iseries/hvlpconfig.c b/arch/powerpc/platforms/iseries/hvlpconfig.c index 663a1affb4bb..cfcedaae6ea2 100644 --- a/arch/powerpc/platforms/iseries/hvlpconfig.c +++ b/arch/powerpc/platforms/iseries/hvlpconfig.c @@ -18,9 +18,22 @@ #include #include +#include HvLpIndex HvLpConfig_getLpIndex_outline(void) { return HvLpConfig_getLpIndex(); } EXPORT_SYMBOL(HvLpConfig_getLpIndex_outline); + +HvLpIndex HvLpConfig_getLpIndex(void) +{ + return itLpNaca.xLpIndex; +} +EXPORT_SYMBOL(HvLpConfig_getLpIndex); + +HvLpIndex HvLpConfig_getPrimaryLpIndex(void) +{ + return itLpNaca.xPrimaryLpIndex; +} +EXPORT_SYMBOL_GPL(HvLpConfig_getPrimaryLpIndex); diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index c9605d773a77..c34299b18d8b 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include diff --git a/include/asm-powerpc/iseries/hv_lp_config.h b/include/asm-powerpc/iseries/hv_lp_config.h index df8b20739719..a006fd1e4a2c 100644 --- a/include/asm-powerpc/iseries/hv_lp_config.h +++ b/include/asm-powerpc/iseries/hv_lp_config.h @@ -25,7 +25,6 @@ #include #include -#include enum { HvCallCfg_Cur = 0, @@ -44,16 +43,8 @@ enum { #define HvCallCfgGetHostingLpIndex HvCallCfg + 32 extern HvLpIndex HvLpConfig_getLpIndex_outline(void); - -static inline HvLpIndex HvLpConfig_getLpIndex(void) -{ - return itLpNaca.xLpIndex; -} - -static inline HvLpIndex HvLpConfig_getPrimaryLpIndex(void) -{ - return itLpNaca.xPrimaryLpIndex; -} +extern HvLpIndex HvLpConfig_getLpIndex(void); +extern HvLpIndex HvLpConfig_getPrimaryLpIndex(void); static inline u64 HvLpConfig_getMsChunks(void) { From 06a36db1d712242a00cb30aaebdd088b4be28082 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 13 Jul 2006 17:52:17 +1000 Subject: [PATCH 0025/1063] [POWERPC] iseries: Move ItLpNaca into platforms/iseries Move ItLpNaca into platforms/iseries now that it's not used elsewhere. Signed-off-by: Michael Ellerman Signed-off-by: Stephen Rothwell --- arch/powerpc/kernel/setup_64.c | 1 - arch/powerpc/platforms/iseries/hvlpconfig.c | 2 +- .../powerpc/platforms}/iseries/it_lp_naca.h | 6 +++--- arch/powerpc/platforms/iseries/lpardata.c | 3 +-- arch/powerpc/platforms/iseries/lpevents.c | 2 +- arch/powerpc/platforms/iseries/setup.c | 2 +- 6 files changed, 7 insertions(+), 9 deletions(-) rename {include/asm-powerpc => arch/powerpc/platforms}/iseries/it_lp_naca.h (96%) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index fd1785e4c9bb..e2447aef3a8f 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -56,7 +56,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/platforms/iseries/hvlpconfig.c b/arch/powerpc/platforms/iseries/hvlpconfig.c index cfcedaae6ea2..f0475f0b1853 100644 --- a/arch/powerpc/platforms/iseries/hvlpconfig.c +++ b/arch/powerpc/platforms/iseries/hvlpconfig.c @@ -18,7 +18,7 @@ #include #include -#include +#include "it_lp_naca.h" HvLpIndex HvLpConfig_getLpIndex_outline(void) { diff --git a/include/asm-powerpc/iseries/it_lp_naca.h b/arch/powerpc/platforms/iseries/it_lp_naca.h similarity index 96% rename from include/asm-powerpc/iseries/it_lp_naca.h rename to arch/powerpc/platforms/iseries/it_lp_naca.h index 4fdcf052927f..9bbf58986819 100644 --- a/include/asm-powerpc/iseries/it_lp_naca.h +++ b/arch/powerpc/platforms/iseries/it_lp_naca.h @@ -15,8 +15,8 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifndef _ASM_POWERPC_ISERIES_IT_LP_NACA_H -#define _ASM_POWERPC_ISERIES_IT_LP_NACA_H +#ifndef _PLATFORMS_ISERIES_IT_LP_NACA_H +#define _PLATFORMS_ISERIES_IT_LP_NACA_H #include @@ -77,4 +77,4 @@ extern struct ItLpNaca itLpNaca; #define ITLPNACA_HWSYNCEDTBS 0x20 /* Hardware synced TBs */ #define ITLPNACA_HMTINT 0x10 /* Utilize MHT for interrupts */ -#endif /* _ASM_POWERPC_ISERIES_IT_LP_NACA_H */ +#endif /* _PLATFORMS_ISERIES_IT_LP_NACA_H */ diff --git a/arch/powerpc/platforms/iseries/lpardata.c b/arch/powerpc/platforms/iseries/lpardata.c index 13e9bd132254..8162049bb04d 100644 --- a/arch/powerpc/platforms/iseries/lpardata.c +++ b/arch/powerpc/platforms/iseries/lpardata.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -27,6 +26,7 @@ #include "processor_vpd.h" #include "release_data.h" #include "it_exp_vpd_panel.h" +#include "it_lp_naca.h" /* The HvReleaseData is the root of the information shared between * the hypervisor and Linux. @@ -127,7 +127,6 @@ struct ItLpNaca itLpNaca = { (u64)instruction_access_slb_iSeries /* 0x480 I-SLB */ } }; -EXPORT_SYMBOL(itLpNaca); /* May be filled in by the hypervisor so cannot end up in the BSS */ struct ItIplParmsReal xItIplParmsReal __attribute__((__section__(".data"))); diff --git a/arch/powerpc/platforms/iseries/lpevents.c b/arch/powerpc/platforms/iseries/lpevents.c index 2a9f81ea27d6..98c1c2440aad 100644 --- a/arch/powerpc/platforms/iseries/lpevents.c +++ b/arch/powerpc/platforms/iseries/lpevents.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include "it_lp_naca.h" /* * The LpQueue is used to pass event data from the hypervisor to diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index c34299b18d8b..7f1953066ff8 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c @@ -48,7 +48,6 @@ #include #include #include -#include #include #include #include @@ -60,6 +59,7 @@ #include "irq.h" #include "vpd_areas.h" #include "processor_vpd.h" +#include "it_lp_naca.h" #include "main_store.h" #include "call_sm.h" #include "call_hpt.h" From f357b4cc5826ae55a5f3893424502cb15c6b6eba Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 13 Jul 2006 17:54:39 +1000 Subject: [PATCH 0026/1063] [POWERPC] iseries: Fix a compiler warning in platforms/iseries/vpdinfo.c iSeries_Get_Location_Code() has error paths, but currently returns void, so give it a return code and only print the output if it returns successfully. Gcc isn't smart enough to be quiet though, so set frame to 0 to shut it up. Signed-off-by: Michael Ellerman Signed-off-by: Stephen Rothwell --- arch/powerpc/platforms/iseries/vpdinfo.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/platforms/iseries/vpdinfo.c b/arch/powerpc/platforms/iseries/vpdinfo.c index 23a6d1e5b429..ba7f6a62c3bf 100644 --- a/arch/powerpc/platforms/iseries/vpdinfo.c +++ b/arch/powerpc/platforms/iseries/vpdinfo.c @@ -205,15 +205,16 @@ static void __init iSeries_Parse_Vpd(u8 *VpdData, int VpdDataLen, } } -static void __init iSeries_Get_Location_Code(u16 bus, HvAgentId agent, +static int __init iSeries_Get_Location_Code(u16 bus, HvAgentId agent, u8 *frame, char card[4]) { + int status = 0; int BusVpdLen = 0; u8 *BusVpdPtr = kmalloc(BUS_VPDSIZE, GFP_KERNEL); if (BusVpdPtr == NULL) { printk("PCI: Bus VPD Buffer allocation failure.\n"); - return; + return 0; } BusVpdLen = HvCallPci_getBusVpd(bus, iseries_hv_addr(BusVpdPtr), BUS_VPDSIZE); @@ -228,8 +229,10 @@ static void __init iSeries_Get_Location_Code(u16 bus, HvAgentId agent, goto out_free; } iSeries_Parse_Vpd(BusVpdPtr, BusVpdLen, agent, frame, card); + status = 1; out_free: kfree(BusVpdPtr); + return status; } /* @@ -246,7 +249,7 @@ void __init iSeries_Device_Information(struct pci_dev *PciDev, int count) struct device_node *DevNode = PciDev->sysdata; struct pci_dn *pdn; u16 bus; - u8 frame; + u8 frame = 0; char card[4]; HvSubBusNumber subbus; HvAgentId agent; @@ -262,10 +265,11 @@ void __init iSeries_Device_Information(struct pci_dev *PciDev, int count) subbus = pdn->bussubno; agent = ISERIES_PCI_AGENTID(ISERIES_GET_DEVICE_FROM_SUBBUS(subbus), ISERIES_GET_FUNCTION_FROM_SUBBUS(subbus)); - iSeries_Get_Location_Code(bus, agent, &frame, card); - printk("%d. PCI: Bus%3d, Device%3d, Vendor %04X Frame%3d, Card %4s ", - count, bus, PCI_SLOT(PciDev->devfn), PciDev->vendor, - frame, card); - printk("0x%04X\n", (int)(PciDev->class >> 8)); + if (iSeries_Get_Location_Code(bus, agent, &frame, card)) { + printk("%d. PCI: Bus%3d, Device%3d, Vendor %04X Frame%3d, " + "Card %4s 0x%04X\n", count, bus, + PCI_SLOT(PciDev->devfn), PciDev->vendor, frame, + card, (int)(PciDev->class >> 8)); + } } From 463c61928c453c2998d39b683c86385ee877c289 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 13 Jul 2006 17:54:44 +1000 Subject: [PATCH 0027/1063] [POWERPC] iseries: Fix a compiler warning in platforms/iseries/vpdinfo.c PhbId might be used unitialised, so set it to 0xff (nothing) always. Signed-off-by: Michael Ellerman Signed-off-by: Stephen Rothwell --- arch/powerpc/platforms/iseries/vpdinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/iseries/vpdinfo.c b/arch/powerpc/platforms/iseries/vpdinfo.c index ba7f6a62c3bf..9f83878a0c2e 100644 --- a/arch/powerpc/platforms/iseries/vpdinfo.c +++ b/arch/powerpc/platforms/iseries/vpdinfo.c @@ -188,7 +188,7 @@ static void __init iSeries_Parse_Vpd(u8 *VpdData, int VpdDataLen, { u8 *TagPtr = VpdData; int DataLen = VpdDataLen - 3; - u8 PhbId; + u8 PhbId = 0xff; while ((*TagPtr != VpdEndOfAreaTag) && (DataLen > 0)) { int AreaLen = *(TagPtr + 1) + (*(TagPtr + 2) * 256); From 2d69ff32ebf3dff9e9b48bbbbafe2b9b6f188d48 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 13 Jul 2006 17:54:44 +1000 Subject: [PATCH 0028/1063] [POWERPC] Fix a compiler warning in mm/tlb_64.c The compiler doesn't understand that BUG() never returns, so complains that psize isn't set. Just set it to the normal value, which seems to produce nice code and keeps gcc happy. Signed-off-by: Michael Ellerman Signed-off-by: Stephen Rothwell --- arch/powerpc/mm/tlb_64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c index f6eef78efd29..b58baa65c4a7 100644 --- a/arch/powerpc/mm/tlb_64.c +++ b/arch/powerpc/mm/tlb_64.c @@ -146,6 +146,7 @@ void hpte_update(struct mm_struct *mm, unsigned long addr, psize = mmu_huge_psize; #else BUG(); + psize = pte_pagesize_index(pte); /* shutup gcc */ #endif } else psize = pte_pagesize_index(pte); From 8bff05b052db7a4cfaaf0eee7f8145600548e9c9 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 13 Jul 2006 18:51:22 +1000 Subject: [PATCH 0029/1063] [POWERPC] iseries: A new iSeries console This driver uses the hvc_console.c infrastructure that is used by the pSeries virtual and RTAS consoles. This will allow us to make viocons.c obsolete and is another step along the way to a combined kernel (as viocons could not coexist with CONFIG_VT). Signed-off-by: Stephen Rothwell --- arch/powerpc/platforms/iseries/Kconfig | 8 +- arch/powerpc/platforms/iseries/dt.c | 3 +- drivers/char/Kconfig | 7 + drivers/char/Makefile | 1 + drivers/char/hvc_iseries.c | 593 +++++++++++++++++++++++++ drivers/char/viocons.c | 31 +- include/asm-powerpc/iseries/vio.h | 28 ++ 7 files changed, 638 insertions(+), 33 deletions(-) create mode 100644 drivers/char/hvc_iseries.c diff --git a/arch/powerpc/platforms/iseries/Kconfig b/arch/powerpc/platforms/iseries/Kconfig index 3d957a30c8c2..887b68804e6d 100644 --- a/arch/powerpc/platforms/iseries/Kconfig +++ b/arch/powerpc/platforms/iseries/Kconfig @@ -3,13 +3,17 @@ menu "iSeries device drivers" depends on PPC_ISERIES config VIOCONS - tristate "iSeries Virtual Console Support" + tristate "iSeries Virtual Console Support (Obsolete)" + help + This is the old virtual console driver for legacy iSeries. + You should use the iSeries Hypervisor Virtual Console + support instead. config VIODASD tristate "iSeries Virtual I/O disk support" help If you are running on an iSeries system and you want to use - virtual disks created and managed by OS/400, say Y. + virtual disks created and managed by OS/400, say Y. config VIOCD tristate "iSeries Virtual I/O CD support" diff --git a/arch/powerpc/platforms/iseries/dt.c b/arch/powerpc/platforms/iseries/dt.c index d194140c1ebf..894b534669d0 100644 --- a/arch/powerpc/platforms/iseries/dt.c +++ b/arch/powerpc/platforms/iseries/dt.c @@ -298,7 +298,8 @@ static void __init dt_vdevices(struct iseries_flat_dt *dt) dt_prop_u32(dt, "#address-cells", 1); dt_prop_u32(dt, "#size-cells", 0); - dt_do_vdevice(dt, "vty", reg, -1, device_type_serial, NULL, 1); + dt_do_vdevice(dt, "vty", reg, -1, device_type_serial, + "IBM,iSeries-vty", 1); reg++; dt_do_vdevice(dt, "v-scsi", reg, -1, device_type_vscsi, diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 11de59ff4229..a7ef542afbc2 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -610,6 +610,13 @@ config HVC_CONSOLE console. This driver allows each pSeries partition to have a console which is accessed via the HMC. +config HVC_ISERIES + bool "iSeries Hypervisor Virtual Console support" + depends on PPC_ISERIES && !VIOCONS + select HVC_DRIVER + help + iSeries machines support a hypervisor virtual console. + config HVC_RTAS bool "IBM RTAS Console support" depends on PPC_RTAS diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 7a7ee5721279..8c6dfc621520 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -42,6 +42,7 @@ obj-$(CONFIG_AMIGA_BUILTIN_SERIAL) += amiserial.o obj-$(CONFIG_SX) += sx.o generic_serial.o obj-$(CONFIG_RIO) += rio/ generic_serial.o obj-$(CONFIG_HVC_CONSOLE) += hvc_vio.o hvsi.o +obj-$(CONFIG_HVC_ISERIES) += hvc_iseries.o obj-$(CONFIG_HVC_RTAS) += hvc_rtas.o obj-$(CONFIG_HVC_DRIVER) += hvc_console.o obj-$(CONFIG_RAW_DRIVER) += raw.o diff --git a/drivers/char/hvc_iseries.c b/drivers/char/hvc_iseries.c new file mode 100644 index 000000000000..256afc8e5838 --- /dev/null +++ b/drivers/char/hvc_iseries.c @@ -0,0 +1,593 @@ +/* + * iSeries vio driver interface to hvc_console.c + * + * This code is based heavily on hvc_vio.c and viocons.c + * + * Copyright (C) 2006 Stephen Rothwell, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "hvc_console.h" + +#define VTTY_PORTS 10 + +static DEFINE_SPINLOCK(consolelock); +static DEFINE_SPINLOCK(consoleloglock); + +static const char hvc_driver_name[] = "hvc_console"; + +#define IN_BUF_SIZE 200 + +/* + * Our port information. + */ +static struct port_info { + HvLpIndex lp; + u64 seq; /* sequence number of last HV send */ + u64 ack; /* last ack from HV */ + struct hvc_struct *hp; + int in_start; + int in_end; + unsigned char in_buf[IN_BUF_SIZE]; +} port_info[VTTY_PORTS] = { + [ 0 ... VTTY_PORTS - 1 ] = { + .lp = HvLpIndexInvalid + } +}; + +#define viochar_is_console(pi) ((pi) == &port_info[0]) + +static struct vio_device_id hvc_driver_table[] __devinitdata = { + {"serial", "IBM,iSeries-vty"}, + { "", "" } +}; +MODULE_DEVICE_TABLE(vio, hvc_driver_table); + +static void hvlog(char *fmt, ...) +{ + int i; + unsigned long flags; + va_list args; + static char buf[256]; + + spin_lock_irqsave(&consoleloglock, flags); + va_start(args, fmt); + i = vscnprintf(buf, sizeof(buf) - 1, fmt, args); + va_end(args); + buf[i++] = '\r'; + HvCall_writeLogBuffer(buf, i); + spin_unlock_irqrestore(&consoleloglock, flags); +} + +/* + * Initialize the common fields in a charLpEvent + */ +static void init_data_event(struct viocharlpevent *viochar, HvLpIndex lp) +{ + struct HvLpEvent *hev = &viochar->event; + + memset(viochar, 0, sizeof(struct viocharlpevent)); + + hev->flags = HV_LP_EVENT_VALID | HV_LP_EVENT_DEFERRED_ACK | + HV_LP_EVENT_INT; + hev->xType = HvLpEvent_Type_VirtualIo; + hev->xSubtype = viomajorsubtype_chario | viochardata; + hev->xSourceLp = HvLpConfig_getLpIndex(); + hev->xTargetLp = lp; + hev->xSizeMinus1 = sizeof(struct viocharlpevent); + hev->xSourceInstanceId = viopath_sourceinst(lp); + hev->xTargetInstanceId = viopath_targetinst(lp); +} + +static int get_chars(uint32_t vtermno, char *buf, int count) +{ + struct port_info *pi; + int n = 0; + unsigned long flags; + + if (vtermno >= VTTY_PORTS) + return -EINVAL; + if (count == 0) + return 0; + + pi = &port_info[vtermno]; + spin_lock_irqsave(&consolelock, flags); + + if (pi->in_end == 0) + goto done; + + n = pi->in_end - pi->in_start; + if (n > count) + n = count; + memcpy(buf, &pi->in_buf[pi->in_start], n); + pi->in_start += n; + if (pi->in_start == pi->in_end) { + pi->in_start = 0; + pi->in_end = 0; + } +done: + spin_unlock_irqrestore(&consolelock, flags); + return n; +} + +static int put_chars(uint32_t vtermno, const char *buf, int count) +{ + struct viocharlpevent *viochar; + struct port_info *pi; + HvLpEvent_Rc hvrc; + unsigned long flags; + int sent = 0; + + if (vtermno >= VTTY_PORTS) + return -EINVAL; + + pi = &port_info[vtermno]; + + spin_lock_irqsave(&consolelock, flags); + + if (viochar_is_console(pi) && !viopath_isactive(pi->lp)) { + spin_lock_irqsave(&consoleloglock, flags); + HvCall_writeLogBuffer(buf, count); + spin_unlock_irqrestore(&consoleloglock, flags); + sent = count; + goto done; + } + + viochar = vio_get_event_buffer(viomajorsubtype_chario); + if (viochar == NULL) { + hvlog("\n\rviocons: Can't get viochar buffer."); + goto done; + } + + while ((count > 0) && ((pi->seq - pi->ack) < VIOCHAR_WINDOW)) { + int len; + + len = (count > VIOCHAR_MAX_DATA) ? VIOCHAR_MAX_DATA : count; + + if (viochar_is_console(pi)) { + spin_lock_irqsave(&consoleloglock, flags); + HvCall_writeLogBuffer(buf, len); + spin_unlock_irqrestore(&consoleloglock, flags); + } + + init_data_event(viochar, pi->lp); + + viochar->len = len; + viochar->event.xCorrelationToken = pi->seq++; + viochar->event.xSizeMinus1 = + offsetof(struct viocharlpevent, data) + len; + + memcpy(viochar->data, buf, len); + + hvrc = HvCallEvent_signalLpEvent(&viochar->event); + if (hvrc) + hvlog("\n\rerror sending event! return code %d\n\r", + (int)hvrc); + sent += len; + count -= len; + buf += len; + } + + vio_free_event_buffer(viomajorsubtype_chario, viochar); +done: + spin_unlock_irqrestore(&consolelock, flags); + return sent; +} + +static struct hv_ops hvc_get_put_ops = { + .get_chars = get_chars, + .put_chars = put_chars, +}; + +static int __devinit hvc_vio_probe(struct vio_dev *vdev, + const struct vio_device_id *id) +{ + struct hvc_struct *hp; + struct port_info *pi; + + /* probed with invalid parameters. */ + if (!vdev || !id) + return -EPERM; + + if (vdev->unit_address >= VTTY_PORTS) + return -ENODEV; + + pi = &port_info[vdev->unit_address]; + + hp = hvc_alloc(vdev->unit_address, vdev->irq, &hvc_get_put_ops); + if (IS_ERR(hp)) + return PTR_ERR(hp); + pi->hp = hp; + dev_set_drvdata(&vdev->dev, pi); + + return 0; +} + +static int __devexit hvc_vio_remove(struct vio_dev *vdev) +{ + struct port_info *pi = dev_get_drvdata(&vdev->dev); + struct hvc_struct *hp = pi->hp; + + return hvc_remove(hp); +} + +static struct vio_driver hvc_vio_driver = { + .id_table = hvc_driver_table, + .probe = hvc_vio_probe, + .remove = hvc_vio_remove, + .driver = { + .name = hvc_driver_name, + .owner = THIS_MODULE, + } +}; + +static void hvc_open_event(struct HvLpEvent *event) +{ + unsigned long flags; + struct viocharlpevent *cevent = (struct viocharlpevent *)event; + u8 port = cevent->virtual_device; + struct port_info *pi; + int reject = 0; + + if (hvlpevent_is_ack(event)) { + if (port >= VTTY_PORTS) + return; + + spin_lock_irqsave(&consolelock, flags); + + pi = &port_info[port]; + if (event->xRc == HvLpEvent_Rc_Good) { + pi->seq = pi->ack = 0; + /* + * This line allows connections from the primary + * partition but once one is connected from the + * primary partition nothing short of a reboot + * of linux will allow access from the hosting + * partition again without a required iSeries fix. + */ + pi->lp = event->xTargetLp; + } + + spin_unlock_irqrestore(&consolelock, flags); + if (event->xRc != HvLpEvent_Rc_Good) + printk(KERN_WARNING + "hvc: handle_open_event: event->xRc == (%d).\n", + event->xRc); + + if (event->xCorrelationToken != 0) { + atomic_t *aptr= (atomic_t *)event->xCorrelationToken; + atomic_set(aptr, 1); + } else + printk(KERN_WARNING + "hvc: weird...got open ack without atomic\n"); + return; + } + + /* This had better require an ack, otherwise complain */ + if (!hvlpevent_need_ack(event)) { + printk(KERN_WARNING "hvc: viocharopen without ack bit!\n"); + return; + } + + spin_lock_irqsave(&consolelock, flags); + + /* Make sure this is a good virtual tty */ + if (port >= VTTY_PORTS) { + event->xRc = HvLpEvent_Rc_SubtypeError; + cevent->subtype_result_code = viorc_openRejected; + /* + * Flag state here since we can't printk while holding + * the consolelock spinlock. + */ + reject = 1; + } else { + pi = &port_info[port]; + if ((pi->lp != HvLpIndexInvalid) && + (pi->lp != event->xSourceLp)) { + /* + * If this is tty is already connected to a different + * partition, fail. + */ + event->xRc = HvLpEvent_Rc_SubtypeError; + cevent->subtype_result_code = viorc_openRejected; + reject = 2; + } else { + pi->lp = event->xSourceLp; + event->xRc = HvLpEvent_Rc_Good; + cevent->subtype_result_code = viorc_good; + pi->seq = pi->ack = 0; + } + } + + spin_unlock_irqrestore(&consolelock, flags); + + if (reject == 1) + printk(KERN_WARNING "hvc: open rejected: bad virtual tty.\n"); + else if (reject == 2) + printk(KERN_WARNING "hvc: open rejected: console in exclusive " + "use by another partition.\n"); + + /* Return the acknowledgement */ + HvCallEvent_ackLpEvent(event); +} + +/* + * Handle a close charLpEvent. This should ONLY be an Interrupt because the + * virtual console should never actually issue a close event to the hypervisor + * because the virtual console never goes away. A close event coming from the + * hypervisor simply means that there are no client consoles connected to the + * virtual console. + */ +static void hvc_close_event(struct HvLpEvent *event) +{ + unsigned long flags; + struct viocharlpevent *cevent = (struct viocharlpevent *)event; + u8 port = cevent->virtual_device; + + if (!hvlpevent_is_int(event)) { + printk(KERN_WARNING + "hvc: got unexpected close acknowlegement\n"); + return; + } + + if (port >= VTTY_PORTS) { + printk(KERN_WARNING + "hvc: close message from invalid virtual device.\n"); + return; + } + + /* For closes, just mark the console partition invalid */ + spin_lock_irqsave(&consolelock, flags); + + if (port_info[port].lp == event->xSourceLp) + port_info[port].lp = HvLpIndexInvalid; + + spin_unlock_irqrestore(&consolelock, flags); +} + +static void hvc_data_event(struct HvLpEvent *event) +{ + unsigned long flags; + struct viocharlpevent *cevent = (struct viocharlpevent *)event; + struct port_info *pi; + int n; + u8 port = cevent->virtual_device; + + if (port >= VTTY_PORTS) { + printk(KERN_WARNING "hvc: data on invalid virtual device %d\n", + port); + return; + } + if (cevent->len == 0) + return; + + /* + * Change 05/01/2003 - Ryan Arnold: If a partition other than + * the current exclusive partition tries to send us data + * events then just drop them on the floor because we don't + * want his stinking data. He isn't authorized to receive + * data because he wasn't the first one to get the console, + * therefore he shouldn't be allowed to send data either. + * This will work without an iSeries fix. + */ + pi = &port_info[port]; + if (pi->lp != event->xSourceLp) + return; + + spin_lock_irqsave(&consolelock, flags); + + n = IN_BUF_SIZE - pi->in_end; + if (n > cevent->len) + n = cevent->len; + if (n > 0) { + memcpy(&pi->in_buf[pi->in_end], cevent->data, n); + pi->in_end += n; + } + spin_unlock_irqrestore(&consolelock, flags); + if (n == 0) + printk(KERN_WARNING "hvc: input buffer overflow\n"); +} + +static void hvc_ack_event(struct HvLpEvent *event) +{ + struct viocharlpevent *cevent = (struct viocharlpevent *)event; + unsigned long flags; + u8 port = cevent->virtual_device; + + if (port >= VTTY_PORTS) { + printk(KERN_WARNING "hvc: data on invalid virtual device\n"); + return; + } + + spin_lock_irqsave(&consolelock, flags); + port_info[port].ack = event->xCorrelationToken; + spin_unlock_irqrestore(&consolelock, flags); +} + +static void hvc_config_event(struct HvLpEvent *event) +{ + struct viocharlpevent *cevent = (struct viocharlpevent *)event; + + if (cevent->data[0] == 0x01) + printk(KERN_INFO "hvc: window resized to %d: %d: %d: %d\n", + cevent->data[1], cevent->data[2], + cevent->data[3], cevent->data[4]); + else + printk(KERN_WARNING "hvc: unknown config event\n"); +} + +static void hvc_handle_event(struct HvLpEvent *event) +{ + int charminor; + + if (event == NULL) + return; + + charminor = event->xSubtype & VIOMINOR_SUBTYPE_MASK; + switch (charminor) { + case viocharopen: + hvc_open_event(event); + break; + case viocharclose: + hvc_close_event(event); + break; + case viochardata: + hvc_data_event(event); + break; + case viocharack: + hvc_ack_event(event); + break; + case viocharconfig: + hvc_config_event(event); + break; + default: + if (hvlpevent_is_int(event) && hvlpevent_need_ack(event)) { + event->xRc = HvLpEvent_Rc_InvalidSubtype; + HvCallEvent_ackLpEvent(event); + } + } +} + +static int send_open(HvLpIndex remoteLp, void *sem) +{ + return HvCallEvent_signalLpEventFast(remoteLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_chario | viocharopen, + HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst(remoteLp), + viopath_targetinst(remoteLp), + (u64)(unsigned long)sem, VIOVERSION << 16, + 0, 0, 0, 0); +} + +static int hvc_vio_init(void) +{ + atomic_t wait_flag; + int rc; + + /* +2 for fudge */ + rc = viopath_open(HvLpConfig_getPrimaryLpIndex(), + viomajorsubtype_chario, VIOCHAR_WINDOW + 2); + if (rc) + printk(KERN_WARNING "hvc: error opening to primary %d\n", rc); + + if (viopath_hostLp == HvLpIndexInvalid) + vio_set_hostlp(); + + /* + * And if the primary is not the same as the hosting LP, open to the + * hosting lp + */ + if ((viopath_hostLp != HvLpIndexInvalid) && + (viopath_hostLp != HvLpConfig_getPrimaryLpIndex())) { + printk(KERN_INFO "hvc: open path to hosting (%d)\n", + viopath_hostLp); + rc = viopath_open(viopath_hostLp, viomajorsubtype_chario, + VIOCHAR_WINDOW + 2); /* +2 for fudge */ + if (rc) + printk(KERN_WARNING + "error opening to partition %d: %d\n", + viopath_hostLp, rc); + } + + if (vio_setHandler(viomajorsubtype_chario, hvc_handle_event) < 0) + printk(KERN_WARNING + "hvc: error seting handler for console events!\n"); + + /* + * First, try to open the console to the hosting lp. + * Wait on a semaphore for the response. + */ + atomic_set(&wait_flag, 0); + if ((viopath_isactive(viopath_hostLp)) && + (send_open(viopath_hostLp, &wait_flag) == 0)) { + printk(KERN_INFO "hvc: hosting partition %d\n", viopath_hostLp); + while (atomic_read(&wait_flag) == 0) + mb(); + atomic_set(&wait_flag, 0); + } + + /* + * If we don't have an active console, try the primary + */ + if ((!viopath_isactive(port_info[0].lp)) && + (viopath_isactive(HvLpConfig_getPrimaryLpIndex())) && + (send_open(HvLpConfig_getPrimaryLpIndex(), &wait_flag) == 0)) { + printk(KERN_INFO "hvc: opening console to primary partition\n"); + while (atomic_read(&wait_flag) == 0) + mb(); + } + + /* Register as a vio device to receive callbacks */ + rc = vio_register_driver(&hvc_vio_driver); + + return rc; +} +module_init(hvc_vio_init); /* after drivers/char/hvc_console.c */ + +static void hvc_vio_exit(void) +{ + vio_unregister_driver(&hvc_vio_driver); +} +module_exit(hvc_vio_exit); + +/* the device tree order defines our numbering */ +static int hvc_find_vtys(void) +{ + struct device_node *vty; + int num_found = 0; + + for (vty = of_find_node_by_name(NULL, "vty"); vty != NULL; + vty = of_find_node_by_name(vty, "vty")) { + uint32_t *vtermno; + + /* We have statically defined space for only a certain number + * of console adapters. + */ + if ((num_found >= MAX_NR_HVC_CONSOLES) || + (num_found >= VTTY_PORTS)) + break; + + vtermno = (uint32_t *)get_property(vty, "reg", NULL); + if (!vtermno) + continue; + + if (!device_is_compatible(vty, "IBM,iSeries-vty")) + continue; + + if (num_found == 0) + add_preferred_console("hvc", 0, NULL); + hvc_instantiate(*vtermno, num_found, &hvc_get_put_ops); + ++num_found; + } + + return num_found; +} +console_initcall(hvc_find_vtys); diff --git a/drivers/char/viocons.c b/drivers/char/viocons.c index 766f7864c6c6..f3efeaf2826e 100644 --- a/drivers/char/viocons.c +++ b/drivers/char/viocons.c @@ -43,7 +43,6 @@ #include #include - #include #include #include @@ -67,35 +66,6 @@ static int vio_sysrq_pressed; extern int sysrq_enabled; #endif -/* - * The structure of the events that flow between us and OS/400. You can't - * mess with this unless the OS/400 side changes too - */ -struct viocharlpevent { - struct HvLpEvent event; - u32 reserved; - u16 version; - u16 subtype_result_code; - u8 virtual_device; - u8 len; - u8 data[VIOCHAR_MAX_DATA]; -}; - -#define VIOCHAR_WINDOW 10 -#define VIOCHAR_HIGHWATERMARK 3 - -enum viocharsubtype { - viocharopen = 0x0001, - viocharclose = 0x0002, - viochardata = 0x0003, - viocharack = 0x0004, - viocharconfig = 0x0005 -}; - -enum viochar_rc { - viochar_rc_ebusy = 1 -}; - #define VIOCHAR_NUM_BUF 16 /* @@ -1183,6 +1153,7 @@ static int __init viocons_init(void) port_info[i].magic = VIOTTY_MAGIC; } HvCall_setLogBufferFormatAndCodepage(HvCall_LogBuffer_ASCII, 437); + add_preferred_console("viocons", 0, NULL); register_console(&viocons_early); return 0; } diff --git a/include/asm-powerpc/iseries/vio.h b/include/asm-powerpc/iseries/vio.h index 72a97d37aac3..7a95d296abd1 100644 --- a/include/asm-powerpc/iseries/vio.h +++ b/include/asm-powerpc/iseries/vio.h @@ -122,6 +122,34 @@ enum viorc { viorc_openRejected = 0x0301 }; +/* + * The structure of the events that flow between us and OS/400 for chario + * events. You can't mess with this unless the OS/400 side changes too. + */ +struct viocharlpevent { + struct HvLpEvent event; + u32 reserved; + u16 version; + u16 subtype_result_code; + u8 virtual_device; + u8 len; + u8 data[VIOCHAR_MAX_DATA]; +}; + +#define VIOCHAR_WINDOW 10 + +enum viocharsubtype { + viocharopen = 0x0001, + viocharclose = 0x0002, + viochardata = 0x0003, + viocharack = 0x0004, + viocharconfig = 0x0005 +}; + +enum viochar_rc { + viochar_rc_ebusy = 1 +}; + struct device; extern struct device *iSeries_vio_dev; From 4e9e95a3554e98e7383a3591283ffcd850c9ef48 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 13 Jul 2006 18:53:32 +1000 Subject: [PATCH 0030/1063] [POWERPC] Make the hvc_console output buffer size settable So the iSeries console will be faster since it can send up to 200 bytes at a time to the Hypervisor. This only affects the tty part of the console, the console writes are still in 16 byte lots. Signed-off-by: Stephen Rothwell --- drivers/char/hvc_console.c | 14 +++++++++----- drivers/char/hvc_console.h | 2 +- drivers/char/hvc_iseries.c | 3 ++- drivers/char/hvc_rtas.c | 2 +- drivers/char/hvc_vio.c | 3 ++- 5 files changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index ca2f538e549e..dbee8bed0530 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -80,7 +80,8 @@ struct hvc_struct { struct tty_struct *tty; unsigned int count; int do_wakeup; - char outbuf[N_OUTBUF] __ALIGNED__; + char *outbuf; + int outbuf_size; int n_outbuf; uint32_t vtermno; struct hv_ops *ops; @@ -505,7 +506,7 @@ static int hvc_write(struct tty_struct *tty, const unsigned char *buf, int count if (hp->n_outbuf > 0) hvc_push(hp); - while (count > 0 && (rsize = N_OUTBUF - hp->n_outbuf) > 0) { + while (count > 0 && (rsize = hp->outbuf_size - hp->n_outbuf) > 0) { if (rsize > count) rsize = count; memcpy(hp->outbuf + hp->n_outbuf, buf, rsize); @@ -538,7 +539,7 @@ static int hvc_write_room(struct tty_struct *tty) if (!hp) return -1; - return N_OUTBUF - hp->n_outbuf; + return hp->outbuf_size - hp->n_outbuf; } static int hvc_chars_in_buffer(struct tty_struct *tty) @@ -728,12 +729,13 @@ static struct kobj_type hvc_kobj_type = { }; struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq, - struct hv_ops *ops) + struct hv_ops *ops, int outbuf_size) { struct hvc_struct *hp; int i; - hp = kmalloc(sizeof(*hp), GFP_KERNEL); + hp = kmalloc(ALIGN(sizeof(*hp), sizeof(long)) + outbuf_size, + GFP_KERNEL); if (!hp) return ERR_PTR(-ENOMEM); @@ -742,6 +744,8 @@ struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq, hp->vtermno = vtermno; hp->irq = irq; hp->ops = ops; + hp->outbuf_size = outbuf_size; + hp->outbuf = &((char *)hp)[ALIGN(sizeof(*hp), sizeof(long))]; kobject_init(&hp->kobj); hp->kobj.ktype = &hvc_kobj_type; diff --git a/drivers/char/hvc_console.h b/drivers/char/hvc_console.h index 96b7401319c1..8c59818050e6 100644 --- a/drivers/char/hvc_console.h +++ b/drivers/char/hvc_console.h @@ -56,7 +56,7 @@ extern int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops); /* register a vterm for hvc tty operation (module_init or hotplug add) */ extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int irq, - struct hv_ops *ops); + struct hv_ops *ops, int outbuf_size); /* remove a vterm from hvc tty operation (modele_exit or hotplug remove) */ extern int __devexit hvc_remove(struct hvc_struct *hp); diff --git a/drivers/char/hvc_iseries.c b/drivers/char/hvc_iseries.c index 256afc8e5838..4747729459c7 100644 --- a/drivers/char/hvc_iseries.c +++ b/drivers/char/hvc_iseries.c @@ -221,7 +221,8 @@ static int __devinit hvc_vio_probe(struct vio_dev *vdev, pi = &port_info[vdev->unit_address]; - hp = hvc_alloc(vdev->unit_address, vdev->irq, &hvc_get_put_ops); + hp = hvc_alloc(vdev->unit_address, vdev->irq, &hvc_get_put_ops, + VIOCHAR_MAX_DATA); if (IS_ERR(hp)) return PTR_ERR(hp); pi->hp = hp; diff --git a/drivers/char/hvc_rtas.c b/drivers/char/hvc_rtas.c index 57106e02fd2e..4b97eaf18602 100644 --- a/drivers/char/hvc_rtas.c +++ b/drivers/char/hvc_rtas.c @@ -94,7 +94,7 @@ static int hvc_rtas_init(void) /* Allocate an hvc_struct for the console device we instantiated * earlier. Save off hp so that we can return it on exit */ - hp = hvc_alloc(hvc_rtas_cookie, NO_IRQ, &hvc_rtas_get_put_ops); + hp = hvc_alloc(hvc_rtas_cookie, NO_IRQ, &hvc_rtas_get_put_ops, 16); if (IS_ERR(hp)) return PTR_ERR(hp); diff --git a/drivers/char/hvc_vio.c b/drivers/char/hvc_vio.c index 9add81ceb440..651e5d25f58b 100644 --- a/drivers/char/hvc_vio.c +++ b/drivers/char/hvc_vio.c @@ -90,7 +90,8 @@ static int __devinit hvc_vio_probe(struct vio_dev *vdev, if (!vdev || !id) return -EPERM; - hp = hvc_alloc(vdev->unit_address, vdev->irq, &hvc_get_put_ops); + hp = hvc_alloc(vdev->unit_address, vdev->irq, &hvc_get_put_ops, + MAX_VIO_PUT_CHARS); if (IS_ERR(hp)) return PTR_ERR(hp); dev_set_drvdata(&vdev->dev, hp); From 380ed24b1b81a188c5b716286143157a27935aab Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 13 Jul 2006 18:56:00 +1000 Subject: [PATCH 0031/1063] [POWERPC] iseries: Small viotape cleanup allowed by devfs removal Signed-off-by: Stephen Rothwell --- drivers/char/viotape.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/char/viotape.c b/drivers/char/viotape.c index b72b2049aaae..73c78bf75d7f 100644 --- a/drivers/char/viotape.c +++ b/drivers/char/viotape.c @@ -940,7 +940,6 @@ static void vioHandleTapeEvent(struct HvLpEvent *event) static int viotape_probe(struct vio_dev *vdev, const struct vio_device_id *id) { - char tapename[32]; int i = vdev->unit_address; int j; @@ -956,10 +955,9 @@ static int viotape_probe(struct vio_dev *vdev, const struct vio_device_id *id) "iseries!vt%d", i); class_device_create(tape_class, NULL, MKDEV(VIOTAPE_MAJOR, i | 0x80), NULL, "iseries!nvt%d", i); - sprintf(tapename, "iseries/vt%d", i); - printk(VIOTAPE_KERN_INFO "tape %s is iSeries " + printk(VIOTAPE_KERN_INFO "tape iseries/vt%d is iSeries " "resource %10.10s type %4.4s, model %3.3s\n", - tapename, viotape_unitinfo[i].rsrcname, + i, viotape_unitinfo[i].rsrcname, viotape_unitinfo[i].type, viotape_unitinfo[i].model); return 0; } From 54f5cd8afa1c9c9f8b152a946b0a7e0ecdef1631 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 13 Jul 2006 18:56:56 +1000 Subject: [PATCH 0032/1063] [POWERPC] iseries: Remove unnecessary include of iseries/hv_lp_event.h Also remove unnecessary reference to struct HvLpEvent. Signed-off-by: Stephen Rothwell --- arch/powerpc/kernel/asm-offsets.c | 1 - include/asm-powerpc/iseries/it_lp_queue.h | 2 -- 2 files changed, 3 deletions(-) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 7ee84968087b..ac0631958b20 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -40,7 +40,6 @@ #ifdef CONFIG_PPC64 #include #include -#include #include #include #endif diff --git a/include/asm-powerpc/iseries/it_lp_queue.h b/include/asm-powerpc/iseries/it_lp_queue.h index 284c5a7db3ac..3f6814769295 100644 --- a/include/asm-powerpc/iseries/it_lp_queue.h +++ b/include/asm-powerpc/iseries/it_lp_queue.h @@ -27,8 +27,6 @@ #include #include -struct HvLpEvent; - #define IT_LP_MAX_QUEUES 8 #define IT_LP_NOT_USED 0 /* Queue will not be used by PLIC */ From ca652c9396fa052815518e2b2ce2ebee6d9fb861 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 14 Jul 2006 14:25:33 +1000 Subject: [PATCH 0033/1063] [POWERPC] iseries: Move iommu_table_cb into platforms/iseries Although we pass the address of an iommu_table_cb to HvCallXm_getTceTableParms, we don't actually need the structure definition anywhere except in the iseries iommu code, so move the struct in there. Signed-off-by: Michael Ellerman Signed-off-by: Stephen Rothwell --- arch/powerpc/platforms/iseries/iommu.c | 17 +++++++++++++++++ include/asm-powerpc/iseries/hv_call_xm.h | 17 ----------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c index e3bd2015f2c9..2c3dbcd4613c 100644 --- a/arch/powerpc/platforms/iseries/iommu.c +++ b/arch/powerpc/platforms/iseries/iommu.c @@ -87,6 +87,23 @@ static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages) } } +/* + * Structure passed to HvCallXm_getTceTableParms + */ +struct iommu_table_cb { + unsigned long itc_busno; /* Bus number for this tce table */ + unsigned long itc_start; /* Will be NULL for secondary */ + unsigned long itc_totalsize; /* Size (in pages) of whole table */ + unsigned long itc_offset; /* Index into real tce table of the + start of our section */ + unsigned long itc_size; /* Size (in pages) of our section */ + unsigned long itc_index; /* Index of this tce table */ + unsigned short itc_maxtables; /* Max num of tables for partition */ + unsigned char itc_virtbus; /* Flag to indicate virtual bus */ + unsigned char itc_slotno; /* IOA Tce Slot Index */ + unsigned char itc_rsvd[4]; +}; + /* * Call Hv with the architected data structure to get TCE table info. * info. Put the returned data into the Linux representation of the diff --git a/include/asm-powerpc/iseries/hv_call_xm.h b/include/asm-powerpc/iseries/hv_call_xm.h index ca9202cb01ed..392ac3f54df0 100644 --- a/include/asm-powerpc/iseries/hv_call_xm.h +++ b/include/asm-powerpc/iseries/hv_call_xm.h @@ -16,23 +16,6 @@ #define HvCallXmSetTce HvCallXm + 11 #define HvCallXmSetTces HvCallXm + 13 -/* - * Structure passed to HvCallXm_getTceTableParms - */ -struct iommu_table_cb { - unsigned long itc_busno; /* Bus number for this tce table */ - unsigned long itc_start; /* Will be NULL for secondary */ - unsigned long itc_totalsize; /* Size (in pages) of whole table */ - unsigned long itc_offset; /* Index into real tce table of the - start of our section */ - unsigned long itc_size; /* Size (in pages) of our section */ - unsigned long itc_index; /* Index of this tce table */ - unsigned short itc_maxtables; /* Max num of tables for partition */ - unsigned char itc_virtbus; /* Flag to indicate virtual bus */ - unsigned char itc_slotno; /* IOA Tce Slot Index */ - unsigned char itc_rsvd[4]; -}; - static inline void HvCallXm_getTceTableParms(u64 cb) { HvCall1(HvCallXmGetTceTableParms, cb); From 6a5a297cf78e64ed68577f3e3480bc10abf0124b Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Sat, 15 Jul 2006 13:05:24 +0100 Subject: [PATCH 0034/1063] MTD: [NAND] Fix the sharpsl driver after breakage from a core conversion The CNE bits are inverted on the device and writeb function is missing a NOT operation. Signed-off-by: Richard Purdie Signed-off-by: David Woodhouse --- drivers/mtd/nand/sharpsl.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/sharpsl.c b/drivers/mtd/nand/sharpsl.c index fbeedc3184e9..51c7288ab49a 100644 --- a/drivers/mtd/nand/sharpsl.c +++ b/drivers/mtd/nand/sharpsl.c @@ -78,7 +78,7 @@ static struct mtd_partition sharpsl_nand_default_partition_info[] = { /* * hardware specific access to control-lines * ctrl: - * NAND_CNE: bit 0 -> bit 0 & 4 + * NAND_CNE: bit 0 -> ! bit 0 & 4 * NAND_CLE: bit 1 -> bit 1 * NAND_ALE: bit 2 -> bit 2 * @@ -92,7 +92,10 @@ static void sharpsl_nand_hwcontrol(struct mtd_info *mtd, int cmd, unsigned char bits = ctrl & 0x07; bits |= (ctrl & 0x01) << 4; - writeb((readb(FLASHCTL) & 0x17) | bits, FLASHCTL); + + bits ^= 0x11; + + writeb((readb(FLASHCTL) & ~0x17) | bits, FLASHCTL); } if (cmd != NAND_CMD_NONE) From 9d05cd51780c3855976b26cbee265490a0a10be9 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Fri, 14 Jul 2006 14:39:06 +0200 Subject: [PATCH 0035/1063] remove #error on !PCI from pmc551.c PMC551 depends on PCI in Kconfig so there is no need to #error in code if PCI is not set. Signed-off-by: Rolf Eike Beer Signed-off-by: David Woodhouse --- drivers/mtd/devices/pmc551.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/mtd/devices/pmc551.c b/drivers/mtd/devices/pmc551.c index 6f9bbf6fee4d..2c0149708739 100644 --- a/drivers/mtd/devices/pmc551.c +++ b/drivers/mtd/devices/pmc551.c @@ -99,10 +99,6 @@ #include #include -#ifndef CONFIG_PCI -#error Enable PCI in your kernel config -#endif - #include #include #include From 9a909867d2eca7727d0d5884df96e791e3531f24 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 15 Jul 2006 13:26:18 +0100 Subject: [PATCH 0036/1063] [MTD NAND] Fix lookup error in nand_get_flash_type() Spotted by liyu Signed-off-by: David Woodhouse --- drivers/mtd/nand/nand_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index cffd66309ffa..119d17cdb780 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -2222,7 +2222,7 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd, } /* Try to identify manufacturer */ - for (maf_idx = 0; nand_manuf_ids[maf_idx].id != 0x0; maf_id++) { + for (maf_idx = 0; nand_manuf_ids[maf_idx].id != 0x0; maf_idx++) { if (nand_manuf_ids[maf_idx].id == *maf_id) break; } From c4e7fb313771ac03dfdca26d30e8b721731c562b Mon Sep 17 00:00:00 2001 From: Ville Herva Date: Fri, 14 Jul 2006 00:31:16 +0300 Subject: [PATCH 0037/1063] block2mtd.c: Make kernel boot command line arguments work (try 4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trying to pass kernel command line arguments to block2mtd at boot-time does not work currently. block2mtd_setup() is called so early that kmalloc() fails nevermind being able to do open_bdev_excl() (which requires rootfs to be mounted. This patch only saves the option string at the early boot stage, and parses them later when block2mtd_init() is called. If open_bdev_excl() fails, open_by_devnum(name_to_dev_t()) is tried instead, which makes it possible to initialize the driver before rootfs has been mounted. Also gets rid of the superfluous parse_name() that only checks if name is longer than 80 chars and copies it to a string that is not kfreed. With this patch, I can boot statically compiled block2mtd, and mount jffs2 as rootfs (without modules or initrd), with lilo config like this: root=/dev/mtdblock0 append="rootfstype=jffs2 block2mtd.block2mtd=/dev/hdc2,65536" (Note that rootfstype=jffs2 is required, since the kernel only tries filesystems without "nodev" attribute by default, and jffs is "nodev"). Compared to first version of this patch, this one does not copy the parameters to the global buffer if init has already been called, and the global array is marked as __initdata. Compared to the second version of this patch, module build is fixed. Compared to the third version of this patch, statically compiled block2mtd driver with no boot-time parameter no longer gives spurious error 'cannot open device ""' Signed-off-by: Ville Herva Acked-by: Jörn Engel Signed-off-by: David Woodhouse --- drivers/mtd/devices/block2mtd.c | 93 ++++++++++++++++++++++----------- 1 file changed, 63 insertions(+), 30 deletions(-) diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c index ede3561be870..401c6a294baa 100644 --- a/drivers/mtd/devices/block2mtd.c +++ b/drivers/mtd/devices/block2mtd.c @@ -18,6 +18,7 @@ #include #include #include +#include #define VERSION "$Revision: 1.30 $" @@ -236,6 +237,8 @@ static int _block2mtd_write(struct block2mtd_dev *dev, const u_char *buf, } return 0; } + + static int block2mtd_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf) { @@ -299,6 +302,19 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size) /* Get a handle on the device */ bdev = open_bdev_excl(devname, O_RDWR, NULL); +#ifndef MODULE + if (IS_ERR(bdev)) { + + /* We might not have rootfs mounted at this point. Try + to resolve the device name by other means. */ + + dev_t dev = name_to_dev_t(devname); + if (dev != 0) { + bdev = open_by_devnum(dev, FMODE_WRITE | FMODE_READ); + } + } +#endif + if (IS_ERR(bdev)) { ERROR("error: cannot open device %s", devname); goto devinit_err; @@ -393,26 +409,6 @@ static int parse_num(size_t *num, const char *token) } -static int parse_name(char **pname, const char *token, size_t limit) -{ - size_t len; - char *name; - - len = strlen(token) + 1; - if (len > limit) - return -ENOSPC; - - name = kmalloc(len, GFP_KERNEL); - if (!name) - return -ENOMEM; - - strcpy(name, token); - - *pname = name; - return 0; -} - - static inline void kill_final_newline(char *str) { char *newline = strrchr(str, '\n'); @@ -426,9 +422,15 @@ static inline void kill_final_newline(char *str) return 0; \ } while (0) -static int block2mtd_setup(const char *val, struct kernel_param *kp) +#ifndef MODULE +static int block2mtd_init_called = 0; +static __initdata char block2mtd_paramline[80 + 12]; /* 80 for device, 12 for erase size */ +#endif + + +static int block2mtd_setup2(const char *val) { - char buf[80+12]; /* 80 for device, 12 for erase size */ + char buf[80 + 12]; /* 80 for device, 12 for erase size */ char *str = buf; char *token[2]; char *name; @@ -450,13 +452,9 @@ static int block2mtd_setup(const char *val, struct kernel_param *kp) if (!token[0]) parse_err("no argument"); - ret = parse_name(&name, token[0], 80); - if (ret == -ENOMEM) - parse_err("out of memory"); - if (ret == -ENOSPC) - parse_err("name too long"); - if (ret) - return 0; + name = token[0]; + if (strlen(name) + 1 > 80) + parse_err("device name too long"); if (token[1]) { ret = parse_num(&erase_size, token[1]); @@ -472,13 +470,48 @@ static int block2mtd_setup(const char *val, struct kernel_param *kp) } +static int block2mtd_setup(const char *val, struct kernel_param *kp) +{ +#ifdef MODULE + return block2mtd_setup2(val); +#else + /* If more parameters are later passed in via + /sys/module/block2mtd/parameters/block2mtd + and block2mtd_init() has already been called, + we can parse the argument now. */ + + if (block2mtd_init_called) + return block2mtd_setup2(val); + + /* During early boot stage, we only save the parameters + here. We must parse them later: if the param passed + from kernel boot command line, block2mtd_setup() is + called so early that it is not possible to resolve + the device (even kmalloc() fails). Deter that work to + block2mtd_setup2(). */ + + strlcpy(block2mtd_paramline, val, sizeof(block2mtd_paramline)); + + return 0; +#endif +} + + module_param_call(block2mtd, block2mtd_setup, NULL, NULL, 0200); MODULE_PARM_DESC(block2mtd, "Device to use. \"block2mtd=[,]\""); static int __init block2mtd_init(void) { + int ret = 0; INFO("version " VERSION); - return 0; + +#ifndef MODULE + if (strlen(block2mtd_paramline)) + ret = block2mtd_setup2(block2mtd_paramline); + block2mtd_init_called = 1; +#endif + + return ret; } From 46a1652c28fc4f4e9d46ea12b0c36b5b6b600f58 Mon Sep 17 00:00:00 2001 From: Alexey Korolev Date: Wed, 28 Jun 2006 19:22:07 +0100 Subject: [PATCH 0038/1063] [MTD] Fixes of performance and stability issues in CFI driver. Fix of performance and stability issues on Intel NOR chips. It fixes: 1. Very low write performance on Sibley (perf tests demonstrated write performance less than 100Kb/sec when it should be over 400Kb/sec). 2. Low erase performance. (perf tests on Sibleuy demonstrated erase performance 246Kb/sec when it should be over 300Kb/sec). 3. Error on JFFS2 tests with CPU loading application when MTD returns "block erase error: (status timeout)" To fix the issue it does the following: 1. Removes the timeout tuning from inval_cache_and_wait_for_operation. 2. Waiting conditions in inval_cache_and_wait_for_operation now is based on timer resolution If timeout is lower than timer resolution then we do in cycle "Checking the status" udelay(1); cond_resched(); If timeout is greater than timer resolution (probably erase operation) We do the following sleep for half of operation timeout and do in cycle the following "Checking the status" sleep for timer resolution Signed-off-by: Nicolas Pitre Signed-off-by: Alexey Korolev Signed-off-by: David Woodhouse --- drivers/mtd/chips/cfi_cmdset_0001.c | 87 ++++++++++++++--------------- 1 file changed, 43 insertions(+), 44 deletions(-) diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c index 39edb8250fbc..7ea49a0d5ec3 100644 --- a/drivers/mtd/chips/cfi_cmdset_0001.c +++ b/drivers/mtd/chips/cfi_cmdset_0001.c @@ -908,7 +908,7 @@ static void __xipram xip_enable(struct map_info *map, struct flchip *chip, static int __xipram xip_wait_for_operation( struct map_info *map, struct flchip *chip, - unsigned long adr, int *chip_op_time ) + unsigned long adr, unsigned int chip_op_time ) { struct cfi_private *cfi = map->fldrv_priv; struct cfi_pri_intelext *cfip = cfi->cmdset_priv; @@ -917,7 +917,7 @@ static int __xipram xip_wait_for_operation( flstate_t oldstate, newstate; start = xip_currtime(); - usec = *chip_op_time * 8; + usec = chip_op_time * 8; if (usec == 0) usec = 500000; done = 0; @@ -1027,8 +1027,8 @@ static int __xipram xip_wait_for_operation( #define XIP_INVAL_CACHED_RANGE(map, from, size) \ INVALIDATE_CACHED_RANGE(map, from, size) -#define INVAL_CACHE_AND_WAIT(map, chip, cmd_adr, inval_adr, inval_len, p_usec) \ - xip_wait_for_operation(map, chip, cmd_adr, p_usec) +#define INVAL_CACHE_AND_WAIT(map, chip, cmd_adr, inval_adr, inval_len, usec) \ + xip_wait_for_operation(map, chip, cmd_adr, usec) #else @@ -1040,64 +1040,64 @@ static int __xipram xip_wait_for_operation( static int inval_cache_and_wait_for_operation( struct map_info *map, struct flchip *chip, unsigned long cmd_adr, unsigned long inval_adr, int inval_len, - int *chip_op_time ) + unsigned int chip_op_time) { struct cfi_private *cfi = map->fldrv_priv; map_word status, status_OK = CMD(0x80); - int z, chip_state = chip->state; - unsigned long timeo; + int chip_state = chip->state; + unsigned int timeo, sleep_time; spin_unlock(chip->mutex); if (inval_len) INVALIDATE_CACHED_RANGE(map, inval_adr, inval_len); - if (*chip_op_time) - cfi_udelay(*chip_op_time); spin_lock(chip->mutex); - timeo = *chip_op_time * 8 * HZ / 1000000; - if (timeo < HZ/2) - timeo = HZ/2; - timeo += jiffies; + /* set our timeout to 8 times the expected delay */ + timeo = chip_op_time * 8; + if (!timeo) + timeo = 500000; + sleep_time = chip_op_time / 2; - z = 0; for (;;) { - if (chip->state != chip_state) { - /* Someone's suspended the operation: sleep */ - DECLARE_WAITQUEUE(wait, current); - - set_current_state(TASK_UNINTERRUPTIBLE); - add_wait_queue(&chip->wq, &wait); - spin_unlock(chip->mutex); - schedule(); - remove_wait_queue(&chip->wq, &wait); - timeo = jiffies + (HZ / 2); /* FIXME */ - spin_lock(chip->mutex); - continue; - } - status = map_read(map, cmd_adr); if (map_word_andequal(map, status, status_OK, status_OK)) break; - /* OK Still waiting */ - if (time_after(jiffies, timeo)) { + if (!timeo) { map_write(map, CMD(0x70), cmd_adr); chip->state = FL_STATUS; return -ETIME; } - /* Latency issues. Drop the lock, wait a while and retry */ - z++; + /* OK Still waiting. Drop the lock, wait a while and retry. */ spin_unlock(chip->mutex); - cfi_udelay(1); + if (sleep_time >= 1000000/HZ) { + /* + * Half of the normal delay still remaining + * can be performed with a sleeping delay instead + * of busy waiting. + */ + msleep(sleep_time/1000); + timeo -= sleep_time; + sleep_time = 1000000/HZ; + } else { + udelay(1); + cond_resched(); + timeo--; + } spin_lock(chip->mutex); - } - if (!z) { - if (!--(*chip_op_time)) - *chip_op_time = 1; - } else if (z > 1) - ++(*chip_op_time); + if (chip->state != chip_state) { + /* Someone's suspended the operation: sleep */ + DECLARE_WAITQUEUE(wait, current); + set_current_state(TASK_UNINTERRUPTIBLE); + add_wait_queue(&chip->wq, &wait); + spin_unlock(chip->mutex); + schedule(); + remove_wait_queue(&chip->wq, &wait); + spin_lock(chip->mutex); + } + } /* Done and happy. */ chip->state = FL_STATUS; @@ -1107,8 +1107,7 @@ static int inval_cache_and_wait_for_operation( #endif #define WAIT_TIMEOUT(map, chip, adr, udelay) \ - ({ int __udelay = (udelay); \ - INVAL_CACHE_AND_WAIT(map, chip, adr, 0, 0, &__udelay); }) + INVAL_CACHE_AND_WAIT(map, chip, adr, 0, 0, udelay); static int do_point_onechip (struct map_info *map, struct flchip *chip, loff_t adr, size_t len) @@ -1332,7 +1331,7 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip, ret = INVAL_CACHE_AND_WAIT(map, chip, adr, adr, map_bankwidth(map), - &chip->word_write_time); + chip->word_write_time); if (ret) { xip_enable(map, chip, adr); printk(KERN_ERR "%s: word write error (status timeout)\n", map->name); @@ -1569,7 +1568,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip, ret = INVAL_CACHE_AND_WAIT(map, chip, cmd_adr, adr, len, - &chip->buffer_write_time); + chip->buffer_write_time); if (ret) { map_write(map, CMD(0x70), cmd_adr); chip->state = FL_STATUS; @@ -1704,7 +1703,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip, ret = INVAL_CACHE_AND_WAIT(map, chip, adr, adr, len, - &chip->erase_time); + chip->erase_time); if (ret) { map_write(map, CMD(0x70), adr); chip->state = FL_STATUS; From 804af2cf6e7af31d2e664b54e657dddd9b531dbd Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 26 Jul 2006 21:39:49 +0100 Subject: [PATCH 0039/1063] [AGPGART] remove private page protection map AGP keeps its own copy of the protection_map, upcoming DRM changes will also require access to this map from modules. Signed-off-by: Hugh Dickins Signed-off-by: Dave Airlie Signed-off-by: Dave Jones --- drivers/char/agp/frontend.c | 27 ++------------------------- include/linux/mm.h | 1 + mm/mmap.c | 7 +++++++ 3 files changed, 10 insertions(+), 25 deletions(-) diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c index d9c5a9142ad1..0f2ed2aa2d81 100644 --- a/drivers/char/agp/frontend.c +++ b/drivers/char/agp/frontend.c @@ -151,35 +151,12 @@ static void agp_add_seg_to_client(struct agp_client *client, client->segments = seg; } -/* Originally taken from linux/mm/mmap.c from the array - * protection_map. - * The original really should be exported to modules, or - * some routine which does the conversion for you - */ - -static const pgprot_t my_protect_map[16] = -{ - __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111, - __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111 -}; - static pgprot_t agp_convert_mmap_flags(int prot) { -#define _trans(x,bit1,bit2) \ -((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0) - unsigned long prot_bits; - pgprot_t temp; - prot_bits = _trans(prot, PROT_READ, VM_READ) | - _trans(prot, PROT_WRITE, VM_WRITE) | - _trans(prot, PROT_EXEC, VM_EXEC); - - prot_bits |= VM_SHARED; - - temp = my_protect_map[prot_bits & 0x0000000f]; - - return temp; + prot_bits = calc_vm_prot_bits(prot) | VM_SHARED; + return vm_get_page_prot(prot_bits); } static int agp_create_segment(struct agp_client *client, struct agp_region *region) diff --git a/include/linux/mm.h b/include/linux/mm.h index 990957e0929f..4fba4560699b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1012,6 +1012,7 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma) return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; } +pgprot_t vm_get_page_prot(unsigned long vm_flags); struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr); struct page *vmalloc_to_page(void *addr); unsigned long vmalloc_to_pfn(void *addr); diff --git a/mm/mmap.c b/mm/mmap.c index c1868ecdbc5f..c7ed061f4507 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -60,6 +60,13 @@ pgprot_t protection_map[16] = { __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111 }; +pgprot_t vm_get_page_prot(unsigned long vm_flags) +{ + return protection_map[vm_flags & + (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]; +} +EXPORT_SYMBOL(vm_get_page_prot); + int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ int sysctl_overcommit_ratio = 50; /* default is 50% */ int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; From f4c8aa1107969c26b1984eb2996a58f816dea71f Mon Sep 17 00:00:00 2001 From: "brking@charter.net" Date: Wed, 5 Jul 2006 17:00:01 -0500 Subject: [PATCH 0040/1063] [SCSI] megaraid: Add support for change_queue_depth Adds support for change_queue_depth so that device queue depth can be changed at runtime through sysfs. Signed-off-by: Acked-by: Seokmann Ju Signed-off-by: James Bottomley --- drivers/scsi/megaraid/megaraid_mbox.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c index 92715130ac09..7ae580f17e64 100644 --- a/drivers/scsi/megaraid/megaraid_mbox.c +++ b/drivers/scsi/megaraid/megaraid_mbox.c @@ -330,6 +330,21 @@ static struct device_attribute *megaraid_sdev_attrs[] = { NULL, }; +/** + * megaraid_change_queue_depth - Change the device's queue depth + * @sdev: scsi device struct + * @qdepth: depth to set + * + * Return value: + * actual depth set + **/ +static int megaraid_change_queue_depth(struct scsi_device *sdev, int qdepth) +{ + if (qdepth > MBOX_MAX_SCSI_CMDS) + qdepth = MBOX_MAX_SCSI_CMDS; + scsi_adjust_queue_depth(sdev, 0, qdepth); + return sdev->queue_depth; +} /* * Scsi host template for megaraid unified driver @@ -343,6 +358,7 @@ static struct scsi_host_template megaraid_template_g = { .eh_device_reset_handler = megaraid_reset_handler, .eh_bus_reset_handler = megaraid_reset_handler, .eh_host_reset_handler = megaraid_reset_handler, + .change_queue_depth = megaraid_change_queue_depth, .use_clustering = ENABLE_CLUSTERING, .sdev_attrs = megaraid_sdev_attrs, .shost_attrs = megaraid_shost_attrs, From 0c269e6d3c615403a6e0acbe6e88f1c0da9c2396 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 12 Jul 2006 09:51:04 -0400 Subject: [PATCH 0041/1063] [SCSI] mptsas: add parent port backlink This takes advantage of the sas class backlink function to show which port on an expander is used to communicate with the parent. Signed-off-by: James Bottomley --- drivers/message/fusion/mptsas.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c index f66f2203143a..dfdd1e445768 100644 --- a/drivers/message/fusion/mptsas.c +++ b/drivers/message/fusion/mptsas.c @@ -1642,14 +1642,18 @@ static int mptsas_probe_one_phy(struct device *dev, for (i = 0; i < port_info->num_phys; i++) if (port_info->phy_info[i].identify.sas_address == - identify.sas_address) + identify.sas_address) { + sas_port_mark_backlink(port); goto out; + } } else if (scsi_is_sas_rphy(parent)) { struct sas_rphy *parent_rphy = dev_to_rphy(parent); if (identify.sas_address == - parent_rphy->identify.sas_address) + parent_rphy->identify.sas_address) { + sas_port_mark_backlink(port); goto out; + } } switch (identify.device_type) { From 1c57e86d75cf162bdadb3a5fe0cd3f65aa1a9ca3 Mon Sep 17 00:00:00 2001 From: Erich Chen Date: Wed, 12 Jul 2006 08:59:32 -0700 Subject: [PATCH 0042/1063] [SCSI] arcmsr: initial driver, version 1.20.00.13 arcmsr is a driver for the Areca Raid controller, a host based RAID subsystem that speaks SCSI at the firmware level. This patch is quite a clean up over the initial submission with contributions from: Randy Dunlap Christoph Hellwig Matthew Wilcox Adrian Bunk Signed-off-by: Erich Chen Signed-off-by: Andrew Morton Signed-off-by: James Bottomley --- Documentation/scsi/ChangeLog.arcmsr | 56 + Documentation/scsi/arcmsr_spec.txt | 574 ++++++++++ drivers/scsi/Kconfig | 14 + drivers/scsi/Makefile | 1 + drivers/scsi/arcmsr/Makefile | 6 + drivers/scsi/arcmsr/arcmsr.h | 472 +++++++++ drivers/scsi/arcmsr/arcmsr_attr.c | 392 +++++++ drivers/scsi/arcmsr/arcmsr_hba.c | 1496 +++++++++++++++++++++++++++ include/linux/pci_ids.h | 17 + 9 files changed, 3028 insertions(+) create mode 100644 Documentation/scsi/ChangeLog.arcmsr create mode 100644 Documentation/scsi/arcmsr_spec.txt create mode 100644 drivers/scsi/arcmsr/Makefile create mode 100644 drivers/scsi/arcmsr/arcmsr.h create mode 100644 drivers/scsi/arcmsr/arcmsr_attr.c create mode 100644 drivers/scsi/arcmsr/arcmsr_hba.c diff --git a/Documentation/scsi/ChangeLog.arcmsr b/Documentation/scsi/ChangeLog.arcmsr new file mode 100644 index 000000000000..162c47fdf45f --- /dev/null +++ b/Documentation/scsi/ChangeLog.arcmsr @@ -0,0 +1,56 @@ +************************************************************************** +** History +** +** REV# DATE NAME DESCRIPTION +** 1.00.00.00 3/31/2004 Erich Chen First release +** 1.10.00.04 7/28/2004 Erich Chen modify for ioctl +** 1.10.00.06 8/28/2004 Erich Chen modify for 2.6.x +** 1.10.00.08 9/28/2004 Erich Chen modify for x86_64 +** 1.10.00.10 10/10/2004 Erich Chen bug fix for SMP & ioctl +** 1.20.00.00 11/29/2004 Erich Chen bug fix with arcmsr_bus_reset when PHY error +** 1.20.00.02 12/09/2004 Erich Chen bug fix with over 2T bytes RAID Volume +** 1.20.00.04 1/09/2005 Erich Chen fits for Debian linux kernel version 2.2.xx +** 1.20.00.05 2/20/2005 Erich Chen cleanly as look like a Linux driver at 2.6.x +** thanks for peoples kindness comment +** Kornel Wieliczek +** Christoph Hellwig +** Adrian Bunk +** Andrew Morton +** Christoph Hellwig +** James Bottomley +** Arjan van de Ven +** 1.20.00.06 3/12/2005 Erich Chen fix with arcmsr_pci_unmap_dma "unsigned long" cast, +** modify PCCB POOL allocated by "dma_alloc_coherent" +** (Kornel Wieliczek's comment) +** 1.20.00.07 3/23/2005 Erich Chen bug fix with arcmsr_scsi_host_template_init +** occur segmentation fault, +** if RAID adapter does not on PCI slot +** and modprobe/rmmod this driver twice. +** bug fix enormous stack usage (Adrian Bunk's comment) +** 1.20.00.08 6/23/2005 Erich Chen bug fix with abort command, +** in case of heavy loading when sata cable +** working on low quality connection +** 1.20.00.09 9/12/2005 Erich Chen bug fix with abort command handling, firmware version check +** and firmware update notify for hardware bug fix +** 1.20.00.10 9/23/2005 Erich Chen enhance sysfs function for change driver's max tag Q number. +** add DMA_64BIT_MASK for backward compatible with all 2.6.x +** add some useful message for abort command +** add ioctl code 'ARCMSR_IOCTL_FLUSH_ADAPTER_CACHE' +** customer can send this command for sync raid volume data +** 1.20.00.11 9/29/2005 Erich Chen by comment of Arjan van de Ven fix incorrect msleep redefine +** cast off sizeof(dma_addr_t) condition for 64bit pci_set_dma_mask +** 1.20.00.12 9/30/2005 Erich Chen bug fix with 64bit platform's ccbs using if over 4G system memory +** change 64bit pci_set_consistent_dma_mask into 32bit +** increcct adapter count if adapter initialize fail. +** miss edit at arcmsr_build_ccb.... +** psge += sizeof(struct _SG64ENTRY *) => +** psge += sizeof(struct _SG64ENTRY) +** 64 bits sg entry would be incorrectly calculated +** thanks Kornel Wieliczek give me kindly notify +** and detail description +** 1.20.00.13 11/15/2005 Erich Chen scheduling pending ccb with FIFO +** change the architecture of arcmsr command queue list +** for linux standard list +** enable usage of pci message signal interrupt +** follow Randy.Danlup kindness suggestion cleanup this code +************************************************************************** \ No newline at end of file diff --git a/Documentation/scsi/arcmsr_spec.txt b/Documentation/scsi/arcmsr_spec.txt new file mode 100644 index 000000000000..5e0042340fd3 --- /dev/null +++ b/Documentation/scsi/arcmsr_spec.txt @@ -0,0 +1,574 @@ +******************************************************************************* +** ARECA FIRMWARE SPEC +******************************************************************************* +** Usage of IOP331 adapter +** (All In/Out is in IOP331's view) +** 1. Message 0 --> InitThread message and retrun code +** 2. Doorbell is used for RS-232 emulation +** inDoorBell : bit0 -- data in ready +** (DRIVER DATA WRITE OK) +** bit1 -- data out has been read +** (DRIVER DATA READ OK) +** outDooeBell: bit0 -- data out ready +** (IOP331 DATA WRITE OK) +** bit1 -- data in has been read +** (IOP331 DATA READ OK) +** 3. Index Memory Usage +** offset 0xf00 : for RS232 out (request buffer) +** offset 0xe00 : for RS232 in (scratch buffer) +** offset 0xa00 : for inbound message code message_rwbuffer +** (driver send to IOP331) +** offset 0xa00 : for outbound message code message_rwbuffer +** (IOP331 send to driver) +** 4. RS-232 emulation +** Currently 128 byte buffer is used +** 1st uint32_t : Data length (1--124) +** Byte 4--127 : Max 124 bytes of data +** 5. PostQ +** All SCSI Command must be sent through postQ: +** (inbound queue port) Request frame must be 32 bytes aligned +** #bit27--bit31 => flag for post ccb +** #bit0--bit26 => real address (bit27--bit31) of post arcmsr_cdb +** bit31 : +** 0 : 256 bytes frame +** 1 : 512 bytes frame +** bit30 : +** 0 : normal request +** 1 : BIOS request +** bit29 : reserved +** bit28 : reserved +** bit27 : reserved +** --------------------------------------------------------------------------- +** (outbount queue port) Request reply +** #bit27--bit31 +** => flag for reply +** #bit0--bit26 +** => real address (bit27--bit31) of reply arcmsr_cdb +** bit31 : must be 0 (for this type of reply) +** bit30 : reserved for BIOS handshake +** bit29 : reserved +** bit28 : +** 0 : no error, ignore AdapStatus/DevStatus/SenseData +** 1 : Error, error code in AdapStatus/DevStatus/SenseData +** bit27 : reserved +** 6. BIOS request +** All BIOS request is the same with request from PostQ +** Except : +** Request frame is sent from configuration space +** offset: 0x78 : Request Frame (bit30 == 1) +** offset: 0x18 : writeonly to generate +** IRQ to IOP331 +** Completion of request: +** (bit30 == 0, bit28==err flag) +** 7. Definition of SGL entry (structure) +** 8. Message1 Out - Diag Status Code (????) +** 9. Message0 message code : +** 0x00 : NOP +** 0x01 : Get Config +** ->offset 0xa00 :for outbound message code message_rwbuffer +** (IOP331 send to driver) +** Signature 0x87974060(4) +** Request len 0x00000200(4) +** numbers of queue 0x00000100(4) +** SDRAM Size 0x00000100(4)-->256 MB +** IDE Channels 0x00000008(4) +** vendor 40 bytes char +** model 8 bytes char +** FirmVer 16 bytes char +** Device Map 16 bytes char +** FirmwareVersion DWORD <== Added for checking of +** new firmware capability +** 0x02 : Set Config +** ->offset 0xa00 :for inbound message code message_rwbuffer +** (driver send to IOP331) +** Signature 0x87974063(4) +** UPPER32 of Request Frame (4)-->Driver Only +** 0x03 : Reset (Abort all queued Command) +** 0x04 : Stop Background Activity +** 0x05 : Flush Cache +** 0x06 : Start Background Activity +** (re-start if background is halted) +** 0x07 : Check If Host Command Pending +** (Novell May Need This Function) +** 0x08 : Set controller time +** ->offset 0xa00 : for inbound message code message_rwbuffer +** (driver to IOP331) +** byte 0 : 0xaa <-- signature +** byte 1 : 0x55 <-- signature +** byte 2 : year (04) +** byte 3 : month (1..12) +** byte 4 : date (1..31) +** byte 5 : hour (0..23) +** byte 6 : minute (0..59) +** byte 7 : second (0..59) +******************************************************************************* +******************************************************************************* +** RS-232 Interface for Areca Raid Controller +** The low level command interface is exclusive with VT100 terminal +** -------------------------------------------------------------------- +** 1. Sequence of command execution +** -------------------------------------------------------------------- +** (A) Header : 3 bytes sequence (0x5E, 0x01, 0x61) +** (B) Command block : variable length of data including length, +** command code, data and checksum byte +** (C) Return data : variable length of data +** -------------------------------------------------------------------- +** 2. Command block +** -------------------------------------------------------------------- +** (A) 1st byte : command block length (low byte) +** (B) 2nd byte : command block length (high byte) +** note ..command block length shouldn't > 2040 bytes, +** length excludes these two bytes +** (C) 3rd byte : command code +** (D) 4th and following bytes : variable length data bytes +** depends on command code +** (E) last byte : checksum byte (sum of 1st byte until last data byte) +** -------------------------------------------------------------------- +** 3. Command code and associated data +** -------------------------------------------------------------------- +** The following are command code defined in raid controller Command +** code 0x10--0x1? are used for system level management, +** no password checking is needed and should be implemented in separate +** well controlled utility and not for end user access. +** Command code 0x20--0x?? always check the password, +** password must be entered to enable these command. +** enum +** { +** GUI_SET_SERIAL=0x10, +** GUI_SET_VENDOR, +** GUI_SET_MODEL, +** GUI_IDENTIFY, +** GUI_CHECK_PASSWORD, +** GUI_LOGOUT, +** GUI_HTTP, +** GUI_SET_ETHERNET_ADDR, +** GUI_SET_LOGO, +** GUI_POLL_EVENT, +** GUI_GET_EVENT, +** GUI_GET_HW_MONITOR, +** // GUI_QUICK_CREATE=0x20, (function removed) +** GUI_GET_INFO_R=0x20, +** GUI_GET_INFO_V, +** GUI_GET_INFO_P, +** GUI_GET_INFO_S, +** GUI_CLEAR_EVENT, +** GUI_MUTE_BEEPER=0x30, +** GUI_BEEPER_SETTING, +** GUI_SET_PASSWORD, +** GUI_HOST_INTERFACE_MODE, +** GUI_REBUILD_PRIORITY, +** GUI_MAX_ATA_MODE, +** GUI_RESET_CONTROLLER, +** GUI_COM_PORT_SETTING, +** GUI_NO_OPERATION, +** GUI_DHCP_IP, +** GUI_CREATE_PASS_THROUGH=0x40, +** GUI_MODIFY_PASS_THROUGH, +** GUI_DELETE_PASS_THROUGH, +** GUI_IDENTIFY_DEVICE, +** GUI_CREATE_RAIDSET=0x50, +** GUI_DELETE_RAIDSET, +** GUI_EXPAND_RAIDSET, +** GUI_ACTIVATE_RAIDSET, +** GUI_CREATE_HOT_SPARE, +** GUI_DELETE_HOT_SPARE, +** GUI_CREATE_VOLUME=0x60, +** GUI_MODIFY_VOLUME, +** GUI_DELETE_VOLUME, +** GUI_START_CHECK_VOLUME, +** GUI_STOP_CHECK_VOLUME +** }; +** Command description : +** GUI_SET_SERIAL : Set the controller serial# +** byte 0,1 : length +** byte 2 : command code 0x10 +** byte 3 : password length (should be 0x0f) +** byte 4-0x13 : should be "ArEcATecHnoLogY" +** byte 0x14--0x23 : Serial number string (must be 16 bytes) +** GUI_SET_VENDOR : Set vendor string for the controller +** byte 0,1 : length +** byte 2 : command code 0x11 +** byte 3 : password length (should be 0x08) +** byte 4-0x13 : should be "ArEcAvAr" +** byte 0x14--0x3B : vendor string (must be 40 bytes) +** GUI_SET_MODEL : Set the model name of the controller +** byte 0,1 : length +** byte 2 : command code 0x12 +** byte 3 : password length (should be 0x08) +** byte 4-0x13 : should be "ArEcAvAr" +** byte 0x14--0x1B : model string (must be 8 bytes) +** GUI_IDENTIFY : Identify device +** byte 0,1 : length +** byte 2 : command code 0x13 +** return "Areca RAID Subsystem " +** GUI_CHECK_PASSWORD : Verify password +** byte 0,1 : length +** byte 2 : command code 0x14 +** byte 3 : password length +** byte 4-0x?? : user password to be checked +** GUI_LOGOUT : Logout GUI (force password checking on next command) +** byte 0,1 : length +** byte 2 : command code 0x15 +** GUI_HTTP : HTTP interface (reserved for Http proxy service)(0x16) +** +** GUI_SET_ETHERNET_ADDR : Set the ethernet MAC address +** byte 0,1 : length +** byte 2 : command code 0x17 +** byte 3 : password length (should be 0x08) +** byte 4-0x13 : should be "ArEcAvAr" +** byte 0x14--0x19 : Ethernet MAC address (must be 6 bytes) +** GUI_SET_LOGO : Set logo in HTTP +** byte 0,1 : length +** byte 2 : command code 0x18 +** byte 3 : Page# (0/1/2/3) (0xff --> clear OEM logo) +** byte 4/5/6/7 : 0x55/0xaa/0xa5/0x5a +** byte 8 : TITLE.JPG data (each page must be 2000 bytes) +** note page0 1st 2 byte must be +** actual length of the JPG file +** GUI_POLL_EVENT : Poll If Event Log Changed +** byte 0,1 : length +** byte 2 : command code 0x19 +** GUI_GET_EVENT : Read Event +** byte 0,1 : length +** byte 2 : command code 0x1a +** byte 3 : Event Page (0:1st page/1/2/3:last page) +** GUI_GET_HW_MONITOR : Get HW monitor data +** byte 0,1 : length +** byte 2 : command code 0x1b +** byte 3 : # of FANs(example 2) +** byte 4 : # of Voltage sensor(example 3) +** byte 5 : # of temperature sensor(example 2) +** byte 6 : # of power +** byte 7/8 : Fan#0 (RPM) +** byte 9/10 : Fan#1 +** byte 11/12 : Voltage#0 original value in *1000 +** byte 13/14 : Voltage#0 value +** byte 15/16 : Voltage#1 org +** byte 17/18 : Voltage#1 +** byte 19/20 : Voltage#2 org +** byte 21/22 : Voltage#2 +** byte 23 : Temp#0 +** byte 24 : Temp#1 +** byte 25 : Power indicator (bit0 : power#0, +** bit1 : power#1) +** byte 26 : UPS indicator +** GUI_QUICK_CREATE : Quick create raid/volume set +** byte 0,1 : length +** byte 2 : command code 0x20 +** byte 3/4/5/6 : raw capacity +** byte 7 : raid level +** byte 8 : stripe size +** byte 9 : spare +** byte 10/11/12/13: device mask (the devices to create raid/volume) +** This function is removed, application like +** to implement quick create function +** need to use GUI_CREATE_RAIDSET and GUI_CREATE_VOLUMESET function. +** GUI_GET_INFO_R : Get Raid Set Information +** byte 0,1 : length +** byte 2 : command code 0x20 +** byte 3 : raidset# +** typedef struct sGUI_RAIDSET +** { +** BYTE grsRaidSetName[16]; +** DWORD grsCapacity; +** DWORD grsCapacityX; +** DWORD grsFailMask; +** BYTE grsDevArray[32]; +** BYTE grsMemberDevices; +** BYTE grsNewMemberDevices; +** BYTE grsRaidState; +** BYTE grsVolumes; +** BYTE grsVolumeList[16]; +** BYTE grsRes1; +** BYTE grsRes2; +** BYTE grsRes3; +** BYTE grsFreeSegments; +** DWORD grsRawStripes[8]; +** DWORD grsRes4; +** DWORD grsRes5; // Total to 128 bytes +** DWORD grsRes6; // Total to 128 bytes +** } sGUI_RAIDSET, *pGUI_RAIDSET; +** GUI_GET_INFO_V : Get Volume Set Information +** byte 0,1 : length +** byte 2 : command code 0x21 +** byte 3 : volumeset# +** typedef struct sGUI_VOLUMESET +** { +** BYTE gvsVolumeName[16]; // 16 +** DWORD gvsCapacity; +** DWORD gvsCapacityX; +** DWORD gvsFailMask; +** DWORD gvsStripeSize; +** DWORD gvsNewFailMask; +** DWORD gvsNewStripeSize; +** DWORD gvsVolumeStatus; +** DWORD gvsProgress; // 32 +** sSCSI_ATTR gvsScsi; +** BYTE gvsMemberDisks; +** BYTE gvsRaidLevel; // 8 +** BYTE gvsNewMemberDisks; +** BYTE gvsNewRaidLevel; +** BYTE gvsRaidSetNumber; +** BYTE gvsRes0; // 4 +** BYTE gvsRes1[4]; // 64 bytes +** } sGUI_VOLUMESET, *pGUI_VOLUMESET; +** GUI_GET_INFO_P : Get Physical Drive Information +** byte 0,1 : length +** byte 2 : command code 0x22 +** byte 3 : drive # (from 0 to max-channels - 1) +** typedef struct sGUI_PHY_DRV +** { +** BYTE gpdModelName[40]; +** BYTE gpdSerialNumber[20]; +** BYTE gpdFirmRev[8]; +** DWORD gpdCapacity; +** DWORD gpdCapacityX; // Reserved for expansion +** BYTE gpdDeviceState; +** BYTE gpdPioMode; +** BYTE gpdCurrentUdmaMode; +** BYTE gpdUdmaMode; +** BYTE gpdDriveSelect; +** BYTE gpdRaidNumber; // 0xff if not belongs to a raid set +** sSCSI_ATTR gpdScsi; +** BYTE gpdReserved[40]; // Total to 128 bytes +** } sGUI_PHY_DRV, *pGUI_PHY_DRV; +** GUI_GET_INFO_S : Get System Information +** byte 0,1 : length +** byte 2 : command code 0x23 +** typedef struct sCOM_ATTR +** { +** BYTE comBaudRate; +** BYTE comDataBits; +** BYTE comStopBits; +** BYTE comParity; +** BYTE comFlowControl; +** } sCOM_ATTR, *pCOM_ATTR; +** typedef struct sSYSTEM_INFO +** { +** BYTE gsiVendorName[40]; +** BYTE gsiSerialNumber[16]; +** BYTE gsiFirmVersion[16]; +** BYTE gsiBootVersion[16]; +** BYTE gsiMbVersion[16]; +** BYTE gsiModelName[8]; +** BYTE gsiLocalIp[4]; +** BYTE gsiCurrentIp[4]; +** DWORD gsiTimeTick; +** DWORD gsiCpuSpeed; +** DWORD gsiICache; +** DWORD gsiDCache; +** DWORD gsiScache; +** DWORD gsiMemorySize; +** DWORD gsiMemorySpeed; +** DWORD gsiEvents; +** BYTE gsiMacAddress[6]; +** BYTE gsiDhcp; +** BYTE gsiBeeper; +** BYTE gsiChannelUsage; +** BYTE gsiMaxAtaMode; +** BYTE gsiSdramEcc; // 1:if ECC enabled +** BYTE gsiRebuildPriority; +** sCOM_ATTR gsiComA; // 5 bytes +** sCOM_ATTR gsiComB; // 5 bytes +** BYTE gsiIdeChannels; +** BYTE gsiScsiHostChannels; +** BYTE gsiIdeHostChannels; +** BYTE gsiMaxVolumeSet; +** BYTE gsiMaxRaidSet; +** BYTE gsiEtherPort; // 1:if ether net port supported +** BYTE gsiRaid6Engine; // 1:Raid6 engine supported +** BYTE gsiRes[75]; +** } sSYSTEM_INFO, *pSYSTEM_INFO; +** GUI_CLEAR_EVENT : Clear System Event +** byte 0,1 : length +** byte 2 : command code 0x24 +** GUI_MUTE_BEEPER : Mute current beeper +** byte 0,1 : length +** byte 2 : command code 0x30 +** GUI_BEEPER_SETTING : Disable beeper +** byte 0,1 : length +** byte 2 : command code 0x31 +** byte 3 : 0->disable, 1->enable +** GUI_SET_PASSWORD : Change password +** byte 0,1 : length +** byte 2 : command code 0x32 +** byte 3 : pass word length ( must <= 15 ) +** byte 4 : password (must be alpha-numerical) +** GUI_HOST_INTERFACE_MODE : Set host interface mode +** byte 0,1 : length +** byte 2 : command code 0x33 +** byte 3 : 0->Independent, 1->cluster +** GUI_REBUILD_PRIORITY : Set rebuild priority +** byte 0,1 : length +** byte 2 : command code 0x34 +** byte 3 : 0/1/2/3 (low->high) +** GUI_MAX_ATA_MODE : Set maximum ATA mode to be used +** byte 0,1 : length +** byte 2 : command code 0x35 +** byte 3 : 0/1/2/3 (133/100/66/33) +** GUI_RESET_CONTROLLER : Reset Controller +** byte 0,1 : length +** byte 2 : command code 0x36 +** *Response with VT100 screen (discard it) +** GUI_COM_PORT_SETTING : COM port setting +** byte 0,1 : length +** byte 2 : command code 0x37 +** byte 3 : 0->COMA (term port), +** 1->COMB (debug port) +** byte 4 : 0/1/2/3/4/5/6/7 +** (1200/2400/4800/9600/19200/38400/57600/115200) +** byte 5 : data bit +** (0:7 bit, 1:8 bit : must be 8 bit) +** byte 6 : stop bit (0:1, 1:2 stop bits) +** byte 7 : parity (0:none, 1:off, 2:even) +** byte 8 : flow control +** (0:none, 1:xon/xoff, 2:hardware => must use none) +** GUI_NO_OPERATION : No operation +** byte 0,1 : length +** byte 2 : command code 0x38 +** GUI_DHCP_IP : Set DHCP option and local IP address +** byte 0,1 : length +** byte 2 : command code 0x39 +** byte 3 : 0:dhcp disabled, 1:dhcp enabled +** byte 4/5/6/7 : IP address +** GUI_CREATE_PASS_THROUGH : Create pass through disk +** byte 0,1 : length +** byte 2 : command code 0x40 +** byte 3 : device # +** byte 4 : scsi channel (0/1) +** byte 5 : scsi id (0-->15) +** byte 6 : scsi lun (0-->7) +** byte 7 : tagged queue (1 : enabled) +** byte 8 : cache mode (1 : enabled) +** byte 9 : max speed (0/1/2/3/4, +** async/20/40/80/160 for scsi) +** (0/1/2/3/4, 33/66/100/133/150 for ide ) +** GUI_MODIFY_PASS_THROUGH : Modify pass through disk +** byte 0,1 : length +** byte 2 : command code 0x41 +** byte 3 : device # +** byte 4 : scsi channel (0/1) +** byte 5 : scsi id (0-->15) +** byte 6 : scsi lun (0-->7) +** byte 7 : tagged queue (1 : enabled) +** byte 8 : cache mode (1 : enabled) +** byte 9 : max speed (0/1/2/3/4, +** async/20/40/80/160 for scsi) +** (0/1/2/3/4, 33/66/100/133/150 for ide ) +** GUI_DELETE_PASS_THROUGH : Delete pass through disk +** byte 0,1 : length +** byte 2 : command code 0x42 +** byte 3 : device# to be deleted +** GUI_IDENTIFY_DEVICE : Identify Device +** byte 0,1 : length +** byte 2 : command code 0x43 +** byte 3 : Flash Method +** (0:flash selected, 1:flash not selected) +** byte 4/5/6/7 : IDE device mask to be flashed +** note .... no response data available +** GUI_CREATE_RAIDSET : Create Raid Set +** byte 0,1 : length +** byte 2 : command code 0x50 +** byte 3/4/5/6 : device mask +** byte 7-22 : raidset name (if byte 7 == 0:use default) +** GUI_DELETE_RAIDSET : Delete Raid Set +** byte 0,1 : length +** byte 2 : command code 0x51 +** byte 3 : raidset# +** GUI_EXPAND_RAIDSET : Expand Raid Set +** byte 0,1 : length +** byte 2 : command code 0x52 +** byte 3 : raidset# +** byte 4/5/6/7 : device mask for expansion +** byte 8/9/10 : (8:0 no change, 1 change, 0xff:terminate, +** 9:new raid level, +** 10:new stripe size +** 0/1/2/3/4/5->4/8/16/32/64/128K ) +** byte 11/12/13 : repeat for each volume in the raidset +** GUI_ACTIVATE_RAIDSET : Activate incomplete raid set +** byte 0,1 : length +** byte 2 : command code 0x53 +** byte 3 : raidset# +** GUI_CREATE_HOT_SPARE : Create hot spare disk +** byte 0,1 : length +** byte 2 : command code 0x54 +** byte 3/4/5/6 : device mask for hot spare creation +** GUI_DELETE_HOT_SPARE : Delete hot spare disk +** byte 0,1 : length +** byte 2 : command code 0x55 +** byte 3/4/5/6 : device mask for hot spare deletion +** GUI_CREATE_VOLUME : Create volume set +** byte 0,1 : length +** byte 2 : command code 0x60 +** byte 3 : raidset# +** byte 4-19 : volume set name +** (if byte4 == 0, use default) +** byte 20-27 : volume capacity (blocks) +** byte 28 : raid level +** byte 29 : stripe size +** (0/1/2/3/4/5->4/8/16/32/64/128K) +** byte 30 : channel +** byte 31 : ID +** byte 32 : LUN +** byte 33 : 1 enable tag +** byte 34 : 1 enable cache +** byte 35 : speed +** (0/1/2/3/4->async/20/40/80/160 for scsi) +** (0/1/2/3/4->33/66/100/133/150 for IDE ) +** byte 36 : 1 to select quick init +** +** GUI_MODIFY_VOLUME : Modify volume Set +** byte 0,1 : length +** byte 2 : command code 0x61 +** byte 3 : volumeset# +** byte 4-19 : new volume set name +** (if byte4 == 0, not change) +** byte 20-27 : new volume capacity (reserved) +** byte 28 : new raid level +** byte 29 : new stripe size +** (0/1/2/3/4/5->4/8/16/32/64/128K) +** byte 30 : new channel +** byte 31 : new ID +** byte 32 : new LUN +** byte 33 : 1 enable tag +** byte 34 : 1 enable cache +** byte 35 : speed +** (0/1/2/3/4->async/20/40/80/160 for scsi) +** (0/1/2/3/4->33/66/100/133/150 for IDE ) +** GUI_DELETE_VOLUME : Delete volume set +** byte 0,1 : length +** byte 2 : command code 0x62 +** byte 3 : volumeset# +** GUI_START_CHECK_VOLUME : Start volume consistency check +** byte 0,1 : length +** byte 2 : command code 0x63 +** byte 3 : volumeset# +** GUI_STOP_CHECK_VOLUME : Stop volume consistency check +** byte 0,1 : length +** byte 2 : command code 0x64 +** --------------------------------------------------------------------- +** 4. Returned data +** --------------------------------------------------------------------- +** (A) Header : 3 bytes sequence (0x5E, 0x01, 0x61) +** (B) Length : 2 bytes +** (low byte 1st, excludes length and checksum byte) +** (C) status or data : +** <1> If length == 1 ==> 1 byte status code +** #define GUI_OK 0x41 +** #define GUI_RAIDSET_NOT_NORMAL 0x42 +** #define GUI_VOLUMESET_NOT_NORMAL 0x43 +** #define GUI_NO_RAIDSET 0x44 +** #define GUI_NO_VOLUMESET 0x45 +** #define GUI_NO_PHYSICAL_DRIVE 0x46 +** #define GUI_PARAMETER_ERROR 0x47 +** #define GUI_UNSUPPORTED_COMMAND 0x48 +** #define GUI_DISK_CONFIG_CHANGED 0x49 +** #define GUI_INVALID_PASSWORD 0x4a +** #define GUI_NO_DISK_SPACE 0x4b +** #define GUI_CHECKSUM_ERROR 0x4c +** #define GUI_PASSWORD_REQUIRED 0x4d +** <2> If length > 1 ==> +** data block returned from controller +** and the contents depends on the command code +** (E) Checksum : checksum of length and status or data byte +************************************************************************** diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 96a81cd17617..d61662c1a0ee 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -469,6 +469,20 @@ config SCSI_IN2000 To compile this driver as a module, choose M here: the module will be called in2000. +config SCSI_ARCMSR + tristate "ARECA ARC11X0[PCI-X]/ARC12X0[PCI-EXPRESS] SATA-RAID support" + depends on PCI && SCSI + help + This driver supports all of ARECA's SATA RAID controller cards. + This is an ARECA-maintained driver by Erich Chen. + If you have any problems, please mail to: < erich@areca.com.tw > + Areca supports Linux RAID config tools. + + < http://www.areca.com.tw > + + To compile this driver as a module, choose M here: the + module will be called arcmsr (modprobe arcmsr). + source "drivers/scsi/megaraid/Kconfig.megaraid" config SCSI_SATA diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index ebd0cf00bf3e..b2de9bfdfdcd 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -59,6 +59,7 @@ obj-$(CONFIG_SCSI_PSI240I) += psi240i.o obj-$(CONFIG_SCSI_BUSLOGIC) += BusLogic.o obj-$(CONFIG_SCSI_DPT_I2O) += dpt_i2o.o obj-$(CONFIG_SCSI_U14_34F) += u14-34f.o +obj-$(CONFIG_SCSI_ARCMSR) += arcmsr/ obj-$(CONFIG_SCSI_ULTRASTOR) += ultrastor.o obj-$(CONFIG_SCSI_AHA152X) += aha152x.o obj-$(CONFIG_SCSI_AHA1542) += aha1542.o diff --git a/drivers/scsi/arcmsr/Makefile b/drivers/scsi/arcmsr/Makefile new file mode 100644 index 000000000000..721aced39168 --- /dev/null +++ b/drivers/scsi/arcmsr/Makefile @@ -0,0 +1,6 @@ +# File: drivers/arcmsr/Makefile +# Makefile for the ARECA PCI-X PCI-EXPRESS SATA RAID controllers SCSI driver. + +arcmsr-objs := arcmsr_attr.o arcmsr_hba.o + +obj-$(CONFIG_SCSI_ARCMSR) := arcmsr.o diff --git a/drivers/scsi/arcmsr/arcmsr.h b/drivers/scsi/arcmsr/arcmsr.h new file mode 100644 index 000000000000..aff96db9ccf6 --- /dev/null +++ b/drivers/scsi/arcmsr/arcmsr.h @@ -0,0 +1,472 @@ +/* +******************************************************************************* +** O.S : Linux +** FILE NAME : arcmsr.h +** BY : Erich Chen +** Description: SCSI RAID Device Driver for +** ARECA RAID Host adapter +******************************************************************************* +** Copyright (C) 2002 - 2005, Areca Technology Corporation All rights reserved. +** +** Web site: www.areca.com.tw +** E-mail: erich@areca.com.tw +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License version 2 as +** published by the Free Software Foundation. +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +******************************************************************************* +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES(INCLUDING, BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION)HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +**(INCLUDING NEGLIGENCE OR OTHERWISE)ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +******************************************************************************* +*/ +#include + +struct class_device_attribute; + +#define ARCMSR_MAX_OUTSTANDING_CMD 256 +#define ARCMSR_MAX_FREECCB_NUM 288 +#define ARCMSR_DRIVER_VERSION "Driver Version 1.20.00.13" +#define ARCMSR_SCSI_INITIATOR_ID 255 +#define ARCMSR_MAX_XFER_SECTORS 512 +#define ARCMSR_MAX_TARGETID 17 +#define ARCMSR_MAX_TARGETLUN 8 +#define ARCMSR_MAX_CMD_PERLUN ARCMSR_MAX_OUTSTANDING_CMD +#define ARCMSR_MAX_QBUFFER 4096 +#define ARCMSR_MAX_SG_ENTRIES 38 + +/* +******************************************************************************* +** split 64bits dma addressing +******************************************************************************* +*/ +#define dma_addr_hi32(addr) (uint32_t) ((addr>>16)>>16) +#define dma_addr_lo32(addr) (uint32_t) (addr & 0xffffffff) +/* +******************************************************************************* +** MESSAGE CONTROL CODE +******************************************************************************* +*/ +struct CMD_MESSAGE +{ + uint32_t HeaderLength; + uint8_t Signature[8]; + uint32_t Timeout; + uint32_t ControlCode; + uint32_t ReturnCode; + uint32_t Length; +}; +/* +******************************************************************************* +** IOP Message Transfer Data for user space +******************************************************************************* +*/ +struct CMD_MESSAGE_FIELD +{ + struct CMD_MESSAGE cmdmessage; + uint8_t messagedatabuffer[1032]; +}; +/* IOP message transfer */ +#define ARCMSR_MESSAGE_FAIL 0x0001 +/* DeviceType */ +#define ARECA_SATA_RAID 0x90000000 +/* FunctionCode */ +#define FUNCTION_READ_RQBUFFER 0x0801 +#define FUNCTION_WRITE_WQBUFFER 0x0802 +#define FUNCTION_CLEAR_RQBUFFER 0x0803 +#define FUNCTION_CLEAR_WQBUFFER 0x0804 +#define FUNCTION_CLEAR_ALLQBUFFER 0x0805 +#define FUNCTION_RETURN_CODE_3F 0x0806 +#define FUNCTION_SAY_HELLO 0x0807 +#define FUNCTION_SAY_GOODBYE 0x0808 +#define FUNCTION_FLUSH_ADAPTER_CACHE 0x0809 +/* ARECA IO CONTROL CODE*/ +#define ARCMSR_MESSAGE_READ_RQBUFFER \ + ARECA_SATA_RAID | FUNCTION_READ_RQBUFFER +#define ARCMSR_MESSAGE_WRITE_WQBUFFER \ + ARECA_SATA_RAID | FUNCTION_WRITE_WQBUFFER +#define ARCMSR_MESSAGE_CLEAR_RQBUFFER \ + ARECA_SATA_RAID | FUNCTION_CLEAR_RQBUFFER +#define ARCMSR_MESSAGE_CLEAR_WQBUFFER \ + ARECA_SATA_RAID | FUNCTION_CLEAR_WQBUFFER +#define ARCMSR_MESSAGE_CLEAR_ALLQBUFFER \ + ARECA_SATA_RAID | FUNCTION_CLEAR_ALLQBUFFER +#define ARCMSR_MESSAGE_RETURN_CODE_3F \ + ARECA_SATA_RAID | FUNCTION_RETURN_CODE_3F +#define ARCMSR_MESSAGE_SAY_HELLO \ + ARECA_SATA_RAID | FUNCTION_SAY_HELLO +#define ARCMSR_MESSAGE_SAY_GOODBYE \ + ARECA_SATA_RAID | FUNCTION_SAY_GOODBYE +#define ARCMSR_MESSAGE_FLUSH_ADAPTER_CACHE \ + ARECA_SATA_RAID | FUNCTION_FLUSH_ADAPTER_CACHE +/* ARECA IOCTL ReturnCode */ +#define ARCMSR_MESSAGE_RETURNCODE_OK 0x00000001 +#define ARCMSR_MESSAGE_RETURNCODE_ERROR 0x00000006 +#define ARCMSR_MESSAGE_RETURNCODE_3F 0x0000003F +/* +************************************************************* +** structure for holding DMA address data +************************************************************* +*/ +#define IS_SG64_ADDR 0x01000000 /* bit24 */ +struct SG32ENTRY +{ + uint32_t length; + uint32_t address; +}; +struct SG64ENTRY +{ + uint32_t length; + uint32_t address; + uint32_t addresshigh; +}; +struct SGENTRY_UNION +{ + union + { + struct SG32ENTRY sg32entry; + struct SG64ENTRY sg64entry; + }u; +}; +/* +******************************************************************** +** Q Buffer of IOP Message Transfer +******************************************************************** +*/ +struct QBUFFER +{ + uint32_t data_len; + uint8_t data[124]; +}; +/* +******************************************************************************* +** FIRMWARE INFO +******************************************************************************* +*/ +struct FIRMWARE_INFO +{ + uint32_t signature; /*0, 00-03*/ + uint32_t request_len; /*1, 04-07*/ + uint32_t numbers_queue; /*2, 08-11*/ + uint32_t sdram_size; /*3, 12-15*/ + uint32_t ide_channels; /*4, 16-19*/ + char vendor[40]; /*5, 20-59*/ + char model[8]; /*15, 60-67*/ + char firmware_ver[16]; /*17, 68-83*/ + char device_map[16]; /*21, 84-99*/ +}; +/* signature of set and get firmware config */ +#define ARCMSR_SIGNATURE_GET_CONFIG 0x87974060 +#define ARCMSR_SIGNATURE_SET_CONFIG 0x87974063 +/* message code of inbound message register */ +#define ARCMSR_INBOUND_MESG0_NOP 0x00000000 +#define ARCMSR_INBOUND_MESG0_GET_CONFIG 0x00000001 +#define ARCMSR_INBOUND_MESG0_SET_CONFIG 0x00000002 +#define ARCMSR_INBOUND_MESG0_ABORT_CMD 0x00000003 +#define ARCMSR_INBOUND_MESG0_STOP_BGRB 0x00000004 +#define ARCMSR_INBOUND_MESG0_FLUSH_CACHE 0x00000005 +#define ARCMSR_INBOUND_MESG0_START_BGRB 0x00000006 +#define ARCMSR_INBOUND_MESG0_CHK331PENDING 0x00000007 +#define ARCMSR_INBOUND_MESG0_SYNC_TIMER 0x00000008 +/* doorbell interrupt generator */ +#define ARCMSR_INBOUND_DRIVER_DATA_WRITE_OK 0x00000001 +#define ARCMSR_INBOUND_DRIVER_DATA_READ_OK 0x00000002 +#define ARCMSR_OUTBOUND_IOP331_DATA_WRITE_OK 0x00000001 +#define ARCMSR_OUTBOUND_IOP331_DATA_READ_OK 0x00000002 +/* ccb areca cdb flag */ +#define ARCMSR_CCBPOST_FLAG_SGL_BSIZE 0x80000000 +#define ARCMSR_CCBPOST_FLAG_IAM_BIOS 0x40000000 +#define ARCMSR_CCBREPLY_FLAG_IAM_BIOS 0x40000000 +#define ARCMSR_CCBREPLY_FLAG_ERROR 0x10000000 +/* outbound firmware ok */ +#define ARCMSR_OUTBOUND_MESG1_FIRMWARE_OK 0x80000000 +/* +******************************************************************************* +** ARECA SCSI COMMAND DESCRIPTOR BLOCK size 0x1F8 (504) +******************************************************************************* +*/ +struct ARCMSR_CDB +{ + uint8_t Bus; + uint8_t TargetID; + uint8_t LUN; + uint8_t Function; + + uint8_t CdbLength; + uint8_t sgcount; + uint8_t Flags; +#define ARCMSR_CDB_FLAG_SGL_BSIZE 0x01 +#define ARCMSR_CDB_FLAG_BIOS 0x02 +#define ARCMSR_CDB_FLAG_WRITE 0x04 +#define ARCMSR_CDB_FLAG_SIMPLEQ 0x00 +#define ARCMSR_CDB_FLAG_HEADQ 0x08 +#define ARCMSR_CDB_FLAG_ORDEREDQ 0x10 + uint8_t Reserved1; + + uint32_t Context; + uint32_t DataLength; + + uint8_t Cdb[16]; + + uint8_t DeviceStatus; +#define ARCMSR_DEV_CHECK_CONDITION 0x02 +#define ARCMSR_DEV_SELECT_TIMEOUT 0xF0 +#define ARCMSR_DEV_ABORTED 0xF1 +#define ARCMSR_DEV_INIT_FAIL 0xF2 + uint8_t SenseData[15]; + + union + { + struct SG32ENTRY sg32entry[ARCMSR_MAX_SG_ENTRIES]; + struct SG64ENTRY sg64entry[ARCMSR_MAX_SG_ENTRIES]; + } u; +}; +/* +******************************************************************************* +** Messaging Unit (MU) of the Intel R 80331 I/O processor (80331) +******************************************************************************* +*/ +struct MessageUnit +{ + uint32_t resrved0[4]; /*0000 000F*/ + uint32_t inbound_msgaddr0; /*0010 0013*/ + uint32_t inbound_msgaddr1; /*0014 0017*/ + uint32_t outbound_msgaddr0; /*0018 001B*/ + uint32_t outbound_msgaddr1; /*001C 001F*/ + uint32_t inbound_doorbell; /*0020 0023*/ + uint32_t inbound_intstatus; /*0024 0027*/ + uint32_t inbound_intmask; /*0028 002B*/ + uint32_t outbound_doorbell; /*002C 002F*/ + uint32_t outbound_intstatus; /*0030 0033*/ + uint32_t outbound_intmask; /*0034 0037*/ + uint32_t reserved1[2]; /*0038 003F*/ + uint32_t inbound_queueport; /*0040 0043*/ + uint32_t outbound_queueport; /*0044 0047*/ + uint32_t reserved2[2]; /*0048 004F*/ + uint32_t reserved3[492]; /*0050 07FF 492*/ + uint32_t reserved4[128]; /*0800 09FF 128*/ + uint32_t message_rwbuffer[256]; /*0a00 0DFF 256*/ + uint32_t message_wbuffer[32]; /*0E00 0E7F 32*/ + uint32_t reserved5[32]; /*0E80 0EFF 32*/ + uint32_t message_rbuffer[32]; /*0F00 0F7F 32*/ + uint32_t reserved6[32]; /*0F80 0FFF 32*/ +}; +/* +******************************************************************************* +** Adapter Control Block +******************************************************************************* +*/ +struct AdapterControlBlock +{ + struct pci_dev * pdev; + struct Scsi_Host * host; + unsigned long vir2phy_offset; + /* Offset is used in making arc cdb physical to virtual calculations */ + uint32_t outbound_int_enable; + + struct MessageUnit __iomem * pmu; + /* message unit ATU inbound base address0 */ + + uint32_t acb_flags; +#define ACB_F_SCSISTOPADAPTER 0x0001 +#define ACB_F_MSG_STOP_BGRB 0x0002 + /* stop RAID background rebuild */ +#define ACB_F_MSG_START_BGRB 0x0004 + /* stop RAID background rebuild */ +#define ACB_F_IOPDATA_OVERFLOW 0x0008 + /* iop message data rqbuffer overflow */ +#define ACB_F_MESSAGE_WQBUFFER_CLEARED 0x0010 + /* message clear wqbuffer */ +#define ACB_F_MESSAGE_RQBUFFER_CLEARED 0x0020 + /* message clear rqbuffer */ +#define ACB_F_MESSAGE_WQBUFFER_READED 0x0040 +#define ACB_F_BUS_RESET 0x0080 +#define ACB_F_IOP_INITED 0x0100 + /* iop init */ + + struct CommandControlBlock * pccb_pool[ARCMSR_MAX_FREECCB_NUM]; + /* used for memory free */ + struct list_head ccb_free_list; + /* head of free ccb list */ + atomic_t ccboutstandingcount; + + void * dma_coherent; + /* dma_coherent used for memory free */ + dma_addr_t dma_coherent_handle; + /* dma_coherent_handle used for memory free */ + + uint8_t rqbuffer[ARCMSR_MAX_QBUFFER]; + /* data collection buffer for read from 80331 */ + int32_t rqbuf_firstindex; + /* first of read buffer */ + int32_t rqbuf_lastindex; + /* last of read buffer */ + uint8_t wqbuffer[ARCMSR_MAX_QBUFFER]; + /* data collection buffer for write to 80331 */ + int32_t wqbuf_firstindex; + /* first of write buffer */ + int32_t wqbuf_lastindex; + /* last of write buffer */ + uint8_t devstate[ARCMSR_MAX_TARGETID][ARCMSR_MAX_TARGETLUN]; + /* id0 ..... id15, lun0...lun7 */ +#define ARECA_RAID_GONE 0x55 +#define ARECA_RAID_GOOD 0xaa + uint32_t num_resets; + uint32_t num_aborts; + uint32_t firm_request_len; + uint32_t firm_numbers_queue; + uint32_t firm_sdram_size; + uint32_t firm_hd_channels; + char firm_model[12]; + char firm_version[20]; +};/* HW_DEVICE_EXTENSION */ +/* +******************************************************************************* +** Command Control Block +** this CCB length must be 32 bytes boundary +******************************************************************************* +*/ +struct CommandControlBlock +{ + struct ARCMSR_CDB arcmsr_cdb; + /* + ** 0-503 (size of CDB=504): + ** arcmsr messenger scsi command descriptor size 504 bytes + */ + uint32_t cdb_shifted_phyaddr; + /* 504-507 */ + uint32_t reserved1; + /* 508-511 */ +#if BITS_PER_LONG == 64 + /* ======================512+64 bytes======================== */ + struct list_head list; + /* 512-527 16 bytes next/prev ptrs for ccb lists */ + struct scsi_cmnd * pcmd; + /* 528-535 8 bytes pointer of linux scsi command */ + struct AdapterControlBlock * acb; + /* 536-543 8 bytes pointer of acb */ + + uint16_t ccb_flags; + /* 544-545 */ + #define CCB_FLAG_READ 0x0000 + #define CCB_FLAG_WRITE 0x0001 + #define CCB_FLAG_ERROR 0x0002 + #define CCB_FLAG_FLUSHCACHE 0x0004 + #define CCB_FLAG_MASTER_ABORTED 0x0008 + uint16_t startdone; + /* 546-547 */ + #define ARCMSR_CCB_DONE 0x0000 + #define ARCMSR_CCB_START 0x55AA + #define ARCMSR_CCB_ABORTED 0xAA55 + #define ARCMSR_CCB_ILLEGAL 0xFFFF + uint32_t reserved2[7]; + /* 548-551 552-555 556-559 560-563 564-567 568-571 572-575 */ +#else + /* ======================512+32 bytes======================== */ + struct list_head list; + /* 512-519 8 bytes next/prev ptrs for ccb lists */ + struct scsi_cmnd * pcmd; + /* 520-523 4 bytes pointer of linux scsi command */ + struct AdapterControlBlock * acb; + /* 524-527 4 bytes pointer of acb */ + + uint16_t ccb_flags; + /* 528-529 */ + #define CCB_FLAG_READ 0x0000 + #define CCB_FLAG_WRITE 0x0001 + #define CCB_FLAG_ERROR 0x0002 + #define CCB_FLAG_FLUSHCACHE 0x0004 + #define CCB_FLAG_MASTER_ABORTED 0x0008 + uint16_t startdone; + /* 530-531 */ + #define ARCMSR_CCB_DONE 0x0000 + #define ARCMSR_CCB_START 0x55AA + #define ARCMSR_CCB_ABORTED 0xAA55 + #define ARCMSR_CCB_ILLEGAL 0xFFFF + uint32_t reserved2[3]; + /* 532-535 536-539 540-543 */ +#endif + /* ========================================================== */ +}; +/* +******************************************************************************* +** ARECA SCSI sense data +******************************************************************************* +*/ +struct SENSE_DATA +{ + uint8_t ErrorCode:7; +#define SCSI_SENSE_CURRENT_ERRORS 0x70 +#define SCSI_SENSE_DEFERRED_ERRORS 0x71 + uint8_t Valid:1; + uint8_t SegmentNumber; + uint8_t SenseKey:4; + uint8_t Reserved:1; + uint8_t IncorrectLength:1; + uint8_t EndOfMedia:1; + uint8_t FileMark:1; + uint8_t Information[4]; + uint8_t AdditionalSenseLength; + uint8_t CommandSpecificInformation[4]; + uint8_t AdditionalSenseCode; + uint8_t AdditionalSenseCodeQualifier; + uint8_t FieldReplaceableUnitCode; + uint8_t SenseKeySpecific[3]; +}; +/* +******************************************************************************* +** Outbound Interrupt Status Register - OISR +******************************************************************************* +*/ +#define ARCMSR_MU_OUTBOUND_INTERRUPT_STATUS_REG 0x30 +#define ARCMSR_MU_OUTBOUND_PCI_INT 0x10 +#define ARCMSR_MU_OUTBOUND_POSTQUEUE_INT 0x08 +#define ARCMSR_MU_OUTBOUND_DOORBELL_INT 0x04 +#define ARCMSR_MU_OUTBOUND_MESSAGE1_INT 0x02 +#define ARCMSR_MU_OUTBOUND_MESSAGE0_INT 0x01 +#define ARCMSR_MU_OUTBOUND_HANDLE_INT \ + (ARCMSR_MU_OUTBOUND_MESSAGE0_INT \ + |ARCMSR_MU_OUTBOUND_MESSAGE1_INT \ + |ARCMSR_MU_OUTBOUND_DOORBELL_INT \ + |ARCMSR_MU_OUTBOUND_POSTQUEUE_INT \ + |ARCMSR_MU_OUTBOUND_PCI_INT) +/* +******************************************************************************* +** Outbound Interrupt Mask Register - OIMR +******************************************************************************* +*/ +#define ARCMSR_MU_OUTBOUND_INTERRUPT_MASK_REG 0x34 +#define ARCMSR_MU_OUTBOUND_PCI_INTMASKENABLE 0x10 +#define ARCMSR_MU_OUTBOUND_POSTQUEUE_INTMASKENABLE 0x08 +#define ARCMSR_MU_OUTBOUND_DOORBELL_INTMASKENABLE 0x04 +#define ARCMSR_MU_OUTBOUND_MESSAGE1_INTMASKENABLE 0x02 +#define ARCMSR_MU_OUTBOUND_MESSAGE0_INTMASKENABLE 0x01 +#define ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE 0x1F + +extern void arcmsr_post_Qbuffer(struct AdapterControlBlock *acb); +extern struct class_device_attribute *arcmsr_host_attrs[]; +extern int arcmsr_alloc_sysfs_attr(struct AdapterControlBlock *acb); +void arcmsr_free_sysfs_attr(struct AdapterControlBlock *acb); + diff --git a/drivers/scsi/arcmsr/arcmsr_attr.c b/drivers/scsi/arcmsr/arcmsr_attr.c new file mode 100644 index 000000000000..0459f4194d7c --- /dev/null +++ b/drivers/scsi/arcmsr/arcmsr_attr.c @@ -0,0 +1,392 @@ +/* +******************************************************************************* +** O.S : Linux +** FILE NAME : arcmsr_attr.c +** BY : Erich Chen +** Description: attributes exported to sysfs and device host +******************************************************************************* +** Copyright (C) 2002 - 2005, Areca Technology Corporation All rights reserved +** +** Web site: www.areca.com.tw +** E-mail: erich@areca.com.tw +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License version 2 as +** published by the Free Software Foundation. +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +******************************************************************************* +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES(INCLUDING,BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION)HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE)ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +******************************************************************************* +** For history of changes, see Documentation/scsi/ChangeLog.arcmsr +** Firmware Specification, see Documentation/scsi/arcmsr_spec.txt +******************************************************************************* +*/ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "arcmsr.h" + +struct class_device_attribute *arcmsr_host_attrs[]; + +static ssize_t +arcmsr_sysfs_iop_message_read(struct kobject *kobj, char *buf, loff_t off, + size_t count) +{ + struct class_device *cdev = container_of(kobj,struct class_device,kobj); + struct Scsi_Host *host = class_to_shost(cdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + struct MessageUnit __iomem *reg = acb->pmu; + uint8_t *pQbuffer,*ptmpQbuffer; + int32_t allxfer_len = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + /* do message unit read. */ + ptmpQbuffer = (uint8_t *)buf; + while ((acb->rqbuf_firstindex != acb->rqbuf_lastindex) + && (allxfer_len < 1031)) { + pQbuffer = &acb->rqbuffer[acb->rqbuf_firstindex]; + memcpy(ptmpQbuffer, pQbuffer, 1); + acb->rqbuf_firstindex++; + acb->rqbuf_firstindex %= ARCMSR_MAX_QBUFFER; + ptmpQbuffer++; + allxfer_len++; + } + if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) { + struct QBUFFER __iomem * prbuffer = (struct QBUFFER __iomem *) + ®->message_rbuffer; + uint8_t __iomem * iop_data = (uint8_t __iomem *)prbuffer->data; + int32_t iop_len; + + acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW; + iop_len = readl(&prbuffer->data_len); + while (iop_len > 0) { + acb->rqbuffer[acb->rqbuf_lastindex] = readb(iop_data); + acb->rqbuf_lastindex++; + acb->rqbuf_lastindex %= ARCMSR_MAX_QBUFFER; + iop_data++; + iop_len--; + } + writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK, + ®->inbound_doorbell); + } + return (allxfer_len); +} + +static ssize_t +arcmsr_sysfs_iop_message_write(struct kobject *kobj, char *buf, loff_t off, + size_t count) +{ + struct class_device *cdev = container_of(kobj,struct class_device,kobj); + struct Scsi_Host *host = class_to_shost(cdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + int32_t my_empty_len, user_len, wqbuf_firstindex, wqbuf_lastindex; + uint8_t *pQbuffer, *ptmpuserbuffer; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (count > 1032) + return -EINVAL; + /* do message unit write. */ + ptmpuserbuffer = (uint8_t *)buf; + user_len = (int32_t)count; + wqbuf_lastindex = acb->wqbuf_lastindex; + wqbuf_firstindex = acb->wqbuf_firstindex; + if (wqbuf_lastindex != wqbuf_firstindex) { + arcmsr_post_Qbuffer(acb); + return 0; /*need retry*/ + } else { + my_empty_len = (wqbuf_firstindex-wqbuf_lastindex - 1) + &(ARCMSR_MAX_QBUFFER - 1); + if (my_empty_len >= user_len) { + while (user_len > 0) { + pQbuffer = + &acb->wqbuffer[acb->wqbuf_lastindex]; + memcpy(pQbuffer, ptmpuserbuffer, 1); + acb->wqbuf_lastindex++; + acb->wqbuf_lastindex %= ARCMSR_MAX_QBUFFER; + ptmpuserbuffer++; + user_len--; + } + if (acb->acb_flags & ACB_F_MESSAGE_WQBUFFER_CLEARED) { + acb->acb_flags &= + ~ACB_F_MESSAGE_WQBUFFER_CLEARED; + arcmsr_post_Qbuffer(acb); + } + return count; + } else { + return 0; /*need retry*/ + } + } +} + +static ssize_t +arcmsr_sysfs_iop_message_clear(struct kobject *kobj, char *buf, loff_t off, + size_t count) +{ + struct class_device *cdev = container_of(kobj,struct class_device,kobj); + struct Scsi_Host *host = class_to_shost(cdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + struct MessageUnit __iomem *reg = acb->pmu; + uint8_t *pQbuffer; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) { + acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW; + writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK + , ®->inbound_doorbell); + } + acb->acb_flags |= + (ACB_F_MESSAGE_WQBUFFER_CLEARED + | ACB_F_MESSAGE_RQBUFFER_CLEARED + | ACB_F_MESSAGE_WQBUFFER_READED); + acb->rqbuf_firstindex = 0; + acb->rqbuf_lastindex = 0; + acb->wqbuf_firstindex = 0; + acb->wqbuf_lastindex = 0; + pQbuffer = acb->rqbuffer; + memset(pQbuffer, 0, sizeof (struct QBUFFER)); + pQbuffer = acb->wqbuffer; + memset(pQbuffer, 0, sizeof (struct QBUFFER)); + return 1; +} + +static struct bin_attribute arcmsr_sysfs_message_read_attr = { + .attr = { + .name = "mu_read", + .mode = S_IRUSR , + .owner = THIS_MODULE, + }, + .size = 1032, + .read = arcmsr_sysfs_iop_message_read, +}; + +static struct bin_attribute arcmsr_sysfs_message_write_attr = { + .attr = { + .name = "mu_write", + .mode = S_IWUSR, + .owner = THIS_MODULE, + }, + .size = 1032, + .write = arcmsr_sysfs_iop_message_write, +}; + +static struct bin_attribute arcmsr_sysfs_message_clear_attr = { + .attr = { + .name = "mu_clear", + .mode = S_IWUSR, + .owner = THIS_MODULE, + }, + .size = 1, + .write = arcmsr_sysfs_iop_message_clear, +}; + +int arcmsr_alloc_sysfs_attr(struct AdapterControlBlock *acb) +{ + struct Scsi_Host *host = acb->host; + int error; + + error = sysfs_create_bin_file(&host->shost_classdev.kobj, + &arcmsr_sysfs_message_read_attr); + if (error) { + printk(KERN_ERR "arcmsr: alloc sysfs mu_read failed\n"); + goto error_bin_file_message_read; + } + error = sysfs_create_bin_file(&host->shost_classdev.kobj, + &arcmsr_sysfs_message_write_attr); + if (error) { + printk(KERN_ERR "arcmsr: alloc sysfs mu_write failed\n"); + goto error_bin_file_message_write; + } + error = sysfs_create_bin_file(&host->shost_classdev.kobj, + &arcmsr_sysfs_message_clear_attr); + if (error) { + printk(KERN_ERR "arcmsr: alloc sysfs mu_clear failed\n"); + goto error_bin_file_message_clear; + } + return 0; +error_bin_file_message_clear: + error = sysfs_remove_bin_file(&host->shost_classdev.kobj, + &arcmsr_sysfs_message_write_attr); + if (error) + printk(KERN_ERR "arcmsr: sysfs_remove_bin_file mu_write failed\n"); +error_bin_file_message_write: + error = sysfs_remove_bin_file(&host->shost_classdev.kobj, + &arcmsr_sysfs_message_read_attr); + if (error) + printk(KERN_ERR "arcmsr: sysfs_remove_bin_file mu_read failed\n"); +error_bin_file_message_read: + return error; +} + +void +arcmsr_free_sysfs_attr(struct AdapterControlBlock *acb) { + struct Scsi_Host *host = acb->host; + int error; + + error = sysfs_remove_bin_file(&host->shost_classdev.kobj, + &arcmsr_sysfs_message_clear_attr); + if (error) + printk(KERN_ERR "arcmsr: free sysfs mu_clear failed\n"); + error = sysfs_remove_bin_file(&host->shost_classdev.kobj, + &arcmsr_sysfs_message_write_attr); + if (error) + printk(KERN_ERR "arcmsr: free sysfs mu_write failed\n"); + error = sysfs_remove_bin_file(&host->shost_classdev.kobj, + &arcmsr_sysfs_message_read_attr); + if (error) + printk(KERN_ERR "arcmsr: free sysfss mu_read failed\n"); +} + + +static ssize_t +arcmsr_attr_host_driver_version(struct class_device *cdev, char *buf) { + return snprintf(buf, PAGE_SIZE, + "ARCMSR: %s\n", + ARCMSR_DRIVER_VERSION); +} + +static ssize_t +arcmsr_attr_host_driver_posted_cmd(struct class_device *cdev, char *buf) { + struct Scsi_Host *host = class_to_shost(cdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + return snprintf(buf, PAGE_SIZE, + "Current commands posted: %4d\n", + atomic_read(&acb->ccboutstandingcount)); +} + +static ssize_t +arcmsr_attr_host_driver_reset(struct class_device *cdev, char *buf) { + struct Scsi_Host *host = class_to_shost(cdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + return snprintf(buf, PAGE_SIZE, + "SCSI Host Resets: %4d\n", + acb->num_resets); +} + +static ssize_t +arcmsr_attr_host_driver_abort(struct class_device *cdev, char *buf) { + struct Scsi_Host *host = class_to_shost(cdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + return snprintf(buf, PAGE_SIZE, + "SCSI Aborts/Timeouts: %4d\n", + acb->num_aborts); +} + +static ssize_t +arcmsr_attr_host_fw_model(struct class_device *cdev, char *buf) { + struct Scsi_Host *host = class_to_shost(cdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + return snprintf(buf, PAGE_SIZE, + "Adapter Model: %s\n", + acb->firm_model); +} + +static ssize_t +arcmsr_attr_host_fw_version(struct class_device *cdev, char *buf) { + struct Scsi_Host *host = class_to_shost(cdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + + return snprintf(buf, PAGE_SIZE, + "Firmware Version: %s\n", + acb->firm_version); +} + +static ssize_t +arcmsr_attr_host_fw_request_len(struct class_device *cdev, char *buf) { + struct Scsi_Host *host = class_to_shost(cdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + + return snprintf(buf, PAGE_SIZE, + "Reguest Lenth: %4d\n", + acb->firm_request_len); +} + +static ssize_t +arcmsr_attr_host_fw_numbers_queue(struct class_device *cdev, char *buf) { + struct Scsi_Host *host = class_to_shost(cdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + + return snprintf(buf, PAGE_SIZE, + "Numbers of Queue: %4d\n", + acb->firm_numbers_queue); +} + +static ssize_t +arcmsr_attr_host_fw_sdram_size(struct class_device *cdev, char *buf) { + struct Scsi_Host *host = class_to_shost(cdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + + return snprintf(buf, PAGE_SIZE, + "SDRAM Size: %4d\n", + acb->firm_sdram_size); +} + +static ssize_t +arcmsr_attr_host_fw_hd_channels(struct class_device *cdev, char *buf) { + struct Scsi_Host *host = class_to_shost(cdev); + struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; + + return snprintf(buf, PAGE_SIZE, + "Hard Disk Channels: %4d\n", + acb->firm_hd_channels); +} + +static CLASS_DEVICE_ATTR(host_driver_version, S_IRUGO, arcmsr_attr_host_driver_version, NULL); +static CLASS_DEVICE_ATTR(host_driver_posted_cmd, S_IRUGO, arcmsr_attr_host_driver_posted_cmd, NULL); +static CLASS_DEVICE_ATTR(host_driver_reset, S_IRUGO, arcmsr_attr_host_driver_reset, NULL); +static CLASS_DEVICE_ATTR(host_driver_abort, S_IRUGO, arcmsr_attr_host_driver_abort, NULL); +static CLASS_DEVICE_ATTR(host_fw_model, S_IRUGO, arcmsr_attr_host_fw_model, NULL); +static CLASS_DEVICE_ATTR(host_fw_version, S_IRUGO, arcmsr_attr_host_fw_version, NULL); +static CLASS_DEVICE_ATTR(host_fw_request_len, S_IRUGO, arcmsr_attr_host_fw_request_len, NULL); +static CLASS_DEVICE_ATTR(host_fw_numbers_queue, S_IRUGO, arcmsr_attr_host_fw_numbers_queue, NULL); +static CLASS_DEVICE_ATTR(host_fw_sdram_size, S_IRUGO, arcmsr_attr_host_fw_sdram_size, NULL); +static CLASS_DEVICE_ATTR(host_fw_hd_channels, S_IRUGO, arcmsr_attr_host_fw_hd_channels, NULL); + +struct class_device_attribute *arcmsr_host_attrs[] = { + &class_device_attr_host_driver_version, + &class_device_attr_host_driver_posted_cmd, + &class_device_attr_host_driver_reset, + &class_device_attr_host_driver_abort, + &class_device_attr_host_fw_model, + &class_device_attr_host_fw_version, + &class_device_attr_host_fw_request_len, + &class_device_attr_host_fw_numbers_queue, + &class_device_attr_host_fw_sdram_size, + &class_device_attr_host_fw_hd_channels, + NULL, +}; diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c new file mode 100644 index 000000000000..475f978ff8f0 --- /dev/null +++ b/drivers/scsi/arcmsr/arcmsr_hba.c @@ -0,0 +1,1496 @@ +/* +******************************************************************************* +** O.S : Linux +** FILE NAME : arcmsr_hba.c +** BY : Erich Chen +** Description: SCSI RAID Device Driver for +** ARECA RAID Host adapter +******************************************************************************* +** Copyright (C) 2002 - 2005, Areca Technology Corporation All rights reserved +** +** Web site: www.areca.com.tw +** E-mail: erich@areca.com.tw +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License version 2 as +** published by the Free Software Foundation. +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +******************************************************************************* +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES(INCLUDING,BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION)HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE)ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +******************************************************************************* +** For history of changes, see Documentation/scsi/ChangeLog.arcmsr +** Firmware Specification, see Documentation/scsi/arcmsr_spec.txt +******************************************************************************* +*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "arcmsr.h" + +MODULE_AUTHOR("Erich Chen "); +MODULE_DESCRIPTION("ARECA (ARC11xx/12xx) SATA RAID HOST Adapter"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(ARCMSR_DRIVER_VERSION); + +static int arcmsr_iop_message_xfer(struct AdapterControlBlock *acb, struct scsi_cmnd *cmd); +static int arcmsr_abort(struct scsi_cmnd *); +static int arcmsr_bus_reset(struct scsi_cmnd *); +static int arcmsr_bios_param(struct scsi_device *sdev, + struct block_device *bdev, sector_t capacity, int *info); +static int arcmsr_queue_command(struct scsi_cmnd * cmd, + void (*done) (struct scsi_cmnd *)); +static int arcmsr_probe(struct pci_dev *pdev, + const struct pci_device_id *id); +static void arcmsr_remove(struct pci_dev *pdev); +static void arcmsr_shutdown(struct pci_dev *pdev); +static void arcmsr_iop_init(struct AdapterControlBlock *acb); +static void arcmsr_free_ccb_pool(struct AdapterControlBlock *acb); +static void arcmsr_stop_adapter_bgrb(struct AdapterControlBlock *acb); +static void arcmsr_flush_adapter_cache(struct AdapterControlBlock *acb); +static uint8_t arcmsr_wait_msgint_ready(struct AdapterControlBlock *acb); +static const char *arcmsr_info(struct Scsi_Host *); +static irqreturn_t arcmsr_interrupt(struct AdapterControlBlock *acb); + +static int arcmsr_adjust_disk_queue_depth(struct scsi_device *sdev, int queue_depth) +{ + if (queue_depth > ARCMSR_MAX_CMD_PERLUN) + queue_depth = ARCMSR_MAX_CMD_PERLUN; + scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, queue_depth); + return queue_depth; +} + +static struct scsi_host_template arcmsr_scsi_host_template = { + .module = THIS_MODULE, + .name = "ARCMSR ARECA SATA RAID HOST Adapter" ARCMSR_DRIVER_VERSION, + .info = arcmsr_info, + .queuecommand = arcmsr_queue_command, + .eh_abort_handler = arcmsr_abort, + .eh_bus_reset_handler = arcmsr_bus_reset, + .bios_param = arcmsr_bios_param, + .change_queue_depth = arcmsr_adjust_disk_queue_depth, + .can_queue = ARCMSR_MAX_OUTSTANDING_CMD, + .this_id = ARCMSR_SCSI_INITIATOR_ID, + .sg_tablesize = ARCMSR_MAX_SG_ENTRIES, + .max_sectors = ARCMSR_MAX_XFER_SECTORS, + .cmd_per_lun = ARCMSR_MAX_CMD_PERLUN, + .use_clustering = ENABLE_CLUSTERING, + .shost_attrs = arcmsr_host_attrs, +}; + +static struct pci_device_id arcmsr_device_id_table[] = { + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1110)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1120)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1130)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1160)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1170)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1210)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1220)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1230)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1260)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1270)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1280)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1380)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1381)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1680)}, + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1681)}, + {0, 0}, /* Terminating entry */ +}; +MODULE_DEVICE_TABLE(pci, arcmsr_device_id_table); +static struct pci_driver arcmsr_pci_driver = { + .name = "arcmsr", + .id_table = arcmsr_device_id_table, + .probe = arcmsr_probe, + .remove = arcmsr_remove, + .shutdown = arcmsr_shutdown +}; + +static irqreturn_t arcmsr_do_interrupt(int irq, void *dev_id, + struct pt_regs *regs) +{ + irqreturn_t handle_state; + struct AdapterControlBlock *acb; + unsigned long flags; + + acb = (struct AdapterControlBlock *)dev_id; + + spin_lock_irqsave(acb->host->host_lock, flags); + handle_state = arcmsr_interrupt(acb); + spin_unlock_irqrestore(acb->host->host_lock, flags); + return handle_state; +} + +static int arcmsr_bios_param(struct scsi_device *sdev, + struct block_device *bdev, sector_t capacity, int *geom) +{ + int ret, heads, sectors, cylinders, total_capacity; + unsigned char *buffer;/* return copy of block device's partition table */ + + buffer = scsi_bios_ptable(bdev); + if (buffer) { + ret = scsi_partsize(buffer, capacity, &geom[2], &geom[0], &geom[1]); + kfree(buffer); + if (ret != -1) + return ret; + } + total_capacity = capacity; + heads = 64; + sectors = 32; + cylinders = total_capacity / (heads * sectors); + if (cylinders > 1024) { + heads = 255; + sectors = 63; + cylinders = total_capacity / (heads * sectors); + } + geom[0] = heads; + geom[1] = sectors; + geom[2] = cylinders; + return 0; +} + +static int arcmsr_alloc_ccb_pool(struct AdapterControlBlock *acb) +{ + struct pci_dev *pdev = acb->pdev; + struct MessageUnit __iomem *reg = acb->pmu; + u32 ccb_phyaddr_hi32; + void *dma_coherent; + dma_addr_t dma_coherent_handle, dma_addr; + struct CommandControlBlock *ccb_tmp; + int i, j; + + dma_coherent = dma_alloc_coherent(&pdev->dev, + ARCMSR_MAX_FREECCB_NUM * + sizeof (struct CommandControlBlock) + 0x20, + &dma_coherent_handle, GFP_KERNEL); + if (!dma_coherent) + return -ENOMEM; + + acb->dma_coherent = dma_coherent; + acb->dma_coherent_handle = dma_coherent_handle; + + if (((unsigned long)dma_coherent & 0x1F)) { + dma_coherent = dma_coherent + + (0x20 - ((unsigned long)dma_coherent & 0x1F)); + dma_coherent_handle = dma_coherent_handle + + (0x20 - ((unsigned long)dma_coherent_handle & 0x1F)); + } + + dma_addr = dma_coherent_handle; + ccb_tmp = (struct CommandControlBlock *)dma_coherent; + for (i = 0; i < ARCMSR_MAX_FREECCB_NUM; i++) { + ccb_tmp->cdb_shifted_phyaddr = dma_addr >> 5; + ccb_tmp->acb = acb; + acb->pccb_pool[i] = ccb_tmp; + list_add_tail(&ccb_tmp->list, &acb->ccb_free_list); + dma_addr = dma_addr + sizeof (struct CommandControlBlock); + ccb_tmp++; + } + + acb->vir2phy_offset = (unsigned long)ccb_tmp - + (unsigned long)dma_addr; + for (i = 0; i < ARCMSR_MAX_TARGETID; i++) + for (j = 0; j < ARCMSR_MAX_TARGETLUN; j++) + acb->devstate[i][j] = ARECA_RAID_GOOD; + + /* + ** here we need to tell iop 331 our ccb_tmp.HighPart + ** if ccb_tmp.HighPart is not zero + */ + ccb_phyaddr_hi32 = (uint32_t) ((dma_coherent_handle >> 16) >> 16); + if (ccb_phyaddr_hi32 != 0) { + writel(ARCMSR_SIGNATURE_SET_CONFIG, ®->message_rwbuffer[0]); + writel(ccb_phyaddr_hi32, ®->message_rwbuffer[1]); + writel(ARCMSR_INBOUND_MESG0_SET_CONFIG, ®->inbound_msgaddr0); + if (arcmsr_wait_msgint_ready(acb)) + printk(KERN_NOTICE "arcmsr%d: " + "'set ccb high part physical address' timeout\n", + acb->host->host_no); + } + + writel(readl(®->outbound_intmask) | + ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE, + ®->outbound_intmask); + return 0; +} + +static int arcmsr_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct Scsi_Host *host; + struct AdapterControlBlock *acb; + uint8_t bus, dev_fun; + int error; + + error = pci_enable_device(pdev); + if (error) + goto out; + pci_set_master(pdev); + + host = scsi_host_alloc(&arcmsr_scsi_host_template, + sizeof(struct AdapterControlBlock)); + if (!host) { + error = -ENOMEM; + goto out_disable_device; + } + acb = (struct AdapterControlBlock *)host->hostdata; + memset(acb, 0, sizeof (struct AdapterControlBlock)); + + error = pci_set_dma_mask(pdev, DMA_64BIT_MASK); + if (error) { + error = pci_set_dma_mask(pdev, DMA_32BIT_MASK); + if (error) { + printk(KERN_WARNING + "scsi%d: No suitable DMA mask available\n", + host->host_no); + goto out_host_put; + } + } + bus = pdev->bus->number; + dev_fun = pdev->devfn; + acb->host = host; + acb->pdev = pdev; + host->max_sectors = ARCMSR_MAX_XFER_SECTORS; + host->max_lun = ARCMSR_MAX_TARGETLUN; + host->max_id = ARCMSR_MAX_TARGETID;/*16:8*/ + host->max_cmd_len = 16; /*this is issue of 64bit LBA, over 2T byte*/ + host->sg_tablesize = ARCMSR_MAX_SG_ENTRIES; + host->can_queue = ARCMSR_MAX_FREECCB_NUM; /* max simultaneous cmds */ + host->cmd_per_lun = ARCMSR_MAX_CMD_PERLUN; + host->this_id = ARCMSR_SCSI_INITIATOR_ID; + host->unique_id = (bus << 8) | dev_fun; + host->irq = pdev->irq; + error = pci_request_regions(pdev, "arcmsr"); + if (error) + goto out_host_put; + + acb->pmu = ioremap(pci_resource_start(pdev, 0), + pci_resource_len(pdev, 0)); + if (!acb->pmu) { + printk(KERN_NOTICE "arcmsr%d: memory" + " mapping region fail \n", acb->host->host_no); + goto out_release_regions; + } + acb->acb_flags |= (ACB_F_MESSAGE_WQBUFFER_CLEARED | + ACB_F_MESSAGE_RQBUFFER_CLEARED | + ACB_F_MESSAGE_WQBUFFER_READED); + acb->acb_flags &= ~ACB_F_SCSISTOPADAPTER; + INIT_LIST_HEAD(&acb->ccb_free_list); + + error = arcmsr_alloc_ccb_pool(acb); + if (error) + goto out_iounmap; + + error = request_irq(pdev->irq, arcmsr_do_interrupt, + SA_INTERRUPT | SA_SHIRQ, "arcmsr", acb); + if (error) + goto out_free_ccb_pool; + + arcmsr_iop_init(acb); + pci_set_drvdata(pdev, host); + + error = scsi_add_host(host, &pdev->dev); + if (error) + goto out_free_irq; + + error = arcmsr_alloc_sysfs_attr(acb); + if (error) + goto out_free_sysfs; + + scsi_scan_host(host); + return 0; + out_free_sysfs: + out_free_irq: + free_irq(pdev->irq, acb); + out_free_ccb_pool: + arcmsr_free_ccb_pool(acb); + out_iounmap: + iounmap(acb->pmu); + out_release_regions: + pci_release_regions(pdev); + out_host_put: + scsi_host_put(host); + out_disable_device: + pci_disable_device(pdev); + out: + return error; +} + +static void arcmsr_abort_allcmd(struct AdapterControlBlock *acb) +{ + struct MessageUnit __iomem *reg = acb->pmu; + + writel(ARCMSR_INBOUND_MESG0_ABORT_CMD, ®->inbound_msgaddr0); + if (arcmsr_wait_msgint_ready(acb)) + printk(KERN_NOTICE + "arcmsr%d: wait 'abort all outstanding command' timeout \n" + , acb->host->host_no); +} + +static void arcmsr_pci_unmap_dma(struct CommandControlBlock *ccb) +{ + struct AdapterControlBlock *acb = ccb->acb; + struct scsi_cmnd *pcmd = ccb->pcmd; + + if (pcmd->use_sg != 0) { + struct scatterlist *sl; + + sl = (struct scatterlist *)pcmd->request_buffer; + pci_unmap_sg(acb->pdev, sl, pcmd->use_sg, pcmd->sc_data_direction); + } + else if (pcmd->request_bufflen != 0) + pci_unmap_single(acb->pdev, + pcmd->SCp.dma_handle, + pcmd->request_bufflen, pcmd->sc_data_direction); +} + +static void arcmsr_ccb_complete(struct CommandControlBlock *ccb, int stand_flag) +{ + struct AdapterControlBlock *acb = ccb->acb; + struct scsi_cmnd *pcmd = ccb->pcmd; + + arcmsr_pci_unmap_dma(ccb); + if (stand_flag == 1) + atomic_dec(&acb->ccboutstandingcount); + ccb->startdone = ARCMSR_CCB_DONE; + ccb->ccb_flags = 0; + list_add_tail(&ccb->list, &acb->ccb_free_list); + pcmd->scsi_done(pcmd); +} + +static void arcmsr_remove(struct pci_dev *pdev) +{ + struct Scsi_Host *host = pci_get_drvdata(pdev); + struct AdapterControlBlock *acb = + (struct AdapterControlBlock *) host->hostdata; + struct MessageUnit __iomem *reg = acb->pmu; + int poll_count = 0; + + arcmsr_free_sysfs_attr(acb); + scsi_remove_host(host); + arcmsr_stop_adapter_bgrb(acb); + arcmsr_flush_adapter_cache(acb); + writel(readl(®->outbound_intmask) | + ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE, + ®->outbound_intmask); + acb->acb_flags |= ACB_F_SCSISTOPADAPTER; + acb->acb_flags &= ~ACB_F_IOP_INITED; + + for (poll_count = 0; poll_count < 256; poll_count++) { + if (!atomic_read(&acb->ccboutstandingcount)) + break; + arcmsr_interrupt(acb); + msleep(25); + } + + if (atomic_read(&acb->ccboutstandingcount)) { + int i; + + arcmsr_abort_allcmd(acb); + for (i = 0; i < ARCMSR_MAX_OUTSTANDING_CMD; i++) + readl(®->outbound_queueport); + for (i = 0; i < ARCMSR_MAX_FREECCB_NUM; i++) { + struct CommandControlBlock *ccb = acb->pccb_pool[i]; + if (ccb->startdone == ARCMSR_CCB_START) { + ccb->startdone = ARCMSR_CCB_ABORTED; + ccb->pcmd->result = DID_ABORT << 16; + arcmsr_ccb_complete(ccb, 1); + } + } + } + + free_irq(pdev->irq, acb); + iounmap(acb->pmu); + arcmsr_free_ccb_pool(acb); + pci_release_regions(pdev); + + scsi_host_put(host); + + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +} + +static void arcmsr_shutdown(struct pci_dev *pdev) +{ + struct Scsi_Host *host = pci_get_drvdata(pdev); + struct AdapterControlBlock *acb = + (struct AdapterControlBlock *)host->hostdata; + + arcmsr_stop_adapter_bgrb(acb); + arcmsr_flush_adapter_cache(acb); +} + +static int arcmsr_module_init(void) +{ + int error = 0; + + error = pci_register_driver(&arcmsr_pci_driver); + return error; +} + +static void arcmsr_module_exit(void) +{ + pci_unregister_driver(&arcmsr_pci_driver); +} +module_init(arcmsr_module_init); +module_exit(arcmsr_module_exit); + +static u32 arcmsr_disable_outbound_ints(struct AdapterControlBlock *acb) +{ + struct MessageUnit __iomem *reg = acb->pmu; + u32 orig_mask = readl(®->outbound_intmask); + + writel(orig_mask | ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE, + ®->outbound_intmask); + return orig_mask; +} + +static void arcmsr_enable_outbound_ints(struct AdapterControlBlock *acb, + u32 orig_mask) +{ + struct MessageUnit __iomem *reg = acb->pmu; + u32 mask; + + mask = orig_mask & ~(ARCMSR_MU_OUTBOUND_POSTQUEUE_INTMASKENABLE | + ARCMSR_MU_OUTBOUND_DOORBELL_INTMASKENABLE); + writel(mask, ®->outbound_intmask); +} + +static void arcmsr_flush_adapter_cache(struct AdapterControlBlock *acb) +{ + struct MessageUnit __iomem *reg=acb->pmu; + + writel(ARCMSR_INBOUND_MESG0_FLUSH_CACHE, ®->inbound_msgaddr0); + if (arcmsr_wait_msgint_ready(acb)) + printk(KERN_NOTICE + "arcmsr%d: wait 'flush adapter cache' timeout \n" + , acb->host->host_no); +} + +static void arcmsr_report_sense_info(struct CommandControlBlock *ccb) +{ + struct scsi_cmnd *pcmd = ccb->pcmd; + struct SENSE_DATA *sensebuffer = (struct SENSE_DATA *)pcmd->sense_buffer; + + pcmd->result = DID_OK << 16; + if (sensebuffer) { + int sense_data_length = + sizeof (struct SENSE_DATA) < sizeof (pcmd->sense_buffer) + ? sizeof (struct SENSE_DATA) : sizeof (pcmd->sense_buffer); + memset(sensebuffer, 0, sizeof (pcmd->sense_buffer)); + memcpy(sensebuffer, ccb->arcmsr_cdb.SenseData, sense_data_length); + sensebuffer->ErrorCode = SCSI_SENSE_CURRENT_ERRORS; + sensebuffer->Valid = 1; + } +} + +static uint8_t arcmsr_wait_msgint_ready(struct AdapterControlBlock *acb) +{ + struct MessageUnit __iomem *reg = acb->pmu; + uint32_t Index; + uint8_t Retries = 0x00; + + do { + for (Index = 0; Index < 100; Index++) { + if (readl(®->outbound_intstatus) + & ARCMSR_MU_OUTBOUND_MESSAGE0_INT) { + writel(ARCMSR_MU_OUTBOUND_MESSAGE0_INT + , ®->outbound_intstatus); + return 0x00; + } + msleep_interruptible(10); + }/*max 1 seconds*/ + } while (Retries++ < 20);/*max 20 sec*/ + return 0xff; +} + +static void arcmsr_build_ccb(struct AdapterControlBlock *acb, + struct CommandControlBlock *ccb, struct scsi_cmnd *pcmd) +{ + struct ARCMSR_CDB *arcmsr_cdb = (struct ARCMSR_CDB *)&ccb->arcmsr_cdb; + int8_t *psge = (int8_t *)&arcmsr_cdb->u; + uint32_t address_lo, address_hi; + int arccdbsize = 0x30; + + ccb->pcmd = pcmd; + memset(arcmsr_cdb, 0, sizeof (struct ARCMSR_CDB)); + arcmsr_cdb->Bus = 0; + arcmsr_cdb->TargetID = pcmd->device->id; + arcmsr_cdb->LUN = pcmd->device->lun; + arcmsr_cdb->Function = 1; + arcmsr_cdb->CdbLength = (uint8_t)pcmd->cmd_len; + arcmsr_cdb->Context = (unsigned long)arcmsr_cdb; + memcpy(arcmsr_cdb->Cdb, pcmd->cmnd, pcmd->cmd_len); + if (pcmd->use_sg) { + int length, sgcount, i, cdb_sgcount = 0; + struct scatterlist *sl; + + /* Get Scatter Gather List from scsiport. */ + sl = (struct scatterlist *) pcmd->request_buffer; + sgcount = pci_map_sg(acb->pdev, sl, pcmd->use_sg, + pcmd->sc_data_direction); + /* map stor port SG list to our iop SG List. */ + for (i = 0; i < sgcount; i++) { + /* Get the physical address of the current data pointer */ + length = cpu_to_le32(sg_dma_len(sl)); + address_lo = cpu_to_le32(dma_addr_lo32(sg_dma_address(sl))); + address_hi = cpu_to_le32(dma_addr_hi32(sg_dma_address(sl))); + if (address_hi == 0) { + struct SG32ENTRY *pdma_sg = (struct SG32ENTRY *)psge; + + pdma_sg->address = address_lo; + pdma_sg->length = length; + psge += sizeof (struct SG32ENTRY); + arccdbsize += sizeof (struct SG32ENTRY); + } else { + struct SG64ENTRY *pdma_sg = (struct SG64ENTRY *)psge; + + pdma_sg->addresshigh = address_hi; + pdma_sg->address = address_lo; + pdma_sg->length = length|IS_SG64_ADDR; + psge += sizeof (struct SG64ENTRY); + arccdbsize += sizeof (struct SG64ENTRY); + } + sl++; + cdb_sgcount++; + } + arcmsr_cdb->sgcount = (uint8_t)cdb_sgcount; + arcmsr_cdb->DataLength = pcmd->request_bufflen; + if ( arccdbsize > 256) + arcmsr_cdb->Flags |= ARCMSR_CDB_FLAG_SGL_BSIZE; + } else if (pcmd->request_bufflen) { + dma_addr_t dma_addr; + dma_addr = pci_map_single(acb->pdev, pcmd->request_buffer, + pcmd->request_bufflen, pcmd->sc_data_direction); + pcmd->SCp.dma_handle = dma_addr; + address_lo = cpu_to_le32(dma_addr_lo32(dma_addr)); + address_hi = cpu_to_le32(dma_addr_hi32(dma_addr)); + if (address_hi == 0) { + struct SG32ENTRY *pdma_sg = (struct SG32ENTRY *)psge; + pdma_sg->address = address_lo; + pdma_sg->length = pcmd->request_bufflen; + } else { + struct SG64ENTRY *pdma_sg = (struct SG64ENTRY *)psge; + pdma_sg->addresshigh = address_hi; + pdma_sg->address = address_lo; + pdma_sg->length = pcmd->request_bufflen|IS_SG64_ADDR; + } + arcmsr_cdb->sgcount = 1; + arcmsr_cdb->DataLength = pcmd->request_bufflen; + } + if (pcmd->sc_data_direction == DMA_TO_DEVICE ) { + arcmsr_cdb->Flags |= ARCMSR_CDB_FLAG_WRITE; + ccb->ccb_flags |= CCB_FLAG_WRITE; + } +} + +static void arcmsr_post_ccb(struct AdapterControlBlock *acb, struct CommandControlBlock *ccb) +{ + struct MessageUnit __iomem *reg = acb->pmu; + uint32_t cdb_shifted_phyaddr = ccb->cdb_shifted_phyaddr; + struct ARCMSR_CDB *arcmsr_cdb = (struct ARCMSR_CDB *)&ccb->arcmsr_cdb; + + atomic_inc(&acb->ccboutstandingcount); + ccb->startdone = ARCMSR_CCB_START; + if (arcmsr_cdb->Flags & ARCMSR_CDB_FLAG_SGL_BSIZE) + writel(cdb_shifted_phyaddr | ARCMSR_CCBPOST_FLAG_SGL_BSIZE, + ®->inbound_queueport); + else + writel(cdb_shifted_phyaddr, ®->inbound_queueport); +} + +void arcmsr_post_Qbuffer(struct AdapterControlBlock *acb) +{ + struct MessageUnit __iomem *reg = acb->pmu; + struct QBUFFER __iomem *pwbuffer = (struct QBUFFER __iomem *) ®->message_wbuffer; + uint8_t __iomem *iop_data = (uint8_t __iomem *) pwbuffer->data; + int32_t allxfer_len = 0; + + if (acb->acb_flags & ACB_F_MESSAGE_WQBUFFER_READED) { + acb->acb_flags &= (~ACB_F_MESSAGE_WQBUFFER_READED); + while ((acb->wqbuf_firstindex != acb->wqbuf_lastindex) + && (allxfer_len < 124)) { + writeb(acb->wqbuffer[acb->wqbuf_firstindex], iop_data); + acb->wqbuf_firstindex++; + acb->wqbuf_firstindex %= ARCMSR_MAX_QBUFFER; + iop_data++; + allxfer_len++; + } + writel(allxfer_len, &pwbuffer->data_len); + writel(ARCMSR_INBOUND_DRIVER_DATA_WRITE_OK + , ®->inbound_doorbell); + } +} + +static void arcmsr_stop_adapter_bgrb(struct AdapterControlBlock *acb) +{ + struct MessageUnit __iomem *reg = acb->pmu; + + acb->acb_flags &= ~ACB_F_MSG_START_BGRB; + writel(ARCMSR_INBOUND_MESG0_STOP_BGRB, ®->inbound_msgaddr0); + if (arcmsr_wait_msgint_ready(acb)) + printk(KERN_NOTICE + "arcmsr%d: wait 'stop adapter background rebulid' timeout \n" + , acb->host->host_no); +} + +static void arcmsr_free_ccb_pool(struct AdapterControlBlock *acb) +{ + dma_free_coherent(&acb->pdev->dev, + ARCMSR_MAX_FREECCB_NUM * sizeof (struct CommandControlBlock) + 0x20, + acb->dma_coherent, + acb->dma_coherent_handle); +} + +static irqreturn_t arcmsr_interrupt(struct AdapterControlBlock *acb) +{ + struct MessageUnit __iomem *reg = acb->pmu; + struct CommandControlBlock *ccb; + uint32_t flag_ccb, outbound_intstatus, outbound_doorbell; + + outbound_intstatus = readl(®->outbound_intstatus) + & acb->outbound_int_enable; + writel(outbound_intstatus, ®->outbound_intstatus); + if (outbound_intstatus & ARCMSR_MU_OUTBOUND_DOORBELL_INT) { + outbound_doorbell = readl(®->outbound_doorbell); + writel(outbound_doorbell, ®->outbound_doorbell); + if (outbound_doorbell & ARCMSR_OUTBOUND_IOP331_DATA_WRITE_OK) { + struct QBUFFER __iomem * prbuffer = + (struct QBUFFER __iomem *) ®->message_rbuffer; + uint8_t __iomem * iop_data = (uint8_t __iomem *)prbuffer->data; + int32_t my_empty_len, iop_len, rqbuf_firstindex, rqbuf_lastindex; + + rqbuf_lastindex = acb->rqbuf_lastindex; + rqbuf_firstindex = acb->rqbuf_firstindex; + iop_len = readl(&prbuffer->data_len); + my_empty_len = (rqbuf_firstindex - rqbuf_lastindex - 1) + &(ARCMSR_MAX_QBUFFER - 1); + if (my_empty_len >= iop_len) { + while (iop_len > 0) { + acb->rqbuffer[acb->rqbuf_lastindex] = readb(iop_data); + acb->rqbuf_lastindex++; + acb->rqbuf_lastindex %= ARCMSR_MAX_QBUFFER; + iop_data++; + iop_len--; + } + writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK, + ®->inbound_doorbell); + } else + acb->acb_flags |= ACB_F_IOPDATA_OVERFLOW; + } + if (outbound_doorbell & ARCMSR_OUTBOUND_IOP331_DATA_READ_OK) { + acb->acb_flags |= ACB_F_MESSAGE_WQBUFFER_READED; + if (acb->wqbuf_firstindex != acb->wqbuf_lastindex) { + struct QBUFFER __iomem * pwbuffer = + (struct QBUFFER __iomem *) ®->message_wbuffer; + uint8_t __iomem * iop_data = (uint8_t __iomem *) pwbuffer->data; + int32_t allxfer_len = 0; + + acb->acb_flags &= (~ACB_F_MESSAGE_WQBUFFER_READED); + while ((acb->wqbuf_firstindex != acb->wqbuf_lastindex) + && (allxfer_len < 124)) { + writeb(acb->wqbuffer[acb->wqbuf_firstindex], iop_data); + acb->wqbuf_firstindex++; + acb->wqbuf_firstindex %= ARCMSR_MAX_QBUFFER; + iop_data++; + allxfer_len++; + } + writel(allxfer_len, &pwbuffer->data_len); + writel(ARCMSR_INBOUND_DRIVER_DATA_WRITE_OK, + ®->inbound_doorbell); + } + if (acb->wqbuf_firstindex == acb->wqbuf_lastindex) + acb->acb_flags |= ACB_F_MESSAGE_WQBUFFER_CLEARED; + } + } + if (outbound_intstatus & ARCMSR_MU_OUTBOUND_POSTQUEUE_INT) { + int id, lun; + /* + **************************************************************** + ** areca cdb command done + **************************************************************** + */ + while (1) { + if ((flag_ccb = readl(®->outbound_queueport)) == 0xFFFFFFFF) + break;/*chip FIFO no ccb for completion already*/ + /* check if command done with no error*/ + ccb = (struct CommandControlBlock *)(acb->vir2phy_offset + + (flag_ccb << 5)); + if ((ccb->acb != acb) || (ccb->startdone != ARCMSR_CCB_START)) { + if (ccb->startdone == ARCMSR_CCB_ABORTED) { + struct scsi_cmnd *abortcmd=ccb->pcmd; + if (abortcmd) { + abortcmd->result |= DID_ABORT >> 16; + arcmsr_ccb_complete(ccb, 1); + printk(KERN_NOTICE + "arcmsr%d: ccb='0x%p' isr got aborted command \n" + , acb->host->host_no, ccb); + } + continue; + } + printk(KERN_NOTICE + "arcmsr%d: isr get an illegal ccb command done acb='0x%p'" + "ccb='0x%p' ccbacb='0x%p' startdone = 0x%x" + " ccboutstandingcount=%d \n" + , acb->host->host_no + , acb + , ccb + , ccb->acb + , ccb->startdone + , atomic_read(&acb->ccboutstandingcount)); + continue; + } + id = ccb->pcmd->device->id; + lun = ccb->pcmd->device->lun; + if (!(flag_ccb & ARCMSR_CCBREPLY_FLAG_ERROR)) { + if (acb->devstate[id][lun] == ARECA_RAID_GONE) + acb->devstate[id][lun] = ARECA_RAID_GOOD; + ccb->pcmd->result = DID_OK << 16; + arcmsr_ccb_complete(ccb, 1); + } else { + switch(ccb->arcmsr_cdb.DeviceStatus) { + case ARCMSR_DEV_SELECT_TIMEOUT: { + acb->devstate[id][lun] = ARECA_RAID_GONE; + ccb->pcmd->result = DID_TIME_OUT << 16; + arcmsr_ccb_complete(ccb, 1); + } + break; + case ARCMSR_DEV_ABORTED: + case ARCMSR_DEV_INIT_FAIL: { + acb->devstate[id][lun] = ARECA_RAID_GONE; + ccb->pcmd->result = DID_BAD_TARGET << 16; + arcmsr_ccb_complete(ccb, 1); + } + break; + case ARCMSR_DEV_CHECK_CONDITION: { + acb->devstate[id][lun] = ARECA_RAID_GOOD; + arcmsr_report_sense_info(ccb); + arcmsr_ccb_complete(ccb, 1); + } + break; + default: + printk(KERN_NOTICE + "arcmsr%d: scsi id=%d lun=%d" + " isr get command error done," + "but got unknown DeviceStatus = 0x%x \n" + , acb->host->host_no + , id + , lun + , ccb->arcmsr_cdb.DeviceStatus); + acb->devstate[id][lun] = ARECA_RAID_GONE; + ccb->pcmd->result = DID_NO_CONNECT << 16; + arcmsr_ccb_complete(ccb, 1); + break; + } + } + }/*drain reply FIFO*/ + } + if (!(outbound_intstatus & ARCMSR_MU_OUTBOUND_HANDLE_INT)) + return IRQ_NONE; + return IRQ_HANDLED; +} + +static void arcmsr_iop_parking(struct AdapterControlBlock *acb) +{ + if (acb) { + /* stop adapter background rebuild */ + if (acb->acb_flags & ACB_F_MSG_START_BGRB) { + acb->acb_flags &= ~ACB_F_MSG_START_BGRB; + arcmsr_stop_adapter_bgrb(acb); + arcmsr_flush_adapter_cache(acb); + } + } +} + +static int arcmsr_iop_message_xfer(struct AdapterControlBlock *acb, struct scsi_cmnd *cmd) +{ + struct MessageUnit __iomem *reg = acb->pmu; + struct CMD_MESSAGE_FIELD *pcmdmessagefld; + int retvalue = 0, transfer_len = 0; + char *buffer; + uint32_t controlcode = (uint32_t ) cmd->cmnd[5] << 24 | + (uint32_t ) cmd->cmnd[6] << 16 | + (uint32_t ) cmd->cmnd[7] << 8 | + (uint32_t ) cmd->cmnd[8]; + /* 4 bytes: Areca io control code */ + if (cmd->use_sg) { + struct scatterlist *sg = (struct scatterlist *)cmd->request_buffer; + + buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; + if (cmd->use_sg > 1) { + retvalue = ARCMSR_MESSAGE_FAIL; + goto message_out; + } + transfer_len += sg->length; + } else { + buffer = cmd->request_buffer; + transfer_len = cmd->request_bufflen; + } + if (transfer_len > sizeof(struct CMD_MESSAGE_FIELD)) { + retvalue = ARCMSR_MESSAGE_FAIL; + goto message_out; + } + pcmdmessagefld = (struct CMD_MESSAGE_FIELD *) buffer; + switch(controlcode) { + case ARCMSR_MESSAGE_READ_RQBUFFER: { + unsigned long *ver_addr; + dma_addr_t buf_handle; + uint8_t *pQbuffer, *ptmpQbuffer; + int32_t allxfer_len = 0; + + ver_addr = pci_alloc_consistent(acb->pdev, 1032, &buf_handle); + if (!ver_addr) { + retvalue = ARCMSR_MESSAGE_FAIL; + goto message_out; + } + ptmpQbuffer = (uint8_t *) ver_addr; + while ((acb->rqbuf_firstindex != acb->rqbuf_lastindex) + && (allxfer_len < 1031)) { + pQbuffer = &acb->rqbuffer[acb->rqbuf_firstindex]; + memcpy(ptmpQbuffer, pQbuffer, 1); + acb->rqbuf_firstindex++; + acb->rqbuf_firstindex %= ARCMSR_MAX_QBUFFER; + ptmpQbuffer++; + allxfer_len++; + } + if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) { + struct QBUFFER __iomem * prbuffer = (struct QBUFFER __iomem *) + ®->message_rbuffer; + uint8_t __iomem * iop_data = (uint8_t __iomem *)prbuffer->data; + int32_t iop_len; + + acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW; + iop_len = readl(&prbuffer->data_len); + while (iop_len > 0) { + acb->rqbuffer[acb->rqbuf_lastindex] = readb(iop_data); + acb->rqbuf_lastindex++; + acb->rqbuf_lastindex %= ARCMSR_MAX_QBUFFER; + iop_data++; + iop_len--; + } + writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK, + ®->inbound_doorbell); + } + memcpy(pcmdmessagefld->messagedatabuffer, + (uint8_t *)ver_addr, allxfer_len); + pcmdmessagefld->cmdmessage.Length = allxfer_len; + pcmdmessagefld->cmdmessage.ReturnCode = ARCMSR_MESSAGE_RETURNCODE_OK; + pci_free_consistent(acb->pdev, 1032, ver_addr, buf_handle); + } + break; + case ARCMSR_MESSAGE_WRITE_WQBUFFER: { + unsigned long *ver_addr; + dma_addr_t buf_handle; + int32_t my_empty_len, user_len, wqbuf_firstindex, wqbuf_lastindex; + uint8_t *pQbuffer, *ptmpuserbuffer; + + ver_addr = pci_alloc_consistent(acb->pdev, 1032, &buf_handle); + if (!ver_addr) { + retvalue = ARCMSR_MESSAGE_FAIL; + goto message_out; + } + ptmpuserbuffer = (uint8_t *)ver_addr; + user_len = pcmdmessagefld->cmdmessage.Length; + memcpy(ptmpuserbuffer, pcmdmessagefld->messagedatabuffer, user_len); + wqbuf_lastindex = acb->wqbuf_lastindex; + wqbuf_firstindex = acb->wqbuf_firstindex; + if (wqbuf_lastindex != wqbuf_firstindex) { + struct SENSE_DATA *sensebuffer = + (struct SENSE_DATA *)cmd->sense_buffer; + arcmsr_post_Qbuffer(acb); + /* has error report sensedata */ + sensebuffer->ErrorCode = 0x70; + sensebuffer->SenseKey = ILLEGAL_REQUEST; + sensebuffer->AdditionalSenseLength = 0x0A; + sensebuffer->AdditionalSenseCode = 0x20; + sensebuffer->Valid = 1; + retvalue = ARCMSR_MESSAGE_FAIL; + } else { + my_empty_len = (wqbuf_firstindex-wqbuf_lastindex - 1) + &(ARCMSR_MAX_QBUFFER - 1); + if (my_empty_len >= user_len) { + while (user_len > 0) { + pQbuffer = + &acb->wqbuffer[acb->wqbuf_lastindex]; + memcpy(pQbuffer, ptmpuserbuffer, 1); + acb->wqbuf_lastindex++; + acb->wqbuf_lastindex %= ARCMSR_MAX_QBUFFER; + ptmpuserbuffer++; + user_len--; + } + if (acb->acb_flags & ACB_F_MESSAGE_WQBUFFER_CLEARED) { + acb->acb_flags &= + ~ACB_F_MESSAGE_WQBUFFER_CLEARED; + arcmsr_post_Qbuffer(acb); + } + } else { + /* has error report sensedata */ + struct SENSE_DATA *sensebuffer = + (struct SENSE_DATA *)cmd->sense_buffer; + sensebuffer->ErrorCode = 0x70; + sensebuffer->SenseKey = ILLEGAL_REQUEST; + sensebuffer->AdditionalSenseLength = 0x0A; + sensebuffer->AdditionalSenseCode = 0x20; + sensebuffer->Valid = 1; + retvalue = ARCMSR_MESSAGE_FAIL; + } + } + pci_free_consistent(acb->pdev, 1032, ver_addr, buf_handle); + } + break; + case ARCMSR_MESSAGE_CLEAR_RQBUFFER: { + uint8_t *pQbuffer = acb->rqbuffer; + + if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) { + acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW; + writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK, + ®->inbound_doorbell); + } + acb->acb_flags |= ACB_F_MESSAGE_RQBUFFER_CLEARED; + acb->rqbuf_firstindex = 0; + acb->rqbuf_lastindex = 0; + memset(pQbuffer, 0, ARCMSR_MAX_QBUFFER); + pcmdmessagefld->cmdmessage.ReturnCode = + ARCMSR_MESSAGE_RETURNCODE_OK; + } + break; + case ARCMSR_MESSAGE_CLEAR_WQBUFFER: { + uint8_t *pQbuffer = acb->wqbuffer; + + if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) { + acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW; + writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK + , ®->inbound_doorbell); + } + acb->acb_flags |= + (ACB_F_MESSAGE_WQBUFFER_CLEARED | + ACB_F_MESSAGE_WQBUFFER_READED); + acb->wqbuf_firstindex = 0; + acb->wqbuf_lastindex = 0; + memset(pQbuffer, 0, ARCMSR_MAX_QBUFFER); + pcmdmessagefld->cmdmessage.ReturnCode = + ARCMSR_MESSAGE_RETURNCODE_OK; + } + break; + case ARCMSR_MESSAGE_CLEAR_ALLQBUFFER: { + uint8_t *pQbuffer; + + if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) { + acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW; + writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK + , ®->inbound_doorbell); + } + acb->acb_flags |= + (ACB_F_MESSAGE_WQBUFFER_CLEARED + | ACB_F_MESSAGE_RQBUFFER_CLEARED + | ACB_F_MESSAGE_WQBUFFER_READED); + acb->rqbuf_firstindex = 0; + acb->rqbuf_lastindex = 0; + acb->wqbuf_firstindex = 0; + acb->wqbuf_lastindex = 0; + pQbuffer = acb->rqbuffer; + memset(pQbuffer, 0, sizeof (struct QBUFFER)); + pQbuffer = acb->wqbuffer; + memset(pQbuffer, 0, sizeof (struct QBUFFER)); + pcmdmessagefld->cmdmessage.ReturnCode = ARCMSR_MESSAGE_RETURNCODE_OK; + } + break; + case ARCMSR_MESSAGE_RETURN_CODE_3F: { + pcmdmessagefld->cmdmessage.ReturnCode = ARCMSR_MESSAGE_RETURNCODE_3F; + } + break; + case ARCMSR_MESSAGE_SAY_HELLO: { + int8_t * hello_string = "Hello! I am ARCMSR"; + + memcpy(pcmdmessagefld->messagedatabuffer, hello_string + , (int16_t)strlen(hello_string)); + pcmdmessagefld->cmdmessage.ReturnCode = ARCMSR_MESSAGE_RETURNCODE_OK; + } + break; + case ARCMSR_MESSAGE_SAY_GOODBYE: + arcmsr_iop_parking(acb); + break; + case ARCMSR_MESSAGE_FLUSH_ADAPTER_CACHE: + arcmsr_flush_adapter_cache(acb); + break; + default: + retvalue = ARCMSR_MESSAGE_FAIL; + } + message_out: + if (cmd->use_sg) { + struct scatterlist *sg; + + sg = (struct scatterlist *) cmd->request_buffer; + kunmap_atomic(buffer - sg->offset, KM_IRQ0); + } + return retvalue; +} + +static struct CommandControlBlock *arcmsr_get_freeccb(struct AdapterControlBlock *acb) +{ + struct list_head *head = &acb->ccb_free_list; + struct CommandControlBlock *ccb = NULL; + + if (!list_empty(head)) { + ccb = list_entry(head->next, struct CommandControlBlock, list); + list_del(head->next); + } + return ccb; +} + +static void arcmsr_handle_virtual_command(struct AdapterControlBlock *acb, + struct scsi_cmnd *cmd) +{ + switch (cmd->cmnd[0]) { + case INQUIRY: { + unsigned char inqdata[36]; + char *buffer; + + if (cmd->device->lun) { + cmd->result = (DID_TIME_OUT << 16); + cmd->scsi_done(cmd); + return; + } + inqdata[0] = TYPE_PROCESSOR; + /* Periph Qualifier & Periph Dev Type */ + inqdata[1] = 0; + /* rem media bit & Dev Type Modifier */ + inqdata[2] = 0; + /* ISO,ECMA,& ANSI versions */ + inqdata[4] = 31; + /* length of additional data */ + strncpy(&inqdata[8], "Areca ", 8); + /* Vendor Identification */ + strncpy(&inqdata[16], "RAID controller ", 16); + /* Product Identification */ + strncpy(&inqdata[32], "R001", 4); /* Product Revision */ + if (cmd->use_sg) { + struct scatterlist *sg; + + sg = (struct scatterlist *) cmd->request_buffer; + buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; + } else { + buffer = cmd->request_buffer; + } + memcpy(buffer, inqdata, sizeof(inqdata)); + if (cmd->use_sg) { + struct scatterlist *sg; + + sg = (struct scatterlist *) cmd->request_buffer; + kunmap_atomic(buffer - sg->offset, KM_IRQ0); + } + cmd->scsi_done(cmd); + } + break; + case WRITE_BUFFER: + case READ_BUFFER: { + if (arcmsr_iop_message_xfer(acb, cmd)) + cmd->result = (DID_ERROR << 16); + cmd->scsi_done(cmd); + } + break; + default: + cmd->scsi_done(cmd); + } +} + +static int arcmsr_queue_command(struct scsi_cmnd *cmd, + void (* done)(struct scsi_cmnd *)) +{ + struct Scsi_Host *host = cmd->device->host; + struct AdapterControlBlock *acb = + (struct AdapterControlBlock *) host->hostdata; + struct CommandControlBlock *ccb; + int target = cmd->device->id; + int lun = cmd->device->lun; + + cmd->scsi_done = done; + cmd->host_scribble = NULL; + cmd->result = 0; + if (acb->acb_flags & ACB_F_BUS_RESET) { + printk(KERN_NOTICE "arcmsr%d: bus reset" + " and return busy \n" + , acb->host->host_no); + return SCSI_MLQUEUE_HOST_BUSY; + } + if(target == 16) { + /* virtual device for iop message transfer */ + arcmsr_handle_virtual_command(acb, cmd); + return 0; + } + if (acb->devstate[target][lun] == ARECA_RAID_GONE) { + uint8_t block_cmd; + + block_cmd = cmd->cmnd[0] & 0x0f; + if (block_cmd == 0x08 || block_cmd == 0x0a) { + printk(KERN_NOTICE + "arcmsr%d: block 'read/write'" + "command with gone raid volume" + " Cmd=%2x, TargetId=%d, Lun=%d \n" + , acb->host->host_no + , cmd->cmnd[0] + , target, lun); + cmd->result = (DID_NO_CONNECT << 16); + cmd->scsi_done(cmd); + return 0; + } + } + if (atomic_read(&acb->ccboutstandingcount) >= + ARCMSR_MAX_OUTSTANDING_CMD) + return SCSI_MLQUEUE_HOST_BUSY; + + ccb = arcmsr_get_freeccb(acb); + if (!ccb) + return SCSI_MLQUEUE_HOST_BUSY; + arcmsr_build_ccb(acb, ccb, cmd); + arcmsr_post_ccb(acb, ccb); + return 0; +} + +static void arcmsr_get_firmware_spec(struct AdapterControlBlock *acb) +{ + struct MessageUnit __iomem *reg = acb->pmu; + char *acb_firm_model = acb->firm_model; + char *acb_firm_version = acb->firm_version; + char __iomem *iop_firm_model = (char __iomem *) ®->message_rwbuffer[15]; + char __iomem *iop_firm_version = (char __iomem *) ®->message_rwbuffer[17]; + int count; + + writel(ARCMSR_INBOUND_MESG0_GET_CONFIG, ®->inbound_msgaddr0); + if (arcmsr_wait_msgint_ready(acb)) + printk(KERN_NOTICE + "arcmsr%d: wait " + "'get adapter firmware miscellaneous data' timeout \n" + , acb->host->host_no); + count = 8; + while (count) { + *acb_firm_model = readb(iop_firm_model); + acb_firm_model++; + iop_firm_model++; + count--; + } + count = 16; + while (count) { + *acb_firm_version = readb(iop_firm_version); + acb_firm_version++; + iop_firm_version++; + count--; + } + printk(KERN_INFO + "ARECA RAID ADAPTER%d: FIRMWARE VERSION %s \n" + , acb->host->host_no + , acb->firm_version); + acb->firm_request_len = readl(®->message_rwbuffer[1]); + acb->firm_numbers_queue = readl(®->message_rwbuffer[2]); + acb->firm_sdram_size = readl(®->message_rwbuffer[3]); + acb->firm_hd_channels = readl(®->message_rwbuffer[4]); +} + +static void arcmsr_polling_ccbdone(struct AdapterControlBlock *acb, + struct CommandControlBlock *poll_ccb) +{ + struct MessageUnit __iomem *reg = acb->pmu; + struct CommandControlBlock *ccb; + uint32_t flag_ccb, outbound_intstatus, poll_ccb_done = 0, poll_count = 0; + int id, lun; + + polling_ccb_retry: + poll_count++; + outbound_intstatus = readl(®->outbound_intstatus) + & acb->outbound_int_enable; + writel(outbound_intstatus, ®->outbound_intstatus);/*clear interrupt*/ + while (1) { + if ((flag_ccb = readl(®->outbound_queueport)) == 0xFFFFFFFF) { + if (poll_ccb_done) + break; + else { + msleep(25); + if (poll_count > 100) + break; + goto polling_ccb_retry; + } + } + ccb = (struct CommandControlBlock *) + (acb->vir2phy_offset + (flag_ccb << 5)); + if ((ccb->acb != acb) || + (ccb->startdone != ARCMSR_CCB_START)) { + if ((ccb->startdone == ARCMSR_CCB_ABORTED) || + (ccb == poll_ccb)) { + printk(KERN_NOTICE + "arcmsr%d: scsi id=%d lun=%d ccb='0x%p'" + " poll command abort successfully \n" + , acb->host->host_no + , ccb->pcmd->device->id + , ccb->pcmd->device->lun + , ccb); + ccb->pcmd->result = DID_ABORT << 16; + arcmsr_ccb_complete(ccb, 1); + poll_ccb_done = 1; + continue; + } + printk(KERN_NOTICE + "arcmsr%d: polling get an illegal ccb" + " command done ccb='0x%p'" + "ccboutstandingcount=%d \n" + , acb->host->host_no + , ccb + , atomic_read(&acb->ccboutstandingcount)); + continue; + } + id = ccb->pcmd->device->id; + lun = ccb->pcmd->device->lun; + if (!(flag_ccb & ARCMSR_CCBREPLY_FLAG_ERROR)) { + if (acb->devstate[id][lun] == ARECA_RAID_GONE) + acb->devstate[id][lun] = ARECA_RAID_GOOD; + ccb->pcmd->result = DID_OK << 16; + arcmsr_ccb_complete(ccb, 1); + } else { + switch(ccb->arcmsr_cdb.DeviceStatus) { + case ARCMSR_DEV_SELECT_TIMEOUT: { + acb->devstate[id][lun] = ARECA_RAID_GONE; + ccb->pcmd->result = DID_TIME_OUT << 16; + arcmsr_ccb_complete(ccb, 1); + } + break; + case ARCMSR_DEV_ABORTED: + case ARCMSR_DEV_INIT_FAIL: { + acb->devstate[id][lun] = ARECA_RAID_GONE; + ccb->pcmd->result = DID_BAD_TARGET << 16; + arcmsr_ccb_complete(ccb, 1); + } + break; + case ARCMSR_DEV_CHECK_CONDITION: { + acb->devstate[id][lun] = ARECA_RAID_GOOD; + arcmsr_report_sense_info(ccb); + arcmsr_ccb_complete(ccb, 1); + } + break; + default: + printk(KERN_NOTICE + "arcmsr%d: scsi id=%d lun=%d" + " polling and getting command error done" + "but got unknown DeviceStatus = 0x%x \n" + , acb->host->host_no + , id + , lun + , ccb->arcmsr_cdb.DeviceStatus); + acb->devstate[id][lun] = ARECA_RAID_GONE; + ccb->pcmd->result = DID_BAD_TARGET << 16; + arcmsr_ccb_complete(ccb, 1); + break; + } + } + } +} + +static void arcmsr_iop_init(struct AdapterControlBlock *acb) +{ + struct MessageUnit __iomem *reg = acb->pmu; + uint32_t intmask_org, mask, outbound_doorbell, firmware_state = 0; + + do { + firmware_state = readl(®->outbound_msgaddr1); + } while (!(firmware_state & ARCMSR_OUTBOUND_MESG1_FIRMWARE_OK)); + intmask_org = readl(®->outbound_intmask) + | ARCMSR_MU_OUTBOUND_MESSAGE0_INTMASKENABLE; + arcmsr_get_firmware_spec(acb); + + acb->acb_flags |= ACB_F_MSG_START_BGRB; + writel(ARCMSR_INBOUND_MESG0_START_BGRB, ®->inbound_msgaddr0); + if (arcmsr_wait_msgint_ready(acb)) { + printk(KERN_NOTICE "arcmsr%d: " + "wait 'start adapter background rebulid' timeout\n", + acb->host->host_no); + } + + outbound_doorbell = readl(®->outbound_doorbell); + writel(outbound_doorbell, ®->outbound_doorbell); + writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK, ®->inbound_doorbell); + mask = ~(ARCMSR_MU_OUTBOUND_POSTQUEUE_INTMASKENABLE + | ARCMSR_MU_OUTBOUND_DOORBELL_INTMASKENABLE); + writel(intmask_org & mask, ®->outbound_intmask); + acb->outbound_int_enable = ~(intmask_org & mask) & 0x000000ff; + acb->acb_flags |= ACB_F_IOP_INITED; +} + +static void arcmsr_iop_reset(struct AdapterControlBlock *acb) +{ + struct MessageUnit __iomem *reg = acb->pmu; + struct CommandControlBlock *ccb; + uint32_t intmask_org; + int i = 0; + + if (atomic_read(&acb->ccboutstandingcount) != 0) { + /* talk to iop 331 outstanding command aborted */ + arcmsr_abort_allcmd(acb); + /* wait for 3 sec for all command aborted*/ + msleep_interruptible(3000); + /* disable all outbound interrupt */ + intmask_org = arcmsr_disable_outbound_ints(acb); + /* clear all outbound posted Q */ + for (i = 0; i < ARCMSR_MAX_OUTSTANDING_CMD; i++) + readl(®->outbound_queueport); + for (i = 0; i < ARCMSR_MAX_FREECCB_NUM; i++) { + ccb = acb->pccb_pool[i]; + if ((ccb->startdone == ARCMSR_CCB_START) || + (ccb->startdone == ARCMSR_CCB_ABORTED)) { + ccb->startdone = ARCMSR_CCB_ABORTED; + ccb->pcmd->result = DID_ABORT << 16; + arcmsr_ccb_complete(ccb, 1); + } + } + /* enable all outbound interrupt */ + arcmsr_enable_outbound_ints(acb, intmask_org); + } + atomic_set(&acb->ccboutstandingcount, 0); +} + +static int arcmsr_bus_reset(struct scsi_cmnd *cmd) +{ + struct AdapterControlBlock *acb = + (struct AdapterControlBlock *)cmd->device->host->hostdata; + int i; + + acb->num_resets++; + acb->acb_flags |= ACB_F_BUS_RESET; + for (i = 0; i < 400; i++) { + if (!atomic_read(&acb->ccboutstandingcount)) + break; + arcmsr_interrupt(acb); + msleep(25); + } + arcmsr_iop_reset(acb); + acb->acb_flags &= ~ACB_F_BUS_RESET; + return SUCCESS; +} + +static void arcmsr_abort_one_cmd(struct AdapterControlBlock *acb, + struct CommandControlBlock *ccb) +{ + u32 intmask; + + ccb->startdone = ARCMSR_CCB_ABORTED; + + /* + ** Wait for 3 sec for all command done. + */ + msleep_interruptible(3000); + + intmask = arcmsr_disable_outbound_ints(acb); + arcmsr_polling_ccbdone(acb, ccb); + arcmsr_enable_outbound_ints(acb, intmask); +} + +static int arcmsr_abort(struct scsi_cmnd *cmd) +{ + struct AdapterControlBlock *acb = + (struct AdapterControlBlock *)cmd->device->host->hostdata; + int i = 0; + + printk(KERN_NOTICE + "arcmsr%d: abort device command of scsi id=%d lun=%d \n", + acb->host->host_no, cmd->device->id, cmd->device->lun); + acb->num_aborts++; + + /* + ************************************************ + ** the all interrupt service routine is locked + ** we need to handle it as soon as possible and exit + ************************************************ + */ + if (!atomic_read(&acb->ccboutstandingcount)) + return SUCCESS; + + for (i = 0; i < ARCMSR_MAX_FREECCB_NUM; i++) { + struct CommandControlBlock *ccb = acb->pccb_pool[i]; + if (ccb->startdone == ARCMSR_CCB_START && ccb->pcmd == cmd) { + arcmsr_abort_one_cmd(acb, ccb); + break; + } + } + + return SUCCESS; +} + +static const char *arcmsr_info(struct Scsi_Host *host) +{ + struct AdapterControlBlock *acb = + (struct AdapterControlBlock *) host->hostdata; + static char buf[256]; + char *type; + int raid6 = 1; + + switch (acb->pdev->device) { + case PCI_DEVICE_ID_ARECA_1110: + case PCI_DEVICE_ID_ARECA_1210: + raid6 = 0; + /*FALLTHRU*/ + case PCI_DEVICE_ID_ARECA_1120: + case PCI_DEVICE_ID_ARECA_1130: + case PCI_DEVICE_ID_ARECA_1160: + case PCI_DEVICE_ID_ARECA_1170: + case PCI_DEVICE_ID_ARECA_1220: + case PCI_DEVICE_ID_ARECA_1230: + case PCI_DEVICE_ID_ARECA_1260: + case PCI_DEVICE_ID_ARECA_1270: + case PCI_DEVICE_ID_ARECA_1280: + type = "SATA"; + break; + case PCI_DEVICE_ID_ARECA_1380: + case PCI_DEVICE_ID_ARECA_1381: + case PCI_DEVICE_ID_ARECA_1680: + case PCI_DEVICE_ID_ARECA_1681: + type = "SAS"; + break; + default: + type = "X-TYPE"; + break; + } + sprintf(buf, "Areca %s Host Adapter RAID Controller%s\n %s", + type, raid6 ? "( RAID6 capable)" : "", + ARCMSR_DRIVER_VERSION); + return buf; +} + + diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index c09396d2c77b..df7b62676d87 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2004,6 +2004,23 @@ #define PCI_DEVICE_ID_ALTIMA_AC9100 0x03ea #define PCI_DEVICE_ID_ALTIMA_AC1003 0x03eb +#define PCI_VENDOR_ID_ARECA 0x17d3 +#define PCI_DEVICE_ID_ARECA_1110 0x1110 +#define PCI_DEVICE_ID_ARECA_1120 0x1120 +#define PCI_DEVICE_ID_ARECA_1130 0x1130 +#define PCI_DEVICE_ID_ARECA_1160 0x1160 +#define PCI_DEVICE_ID_ARECA_1170 0x1170 +#define PCI_DEVICE_ID_ARECA_1210 0x1210 +#define PCI_DEVICE_ID_ARECA_1220 0x1220 +#define PCI_DEVICE_ID_ARECA_1230 0x1230 +#define PCI_DEVICE_ID_ARECA_1260 0x1260 +#define PCI_DEVICE_ID_ARECA_1270 0x1270 +#define PCI_DEVICE_ID_ARECA_1280 0x1280 +#define PCI_DEVICE_ID_ARECA_1380 0x1380 +#define PCI_DEVICE_ID_ARECA_1381 0x1381 +#define PCI_DEVICE_ID_ARECA_1680 0x1680 +#define PCI_DEVICE_ID_ARECA_1681 0x1681 + #define PCI_VENDOR_ID_S2IO 0x17d5 #define PCI_DEVICE_ID_S2IO_WIN 0x5731 #define PCI_DEVICE_ID_S2IO_UNI 0x5831 From 4288b92b9644fdb4c6168273873fe08f32090d7a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 8 Jul 2006 22:38:56 -0700 Subject: [PATCH 0043/1063] [POWERPC] briq_panel Kconfig fix drivers/char/briq_panel.c:28:22: error: asm/prom.h: No such file or directory Cc: Jeremy Kerr Acked-by: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Paul Mackerras --- drivers/char/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index a7ef542afbc2..320ad7ba11d4 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -497,6 +497,7 @@ config LEGACY_PTY_COUNT config BRIQ_PANEL tristate 'Total Impact briQ front panel driver' + depends on PPC ---help--- The briQ is a small footprint CHRP computer with a frontpanel VFD, a tristate led and two switches. It is the size of a CDROM drive. From a7f67bdf2c9f24509b8e81e0f35573b611987c80 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 12 Jul 2006 15:35:54 +1000 Subject: [PATCH 0044/1063] [POWERPC] Constify & voidify get_property() Now that get_property() returns a void *, there's no need to cast its return value. Also, treat the return value as const, so we can constify get_property later. powerpc core changes. Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/btext.c | 24 ++++----- arch/powerpc/kernel/ibmebus.c | 6 +-- arch/powerpc/kernel/legacy_serial.c | 35 ++++++------- arch/powerpc/kernel/lparcfg.c | 11 ++--- arch/powerpc/kernel/machine_kexec_64.c | 10 ++-- arch/powerpc/kernel/pci_32.c | 39 ++++++++------- arch/powerpc/kernel/pci_64.c | 28 ++++++----- arch/powerpc/kernel/pci_dn.c | 13 ++--- arch/powerpc/kernel/prom.c | 23 +++++---- arch/powerpc/kernel/prom_parse.c | 68 +++++++++++++------------- arch/powerpc/kernel/rtas-proc.c | 25 +++++----- arch/powerpc/kernel/rtas.c | 28 ++++++----- arch/powerpc/kernel/rtas_pci.c | 22 ++++----- arch/powerpc/kernel/setup-common.c | 19 +++---- arch/powerpc/kernel/setup_64.c | 14 +++--- arch/powerpc/kernel/sysfs.c | 5 +- arch/powerpc/kernel/time.c | 4 +- arch/powerpc/kernel/vio.c | 16 +++--- arch/powerpc/mm/numa.c | 31 ++++++------ arch/powerpc/sysdev/fsl_soc.c | 30 ++++++------ arch/powerpc/sysdev/mmio_nvram.c | 4 +- include/asm-powerpc/ibmebus.h | 2 +- include/asm-powerpc/prom.h | 16 +++--- include/asm-powerpc/vio.h | 4 +- 24 files changed, 239 insertions(+), 238 deletions(-) diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index f4e5e14ee2b6..995fcef156fd 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -158,35 +158,35 @@ int btext_initialize(struct device_node *np) { unsigned int width, height, depth, pitch; unsigned long address = 0; - u32 *prop; + const u32 *prop; - prop = (u32 *)get_property(np, "linux,bootx-width", NULL); + prop = get_property(np, "linux,bootx-width", NULL); if (prop == NULL) - prop = (u32 *)get_property(np, "width", NULL); + prop = get_property(np, "width", NULL); if (prop == NULL) return -EINVAL; width = *prop; - prop = (u32 *)get_property(np, "linux,bootx-height", NULL); + prop = get_property(np, "linux,bootx-height", NULL); if (prop == NULL) - prop = (u32 *)get_property(np, "height", NULL); + prop = get_property(np, "height", NULL); if (prop == NULL) return -EINVAL; height = *prop; - prop = (u32 *)get_property(np, "linux,bootx-depth", NULL); + prop = get_property(np, "linux,bootx-depth", NULL); if (prop == NULL) - prop = (u32 *)get_property(np, "depth", NULL); + prop = get_property(np, "depth", NULL); if (prop == NULL) return -EINVAL; depth = *prop; pitch = width * ((depth + 7) / 8); - prop = (u32 *)get_property(np, "linux,bootx-linebytes", NULL); + prop = get_property(np, "linux,bootx-linebytes", NULL); if (prop == NULL) - prop = (u32 *)get_property(np, "linebytes", NULL); + prop = get_property(np, "linebytes", NULL); if (prop) pitch = *prop; if (pitch == 1) pitch = 0x1000; - prop = (u32 *)get_property(np, "address", NULL); + prop = get_property(np, "address", NULL); if (prop) address = *prop; @@ -214,11 +214,11 @@ int btext_initialize(struct device_node *np) int __init btext_find_display(int allow_nonstdout) { - char *name; + const char *name; struct device_node *np = NULL; int rc = -ENODEV; - name = (char *)get_property(of_chosen, "linux,stdout-path", NULL); + name = get_property(of_chosen, "linux,stdout-path", NULL); if (name != NULL) { np = of_find_node_by_path(name); if (np != NULL) { diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index 97ddc02a3d42..d9a0b087fa7f 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -167,7 +167,7 @@ static DEVICE_ATTR(name, S_IRUSR | S_IRGRP | S_IROTH, ibmebusdev_show_name, NULL); static struct ibmebus_dev* __devinit ibmebus_register_device_common( - struct ibmebus_dev *dev, char *name) + struct ibmebus_dev *dev, const char *name) { int err = 0; @@ -194,10 +194,10 @@ static struct ibmebus_dev* __devinit ibmebus_register_device_node( struct device_node *dn) { struct ibmebus_dev *dev; - char *loc_code; + const char *loc_code; int length; - loc_code = (char *)get_property(dn, "ibm,loc-code", NULL); + loc_code = get_property(dn, "ibm,loc-code", NULL); if (!loc_code) { printk(KERN_WARNING "%s: node %s missing 'ibm,loc-code'\n", __FUNCTION__, dn->name ? dn->name : ""); diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 359ab89748e0..ee1e0b8c7f1f 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -39,16 +39,17 @@ static int __init add_legacy_port(struct device_node *np, int want_index, phys_addr_t taddr, unsigned long irq, upf_t flags, int irq_check_parent) { - u32 *clk, *spd, clock = BASE_BAUD * 16; + const u32 *clk, *spd; + u32 clock = BASE_BAUD * 16; int index; /* get clock freq. if present */ - clk = (u32 *)get_property(np, "clock-frequency", NULL); + clk = get_property(np, "clock-frequency", NULL); if (clk && *clk) clock = *clk; /* get default speed if present */ - spd = (u32 *)get_property(np, "current-speed", NULL); + spd = get_property(np, "current-speed", NULL); /* If we have a location index, then try to use it */ if (want_index >= 0 && want_index < MAX_LEGACY_SERIAL_PORTS) @@ -113,7 +114,7 @@ static int __init add_legacy_soc_port(struct device_node *np, struct device_node *soc_dev) { u64 addr; - u32 *addrp; + const u32 *addrp; upf_t flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ; /* We only support ports that have a clock frequency properly @@ -140,15 +141,15 @@ static int __init add_legacy_soc_port(struct device_node *np, static int __init add_legacy_isa_port(struct device_node *np, struct device_node *isa_brg) { - u32 *reg; - char *typep; + const u32 *reg; + const char *typep; int index = -1; u64 taddr; DBG(" -> add_legacy_isa_port(%s)\n", np->full_name); /* Get the ISA port number */ - reg = (u32 *)get_property(np, "reg", NULL); + reg = get_property(np, "reg", NULL); if (reg == NULL) return -1; @@ -159,7 +160,7 @@ static int __init add_legacy_isa_port(struct device_node *np, /* Now look for an "ibm,aix-loc" property that gives us ordering * if any... */ - typep = (char *)get_property(np, "ibm,aix-loc", NULL); + typep = get_property(np, "ibm,aix-loc", NULL); /* If we have a location index, then use it */ if (typep && *typep == 'S') @@ -184,7 +185,7 @@ static int __init add_legacy_pci_port(struct device_node *np, struct device_node *pci_dev) { u64 addr, base; - u32 *addrp; + const u32 *addrp; unsigned int flags; int iotype, index = -1, lindex = 0; @@ -223,7 +224,7 @@ static int __init add_legacy_pci_port(struct device_node *np, * we get to their "reg" property */ if (np != pci_dev) { - u32 *reg = (u32 *)get_property(np, "reg", NULL); + const u32 *reg = get_property(np, "reg", NULL); if (reg && (*reg < 4)) index = lindex = *reg; } @@ -281,13 +282,13 @@ static void __init setup_legacy_serial_console(int console) void __init find_legacy_serial_ports(void) { struct device_node *np, *stdout = NULL; - char *path; + const char *path; int index; DBG(" -> find_legacy_serial_port()\n"); /* Now find out if one of these is out firmware console */ - path = (char *)get_property(of_chosen, "linux,stdout-path", NULL); + path = get_property(of_chosen, "linux,stdout-path", NULL); if (path != NULL) { stdout = of_find_node_by_path(path); if (stdout) @@ -487,8 +488,8 @@ static int __init check_legacy_serial_console(void) { struct device_node *prom_stdout = NULL; int speed = 0, offset = 0; - char *name; - u32 *spd; + const char *name; + const u32 *spd; DBG(" -> check_legacy_serial_console()\n"); @@ -509,7 +510,7 @@ static int __init check_legacy_serial_console(void) } /* We are getting a weird phandle from OF ... */ /* ... So use the full path instead */ - name = (char *)get_property(of_chosen, "linux,stdout-path", NULL); + name = get_property(of_chosen, "linux,stdout-path", NULL); if (name == NULL) { DBG(" no linux,stdout-path !\n"); return -ENODEV; @@ -521,12 +522,12 @@ static int __init check_legacy_serial_console(void) } DBG("stdout is %s\n", prom_stdout->full_name); - name = (char *)get_property(prom_stdout, "name", NULL); + name = get_property(prom_stdout, "name", NULL); if (!name) { DBG(" stdout package has no name !\n"); goto not_found; } - spd = (u32 *)get_property(prom_stdout, "current-speed", NULL); + spd = get_property(prom_stdout, "current-speed", NULL); if (spd) speed = *spd; diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 2d94b372d49b..3ce3a2d56fa8 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -309,12 +309,11 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) int partition_potential_processors; int partition_active_processors; struct device_node *rtas_node; - int *lrdrp = NULL; + const int *lrdrp = NULL; rtas_node = find_path_device("/rtas"); if (rtas_node) - lrdrp = (int *)get_property(rtas_node, "ibm,lrdr-capacity", - NULL); + lrdrp = get_property(rtas_node, "ibm,lrdr-capacity", NULL); if (lrdrp == NULL) { partition_potential_processors = vdso_data->processorCount; @@ -519,7 +518,8 @@ static int lparcfg_data(struct seq_file *m, void *v) const char *model = ""; const char *system_id = ""; const char *tmp; - unsigned int *lp_index_ptr, lp_index = 0; + const unsigned int *lp_index_ptr; + unsigned int lp_index = 0; seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS); @@ -539,8 +539,7 @@ static int lparcfg_data(struct seq_file *m, void *v) if (firmware_has_feature(FW_FEATURE_ISERIES)) system_id += 4; } - lp_index_ptr = (unsigned int *) - get_property(rootdn, "ibm,partition-no", NULL); + lp_index_ptr = get_property(rootdn, "ibm,partition-no", NULL); if (lp_index_ptr) lp_index = *lp_index_ptr; } diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index b438d45a068c..4efdaa9d3f43 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -33,8 +33,8 @@ int default_machine_kexec_prepare(struct kimage *image) unsigned long begin, end; /* limits of segment */ unsigned long low, high; /* limits of blocked memory range */ struct device_node *node; - unsigned long *basep; - unsigned int *sizep; + const unsigned long *basep; + const unsigned int *sizep; if (!ppc_md.hpte_clear_all) return -ENOENT; @@ -74,10 +74,8 @@ int default_machine_kexec_prepare(struct kimage *image) /* We also should not overwrite the tce tables */ for (node = of_find_node_by_type(NULL, "pci"); node != NULL; node = of_find_node_by_type(node, "pci")) { - basep = (unsigned long *)get_property(node, "linux,tce-base", - NULL); - sizep = (unsigned int *)get_property(node, "linux,tce-size", - NULL); + basep = get_property(node, "linux,tce-base", NULL); + sizep = get_property(node, "linux,tce-size", NULL); if (basep == NULL || sizep == NULL) continue; diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 898dae8ab6d9..3f6bd36e9e14 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -633,12 +633,12 @@ pcibios_alloc_controller(void) static void make_one_node_map(struct device_node* node, u8 pci_bus) { - int *bus_range; + const int *bus_range; int len; if (pci_bus >= pci_bus_count) return; - bus_range = (int *) get_property(node, "bus-range", &len); + bus_range = get_property(node, "bus-range", &len); if (bus_range == NULL || len < 2 * sizeof(int)) { printk(KERN_WARNING "Can't get bus-range for %s, " "assuming it starts at 0\n", node->full_name); @@ -648,13 +648,13 @@ make_one_node_map(struct device_node* node, u8 pci_bus) for (node=node->child; node != 0;node = node->sibling) { struct pci_dev* dev; - unsigned int *class_code, *reg; + const unsigned int *class_code, *reg; - class_code = (unsigned int *) get_property(node, "class-code", NULL); + class_code = get_property(node, "class-code", NULL); if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI && (*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS)) continue; - reg = (unsigned int *)get_property(node, "reg", NULL); + reg = get_property(node, "reg", NULL); if (!reg) continue; dev = pci_find_slot(pci_bus, ((reg[0] >> 8) & 0xff)); @@ -669,7 +669,7 @@ pcibios_make_OF_bus_map(void) { int i; struct pci_controller* hose; - u8* of_prop_map; + struct property *map_prop; pci_to_OF_bus_map = (u8*)kmalloc(pci_bus_count, GFP_KERNEL); if (!pci_to_OF_bus_map) { @@ -691,9 +691,12 @@ pcibios_make_OF_bus_map(void) continue; make_one_node_map(node, hose->first_busno); } - of_prop_map = get_property(find_path_device("/"), "pci-OF-bus-map", NULL); - if (of_prop_map) - memcpy(of_prop_map, pci_to_OF_bus_map, pci_bus_count); + map_prop = of_find_property(find_path_device("/"), + "pci-OF-bus-map", NULL); + if (map_prop) { + BUG_ON(pci_bus_count > map_prop->length); + memcpy(map_prop->value, pci_to_OF_bus_map, pci_bus_count); + } #ifdef DEBUG printk("PCI->OF bus map:\n"); for (i=0; isibling) { - unsigned int *class_code; + const unsigned int *class_code; if (filter(node, data)) return node; @@ -722,7 +725,7 @@ scan_OF_pci_childs(struct device_node* node, pci_OF_scan_iterator filter, void* * a fake root for all functions of a multi-function device, * we go down them as well. */ - class_code = (unsigned int *) get_property(node, "class-code", NULL); + class_code = get_property(node, "class-code", NULL); if ((!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI && (*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS)) && strcmp(node->name, "multifunc-device")) @@ -737,10 +740,10 @@ scan_OF_pci_childs(struct device_node* node, pci_OF_scan_iterator filter, void* static int scan_OF_pci_childs_iterator(struct device_node* node, void* data) { - unsigned int *reg; + const unsigned int *reg; u8* fdata = (u8*)data; - reg = (unsigned int *) get_property(node, "reg", NULL); + reg = get_property(node, "reg", NULL); if (reg && ((reg[0] >> 8) & 0xff) == fdata[1] && ((reg[0] >> 16) & 0xff) == fdata[0]) return 1; @@ -841,7 +844,7 @@ find_OF_pci_device_filter(struct device_node* node, void* data) int pci_device_from_OF_node(struct device_node* node, u8* bus, u8* devfn) { - unsigned int *reg; + const unsigned int *reg; struct pci_controller* hose; struct pci_dev* dev = NULL; @@ -854,7 +857,7 @@ pci_device_from_OF_node(struct device_node* node, u8* bus, u8* devfn) if (!scan_OF_pci_childs(((struct device_node*)hose->arch_data)->child, find_OF_pci_device_filter, (void *)node)) return -ENODEV; - reg = (unsigned int *) get_property(node, "reg", NULL); + reg = get_property(node, "reg", NULL); if (!reg) return -ENODEV; *bus = (reg[0] >> 16) & 0xff; @@ -885,8 +888,8 @@ pci_process_bridge_OF_ranges(struct pci_controller *hose, struct device_node *dev, int primary) { static unsigned int static_lc_ranges[256] __initdata; - unsigned int *dt_ranges, *lc_ranges, *ranges, *prev; - unsigned int size; + const unsigned int *dt_ranges; + unsigned int *lc_ranges, *ranges, *prev, size; int rlen = 0, orig_rlen; int memno = 0; struct resource *res; @@ -897,7 +900,7 @@ pci_process_bridge_OF_ranges(struct pci_controller *hose, * that can have more than 3 ranges, fortunately using contiguous * addresses -- BenH */ - dt_ranges = (unsigned int *) get_property(dev, "ranges", &rlen); + dt_ranges = get_property(dev, "ranges", &rlen); if (!dt_ranges) return; /* Sanity check, though hopefully that never happens */ diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 1d85fcba51e4..e795a7e2a38e 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -246,10 +246,10 @@ static void __init pcibios_claim_of_setup(void) #ifdef CONFIG_PPC_MULTIPLATFORM static u32 get_int_prop(struct device_node *np, const char *name, u32 def) { - u32 *prop; + const u32 *prop; int len; - prop = (u32 *) get_property(np, name, &len); + prop = get_property(np, name, &len); if (prop && len >= 4) return *prop; return def; @@ -278,10 +278,11 @@ static void pci_parse_of_addrs(struct device_node *node, struct pci_dev *dev) u64 base, size; unsigned int flags; struct resource *res; - u32 *addrs, i; + const u32 *addrs; + u32 i; int proplen; - addrs = (u32 *) get_property(node, "assigned-addresses", &proplen); + addrs = get_property(node, "assigned-addresses", &proplen); if (!addrs) return; DBG(" parse addresses (%d bytes) @ %p\n", proplen, addrs); @@ -381,7 +382,7 @@ void __devinit of_scan_bus(struct device_node *node, struct pci_bus *bus) { struct device_node *child = NULL; - u32 *reg; + const u32 *reg; int reglen, devfn; struct pci_dev *dev; @@ -389,7 +390,7 @@ void __devinit of_scan_bus(struct device_node *node, while ((child = of_get_next_child(node, child)) != NULL) { DBG(" * %s\n", child->full_name); - reg = (u32 *) get_property(child, "reg", ®len); + reg = get_property(child, "reg", ®len); if (reg == NULL || reglen < 20) continue; devfn = (reg[0] >> 8) & 0xff; @@ -413,7 +414,7 @@ void __devinit of_scan_pci_bridge(struct device_node *node, struct pci_dev *dev) { struct pci_bus *bus; - u32 *busrange, *ranges; + const u32 *busrange, *ranges; int len, i, mode; struct resource *res; unsigned int flags; @@ -422,13 +423,13 @@ void __devinit of_scan_pci_bridge(struct device_node *node, DBG("of_scan_pci_bridge(%s)\n", node->full_name); /* parse bus-range property */ - busrange = (u32 *) get_property(node, "bus-range", &len); + busrange = get_property(node, "bus-range", &len); if (busrange == NULL || len != 8) { printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %s\n", node->full_name); return; } - ranges = (u32 *) get_property(node, "ranges", &len); + ranges = get_property(node, "ranges", &len); if (ranges == NULL) { printk(KERN_DEBUG "Can't get ranges for PCI-PCI bridge %s\n", node->full_name); @@ -892,13 +893,13 @@ static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node, unsigned int size; }; - struct isa_range *range; + const struct isa_range *range; unsigned long pci_addr; unsigned int isa_addr; unsigned int size; int rlen = 0; - range = (struct isa_range *) get_property(isa_node, "ranges", &rlen); + range = get_property(isa_node, "ranges", &rlen); if (range == NULL || (rlen < sizeof(struct isa_range))) { printk(KERN_ERR "no ISA ranges or unexpected isa range size," "mapping 64k\n"); @@ -939,7 +940,8 @@ static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node, void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose, struct device_node *dev, int prim) { - unsigned int *ranges, pci_space; + const unsigned int *ranges; + unsigned int pci_space; unsigned long size; int rlen = 0; int memno = 0; @@ -957,7 +959,7 @@ void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose, * (size depending on dev->n_addr_cells) * cells 4+5 or 5+6: the size of the range */ - ranges = (unsigned int *) get_property(dev, "ranges", &rlen); + ranges = get_property(dev, "ranges", &rlen); if (ranges == NULL) return; hose->io_base_phys = 0; diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 1c18953514c3..68df018dae0e 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -40,8 +40,8 @@ static void * __devinit update_dn_pci_info(struct device_node *dn, void *data) { struct pci_controller *phb = data; - int *type = (int *)get_property(dn, "ibm,pci-config-space-type", NULL); - u32 *regs; + const int *type = get_property(dn, "ibm,pci-config-space-type", NULL); + const u32 *regs; struct pci_dn *pdn; if (mem_init_done) @@ -54,14 +54,14 @@ static void * __devinit update_dn_pci_info(struct device_node *dn, void *data) dn->data = pdn; pdn->node = dn; pdn->phb = phb; - regs = (u32 *)get_property(dn, "reg", NULL); + regs = get_property(dn, "reg", NULL); if (regs) { /* First register entry is addr (00BBSS00) */ pdn->busno = (regs[0] >> 16) & 0xff; pdn->devfn = (regs[0] >> 8) & 0xff; } if (firmware_has_feature(FW_FEATURE_ISERIES)) { - u32 *busp = (u32 *)get_property(dn, "linux,subbus", NULL); + const u32 *busp = get_property(dn, "linux,subbus", NULL); if (busp) pdn->bussubno = *busp; } @@ -96,10 +96,11 @@ void *traverse_pci_devices(struct device_node *start, traverse_func pre, /* We started with a phb, iterate all childs */ for (dn = start->child; dn; dn = nextdn) { - u32 *classp, class; + const u32 *classp; + u32 class; nextdn = NULL; - classp = (u32 *)get_property(dn, "class-code", NULL); + classp = get_property(dn, "class-code", NULL); class = classp ? *classp : 0; if (pre && ((ret = pre(dn, data)) != NULL)) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index a1787ffb6319..2a3d84a39cb5 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -942,11 +942,11 @@ void __init early_init_devtree(void *params) int prom_n_addr_cells(struct device_node* np) { - int* ip; + const int *ip; do { if (np->parent) np = np->parent; - ip = (int *) get_property(np, "#address-cells", NULL); + ip = get_property(np, "#address-cells", NULL); if (ip != NULL) return *ip; } while (np->parent); @@ -958,11 +958,11 @@ EXPORT_SYMBOL(prom_n_addr_cells); int prom_n_size_cells(struct device_node* np) { - int* ip; + const int* ip; do { if (np->parent) np = np->parent; - ip = (int *) get_property(np, "#size-cells", NULL); + ip = get_property(np, "#size-cells", NULL); if (ip != NULL) return *ip; } while (np->parent); @@ -1034,7 +1034,7 @@ int device_is_compatible(struct device_node *device, const char *compat) const char* cp; int cplen, l; - cp = (char *) get_property(device, "compatible", &cplen); + cp = get_property(device, "compatible", &cplen); if (cp == NULL) return 0; while (cplen > 0) { @@ -1449,7 +1449,7 @@ static int of_finish_dynamic_node(struct device_node *node) { struct device_node *parent = of_get_parent(node); int err = 0; - phandle *ibm_phandle; + const phandle *ibm_phandle; node->name = get_property(node, "name", NULL); node->type = get_property(node, "device_type", NULL); @@ -1466,8 +1466,7 @@ static int of_finish_dynamic_node(struct device_node *node) return -ENODEV; /* fix up new node's linux_phandle field */ - if ((ibm_phandle = (unsigned int *)get_property(node, - "ibm,phandle", NULL))) + if ((ibm_phandle = get_property(node, "ibm,phandle", NULL))) node->linux_phandle = *ibm_phandle; out: @@ -1658,16 +1657,16 @@ struct device_node *of_get_cpu_node(int cpu, unsigned int *thread) hardid = get_hard_smp_processor_id(cpu); for_each_node_by_type(np, "cpu") { - u32 *intserv; + const u32 *intserv; unsigned int plen, t; /* Check for ibm,ppc-interrupt-server#s. If it doesn't exist * fallback to "reg" property and assume no threads */ - intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", - &plen); + intserv = get_property(np, "ibm,ppc-interrupt-server#s", + &plen); if (intserv == NULL) { - u32 *reg = (u32 *)get_property(np, "reg", NULL); + const u32 *reg = get_property(np, "reg", NULL); if (reg == NULL) continue; if (*reg == hardid) { diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c index e9960170667b..cdcd5d665468 100644 --- a/arch/powerpc/kernel/prom_parse.c +++ b/arch/powerpc/kernel/prom_parse.c @@ -270,7 +270,7 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus, struct of_bus *pbus, u32 *addr, int na, int ns, int pna) { - u32 *ranges; + const u32 *ranges; unsigned int rlen; int rone; u64 offset = OF_BAD_ADDR; @@ -287,7 +287,7 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus, * to translate addresses that aren't supposed to be translated in * the first place. --BenH. */ - ranges = (u32 *)get_property(parent, "ranges", &rlen); + ranges = get_property(parent, "ranges", &rlen); if (ranges == NULL || rlen == 0) { offset = of_read_number(addr, na); memset(addr, 0, pna * 4); @@ -330,7 +330,7 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus, * that can be mapped to a cpu physical address). This is not really specified * that way, but this is traditionally the way IBM at least do things */ -u64 of_translate_address(struct device_node *dev, u32 *in_addr) +u64 of_translate_address(struct device_node *dev, const u32 *in_addr) { struct device_node *parent = NULL; struct of_bus *bus, *pbus; @@ -407,10 +407,10 @@ u64 of_translate_address(struct device_node *dev, u32 *in_addr) } EXPORT_SYMBOL(of_translate_address); -u32 *of_get_address(struct device_node *dev, int index, u64 *size, +const u32 *of_get_address(struct device_node *dev, int index, u64 *size, unsigned int *flags) { - u32 *prop; + const u32 *prop; unsigned int psize; struct device_node *parent; struct of_bus *bus; @@ -427,7 +427,7 @@ u32 *of_get_address(struct device_node *dev, int index, u64 *size, return NULL; /* Get "reg" or "assigned-addresses" property */ - prop = (u32 *)get_property(dev, bus->addresses, &psize); + prop = get_property(dev, bus->addresses, &psize); if (prop == NULL) return NULL; psize /= 4; @@ -445,10 +445,10 @@ u32 *of_get_address(struct device_node *dev, int index, u64 *size, } EXPORT_SYMBOL(of_get_address); -u32 *of_get_pci_address(struct device_node *dev, int bar_no, u64 *size, +const u32 *of_get_pci_address(struct device_node *dev, int bar_no, u64 *size, unsigned int *flags) { - u32 *prop; + const u32 *prop; unsigned int psize; struct device_node *parent; struct of_bus *bus; @@ -469,7 +469,7 @@ u32 *of_get_pci_address(struct device_node *dev, int bar_no, u64 *size, return NULL; /* Get "reg" or "assigned-addresses" property */ - prop = (u32 *)get_property(dev, bus->addresses, &psize); + prop = get_property(dev, bus->addresses, &psize); if (prop == NULL) return NULL; psize /= 4; @@ -487,7 +487,7 @@ u32 *of_get_pci_address(struct device_node *dev, int bar_no, u64 *size, } EXPORT_SYMBOL(of_get_pci_address); -static int __of_address_to_resource(struct device_node *dev, u32 *addrp, +static int __of_address_to_resource(struct device_node *dev, const u32 *addrp, u64 size, unsigned int flags, struct resource *r) { @@ -518,7 +518,7 @@ static int __of_address_to_resource(struct device_node *dev, u32 *addrp, int of_address_to_resource(struct device_node *dev, int index, struct resource *r) { - u32 *addrp; + const u32 *addrp; u64 size; unsigned int flags; @@ -532,7 +532,7 @@ EXPORT_SYMBOL_GPL(of_address_to_resource); int of_pci_address_to_resource(struct device_node *dev, int bar, struct resource *r) { - u32 *addrp; + const u32 *addrp; u64 size; unsigned int flags; @@ -543,13 +543,14 @@ int of_pci_address_to_resource(struct device_node *dev, int bar, } EXPORT_SYMBOL_GPL(of_pci_address_to_resource); -void of_parse_dma_window(struct device_node *dn, unsigned char *dma_window_prop, +void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop, unsigned long *busno, unsigned long *phys, unsigned long *size) { - u32 *dma_window, cells; - unsigned char *prop; + const u32 *dma_window; + u32 cells; + const unsigned char *prop; - dma_window = (u32 *)dma_window_prop; + dma_window = dma_window_prop; /* busno is always one cell */ *busno = *(dma_window++); @@ -578,13 +579,13 @@ static struct device_node *of_irq_dflt_pic; static struct device_node *of_irq_find_parent(struct device_node *child) { struct device_node *p; - phandle *parp; + const phandle *parp; if (!of_node_get(child)) return NULL; do { - parp = (phandle *)get_property(child, "interrupt-parent", NULL); + parp = get_property(child, "interrupt-parent", NULL); if (parp == NULL) p = of_get_parent(child); else { @@ -646,11 +647,11 @@ void of_irq_map_init(unsigned int flags) } -int of_irq_map_raw(struct device_node *parent, u32 *intspec, u32 *addr, - struct of_irq *out_irq) +int of_irq_map_raw(struct device_node *parent, const u32 *intspec, + const u32 *addr, struct of_irq *out_irq) { struct device_node *ipar, *tnode, *old = NULL, *newpar = NULL; - u32 *tmp, *imap, *imask; + const u32 *tmp, *imap, *imask; u32 intsize = 1, addrsize, newintsize = 0, newaddrsize = 0; int imaplen, match, i; @@ -661,7 +662,7 @@ int of_irq_map_raw(struct device_node *parent, u32 *intspec, u32 *addr, * is none, we are nice and just walk up the tree */ do { - tmp = (u32 *)get_property(ipar, "#interrupt-cells", NULL); + tmp = get_property(ipar, "#interrupt-cells", NULL); if (tmp != NULL) { intsize = *tmp; break; @@ -682,7 +683,7 @@ int of_irq_map_raw(struct device_node *parent, u32 *intspec, u32 *addr, */ old = of_node_get(ipar); do { - tmp = (u32 *)get_property(old, "#address-cells", NULL); + tmp = get_property(old, "#address-cells", NULL); tnode = of_get_parent(old); of_node_put(old); old = tnode; @@ -709,7 +710,7 @@ int of_irq_map_raw(struct device_node *parent, u32 *intspec, u32 *addr, } /* Now look for an interrupt-map */ - imap = (u32 *)get_property(ipar, "interrupt-map", &imaplen); + imap = get_property(ipar, "interrupt-map", &imaplen); /* No interrupt map, check for an interrupt parent */ if (imap == NULL) { DBG(" -> no map, getting parent\n"); @@ -719,7 +720,7 @@ int of_irq_map_raw(struct device_node *parent, u32 *intspec, u32 *addr, imaplen /= sizeof(u32); /* Look for a mask */ - imask = (u32 *)get_property(ipar, "interrupt-map-mask", NULL); + imask = get_property(ipar, "interrupt-map-mask", NULL); /* If we were passed no "reg" property and we attempt to parse * an interrupt-map, then #address-cells must be 0. @@ -766,14 +767,14 @@ int of_irq_map_raw(struct device_node *parent, u32 *intspec, u32 *addr, /* Get #interrupt-cells and #address-cells of new * parent */ - tmp = (u32 *)get_property(newpar, "#interrupt-cells", + tmp = get_property(newpar, "#interrupt-cells", NULL); if (tmp == NULL) { DBG(" -> parent lacks #interrupt-cells !\n"); goto fail; } newintsize = *tmp; - tmp = (u32 *)get_property(newpar, "#address-cells", + tmp = get_property(newpar, "#address-cells", NULL); newaddrsize = (tmp == NULL) ? 0 : *tmp; @@ -819,14 +820,14 @@ EXPORT_SYMBOL_GPL(of_irq_map_raw); static int of_irq_map_oldworld(struct device_node *device, int index, struct of_irq *out_irq) { - u32 *ints; + const u32 *ints; int intlen; /* * Old machines just have a list of interrupt numbers * and no interrupt-controller nodes. */ - ints = (u32 *) get_property(device, "AAPL,interrupts", &intlen); + ints = get_property(device, "AAPL,interrupts", &intlen); if (ints == NULL) return -EINVAL; intlen /= sizeof(u32); @@ -851,7 +852,8 @@ static int of_irq_map_oldworld(struct device_node *device, int index, int of_irq_map_one(struct device_node *device, int index, struct of_irq *out_irq) { struct device_node *p; - u32 *intspec, *tmp, intsize, intlen, *addr; + const u32 *intspec, *tmp, *addr; + u32 intsize, intlen; int res; DBG("of_irq_map_one: dev=%s, index=%d\n", device->full_name, index); @@ -861,13 +863,13 @@ int of_irq_map_one(struct device_node *device, int index, struct of_irq *out_irq return of_irq_map_oldworld(device, index, out_irq); /* Get the interrupts property */ - intspec = (u32 *)get_property(device, "interrupts", &intlen); + intspec = get_property(device, "interrupts", &intlen); if (intspec == NULL) return -EINVAL; intlen /= sizeof(u32); /* Get the reg property (if any) */ - addr = (u32 *)get_property(device, "reg", NULL); + addr = get_property(device, "reg", NULL); /* Look for the interrupt parent. */ p = of_irq_find_parent(device); @@ -875,7 +877,7 @@ int of_irq_map_one(struct device_node *device, int index, struct of_irq *out_irq return -EINVAL; /* Get size of interrupt specifier */ - tmp = (u32 *)get_property(p, "#interrupt-cells", NULL); + tmp = get_property(p, "#interrupt-cells", NULL); if (tmp == NULL) { of_node_put(p); return -EINVAL; diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index 9c9ad1fa9cce..2fe82abf1c52 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -246,12 +246,12 @@ struct file_operations ppc_rtas_rmo_buf_ops = { static int ppc_rtas_find_all_sensors(void); static void ppc_rtas_process_sensor(struct seq_file *m, - struct individual_sensor *s, int state, int error, char *loc); + struct individual_sensor *s, int state, int error, const char *loc); static char *ppc_rtas_process_error(int error); static void get_location_code(struct seq_file *m, - struct individual_sensor *s, char *loc); -static void check_location_string(struct seq_file *m, char *c); -static void check_location(struct seq_file *m, char *c); + struct individual_sensor *s, const char *loc); +static void check_location_string(struct seq_file *m, const char *c); +static void check_location(struct seq_file *m, const char *c); static int __init proc_rtas_init(void) { @@ -446,11 +446,11 @@ static int ppc_rtas_sensors_show(struct seq_file *m, void *v) for (i=0; itoken); - loc = (char *) get_property(rtas_node, rstr, &llen); + loc = get_property(rtas_node, rstr, &llen); /* A sensor may have multiple instances */ for (j = 0, offs = 0; j <= p->quant; j++) { @@ -474,10 +474,10 @@ static int ppc_rtas_sensors_show(struct seq_file *m, void *v) static int ppc_rtas_find_all_sensors(void) { - unsigned int *utmp; + const unsigned int *utmp; int len, i; - utmp = (unsigned int *) get_property(rtas_node, "rtas-sensors", &len); + utmp = get_property(rtas_node, "rtas-sensors", &len); if (utmp == NULL) { printk (KERN_ERR "error: could not get rtas-sensors\n"); return 1; @@ -530,7 +530,7 @@ static char *ppc_rtas_process_error(int error) */ static void ppc_rtas_process_sensor(struct seq_file *m, - struct individual_sensor *s, int state, int error, char *loc) + struct individual_sensor *s, int state, int error, const char *loc) { /* Defined return vales */ const char * key_switch[] = { "Off\t", "Normal\t", "Secure\t", @@ -682,7 +682,7 @@ static void ppc_rtas_process_sensor(struct seq_file *m, /* ****************************************************************** */ -static void check_location(struct seq_file *m, char *c) +static void check_location(struct seq_file *m, const char *c) { switch (c[0]) { case LOC_PLANAR: @@ -719,7 +719,7 @@ static void check_location(struct seq_file *m, char *c) * ${LETTER}${NUMBER}[[-/]${LETTER}${NUMBER} [ ... ] ] * the '.' may be an abbrevation */ -static void check_location_string(struct seq_file *m, char *c) +static void check_location_string(struct seq_file *m, const char *c) { while (*c) { if (isalpha(*c) || *c == '.') @@ -733,7 +733,8 @@ static void check_location_string(struct seq_file *m, char *c) /* ****************************************************************** */ -static void get_location_code(struct seq_file *m, struct individual_sensor *s, char *loc) +static void get_location_code(struct seq_file *m, struct individual_sensor *s, + const char *loc) { if (!loc || !*loc) { seq_printf(m, "---");/* does not have a location */ diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 4a4cb5598402..10e10be324c9 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -177,10 +177,12 @@ void __init udbg_init_rtas_console(void) void rtas_progress(char *s, unsigned short hex) { struct device_node *root; - int width, *p; + int width; + const int *p; char *os; static int display_character, set_indicator; - static int display_width, display_lines, *row_width, form_feed; + static int display_width, display_lines, form_feed; + const static int *row_width; static DEFINE_SPINLOCK(progress_lock); static int current_line; static int pending_newline = 0; /* did last write end with unprinted newline? */ @@ -191,16 +193,16 @@ void rtas_progress(char *s, unsigned short hex) if (display_width == 0) { display_width = 0x10; if ((root = find_path_device("/rtas"))) { - if ((p = (unsigned int *)get_property(root, + if ((p = get_property(root, "ibm,display-line-length", NULL))) display_width = *p; - if ((p = (unsigned int *)get_property(root, + if ((p = get_property(root, "ibm,form-feed", NULL))) form_feed = *p; - if ((p = (unsigned int *)get_property(root, + if ((p = get_property(root, "ibm,display-number-of-lines", NULL))) display_lines = *p; - row_width = (unsigned int *)get_property(root, + row_width = get_property(root, "ibm,display-truncation-length", NULL); } display_character = rtas_token("display-character"); @@ -293,10 +295,10 @@ EXPORT_SYMBOL(rtas_progress); /* needed by rtas_flash module */ int rtas_token(const char *service) { - int *tokp; + const int *tokp; if (rtas.dev == NULL) return RTAS_UNKNOWN_SERVICE; - tokp = (int *) get_property(rtas.dev, service, NULL); + tokp = get_property(rtas.dev, service, NULL); return tokp ? *tokp : RTAS_UNKNOWN_SERVICE; } EXPORT_SYMBOL(rtas_token); @@ -824,15 +826,15 @@ void __init rtas_initialize(void) */ rtas.dev = of_find_node_by_name(NULL, "rtas"); if (rtas.dev) { - u32 *basep, *entryp; - u32 *sizep; + const u32 *basep, *entryp, *sizep; - basep = (u32 *)get_property(rtas.dev, "linux,rtas-base", NULL); - sizep = (u32 *)get_property(rtas.dev, "rtas-size", NULL); + basep = get_property(rtas.dev, "linux,rtas-base", NULL); + sizep = get_property(rtas.dev, "rtas-size", NULL); if (basep != NULL && sizep != NULL) { rtas.base = *basep; rtas.size = *sizep; - entryp = (u32 *)get_property(rtas.dev, "linux,rtas-entry", NULL); + entryp = get_property(rtas.dev, + "linux,rtas-entry", NULL); if (entryp == NULL) /* Ugh */ rtas.entry = rtas.base; else diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index cda022657324..5a798ac6aecf 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -57,7 +57,7 @@ static inline int config_access_valid(struct pci_dn *dn, int where) static int of_device_available(struct device_node * dn) { - char * status; + const char *status; status = get_property(dn, "status", NULL); @@ -178,7 +178,7 @@ struct pci_ops rtas_pci_ops = { int is_python(struct device_node *dev) { - char *model = (char *)get_property(dev, "model", NULL); + const char *model = get_property(dev, "model", NULL); if (model && strstr(model, "Python")) return 1; @@ -234,7 +234,7 @@ void __init init_pci_config_tokens (void) unsigned long __devinit get_phb_buid (struct device_node *phb) { int addr_cells; - unsigned int *buid_vals; + const unsigned int *buid_vals; unsigned int len; unsigned long buid; @@ -247,7 +247,7 @@ unsigned long __devinit get_phb_buid (struct device_node *phb) if (phb->parent->parent) return 0; - buid_vals = (unsigned int *) get_property(phb, "reg", &len); + buid_vals = get_property(phb, "reg", &len); if (buid_vals == NULL) return 0; @@ -264,10 +264,10 @@ unsigned long __devinit get_phb_buid (struct device_node *phb) static int phb_set_bus_ranges(struct device_node *dev, struct pci_controller *phb) { - int *bus_range; + const int *bus_range; unsigned int len; - bus_range = (int *) get_property(dev, "bus-range", &len); + bus_range = get_property(dev, "bus-range", &len); if (bus_range == NULL || len < 2 * sizeof(int)) { return 1; } @@ -325,15 +325,15 @@ unsigned long __init find_and_init_phbs(void) * in chosen. */ if (of_chosen) { - int *prop; + const int *prop; - prop = (int *)get_property(of_chosen, "linux,pci-probe-only", - NULL); + prop = get_property(of_chosen, + "linux,pci-probe-only", NULL); if (prop) pci_probe_only = *prop; - prop = (int *)get_property(of_chosen, - "linux,pci-assign-all-buses", NULL); + prop = get_property(of_chosen, + "linux,pci-assign-all-buses", NULL); if (prop) pci_assign_all_buses = *prop; } diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index c6d7b98af7d5..aaf1727b3570 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -304,16 +304,15 @@ struct seq_operations cpuinfo_op = { void __init check_for_initrd(void) { #ifdef CONFIG_BLK_DEV_INITRD - unsigned long *prop; + const unsigned long *prop; DBG(" -> check_for_initrd()\n"); if (of_chosen) { - prop = (unsigned long *)get_property(of_chosen, - "linux,initrd-start", NULL); + prop = get_property(of_chosen, "linux,initrd-start", NULL); if (prop != NULL) { initrd_start = (unsigned long)__va(*prop); - prop = (unsigned long *)get_property(of_chosen, + prop = get_property(of_chosen, "linux,initrd-end", NULL); if (prop != NULL) { initrd_end = (unsigned long)__va(*prop); @@ -366,15 +365,14 @@ void __init smp_setup_cpu_maps(void) int cpu = 0; while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < NR_CPUS) { - int *intserv; + const int *intserv; int j, len = sizeof(u32), nthreads = 1; - intserv = (int *)get_property(dn, "ibm,ppc-interrupt-server#s", - &len); + intserv = get_property(dn, "ibm,ppc-interrupt-server#s", &len); if (intserv) nthreads = len / sizeof(int); else { - intserv = (int *) get_property(dn, "reg", NULL); + intserv = get_property(dn, "reg", NULL); if (!intserv) intserv = &cpu; /* assume logical == phys */ } @@ -395,13 +393,12 @@ void __init smp_setup_cpu_maps(void) if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR) && (dn = of_find_node_by_path("/rtas"))) { int num_addr_cell, num_size_cell, maxcpus; - unsigned int *ireg; + const unsigned int *ireg; num_addr_cell = prom_n_addr_cells(dn); num_size_cell = prom_n_size_cells(dn); - ireg = (unsigned int *) - get_property(dn, "ibm,lrdr-capacity", NULL); + ireg = get_property(dn, "ibm,lrdr-capacity", NULL); if (!ireg) goto out; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index e2447aef3a8f..77efe19ccd2c 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -106,7 +106,7 @@ static int smt_enabled_cmdline; static void check_smt_enabled(void) { struct device_node *dn; - char *smt_option; + const char *smt_option; /* Allow the command line to overrule the OF option */ if (smt_enabled_cmdline) @@ -115,7 +115,7 @@ static void check_smt_enabled(void) dn = of_find_node_by_path("/options"); if (dn) { - smt_option = (char *)get_property(dn, "ibm,smt-enabled", NULL); + smt_option = get_property(dn, "ibm,smt-enabled", NULL); if (smt_option) { if (!strcmp(smt_option, "on")) @@ -292,7 +292,7 @@ static void __init initialize_cache_info(void) */ if ( num_cpus == 1 ) { - u32 *sizep, *lsizep; + const u32 *sizep, *lsizep; u32 size, lsize; const char *dc, *ic; @@ -307,10 +307,10 @@ static void __init initialize_cache_info(void) size = 0; lsize = cur_cpu_spec->dcache_bsize; - sizep = (u32 *)get_property(np, "d-cache-size", NULL); + sizep = get_property(np, "d-cache-size", NULL); if (sizep != NULL) size = *sizep; - lsizep = (u32 *) get_property(np, dc, NULL); + lsizep = get_property(np, dc, NULL); if (lsizep != NULL) lsize = *lsizep; if (sizep == 0 || lsizep == 0) @@ -324,10 +324,10 @@ static void __init initialize_cache_info(void) size = 0; lsize = cur_cpu_spec->icache_bsize; - sizep = (u32 *)get_property(np, "i-cache-size", NULL); + sizep = get_property(np, "i-cache-size", NULL); if (sizep != NULL) size = *sizep; - lsizep = (u32 *)get_property(np, ic, NULL); + lsizep = get_property(np, ic, NULL); if (lsizep != NULL) lsize = *lsizep; if (sizep == 0 || lsizep == 0) diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 010435095550..1d724aef438a 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -60,7 +60,7 @@ static int smt_snooze_cmdline; static int __init smt_setup(void) { struct device_node *options; - unsigned int *val; + const unsigned int *val; unsigned int cpu; if (!cpu_has_feature(CPU_FTR_SMT)) @@ -70,8 +70,7 @@ static int __init smt_setup(void) if (!options) return -ENODEV; - val = (unsigned int *)get_property(options, "ibm,smt-snooze-delay", - NULL); + val = get_property(options, "ibm,smt-snooze-delay", NULL); if (!smt_snooze_cmdline && val) { for_each_possible_cpu(cpu) per_cpu(smt_snooze_delay, cpu) = *val; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 774c0a3c5019..8d4ccf061a4d 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -859,14 +859,14 @@ EXPORT_SYMBOL(do_settimeofday); static int __init get_freq(char *name, int cells, unsigned long *val) { struct device_node *cpu; - unsigned int *fp; + const unsigned int *fp; int found = 0; /* The cpu node should have timebase and clock frequency properties */ cpu = of_find_node_by_type(NULL, "cpu"); if (cpu) { - fp = (unsigned int *)get_property(cpu, name, NULL); + fp = get_property(cpu, name, NULL); if (fp) { found = 1; *val = 0; diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index fad8580f9081..cb87e71eec66 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -77,7 +77,7 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) } else #endif { - unsigned char *dma_window; + const unsigned char *dma_window; struct iommu_table *tbl; unsigned long offset, size; @@ -217,7 +217,7 @@ static void __devinit vio_dev_release(struct device *dev) struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node) { struct vio_dev *viodev; - unsigned int *unit_address; + const unsigned int *unit_address; /* we need the 'device_type' property, in order to match with drivers */ if (of_node->type == NULL) { @@ -227,7 +227,7 @@ struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node) return NULL; } - unit_address = (unsigned int *)get_property(of_node, "reg", NULL); + unit_address = get_property(of_node, "reg", NULL); if (unit_address == NULL) { printk(KERN_WARNING "%s: node %s missing 'reg'\n", __FUNCTION__, @@ -249,7 +249,7 @@ struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node) viodev->type = of_node->type; viodev->unit_address = *unit_address; if (firmware_has_feature(FW_FEATURE_ISERIES)) { - unit_address = (unsigned int *)get_property(of_node, + unit_address = get_property(of_node, "linux,unit_address", NULL); if (unit_address != NULL) viodev->unit_address = *unit_address; @@ -423,7 +423,7 @@ static int vio_hotplug(struct device *dev, char **envp, int num_envp, { const struct vio_dev *vio_dev = to_vio_dev(dev); struct device_node *dn = dev->platform_data; - char *cp; + const char *cp; int length; if (!num_envp) @@ -431,7 +431,7 @@ static int vio_hotplug(struct device *dev, char **envp, int num_envp, if (!dn) return -ENODEV; - cp = (char *)get_property(dn, "compatible", &length); + cp = get_property(dn, "compatible", &length); if (!cp) return -ENODEV; @@ -493,11 +493,11 @@ static struct vio_dev *vio_find_name(const char *kobj_name) */ struct vio_dev *vio_find_node(struct device_node *vnode) { - uint32_t *unit_address; + const uint32_t *unit_address; char kobj_name[BUS_ID_SIZE]; /* construct the kobject name from the device node */ - unit_address = (uint32_t *)get_property(vnode, "reg", NULL); + unit_address = get_property(vnode, "reg", NULL); if (!unit_address) return NULL; snprintf(kobj_name, BUS_ID_SIZE, "%x", *unit_address); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index fbe23933f731..6c0f1c7d83e5 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -159,12 +159,12 @@ static struct device_node * __cpuinit find_cpu_node(unsigned int cpu) { unsigned int hw_cpuid = get_hard_smp_processor_id(cpu); struct device_node *cpu_node = NULL; - unsigned int *interrupt_server, *reg; + const unsigned int *interrupt_server, *reg; int len; while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) { /* Try interrupt server first */ - interrupt_server = (unsigned int *)get_property(cpu_node, + interrupt_server = get_property(cpu_node, "ibm,ppc-interrupt-server#s", &len); len = len / sizeof(u32); @@ -175,8 +175,7 @@ static struct device_node * __cpuinit find_cpu_node(unsigned int cpu) return cpu_node; } } else { - reg = (unsigned int *)get_property(cpu_node, - "reg", &len); + reg = get_property(cpu_node, "reg", &len); if (reg && (len > 0) && (reg[0] == hw_cpuid)) return cpu_node; } @@ -186,9 +185,9 @@ static struct device_node * __cpuinit find_cpu_node(unsigned int cpu) } /* must hold reference to node during call */ -static int *of_get_associativity(struct device_node *dev) +static const int *of_get_associativity(struct device_node *dev) { - return (unsigned int *)get_property(dev, "ibm,associativity", NULL); + return get_property(dev, "ibm,associativity", NULL); } /* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa @@ -197,7 +196,7 @@ static int *of_get_associativity(struct device_node *dev) static int of_node_to_nid_single(struct device_node *device) { int nid = -1; - unsigned int *tmp; + const unsigned int *tmp; if (min_common_depth == -1) goto out; @@ -255,7 +254,7 @@ EXPORT_SYMBOL_GPL(of_node_to_nid); static int __init find_min_common_depth(void) { int depth; - unsigned int *ref_points; + const unsigned int *ref_points; struct device_node *rtas_root; unsigned int len; @@ -270,7 +269,7 @@ static int __init find_min_common_depth(void) * configuration (should be all 0's) and the second is for a normal * NUMA configuration. */ - ref_points = (unsigned int *)get_property(rtas_root, + ref_points = get_property(rtas_root, "ibm,associativity-reference-points", &len); if ((len >= 1) && ref_points) { @@ -297,7 +296,7 @@ static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells) of_node_put(memory); } -static unsigned long __devinit read_n_cells(int n, unsigned int **buf) +static unsigned long __devinit read_n_cells(int n, const unsigned int **buf) { unsigned long result = 0; @@ -435,15 +434,13 @@ static int __init parse_numa_properties(void) unsigned long size; int nid; int ranges; - unsigned int *memcell_buf; + const unsigned int *memcell_buf; unsigned int len; - memcell_buf = (unsigned int *)get_property(memory, + memcell_buf = get_property(memory, "linux,usable-memory", &len); if (!memcell_buf || len <= 0) - memcell_buf = - (unsigned int *)get_property(memory, "reg", - &len); + memcell_buf = get_property(memory, "reg", &len); if (!memcell_buf || len <= 0) continue; @@ -787,10 +784,10 @@ int hot_add_scn_to_nid(unsigned long scn_addr) while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { unsigned long start, size; int ranges; - unsigned int *memcell_buf; + const unsigned int *memcell_buf; unsigned int len; - memcell_buf = (unsigned int *)get_property(memory, "reg", &len); + memcell_buf = get_property(memory, "reg", &len); if (!memcell_buf || len <= 0) continue; diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index e983972132d8..07c47e8309ed 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -41,7 +41,7 @@ phys_addr_t get_immrbase(void) soc = of_find_node_by_type(NULL, "soc"); if (soc) { unsigned int size; - void *prop = get_property(soc, "reg", &size); + const void *prop = get_property(soc, "reg", &size); immrbase = of_translate_address(soc, prop); of_node_put(soc); }; @@ -86,8 +86,8 @@ static int __init gfar_mdio_of_init(void) while ((child = of_get_next_child(np, child)) != NULL) { if (child->n_intrs) { - u32 *id = - (u32 *) get_property(child, "reg", NULL); + const u32 *id = + get_property(child, "reg", NULL); mdio_data.irq[*id] = child->intrs[0].line; } } @@ -127,10 +127,10 @@ static int __init gfar_of_init(void) struct resource r[4]; struct device_node *phy, *mdio; struct gianfar_platform_data gfar_data; - unsigned int *id; - char *model; - void *mac_addr; - phandle *ph; + const unsigned int *id; + const char *model; + const void *mac_addr; + const phandle *ph; memset(r, 0, sizeof(r)); memset(&gfar_data, 0, sizeof(gfar_data)); @@ -188,7 +188,7 @@ static int __init gfar_of_init(void) FSL_GIANFAR_DEV_HAS_VLAN | FSL_GIANFAR_DEV_HAS_EXTENDED_HASH; - ph = (phandle *) get_property(np, "phy-handle", NULL); + ph = get_property(np, "phy-handle", NULL); phy = of_find_node_by_phandle(*ph); if (phy == NULL) { @@ -198,7 +198,7 @@ static int __init gfar_of_init(void) mdio = of_get_parent(phy); - id = (u32 *) get_property(phy, "reg", NULL); + id = get_property(phy, "reg", NULL); ret = of_address_to_resource(mdio, 0, &res); if (ret) { of_node_put(phy); @@ -242,7 +242,7 @@ static int __init fsl_i2c_of_init(void) i++) { struct resource r[2]; struct fsl_i2c_platform_data i2c_data; - unsigned char *flags = NULL; + const unsigned char *flags = NULL; memset(&r, 0, sizeof(r)); memset(&i2c_data, 0, sizeof(i2c_data)); @@ -294,7 +294,7 @@ static int __init mpc83xx_wdt_init(void) struct resource r; struct device_node *soc, *np; struct platform_device *dev; - unsigned int *freq; + const unsigned int *freq; int ret; np = of_find_compatible_node(NULL, "watchdog", "mpc83xx_wdt"); @@ -311,7 +311,7 @@ static int __init mpc83xx_wdt_init(void) goto nosoc; } - freq = (unsigned int *)get_property(soc, "bus-frequency", NULL); + freq = get_property(soc, "bus-frequency", NULL); if (!freq) { ret = -ENODEV; goto err; @@ -351,7 +351,7 @@ nodev: arch_initcall(mpc83xx_wdt_init); #endif -static enum fsl_usb2_phy_modes determine_usb_phy(char * phy_type) +static enum fsl_usb2_phy_modes determine_usb_phy(const char *phy_type) { if (!phy_type) return FSL_USB2_PHY_NONE; @@ -379,7 +379,7 @@ static int __init fsl_usb_of_init(void) i++) { struct resource r[2]; struct fsl_usb2_platform_data usb_data; - unsigned char *prop = NULL; + const unsigned char *prop = NULL; memset(&r, 0, sizeof(r)); memset(&usb_data, 0, sizeof(usb_data)); @@ -428,7 +428,7 @@ static int __init fsl_usb_of_init(void) i++) { struct resource r[2]; struct fsl_usb2_platform_data usb_data; - unsigned char *prop = NULL; + const unsigned char *prop = NULL; memset(&r, 0, sizeof(r)); memset(&usb_data, 0, sizeof(usb_data)); diff --git a/arch/powerpc/sysdev/mmio_nvram.c b/arch/powerpc/sysdev/mmio_nvram.c index 615350d46b52..ff23f5a4d4b9 100644 --- a/arch/powerpc/sysdev/mmio_nvram.c +++ b/arch/powerpc/sysdev/mmio_nvram.c @@ -80,7 +80,7 @@ static ssize_t mmio_nvram_get_size(void) int __init mmio_nvram_init(void) { struct device_node *nvram_node; - unsigned long *buffer; + const unsigned long *buffer; int proplen; unsigned long nvram_addr; int ret; @@ -91,7 +91,7 @@ int __init mmio_nvram_init(void) goto out; ret = -EIO; - buffer = (unsigned long *)get_property(nvram_node, "reg", &proplen); + buffer = get_property(nvram_node, "reg", &proplen); if (proplen != 2*sizeof(unsigned long)) goto out; diff --git a/include/asm-powerpc/ibmebus.h b/include/asm-powerpc/ibmebus.h index 7a42723d107c..7ab195a27888 100644 --- a/include/asm-powerpc/ibmebus.h +++ b/include/asm-powerpc/ibmebus.h @@ -48,7 +48,7 @@ extern struct dma_mapping_ops ibmebus_dma_ops; extern struct bus_type ibmebus_bus_type; struct ibmebus_dev { - char *name; + const char *name; struct of_device ofdev; }; diff --git a/include/asm-powerpc/prom.h b/include/asm-powerpc/prom.h index 56f6ea0c76de..abdf1be66e97 100644 --- a/include/asm-powerpc/prom.h +++ b/include/asm-powerpc/prom.h @@ -72,8 +72,8 @@ struct property { }; struct device_node { - char *name; - char *type; + const char *name; + const char *type; phandle node; phandle linux_phandle; char *full_name; @@ -209,15 +209,15 @@ static inline u64 of_read_number(const u32 *cell, int size) /* Translate an OF address block into a CPU physical address */ #define OF_BAD_ADDR ((u64)-1) -extern u64 of_translate_address(struct device_node *np, u32 *addr); +extern u64 of_translate_address(struct device_node *np, const u32 *addr); /* Extract an address from a device, returns the region size and * the address space flags too. The PCI version uses a BAR number * instead of an absolute index */ -extern u32 *of_get_address(struct device_node *dev, int index, +extern const u32 *of_get_address(struct device_node *dev, int index, u64 *size, unsigned int *flags); -extern u32 *of_get_pci_address(struct device_node *dev, int bar_no, +extern const u32 *of_get_pci_address(struct device_node *dev, int bar_no, u64 *size, unsigned int *flags); /* Get an address as a resource. Note that if your address is @@ -234,7 +234,7 @@ extern int of_pci_address_to_resource(struct device_node *dev, int bar, /* Parse the ibm,dma-window property of an OF node into the busno, phys and * size parameters. */ -void of_parse_dma_window(struct device_node *dn, unsigned char *dma_window_prop, +void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop, unsigned long *busno, unsigned long *phys, unsigned long *size); extern void kdump_move_device_tree(void); @@ -288,8 +288,8 @@ extern void of_irq_map_init(unsigned int flags); * */ -extern int of_irq_map_raw(struct device_node *parent, u32 *intspec, u32 *addr, - struct of_irq *out_irq); +extern int of_irq_map_raw(struct device_node *parent, const u32 *intspec, + const u32 *addr, struct of_irq *out_irq); /*** diff --git a/include/asm-powerpc/vio.h b/include/asm-powerpc/vio.h index dc9bd101ca14..4b51d42e1419 100644 --- a/include/asm-powerpc/vio.h +++ b/include/asm-powerpc/vio.h @@ -46,8 +46,8 @@ struct iommu_table; */ struct vio_dev { struct iommu_table *iommu_table; /* vio_map_* uses this */ - char *name; - char *type; + const char *name; + const char *type; uint32_t unit_address; unsigned int irq; struct device dev; From 954a46e2d5aec6f59976ddeb1d232b486e59b54a Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 12 Jul 2006 15:39:43 +1000 Subject: [PATCH 0045/1063] [POWERPC] pseries: Constify & voidify get_property() Now that get_property() returns a void *, there's no need to cast its return value. Also, treat the return value as const, so we can constify get_property later. pseries platform changes. Built for pseries_defconfig Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/pseries/eeh.c | 12 ++++---- arch/powerpc/platforms/pseries/eeh_driver.c | 4 +-- arch/powerpc/platforms/pseries/eeh_event.c | 4 +-- arch/powerpc/platforms/pseries/firmware.c | 2 +- arch/powerpc/platforms/pseries/iommu.c | 13 ++++----- arch/powerpc/platforms/pseries/lpar.c | 10 +++---- arch/powerpc/platforms/pseries/nvram.c | 5 ++-- arch/powerpc/platforms/pseries/pci.c | 2 +- arch/powerpc/platforms/pseries/ras.c | 4 +-- arch/powerpc/platforms/pseries/rtasd.c | 4 +-- arch/powerpc/platforms/pseries/setup.c | 12 ++++---- arch/powerpc/platforms/pseries/smp.c | 8 +++--- arch/powerpc/platforms/pseries/xics.c | 22 +++++++------- drivers/char/hvc_vio.c | 4 +-- drivers/char/hvsi.c | 7 ++--- drivers/pci/hotplug/rpaphp_core.c | 32 ++++++++++----------- 16 files changed, 72 insertions(+), 73 deletions(-) diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 32eaddfa5470..5a23ce5e16ff 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -691,11 +691,11 @@ static void *early_enable_eeh(struct device_node *dn, void *data) { struct eeh_early_enable_info *info = data; int ret; - char *status = get_property(dn, "status", NULL); - u32 *class_code = (u32 *)get_property(dn, "class-code", NULL); - u32 *vendor_id = (u32 *)get_property(dn, "vendor-id", NULL); - u32 *device_id = (u32 *)get_property(dn, "device-id", NULL); - u32 *regs; + const char *status = get_property(dn, "status", NULL); + const u32 *class_code = get_property(dn, "class-code", NULL); + const u32 *vendor_id = get_property(dn, "vendor-id", NULL); + const u32 *device_id = get_property(dn, "device-id", NULL); + const u32 *regs; int enable; struct pci_dn *pdn = PCI_DN(dn); @@ -737,7 +737,7 @@ static void *early_enable_eeh(struct device_node *dn, void *data) /* Ok... see if this device supports EEH. Some do, some don't, * and the only way to find out is to check each and every one. */ - regs = (u32 *)get_property(dn, "reg", NULL); + regs = get_property(dn, "reg", NULL); if (regs) { /* First register entry is addr (00BBSS00) */ /* Try to enable eeh */ diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index aaad2c0afcbf..3269d2cd428b 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -268,14 +268,14 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event) if (!frozen_dn) { - location = (char *) get_property(event->dn, "ibm,loc-code", NULL); + location = get_property(event->dn, "ibm,loc-code", NULL); location = location ? location : "unknown"; printk(KERN_ERR "EEH: Error: Cannot find partition endpoint " "for location=%s pci addr=%s\n", location, pci_name(event->dev)); return NULL; } - location = (char *) get_property(frozen_dn, "ibm,loc-code", NULL); + location = get_property(frozen_dn, "ibm,loc-code", NULL); location = location ? location : "unknown"; /* There are two different styles for coming up with the PE. diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c index 45ccc687e57c..137077451316 100644 --- a/arch/powerpc/platforms/pseries/eeh_event.c +++ b/arch/powerpc/platforms/pseries/eeh_event.c @@ -124,11 +124,11 @@ int eeh_send_failure_event (struct device_node *dn, { unsigned long flags; struct eeh_event *event; - char *location; + const char *location; if (!mem_init_done) { printk(KERN_ERR "EEH: event during early boot not handled\n"); - location = (char *) get_property(dn, "ibm,loc-code", NULL); + location = get_property(dn, "ibm,loc-code", NULL); printk(KERN_ERR "EEH: device node = %s\n", dn->full_name); printk(KERN_ERR "EEH: PCI location = %s\n", location); return 1; diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index c01d8f0cbe6d..1c7b2baa5f73 100644 --- a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -68,7 +68,7 @@ firmware_features_table[FIRMWARE_MAX_FEATURES] = { void __init fw_feature_init(void) { struct device_node *dn; - char *hypertas, *s; + const char *hypertas, *s; int len, i; DBG(" -> fw_feature_init()\n"); diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index d67af2c65754..bbf2e34dc358 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -267,13 +267,12 @@ static void iommu_table_setparms(struct pci_controller *phb, struct iommu_table *tbl) { struct device_node *node; - unsigned long *basep; - unsigned int *sizep; + const unsigned long *basep, *sizep; node = (struct device_node *)phb->arch_data; - basep = (unsigned long *)get_property(node, "linux,tce-base", NULL); - sizep = (unsigned int *)get_property(node, "linux,tce-size", NULL); + basep = get_property(node, "linux,tce-base", NULL); + sizep = get_property(node, "linux,tce-size", NULL); if (basep == NULL || sizep == NULL) { printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %s has " "missing tce entries !\n", dn->full_name); @@ -315,7 +314,7 @@ static void iommu_table_setparms(struct pci_controller *phb, static void iommu_table_setparms_lpar(struct pci_controller *phb, struct device_node *dn, struct iommu_table *tbl, - unsigned char *dma_window) + const void *dma_window) { unsigned long offset, size; @@ -415,7 +414,7 @@ static void iommu_bus_setup_pSeriesLP(struct pci_bus *bus) struct iommu_table *tbl; struct device_node *dn, *pdn; struct pci_dn *ppci; - unsigned char *dma_window = NULL; + const void *dma_window = NULL; DBG("iommu_bus_setup_pSeriesLP, bus %p, bus->self %p\n", bus, bus->self); @@ -519,7 +518,7 @@ static void iommu_dev_setup_pSeriesLP(struct pci_dev *dev) { struct device_node *pdn, *dn; struct iommu_table *tbl; - unsigned char *dma_window = NULL; + const void *dma_window = NULL; struct pci_dn *pci; DBG("iommu_dev_setup_pSeriesLP, dev %p (%s)\n", dev, pci_name(dev)); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 3aeb40699042..4cb7ff227f72 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -204,20 +204,20 @@ void __init udbg_init_debug_lpar(void) void __init find_udbg_vterm(void) { struct device_node *stdout_node; - u32 *termno; - char *name; + const u32 *termno; + const char *name; int add_console; /* find the boot console from /chosen/stdout */ if (!of_chosen) return; - name = (char *)get_property(of_chosen, "linux,stdout-path", NULL); + name = get_property(of_chosen, "linux,stdout-path", NULL); if (name == NULL) return; stdout_node = of_find_node_by_path(name); if (!stdout_node) return; - name = (char *)get_property(stdout_node, "name", NULL); + name = get_property(stdout_node, "name", NULL); if (!name) { printk(KERN_WARNING "stdout node missing 'name' property!\n"); goto out; @@ -228,7 +228,7 @@ void __init find_udbg_vterm(void) /* Check if it's a virtual terminal */ if (strncmp(name, "vty", 3) != 0) goto out; - termno = (u32 *)get_property(stdout_node, "reg", NULL); + termno = get_property(stdout_node, "reg", NULL); if (termno == NULL) goto out; vtermno = termno[0]; diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index 18abfb1f4e24..64163cecdf93 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -123,13 +123,14 @@ static ssize_t pSeries_nvram_get_size(void) int __init pSeries_nvram_init(void) { struct device_node *nvram; - unsigned int *nbytes_p, proplen; + const unsigned int *nbytes_p; + unsigned int proplen; nvram = of_find_node_by_type(NULL, "nvram"); if (nvram == NULL) return -ENODEV; - nbytes_p = (unsigned int *)get_property(nvram, "#bytes", &proplen); + nbytes_p = get_property(nvram, "#bytes", &proplen); if (nbytes_p == NULL || proplen != sizeof(unsigned int)) return -EIO; diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index e97e67f5e079..410a6bcc4ca0 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -60,7 +60,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device); static void __devinit check_s7a(void) { struct device_node *root; - char *model; + const char *model; s7a_workaround = 0; root = of_find_node_by_path("/"); diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 9df783088b61..0e6339ee45a1 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -79,7 +79,7 @@ static void request_ras_irqs(struct device_node *np, { int i, index, count = 0; struct of_irq oirq; - u32 *opicprop; + const u32 *opicprop; unsigned int opicplen; unsigned int virqs[16]; @@ -87,7 +87,7 @@ static void request_ras_irqs(struct device_node *np, * map those interrupts using the default interrupt host and default * trigger */ - opicprop = (u32 *)get_property(np, "open-pic-interrupt", &opicplen); + opicprop = get_property(np, "open-pic-interrupt", &opicplen); if (opicprop) { opicplen /= sizeof(u32); for (i = 0; i < opicplen; i++) { diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/platforms/pseries/rtasd.c index 2e4e04042d85..8ca2612221d6 100644 --- a/arch/powerpc/platforms/pseries/rtasd.c +++ b/arch/powerpc/platforms/pseries/rtasd.c @@ -359,11 +359,11 @@ static int enable_surveillance(int timeout) static int get_eventscan_parms(void) { struct device_node *node; - int *ip; + const int *ip; node = of_find_node_by_path("/rtas"); - ip = (int *)get_property(node, "rtas-event-scan-rate", NULL); + ip = get_property(node, "rtas-event-scan-rate", NULL); if (ip == NULL) { printk(KERN_ERR "rtasd: no rtas-event-scan-rate\n"); of_node_put(node); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 54a52437265c..927e0a423b87 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -133,9 +133,9 @@ void pseries_8259_cascade(unsigned int irq, struct irq_desc *desc, static void __init pseries_mpic_init_IRQ(void) { struct device_node *np, *old, *cascade = NULL; - unsigned int *addrp; + const unsigned int *addrp; unsigned long intack = 0; - unsigned int *opprop; + const unsigned int *opprop; unsigned long openpic_addr = 0; unsigned int cascade_irq; int naddr, n, i, opplen; @@ -143,7 +143,7 @@ static void __init pseries_mpic_init_IRQ(void) np = of_find_node_by_path("/"); naddr = prom_n_addr_cells(np); - opprop = (unsigned int *) get_property(np, "platform-open-pic", &opplen); + opprop = get_property(np, "platform-open-pic", &opplen); if (opprop != 0) { openpic_addr = of_read_number(opprop, naddr); printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr); @@ -192,7 +192,7 @@ static void __init pseries_mpic_init_IRQ(void) break; if (strcmp(np->name, "pci") != 0) continue; - addrp = (u32 *)get_property(np, "8259-interrupt-acknowledge", + addrp = get_property(np, "8259-interrupt-acknowledge", NULL); if (addrp == NULL) continue; @@ -249,11 +249,11 @@ static void pseries_kexec_cpu_down_xics(int crash_shutdown, int secondary) static void __init pseries_discover_pic(void) { struct device_node *np; - char *typep; + const char *typep; for (np = NULL; (np = of_find_node_by_name(np, "interrupt-controller"));) { - typep = (char *)get_property(np, "compatible", NULL); + typep = get_property(np, "compatible", NULL); if (strstr(typep, "open-pic")) { pSeries_mpic_node = of_node_get(np); ppc_md.init_IRQ = pseries_mpic_init_IRQ; diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index ac61098ff401..f39dad8b99e0 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -145,9 +145,9 @@ static int pSeries_add_processor(struct device_node *np) unsigned int cpu; cpumask_t candidate_map, tmp = CPU_MASK_NONE; int err = -ENOSPC, len, nthreads, i; - u32 *intserv; + const u32 *intserv; - intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", &len); + intserv = get_property(np, "ibm,ppc-interrupt-server#s", &len); if (!intserv) return 0; @@ -205,9 +205,9 @@ static void pSeries_remove_processor(struct device_node *np) { unsigned int cpu; int len, nthreads, i; - u32 *intserv; + const u32 *intserv; - intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", &len); + intserv = get_property(np, "ibm,ppc-interrupt-server#s", &len); if (!intserv) return; diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index 716972aa9777..756421049441 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -604,14 +604,14 @@ static void __init xics_init_one_node(struct device_node *np, unsigned int *indx) { unsigned int ilen; - u32 *ireg; + const u32 *ireg; /* This code does the theorically broken assumption that the interrupt * server numbers are the same as the hard CPU numbers. * This happens to be the case so far but we are playing with fire... * should be fixed one of these days. -BenH. */ - ireg = (u32 *)get_property(np, "ibm,interrupt-server-ranges", NULL); + ireg = get_property(np, "ibm,interrupt-server-ranges", NULL); /* Do that ever happen ? we'll know soon enough... but even good'old * f80 does have that property .. @@ -623,7 +623,7 @@ static void __init xics_init_one_node(struct device_node *np, */ *indx = *ireg; } - ireg = (u32 *)get_property(np, "reg", &ilen); + ireg = get_property(np, "reg", &ilen); if (!ireg) panic("xics_init_IRQ: can't find interrupt reg property"); @@ -649,7 +649,7 @@ static void __init xics_setup_8259_cascade(void) { struct device_node *np, *old, *found = NULL; int cascade, naddr; - u32 *addrp; + const u32 *addrp; unsigned long intack = 0; for_each_node_by_type(np, "interrupt-controller") @@ -675,7 +675,7 @@ static void __init xics_setup_8259_cascade(void) break; if (strcmp(np->name, "pci") != 0) continue; - addrp = (u32 *)get_property(np, "8259-interrupt-acknowledge", NULL); + addrp = get_property(np, "8259-interrupt-acknowledge", NULL); if (addrp == NULL) continue; naddr = prom_n_addr_cells(np); @@ -694,7 +694,8 @@ void __init xics_init_IRQ(void) { int i; struct device_node *np; - u32 *ireg, ilen, indx = 0; + u32 ilen, indx = 0; + const u32 *ireg; int found = 0; ppc64_boot_msg(0x20, "XICS Init"); @@ -719,18 +720,17 @@ void __init xics_init_IRQ(void) for (np = of_find_node_by_type(NULL, "cpu"); np; np = of_find_node_by_type(np, "cpu")) { - ireg = (u32 *)get_property(np, "reg", &ilen); + ireg = get_property(np, "reg", &ilen); if (ireg && ireg[0] == get_hard_smp_processor_id(boot_cpuid)) { - ireg = (u32 *)get_property(np, - "ibm,ppc-interrupt-gserver#s", - &ilen); + ireg = get_property(np, + "ibm,ppc-interrupt-gserver#s", &ilen); i = ilen / sizeof(int); if (ireg && i > 0) { default_server = ireg[0]; /* take last element */ default_distrib_server = ireg[i-1]; } - ireg = (u32 *)get_property(np, + ireg = get_property(np, "ibm,interrupt-server#-size", NULL); if (ireg) interrupt_server_size = *ireg; diff --git a/drivers/char/hvc_vio.c b/drivers/char/hvc_vio.c index 651e5d25f58b..cc95941148fb 100644 --- a/drivers/char/hvc_vio.c +++ b/drivers/char/hvc_vio.c @@ -141,7 +141,7 @@ static int hvc_find_vtys(void) for (vty = of_find_node_by_name(NULL, "vty"); vty != NULL; vty = of_find_node_by_name(vty, "vty")) { - uint32_t *vtermno; + const uint32_t *vtermno; /* We have statically defined space for only a certain number * of console adapters. @@ -149,7 +149,7 @@ static int hvc_find_vtys(void) if (num_found >= MAX_NR_HVC_CONSOLES) break; - vtermno = (uint32_t *)get_property(vty, "reg", NULL); + vtermno = get_property(vty, "reg", NULL); if (!vtermno) continue; diff --git a/drivers/char/hvsi.c b/drivers/char/hvsi.c index 56612a2dca6b..542de0e51f35 100644 --- a/drivers/char/hvsi.c +++ b/drivers/char/hvsi.c @@ -1276,11 +1276,10 @@ static int __init hvsi_console_init(void) vty != NULL; vty = of_find_compatible_node(vty, "serial", "hvterm-protocol")) { struct hvsi_struct *hp; - uint32_t *vtermno; - uint32_t *irq; + const uint32_t *vtermno, *irq; - vtermno = (uint32_t *)get_property(vty, "reg", NULL); - irq = (uint32_t *)get_property(vty, "interrupts", NULL); + vtermno = get_property(vty, "reg", NULL); + irq = get_property(vty, "interrupts", NULL); if (!vtermno || !irq) continue; diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c index 076bd6dcafae..7288a3eccfb3 100644 --- a/drivers/pci/hotplug/rpaphp_core.c +++ b/drivers/pci/hotplug/rpaphp_core.c @@ -176,16 +176,16 @@ static int get_max_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_spe return 0; } -static int get_children_props(struct device_node *dn, int **drc_indexes, - int **drc_names, int **drc_types, int **drc_power_domains) +static int get_children_props(struct device_node *dn, const int **drc_indexes, + const int **drc_names, const int **drc_types, + const int **drc_power_domains) { - int *indexes, *names; - int *types, *domains; + const int *indexes, *names, *types, *domains; - indexes = (int *) get_property(dn, "ibm,drc-indexes", NULL); - names = (int *) get_property(dn, "ibm,drc-names", NULL); - types = (int *) get_property(dn, "ibm,drc-types", NULL); - domains = (int *) get_property(dn, "ibm,drc-power-domains", NULL); + indexes = get_property(dn, "ibm,drc-indexes", NULL); + names = get_property(dn, "ibm,drc-names", NULL); + types = get_property(dn, "ibm,drc-types", NULL); + domains = get_property(dn, "ibm,drc-power-domains", NULL); if (!indexes || !names || !types || !domains) { /* Slot does not have dynamically-removable children */ @@ -212,13 +212,13 @@ static int get_children_props(struct device_node *dn, int **drc_indexes, int rpaphp_get_drc_props(struct device_node *dn, int *drc_index, char **drc_name, char **drc_type, int *drc_power_domain) { - int *indexes, *names; - int *types, *domains; - unsigned int *my_index; + const int *indexes, *names; + const int *types, *domains; + const unsigned int *my_index; char *name_tmp, *type_tmp; int i, rc; - my_index = (int *) get_property(dn, "ibm,my-drc-index", NULL); + my_index = get_property(dn, "ibm,my-drc-index", NULL); if (!my_index) { /* Node isn't DLPAR/hotplug capable */ return -EINVAL; @@ -265,10 +265,10 @@ static int is_php_type(char *drc_type) return 1; } -static int is_php_dn(struct device_node *dn, int **indexes, int **names, - int **types, int **power_domains) +static int is_php_dn(struct device_node *dn, const int **indexes, + const int **names, const int **types, const int **power_domains) { - int *drc_types; + const int *drc_types; int rc; rc = get_children_props(dn, indexes, names, &drc_types, power_domains); @@ -296,7 +296,7 @@ int rpaphp_add_slot(struct device_node *dn) struct slot *slot; int retval = 0; int i; - int *indexes, *names, *types, *power_domains; + const int *indexes, *names, *types, *power_domains; char *name, *type; dbg("Entry %s: dn->full_name=%s\n", __FUNCTION__, dn->full_name); From c4c7cba90cf9f180a2c45f7e54143f786360f3dd Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 12 Jul 2006 15:39:42 +1000 Subject: [PATCH 0046/1063] [POWERPC] iseries: Constify & voidify get_property() Now that get_property() returns a void *, there's no need to cast its return value. Also, treat the return value as const, so we can constify get_property later. iseries platform changes. Built for iseries_defconfig Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/iseries/iommu.c | 2 +- arch/powerpc/platforms/iseries/pci.c | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c index 2c3dbcd4613c..f4cbbcf8773a 100644 --- a/arch/powerpc/platforms/iseries/iommu.c +++ b/arch/powerpc/platforms/iseries/iommu.c @@ -179,7 +179,7 @@ void iommu_devnode_init_iSeries(struct device_node *dn) { struct iommu_table *tbl; struct pci_dn *pdn = PCI_DN(dn); - u32 *lsn = (u32 *)get_property(dn, "linux,logical-slot-number", NULL); + const u32 *lsn = get_property(dn, "linux,logical-slot-number", NULL); BUG_ON(lsn == NULL); diff --git a/arch/powerpc/platforms/iseries/pci.c b/arch/powerpc/platforms/iseries/pci.c index 35bcc98111f5..f4d427a7bb2d 100644 --- a/arch/powerpc/platforms/iseries/pci.c +++ b/arch/powerpc/platforms/iseries/pci.c @@ -176,12 +176,12 @@ void iSeries_pcibios_init(void) } while ((node = of_get_next_child(root, node)) != NULL) { HvBusNumber bus; - u32 *busp; + const u32 *busp; if ((node->type == NULL) || (strcmp(node->type, "pci") != 0)) continue; - busp = (u32 *)get_property(node, "bus-range", NULL); + busp = get_property(node, "bus-range", NULL); if (busp == NULL) continue; bus = *busp; @@ -221,10 +221,9 @@ void __init iSeries_pci_final_fixup(void) if (node != NULL) { struct pci_dn *pdn = PCI_DN(node); - u32 *agent; + const u32 *agent; - agent = (u32 *)get_property(node, "linux,agent-id", - NULL); + agent = get_property(node, "linux,agent-id", NULL); if ((pdn != NULL) && (agent != NULL)) { u8 irq = iSeries_allocate_IRQ(pdn->busno, 0, pdn->bussubno); From 8efca49329a50710d656a8bb78d6f0f0e2f48a26 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 12 Jul 2006 15:39:42 +1000 Subject: [PATCH 0047/1063] [POWERPC] mpc: Constify & voidify get_property() Now that get_property() returns a void *, there's no need to cast its return value. Also, treat the return value as const, so we can constify get_property later. mpc* platform changes. Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/83xx/mpc834x_itx.c | 4 ++-- arch/powerpc/platforms/83xx/mpc834x_sys.c | 4 ++-- arch/powerpc/platforms/83xx/pci.c | 4 ++-- arch/powerpc/platforms/85xx/mpc85xx_ads.c | 4 ++-- arch/powerpc/platforms/85xx/mpc85xx_cds.c | 4 ++-- arch/powerpc/platforms/85xx/pci.c | 4 ++-- arch/powerpc/platforms/86xx/mpc86xx_hpcn.c | 4 ++-- arch/powerpc/platforms/86xx/pci.c | 4 ++-- arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c | 8 ++++---- 9 files changed, 20 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/platforms/83xx/mpc834x_itx.c b/arch/powerpc/platforms/83xx/mpc834x_itx.c index b46305645d38..d9675f9b9766 100644 --- a/arch/powerpc/platforms/83xx/mpc834x_itx.c +++ b/arch/powerpc/platforms/83xx/mpc834x_itx.c @@ -80,8 +80,8 @@ static void __init mpc834x_itx_setup_arch(void) np = of_find_node_by_type(NULL, "cpu"); if (np != 0) { - unsigned int *fp = - (int *)get_property(np, "clock-frequency", NULL); + const unsigned int *fp = + get_property(np, "clock-frequency", NULL); if (fp != 0) loops_per_jiffy = *fp / HZ; else diff --git a/arch/powerpc/platforms/83xx/mpc834x_sys.c b/arch/powerpc/platforms/83xx/mpc834x_sys.c index 3e1c16eb4a63..5eadf9d035f1 100644 --- a/arch/powerpc/platforms/83xx/mpc834x_sys.c +++ b/arch/powerpc/platforms/83xx/mpc834x_sys.c @@ -84,8 +84,8 @@ static void __init mpc834x_sys_setup_arch(void) np = of_find_node_by_type(NULL, "cpu"); if (np != 0) { - unsigned int *fp = - (int *)get_property(np, "clock-frequency", NULL); + const unsigned int *fp = + get_property(np, "clock-frequency", NULL); if (fp != 0) loops_per_jiffy = *fp / HZ; else diff --git a/arch/powerpc/platforms/83xx/pci.c b/arch/powerpc/platforms/83xx/pci.c index 3b5e563c279f..9c3650555144 100644 --- a/arch/powerpc/platforms/83xx/pci.c +++ b/arch/powerpc/platforms/83xx/pci.c @@ -50,7 +50,7 @@ int __init add_bridge(struct device_node *dev) int len; struct pci_controller *hose; struct resource rsrc; - int *bus_range; + const int *bus_range; int primary = 1, has_address = 0; phys_addr_t immr = get_immrbase(); @@ -60,7 +60,7 @@ int __init add_bridge(struct device_node *dev) has_address = (of_address_to_resource(dev, 0, &rsrc) == 0); /* Get bus range if any */ - bus_range = (int *)get_property(dev, "bus-range", &len); + bus_range = get_property(dev, "bus-range", &len); if (bus_range == NULL || len < 2 * sizeof(int)) { printk(KERN_WARNING "Can't get bus-range for %s, assume" " bus 0\n", dev->full_name); diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ads.c b/arch/powerpc/platforms/85xx/mpc85xx_ads.c index 06a497676c99..d0cfcdb1d1b5 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_ads.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_ads.c @@ -172,9 +172,9 @@ static void __init mpc85xx_ads_setup_arch(void) cpu = of_find_node_by_type(NULL, "cpu"); if (cpu != 0) { - unsigned int *fp; + const unsigned int *fp; - fp = (int *)get_property(cpu, "clock-frequency", NULL); + fp = get_property(cpu, "clock-frequency", NULL); if (fp != 0) loops_per_jiffy = *fp / HZ; else diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c index 18e6e11f7020..5fd53eba6912 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c @@ -273,9 +273,9 @@ mpc85xx_cds_setup_arch(void) cpu = of_find_node_by_type(NULL, "cpu"); if (cpu != 0) { - unsigned int *fp; + const unsigned int *fp; - fp = (int *)get_property(cpu, "clock-frequency", NULL); + fp = get_property(cpu, "clock-frequency", NULL); if (fp != 0) loops_per_jiffy = *fp / HZ; else diff --git a/arch/powerpc/platforms/85xx/pci.c b/arch/powerpc/platforms/85xx/pci.c index 1d51f3242ab1..05930eeb6e7f 100644 --- a/arch/powerpc/platforms/85xx/pci.c +++ b/arch/powerpc/platforms/85xx/pci.c @@ -41,7 +41,7 @@ int __init add_bridge(struct device_node *dev) int len; struct pci_controller *hose; struct resource rsrc; - int *bus_range; + const int *bus_range; int primary = 1, has_address = 0; phys_addr_t immr = get_immrbase(); @@ -51,7 +51,7 @@ int __init add_bridge(struct device_node *dev) has_address = (of_address_to_resource(dev, 0, &rsrc) == 0); /* Get bus range if any */ - bus_range = (int *) get_property(dev, "bus-range", &len); + bus_range = get_property(dev, "bus-range", &len); if (bus_range == NULL || len < 2 * sizeof(int)) { printk(KERN_WARNING "Can't get bus-range for %s, assume" " bus 0\n", dev->full_name); diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c index ebae73eb0063..839090682ab2 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c @@ -323,9 +323,9 @@ mpc86xx_hpcn_setup_arch(void) np = of_find_node_by_type(NULL, "cpu"); if (np != 0) { - unsigned int *fp; + const unsigned int *fp; - fp = (int *)get_property(np, "clock-frequency", NULL); + fp = get_property(np, "clock-frequency", NULL); if (fp != 0) loops_per_jiffy = *fp / HZ; else diff --git a/arch/powerpc/platforms/86xx/pci.c b/arch/powerpc/platforms/86xx/pci.c index bc5139043112..d7050c1108ff 100644 --- a/arch/powerpc/platforms/86xx/pci.c +++ b/arch/powerpc/platforms/86xx/pci.c @@ -153,7 +153,7 @@ int __init add_bridge(struct device_node *dev) int len; struct pci_controller *hose; struct resource rsrc; - int *bus_range; + const int *bus_range; int has_address = 0; int primary = 0; @@ -163,7 +163,7 @@ int __init add_bridge(struct device_node *dev) has_address = (of_address_to_resource(dev, 0, &rsrc) == 0); /* Get bus range if any */ - bus_range = (int *) get_property(dev, "bus-range", &len); + bus_range = get_property(dev, "bus-range", &len); if (bus_range == NULL || len < 2 * sizeof(int)) printk(KERN_WARNING "Can't get bus-range for %s, assume" " bus 0\n", dev->full_name); diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c index d7a4fc7ca238..69c998cb4f1b 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c +++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c @@ -130,7 +130,7 @@ void mpc7448_hpc2_fixup_irq(struct pci_dev *dev) { struct pci_controller *hose; struct device_node *node; - unsigned int *interrupt; + const unsigned int *interrupt; int busnr; int len; u8 slot; @@ -147,7 +147,7 @@ void mpc7448_hpc2_fixup_irq(struct pci_dev *dev) if (!node) printk(KERN_ERR "No pci node found\n"); - interrupt = (unsigned int *) get_property(node, "interrupt-map", &len); + interrupt = get_property(node, "interrupt-map", &len); slot = find_slot_by_devfn(interrupt, dev->devfn); pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); if (pin == 0 || pin > 4) @@ -176,9 +176,9 @@ static void __init mpc7448_hpc2_setup_arch(void) cpu = of_find_node_by_type(NULL, "cpu"); if (cpu != 0) { - unsigned int *fp; + const unsigned int *fp; - fp = (int *)get_property(cpu, "clock-frequency", NULL); + fp = get_property(cpu, "clock-frequency", NULL); if (fp != 0) loops_per_jiffy = *fp / HZ; else From c61c27d58af61e5b78257019b173732c29ce0c64 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 12 Jul 2006 15:39:54 +1000 Subject: [PATCH 0048/1063] [POWERPC] cell: Constify & voidify get_property() Now that get_property() returns a void *, there's no need to cast its return value. Also, treat the return value as const, so we can constify get_property later. cell platform changes. Built for cell_defconfig Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/cbe_regs.c | 8 +++----- arch/powerpc/platforms/cell/interrupt.c | 5 ++--- arch/powerpc/platforms/cell/iommu.c | 22 ++++++++++++---------- arch/powerpc/platforms/cell/spider-pic.c | 10 +++++----- arch/powerpc/platforms/cell/spu_base.c | 18 +++++++++--------- include/asm-powerpc/spu.h | 2 +- 6 files changed, 32 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c index ce696c1cca75..3f3859d12e00 100644 --- a/arch/powerpc/platforms/cell/cbe_regs.c +++ b/arch/powerpc/platforms/cell/cbe_regs.c @@ -97,7 +97,7 @@ void __init cbe_regs_init(void) struct cbe_regs_map *map = &cbe_regs_maps[cbe_regs_map_count++]; /* That hack must die die die ! */ - struct address_prop { + const struct address_prop { unsigned long address; unsigned int len; } __attribute__((packed)) *prop; @@ -114,13 +114,11 @@ void __init cbe_regs_init(void) if (cbe_thread_map[i].cpu_node == cpu) cbe_thread_map[i].regs = map; - prop = (struct address_prop *)get_property(cpu, "pervasive", - NULL); + prop = get_property(cpu, "pervasive", NULL); if (prop != NULL) map->pmd_regs = ioremap(prop->address, prop->len); - prop = (struct address_prop *)get_property(cpu, "iic", - NULL); + prop = get_property(cpu, "iic", NULL); if (prop != NULL) map->iic_regs = ioremap(prop->address, prop->len); } diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 9d5da7896892..b26b496f6548 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -250,15 +250,14 @@ static int __init setup_iic(void) struct resource r0, r1; struct irq_host *host; int found = 0; - u32 *np; + const u32 *np; for (dn = NULL; (dn = of_find_node_by_name(dn,"interrupt-controller")) != NULL;) { if (!device_is_compatible(dn, "IBM,CBEA-Internal-Interrupt-Controller")) continue; - np = (u32 *)get_property(dn, "ibm,interrupt-server-ranges", - NULL); + np = get_property(dn, "ibm,interrupt-server-ranges", NULL); if (np == NULL) { printk(KERN_WARNING "IIC: CPU association not found\n"); of_node_put(dn); diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index a35004e14c69..d2b20eba5b87 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -308,15 +308,16 @@ static void cell_do_map_iommu(struct cell_iommu *iommu, static void iommu_devnode_setup(struct device_node *d) { - unsigned int *ioid; - unsigned long *dma_window, map_start, map_size, token; + const unsigned int *ioid; + unsigned long map_start, map_size, token; + const unsigned long *dma_window; struct cell_iommu *iommu; - ioid = (unsigned int *)get_property(d, "ioid", NULL); + ioid = get_property(d, "ioid", NULL); if (!ioid) pr_debug("No ioid entry found !\n"); - dma_window = (unsigned long *)get_property(d, "ibm,dma-window", NULL); + dma_window = get_property(d, "ibm,dma-window", NULL); if (!dma_window) pr_debug("No ibm,dma-window entry found !\n"); @@ -371,8 +372,9 @@ static int cell_map_iommu_hardcoded(int num_nodes) static int cell_map_iommu(void) { - unsigned int num_nodes = 0, *node_id; - unsigned long *base, *mmio_base; + unsigned int num_nodes = 0; + const unsigned int *node_id; + const unsigned long *base, *mmio_base; struct device_node *dn; struct cell_iommu *iommu = NULL; @@ -381,7 +383,7 @@ static int cell_map_iommu(void) for(dn = of_find_node_by_type(NULL, "cpu"); dn; dn = of_find_node_by_type(dn, "cpu")) { - node_id = (unsigned int *)get_property(dn, "node-id", NULL); + node_id = get_property(dn, "node-id", NULL); if (num_nodes < *node_id) num_nodes = *node_id; @@ -396,9 +398,9 @@ static int cell_map_iommu(void) dn; dn = of_find_node_by_type(dn, "cpu")) { - node_id = (unsigned int *)get_property(dn, "node-id", NULL); - base = (unsigned long *)get_property(dn, "ioc-cache", NULL); - mmio_base = (unsigned long *)get_property(dn, "ioc-translation", NULL); + node_id = get_property(dn, "node-id", NULL); + base = get_property(dn, "ioc-cache", NULL); + mmio_base = get_property(dn, "ioc-translation", NULL); if (!base || !mmio_base || !node_id) return cell_map_iommu_hardcoded(num_nodes); diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c index ae7ef88f1a37..ab4c252a4d9b 100644 --- a/arch/powerpc/platforms/cell/spider-pic.c +++ b/arch/powerpc/platforms/cell/spider-pic.c @@ -230,7 +230,7 @@ static void spider_irq_cascade(unsigned int irq, struct irq_desc *desc, static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic) { unsigned int virq; - u32 *imap, *tmp; + const u32 *imap, *tmp; int imaplen, intsize, unit; struct device_node *iic; struct irq_host *iic_host; @@ -248,25 +248,25 @@ static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic) #endif /* Now do the horrible hacks */ - tmp = (u32 *)get_property(pic->of_node, "#interrupt-cells", NULL); + tmp = get_property(pic->of_node, "#interrupt-cells", NULL); if (tmp == NULL) return NO_IRQ; intsize = *tmp; - imap = (u32 *)get_property(pic->of_node, "interrupt-map", &imaplen); + imap = get_property(pic->of_node, "interrupt-map", &imaplen); if (imap == NULL || imaplen < (intsize + 1)) return NO_IRQ; iic = of_find_node_by_phandle(imap[intsize]); if (iic == NULL) return NO_IRQ; imap += intsize + 1; - tmp = (u32 *)get_property(iic, "#interrupt-cells", NULL); + tmp = get_property(iic, "#interrupt-cells", NULL); if (tmp == NULL) return NO_IRQ; intsize = *tmp; /* Assume unit is last entry of interrupt specifier */ unit = imap[intsize - 1]; /* Ok, we have a unit, now let's try to get the node */ - tmp = (u32 *)get_property(iic, "ibm,interrupt-server-ranges", NULL); + tmp = get_property(iic, "ibm,interrupt-server-ranges", NULL); if (tmp == NULL) { of_node_put(iic); return NO_IRQ; diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 5d2313a6c82b..86d55675e1d2 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -488,10 +488,10 @@ int spu_irq_class_1_bottom(struct spu *spu) static int __init find_spu_node_id(struct device_node *spe) { - unsigned int *id; + const unsigned int *id; struct device_node *cpu; cpu = spe->parent->parent; - id = (unsigned int *)get_property(cpu, "node-id", NULL); + id = get_property(cpu, "node-id", NULL); return id ? *id : 0; } @@ -500,7 +500,7 @@ static int __init cell_spuprop_present(struct spu *spu, struct device_node *spe, { static DEFINE_MUTEX(add_spumem_mutex); - struct address_prop { + const struct address_prop { unsigned long address; unsigned int len; } __attribute__((packed)) *p; @@ -511,7 +511,7 @@ static int __init cell_spuprop_present(struct spu *spu, struct device_node *spe, struct zone *zone; int ret; - p = (void*)get_property(spe, prop, &proplen); + p = get_property(spe, prop, &proplen); WARN_ON(proplen != sizeof (*p)); start_pfn = p->address >> PAGE_SHIFT; @@ -531,12 +531,12 @@ static int __init cell_spuprop_present(struct spu *spu, struct device_node *spe, static void __iomem * __init map_spe_prop(struct spu *spu, struct device_node *n, const char *name) { - struct address_prop { + const struct address_prop { unsigned long address; unsigned int len; } __attribute__((packed)) *prop; - void *p; + const void *p; int proplen; void* ret = NULL; int err = 0; @@ -570,14 +570,14 @@ static int __init spu_map_interrupts(struct spu *spu, struct device_node *np) { struct irq_host *host; unsigned int isrc; - u32 *tmp; + const u32 *tmp; host = iic_get_irq_host(spu->node); if (host == NULL) return -ENODEV; /* Get the interrupt source from the device-tree */ - tmp = (u32 *)get_property(np, "isrc", NULL); + tmp = get_property(np, "isrc", NULL); if (!tmp) return -ENODEV; spu->isrc = isrc = tmp[0]; @@ -593,7 +593,7 @@ static int __init spu_map_interrupts(struct spu *spu, struct device_node *np) static int __init spu_map_device(struct spu *spu, struct device_node *node) { - char *prop; + const char *prop; int ret; ret = -ENODEV; diff --git a/include/asm-powerpc/spu.h b/include/asm-powerpc/spu.h index c02d105d8294..b42b53c40f5d 100644 --- a/include/asm-powerpc/spu.h +++ b/include/asm-powerpc/spu.h @@ -106,7 +106,7 @@ struct spu_context; struct spu_runqueue; struct spu { - char *name; + const char *name; unsigned long local_store_phys; u8 *local_store; unsigned long problem_phys; From ae6b4101e53dcf8a41f3432dacca9d3eb34e9cc3 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 12 Jul 2006 15:40:05 +1000 Subject: [PATCH 0049/1063] [POWERPC] chrp: Constify & voidify get_property() Now that get_property() returns a void *, there's no need to cast its return value. Also, treat the return value as const, so we can constify get_property later. chrp platform changes. Built for chrp32_defconfig Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/chrp/nvram.c | 5 +++-- arch/powerpc/platforms/chrp/pci.c | 11 +++++------ arch/powerpc/platforms/chrp/setup.c | 21 +++++++++------------ 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/platforms/chrp/nvram.c b/arch/powerpc/platforms/chrp/nvram.c index 150f67d6f90c..0dd4a64757d9 100644 --- a/arch/powerpc/platforms/chrp/nvram.c +++ b/arch/powerpc/platforms/chrp/nvram.c @@ -67,13 +67,14 @@ static void chrp_nvram_write(int addr, unsigned char val) void __init chrp_nvram_init(void) { struct device_node *nvram; - unsigned int *nbytes_p, proplen; + const unsigned int *nbytes_p; + unsigned int proplen; nvram = of_find_node_by_type(NULL, "nvram"); if (nvram == NULL) return; - nbytes_p = (unsigned int *)get_property(nvram, "#bytes", &proplen); + nbytes_p = get_property(nvram, "#bytes", &proplen); if (nbytes_p == NULL || proplen != sizeof(unsigned int)) return; diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c index 6d7ac649b45e..0f4340506c75 100644 --- a/arch/powerpc/platforms/chrp/pci.c +++ b/arch/powerpc/platforms/chrp/pci.c @@ -214,11 +214,11 @@ void __init chrp_find_bridges(void) { struct device_node *dev; - int *bus_range; + const int *bus_range; int len, index = -1; struct pci_controller *hose; - unsigned int *dma; - char *model, *machine; + const unsigned int *dma; + const char *model, *machine; int is_longtrail = 0, is_mot = 0, is_pegasos = 0; struct device_node *root = find_path_device("/"); struct resource r; @@ -246,7 +246,7 @@ chrp_find_bridges(void) dev->full_name); continue; } - bus_range = (int *) get_property(dev, "bus-range", &len); + bus_range = get_property(dev, "bus-range", &len); if (bus_range == NULL || len < 2 * sizeof(int)) { printk(KERN_WARNING "Can't get bus-range for %s\n", dev->full_name); @@ -312,8 +312,7 @@ chrp_find_bridges(void) /* check the first bridge for a property that we can use to set pci_dram_offset */ - dma = (unsigned int *) - get_property(dev, "ibm,dma-ranges", &len); + dma = get_property(dev, "ibm,dma-ranges", &len); if (index == 0 && dma != NULL && len >= 6 * sizeof(*dma)) { pci_dram_offset = dma[2] - dma[3]; printk("pci_dram_offset = %lx\n", pci_dram_offset); diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index be39742db809..488dbd9b51ae 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -226,8 +226,7 @@ static void __init pegasos_set_l2cr(void) /* Enable L2 cache if needed */ np = find_type_devices("cpu"); if (np != NULL) { - unsigned int *l2cr = (unsigned int *) - get_property (np, "l2cr", NULL); + const unsigned int *l2cr = get_property(np, "l2cr", NULL); if (l2cr == NULL) { printk ("Pegasos l2cr : no cpu l2cr property found\n"); return; @@ -252,7 +251,7 @@ static void briq_restart(char *cmd) void __init chrp_setup_arch(void) { struct device_node *root = find_path_device ("/"); - char *machine = NULL; + const char *machine = NULL; /* init to some ~sane value until calibrate_delay() runs */ loops_per_jiffy = 50000000/HZ; @@ -353,7 +352,7 @@ static void __init chrp_find_openpic(void) struct device_node *np, *root; int len, i, j; int isu_size, idu_size; - unsigned int *iranges, *opprop = NULL; + const unsigned int *iranges, *opprop = NULL; int oplen = 0; unsigned long opaddr; int na = 1; @@ -363,8 +362,7 @@ static void __init chrp_find_openpic(void) return; root = of_find_node_by_path("/"); if (root) { - opprop = (unsigned int *) get_property - (root, "platform-open-pic", &oplen); + opprop = get_property(root, "platform-open-pic", &oplen); na = prom_n_addr_cells(root); } if (opprop && oplen >= na * sizeof(unsigned int)) { @@ -381,7 +379,7 @@ static void __init chrp_find_openpic(void) printk(KERN_INFO "OpenPIC at %lx\n", opaddr); - iranges = (unsigned int *) get_property(np, "interrupt-ranges", &len); + iranges = get_property(np, "interrupt-ranges", &len); if (iranges == NULL) len = 0; /* non-distributed mpic */ else @@ -467,8 +465,8 @@ static void __init chrp_find_8259(void) * from anyway */ for (np = find_devices("pci"); np != NULL; np = np->next) { - unsigned int *addrp = (unsigned int *) - get_property(np, "8259-interrupt-acknowledge", NULL); + const unsigned int *addrp = get_property(np, + "8259-interrupt-acknowledge", NULL); if (addrp == NULL) continue; @@ -527,7 +525,7 @@ void __init chrp_init2(void) { struct device_node *device; - unsigned int *p = NULL; + const unsigned int *p = NULL; #ifdef CONFIG_NVRAM chrp_nvram_init(); @@ -545,8 +543,7 @@ chrp_init2(void) */ device = find_devices("rtas"); if (device) - p = (unsigned int *) get_property - (device, "rtas-event-scan-rate", NULL); + p = get_property(device, "rtas-event-scan-rate", NULL); if (p && *p) { /* * Arrange to call chrp_event_scan at least *p times From eeb2b723ef5100fafa381d92eb70d83e98516a44 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 12 Jul 2006 15:40:17 +1000 Subject: [PATCH 0050/1063] [POWERPC] maple: Constify & voidify get_property() Now that get_property() returns a void *, there's no need to cast its return value. Also, treat the return value as const, so we can constify get_property later. maple platform changes. Built for maple_defconfig Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/maple/pci.c | 20 +++++++++++--------- arch/powerpc/platforms/maple/setup.c | 27 ++++++++++----------------- 2 files changed, 21 insertions(+), 26 deletions(-) diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index 63a1670d3bfd..dc05af5156a9 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -38,16 +38,16 @@ static struct pci_controller *u3_agp, *u3_ht; static int __init fixup_one_level_bus_range(struct device_node *node, int higher) { for (; node != 0;node = node->sibling) { - int * bus_range; - unsigned int *class_code; + const int *bus_range; + const unsigned int *class_code; int len; /* For PCI<->PCI bridges or CardBus bridges, we go down */ - class_code = (unsigned int *) get_property(node, "class-code", NULL); + class_code = get_property(node, "class-code", NULL); if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI && (*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS)) continue; - bus_range = (int *) get_property(node, "bus-range", &len); + bus_range = get_property(node, "bus-range", &len); if (bus_range != NULL && len > 2 * sizeof(int)) { if (bus_range[1] > higher) higher = bus_range[1]; @@ -65,16 +65,18 @@ static int __init fixup_one_level_bus_range(struct device_node *node, int higher */ static void __init fixup_bus_range(struct device_node *bridge) { - int * bus_range; + int *bus_range; + struct property *prop; int len; /* Lookup the "bus-range" property for the hose */ - bus_range = (int *) get_property(bridge, "bus-range", &len); - if (bus_range == NULL || len < 2 * sizeof(int)) { + prop = of_find_property(bridge, "bus-range", &len); + if (prop == NULL || prop->value == NULL || len < 2 * sizeof(int)) { printk(KERN_WARNING "Can't get bus-range for %s\n", bridge->full_name); return; } + bus_range = (int *)prop->value; bus_range[1] = fixup_one_level_bus_range(bridge->child, bus_range[1]); } @@ -314,12 +316,12 @@ static int __init add_bridge(struct device_node *dev) int len; struct pci_controller *hose; char* disp_name; - int *bus_range; + const int *bus_range; int primary = 1; DBG("Adding PCI host bridge %s\n", dev->full_name); - bus_range = (int *) get_property(dev, "bus-range", &len); + bus_range = get_property(dev, "bus-range", &len); if (bus_range == NULL || len < 2 * sizeof(int)) { printk(KERN_WARNING "Can't get bus-range for %s, assume bus 0\n", dev->full_name); diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index cb528c9de4c3..ecc764a3ff3a 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -99,8 +99,7 @@ static unsigned long maple_find_nvram_base(void) static void maple_restart(char *cmd) { unsigned int maple_nvram_base; - unsigned int maple_nvram_offset; - unsigned int maple_nvram_command; + const unsigned int *maple_nvram_offset, *maple_nvram_command; struct device_node *sp; maple_nvram_base = maple_find_nvram_base(); @@ -113,14 +112,12 @@ static void maple_restart(char *cmd) printk(KERN_EMERG "Maple: Unable to find Service Processor\n"); goto fail; } - maple_nvram_offset = *(unsigned int*) get_property(sp, - "restart-addr", NULL); - maple_nvram_command = *(unsigned int*) get_property(sp, - "restart-value", NULL); + maple_nvram_offset = get_property(sp, "restart-addr", NULL); + maple_nvram_command = get_property(sp, "restart-value", NULL); of_node_put(sp); /* send command */ - outb_p(maple_nvram_command, maple_nvram_base + maple_nvram_offset); + outb_p(*maple_nvram_command, maple_nvram_base + *maple_nvram_offset); for (;;) ; fail: printk(KERN_EMERG "Maple: Manual Restart Required\n"); @@ -129,8 +126,7 @@ static void maple_restart(char *cmd) static void maple_power_off(void) { unsigned int maple_nvram_base; - unsigned int maple_nvram_offset; - unsigned int maple_nvram_command; + const unsigned int *maple_nvram_offset, *maple_nvram_command; struct device_node *sp; maple_nvram_base = maple_find_nvram_base(); @@ -143,14 +139,12 @@ static void maple_power_off(void) printk(KERN_EMERG "Maple: Unable to find Service Processor\n"); goto fail; } - maple_nvram_offset = *(unsigned int*) get_property(sp, - "power-off-addr", NULL); - maple_nvram_command = *(unsigned int*) get_property(sp, - "power-off-value", NULL); + maple_nvram_offset = get_property(sp, "power-off-addr", NULL); + maple_nvram_command = get_property(sp, "power-off-value", NULL); of_node_put(sp); /* send command */ - outb_p(maple_nvram_command, maple_nvram_base + maple_nvram_offset); + outb_p(*maple_nvram_command, maple_nvram_base + *maple_nvram_offset); for (;;) ; fail: printk(KERN_EMERG "Maple: Manual Power-Down Required\n"); @@ -211,7 +205,7 @@ static void __init maple_init_early(void) static void __init maple_init_IRQ(void) { struct device_node *root, *np, *mpic_node = NULL; - unsigned int *opprop; + const unsigned int *opprop; unsigned long openpic_addr = 0; int naddr, n, i, opplen, has_isus = 0; struct mpic *mpic; @@ -234,8 +228,7 @@ static void __init maple_init_IRQ(void) /* Find address list in /platform-open-pic */ root = of_find_node_by_path("/"); naddr = prom_n_addr_cells(root); - opprop = (unsigned int *) get_property(root, "platform-open-pic", - &opplen); + opprop = get_property(root, "platform-open-pic", &opplen); if (opprop != 0) { openpic_addr = of_read_number(opprop, naddr); has_isus = (opplen > naddr); From 018a3d1db7cdb6127656c1622ee1d2302e16436d Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 12 Jul 2006 15:40:29 +1000 Subject: [PATCH 0051/1063] [POWERPC] powermac: Constify & voidify get_property() Now that get_property() returns a void *, there's no need to cast its return value. Also, treat the return value as const, so we can constify get_property later. powermac platform & macintosh driver changes. Built for pmac32_defconfig, g5_defconfig Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/powermac/backlight.c | 3 +- arch/powerpc/platforms/powermac/cpufreq_32.c | 23 ++++++------ arch/powerpc/platforms/powermac/cpufreq_64.c | 27 +++++++------- arch/powerpc/platforms/powermac/feature.c | 30 ++++++++-------- arch/powerpc/platforms/powermac/low_i2c.c | 24 ++++++------- arch/powerpc/platforms/powermac/pci.c | 37 +++++++++++--------- arch/powerpc/platforms/powermac/pfunc_base.c | 2 +- arch/powerpc/platforms/powermac/pfunc_core.c | 5 +-- arch/powerpc/platforms/powermac/setup.c | 18 +++++----- arch/powerpc/platforms/powermac/smp.c | 7 ++-- arch/powerpc/platforms/powermac/udbg_scc.c | 10 +++--- drivers/i2c/busses/i2c-powermac.c | 3 +- drivers/ide/ppc/pmac.c | 6 ++-- drivers/macintosh/macio_asic.c | 10 +++--- drivers/macintosh/macio_sysfs.c | 8 ++--- drivers/macintosh/smu.c | 19 +++++----- drivers/macintosh/therm_adt746x.c | 8 ++--- drivers/macintosh/therm_pm72.c | 14 ++++---- drivers/macintosh/therm_windtunnel.c | 4 +-- drivers/macintosh/via-cuda.c | 4 +-- drivers/macintosh/via-pmu-led.c | 2 +- drivers/macintosh/via-pmu.c | 10 +++--- drivers/macintosh/windfarm_pm81.c | 4 +-- drivers/macintosh/windfarm_pm91.c | 2 +- drivers/macintosh/windfarm_smu_controls.c | 13 +++---- drivers/macintosh/windfarm_smu_sat.c | 8 ++--- drivers/macintosh/windfarm_smu_sensors.c | 12 +++---- drivers/serial/pmac_zilog.c | 9 ++--- include/asm-powerpc/smu.h | 2 +- 29 files changed, 166 insertions(+), 158 deletions(-) diff --git a/arch/powerpc/platforms/powermac/backlight.c b/arch/powerpc/platforms/powermac/backlight.c index 69f65e215a5c..205b4a392862 100644 --- a/arch/powerpc/platforms/powermac/backlight.c +++ b/arch/powerpc/platforms/powermac/backlight.c @@ -38,7 +38,8 @@ int pmac_has_backlight_type(const char *type) struct device_node* bk_node = find_devices("backlight"); if (bk_node) { - char *prop = get_property(bk_node, "backlight-control", NULL); + const char *prop = get_property(bk_node, + "backlight-control", NULL); if (prop && strncmp(prop, type, strlen(type)) == 0) return 1; } diff --git a/arch/powerpc/platforms/powermac/cpufreq_32.c b/arch/powerpc/platforms/powermac/cpufreq_32.c index 62926248bdb8..c2b6b4134f68 100644 --- a/arch/powerpc/platforms/powermac/cpufreq_32.c +++ b/arch/powerpc/platforms/powermac/cpufreq_32.c @@ -421,7 +421,7 @@ static int pmac_cpufreq_cpu_init(struct cpufreq_policy *policy) static u32 read_gpio(struct device_node *np) { - u32 *reg = (u32 *)get_property(np, "reg", NULL); + const u32 *reg = get_property(np, "reg", NULL); u32 offset; if (reg == NULL) @@ -497,7 +497,7 @@ static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode) "frequency-gpio"); struct device_node *slew_done_gpio_np = of_find_node_by_name(NULL, "slewing-done"); - u32 *value; + const u32 *value; /* * Check to see if it's GPIO driven or PMU only @@ -519,15 +519,15 @@ static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode) */ if (frequency_gpio && slew_done_gpio) { int lenp, rc; - u32 *freqs, *ratio; + const u32 *freqs, *ratio; - freqs = (u32 *)get_property(cpunode, "bus-frequencies", &lenp); + freqs = get_property(cpunode, "bus-frequencies", &lenp); lenp /= sizeof(u32); if (freqs == NULL || lenp != 2) { printk(KERN_ERR "cpufreq: bus-frequencies incorrect or missing\n"); return 1; } - ratio = (u32 *)get_property(cpunode, "processor-to-bus-ratio*2", NULL); + ratio = get_property(cpunode, "processor-to-bus-ratio*2", NULL); if (ratio == NULL) { printk(KERN_ERR "cpufreq: processor-to-bus-ratio*2 missing\n"); return 1; @@ -562,7 +562,7 @@ static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode) /* If we use the PMU, look for the min & max frequencies in the * device-tree */ - value = (u32 *)get_property(cpunode, "min-clock-frequency", NULL); + value = get_property(cpunode, "min-clock-frequency", NULL); if (!value) return 1; low_freq = (*value) / 1000; @@ -571,7 +571,7 @@ static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode) if (low_freq < 100000) low_freq *= 10; - value = (u32 *)get_property(cpunode, "max-clock-frequency", NULL); + value = get_property(cpunode, "max-clock-frequency", NULL); if (!value) return 1; hi_freq = (*value) / 1000; @@ -611,13 +611,14 @@ static int pmac_cpufreq_init_7447A(struct device_node *cpunode) static int pmac_cpufreq_init_750FX(struct device_node *cpunode) { struct device_node *volt_gpio_np; - u32 pvr, *value; + u32 pvr; + const u32 *value; if (get_property(cpunode, "dynamic-power-step", NULL) == NULL) return 1; hi_freq = cur_freq; - value = (u32 *)get_property(cpunode, "reduced-clock-frequency", NULL); + value = get_property(cpunode, "reduced-clock-frequency", NULL); if (!value) return 1; low_freq = (*value) / 1000; @@ -650,7 +651,7 @@ static int pmac_cpufreq_init_750FX(struct device_node *cpunode) static int __init pmac_cpufreq_setup(void) { struct device_node *cpunode; - u32 *value; + const u32 *value; if (strstr(cmd_line, "nocpufreq")) return 0; @@ -661,7 +662,7 @@ static int __init pmac_cpufreq_setup(void) goto out; /* Get current cpu clock freq */ - value = (u32 *)get_property(cpunode, "clock-frequency", NULL); + value = get_property(cpunode, "clock-frequency", NULL); if (!value) goto out; cur_freq = (*value) / 1000; diff --git a/arch/powerpc/platforms/powermac/cpufreq_64.c b/arch/powerpc/platforms/powermac/cpufreq_64.c index a6a84ac5433e..c364c89adb4e 100644 --- a/arch/powerpc/platforms/powermac/cpufreq_64.c +++ b/arch/powerpc/platforms/powermac/cpufreq_64.c @@ -89,7 +89,7 @@ static DEFINE_MUTEX(g5_switch_mutex); #ifdef CONFIG_PPC_SMU -static u32 *g5_pmode_data; +static const u32 *g5_pmode_data; static int g5_pmode_max; static struct smu_sdbp_fvt *g5_fvt_table; /* table of op. points */ @@ -391,7 +391,8 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpus) unsigned int psize, ssize; unsigned long max_freq; char *freq_method, *volt_method; - u32 *valp, pvr_hi; + const u32 *valp; + u32 pvr_hi; int use_volts_vdnap = 0; int use_volts_smu = 0; int rc = -ENODEV; @@ -409,8 +410,7 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpus) /* Get first CPU node */ for (cpunode = NULL; (cpunode = of_get_next_child(cpus, cpunode)) != NULL;) { - u32 *reg = - (u32 *)get_property(cpunode, "reg", NULL); + const u32 *reg = get_property(cpunode, "reg", NULL); if (reg == NULL || (*reg) != 0) continue; if (!strcmp(cpunode->type, "cpu")) @@ -422,7 +422,7 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpus) } /* Check 970FX for now */ - valp = (u32 *)get_property(cpunode, "cpu-version", NULL); + valp = get_property(cpunode, "cpu-version", NULL); if (!valp) { DBG("No cpu-version property !\n"); goto bail_noprops; @@ -434,7 +434,7 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpus) } /* Look for the powertune data in the device-tree */ - g5_pmode_data = (u32 *)get_property(cpunode, "power-mode-data",&psize); + g5_pmode_data = get_property(cpunode, "power-mode-data",&psize); if (!g5_pmode_data) { DBG("No power-mode-data !\n"); goto bail_noprops; @@ -442,7 +442,7 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpus) g5_pmode_max = psize / sizeof(u32) - 1; if (use_volts_smu) { - struct smu_sdbp_header *shdr; + const struct smu_sdbp_header *shdr; /* Look for the FVT table */ shdr = smu_get_sdb_partition(SMU_SDB_FVT_ID, NULL); @@ -493,7 +493,7 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpus) * half freq in this version. So far, I haven't yet seen a machine * supporting anything else. */ - valp = (u32 *)get_property(cpunode, "clock-frequency", NULL); + valp = get_property(cpunode, "clock-frequency", NULL); if (!valp) return -ENODEV; max_freq = (*valp)/1000; @@ -541,8 +541,8 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpus) static int __init g5_pm72_cpufreq_init(struct device_node *cpus) { struct device_node *cpuid = NULL, *hwclock = NULL, *cpunode = NULL; - u8 *eeprom = NULL; - u32 *valp; + const u8 *eeprom = NULL; + const u32 *valp; u64 max_freq, min_freq, ih, il; int has_volt = 1, rc = 0; @@ -563,7 +563,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpus) /* Lookup the cpuid eeprom node */ cpuid = of_find_node_by_path("/u3@0,f8000000/i2c@f8001000/cpuid@a0"); if (cpuid != NULL) - eeprom = (u8 *)get_property(cpuid, "cpuid", NULL); + eeprom = get_property(cpuid, "cpuid", NULL); if (eeprom == NULL) { printk(KERN_ERR "cpufreq: Can't find cpuid EEPROM !\n"); rc = -ENODEV; @@ -573,7 +573,8 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpus) /* Lookup the i2c hwclock */ for (hwclock = NULL; (hwclock = of_find_node_by_name(hwclock, "i2c-hwclock")) != NULL;){ - char *loc = get_property(hwclock, "hwctrl-location", NULL); + const char *loc = get_property(hwclock, + "hwctrl-location", NULL); if (loc == NULL) continue; if (strcmp(loc, "CPU CLOCK")) @@ -637,7 +638,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpus) */ /* Get max frequency from device-tree */ - valp = (u32 *)get_property(cpunode, "clock-frequency", NULL); + valp = get_property(cpunode, "clock-frequency", NULL); if (!valp) { printk(KERN_ERR "cpufreq: Can't find CPU frequency !\n"); rc = -ENODEV; diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index f8313bf9a9f7..13fcaf5b1796 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -1058,8 +1058,8 @@ core99_reset_cpu(struct device_node *node, long param, long value) if (np == NULL) return -ENODEV; for (np = np->child; np != NULL; np = np->sibling) { - u32 *num = (u32 *)get_property(np, "reg", NULL); - u32 *rst = (u32 *)get_property(np, "soft-reset", NULL); + u32 *num = get_property(np, "reg", NULL); + u32 *rst = get_property(np, "soft-reset", NULL); if (num == NULL || rst == NULL) continue; if (param == *num) { @@ -1087,7 +1087,7 @@ core99_usb_enable(struct device_node *node, long param, long value) { struct macio_chip *macio; unsigned long flags; - char *prop; + const char *prop; int number; u32 reg; @@ -1096,7 +1096,7 @@ core99_usb_enable(struct device_node *node, long param, long value) macio->type != macio_intrepid) return -ENODEV; - prop = (char *)get_property(node, "AAPL,clock-id", NULL); + prop = get_property(node, "AAPL,clock-id", NULL); if (!prop) return -ENODEV; if (strncmp(prop, "usb0u048", 8) == 0) @@ -1507,8 +1507,8 @@ static long g5_reset_cpu(struct device_node *node, long param, long value) if (np == NULL) return -ENODEV; for (np = np->child; np != NULL; np = np->sibling) { - u32 *num = (u32 *)get_property(np, "reg", NULL); - u32 *rst = (u32 *)get_property(np, "soft-reset", NULL); + const u32 *num = get_property(np, "reg", NULL); + const u32 *rst = get_property(np, "soft-reset", NULL); if (num == NULL || rst == NULL) continue; if (param == *num) { @@ -2408,7 +2408,7 @@ static int __init probe_motherboard(void) */ dt = find_devices("device-tree"); if (dt != NULL) - model = (const char *) get_property(dt, "model", NULL); + model = get_property(dt, "model", NULL); for(i=0; model && i<(sizeof(pmac_mb_defs)/sizeof(struct pmac_mb_def)); i++) { if (strcmp(model, pmac_mb_defs[i].model_string) == 0) { pmac_mb = pmac_mb_defs[i]; @@ -2536,7 +2536,7 @@ found: */ static void __init probe_uninorth(void) { - u32 *addrp; + const u32 *addrp; phys_addr_t address; unsigned long actrl; @@ -2555,7 +2555,7 @@ static void __init probe_uninorth(void) if (uninorth_node == NULL) return; - addrp = (u32 *)get_property(uninorth_node, "reg", NULL); + addrp = get_property(uninorth_node, "reg", NULL); if (addrp == NULL) return; address = of_translate_address(uninorth_node, addrp); @@ -2596,7 +2596,7 @@ static void __init probe_one_macio(const char *name, const char *compat, int typ struct device_node* node; int i; volatile u32 __iomem *base; - u32 *addrp, *revp; + const u32 *addrp, *revp; phys_addr_t addr; u64 size; @@ -2639,7 +2639,7 @@ static void __init probe_one_macio(const char *name, const char *compat, int typ return; } if (type == macio_keylargo || type == macio_keylargo2) { - u32 *did = (u32 *)get_property(node, "device-id", NULL); + const u32 *did = get_property(node, "device-id", NULL); if (*did == 0x00000025) type = macio_pangea; if (*did == 0x0000003e) @@ -2652,7 +2652,7 @@ static void __init probe_one_macio(const char *name, const char *compat, int typ macio_chips[i].base = base; macio_chips[i].flags = MACIO_FLAG_SCCB_ON | MACIO_FLAG_SCCB_ON; macio_chips[i].name = macio_names[type]; - revp = (u32 *)get_property(node, "revision-id", NULL); + revp = get_property(node, "revision-id", NULL); if (revp) macio_chips[i].rev = *revp; printk(KERN_INFO "Found a %s mac-io controller, rev: %d, mapped at 0x%p\n", @@ -2695,15 +2695,15 @@ static void __init initial_serial_shutdown(struct device_node *np) { int len; - struct slot_names_prop { + const struct slot_names_prop { int count; char name[1]; } *slots; - char *conn; + const char *conn; int port_type = PMAC_SCC_ASYNC; int modem = 0; - slots = (struct slot_names_prop *)get_property(np, "slot-names", &len); + slots = get_property(np, "slot-names", &len); conn = get_property(np, "AAPL,connector", &len); if (conn && (strcmp(conn, "infrared") == 0)) port_type = PMAC_SCC_IRDA; diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index 8677f50c2586..c2c7cf75dd5f 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -477,7 +477,8 @@ static int kw_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize, static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np) { struct pmac_i2c_host_kw *host; - u32 *psteps, *prate, *addrp, steps; + const u32 *psteps, *prate, *addrp; + u32 steps; host = kzalloc(sizeof(struct pmac_i2c_host_kw), GFP_KERNEL); if (host == NULL) { @@ -490,7 +491,7 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np) * on all i2c keywest nodes so far ... we would have to fallback * to macio parsing if that wasn't the case */ - addrp = (u32 *)get_property(np, "AAPL,address", NULL); + addrp = get_property(np, "AAPL,address", NULL); if (addrp == NULL) { printk(KERN_ERR "low_i2c: Can't find address for %s\n", np->full_name); @@ -504,13 +505,13 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np) host->timeout_timer.function = kw_i2c_timeout; host->timeout_timer.data = (unsigned long)host; - psteps = (u32 *)get_property(np, "AAPL,address-step", NULL); + psteps = get_property(np, "AAPL,address-step", NULL); steps = psteps ? (*psteps) : 0x10; for (host->bsteps = 0; (steps & 0x01) == 0; host->bsteps++) steps >>= 1; /* Select interface rate */ host->speed = KW_I2C_MODE_25KHZ; - prate = (u32 *)get_property(np, "AAPL,i2c-rate", NULL); + prate = get_property(np, "AAPL,i2c-rate", NULL); if (prate) switch(*prate) { case 100: host->speed = KW_I2C_MODE_100KHZ; @@ -618,8 +619,8 @@ static void __init kw_i2c_probe(void) } else { for (child = NULL; (child = of_get_next_child(np, child)) != NULL;) { - u32 *reg = - (u32 *)get_property(child, "reg", NULL); + const u32 *reg = get_property(child, + "reg", NULL); if (reg == NULL) continue; kw_i2c_add(host, np, child, *reg); @@ -881,7 +882,7 @@ static void __init smu_i2c_probe(void) { struct device_node *controller, *busnode; struct pmac_i2c_bus *bus; - u32 *reg; + const u32 *reg; int sz; if (!smu_present()) @@ -904,7 +905,7 @@ static void __init smu_i2c_probe(void) if (strcmp(busnode->type, "i2c") && strcmp(busnode->type, "i2c-bus")) continue; - reg = (u32 *)get_property(busnode, "reg", NULL); + reg = get_property(busnode, "reg", NULL); if (reg == NULL) continue; @@ -948,9 +949,8 @@ struct pmac_i2c_bus *pmac_i2c_find_bus(struct device_node *node) list_for_each_entry(bus, &pmac_i2c_busses, link) { if (p == bus->busnode) { if (prev && bus->flags & pmac_i2c_multibus) { - u32 *reg; - reg = (u32 *)get_property(prev, "reg", - NULL); + const u32 *reg; + reg = get_property(prev, "reg", NULL); if (!reg) continue; if (((*reg) >> 8) != bus->channel) @@ -971,7 +971,7 @@ EXPORT_SYMBOL_GPL(pmac_i2c_find_bus); u8 pmac_i2c_get_dev_addr(struct device_node *device) { - u32 *reg = (u32 *)get_property(device, "reg", NULL); + const u32 *reg = get_property(device, "reg", NULL); if (reg == NULL) return 0; diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index 556b349797e8..787ffd999bc2 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -69,16 +69,16 @@ struct device_node *k2_skiplist[2]; static int __init fixup_one_level_bus_range(struct device_node *node, int higher) { for (; node != 0;node = node->sibling) { - int * bus_range; - unsigned int *class_code; + const int * bus_range; + const unsigned int *class_code; int len; /* For PCI<->PCI bridges or CardBus bridges, we go down */ - class_code = (unsigned int *) get_property(node, "class-code", NULL); + class_code = get_property(node, "class-code", NULL); if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI && (*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS)) continue; - bus_range = (int *) get_property(node, "bus-range", &len); + bus_range = get_property(node, "bus-range", &len); if (bus_range != NULL && len > 2 * sizeof(int)) { if (bus_range[1] > higher) higher = bus_range[1]; @@ -96,13 +96,15 @@ static int __init fixup_one_level_bus_range(struct device_node *node, int higher */ static void __init fixup_bus_range(struct device_node *bridge) { - int * bus_range; - int len; + int *bus_range, len; + struct property *prop; /* Lookup the "bus-range" property for the hose */ - bus_range = (int *) get_property(bridge, "bus-range", &len); - if (bus_range == NULL || len < 2 * sizeof(int)) + prop = of_find_property(bridge, "bus-range", &len); + if (prop == NULL || prop->length < 2 * sizeof(int)) return; + + bus_range = (int *)prop->value; bus_range[1] = fixup_one_level_bus_range(bridge->child, bus_range[1]); } @@ -240,7 +242,7 @@ static struct pci_ops macrisc_pci_ops = static int chaos_validate_dev(struct pci_bus *bus, int devfn, int offset) { struct device_node *np; - u32 *vendor, *device; + const u32 *vendor, *device; if (offset >= 0x100) return PCIBIOS_BAD_REGISTER_NUMBER; @@ -248,8 +250,8 @@ static int chaos_validate_dev(struct pci_bus *bus, int devfn, int offset) if (np == NULL) return PCIBIOS_DEVICE_NOT_FOUND; - vendor = (u32 *)get_property(np, "vendor-id", NULL); - device = (u32 *)get_property(np, "device-id", NULL); + vendor = get_property(np, "vendor-id", NULL); + device = get_property(np, "device-id", NULL); if (vendor == NULL || device == NULL) return PCIBIOS_DEVICE_NOT_FOUND; @@ -689,20 +691,21 @@ static void __init fixup_nec_usb2(void) for (nec = NULL; (nec = of_find_node_by_name(nec, "usb")) != NULL;) { struct pci_controller *hose; - u32 data, *prop; + u32 data; + const u32 *prop; u8 bus, devfn; - prop = (u32 *)get_property(nec, "vendor-id", NULL); + prop = get_property(nec, "vendor-id", NULL); if (prop == NULL) continue; if (0x1033 != *prop) continue; - prop = (u32 *)get_property(nec, "device-id", NULL); + prop = get_property(nec, "device-id", NULL); if (prop == NULL) continue; if (0x0035 != *prop) continue; - prop = (u32 *)get_property(nec, "reg", NULL); + prop = get_property(nec, "reg", NULL); if (prop == NULL) continue; devfn = (prop[0] >> 8) & 0xff; @@ -901,7 +904,7 @@ static int __init add_bridge(struct device_node *dev) struct pci_controller *hose; struct resource rsrc; char *disp_name; - int *bus_range; + const int *bus_range; int primary = 1, has_address = 0; DBG("Adding PCI host bridge %s\n", dev->full_name); @@ -910,7 +913,7 @@ static int __init add_bridge(struct device_node *dev) has_address = (of_address_to_resource(dev, 0, &rsrc) == 0); /* Get bus range if any */ - bus_range = (int *) get_property(dev, "bus-range", &len); + bus_range = get_property(dev, "bus-range", &len); if (bus_range == NULL || len < 2 * sizeof(int)) { printk(KERN_WARNING "Can't get bus-range for %s, assume" " bus 0\n", dev->full_name); diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c index 6d66359ec8c8..829dacec96e5 100644 --- a/arch/powerpc/platforms/powermac/pfunc_base.c +++ b/arch/powerpc/platforms/powermac/pfunc_base.c @@ -114,7 +114,7 @@ static void macio_gpio_init_one(struct macio_chip *macio) * we just create them all */ for (gp = NULL; (gp = of_get_next_child(gparent, gp)) != NULL;) { - u32 *reg = (u32 *)get_property(gp, "reg", NULL); + const u32 *reg = get_property(gp, "reg", NULL); unsigned long offset; if (reg == NULL) continue; diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c index b117adbf9571..7651f278615a 100644 --- a/arch/powerpc/platforms/powermac/pfunc_core.c +++ b/arch/powerpc/platforms/powermac/pfunc_core.c @@ -813,14 +813,15 @@ struct pmf_function *__pmf_find_function(struct device_node *target, struct pmf_device *dev; struct pmf_function *func, *result = NULL; char fname[64]; - u32 *prop, ph; + const u32 *prop; + u32 ph; /* * Look for a "platform-*" function reference. If we can't find * one, then we fallback to a direct call attempt */ snprintf(fname, 63, "platform-%s", name); - prop = (u32 *)get_property(target, fname, NULL); + prop = get_property(target, fname, NULL); if (prop == NULL) goto find_it; ph = *prop; diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 31a9da769fa2..824a618396ab 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -116,7 +116,7 @@ extern struct smp_ops_t core99_smp_ops; static void pmac_show_cpuinfo(struct seq_file *m) { struct device_node *np; - char *pp; + const char *pp; int plen; int mbmodel; unsigned int mbflags; @@ -134,12 +134,12 @@ static void pmac_show_cpuinfo(struct seq_file *m) seq_printf(m, "machine\t\t: "); np = of_find_node_by_path("/"); if (np != NULL) { - pp = (char *) get_property(np, "model", NULL); + pp = get_property(np, "model", NULL); if (pp != NULL) seq_printf(m, "%s\n", pp); else seq_printf(m, "PowerMac\n"); - pp = (char *) get_property(np, "compatible", &plen); + pp = get_property(np, "compatible", &plen); if (pp != NULL) { seq_printf(m, "motherboard\t:"); while (plen > 0) { @@ -163,10 +163,8 @@ static void pmac_show_cpuinfo(struct seq_file *m) if (np == NULL) np = of_find_node_by_type(NULL, "cache"); if (np != NULL) { - unsigned int *ic = (unsigned int *) - get_property(np, "i-cache-size", NULL); - unsigned int *dc = (unsigned int *) - get_property(np, "d-cache-size", NULL); + const unsigned int *ic = get_property(np, "i-cache-size", NULL); + const unsigned int *dc = get_property(np, "d-cache-size", NULL); seq_printf(m, "L2 cache\t:"); has_l2cache = 1; if (get_property(np, "cache-unified", NULL) != 0 && dc) { @@ -254,7 +252,7 @@ static void __init l2cr_init(void) if (np == 0) np = find_type_devices("cpu"); if (np != 0) { - unsigned int *l2cr = (unsigned int *) + const unsigned int *l2cr = get_property(np, "l2cr-value", NULL); if (l2cr != 0) { ppc_override_l2cr = 1; @@ -277,7 +275,7 @@ static void __init l2cr_init(void) static void __init pmac_setup_arch(void) { struct device_node *cpu, *ic; - int *fp; + const int *fp; unsigned long pvr; pvr = PVR_VER(mfspr(SPRN_PVR)); @@ -287,7 +285,7 @@ static void __init pmac_setup_arch(void) loops_per_jiffy = 50000000 / HZ; cpu = of_find_node_by_type(NULL, "cpu"); if (cpu != NULL) { - fp = (int *) get_property(cpu, "clock-frequency", NULL); + fp = get_property(cpu, "clock-frequency", NULL); if (fp != NULL) { if (pvr >= 0x30 && pvr < 0x80) /* PPC970 etc. */ diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index 827b7121ffb8..653eeb64d1e2 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -548,7 +548,7 @@ static void __init smp_core99_setup_i2c_hwsync(int ncpus) struct device_node *cc = NULL; struct device_node *p; const char *name = NULL; - u32 *reg; + const u32 *reg; int ok; /* Look for the clock chip */ @@ -562,7 +562,7 @@ static void __init smp_core99_setup_i2c_hwsync(int ncpus) pmac_tb_clock_chip_host = pmac_i2c_find_bus(cc); if (pmac_tb_clock_chip_host == NULL) continue; - reg = (u32 *)get_property(cc, "reg", NULL); + reg = get_property(cc, "reg", NULL); if (reg == NULL) continue; switch (*reg) { @@ -707,8 +707,7 @@ static void __init smp_core99_setup(int ncpus) core99_tb_gpio = KL_GPIO_TB_ENABLE; /* default value */ cpu = of_find_node_by_type(NULL, "cpu"); if (cpu != NULL) { - tbprop = (u32 *)get_property(cpu, "timebase-enable", - NULL); + tbprop = get_property(cpu, "timebase-enable", NULL); if (tbprop) core99_tb_gpio = *tbprop; of_node_put(cpu); diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c index 37e5b1eff911..ce1a235855f7 100644 --- a/arch/powerpc/platforms/powermac/udbg_scc.c +++ b/arch/powerpc/platforms/powermac/udbg_scc.c @@ -68,11 +68,11 @@ static unsigned char scc_inittab[] = { void udbg_scc_init(int force_scc) { - u32 *reg; + const u32 *reg; unsigned long addr; struct device_node *stdout = NULL, *escc = NULL, *macio = NULL; struct device_node *ch, *ch_def = NULL, *ch_a = NULL; - char *path; + const char *path; int i, x; escc = of_find_node_by_name(NULL, "escc"); @@ -81,7 +81,7 @@ void udbg_scc_init(int force_scc) macio = of_get_parent(escc); if (macio == NULL) goto bail; - path = (char *)get_property(of_chosen, "linux,stdout-path", NULL); + path = get_property(of_chosen, "linux,stdout-path", NULL); if (path != NULL) stdout = of_find_node_by_path(path); for (ch = NULL; (ch = of_get_next_child(escc, ch)) != NULL;) { @@ -96,13 +96,13 @@ void udbg_scc_init(int force_scc) ch = ch_def ? ch_def : ch_a; /* Get address within mac-io ASIC */ - reg = (u32 *)get_property(escc, "reg", NULL); + reg = get_property(escc, "reg", NULL); if (reg == NULL) goto bail; addr = reg[0]; /* Get address of mac-io PCI itself */ - reg = (u32 *)get_property(macio, "assigned-addresses", NULL); + reg = get_property(macio, "assigned-addresses", NULL); if (reg == NULL) goto bail; addr += reg[2]; diff --git a/drivers/i2c/busses/i2c-powermac.c b/drivers/i2c/busses/i2c-powermac.c index 2a0b3be7cdd0..e8a6de5a1517 100644 --- a/drivers/i2c/busses/i2c-powermac.c +++ b/drivers/i2c/busses/i2c-powermac.c @@ -209,7 +209,8 @@ static int i2c_powermac_probe(struct device *dev) struct pmac_i2c_bus *bus = dev->platform_data; struct device_node *parent = NULL; struct i2c_adapter *adapter; - char name[32], *basename; + char name[32]; + const char *basename; int rc; if (bus == NULL) diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c index ebf961f1718d..fa46856e8068 100644 --- a/drivers/ide/ppc/pmac.c +++ b/drivers/ide/ppc/pmac.c @@ -1154,7 +1154,7 @@ static int pmac_ide_setup_device(pmac_ide_hwif_t *pmif, ide_hwif_t *hwif) { struct device_node *np = pmif->node; - int *bidp; + const int *bidp; pmif->cable_80 = 0; pmif->broken_dma = pmif->broken_dma_warn = 0; @@ -1176,14 +1176,14 @@ pmac_ide_setup_device(pmac_ide_hwif_t *pmif, ide_hwif_t *hwif) pmif->broken_dma = 1; } - bidp = (int *)get_property(np, "AAPL,bus-id", NULL); + bidp = get_property(np, "AAPL,bus-id", NULL); pmif->aapl_bus_id = bidp ? *bidp : 0; /* Get cable type from device-tree */ if (pmif->kind == controller_kl_ata4 || pmif->kind == controller_un_ata6 || pmif->kind == controller_k2_ata6 || pmif->kind == controller_sh_ata6) { - char* cable = get_property(np, "cable-type", NULL); + const char* cable = get_property(np, "cable-type", NULL); if (cable && !strncmp(cable, "80-", 3)) pmif->cable_80 = 1; } diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c index 80c0c665b5f6..7817cf286d0c 100644 --- a/drivers/macintosh/macio_asic.c +++ b/drivers/macintosh/macio_asic.c @@ -139,7 +139,9 @@ static int macio_uevent(struct device *dev, char **envp, int num_envp, { struct macio_dev * macio_dev; struct of_device * of; - char *scratch, *compat, *compat2; + char *scratch; + const char *compat, *compat2; + int i = 0; int length, cplen, cplen2, seen = 0; @@ -173,7 +175,7 @@ static int macio_uevent(struct device *dev, char **envp, int num_envp, * it's not really legal to split it out with commas. We split it * up using a number of environment variables instead. */ - compat = (char *) get_property(of->node, "compatible", &cplen); + compat = get_property(of->node, "compatible", &cplen); compat2 = compat; cplen2= cplen; while (compat && cplen > 0) { @@ -454,7 +456,7 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip, struct resource *parent_res) { struct macio_dev *dev; - u32 *reg; + const u32 *reg; if (np == NULL) return NULL; @@ -489,7 +491,7 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip, #endif MAX_NODE_NAME_SIZE, np->name); } else { - reg = (u32 *)get_property(np, "reg", NULL); + reg = get_property(np, "reg", NULL); sprintf(dev->ofdev.dev.bus_id, "%1d.%08x:%.*s", chip->lbus.index, reg ? *reg : 0, MAX_NODE_NAME_SIZE, np->name); diff --git a/drivers/macintosh/macio_sysfs.c b/drivers/macintosh/macio_sysfs.c index cae24a13526a..8566bdfdd4b8 100644 --- a/drivers/macintosh/macio_sysfs.c +++ b/drivers/macintosh/macio_sysfs.c @@ -16,12 +16,12 @@ static ssize_t compatible_show (struct device *dev, struct device_attribute *attr, char *buf) { struct of_device *of; - char *compat; + const char *compat; int cplen; int length = 0; of = &to_macio_device (dev)->ofdev; - compat = (char *) get_property(of->node, "compatible", &cplen); + compat = get_property(of->node, "compatible", &cplen); if (!compat) { *buf = '\0'; return 0; @@ -42,12 +42,12 @@ static ssize_t modalias_show (struct device *dev, struct device_attribute *attr, char *buf) { struct of_device *of; - char *compat; + const char *compat; int cplen; int length; of = &to_macio_device (dev)->ofdev; - compat = (char *) get_property (of->node, "compatible", &cplen); + compat = get_property(of->node, "compatible", &cplen); if (!compat) compat = "", cplen = 1; length = sprintf (buf, "of:N%sT%s", of->node->name, of->node->type); buf += length; diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index f139a74696fe..6f358600536e 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -447,7 +447,7 @@ EXPORT_SYMBOL(smu_present); int __init smu_init (void) { struct device_node *np; - u32 *data; + const u32 *data; np = of_find_node_by_type(NULL, "smu"); if (np == NULL) @@ -483,7 +483,7 @@ int __init smu_init (void) printk(KERN_ERR "SMU: Can't find doorbell GPIO !\n"); goto fail; } - data = (u32 *)get_property(np, "reg", NULL); + data = get_property(np, "reg", NULL); if (data == NULL) { of_node_put(np); printk(KERN_ERR "SMU: Can't find doorbell GPIO address !\n"); @@ -506,7 +506,7 @@ int __init smu_init (void) np = of_find_node_by_name(NULL, "smu-interrupt"); if (np == NULL) break; - data = (u32 *)get_property(np, "reg", NULL); + data = get_property(np, "reg", NULL); if (data == NULL) { of_node_put(np); break; @@ -959,11 +959,11 @@ static struct smu_sdbp_header *smu_create_sdb_partition(int id) /* Note: Only allowed to return error code in pointers (using ERR_PTR) * when interruptible is 1 */ -struct smu_sdbp_header *__smu_get_sdb_partition(int id, unsigned int *size, - int interruptible) +const struct smu_sdbp_header *__smu_get_sdb_partition(int id, + unsigned int *size, int interruptible) { char pname[32]; - struct smu_sdbp_header *part; + const struct smu_sdbp_header *part; if (!smu) return NULL; @@ -980,8 +980,7 @@ struct smu_sdbp_header *__smu_get_sdb_partition(int id, unsigned int *size, } else mutex_lock(&smu_part_access); - part = (struct smu_sdbp_header *)get_property(smu->of_node, - pname, size); + part = get_property(smu->of_node, pname, size); if (part == NULL) { DPRINTK("trying to extract from SMU ...\n"); part = smu_create_sdb_partition(id); @@ -992,7 +991,7 @@ struct smu_sdbp_header *__smu_get_sdb_partition(int id, unsigned int *size, return part; } -struct smu_sdbp_header *smu_get_sdb_partition(int id, unsigned int *size) +const struct smu_sdbp_header *smu_get_sdb_partition(int id, unsigned int *size) { return __smu_get_sdb_partition(id, size, 0); } @@ -1071,7 +1070,7 @@ static ssize_t smu_write(struct file *file, const char __user *buf, pp->mode = smu_file_events; return 0; } else if (hdr.cmdtype == SMU_CMDTYPE_GET_PARTITION) { - struct smu_sdbp_header *part; + const struct smu_sdbp_header *part; part = __smu_get_sdb_partition(hdr.cmd, NULL, 1); if (part == NULL) return -EINVAL; diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c index 7f86478bdd36..a0f30d0853ea 100644 --- a/drivers/macintosh/therm_adt746x.c +++ b/drivers/macintosh/therm_adt746x.c @@ -47,7 +47,7 @@ static u8 FAN_SPD_SET[2] = {0x30, 0x31}; static u8 default_limits_local[3] = {70, 50, 70}; /* local, sensor1, sensor2 */ static u8 default_limits_chip[3] = {80, 65, 80}; /* local, sensor1, sensor2 */ -static char *sensor_location[3] = {NULL, NULL, NULL}; +static const char *sensor_location[3] = {NULL, NULL, NULL}; static int limit_adjust = 0; static int fan_speed = -1; @@ -553,7 +553,7 @@ static int __init thermostat_init(void) { struct device_node* np; - u32 *prop; + const u32 *prop; int i = 0, offset = 0; np = of_find_node_by_name(NULL, "fan"); @@ -566,13 +566,13 @@ thermostat_init(void) else return -ENODEV; - prop = (u32 *)get_property(np, "hwsensor-params-version", NULL); + prop = get_property(np, "hwsensor-params-version", NULL); printk(KERN_INFO "adt746x: version %d (%ssupported)\n", *prop, (*prop == 1)?"":"un"); if (*prop != 1) return -ENODEV; - prop = (u32 *)get_property(np, "reg", NULL); + prop = get_property(np, "reg", NULL); if (!prop) return -ENODEV; diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c index 20bf67244e2c..d00c0c37a12e 100644 --- a/drivers/macintosh/therm_pm72.c +++ b/drivers/macintosh/therm_pm72.c @@ -660,7 +660,7 @@ static int read_eeprom(int cpu, struct mpu_data *out) { struct device_node *np; char nodename[64]; - u8 *data; + const u8 *data; int len; /* prom.c routine for finding a node by path is a bit brain dead @@ -673,7 +673,7 @@ static int read_eeprom(int cpu, struct mpu_data *out) printk(KERN_ERR "therm_pm72: Failed to retrieve cpuid node from device-tree\n"); return -ENODEV; } - data = (u8 *)get_property(np, "cpuid", &len); + data = get_property(np, "cpuid", &len); if (data == NULL) { printk(KERN_ERR "therm_pm72: Failed to retrieve cpuid property from device-tree\n"); of_node_put(np); @@ -1336,7 +1336,7 @@ static int init_backside_state(struct backside_pid_state *state) */ u3 = of_find_node_by_path("/u3@0,f8000000"); if (u3 != NULL) { - u32 *vers = (u32 *)get_property(u3, "device-rev", NULL); + const u32 *vers = get_property(u3, "device-rev", NULL); if (vers) if (((*vers) & 0x3f) < 0x34) u3h = 0; @@ -2111,8 +2111,8 @@ static void fcu_lookup_fans(struct device_node *fcu_node) while ((np = of_get_next_child(fcu_node, np)) != NULL) { int type = -1; - char *loc; - u32 *reg; + const char *loc; + const u32 *reg; DBG(" control: %s, type: %s\n", np->name, np->type); @@ -2128,8 +2128,8 @@ static void fcu_lookup_fans(struct device_node *fcu_node) continue; /* Lookup for a matching location */ - loc = (char *)get_property(np, "location", NULL); - reg = (u32 *)get_property(np, "reg", NULL); + loc = get_property(np, "location", NULL); + reg = get_property(np, "reg", NULL); if (loc == NULL || reg == NULL) continue; DBG(" matching location: %s, reg: 0x%08x\n", loc, *reg); diff --git a/drivers/macintosh/therm_windtunnel.c b/drivers/macintosh/therm_windtunnel.c index c7d1c290cb0c..738faab1b22c 100644 --- a/drivers/macintosh/therm_windtunnel.c +++ b/drivers/macintosh/therm_windtunnel.c @@ -484,14 +484,14 @@ struct apple_thermal_info { static int __init g4fan_init( void ) { - struct apple_thermal_info *info; + const struct apple_thermal_info *info; struct device_node *np; init_MUTEX( &x.lock ); if( !(np=of_find_node_by_name(NULL, "power-mgt")) ) return -ENODEV; - info = (struct apple_thermal_info*)get_property(np, "thermal-info", NULL); + info = get_property(np, "thermal-info", NULL); of_node_put(np); if( !info || !machine_is_compatible("PowerMac3,6") ) diff --git a/drivers/macintosh/via-cuda.c b/drivers/macintosh/via-cuda.c index 69d5452fd22f..7512d1c15207 100644 --- a/drivers/macintosh/via-cuda.c +++ b/drivers/macintosh/via-cuda.c @@ -123,7 +123,7 @@ int __init find_via_cuda(void) { struct adb_request req; phys_addr_t taddr; - u32 *reg; + const u32 *reg; int err; if (vias != 0) @@ -132,7 +132,7 @@ int __init find_via_cuda(void) if (vias == 0) return 0; - reg = (u32 *)get_property(vias, "reg", NULL); + reg = get_property(vias, "reg", NULL); if (reg == NULL) { printk(KERN_ERR "via-cuda: No \"reg\" property !\n"); goto fail; diff --git a/drivers/macintosh/via-pmu-led.c b/drivers/macintosh/via-pmu-led.c index 5189d5454b1f..179af10105d9 100644 --- a/drivers/macintosh/via-pmu-led.c +++ b/drivers/macintosh/via-pmu-led.c @@ -120,7 +120,7 @@ static int __init via_pmu_led_init(void) dt = of_find_node_by_path("/"); if (dt == NULL) return -ENODEV; - model = (const char *)get_property(dt, "model", NULL); + model = get_property(dt, "model", NULL); if (model == NULL) return -ENODEV; if (strncmp(model, "PowerBook", strlen("PowerBook")) != 0 && diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 06ca80bfd6b9..80e88b4a1d07 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -287,7 +287,7 @@ static char *pbook_type[] = { int __init find_via_pmu(void) { u64 taddr; - u32 *reg; + const u32 *reg; if (via != 0) return 1; @@ -295,7 +295,7 @@ int __init find_via_pmu(void) if (vias == NULL) return 0; - reg = (u32 *)get_property(vias, "reg", NULL); + reg = get_property(vias, "reg", NULL); if (reg == NULL) { printk(KERN_ERR "via-pmu: No \"reg\" property !\n"); goto fail; @@ -337,7 +337,7 @@ int __init find_via_pmu(void) gpiop = of_find_node_by_name(NULL, "gpio"); if (gpiop) { - reg = (u32 *)get_property(gpiop, "reg", NULL); + reg = get_property(gpiop, "reg", NULL); if (reg) gaddr = of_translate_address(gpiop, reg); if (gaddr != OF_BAD_ADDR) @@ -486,9 +486,9 @@ static int __init via_pmu_dev_init(void) pmu_batteries[1].flags |= PMU_BATT_TYPE_SMART; } else { struct device_node* prim = find_devices("power-mgt"); - u32 *prim_info = NULL; + const u32 *prim_info = NULL; if (prim) - prim_info = (u32 *)get_property(prim, "prim-info", NULL); + prim_info = get_property(prim, "prim-info", NULL); if (prim_info) { /* Other stuffs here yet unknown */ pmu_battery_count = (prim_info[6] >> 16) & 0xff; diff --git a/drivers/macintosh/windfarm_pm81.c b/drivers/macintosh/windfarm_pm81.c index f1df6efcbe68..2ff546e4c92f 100644 --- a/drivers/macintosh/windfarm_pm81.c +++ b/drivers/macintosh/windfarm_pm81.c @@ -396,7 +396,7 @@ static void wf_smu_sys_fans_tick(struct wf_smu_sys_fans_state *st) static void wf_smu_create_cpu_fans(void) { struct wf_cpu_pid_param pid_param; - struct smu_sdbp_header *hdr; + const struct smu_sdbp_header *hdr; struct smu_sdbp_cpupiddata *piddata; struct smu_sdbp_fvt *fvt; s32 tmax, tdelta, maxpow, powadj; @@ -702,7 +702,7 @@ static struct notifier_block wf_smu_events = { static int wf_init_pm(void) { - struct smu_sdbp_header *hdr; + const struct smu_sdbp_header *hdr; hdr = smu_get_sdb_partition(SMU_SDB_SENSORTREE_ID, NULL); if (hdr != 0) { diff --git a/drivers/macintosh/windfarm_pm91.c b/drivers/macintosh/windfarm_pm91.c index 0d6372e96d32..59e9ffe37c39 100644 --- a/drivers/macintosh/windfarm_pm91.c +++ b/drivers/macintosh/windfarm_pm91.c @@ -144,7 +144,7 @@ static struct wf_smu_slots_fans_state *wf_smu_slots_fans; static void wf_smu_create_cpu_fans(void) { struct wf_cpu_pid_param pid_param; - struct smu_sdbp_header *hdr; + const struct smu_sdbp_header *hdr; struct smu_sdbp_cpupiddata *piddata; struct smu_sdbp_fvt *fvt; s32 tmax, tdelta, maxpow, powadj; diff --git a/drivers/macintosh/windfarm_smu_controls.c b/drivers/macintosh/windfarm_smu_controls.c index a9e88edc0c72..bff1f372f188 100644 --- a/drivers/macintosh/windfarm_smu_controls.c +++ b/drivers/macintosh/windfarm_smu_controls.c @@ -159,14 +159,15 @@ static struct smu_fan_control *smu_fan_create(struct device_node *node, int pwm_fan) { struct smu_fan_control *fct; - s32 *v; u32 *reg; - char *l; + const s32 *v; + const u32 *reg; + const char *l; fct = kmalloc(sizeof(struct smu_fan_control), GFP_KERNEL); if (fct == NULL) return NULL; fct->ctrl.ops = &smu_fan_ops; - l = (char *)get_property(node, "location", NULL); + l = get_property(node, "location", NULL); if (l == NULL) goto fail; @@ -223,17 +224,17 @@ static struct smu_fan_control *smu_fan_create(struct device_node *node, goto fail; /* Get min & max values*/ - v = (s32 *)get_property(node, "min-value", NULL); + v = get_property(node, "min-value", NULL); if (v == NULL) goto fail; fct->min = *v; - v = (s32 *)get_property(node, "max-value", NULL); + v = get_property(node, "max-value", NULL); if (v == NULL) goto fail; fct->max = *v; /* Get "reg" value */ - reg = (u32 *)get_property(node, "reg", NULL); + reg = get_property(node, "reg", NULL); if (reg == NULL) goto fail; fct->reg = *reg; diff --git a/drivers/macintosh/windfarm_smu_sat.c b/drivers/macintosh/windfarm_smu_sat.c index e295a07a1ebc..aceb61d9fbc8 100644 --- a/drivers/macintosh/windfarm_smu_sat.c +++ b/drivers/macintosh/windfarm_smu_sat.c @@ -233,15 +233,15 @@ static void wf_sat_create(struct i2c_adapter *adapter, struct device_node *dev) { struct wf_sat *sat; struct wf_sat_sensor *sens; - u32 *reg; - char *loc, *type; + const u32 *reg; + const char *loc, *type; u8 addr, chip, core; struct device_node *child; int shift, cpu, index; char *name; int vsens[2], isens[2]; - reg = (u32 *) get_property(dev, "reg", NULL); + reg = get_property(dev, "reg", NULL); if (reg == NULL) return; addr = *reg; @@ -268,7 +268,7 @@ static void wf_sat_create(struct i2c_adapter *adapter, struct device_node *dev) isens[0] = isens[1] = -1; child = NULL; while ((child = of_get_next_child(dev, child)) != NULL) { - reg = (u32 *) get_property(child, "reg", NULL); + reg = get_property(child, "reg", NULL); type = get_property(child, "device_type", NULL); loc = get_property(child, "location", NULL); if (reg == NULL || loc == NULL) diff --git a/drivers/macintosh/windfarm_smu_sensors.c b/drivers/macintosh/windfarm_smu_sensors.c index bed25dcf8a1e..defe9922ebd1 100644 --- a/drivers/macintosh/windfarm_smu_sensors.c +++ b/drivers/macintosh/windfarm_smu_sensors.c @@ -198,14 +198,14 @@ static struct wf_sensor_ops smu_slotspow_ops = { static struct smu_ad_sensor *smu_ads_create(struct device_node *node) { struct smu_ad_sensor *ads; - char *c, *l; - u32 *v; + const char *c, *l; + const u32 *v; ads = kmalloc(sizeof(struct smu_ad_sensor), GFP_KERNEL); if (ads == NULL) return NULL; - c = (char *)get_property(node, "device_type", NULL); - l = (char *)get_property(node, "location", NULL); + c = get_property(node, "device_type", NULL); + l = get_property(node, "location", NULL); if (c == NULL || l == NULL) goto fail; @@ -255,7 +255,7 @@ static struct smu_ad_sensor *smu_ads_create(struct device_node *node) } else goto fail; - v = (u32 *)get_property(node, "reg", NULL); + v = get_property(node, "reg", NULL); if (v == NULL) goto fail; ads->reg = *v; @@ -382,7 +382,7 @@ smu_cpu_power_create(struct wf_sensor *volts, struct wf_sensor *amps) static void smu_fetch_param_partitions(void) { - struct smu_sdbp_header *hdr; + const struct smu_sdbp_header *hdr; /* Get CPU voltage/current/power calibration data */ hdr = smu_get_sdb_partition(SMU_SDB_CPUVCP_ID, NULL); diff --git a/drivers/serial/pmac_zilog.c b/drivers/serial/pmac_zilog.c index bfd2a22759eb..a3b99caf80e6 100644 --- a/drivers/serial/pmac_zilog.c +++ b/drivers/serial/pmac_zilog.c @@ -1400,8 +1400,8 @@ static struct uart_ops pmz_pops = { static int __init pmz_init_port(struct uart_pmac_port *uap) { struct device_node *np = uap->node; - char *conn; - struct slot_names_prop { + const char *conn; + const struct slot_names_prop { int count; char name[1]; } *slots; @@ -1458,7 +1458,7 @@ no_dma: uap->flags |= PMACZILOG_FLAG_IS_IRDA; uap->port_type = PMAC_SCC_ASYNC; /* 1999 Powerbook G3 has slot-names property instead */ - slots = (struct slot_names_prop *)get_property(np, "slot-names", &len); + slots = get_property(np, "slot-names", &len); if (slots && slots->count > 0) { if (strcmp(slots->name, "IrDA") == 0) uap->flags |= PMACZILOG_FLAG_IS_IRDA; @@ -1470,7 +1470,8 @@ no_dma: if (ZS_IS_INTMODEM(uap)) { struct device_node* i2c_modem = find_devices("i2c-modem"); if (i2c_modem) { - char* mid = get_property(i2c_modem, "modem-id", NULL); + const char* mid = + get_property(i2c_modem, "modem-id", NULL); if (mid) switch(*mid) { case 0x04 : case 0x05 : diff --git a/include/asm-powerpc/smu.h b/include/asm-powerpc/smu.h index 51e65fc46a03..e49f644ca63a 100644 --- a/include/asm-powerpc/smu.h +++ b/include/asm-powerpc/smu.h @@ -517,7 +517,7 @@ struct smu_sdbp_cpupiddata { * This returns the pointer to an SMU "sdb" partition data or NULL * if not found. The data format is described below */ -extern struct smu_sdbp_header *smu_get_sdb_partition(int id, +extern const struct smu_sdbp_header *smu_get_sdb_partition(int id, unsigned int *size); /* Get "sdb" partition data from an SMU satellite */ From b04e3dd4ab4c7763a4ca8f751caaf69ce8dabbba Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 12 Jul 2006 15:40:40 +1000 Subject: [PATCH 0052/1063] [POWERPC] video & agp: Constify & voidify get_property() Now that get_property() returns a void *, there's no need to cast its return value. Also, treat the return value as const, so we can constify get_property later. powerpc-specific video & agp driver changes. Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- drivers/char/agp/uninorth-agp.c | 4 ++-- drivers/video/S3triofb.c | 12 ++++++------ drivers/video/aty/radeon_base.c | 8 ++++---- drivers/video/aty/radeon_monitor.c | 12 ++++++------ drivers/video/aty/radeon_pm.c | 4 ++-- drivers/video/nvidia/nv_of.c | 12 +++++------- drivers/video/offb.c | 22 +++++++++++----------- drivers/video/riva/fbdev.c | 5 ++--- 8 files changed, 38 insertions(+), 41 deletions(-) diff --git a/drivers/char/agp/uninorth-agp.c b/drivers/char/agp/uninorth-agp.c index 1de1b12043bf..91b71e750ee1 100644 --- a/drivers/char/agp/uninorth-agp.c +++ b/drivers/char/agp/uninorth-agp.c @@ -601,8 +601,8 @@ static int __devinit agp_uninorth_probe(struct pci_dev *pdev, uninorth_node = of_find_node_by_name(NULL, "u3"); } if (uninorth_node) { - int *revprop = (int *) - get_property(uninorth_node, "device-rev", NULL); + const int *revprop = get_property(uninorth_node, + "device-rev", NULL); if (revprop != NULL) uninorth_rev = *revprop & 0x3f; of_node_put(uninorth_node); diff --git a/drivers/video/S3triofb.c b/drivers/video/S3triofb.c index e714e8449c1d..0f2ed75a681f 100644 --- a/drivers/video/S3triofb.c +++ b/drivers/video/S3triofb.c @@ -350,30 +350,30 @@ static void __init s3triofb_of_init(struct device_node *dp) s3trio_name[sizeof(s3trio_name)-1] = '\0'; strcpy(fb_fix.id, s3trio_name); - if((pp = (int *)get_property(dp, "vendor-id", &len)) != NULL + if((pp = get_property(dp, "vendor-id", &len)) != NULL && *pp!=PCI_VENDOR_ID_S3) { printk("%s: can't find S3 Trio board\n", dp->full_name); return; } - if((pp = (int *)get_property(dp, "device-id", &len)) != NULL + if((pp = get_property(dp, "device-id", &len)) != NULL && *pp!=PCI_DEVICE_ID_S3_TRIO) { printk("%s: can't find S3 Trio board\n", dp->full_name); return; } - if ((pp = (int *)get_property(dp, "depth", &len)) != NULL + if ((pp = get_property(dp, "depth", &len)) != NULL && len == sizeof(int) && *pp != 8) { printk("%s: can't use depth = %d\n", dp->full_name, *pp); return; } - if ((pp = (int *)get_property(dp, "width", &len)) != NULL + if ((pp = get_property(dp, "width", &len)) != NULL && len == sizeof(int)) fb_var.xres = fb_var.xres_virtual = *pp; - if ((pp = (int *)get_property(dp, "height", &len)) != NULL + if ((pp = get_property(dp, "height", &len)) != NULL && len == sizeof(int)) fb_var.yres = fb_var.yres_virtual = *pp; - if ((pp = (int *)get_property(dp, "linebytes", &len)) != NULL + if ((pp = get_property(dp, "linebytes", &len)) != NULL && len == sizeof(int)) fb_fix.line_length = *pp; else diff --git a/drivers/video/aty/radeon_base.c b/drivers/video/aty/radeon_base.c index 51b78f8de949..60c37add2579 100644 --- a/drivers/video/aty/radeon_base.c +++ b/drivers/video/aty/radeon_base.c @@ -412,11 +412,11 @@ static int __devinit radeon_find_mem_vbios(struct radeonfb_info *rinfo) static int __devinit radeon_read_xtal_OF (struct radeonfb_info *rinfo) { struct device_node *dp = rinfo->of_node; - u32 *val; + const u32 *val; if (dp == NULL) return -ENODEV; - val = (u32 *) get_property(dp, "ATY,RefCLK", NULL); + val = get_property(dp, "ATY,RefCLK", NULL); if (!val || !*val) { printk(KERN_WARNING "radeonfb: No ATY,RefCLK property !\n"); return -EINVAL; @@ -424,11 +424,11 @@ static int __devinit radeon_read_xtal_OF (struct radeonfb_info *rinfo) rinfo->pll.ref_clk = (*val) / 10; - val = (u32 *) get_property(dp, "ATY,SCLK", NULL); + val = get_property(dp, "ATY,SCLK", NULL); if (val && *val) rinfo->pll.sclk = (*val) / 10; - val = (u32 *) get_property(dp, "ATY,MCLK", NULL); + val = get_property(dp, "ATY,MCLK", NULL); if (val && *val) rinfo->pll.mclk = (*val) / 10; diff --git a/drivers/video/aty/radeon_monitor.c b/drivers/video/aty/radeon_monitor.c index 98c05bc0de44..ea531a6f45d1 100644 --- a/drivers/video/aty/radeon_monitor.c +++ b/drivers/video/aty/radeon_monitor.c @@ -64,13 +64,13 @@ static int __devinit radeon_parse_montype_prop(struct device_node *dp, u8 **out_ { static char *propnames[] = { "DFP,EDID", "LCD,EDID", "EDID", "EDID1", "EDID2", NULL }; - u8 *pedid = NULL; - u8 *pmt = NULL; + const u8 *pedid = NULL; + const u8 *pmt = NULL; u8 *tmp; int i, mt = MT_NONE; RTRACE("analyzing OF properties...\n"); - pmt = (u8 *)get_property(dp, "display-type", NULL); + pmt = get_property(dp, "display-type", NULL); if (!pmt) return MT_NONE; RTRACE("display-type: %s\n", pmt); @@ -89,7 +89,7 @@ static int __devinit radeon_parse_montype_prop(struct device_node *dp, u8 **out_ } for (i = 0; propnames[i] != NULL; ++i) { - pedid = (u8 *)get_property(dp, propnames[i], NULL); + pedid = get_property(dp, propnames[i], NULL); if (pedid != NULL) break; } @@ -124,14 +124,14 @@ static int __devinit radeon_probe_OF_head(struct radeonfb_info *rinfo, int head_ return MT_NONE; if (rinfo->has_CRTC2) { - char *pname; + const char *pname; int len, second = 0; dp = dp->child; do { if (!dp) return MT_NONE; - pname = (char *)get_property(dp, "name", NULL); + pname = get_property(dp, "name", NULL); if (!pname) return MT_NONE; len = strlen(pname); diff --git a/drivers/video/aty/radeon_pm.c b/drivers/video/aty/radeon_pm.c index c7091761cef4..b9b9396d3bde 100644 --- a/drivers/video/aty/radeon_pm.c +++ b/drivers/video/aty/radeon_pm.c @@ -1167,7 +1167,7 @@ static void radeon_pm_full_reset_sdram(struct radeonfb_info *rinfo) 0x21320032, 0xa1320032, 0x21320032, 0xffffffff, 0x31320032 }; - u32 *mrtable = default_mrtable; + const u32 *mrtable = default_mrtable; int i, mrtable_size = ARRAY_SIZE(default_mrtable); mdelay(30); @@ -1186,7 +1186,7 @@ static void radeon_pm_full_reset_sdram(struct radeonfb_info *rinfo) if (rinfo->of_node != NULL) { int size; - mrtable = (u32 *)get_property(rinfo->of_node, "ATY,MRT", &size); + mrtable = get_property(rinfo->of_node, "ATY,MRT", &size); if (mrtable) mrtable_size = size >> 2; else diff --git a/drivers/video/nvidia/nv_of.c b/drivers/video/nvidia/nv_of.c index 8209106e26ee..d9af88c2b580 100644 --- a/drivers/video/nvidia/nv_of.c +++ b/drivers/video/nvidia/nv_of.c @@ -32,7 +32,7 @@ int nvidia_probe_of_connector(struct fb_info *info, int conn, u8 **out_edid) { struct nvidia_par *par = info->par; struct device_node *parent, *dp; - unsigned char *pedid = NULL; + const unsigned char *pedid = NULL; static char *propnames[] = { "DFP,EDID", "LCD,EDID", "EDID", "EDID1", "EDID,B", "EDID,A", NULL }; @@ -42,20 +42,19 @@ int nvidia_probe_of_connector(struct fb_info *info, int conn, u8 **out_edid) if (parent == NULL) return -1; if (par->twoHeads) { - char *pname; + const char *pname; int len; for (dp = NULL; (dp = of_get_next_child(parent, dp)) != NULL;) { - pname = (char *)get_property(dp, "name", NULL); + pname = get_property(dp, "name", NULL); if (!pname) continue; len = strlen(pname); if ((pname[len-1] == 'A' && conn == 1) || (pname[len-1] == 'B' && conn == 2)) { for (i = 0; propnames[i] != NULL; ++i) { - pedid = (unsigned char *) - get_property(dp, propnames[i], + pedid = get_property(dp, propnames[i], NULL); if (pedid != NULL) break; @@ -67,8 +66,7 @@ int nvidia_probe_of_connector(struct fb_info *info, int conn, u8 **out_edid) } if (pedid == NULL) { for (i = 0; propnames[i] != NULL; ++i) { - pedid = (unsigned char *) - get_property(parent, propnames[i], NULL); + pedid = get_property(parent, propnames[i], NULL); if (pedid != NULL) break; } diff --git a/drivers/video/offb.c b/drivers/video/offb.c index faba67228526..0e750a8510fc 100644 --- a/drivers/video/offb.c +++ b/drivers/video/offb.c @@ -410,30 +410,30 @@ static void __init offb_init_nodriver(struct device_node *dp, int no_real_node) unsigned int flags, rsize, addr_prop = 0; unsigned long max_size = 0; u64 rstart, address = OF_BAD_ADDR; - u32 *pp, *addrp, *up; + const u32 *pp, *addrp, *up; u64 asize; - pp = (u32 *)get_property(dp, "linux,bootx-depth", &len); + pp = get_property(dp, "linux,bootx-depth", &len); if (pp == NULL) - pp = (u32 *)get_property(dp, "depth", &len); + pp = get_property(dp, "depth", &len); if (pp && len == sizeof(u32)) depth = *pp; - pp = (u32 *)get_property(dp, "linux,bootx-width", &len); + pp = get_property(dp, "linux,bootx-width", &len); if (pp == NULL) - pp = (u32 *)get_property(dp, "width", &len); + pp = get_property(dp, "width", &len); if (pp && len == sizeof(u32)) width = *pp; - pp = (u32 *)get_property(dp, "linux,bootx-height", &len); + pp = get_property(dp, "linux,bootx-height", &len); if (pp == NULL) - pp = (u32 *)get_property(dp, "height", &len); + pp = get_property(dp, "height", &len); if (pp && len == sizeof(u32)) height = *pp; - pp = (u32 *)get_property(dp, "linux,bootx-linebytes", &len); + pp = get_property(dp, "linux,bootx-linebytes", &len); if (pp == NULL) - pp = (u32 *)get_property(dp, "linebytes", &len); + pp = get_property(dp, "linebytes", &len); if (pp && len == sizeof(u32)) pitch = *pp; else @@ -451,9 +451,9 @@ static void __init offb_init_nodriver(struct device_node *dp, int no_real_node) * ranges and pick one that is both big enough and if possible encloses * the "address" property. If none match, we pick the biggest */ - up = (u32 *)get_property(dp, "linux,bootx-addr", &len); + up = get_property(dp, "linux,bootx-addr", &len); if (up == NULL) - up = (u32 *)get_property(dp, "address", &len); + up = get_property(dp, "address", &len); if (up && len == sizeof(u32)) addr_prop = *up; diff --git a/drivers/video/riva/fbdev.c b/drivers/video/riva/fbdev.c index 2788655e6e7d..6a30c0ca1de0 100644 --- a/drivers/video/riva/fbdev.c +++ b/drivers/video/riva/fbdev.c @@ -1816,14 +1816,13 @@ static int __devinit riva_get_EDID_OF(struct fb_info *info, struct pci_dev *pd) NVTRACE_ENTER(); dp = pci_device_to_OF_node(pd); for (; dp != NULL; dp = dp->child) { - disptype = (unsigned char *)get_property(dp, "display-type", NULL); + disptype = get_property(dp, "display-type", NULL); if (disptype == NULL) continue; if (strncmp(disptype, "LCD", 3) != 0) continue; for (i = 0; propnames[i] != NULL; ++i) { - pedid = (unsigned char *) - get_property(dp, propnames[i], NULL); + pedid = get_property(dp, propnames[i], NULL); if (pedid != NULL) { par->EDID = pedid; NVTRACE("LCD found.\n"); From 294ef16a2ee34d0d94aa63616f7552d3bc66c982 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 12 Jul 2006 15:40:51 +1000 Subject: [PATCH 0053/1063] [POWERPC] scsi: Constify & voidify get_property() Now that get_property() returns a void *, there's no need to cast its return value. Also, treat the return value as const, so we can constify get_property later. powerpc-specific scsi driver changes. Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- drivers/scsi/ibmvscsi/rpa_vscsi.c | 11 ++++------- drivers/scsi/mac53c94.c | 2 +- drivers/scsi/mesh.c | 5 +++-- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/ibmvscsi/rpa_vscsi.c b/drivers/scsi/ibmvscsi/rpa_vscsi.c index 242b8873b333..cafef9cbbe2e 100644 --- a/drivers/scsi/ibmvscsi/rpa_vscsi.c +++ b/drivers/scsi/ibmvscsi/rpa_vscsi.c @@ -156,8 +156,8 @@ static void gather_partition_info(void) { struct device_node *rootdn; - char *ppartition_name; - unsigned int *p_number_ptr; + const char *ppartition_name; + const unsigned int *p_number_ptr; /* Retrieve information about this partition */ rootdn = find_path_device("/"); @@ -165,14 +165,11 @@ static void gather_partition_info(void) return; } - ppartition_name = - get_property(rootdn, "ibm,partition-name", NULL); + ppartition_name = get_property(rootdn, "ibm,partition-name", NULL); if (ppartition_name) strncpy(partition_name, ppartition_name, sizeof(partition_name)); - p_number_ptr = - (unsigned int *)get_property(rootdn, "ibm,partition-no", - NULL); + p_number_ptr = get_property(rootdn, "ibm,partition-no", NULL); if (p_number_ptr) partition_number = *p_number_ptr; } diff --git a/drivers/scsi/mac53c94.c b/drivers/scsi/mac53c94.c index 93edaa8696cf..c77f6f2581f7 100644 --- a/drivers/scsi/mac53c94.c +++ b/drivers/scsi/mac53c94.c @@ -431,7 +431,7 @@ static int mac53c94_probe(struct macio_dev *mdev, const struct of_device_id *mat struct fsc_state *state; struct Scsi_Host *host; void *dma_cmd_space; - unsigned char *clkprop; + const unsigned char *clkprop; int proplen, rc = -ENODEV; if (macio_resource_count(mdev) != 2 || macio_irq_count(mdev) != 2) { diff --git a/drivers/scsi/mesh.c b/drivers/scsi/mesh.c index c88717727be8..cee9758b9278 100644 --- a/drivers/scsi/mesh.c +++ b/drivers/scsi/mesh.c @@ -1850,7 +1850,8 @@ static int mesh_probe(struct macio_dev *mdev, const struct of_device_id *match) { struct device_node *mesh = macio_get_of_node(mdev); struct pci_dev* pdev = macio_get_pci_dev(mdev); - int tgt, *cfp, minper; + int tgt, minper; + const int *cfp; struct mesh_state *ms; struct Scsi_Host *mesh_host; void *dma_cmd_space; @@ -1939,7 +1940,7 @@ static int mesh_probe(struct macio_dev *mdev, const struct of_device_id *match) ms->tgts[tgt].current_req = NULL; } - if ((cfp = (int *) get_property(mesh, "clock-frequency", NULL))) + if ((cfp = get_property(mesh, "clock-frequency", NULL))) ms->clk_freq = *cfp; else { printk(KERN_INFO "mesh: assuming 50MHz clock frequency\n"); From 1a2509c946bfd4d4a4c5a6e816082d3a7de45db8 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 12 Jul 2006 15:41:03 +1000 Subject: [PATCH 0054/1063] [POWERPC] netdevices: Constify & voidify get_property() Now that get_property() returns a void *, there's no need to cast its return value. Also, treat the return value as const, so we can constify get_property later. powerpc-specific network device driver changes. Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- drivers/net/bmac.c | 13 ++++++++----- drivers/net/mace.c | 2 +- drivers/net/spider_net.c | 12 ++++++------ drivers/net/sungem.c | 2 +- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/net/bmac.c b/drivers/net/bmac.c index 6fad83f24c4f..711609665632 100644 --- a/drivers/net/bmac.c +++ b/drivers/net/bmac.c @@ -1264,7 +1264,8 @@ static int __devinit bmac_probe(struct macio_dev *mdev, const struct of_device_i { int j, rev, ret; struct bmac_data *bp; - unsigned char *addr; + const unsigned char *prop_addr; + unsigned char addr[6]; struct net_device *dev; int is_bmac_plus = ((int)match->data) != 0; @@ -1272,14 +1273,16 @@ static int __devinit bmac_probe(struct macio_dev *mdev, const struct of_device_i printk(KERN_ERR "BMAC: can't use, need 3 addrs and 3 intrs\n"); return -ENODEV; } - addr = get_property(macio_get_of_node(mdev), "mac-address", NULL); - if (addr == NULL) { - addr = get_property(macio_get_of_node(mdev), "local-mac-address", NULL); - if (addr == NULL) { + prop_addr = get_property(macio_get_of_node(mdev), "mac-address", NULL); + if (prop_addr == NULL) { + prop_addr = get_property(macio_get_of_node(mdev), + "local-mac-address", NULL); + if (prop_addr == NULL) { printk(KERN_ERR "BMAC: Can't get mac-address\n"); return -ENODEV; } } + memcpy(addr, prop_addr, sizeof(addr)); dev = alloc_etherdev(PRIV_BYTES); if (!dev) { diff --git a/drivers/net/mace.c b/drivers/net/mace.c index 29e4b5aa6ead..5d80e0e6a8e9 100644 --- a/drivers/net/mace.c +++ b/drivers/net/mace.c @@ -113,7 +113,7 @@ static int __devinit mace_probe(struct macio_dev *mdev, const struct of_device_i struct device_node *mace = macio_get_of_node(mdev); struct net_device *dev; struct mace_data *mp; - unsigned char *addr; + const unsigned char *addr; int j, rev, rc = -EBUSY; if (macio_resource_count(mdev) != 3 || macio_irq_count(mdev) != 3) { diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c index fb1d5a8a45cf..b30290d53f79 100644 --- a/drivers/net/spider_net.c +++ b/drivers/net/spider_net.c @@ -1812,10 +1812,10 @@ spider_net_setup_phy(struct spider_net_card *card) */ static int spider_net_download_firmware(struct spider_net_card *card, - u8 *firmware_ptr) + const void *firmware_ptr) { int sequencer, i; - u32 *fw_ptr = (u32 *)firmware_ptr; + const u32 *fw_ptr = firmware_ptr; /* stop sequencers */ spider_net_write_reg(card, SPIDER_NET_GSINIT, @@ -1872,7 +1872,7 @@ spider_net_init_firmware(struct spider_net_card *card) { struct firmware *firmware = NULL; struct device_node *dn; - u8 *fw_prop = NULL; + const u8 *fw_prop = NULL; int err = -ENOENT; int fw_size; @@ -1898,7 +1898,7 @@ try_host_fw: if (!dn) goto out_err; - fw_prop = (u8 *)get_property(dn, "firmware", &fw_size); + fw_prop = get_property(dn, "firmware", &fw_size); if (!fw_prop) goto out_err; @@ -2058,7 +2058,7 @@ spider_net_setup_netdev(struct spider_net_card *card) struct net_device *netdev = card->netdev; struct device_node *dn; struct sockaddr addr; - u8 *mac; + const u8 *mac; SET_MODULE_OWNER(netdev); SET_NETDEV_DEV(netdev, &card->pdev->dev); @@ -2089,7 +2089,7 @@ spider_net_setup_netdev(struct spider_net_card *card) if (!dn) return -EIO; - mac = (u8 *)get_property(dn, "local-mac-address", NULL); + mac = get_property(dn, "local-mac-address", NULL); if (!mac) return -EIO; memcpy(addr.sa_data, mac, ETH_ALEN); diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c index b70bbd748978..d7b1d1882cab 100644 --- a/drivers/net/sungem.c +++ b/drivers/net/sungem.c @@ -2896,7 +2896,7 @@ static int __devinit gem_get_device_address(struct gem *gp) if (use_idprom) memcpy(dev->dev_addr, idprom->id_ethaddr, 6); #elif defined(CONFIG_PPC_PMAC) - unsigned char *addr; + const unsigned char *addr; addr = get_property(gp->of_node, "local-mac-address", NULL); if (addr == NULL) { From abddd185a0195988b8a5e802d55aff91783489de Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 12 Jul 2006 15:41:18 +1000 Subject: [PATCH 0055/1063] [POWERPC] sound: Constify & voidify get_property() Now that get_property() returns a void *, there's no need to cast its return value. Also, treat the return value as const, so we can constify get_property later. powerpc-specific sound driver changes. Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- sound/aoa/core/snd-aoa-gpio-feature.c | 2 +- sound/oss/dmasound/dmasound_awacs.c | 11 ++++++----- sound/ppc/tumbler.c | 15 ++++++++------- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/sound/aoa/core/snd-aoa-gpio-feature.c b/sound/aoa/core/snd-aoa-gpio-feature.c index 7ae0c0bdfad8..2ab55330b31c 100644 --- a/sound/aoa/core/snd-aoa-gpio-feature.c +++ b/sound/aoa/core/snd-aoa-gpio-feature.c @@ -56,7 +56,7 @@ static struct device_node *get_gpio(char *name, { struct device_node *np, *gpio; u32 *reg; - char *audio_gpio; + const char *audio_gpio; *gpioptr = -1; diff --git a/sound/oss/dmasound/dmasound_awacs.c b/sound/oss/dmasound/dmasound_awacs.c index 4359903f4376..9ae659f82430 100644 --- a/sound/oss/dmasound/dmasound_awacs.c +++ b/sound/oss/dmasound/dmasound_awacs.c @@ -347,8 +347,8 @@ int setup_audio_gpio(const char *name, const char* compatible, int *gpio_addr, int* gpio_pol) { struct device_node *np; - u32* pp; - + const u32* pp; + np = find_devices("gpio"); if (!np) return -ENODEV; @@ -356,7 +356,8 @@ setup_audio_gpio(const char *name, const char* compatible, int *gpio_addr, int* np = np->child; while(np != 0) { if (name) { - char *property = get_property(np,"audio-gpio",NULL); + const char *property = + get_property(np,"audio-gpio",NULL); if (property != 0 && strcmp(property,name) == 0) break; } else if (compatible && device_is_compatible(np, compatible)) @@ -365,11 +366,11 @@ setup_audio_gpio(const char *name, const char* compatible, int *gpio_addr, int* } if (!np) return -ENODEV; - pp = (u32 *)get_property(np, "AAPL,address", NULL); + pp = get_property(np, "AAPL,address", NULL); if (!pp) return -ENODEV; *gpio_addr = (*pp) & 0x0000ffff; - pp = (u32 *)get_property(np, "audio-gpio-active-state", NULL); + pp = get_property(np, "audio-gpio-active-state", NULL); if (pp) *gpio_pol = *pp; else diff --git a/sound/ppc/tumbler.c b/sound/ppc/tumbler.c index 692c61177678..b94ecd0ebab2 100644 --- a/sound/ppc/tumbler.c +++ b/sound/ppc/tumbler.c @@ -1035,7 +1035,7 @@ static struct device_node *find_audio_device(const char *name) return NULL; for (np = np->child; np; np = np->sibling) { - char *property = get_property(np, "audio-gpio", NULL); + const char *property = get_property(np, "audio-gpio", NULL); if (property && strcmp(property, name) == 0) return np; } @@ -1062,7 +1062,8 @@ static long tumbler_find_device(const char *device, const char *platform, struct pmac_gpio *gp, int is_compatible) { struct device_node *node; - u32 *base, addr; + const u32 *base; + u32 addr; if (is_compatible) node = find_compatible_audio_device(device); @@ -1074,9 +1075,9 @@ static long tumbler_find_device(const char *device, const char *platform, return -ENODEV; } - base = (u32 *)get_property(node, "AAPL,address", NULL); + base = get_property(node, "AAPL,address", NULL); if (! base) { - base = (u32 *)get_property(node, "reg", NULL); + base = get_property(node, "reg", NULL); if (!base) { DBG("(E) cannot find address for device %s !\n", device); snd_printd("cannot find address for device %s\n", device); @@ -1090,13 +1091,13 @@ static long tumbler_find_device(const char *device, const char *platform, gp->addr = addr & 0x0000ffff; /* Try to find the active state, default to 0 ! */ - base = (u32 *)get_property(node, "audio-gpio-active-state", NULL); + base = get_property(node, "audio-gpio-active-state", NULL); if (base) { gp->active_state = *base; gp->active_val = (*base) ? 0x5 : 0x4; gp->inactive_val = (*base) ? 0x4 : 0x5; } else { - u32 *prop = NULL; + const u32 *prop = NULL; gp->active_state = 0; gp->active_val = 0x4; gp->inactive_val = 0x5; @@ -1105,7 +1106,7 @@ static long tumbler_find_device(const char *device, const char *platform, * as we don't yet have an interpreter for these things */ if (platform) - prop = (u32 *)get_property(node, platform, NULL); + prop = get_property(node, platform, NULL); if (prop) { if (prop[3] == 0x9 && prop[4] == 0x9) { gp->active_val = 0xd; From 5c339e96a391476ebb7cc63d913445c8cee092ff Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 12 Jul 2006 15:41:30 +1000 Subject: [PATCH 0056/1063] [POWERPC] tmp_atmel: Constify & voidify get_property() Now that get_property() returns a void *, there's no need to cast its return value. Also, treat the return value as const, so we can constify get_property later. tpm_atmel changes Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- drivers/char/tpm/tpm_atmel.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm_atmel.h b/drivers/char/tpm/tpm_atmel.h index 2e68eeb8a2cd..aefd683c60b7 100644 --- a/drivers/char/tpm/tpm_atmel.h +++ b/drivers/char/tpm/tpm_atmel.h @@ -37,7 +37,7 @@ static void __iomem * atmel_get_base_addr(unsigned long *base, int *region_size) { struct device_node *dn; unsigned long address, size; - unsigned int *reg; + const unsigned int *reg; int reglen; int naddrc; int nsizec; @@ -52,7 +52,7 @@ static void __iomem * atmel_get_base_addr(unsigned long *base, int *region_size) return NULL; } - reg = (unsigned int *) get_property(dn, "reg", ®len); + reg = get_property(dn, "reg", ®len); naddrc = prom_n_addr_cells(dn); nsizec = prom_n_size_cells(dn); From af5f92d881d783b47d1f993ddffa2bce8b2993fe Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 12 Jul 2006 15:41:41 +1000 Subject: [PATCH 0057/1063] [POWERPC] sata_svw: Constify & voidify get_property() Now that get_property() returns a void *, there's no need to cast its return value. Also, treat the return value as const, so we can constify get_property later. sata_svw changes Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- drivers/scsi/sata_svw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sata_svw.c b/drivers/scsi/sata_svw.c index 7d0858095e1f..6b70c3c76dfd 100644 --- a/drivers/scsi/sata_svw.c +++ b/drivers/scsi/sata_svw.c @@ -268,7 +268,7 @@ static int k2_sata_proc_info(struct Scsi_Host *shost, char *page, char **start, /* Match it to a port node */ index = (ap == ap->host_set->ports[0]) ? 0 : 1; for (np = np->child; np != NULL; np = np->sibling) { - u32 *reg = (u32 *)get_property(np, "reg", NULL); + const u32 *reg = get_property(np, "reg", NULL); if (!reg) continue; if (index == *reg) From 88c805940bb9a1478f06ed6dd5d6f660bdc38eaa Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 12 Jul 2006 15:41:52 +1000 Subject: [PATCH 0058/1063] [POWERPC] tsi108: Constify & voidify get_property() Now that get_property() returns a void *, there's no need to cast its return value. Also, treat the return value as const, so we can constify get_property later. tsi108 driver changes. Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/sysdev/tsi108_pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c index 3265d54c82ed..f6c492f8ab95 100644 --- a/arch/powerpc/sysdev/tsi108_pci.c +++ b/arch/powerpc/sysdev/tsi108_pci.c @@ -195,7 +195,7 @@ int __init tsi108_setup_pci(struct device_node *dev) int len; struct pci_controller *hose; struct resource rsrc; - int *bus_range; + const int *bus_range; int primary = 0, has_address = 0; /* PCI Config mapping */ @@ -208,7 +208,7 @@ int __init tsi108_setup_pci(struct device_node *dev) has_address = (of_address_to_resource(dev, 0, &rsrc) == 0); /* Get bus range if any */ - bus_range = (int *)get_property(dev, "bus-range", &len); + bus_range = get_property(dev, "bus-range", &len); if (bus_range == NULL || len < 2 * sizeof(int)) { printk(KERN_WARNING "Can't get bus-range for %s, assume" " bus 0\n", dev->full_name); From 931b261f442e779b0656d9b04c7ffe4939ef8c0a Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 12 Jul 2006 15:42:06 +1000 Subject: [PATCH 0059/1063] [POWERPC] Make get_property() return a const void * Previous changes have treated the return values of get_property as const, so now we can make the actual change to get_property(). There shouldn't be a need to cast the return values anymore. We will now get compiler warnings when property values are assigned to a non-const variable. If properties need to be updated, there's still the of_find_property function. Built for cell_defconfig, chrp32_defconfig, g5_defconfig, iseries_defconfig, maple_defconfig, pmac32_defconfig, ppc64_defconfig and pseries_defconfig. Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/prom.c | 2 +- include/asm-powerpc/prom.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 2a3d84a39cb5..bf2005b2feb6 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -1527,7 +1527,7 @@ struct property *of_find_property(struct device_node *np, const char *name, * Find a property with a given name for a given node * and return the value. */ -void *get_property(struct device_node *np, const char *name, int *lenp) +const void *get_property(struct device_node *np, const char *name, int *lenp) { struct property *pp = of_find_property(np,name,lenp); return pp ? pp->value : NULL; diff --git a/include/asm-powerpc/prom.h b/include/asm-powerpc/prom.h index abdf1be66e97..31bfea4686a6 100644 --- a/include/asm-powerpc/prom.h +++ b/include/asm-powerpc/prom.h @@ -160,7 +160,7 @@ extern void unflatten_device_tree(void); extern void early_init_devtree(void *); extern int device_is_compatible(struct device_node *device, const char *); extern int machine_is_compatible(const char *compat); -extern void *get_property(struct device_node *node, const char *name, +extern const void *get_property(struct device_node *node, const char *name, int *lenp); extern void print_properties(struct device_node *node); extern int prom_n_addr_cells(struct device_node* np); From 5d33eebee83784f5f03bc3861fa92ee5cd831922 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 13 Jul 2006 16:32:52 +1000 Subject: [PATCH 0060/1063] [POWERPC] Simplify dma_ops bug conditions Use BUG_ON rather than BUG to simplify the dma_ops handing, and remove the now-unnecessary return cases. Booted on pseries. Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/dma_64.c | 65 ++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 37 deletions(-) diff --git a/arch/powerpc/kernel/dma_64.c b/arch/powerpc/kernel/dma_64.c index 36aaa7663f02..6c168f6ea142 100644 --- a/arch/powerpc/kernel/dma_64.c +++ b/arch/powerpc/kernel/dma_64.c @@ -35,10 +35,9 @@ int dma_supported(struct device *dev, u64 mask) { struct dma_mapping_ops *dma_ops = get_dma_ops(dev); - if (dma_ops) - return dma_ops->dma_supported(dev, mask); - BUG(); - return 0; + BUG_ON(!dma_ops); + + return dma_ops->dma_supported(dev, mask); } EXPORT_SYMBOL(dma_supported); @@ -66,10 +65,9 @@ void *dma_alloc_coherent(struct device *dev, size_t size, { struct dma_mapping_ops *dma_ops = get_dma_ops(dev); - if (dma_ops) - return dma_ops->alloc_coherent(dev, size, dma_handle, flag); - BUG(); - return NULL; + BUG_ON(!dma_ops); + + return dma_ops->alloc_coherent(dev, size, dma_handle, flag); } EXPORT_SYMBOL(dma_alloc_coherent); @@ -78,10 +76,9 @@ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, { struct dma_mapping_ops *dma_ops = get_dma_ops(dev); - if (dma_ops) - dma_ops->free_coherent(dev, size, cpu_addr, dma_handle); - else - BUG(); + BUG_ON(!dma_ops); + + dma_ops->free_coherent(dev, size, cpu_addr, dma_handle); } EXPORT_SYMBOL(dma_free_coherent); @@ -90,10 +87,9 @@ dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, size_t size, { struct dma_mapping_ops *dma_ops = get_dma_ops(dev); - if (dma_ops) - return dma_ops->map_single(dev, cpu_addr, size, direction); - BUG(); - return (dma_addr_t)0; + BUG_ON(!dma_ops); + + return dma_ops->map_single(dev, cpu_addr, size, direction); } EXPORT_SYMBOL(dma_map_single); @@ -102,10 +98,9 @@ void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, { struct dma_mapping_ops *dma_ops = get_dma_ops(dev); - if (dma_ops) - dma_ops->unmap_single(dev, dma_addr, size, direction); - else - BUG(); + BUG_ON(!dma_ops); + + dma_ops->unmap_single(dev, dma_addr, size, direction); } EXPORT_SYMBOL(dma_unmap_single); @@ -115,11 +110,10 @@ dma_addr_t dma_map_page(struct device *dev, struct page *page, { struct dma_mapping_ops *dma_ops = get_dma_ops(dev); - if (dma_ops) - return dma_ops->map_single(dev, - (page_address(page) + offset), size, direction); - BUG(); - return (dma_addr_t)0; + BUG_ON(!dma_ops); + + return dma_ops->map_single(dev, page_address(page) + offset, size, + direction); } EXPORT_SYMBOL(dma_map_page); @@ -128,10 +122,9 @@ void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, { struct dma_mapping_ops *dma_ops = get_dma_ops(dev); - if (dma_ops) - dma_ops->unmap_single(dev, dma_address, size, direction); - else - BUG(); + BUG_ON(!dma_ops); + + dma_ops->unmap_single(dev, dma_address, size, direction); } EXPORT_SYMBOL(dma_unmap_page); @@ -140,10 +133,9 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, { struct dma_mapping_ops *dma_ops = get_dma_ops(dev); - if (dma_ops) - return dma_ops->map_sg(dev, sg, nents, direction); - BUG(); - return 0; + BUG_ON(!dma_ops); + + return dma_ops->map_sg(dev, sg, nents, direction); } EXPORT_SYMBOL(dma_map_sg); @@ -152,9 +144,8 @@ void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, { struct dma_mapping_ops *dma_ops = get_dma_ops(dev); - if (dma_ops) - dma_ops->unmap_sg(dev, sg, nhwentries, direction); - else - BUG(); + BUG_ON(!dma_ops); + + dma_ops->unmap_sg(dev, sg, nhwentries, direction); } EXPORT_SYMBOL(dma_unmap_sg); From cb18bd40030c879cd93fef02fd579f74dbab473d Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Thu, 20 Jul 2006 23:39:51 -0700 Subject: [PATCH 0061/1063] [POWERPC] Instrument Hypervisor Calls: merge headers Move all the Hypervisor call definitions to to a single header file. Signed-off-by: Mike Kravetz Signed-off-by: Paul Mackerras --- drivers/net/ibmveth.h | 10 ---------- include/asm-powerpc/hvcall.h | 8 ++++++++ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/net/ibmveth.h b/drivers/net/ibmveth.h index 8385bf836507..149191cef2f0 100644 --- a/drivers/net/ibmveth.h +++ b/drivers/net/ibmveth.h @@ -41,16 +41,6 @@ #define IbmVethMcastRemoveFilter 0x2UL #define IbmVethMcastClearFilterTable 0x3UL -/* hcall numbers */ -#define H_VIO_SIGNAL 0x104 -#define H_REGISTER_LOGICAL_LAN 0x114 -#define H_FREE_LOGICAL_LAN 0x118 -#define H_ADD_LOGICAL_LAN_BUFFER 0x11C -#define H_SEND_LOGICAL_LAN 0x120 -#define H_MULTICAST_CTRL 0x130 -#define H_CHANGE_LOGICAL_LAN_MAC 0x14C -#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4 - /* hcall macros */ #define h_register_logical_lan(ua, buflst, rxq, fltlst, mac) \ plpar_hcall_norets(H_REGISTER_LOGICAL_LAN, ua, buflst, rxq, fltlst, mac) diff --git a/include/asm-powerpc/hvcall.h b/include/asm-powerpc/hvcall.h index 0d3c4e85711a..f07ae50cbc2c 100644 --- a/include/asm-powerpc/hvcall.h +++ b/include/asm-powerpc/hvcall.h @@ -164,9 +164,15 @@ #define H_VIO_SIGNAL 0x104 #define H_SEND_CRQ 0x108 #define H_COPY_RDMA 0x110 +#define H_REGISTER_LOGICAL_LAN 0x114 +#define H_FREE_LOGICAL_LAN 0x118 +#define H_ADD_LOGICAL_LAN_BUFFER 0x11C +#define H_SEND_LOGICAL_LAN 0x120 +#define H_MULTICAST_CTRL 0x130 #define H_SET_XDABR 0x134 #define H_STUFF_TCE 0x138 #define H_PUT_TCE_INDIRECT 0x13C +#define H_CHANGE_LOGICAL_LAN_MAC 0x14C #define H_VTERM_PARTNER_INFO 0x150 #define H_REGISTER_VTERM 0x154 #define H_FREE_VTERM 0x158 @@ -196,11 +202,13 @@ #define H_GET_HCA_INFO 0x1B8 #define H_GET_PERF_COUNT 0x1BC #define H_MANAGE_TRACE 0x1C0 +#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4 #define H_QUERY_INT_STATE 0x1E4 #define H_POLL_PENDING 0x1D8 #define H_JOIN 0x298 #define H_VASI_STATE 0x2A4 #define H_ENABLE_CRQ 0x2B0 +#define MAX_HCALL_OPCODES (H_ENABLE_CRQ >> 2) #ifndef __ASSEMBLY__ From b9377ffc3a03cde558d76349a262a1adbb6d3112 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 19 Jul 2006 08:01:28 +1000 Subject: [PATCH 0062/1063] [POWERPC] clean up pseries hcall interfaces Our pseries hcall interfaces are out of control: plpar_hcall_norets plpar_hcall plpar_hcall_8arg_2ret plpar_hcall_4out plpar_hcall_7arg_7ret plpar_hcall_9arg_9ret Create 3 interfaces to cover all cases: plpar_hcall_norets: 7 arguments no returns plpar_hcall: 6 arguments 4 returns plpar_hcall9: 9 arguments 9 returns There are only 2 cases in the kernel that need plpar_hcall9, hopefully we can keep it that way. Pass in a buffer to stash return parameters so we avoid the &dummy1, &dummy2 madness. Signed-off-by: Anton Blanchard -- Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/lparcfg.c | 18 +- arch/powerpc/kernel/rtas.c | 11 +- arch/powerpc/platforms/pseries/hvCall.S | 211 +++--------------- arch/powerpc/platforms/pseries/hvconsole.c | 5 +- arch/powerpc/platforms/pseries/lpar.c | 12 +- .../platforms/pseries/plpar_wrappers.h | 97 ++++++-- arch/powerpc/platforms/pseries/xics.c | 22 +- drivers/net/ibmveth.c | 3 +- drivers/net/ibmveth.h | 17 +- include/asm-powerpc/hvcall.h | 105 +++------ 10 files changed, 186 insertions(+), 315 deletions(-) diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 3ce3a2d56fa8..41c05dcd68f4 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -182,8 +182,14 @@ static unsigned int h_get_ppp(unsigned long *entitled, unsigned long *resource) { unsigned long rc; - rc = plpar_hcall_4out(H_GET_PPP, 0, 0, 0, 0, entitled, unallocated, - aggregation, resource); + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + rc = plpar_hcall(H_GET_PPP, retbuf); + + *entitled = retbuf[0]; + *unallocated = retbuf[1]; + *aggregation = retbuf[2]; + *resource = retbuf[3]; log_plpar_hcall_return(rc, "H_GET_PPP"); @@ -193,8 +199,12 @@ static unsigned int h_get_ppp(unsigned long *entitled, static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs) { unsigned long rc; - unsigned long dummy; - rc = plpar_hcall(H_PIC, 0, 0, 0, 0, pool_idle_time, num_procs, &dummy); + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + rc = plpar_hcall(H_PIC, retbuf); + + *pool_idle_time = retbuf[0]; + *num_procs = retbuf[1]; if (rc != H_AUTHORITY) log_plpar_hcall_return(rc, "H_PIC"); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 10e10be324c9..14353b8789dd 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -668,15 +668,14 @@ static int rtas_ibm_suspend_me(struct rtas_args *args) int i; long state; long rc; - unsigned long dummy; - + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; struct rtas_suspend_me_data data; /* Make sure the state is valid */ - rc = plpar_hcall(H_VASI_STATE, - ((u64)args->args[0] << 32) | args->args[1], - 0, 0, 0, - &state, &dummy, &dummy); + rc = plpar_hcall(H_VASI_STATE, retbuf, + ((u64)args->args[0] << 32) | args->args[1]); + + state = retbuf[0]; if (rc) { printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned %ld\n",rc); diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index c9ff547f9d25..9a99b056bd27 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -1,7 +1,6 @@ /* * This file contains the generic code to perform a call to the * pSeries LPAR hypervisor. - * NOTE: this file will go away when we move to inline this work. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -16,42 +15,6 @@ .text -/* long plpar_hcall(unsigned long opcode, R3 - unsigned long arg1, R4 - unsigned long arg2, R5 - unsigned long arg3, R6 - unsigned long arg4, R7 - unsigned long *out1, R8 - unsigned long *out2, R9 - unsigned long *out3); R10 - */ -_GLOBAL(plpar_hcall) - HMT_MEDIUM - - mfcr r0 - - std r8,STK_PARM(r8)(r1) /* Save out ptrs */ - std r9,STK_PARM(r9)(r1) - std r10,STK_PARM(r10)(r1) - - stw r0,8(r1) - - HVSC /* invoke the hypervisor */ - - lwz r0,8(r1) - - ld r8,STK_PARM(r8)(r1) /* Fetch r4-r6 ret args */ - ld r9,STK_PARM(r9)(r1) - ld r10,STK_PARM(r10)(r1) - std r4,0(r8) - std r5,0(r9) - std r6,0(r10) - - mtcrf 0xff,r0 - blr /* return r3 = status */ - - -/* Simple interface with no output values (other than status) */ _GLOBAL(plpar_hcall_norets) HMT_MEDIUM @@ -64,164 +27,64 @@ _GLOBAL(plpar_hcall_norets) mtcrf 0xff,r0 blr /* return r3 = status */ - -/* long plpar_hcall_8arg_2ret(unsigned long opcode, R3 - unsigned long arg1, R4 - unsigned long arg2, R5 - unsigned long arg3, R6 - unsigned long arg4, R7 - unsigned long arg5, R8 - unsigned long arg6, R9 - unsigned long arg7, R10 - unsigned long arg8, 112(R1) - unsigned long *out1); 120(R1) - */ -_GLOBAL(plpar_hcall_8arg_2ret) - HMT_MEDIUM - - mfcr r0 - ld r11,STK_PARM(r11)(r1) /* put arg8 in R11 */ - stw r0,8(r1) - - HVSC /* invoke the hypervisor */ - - lwz r0,8(r1) - ld r10,STK_PARM(r12)(r1) /* Fetch r4 ret arg */ - std r4,0(r10) - mtcrf 0xff,r0 - blr /* return r3 = status */ - - -/* long plpar_hcall_4out(unsigned long opcode, R3 - unsigned long arg1, R4 - unsigned long arg2, R5 - unsigned long arg3, R6 - unsigned long arg4, R7 - unsigned long *out1, R8 - unsigned long *out2, R9 - unsigned long *out3, R10 - unsigned long *out4); 112(R1) - */ -_GLOBAL(plpar_hcall_4out) +_GLOBAL(plpar_hcall) HMT_MEDIUM mfcr r0 stw r0,8(r1) - std r8,STK_PARM(r8)(r1) /* Save out ptrs */ - std r9,STK_PARM(r9)(r1) - std r10,STK_PARM(r10)(r1) + std r4,STK_PARM(r4)(r1) /* Save ret buffer */ + + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + mr r9,r10 HVSC /* invoke the hypervisor */ + ld r12,STK_PARM(r4)(r1) + std r4, 0(r12) + std r5, 8(r12) + std r6, 16(r12) + std r7, 24(r12) + lwz r0,8(r1) - - ld r8,STK_PARM(r8)(r1) /* Fetch r4-r7 ret args */ - ld r9,STK_PARM(r9)(r1) - ld r10,STK_PARM(r10)(r1) - ld r11,STK_PARM(r11)(r1) - std r4,0(r8) - std r5,0(r9) - std r6,0(r10) - std r7,0(r11) - mtcrf 0xff,r0 + blr /* return r3 = status */ -/* plpar_hcall_7arg_7ret(unsigned long opcode, R3 - unsigned long arg1, R4 - unsigned long arg2, R5 - unsigned long arg3, R6 - unsigned long arg4, R7 - unsigned long arg5, R8 - unsigned long arg6, R9 - unsigned long arg7, R10 - unsigned long *out1, 112(R1) - unsigned long *out2, 110(R1) - unsigned long *out3, 108(R1) - unsigned long *out4, 106(R1) - unsigned long *out5, 104(R1) - unsigned long *out6, 102(R1) - unsigned long *out7); 100(R1) -*/ -_GLOBAL(plpar_hcall_7arg_7ret) +_GLOBAL(plpar_hcall9) HMT_MEDIUM mfcr r0 stw r0,8(r1) - HVSC /* invoke the hypervisor */ + std r4,STK_PARM(r4)(r1) /* Save ret buffer */ - lwz r0,8(r1) - - ld r11,STK_PARM(r11)(r1) /* Fetch r4 ret arg */ - std r4,0(r11) - ld r11,STK_PARM(r12)(r1) /* Fetch r5 ret arg */ - std r5,0(r11) - ld r11,STK_PARM(r13)(r1) /* Fetch r6 ret arg */ - std r6,0(r11) - ld r11,STK_PARM(r14)(r1) /* Fetch r7 ret arg */ - std r7,0(r11) - ld r11,STK_PARM(r15)(r1) /* Fetch r8 ret arg */ - std r8,0(r11) - ld r11,STK_PARM(r16)(r1) /* Fetch r9 ret arg */ - std r9,0(r11) - ld r11,STK_PARM(r17)(r1) /* Fetch r10 ret arg */ - std r10,0(r11) - - mtcrf 0xff,r0 - - blr /* return r3 = status */ - -/* plpar_hcall_9arg_9ret(unsigned long opcode, R3 - unsigned long arg1, R4 - unsigned long arg2, R5 - unsigned long arg3, R6 - unsigned long arg4, R7 - unsigned long arg5, R8 - unsigned long arg6, R9 - unsigned long arg7, R10 - unsigned long arg8, 112(R1) - unsigned long arg9, 110(R1) - unsigned long *out1, 108(R1) - unsigned long *out2, 106(R1) - unsigned long *out3, 104(R1) - unsigned long *out4, 102(R1) - unsigned long *out5, 100(R1) - unsigned long *out6, 98(R1) - unsigned long *out7); 96(R1) - unsigned long *out8, 94(R1) - unsigned long *out9, 92(R1) -*/ -_GLOBAL(plpar_hcall_9arg_9ret) - HMT_MEDIUM - - mfcr r0 - stw r0,8(r1) - - ld r11,STK_PARM(r11)(r1) /* put arg8 in R11 */ - ld r12,STK_PARM(r12)(r1) /* put arg9 in R12 */ + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + mr r9,r10 + ld r10,STK_PARM(r11)(r1) /* put arg7 in R10 */ + ld r11,STK_PARM(r12)(r1) /* put arg8 in R11 */ + ld r12,STK_PARM(r13)(r1) /* put arg9 in R12 */ HVSC /* invoke the hypervisor */ - ld r0,STK_PARM(r13)(r1) /* Fetch r4 ret arg */ - stdx r4,r0,r0 - ld r0,STK_PARM(r14)(r1) /* Fetch r5 ret arg */ - stdx r5,r0,r0 - ld r0,STK_PARM(r15)(r1) /* Fetch r6 ret arg */ - stdx r6,r0,r0 - ld r0,STK_PARM(r16)(r1) /* Fetch r7 ret arg */ - stdx r7,r0,r0 - ld r0,STK_PARM(r17)(r1) /* Fetch r8 ret arg */ - stdx r8,r0,r0 - ld r0,STK_PARM(r18)(r1) /* Fetch r9 ret arg */ - stdx r9,r0,r0 - ld r0,STK_PARM(r19)(r1) /* Fetch r10 ret arg */ - stdx r10,r0,r0 - ld r0,STK_PARM(r20)(r1) /* Fetch r11 ret arg */ - stdx r11,r0,r0 - ld r0,STK_PARM(r21)(r1) /* Fetch r12 ret arg */ - stdx r12,r0,r0 + ld r12,STK_PARM(r4)(r1) + std r4, 0(r12) + std r5, 8(r12) + std r6, 16(r12) + std r7, 24(r12) + std r8, 32(r12) + std r9, 40(r12) + std r10,48(r12) + std r11,56(r12) + std r12,64(r12) lwz r0,8(r1) mtcrf 0xff,r0 diff --git a/arch/powerpc/platforms/pseries/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c index a72a987f1d4d..3f6a89b09816 100644 --- a/arch/powerpc/platforms/pseries/hvconsole.c +++ b/arch/powerpc/platforms/pseries/hvconsole.c @@ -27,6 +27,7 @@ #include #include #include +#include "plpar_wrappers.h" /** * hvc_get_chars - retrieve characters from firmware for denoted vterm adatper @@ -40,9 +41,9 @@ int hvc_get_chars(uint32_t vtermno, char *buf, int count) { unsigned long got; - if (plpar_hcall(H_GET_TERM_CHAR, vtermno, 0, 0, 0, &got, - (unsigned long *)buf, (unsigned long *)buf+1) == H_SUCCESS) + if (plpar_get_term_char(vtermno, &got, buf) == H_SUCCESS) return got; + return 0; } diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 4cb7ff227f72..6cbf14266d5e 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -48,13 +48,11 @@ #define DBG_LOW(fmt...) do { } while(0) #endif -/* in pSeries_hvCall.S */ +/* in hvCall.S */ EXPORT_SYMBOL(plpar_hcall); -EXPORT_SYMBOL(plpar_hcall_4out); +EXPORT_SYMBOL(plpar_hcall9); EXPORT_SYMBOL(plpar_hcall_norets); -EXPORT_SYMBOL(plpar_hcall_8arg_2ret); -EXPORT_SYMBOL(plpar_hcall_7arg_7ret); -EXPORT_SYMBOL(plpar_hcall_9arg_9ret); + extern void pSeries_find_serial_port(void); @@ -277,7 +275,6 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group, unsigned long flags; unsigned long slot; unsigned long hpte_v, hpte_r; - unsigned long dummy0, dummy1; if (!(vflags & HPTE_V_BOLTED)) DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, " @@ -302,8 +299,7 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group, if (rflags & (_PAGE_GUARDED|_PAGE_NO_CACHE)) hpte_r &= ~_PAGE_COHERENT; - lpar_rc = plpar_hcall(H_ENTER, flags, hpte_group, hpte_v, - hpte_r, &slot, &dummy0, &dummy1); + lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot); if (unlikely(lpar_rc == H_PTEG_FULL)) { if (!(vflags & HPTE_V_BOLTED)) DBG_LOW(" full\n"); diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h index 3bd1b3e06003..ebd15de7597e 100644 --- a/arch/powerpc/platforms/pseries/plpar_wrappers.h +++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h @@ -5,20 +5,17 @@ static inline long poll_pending(void) { - unsigned long dummy; - return plpar_hcall(H_POLL_PENDING, 0, 0, 0, 0, &dummy, &dummy, &dummy); + return plpar_hcall_norets(H_POLL_PENDING); } static inline long prod_processor(void) { - plpar_hcall_norets(H_PROD); - return 0; + return plpar_hcall_norets(H_PROD); } static inline long cede_processor(void) { - plpar_hcall_norets(H_CEDE); - return 0; + return plpar_hcall_norets(H_CEDE); } static inline long vpa_call(unsigned long flags, unsigned long cpu, @@ -42,21 +39,47 @@ static inline long register_vpa(unsigned long cpu, unsigned long vpa) extern void vpa_init(int cpu); +static inline long plpar_pte_enter(unsigned long flags, + unsigned long hpte_group, unsigned long hpte_v, + unsigned long hpte_r, unsigned long *slot) +{ + long rc; + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + rc = plpar_hcall(H_ENTER, retbuf, flags, hpte_group, hpte_v, hpte_r); + + *slot = retbuf[0]; + + return rc; +} + static inline long plpar_pte_remove(unsigned long flags, unsigned long ptex, unsigned long avpn, unsigned long *old_pteh_ret, unsigned long *old_ptel_ret) { - unsigned long dummy; - return plpar_hcall(H_REMOVE, flags, ptex, avpn, 0, old_pteh_ret, - old_ptel_ret, &dummy); + long rc; + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + rc = plpar_hcall(H_REMOVE, retbuf, flags, ptex, avpn); + + *old_pteh_ret = retbuf[0]; + *old_ptel_ret = retbuf[1]; + + return rc; } static inline long plpar_pte_read(unsigned long flags, unsigned long ptex, unsigned long *old_pteh_ret, unsigned long *old_ptel_ret) { - unsigned long dummy; - return plpar_hcall(H_READ, flags, ptex, 0, 0, old_pteh_ret, - old_ptel_ret, &dummy); + long rc; + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + rc = plpar_hcall(H_READ, retbuf, flags, ptex); + + *old_pteh_ret = retbuf[0]; + *old_ptel_ret = retbuf[1]; + + return rc; } static inline long plpar_pte_protect(unsigned long flags, unsigned long ptex, @@ -68,9 +91,14 @@ static inline long plpar_pte_protect(unsigned long flags, unsigned long ptex, static inline long plpar_tce_get(unsigned long liobn, unsigned long ioba, unsigned long *tce_ret) { - unsigned long dummy; - return plpar_hcall(H_GET_TCE, liobn, ioba, 0, 0, tce_ret, &dummy, - &dummy); + long rc; + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + rc = plpar_hcall(H_GET_TCE, retbuf, liobn, ioba); + + *tce_ret = retbuf[0]; + + return rc; } static inline long plpar_tce_put(unsigned long liobn, unsigned long ioba, @@ -94,9 +122,17 @@ static inline long plpar_tce_stuff(unsigned long liobn, unsigned long ioba, static inline long plpar_get_term_char(unsigned long termno, unsigned long *len_ret, char *buf_ret) { + long rc; + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; unsigned long *lbuf = (unsigned long *)buf_ret; /* TODO: alignment? */ - return plpar_hcall(H_GET_TERM_CHAR, termno, 0, 0, 0, len_ret, - lbuf + 0, lbuf + 1); + + rc = plpar_hcall(H_GET_TERM_CHAR, retbuf, termno); + + *len_ret = retbuf[0]; + lbuf[0] = retbuf[1]; + lbuf[1] = retbuf[2]; + + return rc; } static inline long plpar_put_term_char(unsigned long termno, unsigned long len, @@ -107,4 +143,31 @@ static inline long plpar_put_term_char(unsigned long termno, unsigned long len, lbuf[1]); } +static inline long plpar_eoi(unsigned long xirr) +{ + return plpar_hcall_norets(H_EOI, xirr); +} + +static inline long plpar_cppr(unsigned long cppr) +{ + return plpar_hcall_norets(H_CPPR, cppr); +} + +static inline long plpar_ipi(unsigned long servernum, unsigned long mfrr) +{ + return plpar_hcall_norets(H_IPI, servernum, mfrr); +} + +static inline long plpar_xirr(unsigned long *xirr_ret) +{ + long rc; + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + rc = plpar_hcall(H_XIRR, retbuf); + + *xirr_ret = retbuf[0]; + + return rc; +} + #endif /* _PSERIES_PLPAR_WRAPPERS_H */ diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index 1eab4688be17..c88ec63129f3 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -34,6 +34,7 @@ #include #include "xics.h" +#include "plpar_wrappers.h" #define XICS_IPI 2 #define XICS_IRQ_SPURIOUS 0 @@ -110,27 +111,6 @@ static inline void direct_qirr_info(int n_cpu, u8 value) /* LPAR low level accessors */ -static inline long plpar_eoi(unsigned long xirr) -{ - return plpar_hcall_norets(H_EOI, xirr); -} - -static inline long plpar_cppr(unsigned long cppr) -{ - return plpar_hcall_norets(H_CPPR, cppr); -} - -static inline long plpar_ipi(unsigned long servernum, unsigned long mfrr) -{ - return plpar_hcall_norets(H_IPI, servernum, mfrr); -} - -static inline long plpar_xirr(unsigned long *xirr_ret) -{ - unsigned long dummy; - return plpar_hcall(H_XIRR, 0, 0, 0, 0, xirr_ret, &dummy, &dummy); -} - static inline unsigned int lpar_xirr_info_get(int n_cpu) { unsigned long lpar_rc; diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index 0464e78f733a..e56eac88b809 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -702,7 +702,8 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev) desc[3].desc, desc[4].desc, desc[5].desc, - correlator); + correlator, + &correlator); } while ((lpar_rc == H_BUSY) && (retry_count--)); if(lpar_rc != H_SUCCESS && lpar_rc != H_DROPPED) { diff --git a/drivers/net/ibmveth.h b/drivers/net/ibmveth.h index 149191cef2f0..f5b25bff1540 100644 --- a/drivers/net/ibmveth.h +++ b/drivers/net/ibmveth.h @@ -51,8 +51,21 @@ #define h_add_logical_lan_buffer(ua, buf) \ plpar_hcall_norets(H_ADD_LOGICAL_LAN_BUFFER, ua, buf) -#define h_send_logical_lan(ua, buf1, buf2, buf3, buf4, buf5, buf6, correlator) \ - plpar_hcall_8arg_2ret(H_SEND_LOGICAL_LAN, ua, buf1, buf2, buf3, buf4, buf5, buf6, correlator, &correlator) +static inline long h_send_logical_lan(unsigned long unit_address, + unsigned long desc1, unsigned long desc2, unsigned long desc3, + unsigned long desc4, unsigned long desc5, unsigned long desc6, + unsigned long corellator_in, unsigned long *corellator_out) +{ + long rc; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + + rc = plpar_hcall9(H_SEND_LOGICAL_LAN, retbuf, unit_address, desc1, + desc2, desc3, desc4, desc5, desc6, corellator_in); + + *corellator_out = retbuf[0]; + + return rc; +} #define h_multicast_ctrl(ua, cmd, mac) \ plpar_hcall_norets(H_MULTICAST_CTRL, ua, cmd, mac) diff --git a/include/asm-powerpc/hvcall.h b/include/asm-powerpc/hvcall.h index f07ae50cbc2c..63ce1ac8c1f4 100644 --- a/include/asm-powerpc/hvcall.h +++ b/include/asm-powerpc/hvcall.h @@ -212,94 +212,39 @@ #ifndef __ASSEMBLY__ -/* plpar_hcall() -- Generic call interface using above opcodes +/** + * plpar_hcall_norets: - Make a pseries hypervisor call with no return arguments + * @opcode: The hypervisor call to make. * - * The actual call interface is a hypervisor call instruction with - * the opcode in R3 and input args in R4-R7. - * Status is returned in R3 with variable output values in R4-R11. - * Only H_PTE_READ with H_READ_4 uses R6-R11 so we ignore it for now - * and return only two out args which MUST ALWAYS BE PROVIDED. - */ -long plpar_hcall(unsigned long opcode, - unsigned long arg1, - unsigned long arg2, - unsigned long arg3, - unsigned long arg4, - unsigned long *out1, - unsigned long *out2, - unsigned long *out3); - -/* Same as plpar_hcall but for those opcodes that return no values - * other than status. Slightly more efficient. + * This call supports up to 7 arguments and only returns the status of + * the hcall. Use this version where possible, its slightly faster than + * the other plpar_hcalls. */ long plpar_hcall_norets(unsigned long opcode, ...); -/* - * Special hcall interface for ibmveth support. - * Takes 8 input parms. Returns a rc and stores the - * R4 return value in *out1. - */ -long plpar_hcall_8arg_2ret(unsigned long opcode, - unsigned long arg1, - unsigned long arg2, - unsigned long arg3, - unsigned long arg4, - unsigned long arg5, - unsigned long arg6, - unsigned long arg7, - unsigned long arg8, - unsigned long *out1); - -/* plpar_hcall_4out() +/** + * plpar_hcall: - Make a pseries hypervisor call + * @opcode: The hypervisor call to make. + * @retbuf: Buffer to store up to 4 return arguments in. * - * same as plpar_hcall except with 4 output arguments. + * This call supports up to 6 arguments and 4 return arguments. Use + * PLPAR_HCALL_BUFSIZE to size the return argument buffer. * + * Used for all but the craziest of phyp interfaces (see plpar_hcall9) */ -long plpar_hcall_4out(unsigned long opcode, - unsigned long arg1, - unsigned long arg2, - unsigned long arg3, - unsigned long arg4, - unsigned long *out1, - unsigned long *out2, - unsigned long *out3, - unsigned long *out4); +#define PLPAR_HCALL_BUFSIZE 4 +long plpar_hcall(unsigned long opcode, unsigned long *retbuf, ...); -long plpar_hcall_7arg_7ret(unsigned long opcode, - unsigned long arg1, - unsigned long arg2, - unsigned long arg3, - unsigned long arg4, - unsigned long arg5, - unsigned long arg6, - unsigned long arg7, - unsigned long *out1, - unsigned long *out2, - unsigned long *out3, - unsigned long *out4, - unsigned long *out5, - unsigned long *out6, - unsigned long *out7); - -long plpar_hcall_9arg_9ret(unsigned long opcode, - unsigned long arg1, - unsigned long arg2, - unsigned long arg3, - unsigned long arg4, - unsigned long arg5, - unsigned long arg6, - unsigned long arg7, - unsigned long arg8, - unsigned long arg9, - unsigned long *out1, - unsigned long *out2, - unsigned long *out3, - unsigned long *out4, - unsigned long *out5, - unsigned long *out6, - unsigned long *out7, - unsigned long *out8, - unsigned long *out9); +/** + * plpar_hcall9: - Make a pseries hypervisor call with up to 9 return arguments + * @opcode: The hypervisor call to make. + * @retbuf: Buffer to store up to 9 return arguments in. + * + * This call supports up to 9 arguments and 9 return arguments. Use + * PLPAR_HCALL9_BUFSIZE to size the return argument buffer. + */ +#define PLPAR_HCALL9_BUFSIZE 9 +long plpar_hcall9(unsigned long opcode, unsigned long *retbuf, ...); #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ From 43d6b68dc38867e489995e21649bb82f6ee7b5d3 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 29 Jul 2006 11:14:08 -0700 Subject: [PATCH 0063/1063] [SCSI] areca sysfs fix Remove sysfs_remove_bin_file() return-value checking from the areca driver. There's nothing a driver can do if sysfs file removal fails, so we'll soon be changing sysfs_remove_bin_file() to internally print a diagnostic and to return void. Cc: Erich Chen Signed-off-by: Andrew Morton Signed-off-by: James Bottomley --- drivers/scsi/arcmsr/arcmsr_attr.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/drivers/scsi/arcmsr/arcmsr_attr.c b/drivers/scsi/arcmsr/arcmsr_attr.c index 0459f4194d7c..c96f7140cb62 100644 --- a/drivers/scsi/arcmsr/arcmsr_attr.c +++ b/drivers/scsi/arcmsr/arcmsr_attr.c @@ -240,15 +240,11 @@ int arcmsr_alloc_sysfs_attr(struct AdapterControlBlock *acb) } return 0; error_bin_file_message_clear: - error = sysfs_remove_bin_file(&host->shost_classdev.kobj, + sysfs_remove_bin_file(&host->shost_classdev.kobj, &arcmsr_sysfs_message_write_attr); - if (error) - printk(KERN_ERR "arcmsr: sysfs_remove_bin_file mu_write failed\n"); error_bin_file_message_write: - error = sysfs_remove_bin_file(&host->shost_classdev.kobj, + sysfs_remove_bin_file(&host->shost_classdev.kobj, &arcmsr_sysfs_message_read_attr); - if (error) - printk(KERN_ERR "arcmsr: sysfs_remove_bin_file mu_read failed\n"); error_bin_file_message_read: return error; } @@ -256,20 +252,13 @@ error_bin_file_message_read: void arcmsr_free_sysfs_attr(struct AdapterControlBlock *acb) { struct Scsi_Host *host = acb->host; - int error; - error = sysfs_remove_bin_file(&host->shost_classdev.kobj, + sysfs_remove_bin_file(&host->shost_classdev.kobj, &arcmsr_sysfs_message_clear_attr); - if (error) - printk(KERN_ERR "arcmsr: free sysfs mu_clear failed\n"); - error = sysfs_remove_bin_file(&host->shost_classdev.kobj, + sysfs_remove_bin_file(&host->shost_classdev.kobj, &arcmsr_sysfs_message_write_attr); - if (error) - printk(KERN_ERR "arcmsr: free sysfs mu_write failed\n"); - error = sysfs_remove_bin_file(&host->shost_classdev.kobj, + sysfs_remove_bin_file(&host->shost_classdev.kobj, &arcmsr_sysfs_message_read_attr); - if (error) - printk(KERN_ERR "arcmsr: free sysfss mu_read failed\n"); } From d67a70aca200f67be42428e74eb3353f20ad1130 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Fri, 28 Jul 2006 17:36:46 -0500 Subject: [PATCH 0064/1063] [SCSI] arcmsr: fix up sysfs values The sysfs files in arcmsr are non-standard in that they aren't simple filename value pairs, the values actually contain preceeding text which would have to be parsed. The idea of sysfs files is that the file name is the description and the contents is a simple value. Fix up arcmsr to conform to this standard. Acked-By: Erich Chen Signed-off-by: James Bottomley --- drivers/scsi/arcmsr/arcmsr_attr.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/arcmsr/arcmsr_attr.c b/drivers/scsi/arcmsr/arcmsr_attr.c index c96f7140cb62..12497da5529d 100644 --- a/drivers/scsi/arcmsr/arcmsr_attr.c +++ b/drivers/scsi/arcmsr/arcmsr_attr.c @@ -265,7 +265,7 @@ arcmsr_free_sysfs_attr(struct AdapterControlBlock *acb) { static ssize_t arcmsr_attr_host_driver_version(struct class_device *cdev, char *buf) { return snprintf(buf, PAGE_SIZE, - "ARCMSR: %s\n", + "%s\n", ARCMSR_DRIVER_VERSION); } @@ -274,7 +274,7 @@ arcmsr_attr_host_driver_posted_cmd(struct class_device *cdev, char *buf) { struct Scsi_Host *host = class_to_shost(cdev); struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; return snprintf(buf, PAGE_SIZE, - "Current commands posted: %4d\n", + "%4d\n", atomic_read(&acb->ccboutstandingcount)); } @@ -283,7 +283,7 @@ arcmsr_attr_host_driver_reset(struct class_device *cdev, char *buf) { struct Scsi_Host *host = class_to_shost(cdev); struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; return snprintf(buf, PAGE_SIZE, - "SCSI Host Resets: %4d\n", + "%4d\n", acb->num_resets); } @@ -292,7 +292,7 @@ arcmsr_attr_host_driver_abort(struct class_device *cdev, char *buf) { struct Scsi_Host *host = class_to_shost(cdev); struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; return snprintf(buf, PAGE_SIZE, - "SCSI Aborts/Timeouts: %4d\n", + "%4d\n", acb->num_aborts); } @@ -301,7 +301,7 @@ arcmsr_attr_host_fw_model(struct class_device *cdev, char *buf) { struct Scsi_Host *host = class_to_shost(cdev); struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; return snprintf(buf, PAGE_SIZE, - "Adapter Model: %s\n", + "%s\n", acb->firm_model); } @@ -311,7 +311,7 @@ arcmsr_attr_host_fw_version(struct class_device *cdev, char *buf) { struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; return snprintf(buf, PAGE_SIZE, - "Firmware Version: %s\n", + "%s\n", acb->firm_version); } @@ -321,7 +321,7 @@ arcmsr_attr_host_fw_request_len(struct class_device *cdev, char *buf) { struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; return snprintf(buf, PAGE_SIZE, - "Reguest Lenth: %4d\n", + "%4d\n", acb->firm_request_len); } @@ -331,7 +331,7 @@ arcmsr_attr_host_fw_numbers_queue(struct class_device *cdev, char *buf) { struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; return snprintf(buf, PAGE_SIZE, - "Numbers of Queue: %4d\n", + "%4d\n", acb->firm_numbers_queue); } @@ -341,7 +341,7 @@ arcmsr_attr_host_fw_sdram_size(struct class_device *cdev, char *buf) { struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; return snprintf(buf, PAGE_SIZE, - "SDRAM Size: %4d\n", + "%4d\n", acb->firm_sdram_size); } @@ -351,7 +351,7 @@ arcmsr_attr_host_fw_hd_channels(struct class_device *cdev, char *buf) { struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; return snprintf(buf, PAGE_SIZE, - "Hard Disk Channels: %4d\n", + "%4d\n", acb->firm_hd_channels); } From 2672ea86be26353108a72a28910df4dc61cdb5e2 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 2 Aug 2006 17:11:49 -0400 Subject: [PATCH 0065/1063] [SCSI] advansys pci tweaks. Remove a lot of duplicate #defines from the advansys driver, and make them look like PCI IDs as defined elsewhere in the kernel. Also add a module table so that it automatically gets picked up by tools relying on modinfo output (like say, distro installers). Signed-off-by: Dave Jones Signed-off-by: James Bottomley --- drivers/scsi/advansys.c | 90 ++++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 41 deletions(-) diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c index e32b4ab2f8fb..773f02e3b10b 100644 --- a/drivers/scsi/advansys.c +++ b/drivers/scsi/advansys.c @@ -888,10 +888,6 @@ typedef unsigned char uchar; #define ASC_PCI_ID2DEV(id) (((id) >> 11) & 0x1F) #define ASC_PCI_ID2FUNC(id) (((id) >> 8) & 0x7) #define ASC_PCI_MKID(bus, dev, func) ((((dev) & 0x1F) << 11) | (((func) & 0x7) << 8) | ((bus) & 0xFF)) -#define ASC_PCI_VENDORID 0x10CD -#define ASC_PCI_DEVICEID_1200A 0x1100 -#define ASC_PCI_DEVICEID_1200B 0x1200 -#define ASC_PCI_DEVICEID_ULTRA 0x1300 #define ASC_PCI_REVISION_3150 0x02 #define ASC_PCI_REVISION_3050 0x03 @@ -899,6 +895,14 @@ typedef unsigned char uchar; #define ASC_DVCLIB_CALL_FAILED (0) #define ASC_DVCLIB_CALL_ERROR (-1) +#define PCI_VENDOR_ID_ASP 0x10cd +#define PCI_DEVICE_ID_ASP_1200A 0x1100 +#define PCI_DEVICE_ID_ASP_ABP940 0x1200 +#define PCI_DEVICE_ID_ASP_ABP940U 0x1300 +#define PCI_DEVICE_ID_ASP_ABP940UW 0x2300 +#define PCI_DEVICE_ID_38C0800_REV1 0x2500 +#define PCI_DEVICE_ID_38C1600_REV1 0x2700 + /* * Enable CC_VERY_LONG_SG_LIST to support up to 64K element SG lists. * The SRB structure will have to be changed and the ASC_SRB2SCSIQ() @@ -1492,8 +1496,6 @@ typedef struct asc_dvc_cfg { #define ASC_INIT_STATE_END_INQUIRY 0x0080 #define ASC_INIT_RESET_SCSI_DONE 0x0100 #define ASC_INIT_STATE_WITHOUT_EEP 0x8000 -#define ASC_PCI_DEVICE_ID_REV_A 0x1100 -#define ASC_PCI_DEVICE_ID_REV_B 0x1200 #define ASC_BUG_FIX_IF_NOT_DWB 0x0001 #define ASC_BUG_FIX_ASYN_USE_SYN 0x0002 #define ASYN_SDTR_DATA_FIX_PCI_REV_AB 0x41 @@ -2100,12 +2102,6 @@ STATIC ASC_DCNT AscGetMaxDmaCount(ushort); #define ADV_NUM_PAGE_CROSSING \ ((ADV_SG_TOTAL_MEM_SIZE + (ADV_PAGE_SIZE - 1))/ADV_PAGE_SIZE) -/* a_condor.h */ -#define ADV_PCI_VENDOR_ID 0x10CD -#define ADV_PCI_DEVICE_ID_REV_A 0x2300 -#define ADV_PCI_DEVID_38C0800_REV1 0x2500 -#define ADV_PCI_DEVID_38C1600_REV1 0x2700 - #define ADV_EEP_DVC_CFG_BEGIN (0x00) #define ADV_EEP_DVC_CFG_END (0x15) #define ADV_EEP_DVC_CTL_BEGIN (0x16) /* location of OEM name */ @@ -3569,14 +3565,7 @@ typedef struct scsi_cmnd REQ, *REQP; #define PCI_MAX_SLOT 0x1F #define PCI_MAX_BUS 0xFF #define PCI_IOADDRESS_MASK 0xFFFE -#define ASC_PCI_VENDORID 0x10CD #define ASC_PCI_DEVICE_ID_CNT 6 /* PCI Device ID count. */ -#define ASC_PCI_DEVICE_ID_1100 0x1100 -#define ASC_PCI_DEVICE_ID_1200 0x1200 -#define ASC_PCI_DEVICE_ID_1300 0x1300 -#define ASC_PCI_DEVICE_ID_2300 0x2300 /* ASC-3550 */ -#define ASC_PCI_DEVICE_ID_2500 0x2500 /* ASC-38C0800 */ -#define ASC_PCI_DEVICE_ID_2700 0x2700 /* ASC-38C1600 */ #ifndef ADVANSYS_STATS #define ASC_STATS(shp, counter) @@ -4330,12 +4319,12 @@ advansys_detect(struct scsi_host_template *tpnt) struct pci_dev *pci_devp = NULL; int pci_device_id_cnt = 0; unsigned int pci_device_id[ASC_PCI_DEVICE_ID_CNT] = { - ASC_PCI_DEVICE_ID_1100, - ASC_PCI_DEVICE_ID_1200, - ASC_PCI_DEVICE_ID_1300, - ASC_PCI_DEVICE_ID_2300, - ASC_PCI_DEVICE_ID_2500, - ASC_PCI_DEVICE_ID_2700 + PCI_DEVICE_ID_ASP_1200A, + PCI_DEVICE_ID_ASP_ABP940, + PCI_DEVICE_ID_ASP_ABP940U, + PCI_DEVICE_ID_ASP_ABP940UW, + PCI_DEVICE_ID_38C0800_REV1, + PCI_DEVICE_ID_38C1600_REV1 }; ADV_PADDR pci_memory_address; #endif /* CONFIG_PCI */ @@ -4471,7 +4460,7 @@ advansys_detect(struct scsi_host_template *tpnt) /* Find all PCI cards. */ while (pci_device_id_cnt < ASC_PCI_DEVICE_ID_CNT) { - if ((pci_devp = pci_find_device(ASC_PCI_VENDORID, + if ((pci_devp = pci_find_device(PCI_VENDOR_ID_ASP, pci_device_id[pci_device_id_cnt], pci_devp)) == NULL) { pci_device_id_cnt++; @@ -4575,9 +4564,9 @@ advansys_detect(struct scsi_host_template *tpnt) */ #ifdef CONFIG_PCI if (asc_bus[bus] == ASC_IS_PCI && - (pci_devp->device == ASC_PCI_DEVICE_ID_2300 || - pci_devp->device == ASC_PCI_DEVICE_ID_2500 || - pci_devp->device == ASC_PCI_DEVICE_ID_2700)) + (pci_devp->device == PCI_DEVICE_ID_ASP_ABP940UW || + pci_devp->device == PCI_DEVICE_ID_38C0800_REV1 || + pci_devp->device == PCI_DEVICE_ID_38C1600_REV1)) { boardp->flags |= ASC_IS_WIDE_BOARD; } @@ -4600,11 +4589,11 @@ advansys_detect(struct scsi_host_template *tpnt) adv_dvc_varp->isr_callback = adv_isr_callback; adv_dvc_varp->async_callback = adv_async_callback; #ifdef CONFIG_PCI - if (pci_devp->device == ASC_PCI_DEVICE_ID_2300) + if (pci_devp->device == PCI_DEVICE_ID_ASP_ABP940UW) { ASC_DBG(1, "advansys_detect: ASC-3550\n"); adv_dvc_varp->chip_type = ADV_CHIP_ASC3550; - } else if (pci_devp->device == ASC_PCI_DEVICE_ID_2500) + } else if (pci_devp->device == PCI_DEVICE_ID_38C0800_REV1) { ASC_DBG(1, "advansys_detect: ASC-38C0800\n"); adv_dvc_varp->chip_type = ADV_CHIP_ASC38C0800; @@ -11922,7 +11911,7 @@ AscInitGetConfig( PCIRevisionID = DvcReadPCIConfigByte(asc_dvc, AscPCIConfigRevisionIDRegister); - if (PCIVendorID != ASC_PCI_VENDORID) { + if (PCIVendorID != PCI_VENDOR_ID_ASP) { warn_code |= ASC_WARN_SET_PCI_CONFIG_SPACE; } prevCmdRegBits = DvcReadPCIConfigByte(asc_dvc, @@ -11942,15 +11931,15 @@ AscInitGetConfig( warn_code |= ASC_WARN_SET_PCI_CONFIG_SPACE; } } - if ((PCIDeviceID == ASC_PCI_DEVICEID_1200A) || - (PCIDeviceID == ASC_PCI_DEVICEID_1200B)) { + if ((PCIDeviceID == PCI_DEVICE_ID_ASP_1200A) || + (PCIDeviceID == PCI_DEVICE_ID_ASP_ABP940)) { DvcWritePCIConfigByte(asc_dvc, AscPCIConfigLatencyTimer, 0x00); if (DvcReadPCIConfigByte(asc_dvc, AscPCIConfigLatencyTimer) != 0x00) { warn_code |= ASC_WARN_SET_PCI_CONFIG_SPACE; } - } else if (PCIDeviceID == ASC_PCI_DEVICEID_ULTRA) { + } else if (PCIDeviceID == PCI_DEVICE_ID_ASP_ABP940U) { if (DvcReadPCIConfigByte(asc_dvc, AscPCIConfigLatencyTimer) < 0x20) { DvcWritePCIConfigByte(asc_dvc, @@ -12037,8 +12026,8 @@ AscInitFromAscDvcVar( AscSetChipCfgMsw(iop_base, cfg_msw); if ((asc_dvc->bus_type & ASC_IS_PCI_ULTRA) == ASC_IS_PCI_ULTRA) { } else { - if ((pci_device_id == ASC_PCI_DEVICE_ID_REV_A) || - (pci_device_id == ASC_PCI_DEVICE_ID_REV_B)) { + if ((pci_device_id == PCI_DEVICE_ID_ASP_1200A) || + (pci_device_id == PCI_DEVICE_ID_ASP_ABP940)) { asc_dvc->bug_fix_cntl |= ASC_BUG_FIX_IF_NOT_DWB; asc_dvc->bug_fix_cntl |= ASC_BUG_FIX_ASYN_USE_SYN; } @@ -14275,8 +14264,8 @@ Default_38C0800_EEPROM_Config __initdata = { 0, /* 55 reserved */ 0, /* 56 cisptr_lsw */ 0, /* 57 cisprt_msw */ - ADV_PCI_VENDOR_ID, /* 58 subsysvid */ - ADV_PCI_DEVID_38C0800_REV1, /* 59 subsysid */ + PCI_VENDOR_ID_ASP, /* 58 subsysvid */ + PCI_DEVICE_ID_38C0800_REV1, /* 59 subsysid */ 0, /* 60 reserved */ 0, /* 61 reserved */ 0, /* 62 reserved */ @@ -14405,8 +14394,8 @@ Default_38C1600_EEPROM_Config __initdata = { 0, /* 55 reserved */ 0, /* 56 cisptr_lsw */ 0, /* 57 cisprt_msw */ - ADV_PCI_VENDOR_ID, /* 58 subsysvid */ - ADV_PCI_DEVID_38C1600_REV1, /* 59 subsysid */ + PCI_VENDOR_ID_ASP, /* 58 subsysvid */ + PCI_DEVICE_ID_38C1600_REV1, /* 59 subsysid */ 0, /* 60 reserved */ 0, /* 61 reserved */ 0, /* 62 reserved */ @@ -18225,3 +18214,22 @@ AdvInquiryHandling( } } MODULE_LICENSE("Dual BSD/GPL"); + +/* PCI Devices supported by this driver */ +static struct pci_device_id advansys_pci_tbl[] __devinitdata = { + { PCI_VENDOR_ID_ASP, PCI_DEVICE_ID_ASP_1200A, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + { PCI_VENDOR_ID_ASP, PCI_DEVICE_ID_ASP_ABP940, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + { PCI_VENDOR_ID_ASP, PCI_DEVICE_ID_ASP_ABP940U, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + { PCI_VENDOR_ID_ASP, PCI_DEVICE_ID_ASP_ABP940UW, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + { PCI_VENDOR_ID_ASP, PCI_DEVICE_ID_38C0800_REV1, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + { PCI_VENDOR_ID_ASP, PCI_DEVICE_ID_38C1600_REV1, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + { } +}; +MODULE_DEVICE_TABLE(pci, advansys_pci_tbl); + From fddafd3d21953d5ea740f7b2f27149f7dd493194 Mon Sep 17 00:00:00 2001 From: Brian King Date: Thu, 3 Aug 2006 13:54:59 -0500 Subject: [PATCH 0066/1063] [SCSI] DAC960: PCI id table fixup The PCI ID table in the DAC960 driver conflicts with some devices that use the ipr driver. All ipr adapters that use this chip have an IBM subvendor ID and all DAC960 adapters that use this chip have a Mylex subvendor id. Signed-off-by: Brian King Signed-off-by: James Bottomley --- drivers/block/DAC960.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 4cd23c3eab41..a360215dbce7 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -7115,7 +7115,7 @@ static struct pci_device_id DAC960_id_table[] = { { .vendor = PCI_VENDOR_ID_MYLEX, .device = PCI_DEVICE_ID_MYLEX_DAC960_GEM, - .subvendor = PCI_ANY_ID, + .subvendor = PCI_VENDOR_ID_MYLEX, .subdevice = PCI_ANY_ID, .driver_data = (unsigned long) &DAC960_GEM_privdata, }, From 9e5c50fa8686ede7c37b939a0b950df50346eb3d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Aug 2006 17:18:30 +0200 Subject: [PATCH 0067/1063] [SCSI] remove SCSI_STATE_ #defines These aren't used anymore since the field in scsi_cmnd where it was stored has been removed. Signed-off-by: Christoph Hellwig Signed-off-by: James Bottomley --- include/scsi/scsi_cmnd.h | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 58e6444eebee..be117f812deb 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -118,20 +118,6 @@ struct scsi_cmnd { unsigned long pid; /* Process ID, starts at 0. Unique per host. */ }; -/* - * These are the values that scsi_cmd->state can take. - */ -#define SCSI_STATE_TIMEOUT 0x1000 -#define SCSI_STATE_FINISHED 0x1001 -#define SCSI_STATE_FAILED 0x1002 -#define SCSI_STATE_QUEUED 0x1003 -#define SCSI_STATE_UNUSED 0x1006 -#define SCSI_STATE_DISCONNECTING 0x1008 -#define SCSI_STATE_INITIALIZING 0x1009 -#define SCSI_STATE_BHQUEUE 0x100a -#define SCSI_STATE_MLQUEUE 0x100b - - extern struct scsi_cmnd *scsi_get_command(struct scsi_device *, gfp_t); extern void scsi_put_command(struct scsi_cmnd *); extern void scsi_io_completion(struct scsi_cmnd *, unsigned int); From dd7e2f2266acf66ec882baa6fbd79f853b5fe966 Mon Sep 17 00:00:00 2001 From: Michael Reed Date: Fri, 4 Aug 2006 12:09:24 -0500 Subject: [PATCH 0068/1063] [SCSI] scsi_queue_work() documented return value is incorrect If you examine the queue_work() routine you'll see that it returns 1 on success, 0 if the work is already queued. This patch corrects the source code documentation for the scsi_queue_work function. Signed-off-by: Michael Reed Signed-off-by: James Bottomley --- drivers/scsi/hosts.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index dfcb96f3e60c..f244d4f6597a 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -487,7 +487,9 @@ EXPORT_SYMBOL(scsi_is_host_device); * @work: Work to queue for execution. * * Return value: - * 0 on success / != 0 for error + * 1 - work queued for execution + * 0 - work is already queued + * -EINVAL - work queue doesn't exist **/ int scsi_queue_work(struct Scsi_Host *shost, struct work_struct *work) { From 5d947f2b7607c4674d104accbd3768744aaa4154 Mon Sep 17 00:00:00 2001 From: Michael Reed Date: Mon, 31 Jul 2006 12:19:30 -0500 Subject: [PATCH 0069/1063] [SCSI] mptfc: add additional fc transport attributes Add host_supported_speeds, host_maxframe_size, host_speed, host_fabric_name, host_port_type, host_port_state, and host_symbolic_name transport attributes to fusion fibre channel. Signed-off-by: Michael Reed Acked-by: Moore, Eric Signed-off-by: James Bottomley --- drivers/message/fusion/mptfc.c | 92 +++++++++++++++++++++++++++++----- 1 file changed, 80 insertions(+), 12 deletions(-) diff --git a/drivers/message/fusion/mptfc.c b/drivers/message/fusion/mptfc.c index 90da7d63b08e..244a32c66f06 100644 --- a/drivers/message/fusion/mptfc.c +++ b/drivers/message/fusion/mptfc.c @@ -162,7 +162,13 @@ static struct fc_function_template mptfc_transport_functions = { .show_starget_port_id = 1, .set_rport_dev_loss_tmo = mptfc_set_rport_loss_tmo, .show_rport_dev_loss_tmo = 1, - + .show_host_supported_speeds = 1, + .show_host_maxframe_size = 1, + .show_host_speed = 1, + .show_host_fabric_name = 1, + .show_host_port_type = 1, + .show_host_port_state = 1, + .show_host_symbolic_name = 1, }; static void @@ -836,33 +842,95 @@ mptfc_SetFcPortPage1_defaults(MPT_ADAPTER *ioc) static void mptfc_init_host_attr(MPT_ADAPTER *ioc,int portnum) { - unsigned class = 0, cos = 0; + unsigned class = 0; + unsigned cos = 0; + unsigned speed; + unsigned port_type; + unsigned port_state; + FCPortPage0_t *pp0; + struct Scsi_Host *sh; + char *sn; /* don't know what to do as only one scsi (fc) host was allocated */ if (portnum != 0) return; - class = ioc->fc_port_page0[portnum].SupportedServiceClass; + pp0 = &ioc->fc_port_page0[portnum]; + sh = ioc->sh; + + sn = fc_host_symbolic_name(sh); + snprintf(sn, FC_SYMBOLIC_NAME_SIZE, "%s %s%08xh", + ioc->prod_name, + MPT_FW_REV_MAGIC_ID_STRING, + ioc->facts.FWVersion.Word); + + fc_host_tgtid_bind_type(sh) = FC_TGTID_BIND_BY_WWPN; + + fc_host_maxframe_size(sh) = pp0->MaxFrameSize; + + fc_host_node_name(sh) = + (u64)pp0->WWNN.High << 32 | (u64)pp0->WWNN.Low; + + fc_host_port_name(sh) = + (u64)pp0->WWPN.High << 32 | (u64)pp0->WWPN.Low; + + fc_host_port_id(sh) = pp0->PortIdentifier; + + class = pp0->SupportedServiceClass; if (class & MPI_FCPORTPAGE0_SUPPORT_CLASS_1) cos |= FC_COS_CLASS1; if (class & MPI_FCPORTPAGE0_SUPPORT_CLASS_2) cos |= FC_COS_CLASS2; if (class & MPI_FCPORTPAGE0_SUPPORT_CLASS_3) cos |= FC_COS_CLASS3; + fc_host_supported_classes(sh) = cos; - fc_host_node_name(ioc->sh) = - (u64)ioc->fc_port_page0[portnum].WWNN.High << 32 - | (u64)ioc->fc_port_page0[portnum].WWNN.Low; + if (pp0->CurrentSpeed == MPI_FCPORTPAGE0_CURRENT_SPEED_1GBIT) + speed = FC_PORTSPEED_1GBIT; + else if (pp0->CurrentSpeed == MPI_FCPORTPAGE0_CURRENT_SPEED_2GBIT) + speed = FC_PORTSPEED_2GBIT; + else if (pp0->CurrentSpeed == MPI_FCPORTPAGE0_CURRENT_SPEED_4GBIT) + speed = FC_PORTSPEED_4GBIT; + else if (pp0->CurrentSpeed == MPI_FCPORTPAGE0_CURRENT_SPEED_10GBIT) + speed = FC_PORTSPEED_10GBIT; + else + speed = FC_PORTSPEED_UNKNOWN; + fc_host_speed(sh) = speed; - fc_host_port_name(ioc->sh) = - (u64)ioc->fc_port_page0[portnum].WWPN.High << 32 - | (u64)ioc->fc_port_page0[portnum].WWPN.Low; + speed = 0; + if (pp0->SupportedSpeeds & MPI_FCPORTPAGE0_SUPPORT_1GBIT_SPEED) + speed |= FC_PORTSPEED_1GBIT; + if (pp0->SupportedSpeeds & MPI_FCPORTPAGE0_SUPPORT_2GBIT_SPEED) + speed |= FC_PORTSPEED_2GBIT; + if (pp0->SupportedSpeeds & MPI_FCPORTPAGE0_SUPPORT_4GBIT_SPEED) + speed |= FC_PORTSPEED_4GBIT; + if (pp0->SupportedSpeeds & MPI_FCPORTPAGE0_SUPPORT_10GBIT_SPEED) + speed |= FC_PORTSPEED_10GBIT; + fc_host_supported_speeds(sh) = speed; - fc_host_port_id(ioc->sh) = ioc->fc_port_page0[portnum].PortIdentifier; + port_state = FC_PORTSTATE_UNKNOWN; + if (pp0->PortState == MPI_FCPORTPAGE0_PORTSTATE_ONLINE) + port_state = FC_PORTSTATE_ONLINE; + else if (pp0->PortState == MPI_FCPORTPAGE0_PORTSTATE_OFFLINE) + port_state = FC_PORTSTATE_LINKDOWN; + fc_host_port_state(sh) = port_state; - fc_host_supported_classes(ioc->sh) = cos; + port_type = FC_PORTTYPE_UNKNOWN; + if (pp0->Flags & MPI_FCPORTPAGE0_FLAGS_ATTACH_POINT_TO_POINT) + port_type = FC_PORTTYPE_PTP; + else if (pp0->Flags & MPI_FCPORTPAGE0_FLAGS_ATTACH_PRIVATE_LOOP) + port_type = FC_PORTTYPE_LPORT; + else if (pp0->Flags & MPI_FCPORTPAGE0_FLAGS_ATTACH_PUBLIC_LOOP) + port_type = FC_PORTTYPE_NLPORT; + else if (pp0->Flags & MPI_FCPORTPAGE0_FLAGS_ATTACH_FABRIC_DIRECT) + port_type = FC_PORTTYPE_NPORT; + fc_host_port_type(sh) = port_type; + + fc_host_fabric_name(sh) = + (pp0->Flags & MPI_FCPORTPAGE0_FLAGS_FABRIC_WWN_VALID) ? + (u64) pp0->FabricWWNN.High << 32 | (u64) pp0->FabricWWPN.Low : + (u64)pp0->WWNN.High << 32 | (u64)pp0->WWNN.Low; - fc_host_tgtid_bind_type(ioc->sh) = FC_TGTID_BIND_BY_WWPN; } static void From b5145d25f0d8eae21ad7969822f2d4ce7f22e72a Mon Sep 17 00:00:00 2001 From: Brian King Date: Wed, 2 Aug 2006 14:57:36 -0500 Subject: [PATCH 0070/1063] [SCSI] ipr: Add some hardware defined types for SATA Add some hardware defined types for SATA. This is required by future patches to add SATA support to ipr. Signed-off-by: Brian King Signed-off-by: James Bottomley --- drivers/scsi/ipr.h | 78 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 75 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index 1ad24df69d70..1e9c1227fdae 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -45,6 +45,7 @@ * This can be adjusted at runtime through sysfs device attributes. */ #define IPR_MAX_CMD_PER_LUN 6 +#define IPR_MAX_CMD_PER_ATA_LUN 1 /* * IPR_NUM_BASE_CMD_BLKS: This defines the maximum number of @@ -106,7 +107,7 @@ #define IPR_IOA_BUS 0xff #define IPR_IOA_TARGET 0xff #define IPR_IOA_LUN 0xff -#define IPR_MAX_NUM_BUSES 8 +#define IPR_MAX_NUM_BUSES 16 #define IPR_MAX_BUS_TO_SCAN IPR_MAX_NUM_BUSES #define IPR_NUM_RESET_RELOAD_RETRIES 3 @@ -145,6 +146,7 @@ #define IPR_LUN_RESET 0x40 #define IPR_TARGET_RESET 0x20 #define IPR_BUS_RESET 0x10 +#define IPR_ATA_PHY_RESET 0x80 #define IPR_ID_HOST_RR_Q 0xC4 #define IPR_QUERY_IOA_CONFIG 0xC5 #define IPR_CANCEL_ALL_REQUESTS 0xCE @@ -295,7 +297,11 @@ struct ipr_std_inq_data { }__attribute__ ((packed)); struct ipr_config_table_entry { - u8 service_level; + u8 proto; +#define IPR_PROTO_SATA 0x02 +#define IPR_PROTO_SATA_ATAPI 0x03 +#define IPR_PROTO_SAS_STP 0x06 +#define IPR_PROTO_SAS_STP_ATAPI 0x07 u8 array_id; u8 flags; #define IPR_IS_IOA_RESOURCE 0x80 @@ -307,6 +313,7 @@ struct ipr_config_table_entry { #define IPR_SUBTYPE_AF_DASD 0 #define IPR_SUBTYPE_GENERIC_SCSI 1 #define IPR_SUBTYPE_VOLUME_SET 2 +#define IPR_SUBTYPE_GENERIC_ATA 4 #define IPR_QUEUEING_MODEL(res) ((((res)->cfgte.flags) & 0x70) >> 4) #define IPR_QUEUE_FROZEN_MODEL 0 @@ -350,6 +357,7 @@ struct ipr_cmd_pkt { #define IPR_RQTYPE_SCSICDB 0x00 #define IPR_RQTYPE_IOACMD 0x01 #define IPR_RQTYPE_HCAM 0x02 +#define IPR_RQTYPE_ATA_PASSTHRU 0x04 u8 luntar_luntrn; @@ -373,6 +381,37 @@ struct ipr_cmd_pkt { __be16 timeout; }__attribute__ ((packed, aligned(4))); +struct ipr_ioarcb_ata_regs { + u8 flags; +#define IPR_ATA_FLAG_PACKET_CMD 0x80 +#define IPR_ATA_FLAG_XFER_TYPE_DMA 0x40 +#define IPR_ATA_FLAG_STATUS_ON_GOOD_COMPLETION 0x20 + u8 reserved[3]; + + __be16 data; + u8 feature; + u8 nsect; + u8 lbal; + u8 lbam; + u8 lbah; + u8 device; + u8 command; + u8 reserved2[3]; + u8 hob_feature; + u8 hob_nsect; + u8 hob_lbal; + u8 hob_lbam; + u8 hob_lbah; + u8 ctl; +}__attribute__ ((packed, aligned(4))); + +struct ipr_ioarcb_add_data { + union { + struct ipr_ioarcb_ata_regs regs; + __be32 add_cmd_parms[10]; + }u; +}__attribute__ ((packed, aligned(4))); + /* IOA Request Control Block 128 bytes */ struct ipr_ioarcb { __be32 ioarcb_host_pci_addr; @@ -397,7 +436,7 @@ struct ipr_ioarcb { struct ipr_cmd_pkt cmd_pkt; __be32 add_cmd_parms_len; - __be32 add_cmd_parms[10]; + struct ipr_ioarcb_add_data add_data; }__attribute__((packed, aligned (4))); struct ipr_ioadl_desc { @@ -433,6 +472,21 @@ struct ipr_ioasa_gpdd { __be32 ioa_data[2]; }__attribute__((packed, aligned (4))); +struct ipr_ioasa_gata { + u8 error; + u8 nsect; /* Interrupt reason */ + u8 lbal; + u8 lbam; + u8 lbah; + u8 device; + u8 status; + u8 alt_status; /* ATA CTL */ + u8 hob_nsect; + u8 hob_lbal; + u8 hob_lbam; + u8 hob_lbah; +}__attribute__((packed, aligned (4))); + struct ipr_auto_sense { __be16 auto_sense_len; __be16 ioa_data_len; @@ -466,6 +520,7 @@ struct ipr_ioasa { __be32 ioasc_specific; /* status code specific field */ #define IPR_ADDITIONAL_STATUS_FMT 0x80000000 #define IPR_AUTOSENSE_VALID 0x40000000 +#define IPR_ATA_DEVICE_WAS_RESET 0x20000000 #define IPR_IOASC_SPECIFIC_MASK 0x00ffffff #define IPR_FIELD_POINTER_VALID (0x80000000 >> 8) #define IPR_FIELD_POINTER_MASK 0x0000ffff @@ -474,6 +529,7 @@ struct ipr_ioasa { struct ipr_ioasa_vset vset; struct ipr_ioasa_af_dasd dasd; struct ipr_ioasa_gpdd gpdd; + struct ipr_ioasa_gata gata; } u; struct ipr_auto_sense auto_sense; @@ -1307,6 +1363,22 @@ static inline int ipr_is_scsi_disk(struct ipr_resource_entry *res) return 0; } +/** + * ipr_is_gata - Determine if a resource is a generic ATA resource + * @res: resource entry struct + * + * Return value: + * 1 if GATA / 0 if not GATA + **/ +static inline int ipr_is_gata(struct ipr_resource_entry *res) +{ + if (!ipr_is_ioa_resource(res) && + IPR_RES_SUBTYPE(res) == IPR_SUBTYPE_GENERIC_ATA) + return 1; + else + return 0; +} + /** * ipr_is_naca_model - Determine if a resource is using NACA queueing model * @res: resource entry struct From 896bbd21408ddbfb9a57819404dbb04f4f0afb35 Mon Sep 17 00:00:00 2001 From: Brian King Date: Wed, 2 Aug 2006 14:57:44 -0500 Subject: [PATCH 0071/1063] [SCSI] ipr: Handle new SAS error codes Add definitions for some SAS error codes that can be logged by ipr SAS adapters. Signed-off-by: Brian King Signed-off-by: James Bottomley --- drivers/scsi/ipr.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 01080b3acf5e..7f2c5cfc57ba 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -175,6 +175,8 @@ struct ipr_error_table_t ipr_error_table[] = { "Qualified success"}, {0x01080000, 1, 1, "FFFE: Soft device bus error recovered by the IOA"}, + {0x01088100, 0, 1, + "4101: Soft device bus fabric error"}, {0x01170600, 0, 1, "FFF9: Device sector reassign successful"}, {0x01170900, 0, 1, @@ -225,6 +227,8 @@ struct ipr_error_table_t ipr_error_table[] = { "3109: IOA timed out a device command"}, {0x04088000, 0, 0, "3120: SCSI bus is not operational"}, + {0x04088100, 0, 1, + "4100: Hard device bus fabric error"}, {0x04118000, 0, 1, "9000: IOA reserved area data check"}, {0x04118100, 0, 1, @@ -273,6 +277,14 @@ struct ipr_error_table_t ipr_error_table[] = { "9091: Incorrect hardware configuration change has been detected"}, {0x04678000, 0, 1, "9073: Invalid multi-adapter configuration"}, + {0x04678100, 0, 1, + "4010: Incorrect connection between cascaded expanders"}, + {0x04678200, 0, 1, + "4020: Connections exceed IOA design limits"}, + {0x04678300, 0, 1, + "4030: Incorrect multipath connection"}, + {0x04679000, 0, 1, + "4110: Unsupported enclosure function"}, {0x046E0000, 0, 1, "FFF4: Command to logical unit failed"}, {0x05240000, 1, 0, @@ -297,6 +309,8 @@ struct ipr_error_table_t ipr_error_table[] = { "9031: Array protection temporarily suspended, protection resuming"}, {0x06040600, 0, 1, "9040: Array protection temporarily suspended, protection resuming"}, + {0x06288000, 0, 1, + "3140: Device bus not ready to ready transition"}, {0x06290000, 0, 1, "FFFB: SCSI bus was reset"}, {0x06290500, 0, 0, @@ -319,6 +333,16 @@ struct ipr_error_table_t ipr_error_table[] = { "3150: SCSI bus configuration error"}, {0x06678100, 0, 1, "9074: Asymmetric advanced function disk configuration"}, + {0x06678300, 0, 1, + "4040: Incomplete multipath connection between IOA and enclosure"}, + {0x06678400, 0, 1, + "4041: Incomplete multipath connection between enclosure and device"}, + {0x06678500, 0, 1, + "9075: Incomplete multipath connection between IOA and remote IOA"}, + {0x06678600, 0, 1, + "9076: Configuration error, missing remote IOA"}, + {0x06679100, 0, 1, + "4050: Enclosure does not support a required multipath function"}, {0x06690200, 0, 1, "9041: Array protection temporarily suspended"}, {0x06698200, 0, 1, @@ -331,6 +355,10 @@ struct ipr_error_table_t ipr_error_table[] = { "9072: Link not operational transition"}, {0x066B8200, 0, 1, "9032: Array exposed but still protected"}, + {0x066B9100, 0, 1, + "4061: Multipath redundancy level got better"}, + {0x066B9200, 0, 1, + "4060: Multipath redundancy level got worse"}, {0x07270000, 0, 0, "Failure due to other device"}, {0x07278000, 0, 1, From 5b7304fbfb74bfca6f7d5a88b28197e3f7f2743b Mon Sep 17 00:00:00 2001 From: Brian King Date: Wed, 2 Aug 2006 14:57:51 -0500 Subject: [PATCH 0072/1063] [SCSI] ipr: Properly handle IOA recovered errors The ipr driver currently translates adapter recovered errors to DID_ERROR. This patch fixes this to translate these errors to success instead. Signed-off-by: Brian King Signed-off-by: James Bottomley --- drivers/scsi/ipr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 7f2c5cfc57ba..55c0156e36b0 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -4218,7 +4218,8 @@ static void ipr_erp_start(struct ipr_ioa_cfg *ioa_cfg, case IPR_IOASC_NR_INIT_CMD_REQUIRED: break; default: - scsi_cmd->result |= (DID_ERROR << 16); + if (IPR_IOASC_SENSE_KEY(ioasc) > RECOVERED_ERROR) + scsi_cmd->result |= (DID_ERROR << 16); if (!ipr_is_vset_device(res) && !ipr_is_naca_model(res)) res->needs_sync_complete = 1; break; From 117d2ce1cea25fc94302ff418ccef644cd3e59af Mon Sep 17 00:00:00 2001 From: Brian King Date: Wed, 2 Aug 2006 14:57:58 -0500 Subject: [PATCH 0073/1063] [SCSI] ipr: Auto sense handling fix Fix up a logic error in the checking for valid sense data. Signed-off-by: Brian King Signed-off-by: James Bottomley --- drivers/scsi/ipr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 55c0156e36b0..7ed4eef8347b 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -4127,8 +4127,7 @@ static int ipr_get_autosense(struct ipr_cmnd *ipr_cmd) { struct ipr_ioasa *ioasa = &ipr_cmd->ioasa; - if ((be32_to_cpu(ioasa->ioasc_specific) & - (IPR_ADDITIONAL_STATUS_FMT | IPR_AUTOSENSE_VALID)) == 0) + if ((be32_to_cpu(ioasa->ioasc_specific) & IPR_AUTOSENSE_VALID) == 0) return 0; memcpy(ipr_cmd->scsi_cmd->sense_buffer, ioasa->auto_sense.data, From 008cd5bbfb4763322837cd1f7c621f02ebe22fef Mon Sep 17 00:00:00 2001 From: Brian King Date: Wed, 2 Aug 2006 14:58:04 -0500 Subject: [PATCH 0074/1063] [SCSI] ipr: Bump driver version to 2.1.4 Bump the ipr driver version to 2.1.4. Signed-off-by: Brian King Signed-off-by: James Bottomley --- drivers/scsi/ipr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index 1e9c1227fdae..11eaff524327 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -36,8 +36,8 @@ /* * Literals */ -#define IPR_DRIVER_VERSION "2.1.3" -#define IPR_DRIVER_DATE "(March 29, 2006)" +#define IPR_DRIVER_VERSION "2.1.4" +#define IPR_DRIVER_DATE "(August 2, 2006)" /* * IPR_MAX_CMD_PER_LUN: This defines the maximum number of outstanding From 4ff36718ede26ee2da73f2dae94d71e2b06845fc Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 4 Jul 2006 12:15:20 -0600 Subject: [PATCH 0075/1063] [SCSI] Improve inquiry printing - Replace scsi_device_types array API with scsi_device_type function API. Gets rid of a lot of common code, as well as being easier to use. - Add the new device types in SPC4 r05a, and rename some of the older ones. - Reformat the printing of inquiry data; now fits on one line and includes PQ. I think I've addressed all the feedback from the previous versions. My current test box prints: scsi 2:0:1:0: Direct access HP 18.2G ATLAS10K3_18_SCA HP05 PQ: 0 ANSI: 2 Signed-off-by: Matthew Wilcox Signed-off-by: James Bottomley --- drivers/block/cciss_scsi.c | 14 +++----- drivers/scsi/fcal.c | 3 +- drivers/scsi/g_NCR5380.c | 3 +- drivers/scsi/megaraid.c | 4 +-- drivers/scsi/scsi.c | 36 +++++++++++++++------ drivers/scsi/scsi_proc.c | 4 +-- drivers/scsi/scsi_scan.c | 66 +++++--------------------------------- include/scsi/scsi.h | 10 ++---- 8 files changed, 46 insertions(+), 94 deletions(-) diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c index afdff32f6724..05f79d7393f7 100644 --- a/drivers/block/cciss_scsi.c +++ b/drivers/block/cciss_scsi.c @@ -251,10 +251,6 @@ scsi_cmd_stack_free(int ctlr) stk->pool = NULL; } -/* scsi_device_types comes from scsi.h */ -#define DEVICETYPE(n) (n<0 || n>MAX_SCSI_DEVICE_CODE) ? \ - "Unknown" : scsi_device_types[n] - #if 0 static int xmargin=8; static int amargin=60; @@ -389,7 +385,7 @@ cciss_scsi_add_entry(int ctlr, int hostno, time anyway (the scsi layer's inquiries will show that info) */ if (hostno != -1) printk("cciss%d: %s device c%db%dt%dl%d added.\n", - ctlr, DEVICETYPE(sd->devtype), hostno, + ctlr, scsi_device_type(sd->devtype), hostno, sd->bus, sd->target, sd->lun); return 0; } @@ -407,7 +403,7 @@ cciss_scsi_remove_entry(int ctlr, int hostno, int entry) ccissscsi[ctlr].dev[i] = ccissscsi[ctlr].dev[i+1]; ccissscsi[ctlr].ndevices--; printk("cciss%d: %s device c%db%dt%dl%d removed.\n", - ctlr, DEVICETYPE(sd.devtype), hostno, + ctlr, scsi_device_type(sd.devtype), hostno, sd.bus, sd.target, sd.lun); } @@ -458,7 +454,7 @@ adjust_cciss_scsi_table(int ctlr, int hostno, if (found == 0) { /* device no longer present. */ changes++; /* printk("cciss%d: %s device c%db%dt%dl%d removed.\n", - ctlr, DEVICETYPE(csd->devtype), hostno, + ctlr, scsi_device_type(csd->devtype), hostno, csd->bus, csd->target, csd->lun); */ cciss_scsi_remove_entry(ctlr, hostno, i); /* note, i not incremented */ @@ -468,7 +464,7 @@ adjust_cciss_scsi_table(int ctlr, int hostno, printk("cciss%d: device c%db%dt%dl%d type changed " "(device type now %s).\n", ctlr, hostno, csd->bus, csd->target, csd->lun, - DEVICETYPE(csd->devtype)); + scsi_device_type(csd->devtype)); csd->devtype = sd[j].devtype; i++; /* so just move along. */ } else /* device is same as it ever was, */ @@ -1098,7 +1094,7 @@ cciss_update_non_disk_devices(int cntl_num, int hostno) if (ncurrent >= CCISS_MAX_SCSI_DEVS_PER_HBA) { printk(KERN_INFO "cciss%d: %s ignored, " "too many devices.\n", cntl_num, - DEVICETYPE(devtype)); + scsi_device_type(devtype)); break; } memcpy(¤tsd[ncurrent].scsi3addr[0], diff --git a/drivers/scsi/fcal.c b/drivers/scsi/fcal.c index 7f891023aa15..c4e16c0775de 100644 --- a/drivers/scsi/fcal.c +++ b/drivers/scsi/fcal.c @@ -248,8 +248,7 @@ int fcal_proc_info (struct Scsi_Host *host, char *buffer, char **start, off_t of if (scd->id == target) { SPRINTF (" [AL-PA: %02x, Id: %02d, Port WWN: %08x%08x, Node WWN: %08x%08x] ", alpa, target, u1[0], u1[1], u2[0], u2[1]); - SPRINTF ("%s ", (scd->type < MAX_SCSI_DEVICE_CODE) ? - scsi_device_types[(short) scd->type] : "Unknown device"); + SPRINTF ("%s ", scsi_device_type(scd->type)); for (j = 0; (j < 8) && (scd->vendor[j] >= 0x20); j++) SPRINTF ("%c", scd->vendor[j]); diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c index 67f1100f3103..cdd893bb4e28 100644 --- a/drivers/scsi/g_NCR5380.c +++ b/drivers/scsi/g_NCR5380.c @@ -811,7 +811,6 @@ static int generic_NCR5380_proc_info(struct Scsi_Host *scsi_ptr, char *buffer, c struct NCR5380_hostdata *hostdata; #ifdef NCR5380_STATS struct scsi_device *dev; - extern const char *const scsi_device_types[MAX_SCSI_DEVICE_CODE]; #endif NCR5380_setup(scsi_ptr); @@ -851,7 +850,7 @@ static int generic_NCR5380_proc_info(struct Scsi_Host *scsi_ptr, char *buffer, c long tr = hostdata->time_read[dev->id] / HZ; long tw = hostdata->time_write[dev->id] / HZ; - PRINTP(" T:%d %s " ANDP dev->id ANDP(dev->type < MAX_SCSI_DEVICE_CODE) ? scsi_device_types[(int) dev->type] : "Unknown"); + PRINTP(" T:%d %s " ANDP dev->id ANDP scsi_device_type(dev->type)); for (i = 0; i < 8; i++) if (dev->vendor[i] >= 0x20) *(buffer + (len++)) = dev->vendor[i]; diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index 76edbb639d37..ccb0055ac73a 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -2822,9 +2822,7 @@ mega_print_inquiry(char *page, char *scsi_inq) i = scsi_inq[0] & 0x1f; - len += sprintf(page+len, " Type: %s ", - i < MAX_SCSI_DEVICE_CODE ? scsi_device_types[i] : - "Unknown "); + len += sprintf(page+len, " Type: %s ", scsi_device_type(i)); len += sprintf(page+len, " ANSI SCSI revision: %02x", scsi_inq[2] & 0x07); diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index b332caddd5b3..94df671d776a 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -96,24 +96,40 @@ unsigned int scsi_logging_level; EXPORT_SYMBOL(scsi_logging_level); #endif -const char *const scsi_device_types[MAX_SCSI_DEVICE_CODE] = { - "Direct-Access ", - "Sequential-Access", +static const char *const scsi_device_types[] = { + "Direct access ", + "Sequential access", "Printer ", "Processor ", "WORM ", - "CD-ROM ", + "CD/DVD ", "Scanner ", - "Optical Device ", - "Medium Changer ", + "Optical memory ", + "Media changer ", "Communications ", - "Unknown ", - "Unknown ", + "ASC IT8 ", + "ASC IT8 ", "RAID ", "Enclosure ", - "Direct-Access-RBC", + "Direct access RBC", + "Optical card ", + "Bridge controller", + "Object storage ", + "Automation/Drive ", }; -EXPORT_SYMBOL(scsi_device_types); + +const char * scsi_device_type(unsigned type) +{ + if (type == 0x1e) + return "Well-known LUN "; + if (type == 0x1f) + return "No Device "; + if (type > ARRAY_SIZE(scsi_device_types)) + return "Unknown "; + return scsi_device_types[type]; +} + +EXPORT_SYMBOL(scsi_device_type); struct scsi_host_cmd_pool { kmem_cache_t *slab; diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c index 55200e4fdf11..524a5f7a5193 100644 --- a/drivers/scsi/scsi_proc.c +++ b/drivers/scsi/scsi_proc.c @@ -178,9 +178,7 @@ static int proc_print_scsidevice(struct device *dev, void *data) seq_printf(s, "\n"); - seq_printf(s, " Type: %s ", - sdev->type < MAX_SCSI_DEVICE_CODE ? - scsi_device_types[(int) sdev->type] : "Unknown "); + seq_printf(s, " Type: %s ", scsi_device_type(sdev->type)); seq_printf(s, " ANSI" " SCSI revision: %02x", (sdev->scsi_level - 1) ? sdev->scsi_level - 1 : 1); diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 1bd92b9b46d9..180399406510 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -133,59 +133,6 @@ static void scsi_unlock_floptical(struct scsi_device *sdev, SCSI_TIMEOUT, 3); } -/** - * print_inquiry - printk the inquiry information - * @inq_result: printk this SCSI INQUIRY - * - * Description: - * printk the vendor, model, and other information found in the - * INQUIRY data in @inq_result. - * - * Notes: - * Remove this, and replace with a hotplug event that logs any - * relevant information. - **/ -static void print_inquiry(unsigned char *inq_result) -{ - int i; - - printk(KERN_NOTICE " Vendor: "); - for (i = 8; i < 16; i++) - if (inq_result[i] >= 0x20 && i < inq_result[4] + 5) - printk("%c", inq_result[i]); - else - printk(" "); - - printk(" Model: "); - for (i = 16; i < 32; i++) - if (inq_result[i] >= 0x20 && i < inq_result[4] + 5) - printk("%c", inq_result[i]); - else - printk(" "); - - printk(" Rev: "); - for (i = 32; i < 36; i++) - if (inq_result[i] >= 0x20 && i < inq_result[4] + 5) - printk("%c", inq_result[i]); - else - printk(" "); - - printk("\n"); - - i = inq_result[0] & 0x1f; - - printk(KERN_NOTICE " Type: %s ", - i < - MAX_SCSI_DEVICE_CODE ? scsi_device_types[i] : - "Unknown "); - printk(" ANSI SCSI revision: %02x", - inq_result[2] & 0x07); - if ((inq_result[2] & 0x07) == 1 && (inq_result[3] & 0x0f) == 1) - printk(" CCS\n"); - else - printk("\n"); -} - /** * scsi_alloc_sdev - allocate and setup a scsi_Device * @@ -653,9 +600,8 @@ static int scsi_add_lun(struct scsi_device *sdev, char *inq_result, int *bflags) if (*bflags & BLIST_ISROM) { /* * It would be better to modify sdev->type, and set - * sdev->removable, but then the print_inquiry() output - * would not show TYPE_ROM; if print_inquiry() is removed - * the issue goes away. + * sdev->removable; this can now be done since + * print_inquiry has gone away. */ inq_result[0] = TYPE_ROM; inq_result[1] |= 0x80; /* removable */ @@ -684,8 +630,6 @@ static int scsi_add_lun(struct scsi_device *sdev, char *inq_result, int *bflags) printk(KERN_INFO "scsi: unknown device type %d\n", sdev->type); } - print_inquiry(inq_result); - /* * For a peripheral qualifier (PQ) value of 1 (001b), the SCSI * spec says: The device server is capable of supporting the @@ -715,6 +659,12 @@ static int scsi_add_lun(struct scsi_device *sdev, char *inq_result, int *bflags) if (inq_result[7] & 0x10) sdev->sdtr = 1; + sdev_printk(KERN_NOTICE "scsi", sdev, "%s %.8s %.16s %.4s PQ: %d " + "ANSI: %d%s\n", scsi_device_type(sdev->type), + sdev->vendor, sdev->model, sdev->rev, + sdev->inq_periph_qual, inq_result[2] & 0x07, + (inq_result[3] & 0x0f) == 1 ? " CCS" : ""); + /* * End sysfs code. */ diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index c60b8ff2f5e4..1bc675201413 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -24,13 +24,6 @@ extern const unsigned char scsi_command_size[8]; #define COMMAND_SIZE(opcode) scsi_command_size[((opcode) >> 5) & 7] -/* - * SCSI device types - */ - -#define MAX_SCSI_DEVICE_CODE 15 -extern const char *const scsi_device_types[MAX_SCSI_DEVICE_CODE]; - /* * Special value for scanning to specify scanning or rescanning of all * possible channels, (target) ids, or luns on a given shost. @@ -225,6 +218,9 @@ static inline int scsi_status_is_good(int status) #define TYPE_RBC 0x0e #define TYPE_NO_LUN 0x7f +/* Returns a human-readable name for the device */ +extern const char * scsi_device_type(unsigned type); + /* * standard mode-select header prepended to all mode-select commands */ From 19ac0db3e22de3b00cc4aadc7efbad0420c7aa08 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 6 Aug 2006 18:15:22 -0500 Subject: [PATCH 0076/1063] [SCSI] fix up short inquiry printing A recent drivers base commit: 3e95637a48820ff8bedb33e6439def96ccff1de5 Caused the bus to be added to dev_printk, so now our SCSI inquiry short messages print like this: scsiscsi 2:0:0:0: Direct access IBM-ESXS ST973401SS B519 PQ: 0 ANSI: 5 Just remove the "scsi" from the sdev_printk to compensate. Signed-off-by: James Bottomley --- drivers/scsi/scsi_scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 180399406510..114e2067dce5 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -659,7 +659,7 @@ static int scsi_add_lun(struct scsi_device *sdev, char *inq_result, int *bflags) if (inq_result[7] & 0x10) sdev->sdtr = 1; - sdev_printk(KERN_NOTICE "scsi", sdev, "%s %.8s %.16s %.4s PQ: %d " + sdev_printk(KERN_NOTICE, sdev, "%s %.8s %.16s %.4s PQ: %d " "ANSI: %d%s\n", scsi_device_type(sdev->type), sdev->vendor, sdev->model, sdev->rev, sdev->inq_periph_qual, inq_result[2] & 0x07, From afd05423e02bc7391a7489b686ba1e166b6e8349 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 28 Jul 2006 13:58:37 +1000 Subject: [PATCH 0077/1063] [POWERPC] Enable PURR sysfs entry correctly We have CPU_FTR_PURR now, so let's use it. Signed-off-by: Michael Neuling Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index ae927a4e46e4..406f308ddead 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -230,7 +230,7 @@ static void register_cpu_online(unsigned int cpu) if (cur_cpu_spec->num_pmcs >= 8) sysdev_create_file(s, &attr_pmc8); - if (cpu_has_feature(CPU_FTR_SMT)) + if (cpu_has_feature(CPU_FTR_PURR)) sysdev_create_file(s, &attr_purr); } @@ -272,7 +272,7 @@ static void unregister_cpu_online(unsigned int cpu) if (cur_cpu_spec->num_pmcs >= 8) sysdev_remove_file(s, &attr_pmc8); - if (cpu_has_feature(CPU_FTR_SMT)) + if (cpu_has_feature(CPU_FTR_PURR)) sysdev_remove_file(s, &attr_purr); } #endif /* CONFIG_HOTPLUG_CPU */ From 919fede6edab94cccb3ca8c1c0b32fa62c9369a5 Mon Sep 17 00:00:00 2001 From: Jon Loeliger Date: Mon, 31 Jul 2006 15:35:41 -0500 Subject: [PATCH 0078/1063] [POWERPC] Rewrite the PPC 86xx IRQ handling to use Flat Device Tree IRQ setup now comes from the Flat Device Tree and use the new generic IRQ code. Fixed the fsl_soc.c IRQ OF interrupt node parsing. Removed some unused MPC86xx macro definition. Signed-off-by: Zhang Wei Signed-off-by: Jon Loeliger Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/86xx/mpc8641_hpcn.h | 32 -- arch/powerpc/platforms/86xx/mpc86xx_hpcn.c | 330 +++++++++++---------- arch/powerpc/sysdev/fsl_soc.c | 30 +- 3 files changed, 191 insertions(+), 201 deletions(-) diff --git a/arch/powerpc/platforms/86xx/mpc8641_hpcn.h b/arch/powerpc/platforms/86xx/mpc8641_hpcn.h index 5d2bcf78cef7..41e554c4af94 100644 --- a/arch/powerpc/platforms/86xx/mpc8641_hpcn.h +++ b/arch/powerpc/platforms/86xx/mpc8641_hpcn.h @@ -16,38 +16,6 @@ #include -/* PCI interrupt controller */ -#define PIRQA 3 -#define PIRQB 4 -#define PIRQC 5 -#define PIRQD 6 -#define PIRQ7 7 -#define PIRQE 9 -#define PIRQF 10 -#define PIRQG 11 -#define PIRQH 12 - -/* PCI-Express memory map */ -#define MPC86XX_PCIE_LOWER_IO 0x00000000 -#define MPC86XX_PCIE_UPPER_IO 0x00ffffff - -#define MPC86XX_PCIE_LOWER_MEM 0x80000000 -#define MPC86XX_PCIE_UPPER_MEM 0x9fffffff - -#define MPC86XX_PCIE_IO_BASE 0xe2000000 -#define MPC86XX_PCIE_MEM_OFFSET 0x00000000 - -#define MPC86XX_PCIE_IO_SIZE 0x01000000 - -#define PCIE1_CFG_ADDR_OFFSET (0x8000) -#define PCIE1_CFG_DATA_OFFSET (0x8004) - -#define PCIE2_CFG_ADDR_OFFSET (0x9000) -#define PCIE2_CFG_DATA_OFFSET (0x9004) - -#define MPC86xx_PCIE_OFFSET PCIE1_CFG_ADDR_OFFSET -#define MPC86xx_PCIE_SIZE (0x1000) - #define MPC86XX_RSTCR_OFFSET (0xe00b0) /* Reset Control Register */ #endif /* __MPC8641_HPCN_H__ */ diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c index 839090682ab2..4a33d95e7ad7 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c @@ -37,6 +37,14 @@ #include "mpc86xx.h" #include "mpc8641_hpcn.h" +#undef DEBUG + +#ifdef DEBUG +#define DBG(fmt...) do { printk(KERN_ERR fmt); } while(0) +#else +#define DBG(fmt...) do { } while(0) +#endif + #ifndef CONFIG_PCI unsigned long isa_io_base = 0; unsigned long isa_mem_base = 0; @@ -44,205 +52,215 @@ unsigned long pci_dram_offset = 0; #endif -/* - * Internal interrupts are all Level Sensitive, and Positive Polarity - */ - -static u_char mpc86xx_hpcn_openpic_initsenses[] __initdata = { - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 0: Reserved */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 1: MCM */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 2: DDR DRAM */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 3: LBIU */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 4: DMA 0 */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 5: DMA 1 */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 6: DMA 2 */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 7: DMA 3 */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 8: PCIE1 */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 9: PCIE2 */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 10: Reserved */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 11: Reserved */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 12: DUART2 */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 13: TSEC 1 Transmit */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 14: TSEC 1 Receive */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 15: TSEC 3 transmit */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 16: TSEC 3 receive */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 17: TSEC 3 error */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 18: TSEC 1 Receive/Transmit Error */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 19: TSEC 2 Transmit */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 20: TSEC 2 Receive */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 21: TSEC 4 transmit */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 22: TSEC 4 receive */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 23: TSEC 4 error */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 24: TSEC 2 Receive/Transmit Error */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 25: Unused */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 26: DUART1 */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 27: I2C */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 28: Performance Monitor */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 29: Unused */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 30: Unused */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 31: Unused */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 32: SRIO error/write-port unit */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 33: SRIO outbound doorbell */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 34: SRIO inbound doorbell */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 35: Unused */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 36: Unused */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 37: SRIO outbound message unit 1 */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 38: SRIO inbound message unit 1 */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 39: SRIO outbound message unit 2 */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 40: SRIO inbound message unit 2 */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 41: Unused */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 42: Unused */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 43: Unused */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 44: Unused */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 45: Unused */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 46: Unused */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* Internal 47: Unused */ - 0x0, /* External 0: */ - 0x0, /* External 1: */ - 0x0, /* External 2: */ - 0x0, /* External 3: */ - 0x0, /* External 4: */ - 0x0, /* External 5: */ - 0x0, /* External 6: */ - 0x0, /* External 7: */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE), /* External 8: Pixis FPGA */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE), /* External 9: ULI 8259 INTR Cascade */ - (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE), /* External 10: Quad ETH PHY */ - 0x0, /* External 11: */ - 0x0, - 0x0, - 0x0, - 0x0, -}; - +static void mpc86xx_8259_cascade(unsigned int irq, struct irq_desc *desc, + struct pt_regs *regs) +{ + unsigned int cascade_irq = i8259_irq(regs); + if (cascade_irq != NO_IRQ) + generic_handle_irq(cascade_irq, regs); + desc->chip->eoi(irq); +} void __init mpc86xx_hpcn_init_irq(void) { struct mpic *mpic1; + struct device_node *np, *cascade_node = NULL; + int cascade_irq; phys_addr_t openpic_paddr; + np = of_find_node_by_type(NULL, "open-pic"); + if (np == NULL) + return; + /* Determine the Physical Address of the OpenPIC regs */ openpic_paddr = get_immrbase() + MPC86xx_OPENPIC_OFFSET; /* Alloc mpic structure and per isu has 16 INT entries. */ - mpic1 = mpic_alloc(openpic_paddr, + mpic1 = mpic_alloc(np, openpic_paddr, MPIC_PRIMARY | MPIC_WANTS_RESET | MPIC_BIG_ENDIAN, - 16, MPC86xx_OPENPIC_IRQ_OFFSET, 0, 250, - mpc86xx_hpcn_openpic_initsenses, - sizeof(mpc86xx_hpcn_openpic_initsenses), + 16, NR_IRQS - 4, " MPIC "); BUG_ON(mpic1 == NULL); - /* 48 Internal Interrupts */ - mpic_assign_isu(mpic1, 0, openpic_paddr + 0x10200); - mpic_assign_isu(mpic1, 1, openpic_paddr + 0x10400); - mpic_assign_isu(mpic1, 2, openpic_paddr + 0x10600); + mpic_assign_isu(mpic1, 0, openpic_paddr + 0x10000); - /* 16 External interrupts */ - mpic_assign_isu(mpic1, 3, openpic_paddr + 0x10000); + /* 48 Internal Interrupts */ + mpic_assign_isu(mpic1, 1, openpic_paddr + 0x10200); + mpic_assign_isu(mpic1, 2, openpic_paddr + 0x10400); + mpic_assign_isu(mpic1, 3, openpic_paddr + 0x10600); + + /* 16 External interrupts + * Moving them from [0 - 15] to [64 - 79] + */ + mpic_assign_isu(mpic1, 4, openpic_paddr + 0x10000); mpic_init(mpic1); #ifdef CONFIG_PCI - mpic_setup_cascade(MPC86xx_IRQ_EXT9, i8259_irq_cascade, NULL); - i8259_init(0, I8259_OFFSET); + /* Initialize i8259 controller */ + for_each_node_by_type(np, "interrupt-controller") + if (device_is_compatible(np, "chrp,iic")) { + cascade_node = np; + break; + } + if (cascade_node == NULL) { + printk(KERN_DEBUG "mpc86xxhpcn: no ISA interrupt controller\n"); + return; + } + + cascade_irq = irq_of_parse_and_map(cascade_node, 0); + if (cascade_irq == NO_IRQ) { + printk(KERN_ERR "mpc86xxhpcn: failed to map cascade interrupt"); + return; + } + DBG("mpc86xxhpcn: cascade mapped to irq %d\n", cascade_irq); + + i8259_init(cascade_node, 0); + set_irq_chained_handler(cascade_irq, mpc86xx_8259_cascade); #endif } - - #ifdef CONFIG_PCI -/* - * interrupt routing - */ -int -mpc86xx_map_irq(struct pci_dev *dev, unsigned char idsel, unsigned char pin) +enum pirq{PIRQA = 8, PIRQB, PIRQC, PIRQD, PIRQE, PIRQF, PIRQG, PIRQH}; +const unsigned char uli1575_irq_route_table[16] = { + 0, /* 0: Reserved */ + 0x8, /* 1: 0b1000 */ + 0, /* 2: Reserved */ + 0x2, /* 3: 0b0010 */ + 0x4, /* 4: 0b0100 */ + 0x5, /* 5: 0b0101 */ + 0x7, /* 6: 0b0111 */ + 0x6, /* 7: 0b0110 */ + 0, /* 8: Reserved */ + 0x1, /* 9: 0b0001 */ + 0x3, /* 10: 0b0011 */ + 0x9, /* 11: 0b1001 */ + 0xb, /* 12: 0b1011 */ + 0, /* 13: Reserved */ + 0xd, /* 14, 0b1101 */ + 0xf, /* 15, 0b1111 */ +}; + +static int __devinit +get_pci_irq_from_of(struct pci_controller *hose, int slot, int pin) { - static char pci_irq_table[][4] = { - /* - * PCI IDSEL/INTPIN->INTLINE - * A B C D - */ - {PIRQA, PIRQB, PIRQC, PIRQD}, /* IDSEL 17 -- PCI Slot 1 */ - {PIRQB, PIRQC, PIRQD, PIRQA}, /* IDSEL 18 -- PCI Slot 2 */ - {0, 0, 0, 0}, /* IDSEL 19 */ - {0, 0, 0, 0}, /* IDSEL 20 */ - {0, 0, 0, 0}, /* IDSEL 21 */ - {0, 0, 0, 0}, /* IDSEL 22 */ - {0, 0, 0, 0}, /* IDSEL 23 */ - {0, 0, 0, 0}, /* IDSEL 24 */ - {0, 0, 0, 0}, /* IDSEL 25 */ - {PIRQD, PIRQA, PIRQB, PIRQC}, /* IDSEL 26 -- PCI Bridge*/ - {PIRQC, 0, 0, 0}, /* IDSEL 27 -- LAN */ - {PIRQE, PIRQF, PIRQH, PIRQ7}, /* IDSEL 28 -- USB 1.1 */ - {PIRQE, PIRQF, PIRQG, 0}, /* IDSEL 29 -- Audio & Modem */ - {PIRQH, 0, 0, 0}, /* IDSEL 30 -- LPC & PMU*/ - {PIRQD, 0, 0, 0}, /* IDSEL 31 -- ATA */ - }; + struct of_irq oirq; + u32 laddr[3]; + struct device_node *hosenode = hose ? hose->arch_data : NULL; - const long min_idsel = 17, max_idsel = 31, irqs_per_slot = 4; - return PCI_IRQ_TABLE_LOOKUP + I8259_OFFSET; + if (!hosenode) return -EINVAL; + + laddr[0] = (hose->first_busno << 16) | (PCI_DEVFN(slot, 0) << 8); + laddr[1] = laddr[2] = 0; + of_irq_map_raw(hosenode, &pin, laddr, &oirq); + DBG("mpc86xx_hpcn: pci irq addr %x, slot %d, pin %d, irq %d\n", + laddr[0], slot, pin, oirq.specifier[0]); + return oirq.specifier[0]; } -static void __devinit quirk_ali1575(struct pci_dev *dev) +static void __devinit quirk_uli1575(struct pci_dev *dev) { unsigned short temp; + struct pci_controller *hose = pci_bus_to_host(dev->bus); + unsigned char irq2pin[16]; + unsigned long pirq_map_word = 0; + u32 irq; + int i; /* - * ALI1575 interrupts route table setup: - * - * IRQ pin IRQ# - * PIRQA ---- 3 - * PIRQB ---- 4 - * PIRQC ---- 5 - * PIRQD ---- 6 - * PIRQE ---- 9 - * PIRQF ---- 10 - * PIRQG ---- 11 - * PIRQH ---- 12 + * ULI1575 interrupts route setup + */ + memset(irq2pin, 0, 16); /* Initialize default value 0 */ + + /* + * PIRQA -> PIRQD mapping read from OF-tree * * interrupts for PCI slot0 -- PIRQA / PIRQB / PIRQC / PIRQD * PCI slot1 -- PIRQB / PIRQC / PIRQD / PIRQA */ - pci_write_config_dword(dev, 0x48, 0xb9317542); + for (i = 0; i < 4; i++){ + irq = get_pci_irq_from_of(hose, 17, i + 1); + if (irq > 0 && irq < 16) + irq2pin[irq] = PIRQA + i; + else + printk(KERN_WARNING "ULI1575 device" + "(slot %d, pin %d) irq %d is invalid.\n", + 17, i, irq); + } - /* USB 1.1 OHCI controller 1, interrupt: PIRQE */ - pci_write_config_byte(dev, 0x86, 0x0c); + /* + * PIRQE -> PIRQF mapping set manually + * + * IRQ pin IRQ# + * PIRQE ---- 9 + * PIRQF ---- 10 + * PIRQG ---- 11 + * PIRQH ---- 12 + */ + for (i = 0; i < 4; i++) irq2pin[i + 9] = PIRQE + i; - /* USB 1.1 OHCI controller 2, interrupt: PIRQF */ - pci_write_config_byte(dev, 0x87, 0x0d); + /* Set IRQ-PIRQ Mapping to ULI1575 */ + for (i = 0; i < 16; i++) + if (irq2pin[i]) + pirq_map_word |= (uli1575_irq_route_table[i] & 0xf) + << ((irq2pin[i] - PIRQA) * 4); - /* USB 1.1 OHCI controller 3, interrupt: PIRQH */ - pci_write_config_byte(dev, 0x88, 0x0f); + /* ULI1575 IRQ mapping conf register default value is 0xb9317542 */ + DBG("Setup ULI1575 IRQ mapping configuration register value = 0x%x\n", + pirq_map_word); + pci_write_config_dword(dev, 0x48, pirq_map_word); - /* USB 2.0 controller, interrupt: PIRQ7 */ - pci_write_config_byte(dev, 0x74, 0x06); +#define ULI1575_SET_DEV_IRQ(slot, pin, reg) \ + do { \ + int irq; \ + irq = get_pci_irq_from_of(hose, slot, pin); \ + if (irq > 0 && irq < 16) \ + pci_write_config_byte(dev, reg, irq2pin[irq]); \ + else \ + printk(KERN_WARNING "ULI1575 device" \ + "(slot %d, pin %d) irq %d is invalid.\n", \ + slot, pin, irq); \ + } while(0) - /* Audio controller, interrupt: PIRQE */ - pci_write_config_byte(dev, 0x8a, 0x0c); + /* USB 1.1 OHCI controller 1, slot 28, pin 1 */ + ULI1575_SET_DEV_IRQ(28, 1, 0x86); - /* Modem controller, interrupt: PIRQF */ - pci_write_config_byte(dev, 0x8b, 0x0d); + /* USB 1.1 OHCI controller 2, slot 28, pin 2 */ + ULI1575_SET_DEV_IRQ(28, 2, 0x87); - /* HD audio controller, interrupt: PIRQG */ - pci_write_config_byte(dev, 0x8c, 0x0e); + /* USB 1.1 OHCI controller 3, slot 28, pin 3 */ + ULI1575_SET_DEV_IRQ(28, 3, 0x88); - /* Serial ATA interrupt: PIRQD */ - pci_write_config_byte(dev, 0x8d, 0x0b); + /* USB 2.0 controller, slot 28, pin 4 */ + irq = get_pci_irq_from_of(hose, 28, 4); + if (irq >= 0 && irq <=15) + pci_write_config_dword(dev, 0x74, uli1575_irq_route_table[irq]); - /* SMB interrupt: PIRQH */ - pci_write_config_byte(dev, 0x8e, 0x0f); + /* Audio controller, slot 29, pin 1 */ + ULI1575_SET_DEV_IRQ(29, 1, 0x8a); - /* PMU ACPI SCI interrupt: PIRQH */ - pci_write_config_byte(dev, 0x8f, 0x0f); + /* Modem controller, slot 29, pin 2 */ + ULI1575_SET_DEV_IRQ(29, 2, 0x8b); + + /* HD audio controller, slot 29, pin 3 */ + ULI1575_SET_DEV_IRQ(29, 3, 0x8c); + + /* SMB interrupt: slot 30, pin 1 */ + ULI1575_SET_DEV_IRQ(30, 1, 0x8e); + + /* PMU ACPI SCI interrupt: slot 30, pin 2 */ + ULI1575_SET_DEV_IRQ(30, 2, 0x8f); + + /* Serial ATA interrupt: slot 31, pin 1 */ + ULI1575_SET_DEV_IRQ(31, 1, 0x8d); /* Primary PATA IDE IRQ: 14 * Secondary PATA IDE IRQ: 15 */ - pci_write_config_byte(dev, 0x44, 0x3d); - pci_write_config_byte(dev, 0x75, 0x0f); + pci_write_config_byte(dev, 0x44, 0x30 | uli1575_irq_route_table[14]); + pci_write_config_byte(dev, 0x75, uli1575_irq_route_table[15]); /* Set IRQ14 and IRQ15 to legacy IRQs */ pci_read_config_word(dev, 0x46, &temp); @@ -264,6 +282,8 @@ static void __devinit quirk_ali1575(struct pci_dev *dev) */ outb(0xfa, 0x4d0); outb(0x1e, 0x4d1); + +#undef ULI1575_SET_DEV_IRQ } static void __devinit quirk_uli5288(struct pci_dev *dev) @@ -306,7 +326,7 @@ static void __devinit early_uli5249(struct pci_dev *dev) dev->class |= 0x1; } -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x1575, quirk_ali1575); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x1575, quirk_uli1575); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5288, quirk_uli5288); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5229, quirk_uli5229); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AL, 0x5249, early_uli5249); @@ -337,8 +357,6 @@ mpc86xx_hpcn_setup_arch(void) for (np = NULL; (np = of_find_node_by_type(np, "pci")) != NULL;) add_bridge(np); - ppc_md.pci_swizzle = common_swizzle; - ppc_md.pci_map_irq = mpc86xx_map_irq; ppc_md.pci_exclude_device = mpc86xx_exclude_device; #endif @@ -377,6 +395,15 @@ mpc86xx_hpcn_show_cpuinfo(struct seq_file *m) } +void __init mpc86xx_hpcn_pcibios_fixup(void) +{ + struct pci_dev *dev = NULL; + + for_each_pci_dev(dev) + pci_read_irq_line(dev); +} + + /* * Called very early, device-tree isn't unflattened */ @@ -431,6 +458,7 @@ define_machine(mpc86xx_hpcn) { .setup_arch = mpc86xx_hpcn_setup_arch, .init_IRQ = mpc86xx_hpcn_init_irq, .show_cpuinfo = mpc86xx_hpcn_show_cpuinfo, + .pcibios_fixup = mpc86xx_hpcn_pcibios_fixup, .get_irq = mpic_get_irq, .restart = mpc86xx_restart, .time_init = mpc86xx_time_init, diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index 07c47e8309ed..4a6aa640ac13 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -85,11 +85,8 @@ static int __init gfar_mdio_of_init(void) mdio_data.irq[k] = -1; while ((child = of_get_next_child(np, child)) != NULL) { - if (child->n_intrs) { - const u32 *id = - get_property(child, "reg", NULL); - mdio_data.irq[*id] = child->intrs[0].line; - } + const u32 *id = get_property(child, "reg", NULL); + mdio_data.irq[*id] = irq_of_parse_and_map(child, 0); } ret = @@ -131,6 +128,7 @@ static int __init gfar_of_init(void) const char *model; const void *mac_addr; const phandle *ph; + int n_res = 1; memset(r, 0, sizeof(r)); memset(&gfar_data, 0, sizeof(gfar_data)); @@ -139,8 +137,7 @@ static int __init gfar_of_init(void) if (ret) goto err; - r[1].start = np->intrs[0].line; - r[1].end = np->intrs[0].line; + r[1].start = r[1].end = irq_of_parse_and_map(np, 0); r[1].flags = IORESOURCE_IRQ; model = get_property(np, "model", NULL); @@ -150,19 +147,19 @@ static int __init gfar_of_init(void) r[1].name = gfar_tx_intr; r[2].name = gfar_rx_intr; - r[2].start = np->intrs[1].line; - r[2].end = np->intrs[1].line; + r[2].start = r[2].end = irq_of_parse_and_map(np, 1); r[2].flags = IORESOURCE_IRQ; r[3].name = gfar_err_intr; - r[3].start = np->intrs[2].line; - r[3].end = np->intrs[2].line; + r[3].start = r[3].end = irq_of_parse_and_map(np, 2); r[3].flags = IORESOURCE_IRQ; + + n_res += 2; } gfar_dev = platform_device_register_simple("fsl-gianfar", i, &r[0], - np->n_intrs + 1); + n_res + 1); if (IS_ERR(gfar_dev)) { ret = PTR_ERR(gfar_dev); @@ -251,8 +248,7 @@ static int __init fsl_i2c_of_init(void) if (ret) goto err; - r[1].start = np->intrs[0].line; - r[1].end = np->intrs[0].line; + r[1].start = r[1].end = irq_of_parse_and_map(np, 0); r[1].flags = IORESOURCE_IRQ; i2c_dev = platform_device_register_simple("fsl-i2c", i, r, 2); @@ -388,8 +384,7 @@ static int __init fsl_usb_of_init(void) if (ret) goto err; - r[1].start = np->intrs[0].line; - r[1].end = np->intrs[0].line; + r[1].start = r[1].end = irq_of_parse_and_map(np, 0); r[1].flags = IORESOURCE_IRQ; usb_dev_mph = @@ -437,8 +432,7 @@ static int __init fsl_usb_of_init(void) if (ret) goto unreg_mph; - r[1].start = np->intrs[0].line; - r[1].end = np->intrs[0].line; + r[1].start = r[1].end = irq_of_parse_and_map(np, 0); r[1].flags = IORESOURCE_IRQ; usb_dev_dr = From 45934c47237108903ec019f08e124e592ba0b6c2 Mon Sep 17 00:00:00 2001 From: Jake Moilanen Date: Thu, 27 Jul 2006 13:17:25 -0500 Subject: [PATCH 0079/1063] [POWERPC] Export msi symbols Forgot to export symbols for MSI. Signed-off-by: Jake Moilanen Acked-by: Segher Boessenkool Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/irq.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 01bdae35cb55..b2ded6460a86 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include @@ -827,12 +828,14 @@ int pci_enable_msi(struct pci_dev * pdev) else return -1; } +EXPORT_SYMBOL(pci_enable_msi); void pci_disable_msi(struct pci_dev * pdev) { if (ppc_md.disable_msi) ppc_md.disable_msi(pdev); } +EXPORT_SYMBOL(pci_disable_msi); void pci_scan_msi_device(struct pci_dev *dev) {} int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec) {return -1;} @@ -840,6 +843,8 @@ void pci_disable_msix(struct pci_dev *dev) {} void msi_remove_pci_irq_vectors(struct pci_dev *dev) {} void disable_msi_mode(struct pci_dev *dev, int pos, int type) {} void pci_no_msi(void) {} +EXPORT_SYMBOL(pci_enable_msix); +EXPORT_SYMBOL(pci_disable_msix); #endif From 3ab2b385c8a5cdf060c6a41582118a0cb27d0910 Mon Sep 17 00:00:00 2001 From: Amos Waterland Date: Tue, 1 Aug 2006 15:44:11 -0400 Subject: [PATCH 0080/1063] [POWERPC] Turn on tigon3 support in maple_defconfig I think that most people who use maple_defconfig are doing so for a JS21, so it might make sense to turn Tigon3 support on by default. Built and booted on a JS21. Signed-off-by: Amos Waterland Signed-off-by: Paul Mackerras --- arch/powerpc/configs/maple_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig index 80a0db43aeb7..27b18ca1549c 100644 --- a/arch/powerpc/configs/maple_defconfig +++ b/arch/powerpc/configs/maple_defconfig @@ -474,7 +474,7 @@ CONFIG_E1000=y # CONFIG_SKY2 is not set # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set -# CONFIG_TIGON3 is not set +CONFIG_TIGON3=y # CONFIG_BNX2 is not set # CONFIG_MV643XX_ETH is not set From 40681b95a4ef798bc38c92e0d9b8c06bbdd34409 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 2 Aug 2006 11:13:50 +1000 Subject: [PATCH 0081/1063] [POWERPC] Make doc comments extractable We don't have much in the way of doc comments, but some of those we do have don't work because they start with "/***" or "/*", not "/**" which is what kernel-doc requires. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/crash_dump.c | 2 +- arch/powerpc/sysdev/i8259.c | 2 +- include/asm-powerpc/irq.h | 24 ++++++++++++------------ include/asm-powerpc/prom.h | 8 ++++---- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 371973be8d71..2f6f5a7bc69e 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -80,7 +80,7 @@ static int __init parse_savemaxmem(char *p) } __setup("savemaxmem=", parse_savemaxmem); -/* +/** * copy_oldmem_page - copy one page from "oldmem" * @pfn: page frame number to be copied * @buf: target memory address for the copy; this can be in kernel address diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c index 9855820b9548..26a6a3becd66 100644 --- a/arch/powerpc/sysdev/i8259.c +++ b/arch/powerpc/sysdev/i8259.c @@ -224,7 +224,7 @@ static struct irq_host_ops i8259_host_ops = { .xlate = i8259_host_xlate, }; -/**** +/** * i8259_init - Initialize the legacy controller * @node: device node of the legacy PIC (can be NULL, but then, it will match * all interrupts, so beware) diff --git a/include/asm-powerpc/irq.h b/include/asm-powerpc/irq.h index d903a62959da..4da41efb1319 100644 --- a/include/asm-powerpc/irq.h +++ b/include/asm-powerpc/irq.h @@ -137,7 +137,7 @@ struct irq_map_entry { extern struct irq_map_entry irq_map[NR_IRQS]; -/*** +/** * irq_alloc_host - Allocate a new irq_host data structure * @node: device-tree node of the interrupt controller * @revmap_type: type of reverse mapping to use @@ -159,14 +159,14 @@ extern struct irq_host *irq_alloc_host(unsigned int revmap_type, irq_hw_number_t inval_irq); -/*** +/** * irq_find_host - Locates a host for a given device node * @node: device-tree node of the interrupt controller */ extern struct irq_host *irq_find_host(struct device_node *node); -/*** +/** * irq_set_default_host - Set a "default" host * @host: default host pointer * @@ -178,7 +178,7 @@ extern struct irq_host *irq_find_host(struct device_node *node); extern void irq_set_default_host(struct irq_host *host); -/*** +/** * irq_set_virq_count - Set the maximum number of virt irqs * @count: number of linux virtual irqs, capped with NR_IRQS * @@ -188,7 +188,7 @@ extern void irq_set_default_host(struct irq_host *host); extern void irq_set_virq_count(unsigned int count); -/*** +/** * irq_create_mapping - Map a hardware interrupt into linux virq space * @host: host owning this hardware interrupt or NULL for default host * @hwirq: hardware irq number in that host space @@ -202,13 +202,13 @@ extern unsigned int irq_create_mapping(struct irq_host *host, irq_hw_number_t hwirq); -/*** +/** * irq_dispose_mapping - Unmap an interrupt * @virq: linux virq number of the interrupt to unmap */ extern void irq_dispose_mapping(unsigned int virq); -/*** +/** * irq_find_mapping - Find a linux virq from an hw irq number. * @host: host owning this hardware interrupt * @hwirq: hardware irq number in that host space @@ -221,7 +221,7 @@ extern unsigned int irq_find_mapping(struct irq_host *host, irq_hw_number_t hwirq); -/*** +/** * irq_radix_revmap - Find a linux virq from a hw irq number. * @host: host owning this hardware interrupt * @hwirq: hardware irq number in that host space @@ -232,7 +232,7 @@ extern unsigned int irq_find_mapping(struct irq_host *host, extern unsigned int irq_radix_revmap(struct irq_host *host, irq_hw_number_t hwirq); -/*** +/** * irq_linear_revmap - Find a linux virq from a hw irq number. * @host: host owning this hardware interrupt * @hwirq: hardware irq number in that host space @@ -247,7 +247,7 @@ extern unsigned int irq_linear_revmap(struct irq_host *host, -/*** +/** * irq_alloc_virt - Allocate virtual irq numbers * @host: host owning these new virtual irqs * @count: number of consecutive numbers to allocate @@ -261,7 +261,7 @@ extern unsigned int irq_alloc_virt(struct irq_host *host, unsigned int count, unsigned int hint); -/*** +/** * irq_free_virt - Free virtual irq numbers * @virq: virtual irq number of the first interrupt to free * @count: number of interrupts to free @@ -300,7 +300,7 @@ extern unsigned int irq_of_parse_and_map(struct device_node *dev, int index); /* -- End OF helpers -- */ -/*** +/** * irq_early_init - Init irq remapping subsystem */ extern void irq_early_init(void); diff --git a/include/asm-powerpc/prom.h b/include/asm-powerpc/prom.h index 31bfea4686a6..7a457bd462a2 100644 --- a/include/asm-powerpc/prom.h +++ b/include/asm-powerpc/prom.h @@ -259,7 +259,7 @@ struct of_irq { u32 specifier[OF_MAX_IRQ_SPEC]; /* Specifier copy */ }; -/*** +/** * of_irq_map_init - Initialize the irq remapper * @flags: flags defining workarounds to enable * @@ -272,7 +272,7 @@ struct of_irq { extern void of_irq_map_init(unsigned int flags); -/*** +/** * of_irq_map_raw - Low level interrupt tree parsing * @parent: the device interrupt parent * @intspec: interrupt specifier ("interrupts" property of the device) @@ -292,7 +292,7 @@ extern int of_irq_map_raw(struct device_node *parent, const u32 *intspec, const u32 *addr, struct of_irq *out_irq); -/*** +/** * of_irq_map_one - Resolve an interrupt for a device * @device: the device whose interrupt is to be resolved * @index: index of the interrupt to resolve @@ -305,7 +305,7 @@ extern int of_irq_map_raw(struct device_node *parent, const u32 *intspec, extern int of_irq_map_one(struct device_node *device, int index, struct of_irq *out_irq); -/*** +/** * of_irq_map_pci - Resolve the interrupt for a PCI device * @pdev: the device whose interrupt is to be resolved * @out_irq: structure of_irq filled by this function From 3d7714867a8d240fae3ab0bde656a369de2b08ab Mon Sep 17 00:00:00 2001 From: Jon Loeliger Date: Thu, 3 Aug 2006 16:27:57 -0500 Subject: [PATCH 0082/1063] [POWERPC] Add MPC8641 HPCN Device Tree Source file. As per list discussion, let's add device tree source files under powerpc/boot/dts. If nothing else, it is a starting point. Signed-off-by: Jon Loeliger Signed-off-by: Paul Mackerras --- arch/powerpc/boot/dts/mpc8641_hpcn.dts | 338 +++++++++++++++++++++++++ 1 file changed, 338 insertions(+) create mode 100644 arch/powerpc/boot/dts/mpc8641_hpcn.dts diff --git a/arch/powerpc/boot/dts/mpc8641_hpcn.dts b/arch/powerpc/boot/dts/mpc8641_hpcn.dts new file mode 100644 index 000000000000..e832a884d765 --- /dev/null +++ b/arch/powerpc/boot/dts/mpc8641_hpcn.dts @@ -0,0 +1,338 @@ +/* + * MPC8641 HPCN Device Tree Source + * + * Copyright 2006 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + + +/ { + model = "MPC8641HPCN"; + compatible = "mpc86xx"; + #address-cells = <1>; + #size-cells = <1>; + + cpus { + #cpus = <2>; + #address-cells = <1>; + #size-cells = <0>; + + PowerPC,8641@0 { + device_type = "cpu"; + reg = <0>; + d-cache-line-size = <20>; // 32 bytes + i-cache-line-size = <20>; // 32 bytes + d-cache-size = <8000>; // L1, 32K + i-cache-size = <8000>; // L1, 32K + timebase-frequency = <0>; // 33 MHz, from uboot + bus-frequency = <0>; // From uboot + clock-frequency = <0>; // From uboot + 32-bit; + linux,boot-cpu; + }; + PowerPC,8641@1 { + device_type = "cpu"; + reg = <1>; + d-cache-line-size = <20>; // 32 bytes + i-cache-line-size = <20>; // 32 bytes + d-cache-size = <8000>; // L1, 32K + i-cache-size = <8000>; // L1, 32K + timebase-frequency = <0>; // 33 MHz, from uboot + bus-frequency = <0>; // From uboot + clock-frequency = <0>; // From uboot + 32-bit; + }; + }; + + memory { + device_type = "memory"; + reg = <00000000 40000000>; // 1G at 0x0 + }; + + soc8641@f8000000 { + #address-cells = <1>; + #size-cells = <1>; + #interrupt-cells = <2>; + device_type = "soc"; + ranges = <0 f8000000 00100000>; + reg = ; // CCSRBAR 1M + bus-frequency = <0>; + + i2c@3000 { + device_type = "i2c"; + compatible = "fsl-i2c"; + reg = <3000 100>; + interrupts = <2b 2>; + interrupt-parent = <40000>; + dfsrr; + }; + + i2c@3100 { + device_type = "i2c"; + compatible = "fsl-i2c"; + reg = <3100 100>; + interrupts = <2b 2>; + interrupt-parent = <40000>; + dfsrr; + }; + + mdio@24520 { + #address-cells = <1>; + #size-cells = <0>; + device_type = "mdio"; + compatible = "gianfar"; + reg = <24520 20>; + linux,phandle = <24520>; + ethernet-phy@0 { + linux,phandle = <2452000>; + interrupt-parent = <40000>; + interrupts = <4a 1>; + reg = <0>; + device_type = "ethernet-phy"; + }; + ethernet-phy@1 { + linux,phandle = <2452001>; + interrupt-parent = <40000>; + interrupts = <4a 1>; + reg = <1>; + device_type = "ethernet-phy"; + }; + ethernet-phy@2 { + linux,phandle = <2452002>; + interrupt-parent = <40000>; + interrupts = <4a 1>; + reg = <2>; + device_type = "ethernet-phy"; + }; + ethernet-phy@3 { + linux,phandle = <2452003>; + interrupt-parent = <40000>; + interrupts = <4a 1>; + reg = <3>; + device_type = "ethernet-phy"; + }; + }; + + ethernet@24000 { + #address-cells = <1>; + #size-cells = <0>; + device_type = "network"; + model = "TSEC"; + compatible = "gianfar"; + reg = <24000 1000>; + mac-address = [ 00 E0 0C 00 73 00 ]; + interrupts = <1d 2 1e 2 22 2>; + interrupt-parent = <40000>; + phy-handle = <2452000>; + }; + + ethernet@25000 { + #address-cells = <1>; + #size-cells = <0>; + device_type = "network"; + model = "TSEC"; + compatible = "gianfar"; + reg = <25000 1000>; + mac-address = [ 00 E0 0C 00 73 01 ]; + interrupts = <23 2 24 2 28 2>; + interrupt-parent = <40000>; + phy-handle = <2452001>; + }; + + ethernet@26000 { + #address-cells = <1>; + #size-cells = <0>; + device_type = "network"; + model = "TSEC"; + compatible = "gianfar"; + reg = <26000 1000>; + mac-address = [ 00 E0 0C 00 02 FD ]; + interrupts = <1F 2 20 2 21 2>; + interrupt-parent = <40000>; + phy-handle = <2452002>; + }; + + ethernet@27000 { + #address-cells = <1>; + #size-cells = <0>; + device_type = "network"; + model = "TSEC"; + compatible = "gianfar"; + reg = <27000 1000>; + mac-address = [ 00 E0 0C 00 03 FD ]; + interrupts = <25 2 26 2 27 2>; + interrupt-parent = <40000>; + phy-handle = <2452003>; + }; + serial@4500 { + device_type = "serial"; + compatible = "ns16550"; + reg = <4500 100>; + clock-frequency = <0>; + interrupts = <2a 2>; + interrupt-parent = <40000>; + }; + + serial@4600 { + device_type = "serial"; + compatible = "ns16550"; + reg = <4600 100>; + clock-frequency = <0>; + interrupts = <1c 2>; + interrupt-parent = <40000>; + }; + + pci@8000 { + compatible = "86xx"; + device_type = "pci"; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + reg = <8000 1000>; + bus-range = <0 fe>; + ranges = <02000000 0 80000000 80000000 0 20000000 + 01000000 0 00000000 e2000000 0 00100000>; + clock-frequency = <1fca055>; + interrupt-parent = <40000>; + interrupts = <18 2>; + interrupt-map-mask = ; + interrupt-map = < + /* IDSEL 0x11 */ + 8800 0 0 1 4d0 3 2 + 8800 0 0 2 4d0 4 2 + 8800 0 0 3 4d0 5 2 + 8800 0 0 4 4d0 6 2 + + /* IDSEL 0x12 */ + 9000 0 0 1 4d0 4 2 + 9000 0 0 2 4d0 5 2 + 9000 0 0 3 4d0 6 2 + 9000 0 0 4 4d0 3 2 + + /* IDSEL 0x13 */ + 9800 0 0 1 4d0 0 0 + 9800 0 0 2 4d0 0 0 + 9800 0 0 3 4d0 0 0 + 9800 0 0 4 4d0 0 0 + + /* IDSEL 0x14 */ + a000 0 0 1 4d0 0 0 + a000 0 0 2 4d0 0 0 + a000 0 0 3 4d0 0 0 + a000 0 0 4 4d0 0 0 + + /* IDSEL 0x15 */ + a800 0 0 1 4d0 0 0 + a800 0 0 2 4d0 0 0 + a800 0 0 3 4d0 0 0 + a800 0 0 4 4d0 0 0 + + /* IDSEL 0x16 */ + b000 0 0 1 4d0 0 0 + b000 0 0 2 4d0 0 0 + b000 0 0 3 4d0 0 0 + b000 0 0 4 4d0 0 0 + + /* IDSEL 0x17 */ + b800 0 0 1 4d0 0 0 + b800 0 0 2 4d0 0 0 + b800 0 0 3 4d0 0 0 + b800 0 0 4 4d0 0 0 + + /* IDSEL 0x18 */ + c000 0 0 1 4d0 0 0 + c000 0 0 2 4d0 0 0 + c000 0 0 3 4d0 0 0 + c000 0 0 4 4d0 0 0 + + /* IDSEL 0x19 */ + c800 0 0 1 4d0 0 0 + c800 0 0 2 4d0 0 0 + c800 0 0 3 4d0 0 0 + c800 0 0 4 4d0 0 0 + + /* IDSEL 0x1a */ + d000 0 0 1 4d0 6 2 + d000 0 0 2 4d0 3 2 + d000 0 0 3 4d0 4 2 + d000 0 0 4 4d0 5 2 + + + /* IDSEL 0x1b */ + d800 0 0 1 4d0 5 2 + d800 0 0 2 4d0 0 0 + d800 0 0 3 4d0 0 0 + d800 0 0 4 4d0 0 0 + + /* IDSEL 0x1c */ + e000 0 0 1 4d0 9 2 + e000 0 0 2 4d0 a 2 + e000 0 0 3 4d0 c 2 + e000 0 0 4 4d0 7 2 + + /* IDSEL 0x1d */ + e800 0 0 1 4d0 9 2 + e800 0 0 2 4d0 a 2 + e800 0 0 3 4d0 b 2 + e800 0 0 4 4d0 0 0 + + /* IDSEL 0x1e */ + f000 0 0 1 4d0 c 2 + f000 0 0 2 4d0 0 0 + f000 0 0 3 4d0 0 0 + f000 0 0 4 4d0 0 0 + + /* IDSEL 0x1f */ + f800 0 0 1 4d0 6 2 + f800 0 0 2 4d0 0 0 + f800 0 0 3 4d0 0 0 + f800 0 0 4 4d0 0 0 + >; + i8259@4d0 { + clock-frequency = <0>; + interrupt-controller; + device_type = "interrupt-controller"; + #address-cells = <0>; + #interrupt-cells = <2>; + built-in; + compatible = "chrp,iic"; + big-endian; + interrupts = <49 2>; + interrupt-parent = <40000>; + }; + + }; + pic@40000 { + linux,phandle = <40000>; + clock-frequency = <0>; + interrupt-controller; + #address-cells = <0>; + #interrupt-cells = <2>; + reg = <40000 40000>; + built-in; + compatible = "chrp,open-pic"; + device_type = "open-pic"; + big-endian; + interrupts = < + 10 2 11 2 12 2 13 2 + 14 2 15 2 16 2 17 2 + 18 2 19 2 1a 2 1b 2 + 1c 2 1d 2 1e 2 1f 2 + 20 2 21 2 22 2 23 2 + 24 2 25 2 26 2 27 2 + 28 2 29 2 2a 2 2b 2 + 2c 2 2d 2 2e 2 2f 2 + 30 2 31 2 32 2 33 2 + 34 2 35 2 36 2 37 2 + 38 2 39 2 2a 2 3b 2 + 3c 2 3d 2 3e 2 3f 2 + 48 1 49 2 4a 1 + >; + interrupt-parent = <40000>; + }; + }; +}; From f4dddce57c105c447c566be52c3d210dec570a27 Mon Sep 17 00:00:00 2001 From: Matt Porter Date: Fri, 4 Aug 2006 11:41:51 -0500 Subject: [PATCH 0083/1063] [POWERPC] Remove flush_dcache_all export Removes the flush_dcache_all export for non coherent platforms. We removed the last in-kernel user of this years ago in arch/ppc so it no longer serves a purpose. Plus, it breaks the build at the moment. Signed-off-by: Matt Porter Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/ppc_ksyms.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index f6a05f090b25..39d3bfcabcd2 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -126,10 +126,6 @@ EXPORT_SYMBOL(pci_bus_mem_base_phys); EXPORT_SYMBOL(pci_bus_to_hose); #endif /* CONFIG_PCI */ -#ifdef CONFIG_NOT_COHERENT_CACHE -EXPORT_SYMBOL(flush_dcache_all); -#endif - EXPORT_SYMBOL(start_thread); EXPORT_SYMBOL(kernel_thread); From 452b5e21216011f2f068e80443568f5f3f3f4d63 Mon Sep 17 00:00:00 2001 From: Matt Porter Date: Fri, 4 Aug 2006 11:44:01 -0500 Subject: [PATCH 0084/1063] [POWERPC] Fix powerpc 44x_mmu build The PIN_SIZE definition name changed, update 44x_mmu.c accordingly. Signed-off-by: Matt Porter Signed-off-by: Paul Mackerras --- arch/powerpc/mm/44x_mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c index 376829ed2211..0a0a0487b334 100644 --- a/arch/powerpc/mm/44x_mmu.c +++ b/arch/powerpc/mm/44x_mmu.c @@ -103,7 +103,7 @@ unsigned long __init mmu_mapin_ram(void) /* Determine number of entries necessary to cover lowmem */ pinned_tlbs = (unsigned int) - (_ALIGN(total_lowmem, PPC44x_PIN_SIZE) >> PPC44x_PIN_SHIFT); + (_ALIGN(total_lowmem, PPC_PIN_SIZE) >> PPC44x_PIN_SHIFT); /* Write upper watermark to save location */ tlb_44x_hwater = PPC44x_LOW_SLOT - pinned_tlbs; @@ -111,7 +111,7 @@ unsigned long __init mmu_mapin_ram(void) /* If necessary, set additional pinned TLBs */ if (pinned_tlbs > 1) for (i = (PPC44x_LOW_SLOT-(pinned_tlbs-1)); i < PPC44x_LOW_SLOT; i++) { - unsigned int phys_addr = (PPC44x_LOW_SLOT-i) * PPC44x_PIN_SIZE; + unsigned int phys_addr = (PPC44x_LOW_SLOT-i) * PPC_PIN_SIZE; ppc44x_pin_tlb(i, phys_addr+PAGE_OFFSET, phys_addr); } From 2f6093c84730b4bad65bcd0f2f904a5769b1dfc5 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 7 Aug 2006 16:19:19 +1000 Subject: [PATCH 0085/1063] [POWERPC] Implement SLB shadow buffer This adds a shadow buffer for the SLBs and regsiters it with PHYP. Only the bolted SLB entries (top 3) are shadowed. The SLB shadow buffer tells the hypervisor what the kernel needs to have in the SLB for the kernel to be able to function. The hypervisor can use this information to speed up partition context switches. Signed-off-by: Michael Neuling Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/asm-offsets.c | 2 + arch/powerpc/kernel/entry_64.S | 13 +++++++ arch/powerpc/kernel/paca.c | 15 +++++++- arch/powerpc/mm/slb.c | 37 +++++++++++++++++-- arch/powerpc/platforms/pseries/lpar.c | 24 ++++++++++-- .../platforms/pseries/plpar_wrappers.h | 10 +++++ arch/powerpc/platforms/pseries/setup.c | 12 +++++- include/asm-powerpc/lppaca.h | 19 ++++++++++ include/asm-powerpc/paca.h | 3 ++ 9 files changed, 124 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index ac0631958b20..2ef7ea860379 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -135,11 +135,13 @@ int main(void) DEFINE(PACA_STARTPURR, offsetof(struct paca_struct, startpurr)); DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); + DEFINE(PACA_SLBSHADOWPTR, offsetof(struct paca_struct, slb_shadow_ptr)); DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0)); DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1)); DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int)); DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int)); + DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area)); #endif /* CONFIG_PPC64 */ /* RTAS */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 54d9f5cdaab4..5baea498ea64 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -323,6 +323,11 @@ _GLOBAL(ret_from_fork) * The code which creates the new task context is in 'copy_thread' * in arch/powerpc/kernel/process.c */ +#define SHADOW_SLB_BOLTED_STACK_ESID \ + (SLBSHADOW_SAVEAREA + 0x10*(SLB_NUM_BOLTED-1)) +#define SHADOW_SLB_BOLTED_STACK_VSID \ + (SLBSHADOW_SAVEAREA + 0x10*(SLB_NUM_BOLTED-1) + 8) + .align 7 _GLOBAL(_switch) mflr r0 @@ -375,6 +380,14 @@ BEGIN_FTR_SECTION ld r7,KSP_VSID(r4) /* Get new stack's VSID */ oris r0,r6,(SLB_ESID_V)@h ori r0,r0,(SLB_NUM_BOLTED-1)@l + + /* Update the last bolted SLB */ + ld r9,PACA_SLBSHADOWPTR(r13) + li r12,0 + std r12,SHADOW_SLB_BOLTED_STACK_ESID(r9) /* Clear ESID */ + std r7,SHADOW_SLB_BOLTED_STACK_VSID(r9) /* Save VSID */ + std r0,SHADOW_SLB_BOLTED_STACK_ESID(r9) /* Save ESID */ + slbie r6 slbie r6 /* Workaround POWER5 < DD2.1 issue */ slbmte r7,r0 diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index c68741fed14b..55f1a25085cd 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -17,6 +17,7 @@ #include #include #include +#include /* This symbol is provided by the linker - let it fill in the paca @@ -45,6 +46,17 @@ struct lppaca lppaca[] = { }, }; +/* + * 3 persistent SLBs are registered here. The buffer will be zero + * initially, hence will all be invaild until we actually write them. + */ +struct slb_shadow slb_shadow[] __cacheline_aligned = { + [0 ... (NR_CPUS-1)] = { + .persistent = SLB_NUM_BOLTED, + .buffer_length = sizeof(struct slb_shadow), + }, +}; + /* The Paca is an array with one entry per processor. Each contains an * lppaca, which contains the information shared between the * hypervisor and Linux. @@ -59,7 +71,8 @@ struct lppaca lppaca[] = { .lock_token = 0x8000, \ .paca_index = (number), /* Paca Index */ \ .kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL, \ - .hw_cpu_id = 0xffff, + .hw_cpu_id = 0xffff, \ + .slb_shadow_ptr = &slb_shadow[number], #ifdef CONFIG_PPC_ISERIES #define PACA_INIT_ISERIES(number) \ diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index de0c8842415c..d3733912adb4 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -50,9 +52,32 @@ static inline unsigned long mk_vsid_data(unsigned long ea, unsigned long flags) return (get_kernel_vsid(ea) << SLB_VSID_SHIFT) | flags; } -static inline void create_slbe(unsigned long ea, unsigned long flags, - unsigned long entry) +static inline void slb_shadow_update(unsigned long esid, unsigned long vsid, + unsigned long entry) { + /* + * Clear the ESID first so the entry is not valid while we are + * updating it. + */ + get_slb_shadow()->save_area[entry].esid = 0; + barrier(); + get_slb_shadow()->save_area[entry].vsid = vsid; + barrier(); + get_slb_shadow()->save_area[entry].esid = esid; + +} + +static inline void create_shadowed_slbe(unsigned long ea, unsigned long flags, + unsigned long entry) +{ + /* + * Updating the shadow buffer before writing the SLB ensures + * we don't get a stale entry here if we get preempted by PHYP + * between these two statements. + */ + slb_shadow_update(mk_esid_data(ea, entry), mk_vsid_data(ea, flags), + entry); + asm volatile("slbmte %0,%1" : : "r" (mk_vsid_data(ea, flags)), "r" (mk_esid_data(ea, entry)) @@ -77,6 +102,10 @@ void slb_flush_and_rebolt(void) if ((ksp_esid_data & ESID_MASK) == PAGE_OFFSET) ksp_esid_data &= ~SLB_ESID_V; + /* Only third entry (stack) may change here so only resave that */ + slb_shadow_update(ksp_esid_data, + mk_vsid_data(ksp_esid_data, lflags), 2); + /* We need to do this all in asm, so we're sure we don't touch * the stack between the slbia and rebolting it. */ asm volatile("isync\n" @@ -209,9 +238,9 @@ void slb_initialize(void) asm volatile("isync":::"memory"); asm volatile("slbmte %0,%0"::"r" (0) : "memory"); asm volatile("isync; slbia; isync":::"memory"); - create_slbe(PAGE_OFFSET, lflags, 0); + create_shadowed_slbe(PAGE_OFFSET, lflags, 0); - create_slbe(VMALLOC_START, vflags, 1); + create_shadowed_slbe(VMALLOC_START, vflags, 1); /* We don't bolt the stack for the time being - we're in boot, * so the stack is in the bolted segment. By the time it goes diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 6cbf14266d5e..1820a0b0a8c6 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -252,18 +252,34 @@ out: void vpa_init(int cpu) { int hwcpu = get_hard_smp_processor_id(cpu); - unsigned long vpa = __pa(&lppaca[cpu]); + unsigned long addr; long ret; if (cpu_has_feature(CPU_FTR_ALTIVEC)) lppaca[cpu].vmxregs_in_use = 1; - ret = register_vpa(hwcpu, vpa); + addr = __pa(&lppaca[cpu]); + ret = register_vpa(hwcpu, addr); - if (ret) + if (ret) { printk(KERN_ERR "WARNING: vpa_init: VPA registration for " "cpu %d (hw %d) of area %lx returns %ld\n", - cpu, hwcpu, vpa, ret); + cpu, hwcpu, addr, ret); + return; + } + /* + * PAPR says this feature is SLB-Buffer but firmware never + * reports that. All SPLPAR support SLB shadow buffer. + */ + addr = __pa(&slb_shadow[cpu]); + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + ret = register_slb_shadow(hwcpu, addr); + if (ret) + printk(KERN_ERR + "WARNING: vpa_init: SLB shadow buffer " + "registration for cpu %d (hw %d) of area %lx " + "returns %ld\n", cpu, hwcpu, addr, ret); + } } long pSeries_lpar_hpte_insert(unsigned long hpte_group, diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h index ebd15de7597e..3eb7b294d92f 100644 --- a/arch/powerpc/platforms/pseries/plpar_wrappers.h +++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h @@ -37,6 +37,16 @@ static inline long register_vpa(unsigned long cpu, unsigned long vpa) return vpa_call(0x1, cpu, vpa); } +static inline long unregister_slb_shadow(unsigned long cpu, unsigned long vpa) +{ + return vpa_call(0x7, cpu, vpa); +} + +static inline long register_slb_shadow(unsigned long cpu, unsigned long vpa) +{ + return vpa_call(0x3, cpu, vpa); +} + extern void vpa_init(int cpu); static inline long plpar_pte_enter(unsigned long flags, diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index de214d86ff44..6ebeecfd6bcb 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -234,9 +234,17 @@ static void pseries_kexec_cpu_down_xics(int crash_shutdown, int secondary) { /* Don't risk a hypervisor call if we're crashing */ if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) { - unsigned long vpa = __pa(get_lppaca()); + unsigned long addr; - if (unregister_vpa(hard_smp_processor_id(), vpa)) { + addr = __pa(get_slb_shadow()); + if (unregister_slb_shadow(hard_smp_processor_id(), addr)) + printk("SLB shadow buffer deregistration of " + "cpu %u (hw_cpu_id %d) failed\n", + smp_processor_id(), + hard_smp_processor_id()); + + addr = __pa(get_lppaca()); + if (unregister_vpa(hard_smp_processor_id(), addr)) { printk("VPA deregistration of cpu %u (hw_cpu_id %d) " "failed\n", smp_processor_id(), hard_smp_processor_id()); diff --git a/include/asm-powerpc/lppaca.h b/include/asm-powerpc/lppaca.h index 4dc514aabfe7..942bb450baff 100644 --- a/include/asm-powerpc/lppaca.h +++ b/include/asm-powerpc/lppaca.h @@ -27,7 +27,9 @@ // // //---------------------------------------------------------------------------- +#include #include +#include /* The Hypervisor barfs if the lppaca crosses a page boundary. A 1k * alignment is sufficient to prevent this */ @@ -133,5 +135,22 @@ struct lppaca { extern struct lppaca lppaca[]; +/* + * SLB shadow buffer structure as defined in the PAPR. The save_area + * contains adjacent ESID and VSID pairs for each shadowed SLB. The + * ESID is stored in the lower 64bits, then the VSID. + */ +struct slb_shadow { + u32 persistent; // Number of persistent SLBs x00-x03 + u32 buffer_length; // Total shadow buffer length x04-x07 + u64 reserved; // Alignment x08-x0f + struct { + u64 esid; + u64 vsid; + } save_area[SLB_NUM_BOLTED]; // x10-x40 +} ____cacheline_aligned; + +extern struct slb_shadow slb_shadow[]; + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_LPPACA_H */ diff --git a/include/asm-powerpc/paca.h b/include/asm-powerpc/paca.h index 2d4585f06209..7ffa2512524e 100644 --- a/include/asm-powerpc/paca.h +++ b/include/asm-powerpc/paca.h @@ -23,6 +23,7 @@ register struct paca_struct *local_paca asm("r13"); #define get_paca() local_paca #define get_lppaca() (get_paca()->lppaca_ptr) +#define get_slb_shadow() (get_paca()->slb_shadow_ptr) struct task_struct; @@ -98,6 +99,8 @@ struct paca_struct { u64 user_time; /* accumulated usermode TB ticks */ u64 system_time; /* accumulated system TB ticks */ u64 startpurr; /* PURR/TB value snapshot */ + + struct slb_shadow *slb_shadow_ptr; }; extern struct paca_struct paca[]; From 5cf13911b1e72707b6f0eb39b2d819ec6e343d76 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 7 Aug 2006 17:34:50 +1000 Subject: [PATCH 0086/1063] [POWERPC] Update lppaca offset comments Update offset comments. No functional change. Signed-off-by: Michael Neuling Signed-off-by: Paul Mackerras --- include/asm-powerpc/lppaca.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/asm-powerpc/lppaca.h b/include/asm-powerpc/lppaca.h index 942bb450baff..821ea0c512b4 100644 --- a/include/asm-powerpc/lppaca.h +++ b/include/asm-powerpc/lppaca.h @@ -116,7 +116,7 @@ struct lppaca { //============================================================================= -// CACHE_LINE_3 0x0100 - 0x007F: This line is shared with other processors +// CACHE_LINE_3 0x0100 - 0x017F: This line is shared with other processors //============================================================================= // This is the yield_count. An "odd" value (low bit on) means that // the processor is yielded (either because of an OS yield or a PLIC @@ -128,7 +128,7 @@ struct lppaca { u8 reserved6[124]; // Reserved x04-x7F //============================================================================= -// CACHE_LINE_4-5 0x0100 - 0x01FF Contains PMC interrupt data +// CACHE_LINE_4-5 0x0180 - 0x027F Contains PMC interrupt data //============================================================================= u8 pmc_save_area[256]; // PMC interrupt Area x00-xFF } __attribute__((__aligned__(0x400))); From cd878479792cc1e4bc9d62ed0ef2c4454743848c Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 11 Aug 2006 17:59:28 -0400 Subject: [PATCH 0087/1063] [CPUFREQ] Fix typo. Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b3df613ae4ec..d35a9f06ab7b 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -32,7 +32,7 @@ #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, "cpufreq-core", msg) /** - * The "cpufreq driver" - the arch- or hardware-dependend low + * The "cpufreq driver" - the arch- or hardware-dependent low * level driver of CPUFreq support, and its spinlock. This lock * also protects the cpufreq_cpu_data array. */ From 1ce28d6b19112a7c76af8e971e2de3109d19a943 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Mon, 31 Jul 2006 22:25:20 +0400 Subject: [PATCH 0088/1063] [CPUFREQ][1/2] ondemand: updated tune for hardware coordination Try to make dbs_check_cpu() call on all CPUs at the same jiffy. This will help when multiple cores share P-states via Hardware Coordination. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Alexey Starikovskiy Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_ondemand.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 52cf1f021825..f507a869acbc 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -305,6 +305,9 @@ static void do_dbs_timer(void *data) { unsigned int cpu = smp_processor_id(); struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu); + /* We want all CPUs to do sampling nearly on same jiffy */ + int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + delay -= jiffies % delay; if (!dbs_info->enable) return; @@ -312,18 +315,18 @@ static void do_dbs_timer(void *data) lock_cpu_hotplug(); dbs_check_cpu(dbs_info); unlock_cpu_hotplug(); - queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, - usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); + queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay); } static inline void dbs_timer_init(unsigned int cpu) { struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu); + /* We want all CPUs to do sampling nearly on same jiffy */ + int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + delay -= jiffies % delay; INIT_WORK(&dbs_info->work, do_dbs_timer, 0); - queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, - usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); - return; + queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay); } static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) From 05ca0350e8caa91a5ec9961c585c98005b6934ea Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Mon, 31 Jul 2006 22:28:12 +0400 Subject: [PATCH 0089/1063] [CPUFREQ][2/2] ondemand: updated add powersave_bias tunable ondemand selects the minimum frequency that can retire a workload with negligible idle time -- ideally resulting in the highest performance/power efficiency with negligible performance impact. But on some systems and some workloads, this algorithm is more performance biased than necessary, and de-tuning it a bit to allow some performance impact can save measurable power. This patch adds a "powersave_bias" tunable to ondemand to allow it to reduce its target frequency by a specified percent. By default, the powersave_bias is 0 and has no effect. powersave_bias is in units of 0.1%, so it has an effective range of 1 through 1000, resulting in 0.1% to 100% impact. In practice, users will not be able to detect a difference between 0.1% increments, but 1.0% increments turned out to be too large. Also, the max value of 1000 (100%) would simply peg the system in its deepest power saving P-state, unless the processor really has a hardware P-state at 0Hz:-) For example, If ondemand requests 2.0GHz based on utilization, and powersave_bias=100, this code will knock 10% off the target and seek a target of 1.8GHz instead of 2.0GHz until the next sampling. If 1.8 is an exact match with an hardware frequency we use it, otherwise we average our time between the frequency next higher than 1.8 and next lower than 1.8. Note that a user or administrative program can change powersave_bias at run-time depending on how they expect the system to be used. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Alexey Starikovskiy Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_ondemand.c | 155 ++++++++++++++++++++++++++--- 1 file changed, 139 insertions(+), 16 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index f507a869acbc..34874c2f1885 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -55,6 +55,10 @@ struct cpu_dbs_info_s { struct cpufreq_policy *cur_policy; struct work_struct work; unsigned int enable; + struct cpufreq_frequency_table *freq_table; + unsigned int freq_lo; + unsigned int freq_lo_jiffies; + unsigned int freq_hi_jiffies; }; static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); @@ -72,15 +76,15 @@ static DEFINE_MUTEX(dbs_mutex); static struct workqueue_struct *kondemand_wq; -struct dbs_tuners { +static struct dbs_tuners { unsigned int sampling_rate; unsigned int up_threshold; unsigned int ignore_nice; -}; - -static struct dbs_tuners dbs_tuners_ins = { + unsigned int powersave_bias; +} dbs_tuners_ins = { .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, .ignore_nice = 0, + .powersave_bias = 0, }; static inline cputime64_t get_cpu_idle_time(unsigned int cpu) @@ -96,6 +100,69 @@ static inline cputime64_t get_cpu_idle_time(unsigned int cpu) return retval; } +/* + * Find right freq to be set now with powersave_bias on. + * Returns the freq_hi to be used right now and will set freq_hi_jiffies, + * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs. + */ +unsigned int powersave_bias_target(struct cpufreq_policy *policy, + unsigned int freq_next, unsigned int relation) +{ + unsigned int freq_req, freq_reduc, freq_avg; + unsigned int freq_hi, freq_lo; + unsigned int index = 0; + unsigned int jiffies_total, jiffies_hi, jiffies_lo; + struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, policy->cpu); + + if (!dbs_info->freq_table) { + dbs_info->freq_lo = 0; + dbs_info->freq_lo_jiffies = 0; + return freq_next; + } + + cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next, + relation, &index); + freq_req = dbs_info->freq_table[index].frequency; + freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000; + freq_avg = freq_req - freq_reduc; + + /* Find freq bounds for freq_avg in freq_table */ + index = 0; + cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, + CPUFREQ_RELATION_H, &index); + freq_lo = dbs_info->freq_table[index].frequency; + index = 0; + cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, + CPUFREQ_RELATION_L, &index); + freq_hi = dbs_info->freq_table[index].frequency; + + /* Find out how long we have to be in hi and lo freqs */ + if (freq_hi == freq_lo) { + dbs_info->freq_lo = 0; + dbs_info->freq_lo_jiffies = 0; + return freq_lo; + } + jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + jiffies_hi = (freq_avg - freq_lo) * jiffies_total; + jiffies_hi += ((freq_hi - freq_lo) / 2); + jiffies_hi /= (freq_hi - freq_lo); + jiffies_lo = jiffies_total - jiffies_hi; + dbs_info->freq_lo = freq_lo; + dbs_info->freq_lo_jiffies = jiffies_lo; + dbs_info->freq_hi_jiffies = jiffies_hi; + return freq_hi; +} + +static void ondemand_powersave_bias_init(void) +{ + int i; + for_each_online_cpu(i) { + struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, i); + dbs_info->freq_table = cpufreq_frequency_get_table(i); + dbs_info->freq_lo = 0; + } +} + /************************** sysfs interface ************************/ static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf) { @@ -124,6 +191,7 @@ static ssize_t show_##file_name \ show_one(sampling_rate, sampling_rate); show_one(up_threshold, up_threshold); show_one(ignore_nice_load, ignore_nice); +show_one(powersave_bias, powersave_bias); static ssize_t store_sampling_rate(struct cpufreq_policy *unused, const char *buf, size_t count) @@ -198,6 +266,27 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, return count; } +static ssize_t store_powersave_bias(struct cpufreq_policy *unused, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1) + return -EINVAL; + + if (input > 1000) + input = 1000; + + mutex_lock(&dbs_mutex); + dbs_tuners_ins.powersave_bias = input; + ondemand_powersave_bias_init(); + mutex_unlock(&dbs_mutex); + + return count; +} + #define define_one_rw(_name) \ static struct freq_attr _name = \ __ATTR(_name, 0644, show_##_name, store_##_name) @@ -205,6 +294,7 @@ __ATTR(_name, 0644, show_##_name, store_##_name) define_one_rw(sampling_rate); define_one_rw(up_threshold); define_one_rw(ignore_nice_load); +define_one_rw(powersave_bias); static struct attribute * dbs_attributes[] = { &sampling_rate_max.attr, @@ -212,6 +302,7 @@ static struct attribute * dbs_attributes[] = { &sampling_rate.attr, &up_threshold.attr, &ignore_nice_load.attr, + &powersave_bias.attr, NULL }; @@ -234,6 +325,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) if (!this_dbs_info->enable) return; + this_dbs_info->freq_lo = 0; policy = this_dbs_info->cur_policy; cur_jiffies = jiffies64_to_cputime64(get_jiffies_64()); total_ticks = (unsigned int) cputime64_sub(cur_jiffies, @@ -274,11 +366,18 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) /* Check for frequency increase */ if (load > dbs_tuners_ins.up_threshold) { /* if we are already at full speed then break out early */ - if (policy->cur == policy->max) - return; + if (!dbs_tuners_ins.powersave_bias) { + if (policy->cur == policy->max) + return; - __cpufreq_driver_target(policy, policy->max, - CPUFREQ_RELATION_H); + __cpufreq_driver_target(policy, policy->max, + CPUFREQ_RELATION_H); + } else { + int freq = powersave_bias_target(policy, policy->max, + CPUFREQ_RELATION_H); + __cpufreq_driver_target(policy, freq, + CPUFREQ_RELATION_L); + } return; } @@ -293,14 +392,23 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) * policy. To be safe, we focus 10 points under the threshold. */ if (load < (dbs_tuners_ins.up_threshold - 10)) { - unsigned int freq_next; - freq_next = (policy->cur * load) / + unsigned int freq_next = (policy->cur * load) / (dbs_tuners_ins.up_threshold - 10); - - __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L); + if (!dbs_tuners_ins.powersave_bias) { + __cpufreq_driver_target(policy, freq_next, + CPUFREQ_RELATION_L); + } else { + int freq = powersave_bias_target(policy, freq_next, + CPUFREQ_RELATION_L); + __cpufreq_driver_target(policy, freq, + CPUFREQ_RELATION_L); + } } } +/* Sampling types */ +enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE}; + static void do_dbs_timer(void *data) { unsigned int cpu = smp_processor_id(); @@ -311,10 +419,24 @@ static void do_dbs_timer(void *data) if (!dbs_info->enable) return; - - lock_cpu_hotplug(); - dbs_check_cpu(dbs_info); - unlock_cpu_hotplug(); + /* Common NORMAL_SAMPLE setup */ + INIT_WORK(&dbs_info->work, do_dbs_timer, (void *)DBS_NORMAL_SAMPLE); + if (!dbs_tuners_ins.powersave_bias || + (unsigned long) data == DBS_NORMAL_SAMPLE) { + lock_cpu_hotplug(); + dbs_check_cpu(dbs_info); + unlock_cpu_hotplug(); + if (dbs_info->freq_lo) { + /* Setup timer for SUB_SAMPLE */ + INIT_WORK(&dbs_info->work, do_dbs_timer, + (void *)DBS_SUB_SAMPLE); + delay = dbs_info->freq_hi_jiffies; + } + } else { + __cpufreq_driver_target(dbs_info->cur_policy, + dbs_info->freq_lo, + CPUFREQ_RELATION_H); + } queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay); } @@ -325,6 +447,7 @@ static inline void dbs_timer_init(unsigned int cpu) int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); delay -= jiffies % delay; + ondemand_powersave_bias_init(); INIT_WORK(&dbs_info->work, do_dbs_timer, 0); queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay); } From 179da8e6e8903a8cdb19bb12672d50dc33f0fde6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=B3=20Bilski?= Date: Tue, 8 Aug 2006 19:12:20 +0200 Subject: [PATCH 0090/1063] [CPUFREQ] Longhaul - Disable arbiter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ACPI C3 works for "Powersaver" processors, so use it only for them. Older CPU will change frequency on "halt" only. But we can protect transition in two ways: - by ACPI PM2 register, there is "bus master arbiter disable" bit. This isn't tested because VIA mainboards don't have PM2 register, - by PLE133 PCI/AGP arbiter disable register. There are two bits in this register. First is "PCI arbiter disable", second "AGP arbiter disable". This is working on VIA Epia 800 mainboards. Test on bm_control is more proper because this is true when PM2 register exist. Signed-off-by: Rafa³ Bilski Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/longhaul.c | 86 ++++++++++++++++++------- 1 file changed, 64 insertions(+), 22 deletions(-) diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index 4f2c3aeef724..83a8793f1db8 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -60,6 +61,7 @@ static int can_scale_voltage; static int vrmrev; static struct acpi_processor *pr = NULL; static struct acpi_processor_cx *cx = NULL; +static int port22_en = 0; /* Module parameters */ static int dont_scale_voltage; @@ -124,10 +126,9 @@ static int longhaul_get_cpu_mult(void) /* For processor with BCR2 MSR */ -static void do_longhaul1(int cx_address, unsigned int clock_ratio_index) +static void do_longhaul1(unsigned int clock_ratio_index) { union msr_bcr2 bcr2; - u32 t; rdmsrl(MSR_VIA_BCR2, bcr2.val); /* Enable software clock multiplier */ @@ -136,13 +137,11 @@ static void do_longhaul1(int cx_address, unsigned int clock_ratio_index) /* Sync to timer tick */ safe_halt(); - ACPI_FLUSH_CPU_CACHE(); /* Change frequency on next halt or sleep */ wrmsrl(MSR_VIA_BCR2, bcr2.val); - /* Invoke C3 */ - inb(cx_address); - /* Dummy op - must do something useless after P_LVL3 read */ - t = inl(acpi_fadt.xpm_tmr_blk.address); + /* Invoke transition */ + ACPI_FLUSH_CPU_CACHE(); + halt(); /* Disable software clock multiplier */ local_irq_disable(); @@ -166,9 +165,9 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index) /* Sync to timer tick */ safe_halt(); - ACPI_FLUSH_CPU_CACHE(); /* Change frequency on next halt or sleep */ wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + ACPI_FLUSH_CPU_CACHE(); /* Invoke C3 */ inb(cx_address); /* Dummy op - must do something useless after P_LVL3 read */ @@ -227,10 +226,13 @@ static void longhaul_setstate(unsigned int clock_ratio_index) outb(0xFF,0xA1); /* Overkill */ outb(0xFE,0x21); /* TMR0 only */ - /* Disable bus master arbitration */ - if (pr->flags.bm_check) { + if (pr->flags.bm_control) { + /* Disable bus master arbitration */ acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1, ACPI_MTX_DO_NOT_LOCK); + } else if (port22_en) { + /* Disable AGP and PCI arbiters */ + outb(3, 0x22); } switch (longhaul_version) { @@ -244,7 +246,7 @@ static void longhaul_setstate(unsigned int clock_ratio_index) */ case TYPE_LONGHAUL_V1: case TYPE_LONGHAUL_V2: - do_longhaul1(cx->address, clock_ratio_index); + do_longhaul1(clock_ratio_index); break; /* @@ -259,14 +261,20 @@ static void longhaul_setstate(unsigned int clock_ratio_index) * to work in practice. */ case TYPE_POWERSAVER: + /* Don't allow wakeup */ + acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0, + ACPI_MTX_DO_NOT_LOCK); do_powersaver(cx->address, clock_ratio_index); break; } - /* Enable bus master arbitration */ - if (pr->flags.bm_check) { + if (pr->flags.bm_control) { + /* Enable bus master arbitration */ acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0, ACPI_MTX_DO_NOT_LOCK); + } else if (port22_en) { + /* Enable arbiters */ + outb(0, 0x22); } outb(pic2_mask,0xA1); /* restore mask */ @@ -540,21 +548,33 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle, return 1; } +/* VIA don't support PM2 reg, but have something similar */ +static int enable_arbiter_disable(void) +{ + struct pci_dev *dev; + u8 pci_cmd; + + /* Find PLE133 host bridge */ + dev = pci_find_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0, NULL); + if (dev != NULL) { + /* Enable access to port 0x22 */ + pci_read_config_byte(dev, 0x78, &pci_cmd); + if ( !(pci_cmd & 1<<7) ) { + pci_cmd |= 1<<7; + pci_write_config_byte(dev, 0x78, pci_cmd); + } + return 1; + } + return 0; +} + static int __init longhaul_cpu_init(struct cpufreq_policy *policy) { struct cpuinfo_x86 *c = cpu_data; char *cpuname=NULL; int ret; - /* Check ACPI support for C3 state */ - acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, - &longhaul_walk_callback, NULL, (void *)&pr); - if (pr == NULL) goto err_acpi; - - cx = &pr->power.states[ACPI_STATE_C3]; - if (cx->address == 0 || cx->latency > 1000) goto err_acpi; - - /* Now check what we have on this motherboard */ + /* Check what we have on this motherboard */ switch (c->x86_model) { case 6: cpu_model = CPU_SAMUEL; @@ -636,6 +656,28 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) break; }; + /* Find ACPI data for processor */ + acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, + &longhaul_walk_callback, NULL, (void *)&pr); + if (pr == NULL) + goto err_acpi; + + if (longhaul_version == TYPE_POWERSAVER) { + /* Check ACPI support for C3 state */ + cx = &pr->power.states[ACPI_STATE_C3]; + if (cx->address == 0 || cx->latency > 1000) + goto err_acpi; + } else { + /* Check ACPI support for bus master arbiter disable */ + if (!pr->flags.bm_control) { + if (!enable_arbiter_disable()) { + printk(KERN_ERR PFX "No ACPI support. No VT8601 host bridge. Aborting.\n"); + return -ENODEV; + } else + port22_en = 1; + } + } + ret = longhaul_get_ranges(); if (ret != 0) return ret; From e7745d4e0299a3460128917ceb6b6a807fa7f9e8 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 11 Aug 2006 18:02:27 -0400 Subject: [PATCH 0091/1063] [AGPGART] Const'ify the agpgart driver version. Signed-off-by: Dave Jones --- drivers/char/agp/backend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c index 509adc403250..d59e037ddd12 100644 --- a/drivers/char/agp/backend.c +++ b/drivers/char/agp/backend.c @@ -44,7 +44,7 @@ * past 0.99 at all due to some boolean logic error. */ #define AGPGART_VERSION_MAJOR 0 #define AGPGART_VERSION_MINOR 101 -static struct agp_version agp_current_version = +static const struct agp_version agp_current_version = { .major = AGPGART_VERSION_MAJOR, .minor = AGPGART_VERSION_MINOR, From 71565619af84a15d0abef6f0d6704e6472cd87c1 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 12 Aug 2006 01:59:50 +0400 Subject: [PATCH 0092/1063] [AGPGART] CONFIG_PM=n slim: drivers/char/agp/efficeon-agp.c Signed-off-by: Alexey Dobriyan Signed-off-by: Dave Jones --- drivers/char/agp/efficeon-agp.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/char/agp/efficeon-agp.c b/drivers/char/agp/efficeon-agp.c index b788b0a3bbf3..30f730ff81c1 100644 --- a/drivers/char/agp/efficeon-agp.c +++ b/drivers/char/agp/efficeon-agp.c @@ -337,13 +337,6 @@ static struct agp_bridge_driver efficeon_driver = { .agp_destroy_page = agp_generic_destroy_page, }; - -static int agp_efficeon_resume(struct pci_dev *pdev) -{ - printk(KERN_DEBUG PFX "agp_efficeon_resume()\n"); - return efficeon_configure(); -} - static int __devinit agp_efficeon_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -414,11 +407,18 @@ static void __devexit agp_efficeon_remove(struct pci_dev *pdev) agp_put_bridge(bridge); } +#ifdef CONFIG_PM static int agp_efficeon_suspend(struct pci_dev *dev, pm_message_t state) { return 0; } +static int agp_efficeon_resume(struct pci_dev *pdev) +{ + printk(KERN_DEBUG PFX "agp_efficeon_resume()\n"); + return efficeon_configure(); +} +#endif static struct pci_device_id agp_efficeon_pci_table[] = { { @@ -439,8 +439,10 @@ static struct pci_driver agp_efficeon_pci_driver = { .id_table = agp_efficeon_pci_table, .probe = agp_efficeon_probe, .remove = agp_efficeon_remove, +#ifdef CONFIG_PM .suspend = agp_efficeon_suspend, .resume = agp_efficeon_resume, +#endif }; static int __init agp_efficeon_init(void) From 85be7d60595b4803731cec158b0023bc050fdd14 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 12 Aug 2006 02:02:02 +0400 Subject: [PATCH 0093/1063] [AGPGART] CONFIG_PM=n slim: drivers/char/agp/intel-agp.c Signed-off-by: Alexey Dobriyan Signed-off-by: Dave Jones --- drivers/char/agp/intel-agp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index 61ac3809f997..42a1cb871992 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -1766,6 +1766,7 @@ static void __devexit agp_intel_remove(struct pci_dev *pdev) agp_put_bridge(bridge); } +#ifdef CONFIG_PM static int agp_intel_resume(struct pci_dev *pdev) { struct agp_bridge_data *bridge = pci_get_drvdata(pdev); @@ -1789,6 +1790,7 @@ static int agp_intel_resume(struct pci_dev *pdev) return 0; } +#endif static struct pci_device_id agp_intel_pci_table[] = { #define ID(x) \ @@ -1835,7 +1837,9 @@ static struct pci_driver agp_intel_pci_driver = { .id_table = agp_intel_pci_table, .probe = agp_intel_probe, .remove = __devexit_p(agp_intel_remove), +#ifdef CONFIG_PM .resume = agp_intel_resume, +#endif }; static int __init agp_intel_init(void) From b53e674a707cf77e76339852abdc063696679261 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 11 Aug 2006 18:13:41 -0400 Subject: [PATCH 0094/1063] [AGPGART] const'ify VIA AGP PCI table. Signed-off-by: Dave Jones --- drivers/char/agp/via-agp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/agp/via-agp.c b/drivers/char/agp/via-agp.c index b8ec25d17478..c149ac9ce9a7 100644 --- a/drivers/char/agp/via-agp.c +++ b/drivers/char/agp/via-agp.c @@ -9,7 +9,7 @@ #include #include "agp.h" -static struct pci_device_id agp_via_pci_table[]; +static const struct pci_device_id agp_via_pci_table[]; #define VIA_GARTCTRL 0x80 #define VIA_APSIZE 0x84 @@ -485,7 +485,7 @@ static int agp_via_resume(struct pci_dev *pdev) #endif /* CONFIG_PM */ /* must be the same order as name table above */ -static struct pci_device_id agp_via_pci_table[] = { +static const struct pci_device_id agp_via_pci_table[] = { #define ID(x) \ { \ .class = (PCI_CLASS_BRIDGE_HOST << 8), \ From 6595413fc9453a211f4b5d5cc42f0bbf3daa615b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=B3=20Bilski?= Date: Sun, 13 Aug 2006 09:16:20 +0200 Subject: [PATCH 0095/1063] [CPUFREQ] Longhaul - Add ignore_latency option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some laptops with VIA C3 processor, CLE266 chipset and AMI BIOS have incorrect latency values in FADT table. These laptops seems to be C3 capable, but latency values are to big: 101 for C2 and 1017 for C3. This option will allow user to skip C3 latency test but not C3 address test. AMI BIOS is setting C3 address to correct value in DSDT table. Signed-off-by: Rafa³ Bilski Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/longhaul.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index 83a8793f1db8..43bbf948d45d 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -65,7 +65,7 @@ static int port22_en = 0; /* Module parameters */ static int dont_scale_voltage; - +static int ignore_latency = 0; #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) @@ -665,8 +665,10 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) if (longhaul_version == TYPE_POWERSAVER) { /* Check ACPI support for C3 state */ cx = &pr->power.states[ACPI_STATE_C3]; - if (cx->address == 0 || cx->latency > 1000) + if (cx->address == 0 || + (cx->latency > 1000 && ignore_latency == 0) ) goto err_acpi; + } else { /* Check ACPI support for bus master arbiter disable */ if (!pr->flags.bm_control) { @@ -773,6 +775,8 @@ static void __exit longhaul_exit(void) module_param (dont_scale_voltage, int, 0644); MODULE_PARM_DESC(dont_scale_voltage, "Don't scale voltage of processor"); +module_param(ignore_latency, int, 0644); +MODULE_PARM_DESC(ignore_latency, "Skip ACPI C3 latency test"); MODULE_AUTHOR ("Dave Jones "); MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); @@ -780,4 +784,3 @@ MODULE_LICENSE ("GPL"); late_initcall(longhaul_init); module_exit(longhaul_exit); - From b5ecf60fe6b18de0bc59d336d444835d4ef835ed Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sun, 13 Aug 2006 23:00:08 +0200 Subject: [PATCH 0096/1063] [CPUFREQ] make drivers/cpufreq/cpufreq_ondemand.c:powersave_bias_target() static This patch makes the needlessly global powersave_bias_target() static. Signed-off-by: Adrian Bunk Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_ondemand.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 34874c2f1885..5ca2fd5d1ed1 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -105,8 +105,9 @@ static inline cputime64_t get_cpu_idle_time(unsigned int cpu) * Returns the freq_hi to be used right now and will set freq_hi_jiffies, * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs. */ -unsigned int powersave_bias_target(struct cpufreq_policy *policy, - unsigned int freq_next, unsigned int relation) +static unsigned int powersave_bias_target(struct cpufreq_policy *policy, + unsigned int freq_next, + unsigned int relation) { unsigned int freq_req, freq_reduc, freq_avg; unsigned int freq_hi, freq_lo; From c4e6952ffd71b263a64d1a9d79812446130560a5 Mon Sep 17 00:00:00 2001 From: Takashi YOSHI Date: Mon, 14 Aug 2006 19:48:30 -0500 Subject: [PATCH 0097/1063] [PATCH] MTD: Add Macronix MX29F040 to JEDEC Signed-off-by: Takashi YOSHII Signed-off-by: Josh Boyer --- drivers/mtd/chips/jedec_probe.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/mtd/chips/jedec_probe.c b/drivers/mtd/chips/jedec_probe.c index 8f39d0a31438..1154dac715aa 100644 --- a/drivers/mtd/chips/jedec_probe.c +++ b/drivers/mtd/chips/jedec_probe.c @@ -111,6 +111,7 @@ #define MX29LV040C 0x004F #define MX29LV160T 0x22C4 #define MX29LV160B 0x2249 +#define MX29F040 0x00A4 #define MX29F016 0x00AD #define MX29F002T 0x00B0 #define MX29F004T 0x0045 @@ -1171,6 +1172,19 @@ static const struct amd_flash_info jedec_table[] = { ERASEINFO(0x10000,31) } }, { + .mfr_id = MANUFACTURER_MACRONIX, + .dev_id = MX29F040, + .name = "Macronix MX29F040", + .uaddr = { + [0] = MTD_UADDR_0x0555_0x02AA /* x8 */ + }, + .DevSize = SIZE_512KiB, + .CmdSet = P_ID_AMD_STD, + .NumEraseRegions= 1, + .regions = { + ERASEINFO(0x10000,8), + } + }, { .mfr_id = MANUFACTURER_MACRONIX, .dev_id = MX29F016, .name = "Macronix MX29F016", From 79b9cd586f534f3f40ee66b6c27732149a5915ad Mon Sep 17 00:00:00 2001 From: Takashi YOSHII Date: Tue, 15 Aug 2006 07:26:32 -0500 Subject: [PATCH 0098/1063] [PATCH] [MTD] Maps: Add dependency on alternate probe methods to physmap map/physmap.c tries to probe "cfi_probe", "jedec_probe" and "map_rom", but map/Kconfig says it depends on MTD_CFI only. This patch adds MTD_JEDECPROBE and MTD_ROM to the dependency condition. Signed-off-by: Takashi YOSHII Signed-off-by: Josh Boyer --- drivers/mtd/maps/Kconfig | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig index 83d0b2a52527..64d1b6a6c920 100644 --- a/drivers/mtd/maps/Kconfig +++ b/drivers/mtd/maps/Kconfig @@ -13,13 +13,13 @@ config MTD_COMPLEX_MAPPINGS config MTD_PHYSMAP tristate "CFI Flash device in physical memory map" - depends on MTD_CFI + depends on MTD_CFI || MTD_JEDECPROBE || MTD_ROM help - This provides a 'mapping' driver which allows the CFI probe and - command set driver code to communicate with flash chips which - are mapped physically into the CPU's memory. You will need to - configure the physical address and size of the flash chips on - your particular board as well as the bus width, either statically + This provides a 'mapping' driver which allows the NOR Flash and + ROM driver code to communicate with chips which are mapped + physically into the CPU's memory. You will need to configure + the physical address and size of the flash chips on your + particular board as well as the bus width, either statically with config options or at run-time. config MTD_PHYSMAP_START From 0b6c0bb3f9621b128011bcd5f65047c73afdde3b Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 15 Aug 2006 02:42:25 +0200 Subject: [PATCH 0099/1063] fs/jffs2/xattr.c: remove dead code This patch removes some obvious dead code spotted by the Coverity checker. Signed-off-by: Adrian Bunk Signed-off-by: Josh Boyer --- fs/jffs2/xattr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index 25bc1ae08648..4da09ce1d1f5 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -1215,7 +1215,6 @@ int jffs2_garbage_collect_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xatt rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XATTR_SIZE); if (rc) { JFFS2_WARNING("jffs2_reserve_space_gc()=%d, request=%u\n", rc, totlen); - rc = rc ? rc : -EBADFD; goto out; } rc = save_xattr_datum(c, xd); From 5b0c5c2c0d04c29f85abb485378ba5476c7aeec2 Mon Sep 17 00:00:00 2001 From: Haavard Skinnemoen Date: Wed, 9 Aug 2006 10:54:44 +0200 Subject: [PATCH 0100/1063] MTD: Convert Atmel PRI information to AMD format Atmel flash chips don't have PRI information in the same format as AMD flash chips. This patch installs a fixup for all Atmel chips that converts the relevant PRI fields into AMD format. Only the fields that are actually used by the command set is actually converted. The rest are initialized to zero (which should be safe) Signed-off-by: Haavard Skinnemoen Signed-off-by: Josh Boyer --- drivers/mtd/chips/cfi_cmdset_0002.c | 21 +++++++++++++++++++++ include/linux/mtd/cfi.h | 13 +++++++++++++ 2 files changed, 34 insertions(+) diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index 9885726a16e4..8901c4412daf 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -161,6 +161,26 @@ static void fixup_use_write_buffers(struct mtd_info *mtd, void *param) } } +/* Atmel chips don't use the same PRI format as AMD chips */ +static void fixup_convert_atmel_pri(struct mtd_info *mtd, void *param) +{ + struct map_info *map = mtd->priv; + struct cfi_private *cfi = map->fldrv_priv; + struct cfi_pri_amdstd *extp = cfi->cmdset_priv; + struct cfi_pri_atmel atmel_pri; + + memcpy(&atmel_pri, extp, sizeof(atmel_pri)); + memset(extp + 5, 0, sizeof(*extp) - 5); + + if (atmel_pri.Features & 0x02) + extp->EraseSuspend = 2; + + if (atmel_pri.BottomBoot) + extp->TopBottom = 2; + else + extp->TopBottom = 3; +} + static void fixup_use_secsi(struct mtd_info *mtd, void *param) { /* Setup for chips with a secsi area */ @@ -192,6 +212,7 @@ static struct cfi_fixup cfi_fixup_table[] = { #if !FORCE_WORD_WRITE { CFI_MFR_ANY, CFI_ID_ANY, fixup_use_write_buffers, NULL, }, #endif + { CFI_MFR_ATMEL, CFI_ID_ANY, fixup_convert_atmel_pri, NULL }, { 0, 0, NULL, NULL } }; static struct cfi_fixup jedec_fixup_table[] = { diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index 09bfae6938b3..123948b14547 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -199,6 +199,18 @@ struct cfi_pri_amdstd { uint8_t TopBottom; } __attribute__((packed)); +/* Vendor-Specific PRI for Atmel chips (command set 0x0002) */ + +struct cfi_pri_atmel { + uint8_t pri[3]; + uint8_t MajorVersion; + uint8_t MinorVersion; + uint8_t Features; + uint8_t BottomBoot; + uint8_t BurstMode; + uint8_t PageMode; +} __attribute__((packed)); + struct cfi_pri_query { uint8_t NumFields; uint32_t ProtField[1]; /* Not host ordered */ @@ -464,6 +476,7 @@ struct cfi_fixup { #define CFI_ID_ANY 0xffff #define CFI_MFR_AMD 0x0001 +#define CFI_MFR_ATMEL 0x001F #define CFI_MFR_ST 0x0020 /* STMicroelectronics */ void cfi_fixup(struct mtd_info *mtd, struct cfi_fixup* fixups); From 0165508c80a2b5d5268d9c5dfa9b30c534a33693 Mon Sep 17 00:00:00 2001 From: Haavard Skinnemoen Date: Wed, 9 Aug 2006 11:06:07 +0200 Subject: [PATCH 0101/1063] MTD: Add lock/unlock operations for Atmel AT49BV6416 The AT49BV6416 is locked by default, so we really need to provide at least the unlock() operation for write and erase to work. This patch implements both ->lock() and ->unlock() and provides a fixup to install them when an AT49BV6416 chip is detected. These functions are probably valid on more Atmel chips, but I believe it's mostly obsolete ones. The AT49BV6416 is in fact obsolete, but it's used on all current AT32STK1000 development boards. Signed-off-by: Haavard Skinnemoen Signed-off-by: Josh Boyer --- drivers/mtd/chips/cfi_cmdset_0002.c | 90 +++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index 8901c4412daf..ddc5bd783354 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -45,9 +45,11 @@ #define MAX_WORD_RETRIES 3 #define MANUFACTURER_AMD 0x0001 +#define MANUFACTURER_ATMEL 0x001F #define MANUFACTURER_SST 0x00BF #define SST49LF004B 0x0060 #define SST49LF008A 0x005a +#define AT49BV6416 0x00d6 static int cfi_amdstd_read (struct mtd_info *, loff_t, size_t, size_t *, u_char *); static int cfi_amdstd_write_words(struct mtd_info *, loff_t, size_t, size_t *, const u_char *); @@ -68,6 +70,9 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr static void put_chip(struct map_info *map, struct flchip *chip, unsigned long adr); #include "fwh_lock.h" +static int cfi_atmel_lock(struct mtd_info *mtd, loff_t ofs, size_t len); +static int cfi_atmel_unlock(struct mtd_info *mtd, loff_t ofs, size_t len); + static struct mtd_chip_driver cfi_amdstd_chipdrv = { .probe = NULL, /* Not usable directly */ .destroy = cfi_amdstd_destroy, @@ -199,6 +204,16 @@ static void fixup_use_erase_chip(struct mtd_info *mtd, void *param) } +/* + * Some Atmel chips (e.g. the AT49BV6416) power-up with all sectors + * locked by default. + */ +static void fixup_use_atmel_lock(struct mtd_info *mtd, void *param) +{ + mtd->lock = cfi_atmel_lock; + mtd->unlock = cfi_atmel_unlock; +} + static struct cfi_fixup cfi_fixup_table[] = { #ifdef AMD_BOOTLOC_BUG { CFI_MFR_AMD, CFI_ID_ANY, fixup_amd_bootblock, NULL }, @@ -228,6 +243,7 @@ static struct cfi_fixup fixup_table[] = { * we know that is the case. */ { CFI_MFR_ANY, CFI_ID_ANY, fixup_use_erase_chip, NULL }, + { CFI_MFR_ATMEL, AT49BV6416, fixup_use_atmel_lock, NULL }, { 0, 0, NULL, NULL } }; @@ -1628,6 +1644,80 @@ static int cfi_amdstd_erase_chip(struct mtd_info *mtd, struct erase_info *instr) return 0; } +static int do_atmel_lock(struct map_info *map, struct flchip *chip, + unsigned long adr, int len, void *thunk) +{ + struct cfi_private *cfi = map->fldrv_priv; + int ret; + + spin_lock(chip->mutex); + ret = get_chip(map, chip, adr + chip->start, FL_LOCKING); + if (ret) + goto out_unlock; + chip->state = FL_LOCKING; + + DEBUG(MTD_DEBUG_LEVEL3, "MTD %s(): LOCK 0x%08lx len %d\n", + __func__, adr, len); + + cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, + cfi->device_type, NULL); + cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, + cfi->device_type, NULL); + cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi, + cfi->device_type, NULL); + cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, + cfi->device_type, NULL); + cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, + cfi->device_type, NULL); + map_write(map, CMD(0x40), chip->start + adr); + + chip->state = FL_READY; + put_chip(map, chip, adr + chip->start); + ret = 0; + +out_unlock: + spin_unlock(chip->mutex); + return ret; +} + +static int do_atmel_unlock(struct map_info *map, struct flchip *chip, + unsigned long adr, int len, void *thunk) +{ + struct cfi_private *cfi = map->fldrv_priv; + int ret; + + spin_lock(chip->mutex); + ret = get_chip(map, chip, adr + chip->start, FL_UNLOCKING); + if (ret) + goto out_unlock; + chip->state = FL_UNLOCKING; + + DEBUG(MTD_DEBUG_LEVEL3, "MTD %s(): LOCK 0x%08lx len %d\n", + __func__, adr, len); + + cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, + cfi->device_type, NULL); + map_write(map, CMD(0x70), adr); + + chip->state = FL_READY; + put_chip(map, chip, adr + chip->start); + ret = 0; + +out_unlock: + spin_unlock(chip->mutex); + return ret; +} + +static int cfi_atmel_lock(struct mtd_info *mtd, loff_t ofs, size_t len) +{ + return cfi_varsize_frob(mtd, do_atmel_lock, ofs, len, NULL); +} + +static int cfi_atmel_unlock(struct mtd_info *mtd, loff_t ofs, size_t len) +{ + return cfi_varsize_frob(mtd, do_atmel_unlock, ofs, len, NULL); +} + static void cfi_amdstd_sync (struct mtd_info *mtd) { From 04846f25920d4b05d6040c531cc601049260db52 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 9 Aug 2006 17:31:16 +0200 Subject: [PATCH 0102/1063] [SCSI] limit recursion when flushing shost->starved_list Attached is a patch that should limit a possible recursion that can lead to a stack overflow like follows: Kernel stack overflow. CPU: 3 Not tainted Process zfcperp0.0.d819 (pid: 13897, task: 000000003e0d8cc8, ksp: 000000003499dbb8) Krnl PSW : 0404000180000000 000000000030f8b2 (get_device+0x12/0x48) Krnl GPRS: 00000000135a1980 000000000030f758 000000003ed6c1e8 0000000000000005 0000000000000000 000000000044a780 000000003dbf7000 0000000034e15800 000000003621c048 070000003499c108 000000003499c1a0 000000003ed6c000 0000000040895000 00000000408ab630 000000003499c0a0 000000003499c0a0 Krnl Code: a7 fb ff e8 a7 19 00 00 b9 02 00 22 e3 e0 f0 98 00 24 a7 84 Call Trace: ([<000000004089edc2>] scsi_request_fn+0x13e/0x650 [scsi_mod]) [<00000000002c5ff4>] blk_run_queue+0xd4/0x1a4 [<000000004089ff8c>] scsi_queue_insert+0x22c/0x2a4 [scsi_mod] [<000000004089779a>] scsi_dispatch_cmd+0x8a/0x3d0 [scsi_mod] [<000000004089f1ec>] scsi_request_fn+0x568/0x650 [scsi_mod] ... [<000000004089f1ec>] scsi_request_fn+0x568/0x650 [scsi_mod] [<00000000002c5ff4>] blk_run_queue+0xd4/0x1a4 [<000000004089ff8c>] scsi_queue_insert+0x22c/0x2a4 [scsi_mod] [<000000004089779a>] scsi_dispatch_cmd+0x8a/0x3d0 [scsi_mod] [<000000004089f1ec>] scsi_request_fn+0x568/0x650 [scsi_mod] [<00000000002c5ff4>] blk_run_queue+0xd4/0x1a4 [<000000004089fa9e>] scsi_run_host_queues+0x196/0x230 [scsi_mod] [<00000000409eba28>] zfcp_erp_thread+0x2638/0x3080 [zfcp] [<0000000000107462>] kernel_thread_starter+0x6/0xc [<000000000010745c>] kernel_thread_starter+0x0/0xc <0>Kernel panic - not syncing: Corrupt kernel stack, can't continue. This stack overflow occurred during tests on s390 using zfcp. Recursion depth for this panic was 19. Usually recursion between blk_run_queue and a request_fn is avoided using QUEUE_FLAG_REENTER. But this does not help if the scsi stack tries to flush the starved_list of a scsi_host. Limit recursion depth when flushing the starved_list of a scsi_host. Signed-off-by: Andreas Herrmann Signed-off-by: James Bottomley --- drivers/scsi/scsi_lib.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 077c1c691210..d6743b959a72 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -551,7 +551,15 @@ static void scsi_run_queue(struct request_queue *q) list_del_init(&sdev->starved_entry); spin_unlock_irqrestore(shost->host_lock, flags); - blk_run_queue(sdev->request_queue); + + if (test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) && + !test_and_set_bit(QUEUE_FLAG_REENTER, + &sdev->request_queue->queue_flags)) { + blk_run_queue(sdev->request_queue); + clear_bit(QUEUE_FLAG_REENTER, + &sdev->request_queue->queue_flags); + } else + blk_run_queue(sdev->request_queue); spin_lock_irqsave(shost->host_lock, flags); if (unlikely(!list_empty(&sdev->starved_entry))) From c8f7b073e0e81499474a84ee2a90f77f7805c7f8 Mon Sep 17 00:00:00 2001 From: Mark Haverkamp Date: Thu, 3 Aug 2006 08:02:24 -0700 Subject: [PATCH 0103/1063] [SCSI] aacraid: interruptible ioctl Received from Mark Salyzyn This patch allows the FSACTL_SEND_LARGE_FIB, FSACTL_SENDFIB and FSACTL_SEND_RAW_SRB ioctl calls into the aacraid driver to be interruptible. Only necessary if the adapter and/or the management software has gone into some sort of misbehavior and the system is being rebooted, thus permitting the user management software applications to be killed relatively cleanly. The FIB queue resource is held out of the free queue until the adapter finally, if ever, completes the command. Signed-off-by: Mark Haverkamp Signed-off-by: James Bottomley --- drivers/scsi/aacraid/commctrl.c | 23 ++++++++++++++++++----- drivers/scsi/aacraid/commsup.c | 15 ++++++++++++--- drivers/scsi/aacraid/dpcsup.c | 10 ++++++++-- 3 files changed, 38 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/aacraid/commctrl.c b/drivers/scsi/aacraid/commctrl.c index 255421de9d1a..14d7aa9b7df3 100644 --- a/drivers/scsi/aacraid/commctrl.c +++ b/drivers/scsi/aacraid/commctrl.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include /* ssleep prototype */ #include #include #include @@ -140,7 +140,8 @@ cleanup: fibptr->hw_fib_pa = hw_fib_pa; fibptr->hw_fib = hw_fib; } - aac_fib_free(fibptr); + if (retval != -EINTR) + aac_fib_free(fibptr); return retval; } @@ -621,7 +622,13 @@ static int aac_send_raw_srb(struct aac_dev* dev, void __user * arg) actual_fibsize = sizeof (struct aac_srb) + (((user_srbcmd->sg.count & 0xff) - 1) * sizeof (struct sgentry)); if(actual_fibsize != fibsize){ // User made a mistake - should not continue - dprintk((KERN_DEBUG"aacraid: Bad Size specified in Raw SRB command\n")); + dprintk((KERN_DEBUG"aacraid: Bad Size specified in " + "Raw SRB command calculated fibsize=%d " + "user_srbcmd->sg.count=%d aac_srb=%d sgentry=%d " + "issued fibsize=%d\n", + actual_fibsize, user_srbcmd->sg.count, + sizeof(struct aac_srb), sizeof(struct sgentry), + fibsize)); rcode = -EINVAL; goto cleanup; } @@ -663,6 +670,10 @@ static int aac_send_raw_srb(struct aac_dev* dev, void __user * arg) psg->count = cpu_to_le32(sg_indx+1); status = aac_fib_send(ScsiPortCommand, srbfib, actual_fibsize, FsaNormal, 1, 1, NULL, NULL); } + if (status == -EINTR) { + rcode = -EINTR; + goto cleanup; + } if (status != 0){ dprintk((KERN_DEBUG"aacraid: Could not send raw srb fib to hba\n")); @@ -696,8 +707,10 @@ cleanup: for(i=0; i <= sg_indx; i++){ kfree(sg_list[i]); } - aac_fib_complete(srbfib); - aac_fib_free(srbfib); + if (rcode != -EINTR) { + aac_fib_complete(srbfib); + aac_fib_free(srbfib); + } return rcode; } diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 3f27419c66af..c67da1321133 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -464,6 +464,8 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, dprintk((KERN_DEBUG " hw_fib pa being sent=%lx\n",(ulong)fibptr->hw_fib_pa)); dprintk((KERN_DEBUG " fib being sent=%p\n",fibptr)); + if (!dev->queues) + return -ENODEV; q = &dev->queues->queue[AdapNormCmdQueue]; if(wait) @@ -527,8 +529,15 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, } udelay(5); } - } else - down(&fibptr->event_wait); + } else if (down_interruptible(&fibptr->event_wait)) { + spin_lock_irqsave(&fibptr->event_lock, flags); + if (fibptr->done == 0) { + fibptr->done = 2; /* Tell interrupt we aborted */ + spin_unlock_irqrestore(&fibptr->event_lock, flags); + return -EINTR; + } + spin_unlock_irqrestore(&fibptr->event_lock, flags); + } BUG_ON(fibptr->done == 0); if((fibptr->flags & FIB_CONTEXT_FLAG_TIMED_OUT)){ @@ -795,7 +804,7 @@ static void aac_handle_aif(struct aac_dev * dev, struct fib * fibptr) /* Sniff for container changes */ - if (!dev) + if (!dev || !dev->fsa_dev) return; container = (u32)-1; diff --git a/drivers/scsi/aacraid/dpcsup.c b/drivers/scsi/aacraid/dpcsup.c index b2a5c7262f36..8335f07b7720 100644 --- a/drivers/scsi/aacraid/dpcsup.c +++ b/drivers/scsi/aacraid/dpcsup.c @@ -124,10 +124,15 @@ unsigned int aac_response_normal(struct aac_queue * q) } else { unsigned long flagv; spin_lock_irqsave(&fib->event_lock, flagv); - fib->done = 1; + if (!fib->done) + fib->done = 1; up(&fib->event_wait); spin_unlock_irqrestore(&fib->event_lock, flagv); FIB_COUNTER_INCREMENT(aac_config.NormalRecved); + if (fib->done == 2) { + aac_fib_complete(fib); + aac_fib_free(fib); + } } consumed++; spin_lock_irqsave(q->lock, flags); @@ -316,7 +321,8 @@ unsigned int aac_intr_normal(struct aac_dev * dev, u32 Index) unsigned long flagv; dprintk((KERN_INFO "event_wait up\n")); spin_lock_irqsave(&fib->event_lock, flagv); - fib->done = 1; + if (!fib->done) + fib->done = 1; up(&fib->event_wait); spin_unlock_irqrestore(&fib->event_lock, flagv); FIB_COUNTER_INCREMENT(aac_config.NormalRecved); From 8c23cd7457151fc8ace79ec700a8aeaa9fc5b3d9 Mon Sep 17 00:00:00 2001 From: Mark Haverkamp Date: Tue, 8 Aug 2006 08:52:14 -0700 Subject: [PATCH 0104/1063] [SCSI] aacraid: Restart adapter on firmware assert (Update 2) Received from Mark Salyzyn If the adapter should be in a blinkled (Firmware Assert) state when the driver loads, we will perform a warm restart of the Adapter Firmware to see if we can rescue the adapter. Possible causes of a blinkled can occur on some early release motherboard BIOSes, transitory PCI bus problems on embedded systems or non-x86 based architectures, transitory startup failures of early release drives or transitory hardware failures; some of which can bite the adapter later at runtime. Future enhancements will include recovery during runtime. Fixed extra whitespace space issue. Signed-off-by: Mark Haverkamp Signed-off-by: James Bottomley --- drivers/scsi/aacraid/aacraid.h | 1 + drivers/scsi/aacraid/rkt.c | 29 ++++++++++++++++++++++------- drivers/scsi/aacraid/rx.c | 29 ++++++++++++++++++++++------- 3 files changed, 45 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index d0eecd4bec83..05f80982efa5 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -1670,6 +1670,7 @@ extern struct aac_common aac_config; #define RCV_TEMP_READINGS 0x00000025 #define GET_COMM_PREFERRED_SETTINGS 0x00000026 #define IOP_RESET 0x00001000 +#define IOP_RESET_ALWAYS 0x00001001 #define RE_INIT_ADAPTER 0x000000ee /* diff --git a/drivers/scsi/aacraid/rkt.c b/drivers/scsi/aacraid/rkt.c index 458ea897fd72..f850c3a7cce9 100644 --- a/drivers/scsi/aacraid/rkt.c +++ b/drivers/scsi/aacraid/rkt.c @@ -395,6 +395,25 @@ static int aac_rkt_send(struct fib * fib) return 0; } +static int aac_rkt_restart_adapter(struct aac_dev *dev) +{ + u32 var; + + printk(KERN_ERR "%s%d: adapter kernel panic'd.\n", + dev->name, dev->id); + + if (aac_rkt_check_health(dev) <= 0) + return 1; + if (rkt_sync_cmd(dev, IOP_RESET, 0, 0, 0, 0, 0, 0, + &var, NULL, NULL, NULL, NULL)) + return 1; + if (var != 0x00000001) + return 1; + if (rkt_readl(dev, MUnit.OMRx[0]) & KERNEL_PANIC) + return 1; + return 0; +} + /** * aac_rkt_init - initialize an i960 based AAC card * @dev: device to configure @@ -417,6 +436,9 @@ int aac_rkt_init(struct aac_dev *dev) /* * Check to see if the board panic'd while booting. */ + if (rkt_readl(dev, MUnit.OMRx[0]) & KERNEL_PANIC) + if (aac_rkt_restart_adapter(dev)) + goto error_iounmap; /* * Check to see if the board failed any self tests. */ @@ -431,13 +453,6 @@ int aac_rkt_init(struct aac_dev *dev) printk(KERN_ERR "%s%d: adapter monitor panic.\n", dev->name, instance); goto error_iounmap; } - /* - * Check to see if the board panic'd while booting. - */ - if (rkt_readl(dev, MUnit.OMRx[0]) & KERNEL_PANIC) { - printk(KERN_ERR "%s%d: adapter kernel panic'd.\n", dev->name, instance); - goto error_iounmap; - } start = jiffies; /* * Wait for the adapter to be up and running. Wait up to 3 minutes diff --git a/drivers/scsi/aacraid/rx.c b/drivers/scsi/aacraid/rx.c index 035018db69b1..c715c4b2442d 100644 --- a/drivers/scsi/aacraid/rx.c +++ b/drivers/scsi/aacraid/rx.c @@ -394,6 +394,25 @@ static int aac_rx_send(struct fib * fib) return 0; } +static int aac_rx_restart_adapter(struct aac_dev *dev) +{ + u32 var; + + printk(KERN_ERR "%s%d: adapter kernel panic'd.\n", + dev->name, dev->id); + + if (aac_rx_check_health(dev) <= 0) + return 1; + if (rx_sync_cmd(dev, IOP_RESET, 0, 0, 0, 0, 0, 0, + &var, NULL, NULL, NULL, NULL)) + return 1; + if (var != 0x00000001) + return 1; + if (rx_readl(dev, MUnit.OMRx[0]) & KERNEL_PANIC) + return 1; + return 0; +} + /** * aac_rx_init - initialize an i960 based AAC card * @dev: device to configure @@ -416,6 +435,9 @@ int aac_rx_init(struct aac_dev *dev) /* * Check to see if the board panic'd while booting. */ + if (rx_readl(dev, MUnit.OMRx[0]) & KERNEL_PANIC) + if (aac_rx_restart_adapter(dev)) + goto error_iounmap; /* * Check to see if the board failed any self tests. */ @@ -423,13 +445,6 @@ int aac_rx_init(struct aac_dev *dev) printk(KERN_ERR "%s%d: adapter self-test failed.\n", dev->name, instance); goto error_iounmap; } - /* - * Check to see if the board panic'd while booting. - */ - if (rx_readl(dev, MUnit.OMRx[0]) & KERNEL_PANIC) { - printk(KERN_ERR "%s%d: adapter kernel panic.\n", dev->name, instance); - goto error_iounmap; - } /* * Check to see if the monitor panic'd while booting. */ From 90ee346651524eb275405d410f5d3bb6765a2d93 Mon Sep 17 00:00:00 2001 From: Mark Haverkamp Date: Thu, 3 Aug 2006 08:03:07 -0700 Subject: [PATCH 0105/1063] [SCSI] aacraid: Check for unlikely errors Received from Mark Salyzyn The enclosed patch cleans up some code fragments, adds some paranoia (unproven causes of potential driver failures). Signed-off-by: Mark Haverkamp Signed-off-by: James Bottomley --- drivers/scsi/aacraid/aachba.c | 15 +++++++++++++-- drivers/scsi/aacraid/comminit.c | 2 +- drivers/scsi/aacraid/linit.c | 4 ++++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index 83b5c7d085f2..699351c15cc9 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -489,6 +489,8 @@ int aac_probe_container(struct aac_dev *dev, int cid) unsigned instance; fsa_dev_ptr = dev->fsa_dev; + if (!fsa_dev_ptr) + return -ENOMEM; instance = dev->scsi_host_ptr->unique_id; if (!(fibptr = aac_fib_alloc(dev))) @@ -1392,6 +1394,7 @@ static int aac_synchronize(struct scsi_cmnd *scsicmd, int cid) struct scsi_cmnd *cmd; struct scsi_device *sdev = scsicmd->device; int active = 0; + struct aac_dev *aac; unsigned long flags; /* @@ -1413,11 +1416,11 @@ static int aac_synchronize(struct scsi_cmnd *scsicmd, int cid) if (active) return SCSI_MLQUEUE_DEVICE_BUSY; + aac = (struct aac_dev *)scsicmd->device->host->hostdata; /* * Allocate and initialize a Fib */ - if (!(cmd_fibcontext = - aac_fib_alloc((struct aac_dev *)scsicmd->device->host->hostdata))) + if (!(cmd_fibcontext = aac_fib_alloc(aac))) return SCSI_MLQUEUE_HOST_BUSY; aac_fib_init(cmd_fibcontext); @@ -1470,6 +1473,8 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd) struct aac_dev *dev = (struct aac_dev *)host->hostdata; struct fsa_dev_info *fsa_dev_ptr = dev->fsa_dev; + if (fsa_dev_ptr == NULL) + return -1; /* * If the bus, id or lun is out of range, return fail * Test does not apply to ID 16, the pseudo id for the controller @@ -1782,6 +1787,8 @@ static int query_disk(struct aac_dev *dev, void __user *arg) struct fsa_dev_info *fsa_dev_ptr; fsa_dev_ptr = dev->fsa_dev; + if (!fsa_dev_ptr) + return -ENODEV; if (copy_from_user(&qd, arg, sizeof (struct aac_query_disk))) return -EFAULT; if (qd.cnum == -1) @@ -1843,6 +1850,10 @@ static int delete_disk(struct aac_dev *dev, void __user *arg) struct fsa_dev_info *fsa_dev_ptr; fsa_dev_ptr = dev->fsa_dev; + if (!fsa_dev_ptr) + return -ENODEV; + if (!fsa_dev_ptr) + return -ENODEV; if (copy_from_user(&dd, arg, sizeof (struct aac_delete_disk))) return -EFAULT; diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c index 1cd3584ba7ff..87a955096761 100644 --- a/drivers/scsi/aacraid/comminit.c +++ b/drivers/scsi/aacraid/comminit.c @@ -180,7 +180,7 @@ int aac_send_shutdown(struct aac_dev * dev) -2 /* Timeout silently */, 1, NULL, NULL); - if (status == 0) + if (status >= 0) aac_fib_complete(fibctx); aac_fib_free(fibctx); return status; diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index e42a479ce64a..9d8b550a91cb 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -1013,6 +1013,10 @@ static void __devexit aac_remove_one(struct pci_dev *pdev) list_del(&aac->entry); scsi_host_put(shost); pci_disable_device(pdev); + if (list_empty(&aac_devices)) { + unregister_chrdev(aac_cfg_major, "aac"); + aac_cfg_major = -1; + } } static struct pci_driver aac_pci_driver = { From 8c867b257d159ca04602d7087fa29f846785f9ea Mon Sep 17 00:00:00 2001 From: Mark Haverkamp Date: Thu, 3 Aug 2006 08:03:30 -0700 Subject: [PATCH 0106/1063] [SCSI] aacraid: Reset adapter in recovery timeout Received from Mark Salyzyn If the adapter is in blinkled (Firmware Assert) when error recovery timeout actions have been triggered, perform an adapter warm reset and restart the initialization. Signed-off-by: Mark Haverkamp Signed-off-by: James Bottomley --- drivers/scsi/aacraid/aachba.c | 39 +++-- drivers/scsi/aacraid/aacraid.h | 4 +- drivers/scsi/aacraid/commctrl.c | 2 +- drivers/scsi/aacraid/commsup.c | 258 ++++++++++++++++++++++++++++++++ drivers/scsi/aacraid/linit.c | 14 +- 5 files changed, 296 insertions(+), 21 deletions(-) diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index 699351c15cc9..37c55ddce214 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -175,7 +175,7 @@ MODULE_PARM_DESC(acbsize, "Request a specific adapter control block (FIB) size. * * Query config status, and commit the configuration if needed. */ -int aac_get_config_status(struct aac_dev *dev) +int aac_get_config_status(struct aac_dev *dev, int commit_flag) { int status = 0; struct fib * fibptr; @@ -219,7 +219,7 @@ int aac_get_config_status(struct aac_dev *dev) aac_fib_complete(fibptr); /* Send a CT_COMMIT_CONFIG to enable discovery of devices */ if (status >= 0) { - if (commit == 1) { + if ((commit == 1) || commit_flag) { struct aac_commit_config * dinfo; aac_fib_init(fibptr); dinfo = (struct aac_commit_config *) fib_data(fibptr); @@ -784,8 +784,9 @@ int aac_get_adapter_info(struct aac_dev* dev) dev->maximum_num_channels = le32_to_cpu(bus_info->BusCount); } - tmp = le32_to_cpu(dev->adapter_info.kernelrev); - printk(KERN_INFO "%s%d: kernel %d.%d-%d[%d] %.*s\n", + if (!dev->in_reset) { + tmp = le32_to_cpu(dev->adapter_info.kernelrev); + printk(KERN_INFO "%s%d: kernel %d.%d-%d[%d] %.*s\n", dev->name, dev->id, tmp>>24, @@ -794,20 +795,21 @@ int aac_get_adapter_info(struct aac_dev* dev) le32_to_cpu(dev->adapter_info.kernelbuild), (int)sizeof(dev->supplement_adapter_info.BuildDate), dev->supplement_adapter_info.BuildDate); - tmp = le32_to_cpu(dev->adapter_info.monitorrev); - printk(KERN_INFO "%s%d: monitor %d.%d-%d[%d]\n", + tmp = le32_to_cpu(dev->adapter_info.monitorrev); + printk(KERN_INFO "%s%d: monitor %d.%d-%d[%d]\n", dev->name, dev->id, tmp>>24,(tmp>>16)&0xff,tmp&0xff, le32_to_cpu(dev->adapter_info.monitorbuild)); - tmp = le32_to_cpu(dev->adapter_info.biosrev); - printk(KERN_INFO "%s%d: bios %d.%d-%d[%d]\n", + tmp = le32_to_cpu(dev->adapter_info.biosrev); + printk(KERN_INFO "%s%d: bios %d.%d-%d[%d]\n", dev->name, dev->id, tmp>>24,(tmp>>16)&0xff,tmp&0xff, le32_to_cpu(dev->adapter_info.biosbuild)); - if (le32_to_cpu(dev->adapter_info.serial[0]) != 0xBAD0) - printk(KERN_INFO "%s%d: serial %x\n", - dev->name, dev->id, - le32_to_cpu(dev->adapter_info.serial[0])); + if (le32_to_cpu(dev->adapter_info.serial[0]) != 0xBAD0) + printk(KERN_INFO "%s%d: serial %x\n", + dev->name, dev->id, + le32_to_cpu(dev->adapter_info.serial[0])); + } dev->nondasd_support = 0; dev->raid_scsi_mode = 0; @@ -1417,6 +1419,9 @@ static int aac_synchronize(struct scsi_cmnd *scsicmd, int cid) return SCSI_MLQUEUE_DEVICE_BUSY; aac = (struct aac_dev *)scsicmd->device->host->hostdata; + if (aac->in_reset) + return SCSI_MLQUEUE_HOST_BUSY; + /* * Allocate and initialize a Fib */ @@ -1504,6 +1509,8 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd) case INQUIRY: case READ_CAPACITY: case TEST_UNIT_READY: + if (dev->in_reset) + return -1; spin_unlock_irq(host->host_lock); aac_probe_container(dev, cid); if ((fsa_dev_ptr[cid].valid & 1) == 0) @@ -1529,6 +1536,8 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd) } } else { /* check for physical non-dasd devices */ if(dev->nondasd_support == 1){ + if (dev->in_reset) + return -1; return aac_send_srb_fib(scsicmd); } else { scsicmd->result = DID_NO_CONNECT << 16; @@ -1584,6 +1593,8 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd) scsicmd->scsi_done(scsicmd); return 0; } + if (dev->in_reset) + return -1; setinqstr(dev, (void *) (inq_data.inqd_vid), fsa_dev_ptr[cid].type); inq_data.inqd_pdt = INQD_PDT_DA; /* Direct/random access device */ aac_internal_transfer(scsicmd, &inq_data, 0, sizeof(inq_data)); @@ -1739,6 +1750,8 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd) case READ_10: case READ_12: case READ_16: + if (dev->in_reset) + return -1; /* * Hack to keep track of ordinal number of the device that * corresponds to a container. Needed to convert @@ -1757,6 +1770,8 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd) case WRITE_10: case WRITE_12: case WRITE_16: + if (dev->in_reset) + return -1; return aac_write(scsicmd, cid); case SYNCHRONIZE_CACHE: diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 05f80982efa5..8924c183d9c3 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -1029,6 +1029,7 @@ struct aac_dev init->InitStructRevision==cpu_to_le32(ADAPTER_INIT_STRUCT_REVISION_4) u8 raw_io_64; u8 printf_enabled; + u8 in_reset; }; #define aac_adapter_interrupt(dev) \ @@ -1789,7 +1790,7 @@ void aac_consumer_free(struct aac_dev * dev, struct aac_queue * q, u32 qnum); int aac_fib_complete(struct fib * context); #define fib_data(fibctx) ((void *)(fibctx)->hw_fib->data) struct aac_dev *aac_init_adapter(struct aac_dev *dev); -int aac_get_config_status(struct aac_dev *dev); +int aac_get_config_status(struct aac_dev *dev, int commit_flag); int aac_get_containers(struct aac_dev *dev); int aac_scsi_cmd(struct scsi_cmnd *cmd); int aac_dev_ioctl(struct aac_dev *dev, int cmd, void __user *arg); @@ -1800,6 +1801,7 @@ int aac_sa_init(struct aac_dev *dev); unsigned int aac_response_normal(struct aac_queue * q); unsigned int aac_command_normal(struct aac_queue * q); unsigned int aac_intr_normal(struct aac_dev * dev, u32 Index); +int aac_check_health(struct aac_dev * dev); int aac_command_thread(void *data); int aac_close_fib_context(struct aac_dev * dev, struct aac_fib_context *fibctx); int aac_fib_adapter_complete(struct fib * fibptr, unsigned short size); diff --git a/drivers/scsi/aacraid/commctrl.c b/drivers/scsi/aacraid/commctrl.c index 14d7aa9b7df3..da1d3a9212f8 100644 --- a/drivers/scsi/aacraid/commctrl.c +++ b/drivers/scsi/aacraid/commctrl.c @@ -298,7 +298,7 @@ return_fib: spin_unlock_irqrestore(&dev->fib_lock, flags); /* If someone killed the AIF aacraid thread, restart it */ status = !dev->aif_thread; - if (status && dev->queues && dev->fsa_dev) { + if (status && !dev->in_reset && dev->queues && dev->fsa_dev) { /* Be paranoid, be very paranoid! */ kthread_stop(dev->thread); ssleep(1); diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index c67da1321133..53add53be0bd 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -40,8 +40,10 @@ #include #include #include +#include #include #include +#include #include #include "aacraid.h" @@ -1054,6 +1056,262 @@ static void aac_handle_aif(struct aac_dev * dev, struct fib * fibptr) } +static int _aac_reset_adapter(struct aac_dev *aac) +{ + int index, quirks; + u32 ret; + int retval; + struct Scsi_Host *host; + struct scsi_device *dev; + struct scsi_cmnd *command; + struct scsi_cmnd *command_list; + + /* + * Assumptions: + * - host is locked. + * - in_reset is asserted, so no new i/o is getting to the + * card. + * - The card is dead. + */ + host = aac->scsi_host_ptr; + scsi_block_requests(host); + aac_adapter_disable_int(aac); + spin_unlock_irq(host->host_lock); + kthread_stop(aac->thread); + + /* + * If a positive health, means in a known DEAD PANIC + * state and the adapter could be reset to `try again'. + */ + retval = aac_adapter_check_health(aac); + if (retval == 0) + retval = aac_adapter_sync_cmd(aac, IOP_RESET_ALWAYS, + 0, 0, 0, 0, 0, 0, &ret, NULL, NULL, NULL, NULL); + if (retval) + retval = aac_adapter_sync_cmd(aac, IOP_RESET, + 0, 0, 0, 0, 0, 0, &ret, NULL, NULL, NULL, NULL); + + if (retval) + goto out; + if (ret != 0x00000001) { + retval = -ENODEV; + goto out; + } + + index = aac->cardtype; + + /* + * Re-initialize the adapter, first free resources, then carefully + * apply the initialization sequence to come back again. Only risk + * is a change in Firmware dropping cache, it is assumed the caller + * will ensure that i/o is queisced and the card is flushed in that + * case. + */ + aac_fib_map_free(aac); + aac->hw_fib_va = NULL; + aac->hw_fib_pa = 0; + pci_free_consistent(aac->pdev, aac->comm_size, aac->comm_addr, aac->comm_phys); + aac->comm_addr = NULL; + aac->comm_phys = 0; + kfree(aac->queues); + aac->queues = NULL; + free_irq(aac->pdev->irq, aac); + kfree(aac->fsa_dev); + aac->fsa_dev = NULL; + if (aac_get_driver_ident(index)->quirks & AAC_QUIRK_31BIT) { + if (((retval = pci_set_dma_mask(aac->pdev, DMA_32BIT_MASK))) || + ((retval = pci_set_consistent_dma_mask(aac->pdev, DMA_32BIT_MASK)))) + goto out; + } else { + if (((retval = pci_set_dma_mask(aac->pdev, 0x7FFFFFFFULL))) || + ((retval = pci_set_consistent_dma_mask(aac->pdev, 0x7FFFFFFFULL)))) + goto out; + } + if ((retval = (*(aac_get_driver_ident(index)->init))(aac))) + goto out; + if (aac_get_driver_ident(index)->quirks & AAC_QUIRK_31BIT) + if ((retval = pci_set_dma_mask(aac->pdev, DMA_32BIT_MASK))) + goto out; + aac->thread = kthread_run(aac_command_thread, aac, aac->name); + if (IS_ERR(aac->thread)) { + retval = PTR_ERR(aac->thread); + goto out; + } + (void)aac_get_adapter_info(aac); + quirks = aac_get_driver_ident(index)->quirks; + if ((quirks & AAC_QUIRK_34SG) && (host->sg_tablesize > 34)) { + host->sg_tablesize = 34; + host->max_sectors = (host->sg_tablesize * 8) + 112; + } + if ((quirks & AAC_QUIRK_17SG) && (host->sg_tablesize > 17)) { + host->sg_tablesize = 17; + host->max_sectors = (host->sg_tablesize * 8) + 112; + } + aac_get_config_status(aac, 1); + aac_get_containers(aac); + /* + * This is where the assumption that the Adapter is quiesced + * is important. + */ + command_list = NULL; + __shost_for_each_device(dev, host) { + unsigned long flags; + spin_lock_irqsave(&dev->list_lock, flags); + list_for_each_entry(command, &dev->cmd_list, list) + if (command->SCp.phase == AAC_OWNER_FIRMWARE) { + command->SCp.buffer = (struct scatterlist *)command_list; + command_list = command; + } + spin_unlock_irqrestore(&dev->list_lock, flags); + } + while ((command = command_list)) { + command_list = (struct scsi_cmnd *)command->SCp.buffer; + command->SCp.buffer = NULL; + command->result = DID_OK << 16 + | COMMAND_COMPLETE << 8 + | SAM_STAT_TASK_SET_FULL; + command->SCp.phase = AAC_OWNER_ERROR_HANDLER; + command->scsi_done(command); + } + retval = 0; + +out: + aac->in_reset = 0; + scsi_unblock_requests(host); + spin_lock_irq(host->host_lock); + return retval; +} + +int aac_check_health(struct aac_dev * aac) +{ + int BlinkLED; + unsigned long time_now, flagv = 0; + struct list_head * entry; + struct Scsi_Host * host; + + /* Extending the scope of fib_lock slightly to protect aac->in_reset */ + if (spin_trylock_irqsave(&aac->fib_lock, flagv) == 0) + return 0; + + if (aac->in_reset || !(BlinkLED = aac_adapter_check_health(aac))) { + spin_unlock_irqrestore(&aac->fib_lock, flagv); + return 0; /* OK */ + } + + aac->in_reset = 1; + + /* Fake up an AIF: + * aac_aifcmd.command = AifCmdEventNotify = 1 + * aac_aifcmd.seqnum = 0xFFFFFFFF + * aac_aifcmd.data[0] = AifEnExpEvent = 23 + * aac_aifcmd.data[1] = AifExeFirmwarePanic = 3 + * aac.aifcmd.data[2] = AifHighPriority = 3 + * aac.aifcmd.data[3] = BlinkLED + */ + + time_now = jiffies/HZ; + entry = aac->fib_list.next; + + /* + * For each Context that is on the + * fibctxList, make a copy of the + * fib, and then set the event to wake up the + * thread that is waiting for it. + */ + while (entry != &aac->fib_list) { + /* + * Extract the fibctx + */ + struct aac_fib_context *fibctx = list_entry(entry, struct aac_fib_context, next); + struct hw_fib * hw_fib; + struct fib * fib; + /* + * Check if the queue is getting + * backlogged + */ + if (fibctx->count > 20) { + /* + * It's *not* jiffies folks, + * but jiffies / HZ, so do not + * panic ... + */ + u32 time_last = fibctx->jiffies; + /* + * Has it been > 2 minutes + * since the last read off + * the queue? + */ + if ((time_now - time_last) > aif_timeout) { + entry = entry->next; + aac_close_fib_context(aac, fibctx); + continue; + } + } + /* + * Warning: no sleep allowed while + * holding spinlock + */ + hw_fib = kmalloc(sizeof(struct hw_fib), GFP_ATOMIC); + fib = kmalloc(sizeof(struct fib), GFP_ATOMIC); + if (fib && hw_fib) { + struct aac_aifcmd * aif; + + memset(hw_fib, 0, sizeof(struct hw_fib)); + memset(fib, 0, sizeof(struct fib)); + fib->hw_fib = hw_fib; + fib->dev = aac; + aac_fib_init(fib); + fib->type = FSAFS_NTC_FIB_CONTEXT; + fib->size = sizeof (struct fib); + fib->data = hw_fib->data; + aif = (struct aac_aifcmd *)hw_fib->data; + aif->command = cpu_to_le32(AifCmdEventNotify); + aif->seqnum = cpu_to_le32(0xFFFFFFFF); + aif->data[0] = cpu_to_le32(AifEnExpEvent); + aif->data[1] = cpu_to_le32(AifExeFirmwarePanic); + aif->data[2] = cpu_to_le32(AifHighPriority); + aif->data[3] = cpu_to_le32(BlinkLED); + + /* + * Put the FIB onto the + * fibctx's fibs + */ + list_add_tail(&fib->fiblink, &fibctx->fib_list); + fibctx->count++; + /* + * Set the event to wake up the + * thread that will waiting. + */ + up(&fibctx->wait_sem); + } else { + printk(KERN_WARNING "aifd: didn't allocate NewFib.\n"); + kfree(fib); + kfree(hw_fib); + } + entry = entry->next; + } + + spin_unlock_irqrestore(&aac->fib_lock, flagv); + + if (BlinkLED < 0) { + printk(KERN_ERR "%s: Host adapter dead %d\n", aac->name, BlinkLED); + goto out; + } + + printk(KERN_ERR "%s: Host adapter BLINK LED 0x%x\n", aac->name, BlinkLED); + + host = aac->scsi_host_ptr; + spin_lock_irqsave(host->host_lock, flagv); + BlinkLED = _aac_reset_adapter(aac); + spin_unlock_irqrestore(host->host_lock, flagv); + return BlinkLED; + +out: + aac->in_reset = 0; + return BlinkLED; +} + + /** * aac_command_thread - command processing thread * @dev: Adapter to monitor diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 9d8b550a91cb..d67058f80816 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -454,17 +454,17 @@ static int aac_eh_reset(struct scsi_cmnd* cmd) printk(KERN_ERR "%s: Host adapter reset request. SCSI hang ?\n", AAC_DRIVERNAME); aac = (struct aac_dev *)host->hostdata; - if (aac_adapter_check_health(aac)) { - printk(KERN_ERR "%s: Host adapter appears dead\n", - AAC_DRIVERNAME); - return -ENODEV; - } + + if ((count = aac_check_health(aac))) + return count; /* * Wait for all commands to complete to this specific * target (block maximum 60 seconds). */ for (count = 60; count; --count) { - int active = 0; + int active = aac->in_reset; + + if (active == 0) __shost_for_each_device(dev, host) { spin_lock_irqsave(&dev->list_lock, flags); list_for_each_entry(command, &dev->cmd_list, list) { @@ -933,7 +933,7 @@ static int __devinit aac_probe_one(struct pci_dev *pdev, else shost->max_channel = 0; - aac_get_config_status(aac); + aac_get_config_status(aac, 0); aac_get_containers(aac); list_add(&aac->entry, insert); From 84961f28e9d13a4b193d0c8545f3c060c1890ff3 Mon Sep 17 00:00:00 2001 From: dave wysochanski Date: Wed, 9 Aug 2006 14:56:32 -0400 Subject: [PATCH 0107/1063] [SCSI] Don't add scsi_device for devices that return PQ=1, PDT=0x1f Some targets may return slight variations of PQ and PDT to indicate no LUN mapped. USB UFI setting PDT=0x1f but having reserved bits for PQ is one example, and NetApp targets returning PQ=1 and PDT=0x1f is another. Both instances seem like reasonable responses according to SPC-3 and UFI specs. The current scsi_probe_and_add_lun() code adds a scsi_device for targets that return PQ=1 and PDT=0x1f. This causes LUNs of type "UNKNOWN" to show up in /proc/scsi/scsi when no LUNs are mapped. In addition, subsequent rescans fail to recognize LUNs that may be added on the target, unless preceded by a write to the delete attribute of the "UNKNOWN" LUN. This patch addresses this problem by skipping over the scsi_add_lun() when PQ=1,PDT=0x1f is encountered, and just returns SCSI_SCAN_TARGET_PRESENT. Signed-off-by: Dave Wysochanski Signed-off-by: Christoph Hellwig Signed-off-by: James Bottomley --- drivers/scsi/scsi_scan.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 114e2067dce5..a24d3461fc78 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -893,11 +893,26 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget, } /* - * Non-standard SCSI targets may set the PDT to 0x1f (unknown or - * no device type) instead of using the Peripheral Qualifier to - * indicate that no LUN is present. For example, USB UFI does this. + * Some targets may set slight variations of PQ and PDT to signal + * that no LUN is present, so don't add sdev in these cases. + * Two specific examples are: + * 1) NetApp targets: return PQ=1, PDT=0x1f + * 2) USB UFI: returns PDT=0x1f, with the PQ bits being "reserved" + * in the UFI 1.0 spec (we cannot rely on reserved bits). + * + * References: + * 1) SCSI SPC-3, pp. 145-146 + * PQ=1: "A peripheral device having the specified peripheral + * device type is not connected to this logical unit. However, the + * device server is capable of supporting the specified peripheral + * device type on this logical unit." + * PDT=0x1f: "Unknown or no device type" + * 2) USB UFI 1.0, p. 20 + * PDT=00h Direct-access device (floppy) + * PDT=1Fh none (no FDD connected to the requested logical unit) */ - if (starget->pdt_1f_for_no_lun && (result[0] & 0x1f) == 0x1f) { + if (((result[0] >> 5) == 1 || starget->pdt_1f_for_no_lun) && + (result[0] & 0x1f) == 0x1f) { SCSI_LOG_SCAN_BUS(3, printk(KERN_INFO "scsi scan: peripheral device type" " of 31, no device added\n")); From a2f5d4d94f0ab9560b9a99d73d5b86b377c7f201 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 10 Aug 2006 21:41:13 -0400 Subject: [PATCH 0108/1063] [SCSI] remove unnecessary includes of linux/config.h from drivers/scsi/ kbuild includes this automatically these days. Signed-off-by: Dave Jones Signed-off-by: James Bottomley --- drivers/scsi/aic7xxx_old.c | 2 -- drivers/scsi/dpt_i2o.c | 1 - drivers/scsi/hptiop.c | 1 - drivers/scsi/libata-eh.c | 1 - drivers/scsi/scsi.h | 2 -- 5 files changed, 7 deletions(-) diff --git a/drivers/scsi/aic7xxx_old.c b/drivers/scsi/aic7xxx_old.c index 3f85b5e978f1..ba3bccafe113 100644 --- a/drivers/scsi/aic7xxx_old.c +++ b/drivers/scsi/aic7xxx_old.c @@ -249,8 +249,6 @@ #include #include /* for kmalloc() */ -#include /* for CONFIG_PCI */ - #define AIC7XXX_C_VERSION "5.2.6" #define ALL_TARGETS -1 diff --git a/drivers/scsi/dpt_i2o.c b/drivers/scsi/dpt_i2o.c index e1337339cacc..45806336ce02 100644 --- a/drivers/scsi/dpt_i2o.c +++ b/drivers/scsi/dpt_i2o.c @@ -46,7 +46,6 @@ MODULE_DESCRIPTION("Adaptec I2O RAID Driver"); #include #include /* for kmalloc() */ -#include /* for CONFIG_PCI */ #include /* for PCI support */ #include #include diff --git a/drivers/scsi/hptiop.c b/drivers/scsi/hptiop.c index ab2f8b267908..6b41c2ef6e21 100644 --- a/drivers/scsi/hptiop.c +++ b/drivers/scsi/hptiop.c @@ -15,7 +15,6 @@ * * For more information, visit http://www.highpoint-tech.com */ -#include #include #include #include diff --git a/drivers/scsi/libata-eh.c b/drivers/scsi/libata-eh.c index 4b6aa30f4d68..b3095fd92863 100644 --- a/drivers/scsi/libata-eh.c +++ b/drivers/scsi/libata-eh.c @@ -32,7 +32,6 @@ * */ -#include #include #include #include diff --git a/drivers/scsi/scsi.h b/drivers/scsi/scsi.h index f51e466893e7..d5a55fae60e0 100644 --- a/drivers/scsi/scsi.h +++ b/drivers/scsi/scsi.h @@ -20,8 +20,6 @@ #ifndef _SCSI_H #define _SCSI_H -#include /* for CONFIG_SCSI_LOGGING */ - #include #include #include From 016131b8fffa1085b4ad165ab228116fdc278ebe Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 14 Aug 2006 08:20:25 -0400 Subject: [PATCH 0109/1063] [SCSI] fc transport: convert fc_host symbolic_name attribute to a dynamic attribute Signed-off-by: James Bottomley --- drivers/scsi/scsi_transport_fc.c | 4 ++-- include/scsi/scsi_transport_fc.h | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index b03aa85108e5..c1c5cdffca38 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -815,7 +815,6 @@ fc_private_host_rd_attr_cast(node_name, "0x%llx\n", 20, unsigned long long); fc_private_host_rd_attr_cast(port_name, "0x%llx\n", 20, unsigned long long); fc_private_host_rd_attr_cast(permanent_port_name, "0x%llx\n", 20, unsigned long long); -fc_private_host_rd_attr(symbolic_name, "%s\n", (FC_SYMBOLIC_NAME_SIZE +1)); fc_private_host_rd_attr(maxframe_size, "%u bytes\n", 20); fc_private_host_rd_attr(serial_number, "%s\n", (FC_SERIAL_NUMBER_SIZE +1)); @@ -858,6 +857,7 @@ fc_host_rd_attr(port_id, "0x%06x\n", 20); fc_host_rd_enum_attr(port_type, FC_PORTTYPE_MAX_NAMELEN); fc_host_rd_enum_attr(port_state, FC_PORTSTATE_MAX_NAMELEN); fc_host_rd_attr_cast(fabric_name, "0x%llx\n", 20, unsigned long long); +fc_host_rd_attr(symbolic_name, "%s\n", FC_SYMBOLIC_NAME_SIZE + 1); /* Private Host Attributes */ @@ -1223,7 +1223,6 @@ fc_attach_transport(struct fc_function_template *ft) SETUP_HOST_ATTRIBUTE_RD(permanent_port_name); SETUP_HOST_ATTRIBUTE_RD(supported_classes); SETUP_HOST_ATTRIBUTE_RD(supported_fc4s); - SETUP_HOST_ATTRIBUTE_RD(symbolic_name); SETUP_HOST_ATTRIBUTE_RD(supported_speeds); SETUP_HOST_ATTRIBUTE_RD(maxframe_size); SETUP_HOST_ATTRIBUTE_RD(serial_number); @@ -1234,6 +1233,7 @@ fc_attach_transport(struct fc_function_template *ft) SETUP_HOST_ATTRIBUTE_RD(active_fc4s); SETUP_HOST_ATTRIBUTE_RD(speed); SETUP_HOST_ATTRIBUTE_RD(fabric_name); + SETUP_HOST_ATTRIBUTE_RD(symbolic_name); /* Transport-managed attributes */ SETUP_PRIVATE_HOST_ATTRIBUTE_RW(tgtid_bind_type); diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index 6d28b0317657..b7f62b85f0b3 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -409,6 +409,7 @@ struct fc_function_template { void (*get_host_active_fc4s)(struct Scsi_Host *); void (*get_host_speed)(struct Scsi_Host *); void (*get_host_fabric_name)(struct Scsi_Host *); + void (*get_host_symbolic_name)(struct Scsi_Host *); struct fc_host_statistics * (*get_fc_host_stats)(struct Scsi_Host *); void (*reset_fc_host_stats)(struct Scsi_Host *); @@ -445,7 +446,6 @@ struct fc_function_template { unsigned long show_host_permanent_port_name:1; unsigned long show_host_supported_classes:1; unsigned long show_host_supported_fc4s:1; - unsigned long show_host_symbolic_name:1; unsigned long show_host_supported_speeds:1; unsigned long show_host_maxframe_size:1; unsigned long show_host_serial_number:1; @@ -456,6 +456,7 @@ struct fc_function_template { unsigned long show_host_active_fc4s:1; unsigned long show_host_speed:1; unsigned long show_host_fabric_name:1; + unsigned long show_host_symbolic_name:1; }; From 2b6ee9b5295460017fc1bc3d60545512df280908 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 14 Aug 2006 23:09:23 -0700 Subject: [PATCH 0110/1063] [SCSI] aic7*: cleanup MODULE_PARM_DESC strings Modify beginning string to be more readable. Remove one trailing newline. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: James Bottomley --- drivers/scsi/aic7xxx/aic79xx_osm.c | 4 ++-- drivers/scsi/aic7xxx/aic7xxx_osm.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/aic7xxx/aic79xx_osm.c b/drivers/scsi/aic7xxx/aic79xx_osm.c index 998999c0a972..c7eeaced324a 100644 --- a/drivers/scsi/aic7xxx/aic79xx_osm.c +++ b/drivers/scsi/aic7xxx/aic79xx_osm.c @@ -321,7 +321,7 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(AIC79XX_DRIVER_VERSION); module_param(aic79xx, charp, 0444); MODULE_PARM_DESC(aic79xx, -"period delimited, options string.\n" +"period-delimited options string:\n" " verbose Enable verbose/diagnostic logging\n" " allow_memio Allow device registers to be memory mapped\n" " debug Bitmask of debug values to enable\n" @@ -346,7 +346,7 @@ MODULE_PARM_DESC(aic79xx, " Shorten the selection timeout to 128ms\n" "\n" " options aic79xx 'aic79xx=verbose.tag_info:{{}.{}.{..10}}.seltime:1'\n" -"\n"); +); static void ahd_linux_handle_scsi_status(struct ahd_softc *, struct scsi_device *, diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c index aa4be8a31415..e5bb4d87b307 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_osm.c +++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c @@ -341,7 +341,7 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(AIC7XXX_DRIVER_VERSION); module_param(aic7xxx, charp, 0444); MODULE_PARM_DESC(aic7xxx, -"period delimited, options string.\n" +"period-delimited options string:\n" " verbose Enable verbose/diagnostic logging\n" " allow_memio Allow device registers to be memory mapped\n" " debug Bitmask of debug values to enable\n" From f3d7271c5ac9029d19fc0252a85bc045334382cc Mon Sep 17 00:00:00 2001 From: Henrik Kretzschmar Date: Tue, 15 Aug 2006 11:17:21 +0200 Subject: [PATCH 0111/1063] [SCSI] convert to PCI_DEVICE() macro Convert the pci_device_id-table of the megaraid_sas-driver to the PCI_DEVICE-macro, to safe some lines. Signed-off-by: Henrik Kretzschmar Acked-by: "Patro, Sumant" Signed-off-by: James Bottomley --- drivers/scsi/megaraid/megaraid_sas.c | 34 ++++++++-------------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c index a8c9627a15c4..b36307d2e283 100644 --- a/drivers/scsi/megaraid/megaraid_sas.c +++ b/drivers/scsi/megaraid/megaraid_sas.c @@ -53,31 +53,15 @@ MODULE_DESCRIPTION("LSI Logic MegaRAID SAS Driver"); */ static struct pci_device_id megasas_pci_table[] = { - { - PCI_VENDOR_ID_LSI_LOGIC, - PCI_DEVICE_ID_LSI_SAS1064R, /* xscale IOP */ - PCI_ANY_ID, - PCI_ANY_ID, - }, - { - PCI_VENDOR_ID_LSI_LOGIC, - PCI_DEVICE_ID_LSI_SAS1078R, /* ppc IOP */ - PCI_ANY_ID, - PCI_ANY_ID, - }, - { - PCI_VENDOR_ID_LSI_LOGIC, - PCI_DEVICE_ID_LSI_VERDE_ZCR, /* xscale IOP, vega */ - PCI_ANY_ID, - PCI_ANY_ID, - }, - { - PCI_VENDOR_ID_DELL, - PCI_DEVICE_ID_DELL_PERC5, /* xscale IOP */ - PCI_ANY_ID, - PCI_ANY_ID, - }, - {0} /* Terminating entry */ + {PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1064R)}, + /* xscale IOP */ + {PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1078R)}, + /* ppc IOP */ + {PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_VERDE_ZCR)}, + /* xscale IOP, vega */ + {PCI_DEVICE(PCI_VENDOR_ID_DELL, PCI_DEVICE_ID_DELL_PERC5)}, + /* xscale IOP */ + {} }; MODULE_DEVICE_TABLE(pci, megasas_pci_table); From b8d08210126a7b769b857720a59721a453a57a1e Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 17 Aug 2006 08:00:43 -0400 Subject: [PATCH 0112/1063] [SCSI] fc transport: add fc_host system_hostname attribute and u64_to_wwn() This patch updates the fc transport for the following: - Addition of a new attribute "system_hostname" which can be used to set the fully qualified hostname that the fc_host is attached to. The fc_host can then register this string as the FDMI-based host name attribute. Note: for NPIV, a fc_host could be associated with a system which is not the local system. - Add the inline function u64_to_wwn(), which is the inverse of the existing wwn_to_u64() function. - Slight reorg, just to keep dynamic attributes with each other, etc Signed-off-by: James Smart Signed-off-by: James Bottomley --- drivers/scsi/scsi_transport_fc.c | 30 +++++++++++++++++++++++-- include/scsi/scsi_transport_fc.h | 38 +++++++++++++++++++++++--------- 2 files changed, 55 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index c1c5cdffca38..79d31ca2b741 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -301,8 +301,6 @@ static int fc_host_setup(struct transport_container *tc, struct device *dev, fc_host->supported_classes = FC_COS_UNSPECIFIED; memset(fc_host->supported_fc4s, 0, sizeof(fc_host->supported_fc4s)); - memset(fc_host->symbolic_name, 0, - sizeof(fc_host->symbolic_name)); fc_host->supported_speeds = FC_PORTSPEED_UNKNOWN; fc_host->maxframe_size = -1; memset(fc_host->serial_number, 0, @@ -315,6 +313,8 @@ static int fc_host_setup(struct transport_container *tc, struct device *dev, sizeof(fc_host->active_fc4s)); fc_host->speed = FC_PORTSPEED_UNKNOWN; fc_host->fabric_name = -1; + memset(fc_host->symbolic_name, 0, sizeof(fc_host->symbolic_name)); + memset(fc_host->system_hostname, 0, sizeof(fc_host->system_hostname)); fc_host->tgtid_bind_type = FC_TGTID_BIND_BY_WWPN; @@ -688,6 +688,25 @@ store_fc_host_##field(struct class_device *cdev, const char *buf, \ return count; \ } +#define fc_host_store_str_function(field, slen) \ +static ssize_t \ +store_fc_host_##field(struct class_device *cdev, const char *buf, \ + size_t count) \ +{ \ + struct Scsi_Host *shost = transport_class_to_shost(cdev); \ + struct fc_internal *i = to_fc_internal(shost->transportt); \ + unsigned int cnt=count; \ + \ + /* count may include a LF at end of string */ \ + if (buf[cnt-1] == '\n') \ + cnt--; \ + if (cnt > ((slen) - 1)) \ + return -EINVAL; \ + memcpy(fc_host_##field(shost), buf, cnt); \ + i->f->set_host_##field(shost); \ + return count; \ +} + #define fc_host_rd_attr(field, format_string, sz) \ fc_host_show_function(field, format_string, sz, ) \ static FC_CLASS_DEVICE_ATTR(host, field, S_IRUGO, \ @@ -859,6 +878,12 @@ fc_host_rd_enum_attr(port_state, FC_PORTSTATE_MAX_NAMELEN); fc_host_rd_attr_cast(fabric_name, "0x%llx\n", 20, unsigned long long); fc_host_rd_attr(symbolic_name, "%s\n", FC_SYMBOLIC_NAME_SIZE + 1); +fc_private_host_show_function(system_hostname, "%s\n", + FC_SYMBOLIC_NAME_SIZE + 1, ) +fc_host_store_str_function(system_hostname, FC_SYMBOLIC_NAME_SIZE) +static FC_CLASS_DEVICE_ATTR(host, system_hostname, S_IRUGO | S_IWUSR, + show_fc_host_system_hostname, store_fc_host_system_hostname); + /* Private Host Attributes */ @@ -1234,6 +1259,7 @@ fc_attach_transport(struct fc_function_template *ft) SETUP_HOST_ATTRIBUTE_RD(speed); SETUP_HOST_ATTRIBUTE_RD(fabric_name); SETUP_HOST_ATTRIBUTE_RD(symbolic_name); + SETUP_HOST_ATTRIBUTE_RW(system_hostname); /* Transport-managed attributes */ SETUP_PRIVATE_HOST_ATTRIBUTE_RW(tgtid_bind_type); diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index b7f62b85f0b3..c74be5dabfeb 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -312,7 +312,6 @@ struct fc_host_attrs { u64 permanent_port_name; u32 supported_classes; u8 supported_fc4s[FC_FC4_LIST_SIZE]; - char symbolic_name[FC_SYMBOLIC_NAME_SIZE]; u32 supported_speeds; u32 maxframe_size; char serial_number[FC_SERIAL_NUMBER_SIZE]; @@ -324,6 +323,8 @@ struct fc_host_attrs { u8 active_fc4s[FC_FC4_LIST_SIZE]; u32 speed; u64 fabric_name; + char symbolic_name[FC_SYMBOLIC_NAME_SIZE]; + char system_hostname[FC_SYMBOLIC_NAME_SIZE]; /* Private (Transport-managed) Attributes */ enum fc_tgtid_binding_type tgtid_bind_type; @@ -354,8 +355,6 @@ struct fc_host_attrs { (((struct fc_host_attrs *)(x)->shost_data)->supported_classes) #define fc_host_supported_fc4s(x) \ (((struct fc_host_attrs *)(x)->shost_data)->supported_fc4s) -#define fc_host_symbolic_name(x) \ - (((struct fc_host_attrs *)(x)->shost_data)->symbolic_name) #define fc_host_supported_speeds(x) \ (((struct fc_host_attrs *)(x)->shost_data)->supported_speeds) #define fc_host_maxframe_size(x) \ @@ -374,6 +373,10 @@ struct fc_host_attrs { (((struct fc_host_attrs *)(x)->shost_data)->speed) #define fc_host_fabric_name(x) \ (((struct fc_host_attrs *)(x)->shost_data)->fabric_name) +#define fc_host_symbolic_name(x) \ + (((struct fc_host_attrs *)(x)->shost_data)->symbolic_name) +#define fc_host_system_hostname(x) \ + (((struct fc_host_attrs *)(x)->shost_data)->system_hostname) #define fc_host_tgtid_bind_type(x) \ (((struct fc_host_attrs *)(x)->shost_data)->tgtid_bind_type) #define fc_host_rports(x) \ @@ -410,6 +413,7 @@ struct fc_function_template { void (*get_host_speed)(struct Scsi_Host *); void (*get_host_fabric_name)(struct Scsi_Host *); void (*get_host_symbolic_name)(struct Scsi_Host *); + void (*set_host_system_hostname)(struct Scsi_Host *); struct fc_host_statistics * (*get_fc_host_stats)(struct Scsi_Host *); void (*reset_fc_host_stats)(struct Scsi_Host *); @@ -457,6 +461,7 @@ struct fc_function_template { unsigned long show_host_speed:1; unsigned long show_host_fabric_name:1; unsigned long show_host_symbolic_name:1; + unsigned long show_host_system_hostname:1; }; @@ -492,6 +497,25 @@ fc_remote_port_chkready(struct fc_rport *rport) return result; } +static inline u64 wwn_to_u64(u8 *wwn) +{ + return (u64)wwn[0] << 56 | (u64)wwn[1] << 48 | + (u64)wwn[2] << 40 | (u64)wwn[3] << 32 | + (u64)wwn[4] << 24 | (u64)wwn[5] << 16 | + (u64)wwn[6] << 8 | (u64)wwn[7]; +} + +static inline void u64_to_wwn(u64 inm, u8 *wwn) +{ + wwn[0] = (inm >> 56) & 0xff; + wwn[1] = (inm >> 48) & 0xff; + wwn[2] = (inm >> 40) & 0xff; + wwn[3] = (inm >> 32) & 0xff; + wwn[4] = (inm >> 24) & 0xff; + wwn[5] = (inm >> 16) & 0xff; + wwn[6] = (inm >> 8) & 0xff; + wwn[7] = inm & 0xff; +} struct scsi_transport_template *fc_attach_transport( struct fc_function_template *); @@ -503,12 +527,4 @@ void fc_remote_port_delete(struct fc_rport *rport); void fc_remote_port_rolechg(struct fc_rport *rport, u32 roles); int scsi_is_fc_rport(const struct device *); -static inline u64 wwn_to_u64(u8 *wwn) -{ - return (u64)wwn[0] << 56 | (u64)wwn[1] << 48 | - (u64)wwn[2] << 40 | (u64)wwn[3] << 32 | - (u64)wwn[4] << 24 | (u64)wwn[5] << 16 | - (u64)wwn[6] << 8 | (u64)wwn[7]; -} - #endif /* SCSI_TRANSPORT_FC_H */ From 4041b9cd87d97a7c73a5bf5a9305dffee2599386 Mon Sep 17 00:00:00 2001 From: Michal Piotrowski Date: Thu, 17 Aug 2006 13:28:22 +0000 Subject: [PATCH 0113/1063] [SCSI] megaraid_sas: pci_module_init to pci_register_driver conversion Signed-off-by: Michal Piotrowski Acked-by: "Patro, Sumant" Signed-off-by: James Bottomley --- drivers/scsi/megaraid/megaraid_sas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c index b36307d2e283..4cab5b534b25 100644 --- a/drivers/scsi/megaraid/megaraid_sas.c +++ b/drivers/scsi/megaraid/megaraid_sas.c @@ -2838,7 +2838,7 @@ static int __init megasas_init(void) /* * Register ourselves as PCI hotplug module */ - rval = pci_module_init(&megasas_pci_driver); + rval = pci_register_driver(&megasas_pci_driver); if (rval) { printk(KERN_DEBUG "megasas: PCI hotplug regisration failed \n"); From d2afb3ae04e36dbc6e9eb2d8bd54406ff7b6b3bd Mon Sep 17 00:00:00 2001 From: Daniel Walker Date: Mon, 14 Aug 2006 23:09:23 -0700 Subject: [PATCH 0114/1063] [SCSI] BusLogic gcc 4.1 warning fixes - Reworked all the very long lines in that block (this drivers full of them though) - Returns an error in three places that it didn't before. - Properly clean up after a scsi_add_host() failure. Signed-off-by: Daniel Walker Signed-off-by: Andrew Morton Signed-off-by: James Bottomley --- drivers/scsi/BusLogic.c | 46 +++++++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c index 16a12a3b7b2b..59d1adaed73e 100644 --- a/drivers/scsi/BusLogic.c +++ b/drivers/scsi/BusLogic.c @@ -2176,6 +2176,7 @@ static int __init BusLogic_init(void) { int BusLogicHostAdapterCount = 0, DriverOptionsIndex = 0, ProbeIndex; struct BusLogic_HostAdapter *PrototypeHostAdapter; + int ret = 0; #ifdef MODULE if (BusLogic) @@ -2282,25 +2283,49 @@ static int __init BusLogic_init(void) perform Target Device Inquiry. */ if (BusLogic_ReadHostAdapterConfiguration(HostAdapter) && - BusLogic_ReportHostAdapterConfiguration(HostAdapter) && BusLogic_AcquireResources(HostAdapter) && BusLogic_CreateInitialCCBs(HostAdapter) && BusLogic_InitializeHostAdapter(HostAdapter) && BusLogic_TargetDeviceInquiry(HostAdapter)) { + BusLogic_ReportHostAdapterConfiguration(HostAdapter) && + BusLogic_AcquireResources(HostAdapter) && + BusLogic_CreateInitialCCBs(HostAdapter) && + BusLogic_InitializeHostAdapter(HostAdapter) && + BusLogic_TargetDeviceInquiry(HostAdapter)) { /* Initialization has been completed successfully. Release and re-register usage of the I/O Address range so that the Model Name of the Host Adapter will appear, and initialize the SCSI Host structure. */ - release_region(HostAdapter->IO_Address, HostAdapter->AddressCount); - if (!request_region(HostAdapter->IO_Address, HostAdapter->AddressCount, HostAdapter->FullModelName)) { - printk(KERN_WARNING "BusLogic: Release and re-register of " "port 0x%04lx failed \n", (unsigned long) HostAdapter->IO_Address); + release_region(HostAdapter->IO_Address, + HostAdapter->AddressCount); + if (!request_region(HostAdapter->IO_Address, + HostAdapter->AddressCount, + HostAdapter->FullModelName)) { + printk(KERN_WARNING + "BusLogic: Release and re-register of " + "port 0x%04lx failed \n", + (unsigned long)HostAdapter->IO_Address); BusLogic_DestroyCCBs(HostAdapter); BusLogic_ReleaseResources(HostAdapter); list_del(&HostAdapter->host_list); scsi_host_put(Host); + ret = -ENOMEM; } else { - BusLogic_InitializeHostStructure(HostAdapter, Host); - scsi_add_host(Host, HostAdapter->PCI_Device ? &HostAdapter->PCI_Device->dev : NULL); - scsi_scan_host(Host); - BusLogicHostAdapterCount++; + BusLogic_InitializeHostStructure(HostAdapter, + Host); + if (scsi_add_host(Host, HostAdapter->PCI_Device + ? &HostAdapter->PCI_Device->dev + : NULL)) { + printk(KERN_WARNING + "BusLogic: scsi_add_host()" + "failed!\n"); + BusLogic_DestroyCCBs(HostAdapter); + BusLogic_ReleaseResources(HostAdapter); + list_del(&HostAdapter->host_list); + scsi_host_put(Host); + ret = -ENODEV; + } else { + scsi_scan_host(Host); + BusLogicHostAdapterCount++; + } } } else { /* @@ -2315,12 +2340,13 @@ static int __init BusLogic_init(void) BusLogic_ReleaseResources(HostAdapter); list_del(&HostAdapter->host_list); scsi_host_put(Host); + ret = -ENODEV; } } kfree(PrototypeHostAdapter); kfree(BusLogic_ProbeInfoList); BusLogic_ProbeInfoList = NULL; - return 0; + return ret; } @@ -2954,6 +2980,7 @@ static int BusLogic_QueueCommand(struct scsi_cmnd *Command, void (*CompletionRou } +#if 0 /* BusLogic_AbortCommand aborts Command if possible. */ @@ -3024,6 +3051,7 @@ static int BusLogic_AbortCommand(struct scsi_cmnd *Command) return SUCCESS; } +#endif /* BusLogic_ResetHostAdapter resets Host Adapter if possible, marking all currently executing SCSI Commands as having been Reset. From 11a27ad782fc7ae4b7d6ac8fefad4ceb415300d6 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 9 Aug 2006 17:00:30 +1000 Subject: [PATCH 0115/1063] [POWERPC] SLB shadow buffer cleanup Cleanup some of the #define magic as suggested by Milton. Signed-off-by: Michael Neuling Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/asm-offsets.c | 5 +++++ arch/powerpc/kernel/entry_64.S | 13 ++++--------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 2ef7ea860379..a2f95e467a75 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -42,6 +42,7 @@ #include #include #include +#include #endif #define DEFINE(sym, val) \ @@ -137,6 +138,10 @@ int main(void) DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); DEFINE(PACA_SLBSHADOWPTR, offsetof(struct paca_struct, slb_shadow_ptr)); + DEFINE(SLBSHADOW_STACKVSID, + offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid)); + DEFINE(SLBSHADOW_STACKESID, + offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid)); DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0)); DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1)); DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int)); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 5baea498ea64..2cd872b5283b 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -323,11 +323,6 @@ _GLOBAL(ret_from_fork) * The code which creates the new task context is in 'copy_thread' * in arch/powerpc/kernel/process.c */ -#define SHADOW_SLB_BOLTED_STACK_ESID \ - (SLBSHADOW_SAVEAREA + 0x10*(SLB_NUM_BOLTED-1)) -#define SHADOW_SLB_BOLTED_STACK_VSID \ - (SLBSHADOW_SAVEAREA + 0x10*(SLB_NUM_BOLTED-1) + 8) - .align 7 _GLOBAL(_switch) mflr r0 @@ -383,10 +378,10 @@ BEGIN_FTR_SECTION /* Update the last bolted SLB */ ld r9,PACA_SLBSHADOWPTR(r13) - li r12,0 - std r12,SHADOW_SLB_BOLTED_STACK_ESID(r9) /* Clear ESID */ - std r7,SHADOW_SLB_BOLTED_STACK_VSID(r9) /* Save VSID */ - std r0,SHADOW_SLB_BOLTED_STACK_ESID(r9) /* Save ESID */ + li r12,0 + std r12,SLBSHADOW_STACKESID(r9) /* Clear ESID */ + std r7,SLBSHADOW_STACKVSID(r9) /* Save VSID */ + std r0,SLBSHADOW_STACKESID(r9) /* Save ESID */ slbie r6 slbie r6 /* Workaround POWER5 < DD2.1 issue */ From 9e6ee340194e8bd8f463b55c6d028272c0e64155 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Wed, 9 Aug 2006 15:28:13 -0700 Subject: [PATCH 0116/1063] [POWERPC] cell: interrupt.c whitespace clean up Whitespace clean up for cell/interrupt.c. Signed-off-by: Geoff Levand Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/interrupt.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 7813a58e0db4..6b57a47c5d37 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -89,17 +89,17 @@ static struct irq_chip iic_chip = { /* Get an IRQ number from the pending state register of the IIC */ static unsigned int iic_get_irq(struct pt_regs *regs) { - struct cbe_iic_pending_bits pending; - struct iic *iic; + struct cbe_iic_pending_bits pending; + struct iic *iic; - iic = &__get_cpu_var(iic); - *(unsigned long *) &pending = - in_be64((unsigned long __iomem *) &iic->regs->pending_destr); - iic->eoi_stack[++iic->eoi_ptr] = pending.prio; - BUG_ON(iic->eoi_ptr > 15); + iic = &__get_cpu_var(iic); + *(unsigned long *) &pending = + in_be64((unsigned long __iomem *) &iic->regs->pending_destr); + iic->eoi_stack[++iic->eoi_ptr] = pending.prio; + BUG_ON(iic->eoi_ptr > 15); if (pending.flags & CBE_IIC_IRQ_VALID) return irq_linear_revmap(iic->host, - iic_pending_to_hwnum(pending)); + iic_pending_to_hwnum(pending)); return NO_IRQ; } @@ -250,7 +250,7 @@ static int __init setup_iic(void) struct resource r0, r1; struct irq_host *host; int found = 0; - const u32 *np; + const u32 *np; for (dn = NULL; (dn = of_find_node_by_name(dn,"interrupt-controller")) != NULL;) { @@ -258,7 +258,7 @@ static int __init setup_iic(void) "IBM,CBEA-Internal-Interrupt-Controller")) continue; np = get_property(dn, "ibm,interrupt-server-ranges", NULL); - if (np == NULL) { + if (np == NULL) { printk(KERN_WARNING "IIC: CPU association not found\n"); of_node_put(dn); return -ENODEV; From 2e97425197ecf85641a89e5a4868f8e147cc443f Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Fri, 11 Aug 2006 00:03:01 -0500 Subject: [PATCH 0117/1063] [POWERPC] Rename cpu_setup_power4.S to cpu_setup_ppc970.S Rename cpu_setup_power4.S to cpu_setup_ppc970.S, since that's really what it is. No functional or other changes. Signed-off-by: Olof Johansson Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/{cpu_setup_power4.S => cpu_setup_ppc970.S} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename arch/powerpc/kernel/{cpu_setup_power4.S => cpu_setup_ppc970.S} (100%) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 814f242aeb8c..bcf50031a92a 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -16,7 +16,7 @@ obj-y := semaphore.o cputable.o ptrace.o syscalls.o \ obj-y += vdso32/ obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \ signal_64.o ptrace32.o \ - paca.o cpu_setup_power4.o \ + paca.o cpu_setup_ppc970.o \ firmware.o sysfs.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o diff --git a/arch/powerpc/kernel/cpu_setup_power4.S b/arch/powerpc/kernel/cpu_setup_ppc970.S similarity index 100% rename from arch/powerpc/kernel/cpu_setup_power4.S rename to arch/powerpc/kernel/cpu_setup_ppc970.S From f39b7a55a84e34e3074b168e30dc73b66e85261d Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Fri, 11 Aug 2006 00:07:08 -0500 Subject: [PATCH 0118/1063] [POWERPC] Cleanup CPU inits Cleanup CPU inits a bit more, Geoff Levand already did some earlier. * Move CPU state save to cpu_setup, since cpu_setup is only ever done on cpu 0 on 64-bit and save is never done more than once. * Rename __restore_cpu_setup to __restore_cpu_ppc970 and add function pointers to the cputable to use instead. Powermac always has 970 so no need to check there. * Rename __970_cpu_preinit to __cpu_preinit_ppc970 and check PVR before calling it instead of in it, it's too early to use cputable. * Rename pSeries_secondary_smp_init to generic_secondary_smp_init since everyone but powermac and iSeries use it. Signed-off-by: Olof Johansson Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kernel/cpu_setup_ppc970.S | 99 +++++++------------------- arch/powerpc/kernel/cputable.c | 6 ++ arch/powerpc/kernel/head_64.S | 52 +++++++++----- arch/powerpc/platforms/cell/smp.c | 4 +- arch/powerpc/platforms/pseries/smp.c | 4 +- include/asm-powerpc/cputable.h | 3 + 7 files changed, 71 insertions(+), 98 deletions(-) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index a2f95e467a75..c53acd2a6dfc 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -246,6 +246,7 @@ int main(void) DEFINE(CPU_SPEC_PVR_VALUE, offsetof(struct cpu_spec, pvr_value)); DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features)); DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup)); + DEFINE(CPU_SPEC_RESTORE, offsetof(struct cpu_spec, cpu_restore)); #ifndef CONFIG_PPC64 DEFINE(pbe_address, offsetof(struct pbe, address)); diff --git a/arch/powerpc/kernel/cpu_setup_ppc970.S b/arch/powerpc/kernel/cpu_setup_ppc970.S index f69af2c5d7b3..f619932794e8 100644 --- a/arch/powerpc/kernel/cpu_setup_ppc970.S +++ b/arch/powerpc/kernel/cpu_setup_ppc970.S @@ -16,27 +16,12 @@ #include #include -_GLOBAL(__970_cpu_preinit) - /* - * Do nothing if not running in HV mode - */ +_GLOBAL(__cpu_preinit_ppc970) + /* Do nothing if not running in HV mode */ mfmsr r0 rldicl. r0,r0,4,63 beqlr - /* - * Deal only with PPC970 and PPC970FX. - */ - mfspr r0,SPRN_PVR - srwi r0,r0,16 - cmpwi r0,0x39 - beq 1f - cmpwi r0,0x3c - beq 1f - cmpwi r0,0x44 - bnelr -1: - /* Make sure HID4:rm_ci is off before MMU is turned off, that large * pages are enabled with HID4:61 and clear HID5:DCBZ_size and * HID5:DCBZ32_ill @@ -72,21 +57,6 @@ _GLOBAL(__970_cpu_preinit) isync blr -_GLOBAL(__setup_cpu_ppc970) - mfspr r0,SPRN_HID0 - li r11,5 /* clear DOZE and SLEEP */ - rldimi r0,r11,52,8 /* set NAP and DPM */ - mtspr SPRN_HID0,r0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - sync - isync - blr - /* Definitions for the table use to save CPU states */ #define CS_HID0 0 #define CS_HID1 8 @@ -101,33 +71,28 @@ cpu_state_storage: .balign L1_CACHE_BYTES,0 .text -/* Called in normal context to backup CPU 0 state. This - * does not include cache settings. This function is also - * called for machine sleep. This does not include the MMU - * setup, BATs, etc... but rather the "special" registers - * like HID0, HID1, HID4, etc... - */ -_GLOBAL(__save_cpu_setup) - /* Some CR fields are volatile, we back it up all */ - mfcr r7 - /* Get storage ptr */ - LOAD_REG_IMMEDIATE(r5,cpu_state_storage) - - /* We only deal with 970 for now */ - mfspr r0,SPRN_PVR - srwi r0,r0,16 - cmpwi r0,0x39 - beq 1f - cmpwi r0,0x3c - beq 1f - cmpwi r0,0x44 - bne 2f - -1: /* skip if not running in HV mode */ +_GLOBAL(__setup_cpu_ppc970) + /* Do nothing if not running in HV mode */ mfmsr r0 rldicl. r0,r0,4,63 - beq 2f + beqlr + + mfspr r0,SPRN_HID0 + li r11,5 /* clear DOZE and SLEEP */ + rldimi r0,r11,52,8 /* set NAP and DPM */ + mtspr SPRN_HID0,r0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + sync + isync + + /* Save away cpu state */ + LOAD_REG_IMMEDIATE(r5,cpu_state_storage) /* Save HID0,1,4 and 5 */ mfspr r3,SPRN_HID0 @@ -139,35 +104,19 @@ _GLOBAL(__save_cpu_setup) mfspr r3,SPRN_HID5 std r3,CS_HID5(r5) -2: - mtcr r7 blr /* Called with no MMU context (typically MSR:IR/DR off) to * restore CPU state as backed up by the previous * function. This does not include cache setting */ -_GLOBAL(__restore_cpu_setup) - /* Get storage ptr (FIXME when using anton reloc as we - * are running with translation disabled here - */ - LOAD_REG_IMMEDIATE(r5,cpu_state_storage) - - /* We only deal with 970 for now */ - mfspr r0,SPRN_PVR - srwi r0,r0,16 - cmpwi r0,0x39 - beq 1f - cmpwi r0,0x3c - beq 1f - cmpwi r0,0x44 - bnelr - -1: /* skip if not running in HV mode */ +_GLOBAL(__restore_cpu_ppc970) + /* Do nothing if not running in HV mode */ mfmsr r0 rldicl. r0,r0,4,63 beqlr + LOAD_REG_IMMEDIATE(r5,cpu_state_storage) /* Before accessing memory, we make sure rm_ci is clear */ li r0,0 mfspr r3,SPRN_HID4 diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 272e43622fd6..306da4cd37a0 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -39,7 +39,10 @@ extern void __setup_cpu_7400(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_7410(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_745x(unsigned long offset, struct cpu_spec* spec); #endif /* CONFIG_PPC32 */ +#ifdef CONFIG_PPC64 extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec); +extern void __restore_cpu_ppc970(void); +#endif /* CONFIG_PPC64 */ /* This table only contains "desktop" CPUs, it need to be filled with embedded * ones as well... @@ -184,6 +187,7 @@ struct cpu_spec cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 8, .cpu_setup = __setup_cpu_ppc970, + .cpu_restore = __restore_cpu_ppc970, .oprofile_cpu_type = "ppc64/970", .oprofile_type = PPC_OPROFILE_POWER4, .platform = "ppc970", @@ -199,6 +203,7 @@ struct cpu_spec cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 8, .cpu_setup = __setup_cpu_ppc970, + .cpu_restore = __restore_cpu_ppc970, .oprofile_cpu_type = "ppc64/970", .oprofile_type = PPC_OPROFILE_POWER4, .platform = "ppc970", @@ -214,6 +219,7 @@ struct cpu_spec cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 8, .cpu_setup = __setup_cpu_ppc970, + .cpu_restore = __restore_cpu_ppc970, .oprofile_cpu_type = "ppc64/970", .oprofile_type = PPC_OPROFILE_POWER4, .platform = "ppc970", diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 6ff3cf506088..e9963d9f335a 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -132,7 +132,7 @@ _GLOBAL(__secondary_hold) bne 100b #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC) - LOAD_REG_IMMEDIATE(r4, .pSeries_secondary_smp_init) + LOAD_REG_IMMEDIATE(r4, .generic_secondary_smp_init) mtctr r4 mr r3,r24 bctr @@ -1484,19 +1484,17 @@ fwnmi_data_area: . = 0x8000 /* - * On pSeries, secondary processors spin in the following code. + * On pSeries and most other platforms, secondary processors spin + * in the following code. * At entry, r3 = this processor's number (physical cpu id) */ -_GLOBAL(pSeries_secondary_smp_init) +_GLOBAL(generic_secondary_smp_init) mr r24,r3 /* turn on 64-bit mode */ bl .enable_64b_mode isync - /* Copy some CPU settings from CPU 0 */ - bl .__restore_cpu_setup - /* Set up a paca value for this processor. Since we have the * physical cpu id in r24, we need to search the pacas to find * which logical id maps to our physical one. @@ -1522,15 +1520,28 @@ _GLOBAL(pSeries_secondary_smp_init) /* start. */ sync - /* Create a temp kernel stack for use before relocation is on. */ +#ifndef CONFIG_SMP + b 3b /* Never go on non-SMP */ +#else + cmpwi 0,r23,0 + beq 3b /* Loop until told to go */ + + /* See if we need to call a cpu state restore handler */ + LOAD_REG_IMMEDIATE(r23, cur_cpu_spec) + ld r23,0(r23) + ld r23,CPU_SPEC_RESTORE(r23) + cmpdi 0,r23,0 + beq 4f + ld r23,0(r23) + mtctr r23 + bctrl + +4: /* Create a temp kernel stack for use before relocation is on. */ ld r1,PACAEMERGSP(r13) subi r1,r1,STACK_FRAME_OVERHEAD - cmpwi 0,r23,0 -#ifdef CONFIG_SMP - bne .__secondary_start + b .__secondary_start #endif - b 3b /* Loop until told to go */ #ifdef CONFIG_PPC_ISERIES _STATIC(__start_initialization_iSeries) @@ -1611,7 +1622,16 @@ _GLOBAL(__start_initialization_multiplatform) bl .enable_64b_mode /* Setup some critical 970 SPRs before switching MMU off */ - bl .__970_cpu_preinit + mfspr r0,SPRN_PVR + srwi r0,r0,16 + cmpwi r0,0x39 /* 970 */ + beq 1f + cmpwi r0,0x3c /* 970FX */ + beq 1f + cmpwi r0,0x44 /* 970MP */ + bne 2f +1: bl .__cpu_preinit_ppc970 +2: /* Switch off MMU if not already */ LOAD_REG_IMMEDIATE(r4, .__after_prom_start - KERNELBASE) @@ -1782,7 +1802,7 @@ _GLOBAL(pmac_secondary_start) isync /* Copy some CPU settings from CPU 0 */ - bl .__restore_cpu_setup + bl .__restore_cpu_ppc970 /* pSeries do that early though I don't think we really need it */ mfmsr r3 @@ -1932,12 +1952,6 @@ _STATIC(start_here_multiplatform) mr r5,r26 bl .identify_cpu - /* Save some low level config HIDs of CPU0 to be copied to - * other CPUs later on, or used for suspend/resume - */ - bl .__save_cpu_setup - sync - /* Do very early kernel initializations, including initial hash table, * stab and slb setup before we turn on relocation. */ diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c index 46aef0640742..1c0acbad7425 100644 --- a/arch/powerpc/platforms/cell/smp.c +++ b/arch/powerpc/platforms/cell/smp.c @@ -57,7 +57,7 @@ */ static cpumask_t of_spin_map; -extern void pSeries_secondary_smp_init(unsigned long); +extern void generic_secondary_smp_init(unsigned long); /** * smp_startup_cpu() - start the given cpu @@ -74,7 +74,7 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu) { int status; unsigned long start_here = __pa((u32)*((unsigned long *) - pSeries_secondary_smp_init)); + generic_secondary_smp_init)); unsigned int pcpu; int start_cpu; diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index f39dad8b99e0..c6624b8a0e77 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -62,7 +62,7 @@ */ static cpumask_t of_spin_map; -extern void pSeries_secondary_smp_init(unsigned long); +extern void generic_secondary_smp_init(unsigned long); #ifdef CONFIG_HOTPLUG_CPU @@ -270,7 +270,7 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu) { int status; unsigned long start_here = __pa((u32)*((unsigned long *) - pSeries_secondary_smp_init)); + generic_secondary_smp_init)); unsigned int pcpu; int start_cpu; diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h index 1ba3c9983614..748bc1805da9 100644 --- a/include/asm-powerpc/cputable.h +++ b/include/asm-powerpc/cputable.h @@ -36,6 +36,7 @@ struct cpu_spec; typedef void (*cpu_setup_t)(unsigned long offset, struct cpu_spec* spec); +typedef void (*cpu_restore_t)(void); enum powerpc_oprofile_type { PPC_OPROFILE_INVALID = 0, @@ -65,6 +66,8 @@ struct cpu_spec { * BHT, SPD, etc... from head.S before branching to identify_machine */ cpu_setup_t cpu_setup; + /* Used to restore cpu setup on secondary processors and at resume */ + cpu_restore_t cpu_restore; /* Used by oprofile userspace to select the right counters */ char *oprofile_cpu_type; From 869d7f381e8c32de85ddfa9621125fb10a885f87 Mon Sep 17 00:00:00 2001 From: Jon Loeliger Date: Tue, 15 Aug 2006 16:19:02 -0500 Subject: [PATCH 0119/1063] [POWERPC] Allow MPC8641 HPCN to build with CONFIG_PCI disabled too. Signed-off-by: Jon Loeliger Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/86xx/mpc86xx_hpcn.c | 9 +++++++-- arch/powerpc/platforms/86xx/pci.c | 3 ++- include/asm-powerpc/mpc86xx.h | 2 -- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c index 4a33d95e7ad7..496cc7c3a54c 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c @@ -52,6 +52,7 @@ unsigned long pci_dram_offset = 0; #endif +#ifdef CONFIG_PCI static void mpc86xx_8259_cascade(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs) { @@ -60,14 +61,18 @@ static void mpc86xx_8259_cascade(unsigned int irq, struct irq_desc *desc, generic_handle_irq(cascade_irq, regs); desc->chip->eoi(irq); } +#endif /* CONFIG_PCI */ void __init mpc86xx_hpcn_init_irq(void) { struct mpic *mpic1; - struct device_node *np, *cascade_node = NULL; - int cascade_irq; + struct device_node *np; phys_addr_t openpic_paddr; +#ifdef CONFIG_PCI + struct device_node *cascade_node = NULL; + int cascade_irq; +#endif np = of_find_node_by_type(NULL, "open-pic"); if (np == NULL) diff --git a/arch/powerpc/platforms/86xx/pci.c b/arch/powerpc/platforms/86xx/pci.c index d7050c1108ff..481e18ed5be9 100644 --- a/arch/powerpc/platforms/86xx/pci.c +++ b/arch/powerpc/platforms/86xx/pci.c @@ -188,7 +188,8 @@ int __init add_bridge(struct device_node *dev) printk(KERN_INFO "Found MPC86xx PCIE host bridge at 0x%08lx. " "Firmware bus number: %d->%d\n", - rsrc.start, hose->first_busno, hose->last_busno); + (unsigned long) rsrc.start, + hose->first_busno, hose->last_busno); DBG(" ->Hose at 0x%p, cfg_addr=0x%p,cfg_data=0x%p\n", hose, hose->cfg_addr, hose->cfg_data); diff --git a/include/asm-powerpc/mpc86xx.h b/include/asm-powerpc/mpc86xx.h index f260382739fa..2d6ad859df7f 100644 --- a/include/asm-powerpc/mpc86xx.h +++ b/include/asm-powerpc/mpc86xx.h @@ -23,8 +23,6 @@ #define _ISA_MEM_BASE isa_mem_base #ifdef CONFIG_PCI #define PCI_DRAM_OFFSET pci_dram_offset -#else -#define PCI_DRAM_OFFSET 0 #endif #define CPU0_BOOT_RELEASE 0x01000000 From 9a2ded55c40ad17b8b12f87c592a40b2e8593c4d Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 16 Aug 2006 23:12:14 -0500 Subject: [PATCH 0120/1063] [POWERPC] powerpc: Make RTAS console init generic The rtas console doesn't have to be Cell specific. If we get both RTAS tokens, we should just enabled the console then and there. Signed-off-by: Michael Neuling Signed-off-by: Paul Mackerras --- arch/powerpc/Kconfig | 3 ++- arch/powerpc/kernel/rtas.c | 5 +++++ arch/powerpc/platforms/cell/setup.c | 4 ---- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 13e583f16ede..7f782b338e8e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -426,7 +426,8 @@ config PPC_IBM_CELL_BLADE select UDBG_RTAS_CONSOLE config UDBG_RTAS_CONSOLE - bool + bool "RTAS based debug console" + depends on PPC_RTAS default n config XICS diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index bfd66d3a035c..6b0699b82b41 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -910,6 +910,11 @@ int __init early_init_dt_scan_rtas(unsigned long node, basep = of_get_flat_dt_prop(node, "get-term-char", NULL); if (basep) rtas_getchar_token = *basep; + + if (rtas_putchar_token != RTAS_UNKNOWN_SERVICE && + rtas_getchar_token != RTAS_UNKNOWN_SERVICE) + udbg_init_rtas_console(); + #endif /* break now */ diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index 282987d6d4a2..22c228a49c33 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -150,10 +150,6 @@ static int __init cell_probe(void) !of_flat_dt_is_compatible(root, "IBM,CPBW-1.0")) return 0; -#ifdef CONFIG_UDBG_RTAS_CONSOLE - udbg_init_rtas_console(); -#endif - hpte_init_native(); return 1; From a0a428e30077fd64c39aadf5221cf2c7a14dc281 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 16 Aug 2006 15:24:28 +1000 Subject: [PATCH 0121/1063] [POWERPC] iseries: remove const warning Just one bit of fallout from the constification of the get_property return value. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/iseries/viopath.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/iseries/viopath.c b/arch/powerpc/platforms/iseries/viopath.c index efeb6ae9df64..9baa4ee82592 100644 --- a/arch/powerpc/platforms/iseries/viopath.c +++ b/arch/powerpc/platforms/iseries/viopath.c @@ -117,6 +117,7 @@ static int proc_viopath_show(struct seq_file *m, void *v) HvLpEvent_Rc hvrc; DECLARE_MUTEX_LOCKED(Semaphore); struct device_node *node; + const char *sysid; buf = kmalloc(HW_PAGE_SIZE, GFP_KERNEL); if (!buf) @@ -152,15 +153,15 @@ static int proc_viopath_show(struct seq_file *m, void *v) seq_printf(m, "AVAILABLE_VETH=%x\n", vlanMap); node = of_find_node_by_path("/"); - buf = NULL; + sysid = NULL; if (node != NULL) - buf = get_property(node, "system-id", NULL); + sysid = get_property(node, "system-id", NULL); - if (buf == NULL) + if (sysid == NULL) seq_printf(m, "SRLNBR=\n"); else /* Skip "IBM," on front of serial number, see dt.c */ - seq_printf(m, "SRLNBR=%s\n", buf + 4); + seq_printf(m, "SRLNBR=%s\n", sysid + 4); of_node_put(node); From 6f3d5d3cc4b1447578ae8484166bbc34a64150c5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 16 Aug 2006 22:04:14 +1000 Subject: [PATCH 0122/1063] [POWERPC] Add a helper for calculating RTAS "config_addr" parameters Several RTAS calls take a "config_addr" parameter, which is a particular way of specifying a PCI busno, devfn and register number into a 32-bit word. Currently these are open-coded, and I'll be adding another soon, replace them with a helper that encapsulates the logic. Be more strict about masking the busno too, just in case. Booted on P5 LPAR. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/rtas_pci.c | 6 ++---- include/asm-powerpc/rtas.h | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index 5a798ac6aecf..b4a0de79c060 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -81,8 +81,7 @@ int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val) if (!config_access_valid(pdn, where)) return PCIBIOS_BAD_REGISTER_NUMBER; - addr = ((where & 0xf00) << 20) | (pdn->busno << 16) | - (pdn->devfn << 8) | (where & 0xff); + addr = rtas_config_addr(pdn->busno, pdn->devfn, where); buid = pdn->phb->buid; if (buid) { ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval, @@ -134,8 +133,7 @@ int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val) if (!config_access_valid(pdn, where)) return PCIBIOS_BAD_REGISTER_NUMBER; - addr = ((where & 0xf00) << 20) | (pdn->busno << 16) | - (pdn->devfn << 8) | (where & 0xff); + addr = rtas_config_addr(pdn->busno, pdn->devfn, where); buid = pdn->phb->buid; if (buid) { ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr, diff --git a/include/asm-powerpc/rtas.h b/include/asm-powerpc/rtas.h index 82a27e9a041f..d34f9e1f242c 100644 --- a/include/asm-powerpc/rtas.h +++ b/include/asm-powerpc/rtas.h @@ -230,5 +230,21 @@ extern unsigned long rtas_rmo_buf; #define GLOBAL_INTERRUPT_QUEUE 9005 +/** + * rtas_config_addr - Format a busno, devfn and reg for RTAS. + * @busno: The bus number. + * @devfn: The device and function number as encoded by PCI_DEVFN(). + * @reg: The register number. + * + * This function encodes the given busno, devfn and register number as + * required for RTAS calls that take a "config_addr" parameter. + * See PAPR requirement 7.3.4-1 for more info. + */ +static inline u32 rtas_config_addr(int busno, int devfn, int reg) +{ + return ((reg & 0xf00) << 20) | ((busno & 0xff) << 16) | + (devfn << 8) | (reg & 0xff); +} + #endif /* __KERNEL__ */ #endif /* _POWERPC_RTAS_H */ From e2bf2e26c0915d54208315fc8c9864f1d987217a Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 17 Aug 2006 16:28:28 +1000 Subject: [PATCH 0123/1063] [POWERPC] iseries: remove some gcc 4.1 warnings gcc 4.1 produces some warnings that say it is ignoring the packed attribute on some structure elements, so, since all the elements of these structs are packed, pack the structs instead. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/iseries/main_store.h | 104 ++++++++++---------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/arch/powerpc/platforms/iseries/main_store.h b/arch/powerpc/platforms/iseries/main_store.h index 74f6889f834f..1a7a3f50e40b 100644 --- a/arch/powerpc/platforms/iseries/main_store.h +++ b/arch/powerpc/platforms/iseries/main_store.h @@ -61,9 +61,9 @@ struct IoHriMainStoreSegment4 { }; /* Main Store VPD for Power4 */ -struct IoHriMainStoreChipInfo1 { - u32 chipMfgID __attribute((packed)); - char chipECLevel[4] __attribute((packed)); +struct __attribute((packed)) IoHriMainStoreChipInfo1 { + u32 chipMfgID; + char chipECLevel[4]; }; struct IoHriMainStoreVpdIdData { @@ -73,72 +73,72 @@ struct IoHriMainStoreVpdIdData { char serialNumber[12]; }; -struct IoHriMainStoreVpdFruData { - char fruLabel[8] __attribute((packed)); - u8 numberOfSlots __attribute((packed)); - u8 pluggingType __attribute((packed)); - u16 slotMapIndex __attribute((packed)); +struct __attribute((packed)) IoHriMainStoreVpdFruData { + char fruLabel[8]; + u8 numberOfSlots; + u8 pluggingType; + u16 slotMapIndex; }; -struct IoHriMainStoreAdrRangeBlock { - void *blockStart __attribute((packed)); - void *blockEnd __attribute((packed)); - u32 blockProcChipId __attribute((packed)); +struct __attribute((packed)) IoHriMainStoreAdrRangeBlock { + void *blockStart; + void *blockEnd; + u32 blockProcChipId; }; #define MaxAreaAdrRangeBlocks 4 -struct IoHriMainStoreArea4 { - u32 msVpdFormat __attribute((packed)); - u8 containedVpdType __attribute((packed)); - u8 reserved1 __attribute((packed)); - u16 reserved2 __attribute((packed)); +struct __attribute((packed)) IoHriMainStoreArea4 { + u32 msVpdFormat; + u8 containedVpdType; + u8 reserved1; + u16 reserved2; - u64 msExists __attribute((packed)); - u64 msFunctional __attribute((packed)); + u64 msExists; + u64 msFunctional; - u32 memorySize __attribute((packed)); - u32 procNodeId __attribute((packed)); + u32 memorySize; + u32 procNodeId; - u32 numAdrRangeBlocks __attribute((packed)); - struct IoHriMainStoreAdrRangeBlock xAdrRangeBlock[MaxAreaAdrRangeBlocks] __attribute((packed)); + u32 numAdrRangeBlocks; + struct IoHriMainStoreAdrRangeBlock xAdrRangeBlock[MaxAreaAdrRangeBlocks]; - struct IoHriMainStoreChipInfo1 chipInfo0 __attribute((packed)); - struct IoHriMainStoreChipInfo1 chipInfo1 __attribute((packed)); - struct IoHriMainStoreChipInfo1 chipInfo2 __attribute((packed)); - struct IoHriMainStoreChipInfo1 chipInfo3 __attribute((packed)); - struct IoHriMainStoreChipInfo1 chipInfo4 __attribute((packed)); - struct IoHriMainStoreChipInfo1 chipInfo5 __attribute((packed)); - struct IoHriMainStoreChipInfo1 chipInfo6 __attribute((packed)); - struct IoHriMainStoreChipInfo1 chipInfo7 __attribute((packed)); + struct IoHriMainStoreChipInfo1 chipInfo0; + struct IoHriMainStoreChipInfo1 chipInfo1; + struct IoHriMainStoreChipInfo1 chipInfo2; + struct IoHriMainStoreChipInfo1 chipInfo3; + struct IoHriMainStoreChipInfo1 chipInfo4; + struct IoHriMainStoreChipInfo1 chipInfo5; + struct IoHriMainStoreChipInfo1 chipInfo6; + struct IoHriMainStoreChipInfo1 chipInfo7; - void *msRamAreaArray __attribute((packed)); - u32 msRamAreaArrayNumEntries __attribute((packed)); - u32 msRamAreaArrayEntrySize __attribute((packed)); + void *msRamAreaArray; + u32 msRamAreaArrayNumEntries; + u32 msRamAreaArrayEntrySize; - u32 numaDimmExists __attribute((packed)); - u32 numaDimmFunctional __attribute((packed)); - void *numaDimmArray __attribute((packed)); - u32 numaDimmArrayNumEntries __attribute((packed)); - u32 numaDimmArrayEntrySize __attribute((packed)); + u32 numaDimmExists; + u32 numaDimmFunctional; + void *numaDimmArray; + u32 numaDimmArrayNumEntries; + u32 numaDimmArrayEntrySize; - struct IoHriMainStoreVpdIdData idData __attribute((packed)); + struct IoHriMainStoreVpdIdData idData; - u64 powerData __attribute((packed)); - u64 cardAssemblyPartNum __attribute((packed)); - u64 chipSerialNum __attribute((packed)); + u64 powerData; + u64 cardAssemblyPartNum; + u64 chipSerialNum; - u64 reserved3 __attribute((packed)); - char reserved4[16] __attribute((packed)); + u64 reserved3; + char reserved4[16]; - struct IoHriMainStoreVpdFruData fruData __attribute((packed)); + struct IoHriMainStoreVpdFruData fruData; - u8 vpdPortNum __attribute((packed)); - u8 reserved5 __attribute((packed)); - u8 frameId __attribute((packed)); - u8 rackUnit __attribute((packed)); - char asciiKeywordVpd[256] __attribute((packed)); - u32 reserved6 __attribute((packed)); + u8 vpdPortNum; + u8 reserved5; + u8 frameId; + u8 rackUnit; + char asciiKeywordVpd[256]; + u32 reserved6; }; From 39ed2fe62c39ac46cda00b1759806a297f38743b Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Mon, 21 Aug 2006 18:11:32 +0200 Subject: [PATCH 0124/1063] [POWERPC] reboot when panic_timout is set Only call into RTAS when booted with panic=0 because the RTAS call does not return. The system has to be rebooted via the HMC or via the management console right now. This is cumbersome and not what the default panic=180 is supposed to do. Signed-off-by: Olaf Hering Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/rtas.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 6b0699b82b41..6ef80d4e38d3 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -628,6 +628,9 @@ void rtas_os_term(char *str) { int status; + if (panic_timeout) + return; + if (RTAS_UNKNOWN_SERVICE == rtas_token("ibm,os-term")) return; From 271c511db9d37d6797745adb1f151a8bd2838c6f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 22 Aug 2006 16:57:05 +0200 Subject: [PATCH 0125/1063] [POWERPC] make checkstack work with ARCH=powerpc This patch adds 'powerpc' architecture support to checkstack.pl. Signed-off-by: Johannes Berg Signed-off-by: Paul Mackerras --- scripts/checkstack.pl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl index b34924663ac1..f7844f6aa487 100755 --- a/scripts/checkstack.pl +++ b/scripts/checkstack.pl @@ -62,6 +62,8 @@ my (@stack, $re, $x, $xs); } elsif ($arch eq 'ppc64') { #XXX $re = qr/.*stdu.*r1,-($x{1,8})\(r1\)/o; + } elsif ($arch eq 'powerpc') { + $re = qr/.*st[dw]u.*r1,-($x{1,8})\(r1\)/o; } elsif ($arch =~ /^s390x?$/) { # 11160: a7 fb ff 60 aghi %r15,-160 $re = qr/.*ag?hi.*\%r15,-(([0-9]{2}|[3-9])[0-9]{2})/o; From 2818c5dec5e28d65d52afbb7695bbbafe6377ee5 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 25 Aug 2006 15:08:21 +1000 Subject: [PATCH 0126/1063] [POWERPC] Only offer CONFIG_BRIQ_PANEL if CONFIG_PPC_CHRP is enabled since only the briQ has a briQ front panel, and the briQ is a CHRP and is only supported if CONFIG_PPC_CHRP is set. Signed-off-by: Paul Mackerras --- drivers/char/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 320ad7ba11d4..52ea94b891f5 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -497,7 +497,7 @@ config LEGACY_PTY_COUNT config BRIQ_PANEL tristate 'Total Impact briQ front panel driver' - depends on PPC + depends on PPC_CHRP ---help--- The briQ is a small footprint CHRP computer with a frontpanel VFD, a tristate led and two switches. It is the size of a CDROM drive. From f4ad7b5807385ad1fed0347d966e51a797cd1013 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Fri, 25 Aug 2006 13:48:18 -0500 Subject: [PATCH 0127/1063] [SCSI] scsi_transport_sas: remove local_attached flag This flag denotes local attachment of the phy. There are two problems with it: 1) It's actually redundant ... you can get the same information simply by seeing whether a host is the phys parent 2) we condition a lot of phy parameters on it on the false assumption that we can only control local phys. I'm wiring up phy resets in the aic94xx now, and it will be able to reset non-local phys as well. I fixed 2) by moving the local check into the reset and stats function of the mptsas, since that seems to be the only HBA that can't (currently) control non-local phys. Signed-off-by: James Bottomley --- drivers/message/fusion/mptsas.c | 11 ++++++++--- drivers/scsi/scsi_transport_sas.c | 10 ++-------- include/scsi/scsi_transport_sas.h | 5 ++--- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c index dfdd1e445768..b752a479f6db 100644 --- a/drivers/message/fusion/mptsas.c +++ b/drivers/message/fusion/mptsas.c @@ -852,6 +852,10 @@ static int mptsas_get_linkerrors(struct sas_phy *phy) dma_addr_t dma_handle; int error; + /* FIXME: only have link errors on local phys */ + if (!scsi_is_sas_phy_local(phy)) + return -EINVAL; + hdr.PageVersion = MPI_SASPHY1_PAGEVERSION; hdr.ExtPageLength = 0; hdr.PageNumber = 1 /* page number 1*/; @@ -924,6 +928,10 @@ static int mptsas_phy_reset(struct sas_phy *phy, int hard_reset) unsigned long timeleft; int error = -ERESTARTSYS; + /* FIXME: fusion doesn't allow non-local phy reset */ + if (!scsi_is_sas_phy_local(phy)) + return -EINVAL; + /* not implemented for expanders */ if (phy->identify.target_port_protocols & SAS_PROTOCOL_SMP) return -ENXIO; @@ -1570,9 +1578,6 @@ static int mptsas_probe_one_phy(struct device *dev, if (!phy_info->phy) { - if (local) - phy->local_attached = 1; - error = sas_phy_add(phy); if (error) { sas_phy_free(phy); diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 5a625c3fddae..d518c1207fb4 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -266,9 +266,6 @@ show_sas_phy_##field(struct class_device *cdev, char *buf) \ struct sas_internal *i = to_sas_internal(shost->transportt); \ int error; \ \ - if (!phy->local_attached) \ - return -EINVAL; \ - \ error = i->f->get_linkerrors ? i->f->get_linkerrors(phy) : 0; \ if (error) \ return error; \ @@ -299,9 +296,6 @@ static ssize_t do_sas_phy_reset(struct class_device *cdev, struct sas_internal *i = to_sas_internal(shost->transportt); int error; - if (!phy->local_attached) - return -EINVAL; - error = i->f->phy_reset(phy, hard_reset); if (error) return error; @@ -849,7 +843,7 @@ show_sas_rphy_enclosure_identifier(struct class_device *cdev, char *buf) * Only devices behind an expander are supported, because the * enclosure identifier is a SMP feature. */ - if (phy->local_attached) + if (scsi_is_sas_phy_local(phy)) return -EINVAL; error = i->f->get_enclosure_identifier(rphy, &identifier); @@ -870,7 +864,7 @@ show_sas_rphy_bay_identifier(struct class_device *cdev, char *buf) struct sas_internal *i = to_sas_internal(shost->transportt); int val; - if (phy->local_attached) + if (scsi_is_sas_phy_local(phy)) return -EINVAL; val = i->f->get_bay_identifier(rphy); diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h index 6cc2314098cf..eeb2200de855 100644 --- a/include/scsi/scsi_transport_sas.h +++ b/include/scsi/scsi_transport_sas.h @@ -57,9 +57,6 @@ struct sas_phy { enum sas_linkrate maximum_linkrate_hw; enum sas_linkrate maximum_linkrate; - /* internal state */ - unsigned int local_attached : 1; - /* link error statistics */ u32 invalid_dword_count; u32 running_disparity_error_count; @@ -196,4 +193,6 @@ scsi_is_sas_expander_device(struct device *dev) rphy->identify.device_type == SAS_EDGE_EXPANDER_DEVICE; } +#define scsi_is_sas_phy_local(phy) scsi_is_host_device((phy)->dev.parent) + #endif /* SCSI_TRANSPORT_SAS_H */ From 2908d778ab3e244900c310974e1fc1c69066e450 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 29 Aug 2006 09:22:51 -0500 Subject: [PATCH 0128/1063] [SCSI] aic94xx: new driver This is the end point of the separate aic94xx driver based on the original driver and transport class from Luben Tuikov The log of the separate development is: Alexis Bruemmer: o aic94xx: fix hotplug/unplug for expanderless systems o aic94xx: disable split completion timer/setting by default o aic94xx: wide port off expander support o aic94xx: remove various inline functions o aic94xx: use bitops o aic94xx: remove queue comment o aic94xx: remove sas_common.c o aic94xx: sas remove depot's o aic94xx: use available list_for_each_entry_safe_reverse() o aic94xx: sas header file merge James Bottomley: o aic94xx: fix TF_TMF_NO_CTX processing o aic94xx: convert to request_firmware interface o aic94xx: fix hotplug/unplug o aic94xx: add link error counts to the expander phys o aic94xx: add transport class phy reset capability o aic94xx: remove local_attached flag o Remove README o Fixup Makefile variable for libsas rename o Rename sas->libsas o aic94xx: correct return code for sas_discover_event o aic94xx: use parent backlink port o aic94xx: remove channel abstraction o aic94xx: fix routing algorithms o aic94xx: add backlink port o aic94xx: fix cascaded expander properties o aic94xx: fix sleep under lock o aic94xx: fix panic on module removal in complex topology o aic94xx: make use of the new sas_port o rename sas_port to asd_sas_port o Fix for eh_strategy_handler move o aic94xx: move entirely over to correct transport class formulation o remove last vestages of sas_rphy_alloc() o update for eh_timed_out move o Preliminary expander support for aic94xx o sas: remove event thread o minor warning cleanups o remove last vestiges of id mapping arrays o Further updates o Convert aic94xx over entirely to the transport class end device and o update aic94xx/sas to use the new sas transport class end device o [PATCH] aic94xx: attaching to the sas transport class o Add missing completion removal from prior patch o [PATCH] aic94xx: attaching to the sas transport class o Build fixes from akpm Jeff Garzik: o [scsi aic94xx] Remove ->owner from PCI info table Luben Tuikov: o initial aic94xx driver Mike Anderson: o aic94xx: fix panic on module insertion o aic94xx: stub out SATA_DEV case o aic94xx: compile warning cleanups o aic94xx: sas_alloc_task o aic94xx: ref count update o aic94xx nexus loss time value o [PATCH] aic94xx: driver assertion in non-x86 BIOS env Randy Dunlap: o libsas: externs not needed Robert Tarte: o aic94xx: sequence patch - fixes SATA support Signed-off-by: James Bottomley --- Documentation/scsi/libsas.txt | 484 +++++ drivers/scsi/Kconfig | 5 +- drivers/scsi/Makefile | 2 + drivers/scsi/aic94xx/Kconfig | 41 + drivers/scsi/aic94xx/Makefile | 39 + drivers/scsi/aic94xx/aic94xx.h | 114 ++ drivers/scsi/aic94xx/aic94xx_dev.c | 353 ++++ drivers/scsi/aic94xx/aic94xx_dump.c | 959 ++++++++++ drivers/scsi/aic94xx/aic94xx_dump.h | 52 + drivers/scsi/aic94xx/aic94xx_hwi.c | 1376 ++++++++++++++ drivers/scsi/aic94xx/aic94xx_hwi.h | 397 ++++ drivers/scsi/aic94xx/aic94xx_init.c | 860 +++++++++ drivers/scsi/aic94xx/aic94xx_reg.c | 332 ++++ drivers/scsi/aic94xx/aic94xx_reg.h | 302 +++ drivers/scsi/aic94xx/aic94xx_reg_def.h | 2398 ++++++++++++++++++++++++ drivers/scsi/aic94xx/aic94xx_sas.h | 785 ++++++++ drivers/scsi/aic94xx/aic94xx_scb.c | 732 ++++++++ drivers/scsi/aic94xx/aic94xx_sds.c | 1136 +++++++++++ drivers/scsi/aic94xx/aic94xx_seq.c | 1401 ++++++++++++++ drivers/scsi/aic94xx/aic94xx_seq.h | 70 + drivers/scsi/aic94xx/aic94xx_task.c | 642 +++++++ drivers/scsi/aic94xx/aic94xx_tmf.c | 636 +++++++ drivers/scsi/libsas/Kconfig | 39 + drivers/scsi/libsas/Makefile | 36 + drivers/scsi/libsas/sas_discover.c | 749 ++++++++ drivers/scsi/libsas/sas_dump.c | 76 + drivers/scsi/libsas/sas_dump.h | 42 + drivers/scsi/libsas/sas_event.c | 75 + drivers/scsi/libsas/sas_expander.c | 1862 ++++++++++++++++++ drivers/scsi/libsas/sas_init.c | 227 +++ drivers/scsi/libsas/sas_internal.h | 146 ++ drivers/scsi/libsas/sas_phy.c | 157 ++ drivers/scsi/libsas/sas_port.c | 279 +++ drivers/scsi/libsas/sas_scsi_host.c | 786 ++++++++ include/scsi/libsas.h | 627 +++++++ include/scsi/sas.h | 644 +++++++ include/scsi/scsi.h | 6 + 37 files changed, 18866 insertions(+), 1 deletion(-) create mode 100644 Documentation/scsi/libsas.txt create mode 100644 drivers/scsi/aic94xx/Kconfig create mode 100644 drivers/scsi/aic94xx/Makefile create mode 100644 drivers/scsi/aic94xx/aic94xx.h create mode 100644 drivers/scsi/aic94xx/aic94xx_dev.c create mode 100644 drivers/scsi/aic94xx/aic94xx_dump.c create mode 100644 drivers/scsi/aic94xx/aic94xx_dump.h create mode 100644 drivers/scsi/aic94xx/aic94xx_hwi.c create mode 100644 drivers/scsi/aic94xx/aic94xx_hwi.h create mode 100644 drivers/scsi/aic94xx/aic94xx_init.c create mode 100644 drivers/scsi/aic94xx/aic94xx_reg.c create mode 100644 drivers/scsi/aic94xx/aic94xx_reg.h create mode 100644 drivers/scsi/aic94xx/aic94xx_reg_def.h create mode 100644 drivers/scsi/aic94xx/aic94xx_sas.h create mode 100644 drivers/scsi/aic94xx/aic94xx_scb.c create mode 100644 drivers/scsi/aic94xx/aic94xx_sds.c create mode 100644 drivers/scsi/aic94xx/aic94xx_seq.c create mode 100644 drivers/scsi/aic94xx/aic94xx_seq.h create mode 100644 drivers/scsi/aic94xx/aic94xx_task.c create mode 100644 drivers/scsi/aic94xx/aic94xx_tmf.c create mode 100644 drivers/scsi/libsas/Kconfig create mode 100644 drivers/scsi/libsas/Makefile create mode 100644 drivers/scsi/libsas/sas_discover.c create mode 100644 drivers/scsi/libsas/sas_dump.c create mode 100644 drivers/scsi/libsas/sas_dump.h create mode 100644 drivers/scsi/libsas/sas_event.c create mode 100644 drivers/scsi/libsas/sas_expander.c create mode 100644 drivers/scsi/libsas/sas_init.c create mode 100644 drivers/scsi/libsas/sas_internal.h create mode 100644 drivers/scsi/libsas/sas_phy.c create mode 100644 drivers/scsi/libsas/sas_port.c create mode 100644 drivers/scsi/libsas/sas_scsi_host.c create mode 100644 include/scsi/libsas.h create mode 100644 include/scsi/sas.h diff --git a/Documentation/scsi/libsas.txt b/Documentation/scsi/libsas.txt new file mode 100644 index 000000000000..9e2078b2a615 --- /dev/null +++ b/Documentation/scsi/libsas.txt @@ -0,0 +1,484 @@ +SAS Layer +--------- + +The SAS Layer is a management infrastructure which manages +SAS LLDDs. It sits between SCSI Core and SAS LLDDs. The +layout is as follows: while SCSI Core is concerned with +SAM/SPC issues, and a SAS LLDD+sequencer is concerned with +phy/OOB/link management, the SAS layer is concerned with: + + * SAS Phy/Port/HA event management (LLDD generates, + SAS Layer processes), + * SAS Port management (creation/destruction), + * SAS Domain discovery and revalidation, + * SAS Domain device management, + * SCSI Host registration/unregistration, + * Device registration with SCSI Core (SAS) or libata + (SATA), and + * Expander management and exporting expander control + to user space. + +A SAS LLDD is a PCI device driver. It is concerned with +phy/OOB management, and vendor specific tasks and generates +events to the SAS layer. + +The SAS Layer does most SAS tasks as outlined in the SAS 1.1 +spec. + +The sas_ha_struct describes the SAS LLDD to the SAS layer. +Most of it is used by the SAS Layer but a few fields need to +be initialized by the LLDDs. + +After initializing your hardware, from the probe() function +you call sas_register_ha(). It will register your LLDD with +the SCSI subsystem, creating a SCSI host and it will +register your SAS driver with the sysfs SAS tree it creates. +It will then return. Then you enable your phys to actually +start OOB (at which point your driver will start calling the +notify_* event callbacks). + +Structure descriptions: + +struct sas_phy -------------------- +Normally this is statically embedded to your driver's +phy structure: + struct my_phy { + blah; + struct sas_phy sas_phy; + bleh; + }; +And then all the phys are an array of my_phy in your HA +struct (shown below). + +Then as you go along and initialize your phys you also +initialize the sas_phy struct, along with your own +phy structure. + +In general, the phys are managed by the LLDD and the ports +are managed by the SAS layer. So the phys are initialized +and updated by the LLDD and the ports are initialized and +updated by the SAS layer. + +There is a scheme where the LLDD can RW certain fields, +and the SAS layer can only read such ones, and vice versa. +The idea is to avoid unnecessary locking. + +enabled -- must be set (0/1) +id -- must be set [0,MAX_PHYS) +class, proto, type, role, oob_mode, linkrate -- must be set +oob_mode -- you set this when OOB has finished and then notify +the SAS Layer. + +sas_addr -- this normally points to an array holding the sas +address of the phy, possibly somewhere in your my_phy +struct. + +attached_sas_addr -- set this when you (LLDD) receive an +IDENTIFY frame or a FIS frame, _before_ notifying the SAS +layer. The idea is that sometimes the LLDD may want to fake +or provide a different SAS address on that phy/port and this +allows it to do this. At best you should copy the sas +address from the IDENTIFY frame or maybe generate a SAS +address for SATA directly attached devices. The Discover +process may later change this. + +frame_rcvd -- this is where you copy the IDENTIFY/FIS frame +when you get it; you lock, copy, set frame_rcvd_size and +unlock the lock, and then call the event. It is a pointer +since there's no way to know your hw frame size _exactly_, +so you define the actual array in your phy struct and let +this pointer point to it. You copy the frame from your +DMAable memory to that area holding the lock. + +sas_prim -- this is where primitives go when they're +received. See sas.h. Grab the lock, set the primitive, +release the lock, notify. + +port -- this points to the sas_port if the phy belongs +to a port -- the LLDD only reads this. It points to the +sas_port this phy is part of. Set by the SAS Layer. + +ha -- may be set; the SAS layer sets it anyway. + +lldd_phy -- you should set this to point to your phy so you +can find your way around faster when the SAS layer calls one +of your callbacks and passes you a phy. If the sas_phy is +embedded you can also use container_of -- whatever you +prefer. + + +struct sas_port -------------------- +The LLDD doesn't set any fields of this struct -- it only +reads them. They should be self explanatory. + +phy_mask is 32 bit, this should be enough for now, as I +haven't heard of a HA having more than 8 phys. + +lldd_port -- I haven't found use for that -- maybe other +LLDD who wish to have internal port representation can make +use of this. + + +struct sas_ha_struct -------------------- +It normally is statically declared in your own LLDD +structure describing your adapter: +struct my_sas_ha { + blah; + struct sas_ha_struct sas_ha; + struct my_phy phys[MAX_PHYS]; + struct sas_port sas_ports[MAX_PHYS]; /* (1) */ + bleh; +}; + +(1) If your LLDD doesn't have its own port representation. + +What needs to be initialized (sample function given below). + +pcidev +sas_addr -- since the SAS layer doesn't want to mess with + memory allocation, etc, this points to statically + allocated array somewhere (say in your host adapter + structure) and holds the SAS address of the host + adapter as given by you or the manufacturer, etc. +sas_port +sas_phy -- an array of pointers to structures. (see + note above on sas_addr). + These must be set. See more notes below. +num_phys -- the number of phys present in the sas_phy array, + and the number of ports present in the sas_port + array. There can be a maximum num_phys ports (one per + port) so we drop the num_ports, and only use + num_phys. + +The event interface: + + /* LLDD calls these to notify the class of an event. */ + void (*notify_ha_event)(struct sas_ha_struct *, enum ha_event); + void (*notify_port_event)(struct sas_phy *, enum port_event); + void (*notify_phy_event)(struct sas_phy *, enum phy_event); + +When sas_register_ha() returns, those are set and can be +called by the LLDD to notify the SAS layer of such events +the SAS layer. + +The port notification: + + /* The class calls these to notify the LLDD of an event. */ + void (*lldd_port_formed)(struct sas_phy *); + void (*lldd_port_deformed)(struct sas_phy *); + +If the LLDD wants notification when a port has been formed +or deformed it sets those to a function satisfying the type. + +A SAS LLDD should also implement at least one of the Task +Management Functions (TMFs) described in SAM: + + /* Task Management Functions. Must be called from process context. */ + int (*lldd_abort_task)(struct sas_task *); + int (*lldd_abort_task_set)(struct domain_device *, u8 *lun); + int (*lldd_clear_aca)(struct domain_device *, u8 *lun); + int (*lldd_clear_task_set)(struct domain_device *, u8 *lun); + int (*lldd_I_T_nexus_reset)(struct domain_device *); + int (*lldd_lu_reset)(struct domain_device *, u8 *lun); + int (*lldd_query_task)(struct sas_task *); + +For more information please read SAM from T10.org. + +Port and Adapter management: + + /* Port and Adapter management */ + int (*lldd_clear_nexus_port)(struct sas_port *); + int (*lldd_clear_nexus_ha)(struct sas_ha_struct *); + +A SAS LLDD should implement at least one of those. + +Phy management: + + /* Phy management */ + int (*lldd_control_phy)(struct sas_phy *, enum phy_func); + +lldd_ha -- set this to point to your HA struct. You can also +use container_of if you embedded it as shown above. + +A sample initialization and registration function +can look like this (called last thing from probe()) +*but* before you enable the phys to do OOB: + +static int register_sas_ha(struct my_sas_ha *my_ha) +{ + int i; + static struct sas_phy *sas_phys[MAX_PHYS]; + static struct sas_port *sas_ports[MAX_PHYS]; + + my_ha->sas_ha.sas_addr = &my_ha->sas_addr[0]; + + for (i = 0; i < MAX_PHYS; i++) { + sas_phys[i] = &my_ha->phys[i].sas_phy; + sas_ports[i] = &my_ha->sas_ports[i]; + } + + my_ha->sas_ha.sas_phy = sas_phys; + my_ha->sas_ha.sas_port = sas_ports; + my_ha->sas_ha.num_phys = MAX_PHYS; + + my_ha->sas_ha.lldd_port_formed = my_port_formed; + + my_ha->sas_ha.lldd_dev_found = my_dev_found; + my_ha->sas_ha.lldd_dev_gone = my_dev_gone; + + my_ha->sas_ha.lldd_max_execute_num = lldd_max_execute_num; (1) + + my_ha->sas_ha.lldd_queue_size = ha_can_queue; + my_ha->sas_ha.lldd_execute_task = my_execute_task; + + my_ha->sas_ha.lldd_abort_task = my_abort_task; + my_ha->sas_ha.lldd_abort_task_set = my_abort_task_set; + my_ha->sas_ha.lldd_clear_aca = my_clear_aca; + my_ha->sas_ha.lldd_clear_task_set = my_clear_task_set; + my_ha->sas_ha.lldd_I_T_nexus_reset= NULL; (2) + my_ha->sas_ha.lldd_lu_reset = my_lu_reset; + my_ha->sas_ha.lldd_query_task = my_query_task; + + my_ha->sas_ha.lldd_clear_nexus_port = my_clear_nexus_port; + my_ha->sas_ha.lldd_clear_nexus_ha = my_clear_nexus_ha; + + my_ha->sas_ha.lldd_control_phy = my_control_phy; + + return sas_register_ha(&my_ha->sas_ha); +} + +(1) This is normally a LLDD parameter, something of the +lines of a task collector. What it tells the SAS Layer is +whether the SAS layer should run in Direct Mode (default: +value 0 or 1) or Task Collector Mode (value greater than 1). + +In Direct Mode, the SAS Layer calls Execute Task as soon as +it has a command to send to the SDS, _and_ this is a single +command, i.e. not linked. + +Some hardware (e.g. aic94xx) has the capability to DMA more +than one task at a time (interrupt) from host memory. Task +Collector Mode is an optional feature for HAs which support +this in their hardware. (Again, it is completely optional +even if your hardware supports it.) + +In Task Collector Mode, the SAS Layer would do _natural_ +coalescing of tasks and at the appropriate moment it would +call your driver to DMA more than one task in a single HA +interrupt. DMBS may want to use this by insmod/modprobe +setting the lldd_max_execute_num to something greater than +1. + +(2) SAS 1.1 does not define I_T Nexus Reset TMF. + +Events +------ + +Events are _the only way_ a SAS LLDD notifies the SAS layer +of anything. There is no other method or way a LLDD to tell +the SAS layer of anything happening internally or in the SAS +domain. + +Phy events: + PHYE_LOSS_OF_SIGNAL, (C) + PHYE_OOB_DONE, + PHYE_OOB_ERROR, (C) + PHYE_SPINUP_HOLD. + +Port events, passed on a _phy_: + PORTE_BYTES_DMAED, (M) + PORTE_BROADCAST_RCVD, (E) + PORTE_LINK_RESET_ERR, (C) + PORTE_TIMER_EVENT, (C) + PORTE_HARD_RESET. + +Host Adapter event: + HAE_RESET + +A SAS LLDD should be able to generate + - at least one event from group C (choice), + - events marked M (mandatory) are mandatory (only one), + - events marked E (expander) if it wants the SAS layer + to handle domain revalidation (only one such). + - Unmarked events are optional. + +Meaning: + +HAE_RESET -- when your HA got internal error and was reset. + +PORTE_BYTES_DMAED -- on receiving an IDENTIFY/FIS frame +PORTE_BROADCAST_RCVD -- on receiving a primitive +PORTE_LINK_RESET_ERR -- timer expired, loss of signal, loss +of DWS, etc. (*) +PORTE_TIMER_EVENT -- DWS reset timeout timer expired (*) +PORTE_HARD_RESET -- Hard Reset primitive received. + +PHYE_LOSS_OF_SIGNAL -- the device is gone (*) +PHYE_OOB_DONE -- OOB went fine and oob_mode is valid +PHYE_OOB_ERROR -- Error while doing OOB, the device probably +got disconnected. (*) +PHYE_SPINUP_HOLD -- SATA is present, COMWAKE not sent. + +(*) should set/clear the appropriate fields in the phy, + or alternatively call the inlined sas_phy_disconnected() + which is just a helper, from their tasklet. + +The Execute Command SCSI RPC: + + int (*lldd_execute_task)(struct sas_task *, int num, + unsigned long gfp_flags); + +Used to queue a task to the SAS LLDD. @task is the tasks to +be executed. @num should be the number of tasks being +queued at this function call (they are linked listed via +task::list), @gfp_mask should be the gfp_mask defining the +context of the caller. + +This function should implement the Execute Command SCSI RPC, +or if you're sending a SCSI Task as linked commands, you +should also use this function. + +That is, when lldd_execute_task() is called, the command(s) +go out on the transport *immediately*. There is *no* +queuing of any sort and at any level in a SAS LLDD. + +The use of task::list is two-fold, one for linked commands, +the other discussed below. + +It is possible to queue up more than one task at a time, by +initializing the list element of struct sas_task, and +passing the number of tasks enlisted in this manner in num. + +Returns: -SAS_QUEUE_FULL, -ENOMEM, nothing was queued; + 0, the task(s) were queued. + +If you want to pass num > 1, then either +A) you're the only caller of this function and keep track + of what you've queued to the LLDD, or +B) you know what you're doing and have a strategy of + retrying. + +As opposed to queuing one task at a time (function call), +batch queuing of tasks, by having num > 1, greatly +simplifies LLDD code, sequencer code, and _hardware design_, +and has some performance advantages in certain situations +(DBMS). + +The LLDD advertises if it can take more than one command at +a time at lldd_execute_task(), by setting the +lldd_max_execute_num parameter (controlled by "collector" +module parameter in aic94xx SAS LLDD). + +You should leave this to the default 1, unless you know what +you're doing. + +This is a function of the LLDD, to which the SAS layer can +cater to. + +int lldd_queue_size + The host adapter's queue size. This is the maximum +number of commands the lldd can have pending to domain +devices on behalf of all upper layers submitting through +lldd_execute_task(). + +You really want to set this to something (much) larger than +1. + +This _really_ has absolutely nothing to do with queuing. +There is no queuing in SAS LLDDs. + +struct sas_task { + dev -- the device this task is destined to + list -- must be initialized (INIT_LIST_HEAD) + task_proto -- _one_ of enum sas_proto + scatter -- pointer to scatter gather list array + num_scatter -- number of elements in scatter + total_xfer_len -- total number of bytes expected to be transfered + data_dir -- PCI_DMA_... + task_done -- callback when the task has finished execution +}; + +When an external entity, entity other than the LLDD or the +SAS Layer, wants to work with a struct domain_device, it +_must_ call kobject_get() when getting a handle on the +device and kobject_put() when it is done with the device. + +This does two things: + A) implements proper kfree() for the device; + B) increments/decrements the kref for all players: + domain_device + all domain_device's ... (if past an expander) + port + host adapter + pci device + and up the ladder, etc. + +DISCOVERY +--------- + +The sysfs tree has the following purposes: + a) It shows you the physical layout of the SAS domain at + the current time, i.e. how the domain looks in the + physical world right now. + b) Shows some device parameters _at_discovery_time_. + +This is a link to the tree(1) program, very useful in +viewing the SAS domain: +ftp://mama.indstate.edu/linux/tree/ +I expect user space applications to actually create a +graphical interface of this. + +That is, the sysfs domain tree doesn't show or keep state if +you e.g., change the meaning of the READY LED MEANING +setting, but it does show you the current connection status +of the domain device. + +Keeping internal device state changes is responsibility of +upper layers (Command set drivers) and user space. + +When a device or devices are unplugged from the domain, this +is reflected in the sysfs tree immediately, and the device(s) +removed from the system. + +The structure domain_device describes any device in the SAS +domain. It is completely managed by the SAS layer. A task +points to a domain device, this is how the SAS LLDD knows +where to send the task(s) to. A SAS LLDD only reads the +contents of the domain_device structure, but it never creates +or destroys one. + +Expander management from User Space +----------------------------------- + +In each expander directory in sysfs, there is a file called +"smp_portal". It is a binary sysfs attribute file, which +implements an SMP portal (Note: this is *NOT* an SMP port), +to which user space applications can send SMP requests and +receive SMP responses. + +Functionality is deceptively simple: + +1. Build the SMP frame you want to send. The format and layout + is described in the SAS spec. Leave the CRC field equal 0. +open(2) +2. Open the expander's SMP portal sysfs file in RW mode. +write(2) +3. Write the frame you built in 1. +read(2) +4. Read the amount of data you expect to receive for the frame you built. + If you receive different amount of data you expected to receive, + then there was some kind of error. +close(2) +All this process is shown in detail in the function do_smp_func() +and its callers, in the file "expander_conf.c". + +The kernel functionality is implemented in the file +"sas_expander.c". + +The program "expander_conf.c" implements this. It takes one +argument, the sysfs file name of the SMP portal to the +expander, and gives expander information, including routing +tables. + +The SMP portal gives you complete control of the expander, +so please be careful. diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index d61662c1a0ee..7de5fdfdab67 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -209,7 +209,7 @@ config SCSI_LOGGING there should be no noticeable performance impact as long as you have logging turned off. -menu "SCSI Transport Attributes" +menu "SCSI Transports" depends on SCSI config SCSI_SPI_ATTRS @@ -242,6 +242,8 @@ config SCSI_SAS_ATTRS If you wish to export transport-specific information about each attached SAS device to sysfs, say Y. +source "drivers/scsi/libsas/Kconfig" + endmenu menu "SCSI low-level drivers" @@ -431,6 +433,7 @@ config SCSI_AIC7XXX_OLD module will be called aic7xxx_old. source "drivers/scsi/aic7xxx/Kconfig.aic79xx" +source "drivers/scsi/aic94xx/Kconfig" # All the I2O code and drivers do not seem to be 64bit safe. config SCSI_DPT_I2O diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index b2de9bfdfdcd..83da70decdd1 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -32,6 +32,7 @@ obj-$(CONFIG_SCSI_SPI_ATTRS) += scsi_transport_spi.o obj-$(CONFIG_SCSI_FC_ATTRS) += scsi_transport_fc.o obj-$(CONFIG_SCSI_ISCSI_ATTRS) += scsi_transport_iscsi.o obj-$(CONFIG_SCSI_SAS_ATTRS) += scsi_transport_sas.o +obj-$(CONFIG_SCSI_SAS_LIBSAS) += libsas/ obj-$(CONFIG_ISCSI_TCP) += libiscsi.o iscsi_tcp.o obj-$(CONFIG_INFINIBAND_ISER) += libiscsi.o @@ -68,6 +69,7 @@ obj-$(CONFIG_SCSI_AIC7XXX) += aic7xxx/ obj-$(CONFIG_SCSI_AIC79XX) += aic7xxx/ obj-$(CONFIG_SCSI_AACRAID) += aacraid/ obj-$(CONFIG_SCSI_AIC7XXX_OLD) += aic7xxx_old.o +obj-$(CONFIG_SCSI_AIC94XX) += aic94xx/ obj-$(CONFIG_SCSI_IPS) += ips.o obj-$(CONFIG_SCSI_FD_MCS) += fd_mcs.o obj-$(CONFIG_SCSI_FUTURE_DOMAIN)+= fdomain.o diff --git a/drivers/scsi/aic94xx/Kconfig b/drivers/scsi/aic94xx/Kconfig new file mode 100644 index 000000000000..0ed391d8ee84 --- /dev/null +++ b/drivers/scsi/aic94xx/Kconfig @@ -0,0 +1,41 @@ +# +# Kernel configuration file for aic94xx SAS/SATA driver. +# +# Copyright (c) 2005 Adaptec, Inc. All rights reserved. +# Copyright (c) 2005 Luben Tuikov +# +# This file is licensed under GPLv2. +# +# This file is part of the aic94xx driver. +# +# The aic94xx driver is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; version 2 of the +# License. +# +# The aic94xx driver is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Aic94xx Driver; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +# +# + +config SCSI_AIC94XX + tristate "Adaptec AIC94xx SAS/SATA support" + depends on PCI + select SCSI_SAS_LIBSAS + help + This driver supports Adaptec's SAS/SATA 3Gb/s 64 bit PCI-X + AIC94xx chip based host adapters. + +config AIC94XX_DEBUG + bool "Compile in debug mode" + default y + depends on SCSI_AIC94XX + help + Compiles the aic94xx driver in debug mode. In debug mode, + the driver prints some messages to the console. diff --git a/drivers/scsi/aic94xx/Makefile b/drivers/scsi/aic94xx/Makefile new file mode 100644 index 000000000000..e6b70123940c --- /dev/null +++ b/drivers/scsi/aic94xx/Makefile @@ -0,0 +1,39 @@ +# +# Makefile for Adaptec aic94xx SAS/SATA driver. +# +# Copyright (C) 2005 Adaptec, Inc. All rights reserved. +# Copyright (C) 2005 Luben Tuikov +# +# This file is licensed under GPLv2. +# +# This file is part of the the aic94xx driver. +# +# The aic94xx driver is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; version 2 of the +# License. +# +# The aic94xx driver is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with the aic94xx driver; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +ifeq ($(CONFIG_AIC94XX_DEBUG),y) + EXTRA_CFLAGS += -DASD_DEBUG -DASD_ENTER_EXIT +endif + +obj-$(CONFIG_SCSI_AIC94XX) += aic94xx.o +aic94xx-y += aic94xx_init.o \ + aic94xx_hwi.o \ + aic94xx_reg.o \ + aic94xx_sds.o \ + aic94xx_seq.o \ + aic94xx_dump.o \ + aic94xx_scb.o \ + aic94xx_dev.o \ + aic94xx_tmf.o \ + aic94xx_task.o diff --git a/drivers/scsi/aic94xx/aic94xx.h b/drivers/scsi/aic94xx/aic94xx.h new file mode 100644 index 000000000000..cb7caf1c9ce1 --- /dev/null +++ b/drivers/scsi/aic94xx/aic94xx.h @@ -0,0 +1,114 @@ +/* + * Aic94xx SAS/SATA driver header file. + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This file is part of the aic94xx driver. + * + * The aic94xx driver is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2 of the + * License. + * + * The aic94xx driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with the aic94xx driver; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * $Id: //depot/aic94xx/aic94xx.h#31 $ + */ + +#ifndef _AIC94XX_H_ +#define _AIC94XX_H_ + +#include +#include +#include + +#define ASD_DRIVER_NAME "aic94xx" +#define ASD_DRIVER_DESCRIPTION "Adaptec aic94xx SAS/SATA driver" + +#define asd_printk(fmt, ...) printk(KERN_NOTICE ASD_DRIVER_NAME ": " fmt, ## __VA_ARGS__) + +#ifdef ASD_ENTER_EXIT +#define ENTER printk(KERN_NOTICE "%s: ENTER %s\n", ASD_DRIVER_NAME, \ + __FUNCTION__) +#define EXIT printk(KERN_NOTICE "%s: --EXIT %s\n", ASD_DRIVER_NAME, \ + __FUNCTION__) +#else +#define ENTER +#define EXIT +#endif + +#ifdef ASD_DEBUG +#define ASD_DPRINTK asd_printk +#else +#define ASD_DPRINTK(fmt, ...) +#endif + +/* 2*ITNL timeout + 1 second */ +#define AIC94XX_SCB_TIMEOUT (5*HZ) + +extern kmem_cache_t *asd_dma_token_cache; +extern kmem_cache_t *asd_ascb_cache; +extern char sas_addr_str[2*SAS_ADDR_SIZE + 1]; + +static inline void asd_stringify_sas_addr(char *p, const u8 *sas_addr) +{ + int i; + for (i = 0; i < SAS_ADDR_SIZE; i++, p += 2) + snprintf(p, 3, "%02X", sas_addr[i]); + *p = '\0'; +} + +static inline void asd_destringify_sas_addr(u8 *sas_addr, const char *p) +{ + int i; + for (i = 0; i < SAS_ADDR_SIZE; i++) { + u8 h, l; + if (!*p) + break; + h = isdigit(*p) ? *p-'0' : *p-'A'+10; + p++; + l = isdigit(*p) ? *p-'0' : *p-'A'+10; + p++; + sas_addr[i] = (h<<4) | l; + } +} + +struct asd_ha_struct; +struct asd_ascb; + +int asd_read_ocm(struct asd_ha_struct *asd_ha); +int asd_read_flash(struct asd_ha_struct *asd_ha); + +int asd_dev_found(struct domain_device *dev); +void asd_dev_gone(struct domain_device *dev); + +void asd_invalidate_edb(struct asd_ascb *ascb, int edb_id); + +int asd_execute_task(struct sas_task *, int num, unsigned long gfp_flags); + +/* ---------- TMFs ---------- */ +int asd_abort_task(struct sas_task *); +int asd_abort_task_set(struct domain_device *, u8 *lun); +int asd_clear_aca(struct domain_device *, u8 *lun); +int asd_clear_task_set(struct domain_device *, u8 *lun); +int asd_lu_reset(struct domain_device *, u8 *lun); +int asd_query_task(struct sas_task *); + +/* ---------- Adapter and Port management ---------- */ +int asd_clear_nexus_port(struct asd_sas_port *port); +int asd_clear_nexus_ha(struct sas_ha_struct *sas_ha); + +/* ---------- Phy Management ---------- */ +int asd_control_phy(struct asd_sas_phy *phy, enum phy_func func); + +#endif diff --git a/drivers/scsi/aic94xx/aic94xx_dev.c b/drivers/scsi/aic94xx/aic94xx_dev.c new file mode 100644 index 000000000000..6f8901b748f7 --- /dev/null +++ b/drivers/scsi/aic94xx/aic94xx_dev.c @@ -0,0 +1,353 @@ +/* + * Aic94xx SAS/SATA DDB management + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This file is part of the aic94xx driver. + * + * The aic94xx driver is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2 of the + * License. + * + * The aic94xx driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with the aic94xx driver; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * $Id: //depot/aic94xx/aic94xx_dev.c#21 $ + */ + +#include "aic94xx.h" +#include "aic94xx_hwi.h" +#include "aic94xx_reg.h" +#include "aic94xx_sas.h" + +#define FIND_FREE_DDB(_ha) find_first_zero_bit((_ha)->hw_prof.ddb_bitmap, \ + (_ha)->hw_prof.max_ddbs) +#define SET_DDB(_ddb, _ha) set_bit(_ddb, (_ha)->hw_prof.ddb_bitmap) +#define CLEAR_DDB(_ddb, _ha) clear_bit(_ddb, (_ha)->hw_prof.ddb_bitmap) + +static inline int asd_get_ddb(struct asd_ha_struct *asd_ha) +{ + unsigned long flags; + int ddb, i; + + spin_lock_irqsave(&asd_ha->hw_prof.ddb_lock, flags); + ddb = FIND_FREE_DDB(asd_ha); + if (ddb >= asd_ha->hw_prof.max_ddbs) { + ddb = -ENOMEM; + spin_unlock_irqrestore(&asd_ha->hw_prof.ddb_lock, flags); + goto out; + } + SET_DDB(ddb, asd_ha); + spin_unlock_irqrestore(&asd_ha->hw_prof.ddb_lock, flags); + + for (i = 0; i < sizeof(struct asd_ddb_ssp_smp_target_port); i+= 4) + asd_ddbsite_write_dword(asd_ha, ddb, i, 0); +out: + return ddb; +} + +#define INIT_CONN_TAG offsetof(struct asd_ddb_ssp_smp_target_port, init_conn_tag) +#define DEST_SAS_ADDR offsetof(struct asd_ddb_ssp_smp_target_port, dest_sas_addr) +#define SEND_QUEUE_HEAD offsetof(struct asd_ddb_ssp_smp_target_port, send_queue_head) +#define DDB_TYPE offsetof(struct asd_ddb_ssp_smp_target_port, ddb_type) +#define CONN_MASK offsetof(struct asd_ddb_ssp_smp_target_port, conn_mask) +#define DDB_TARG_FLAGS offsetof(struct asd_ddb_ssp_smp_target_port, flags) +#define DDB_TARG_FLAGS2 offsetof(struct asd_ddb_stp_sata_target_port, flags2) +#define EXEC_QUEUE_TAIL offsetof(struct asd_ddb_ssp_smp_target_port, exec_queue_tail) +#define SEND_QUEUE_TAIL offsetof(struct asd_ddb_ssp_smp_target_port, send_queue_tail) +#define SISTER_DDB offsetof(struct asd_ddb_ssp_smp_target_port, sister_ddb) +#define MAX_CCONN offsetof(struct asd_ddb_ssp_smp_target_port, max_concurrent_conn) +#define NUM_CTX offsetof(struct asd_ddb_ssp_smp_target_port, num_contexts) +#define ATA_CMD_SCBPTR offsetof(struct asd_ddb_stp_sata_target_port, ata_cmd_scbptr) +#define SATA_TAG_ALLOC_MASK offsetof(struct asd_ddb_stp_sata_target_port, sata_tag_alloc_mask) +#define NUM_SATA_TAGS offsetof(struct asd_ddb_stp_sata_target_port, num_sata_tags) +#define SATA_STATUS offsetof(struct asd_ddb_stp_sata_target_port, sata_status) +#define NCQ_DATA_SCB_PTR offsetof(struct asd_ddb_stp_sata_target_port, ncq_data_scb_ptr) +#define ITNL_TIMEOUT offsetof(struct asd_ddb_ssp_smp_target_port, itnl_timeout) + +static inline void asd_free_ddb(struct asd_ha_struct *asd_ha, int ddb) +{ + unsigned long flags; + + if (!ddb || ddb >= 0xFFFF) + return; + asd_ddbsite_write_byte(asd_ha, ddb, DDB_TYPE, DDB_TYPE_UNUSED); + spin_lock_irqsave(&asd_ha->hw_prof.ddb_lock, flags); + CLEAR_DDB(ddb, asd_ha); + spin_unlock_irqrestore(&asd_ha->hw_prof.ddb_lock, flags); +} + +static inline void asd_set_ddb_type(struct domain_device *dev) +{ + struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha; + int ddb = (int) (unsigned long) dev->lldd_dev; + + if (dev->dev_type == SATA_PM_PORT) + asd_ddbsite_write_byte(asd_ha,ddb, DDB_TYPE, DDB_TYPE_PM_PORT); + else if (dev->tproto) + asd_ddbsite_write_byte(asd_ha,ddb, DDB_TYPE, DDB_TYPE_TARGET); + else + asd_ddbsite_write_byte(asd_ha,ddb,DDB_TYPE,DDB_TYPE_INITIATOR); +} + +static int asd_init_sata_tag_ddb(struct domain_device *dev) +{ + struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha; + int ddb, i; + + ddb = asd_get_ddb(asd_ha); + if (ddb < 0) + return ddb; + + for (i = 0; i < sizeof(struct asd_ddb_sata_tag); i += 2) + asd_ddbsite_write_word(asd_ha, ddb, i, 0xFFFF); + + asd_ddbsite_write_word(asd_ha, (int) (unsigned long) dev->lldd_dev, + SISTER_DDB, ddb); + return 0; +} + +static inline int asd_init_sata(struct domain_device *dev) +{ + struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha; + int ddb = (int) (unsigned long) dev->lldd_dev; + u32 qdepth = 0; + int res = 0; + + asd_ddbsite_write_word(asd_ha, ddb, ATA_CMD_SCBPTR, 0xFFFF); + if ((dev->dev_type == SATA_DEV || dev->dev_type == SATA_PM_PORT) && + dev->sata_dev.identify_device && + dev->sata_dev.identify_device[10] != 0) { + u16 w75 = le16_to_cpu(dev->sata_dev.identify_device[75]); + u16 w76 = le16_to_cpu(dev->sata_dev.identify_device[76]); + + if (w76 & 0x100) /* NCQ? */ + qdepth = (w75 & 0x1F) + 1; + asd_ddbsite_write_dword(asd_ha, ddb, SATA_TAG_ALLOC_MASK, + (1<dev_type == SATA_DEV || dev->dev_type == SATA_PM || + dev->dev_type == SATA_PM_PORT) { + struct dev_to_host_fis *fis = (struct dev_to_host_fis *) + dev->frame_rcvd; + asd_ddbsite_write_byte(asd_ha, ddb, SATA_STATUS, fis->status); + } + asd_ddbsite_write_word(asd_ha, ddb, NCQ_DATA_SCB_PTR, 0xFFFF); + if (qdepth > 0) + res = asd_init_sata_tag_ddb(dev); + return res; +} + +static int asd_init_target_ddb(struct domain_device *dev) +{ + int ddb, i; + struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha; + u8 flags = 0; + + ddb = asd_get_ddb(asd_ha); + if (ddb < 0) + return ddb; + + dev->lldd_dev = (void *) (unsigned long) ddb; + + asd_ddbsite_write_byte(asd_ha, ddb, 0, DDB_TP_CONN_TYPE); + asd_ddbsite_write_byte(asd_ha, ddb, 1, 0); + asd_ddbsite_write_word(asd_ha, ddb, INIT_CONN_TAG, 0xFFFF); + for (i = 0; i < SAS_ADDR_SIZE; i++) + asd_ddbsite_write_byte(asd_ha, ddb, DEST_SAS_ADDR+i, + dev->sas_addr[i]); + asd_ddbsite_write_word(asd_ha, ddb, SEND_QUEUE_HEAD, 0xFFFF); + asd_set_ddb_type(dev); + asd_ddbsite_write_byte(asd_ha, ddb, CONN_MASK, dev->port->phy_mask); + if (dev->port->oob_mode != SATA_OOB_MODE) { + flags |= OPEN_REQUIRED; + if ((dev->dev_type == SATA_DEV) || + (dev->tproto & SAS_PROTO_STP)) { + struct smp_resp *rps_resp = &dev->sata_dev.rps_resp; + if (rps_resp->frame_type == SMP_RESPONSE && + rps_resp->function == SMP_REPORT_PHY_SATA && + rps_resp->result == SMP_RESP_FUNC_ACC) { + if (rps_resp->rps.affil_valid) + flags |= STP_AFFIL_POL; + if (rps_resp->rps.affil_supp) + flags |= SUPPORTS_AFFIL; + } + } else { + flags |= CONCURRENT_CONN_SUPP; + if (!dev->parent && + (dev->dev_type == EDGE_DEV || + dev->dev_type == FANOUT_DEV)) + asd_ddbsite_write_byte(asd_ha, ddb, MAX_CCONN, + 4); + else + asd_ddbsite_write_byte(asd_ha, ddb, MAX_CCONN, + dev->pathways); + asd_ddbsite_write_byte(asd_ha, ddb, NUM_CTX, 1); + } + } + if (dev->dev_type == SATA_PM) + flags |= SATA_MULTIPORT; + asd_ddbsite_write_byte(asd_ha, ddb, DDB_TARG_FLAGS, flags); + + flags = 0; + if (dev->tproto & SAS_PROTO_STP) + flags |= STP_CL_POL_NO_TX; + asd_ddbsite_write_byte(asd_ha, ddb, DDB_TARG_FLAGS2, flags); + + asd_ddbsite_write_word(asd_ha, ddb, EXEC_QUEUE_TAIL, 0xFFFF); + asd_ddbsite_write_word(asd_ha, ddb, SEND_QUEUE_TAIL, 0xFFFF); + asd_ddbsite_write_word(asd_ha, ddb, SISTER_DDB, 0xFFFF); + + if (dev->dev_type == SATA_DEV || (dev->tproto & SAS_PROTO_STP)) { + i = asd_init_sata(dev); + if (i < 0) { + asd_free_ddb(asd_ha, ddb); + return i; + } + } + + if (dev->dev_type == SAS_END_DEV) { + struct sas_end_device *rdev = rphy_to_end_device(dev->rphy); + if (rdev->I_T_nexus_loss_timeout > 0) + asd_ddbsite_write_word(asd_ha, ddb, ITNL_TIMEOUT, + min(rdev->I_T_nexus_loss_timeout, + (u16)ITNL_TIMEOUT_CONST)); + else + asd_ddbsite_write_word(asd_ha, ddb, ITNL_TIMEOUT, + (u16)ITNL_TIMEOUT_CONST); + } + return 0; +} + +static int asd_init_sata_pm_table_ddb(struct domain_device *dev) +{ + struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha; + int ddb, i; + + ddb = asd_get_ddb(asd_ha); + if (ddb < 0) + return ddb; + + for (i = 0; i < 32; i += 2) + asd_ddbsite_write_word(asd_ha, ddb, i, 0xFFFF); + + asd_ddbsite_write_word(asd_ha, (int) (unsigned long) dev->lldd_dev, + SISTER_DDB, ddb); + + return 0; +} + +#define PM_PORT_FLAGS offsetof(struct asd_ddb_sata_pm_port, pm_port_flags) +#define PARENT_DDB offsetof(struct asd_ddb_sata_pm_port, parent_ddb) + +/** + * asd_init_sata_pm_port_ddb -- SATA Port Multiplier Port + * dev: pointer to domain device + * + * For SATA Port Multiplier Ports we need to allocate one SATA Port + * Multiplier Port DDB and depending on whether the target on it + * supports SATA II NCQ, one SATA Tag DDB. + */ +static int asd_init_sata_pm_port_ddb(struct domain_device *dev) +{ + int ddb, i, parent_ddb, pmtable_ddb; + struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha; + u8 flags; + + ddb = asd_get_ddb(asd_ha); + if (ddb < 0) + return ddb; + + asd_set_ddb_type(dev); + flags = (dev->sata_dev.port_no << 4) | PM_PORT_SET; + asd_ddbsite_write_byte(asd_ha, ddb, PM_PORT_FLAGS, flags); + asd_ddbsite_write_word(asd_ha, ddb, SISTER_DDB, 0xFFFF); + asd_ddbsite_write_word(asd_ha, ddb, ATA_CMD_SCBPTR, 0xFFFF); + asd_init_sata(dev); + + parent_ddb = (int) (unsigned long) dev->parent->lldd_dev; + asd_ddbsite_write_word(asd_ha, ddb, PARENT_DDB, parent_ddb); + pmtable_ddb = asd_ddbsite_read_word(asd_ha, parent_ddb, SISTER_DDB); + asd_ddbsite_write_word(asd_ha, pmtable_ddb, dev->sata_dev.port_no,ddb); + + if (asd_ddbsite_read_byte(asd_ha, ddb, NUM_SATA_TAGS) > 0) { + i = asd_init_sata_tag_ddb(dev); + if (i < 0) { + asd_free_ddb(asd_ha, ddb); + return i; + } + } + return 0; +} + +static int asd_init_initiator_ddb(struct domain_device *dev) +{ + return -ENODEV; +} + +/** + * asd_init_sata_pm_ddb -- SATA Port Multiplier + * dev: pointer to domain device + * + * For STP and direct-attached SATA Port Multipliers we need + * one target port DDB entry and one SATA PM table DDB entry. + */ +static int asd_init_sata_pm_ddb(struct domain_device *dev) +{ + int res = 0; + + res = asd_init_target_ddb(dev); + if (res) + goto out; + res = asd_init_sata_pm_table_ddb(dev); + if (res) + asd_free_ddb(dev->port->ha->lldd_ha, + (int) (unsigned long) dev->lldd_dev); +out: + return res; +} + +int asd_dev_found(struct domain_device *dev) +{ + int res = 0; + + switch (dev->dev_type) { + case SATA_PM: + res = asd_init_sata_pm_ddb(dev); + break; + case SATA_PM_PORT: + res = asd_init_sata_pm_port_ddb(dev); + break; + default: + if (dev->tproto) + res = asd_init_target_ddb(dev); + else + res = asd_init_initiator_ddb(dev); + } + return res; +} + +void asd_dev_gone(struct domain_device *dev) +{ + int ddb, sister_ddb; + struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha; + + ddb = (int) (unsigned long) dev->lldd_dev; + sister_ddb = asd_ddbsite_read_word(asd_ha, ddb, SISTER_DDB); + + if (sister_ddb != 0xFFFF) + asd_free_ddb(asd_ha, sister_ddb); + asd_free_ddb(asd_ha, ddb); + dev->lldd_dev = NULL; +} diff --git a/drivers/scsi/aic94xx/aic94xx_dump.c b/drivers/scsi/aic94xx/aic94xx_dump.c new file mode 100644 index 000000000000..e6ade5996d95 --- /dev/null +++ b/drivers/scsi/aic94xx/aic94xx_dump.c @@ -0,0 +1,959 @@ +/* + * Aic94xx SAS/SATA driver dump interface. + * + * Copyright (C) 2004 Adaptec, Inc. All rights reserved. + * Copyright (C) 2004 David Chaw + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This file is part of the aic94xx driver. + * + * The aic94xx driver is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2 of the + * License. + * + * The aic94xx driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with the aic94xx driver; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * 2005/07/14/LT Complete overhaul of this file. Update pages, register + * locations, names, etc. Make use of macros. Print more information. + * Print all cseq and lseq mip and mdp. + * + */ + +#include "linux/pci.h" +#include "aic94xx.h" +#include "aic94xx_reg.h" +#include "aic94xx_reg_def.h" +#include "aic94xx_sas.h" + +#include "aic94xx_dump.h" + +#ifdef ASD_DEBUG + +#define MD(x) (1 << (x)) +#define MODE_COMMON (1 << 31) +#define MODE_0_7 (0xFF) + +static const struct lseq_cio_regs { + char *name; + u32 offs; + u8 width; + u32 mode; +} LSEQmCIOREGS[] = { + {"LmMnSCBPTR", 0x20, 16, MD(0)|MD(1)|MD(2)|MD(3)|MD(4) }, + {"LmMnDDBPTR", 0x22, 16, MD(0)|MD(1)|MD(2)|MD(3)|MD(4) }, + {"LmREQMBX", 0x30, 32, MODE_COMMON }, + {"LmRSPMBX", 0x34, 32, MODE_COMMON }, + {"LmMnINT", 0x38, 32, MODE_0_7 }, + {"LmMnINTEN", 0x3C, 32, MODE_0_7 }, + {"LmXMTPRIMD", 0x40, 32, MODE_COMMON }, + {"LmXMTPRIMCS", 0x44, 8, MODE_COMMON }, + {"LmCONSTAT", 0x45, 8, MODE_COMMON }, + {"LmMnDMAERRS", 0x46, 8, MD(0)|MD(1) }, + {"LmMnSGDMAERRS", 0x47, 8, MD(0)|MD(1) }, + {"LmMnEXPHDRP", 0x48, 8, MD(0) }, + {"LmMnSASAALIGN", 0x48, 8, MD(1) }, + {"LmMnMSKHDRP", 0x49, 8, MD(0) }, + {"LmMnSTPALIGN", 0x49, 8, MD(1) }, + {"LmMnRCVHDRP", 0x4A, 8, MD(0) }, + {"LmMnXMTHDRP", 0x4A, 8, MD(1) }, + {"LmALIGNMODE", 0x4B, 8, MD(1) }, + {"LmMnEXPRCVCNT", 0x4C, 32, MD(0) }, + {"LmMnXMTCNT", 0x4C, 32, MD(1) }, + {"LmMnCURRTAG", 0x54, 16, MD(0) }, + {"LmMnPREVTAG", 0x56, 16, MD(0) }, + {"LmMnACKOFS", 0x58, 8, MD(1) }, + {"LmMnXFRLVL", 0x59, 8, MD(0)|MD(1) }, + {"LmMnSGDMACTL", 0x5A, 8, MD(0)|MD(1) }, + {"LmMnSGDMASTAT", 0x5B, 8, MD(0)|MD(1) }, + {"LmMnDDMACTL", 0x5C, 8, MD(0)|MD(1) }, + {"LmMnDDMASTAT", 0x5D, 8, MD(0)|MD(1) }, + {"LmMnDDMAMODE", 0x5E, 16, MD(0)|MD(1) }, + {"LmMnPIPECTL", 0x61, 8, MD(0)|MD(1) }, + {"LmMnACTSCB", 0x62, 16, MD(0)|MD(1) }, + {"LmMnSGBHADR", 0x64, 8, MD(0)|MD(1) }, + {"LmMnSGBADR", 0x65, 8, MD(0)|MD(1) }, + {"LmMnSGDCNT", 0x66, 8, MD(0)|MD(1) }, + {"LmMnSGDMADR", 0x68, 32, MD(0)|MD(1) }, + {"LmMnSGDMADR", 0x6C, 32, MD(0)|MD(1) }, + {"LmMnXFRCNT", 0x70, 32, MD(0)|MD(1) }, + {"LmMnXMTCRC", 0x74, 32, MD(1) }, + {"LmCURRTAG", 0x74, 16, MD(0) }, + {"LmPREVTAG", 0x76, 16, MD(0) }, + {"LmMnDPSEL", 0x7B, 8, MD(0)|MD(1) }, + {"LmDPTHSTAT", 0x7C, 8, MODE_COMMON }, + {"LmMnHOLDLVL", 0x7D, 8, MD(0) }, + {"LmMnSATAFS", 0x7E, 8, MD(1) }, + {"LmMnCMPLTSTAT", 0x7F, 8, MD(0)|MD(1) }, + {"LmPRMSTAT0", 0x80, 32, MODE_COMMON }, + {"LmPRMSTAT1", 0x84, 32, MODE_COMMON }, + {"LmGPRMINT", 0x88, 8, MODE_COMMON }, + {"LmMnCURRSCB", 0x8A, 16, MD(0) }, + {"LmPRMICODE", 0x8C, 32, MODE_COMMON }, + {"LmMnRCVCNT", 0x90, 16, MD(0) }, + {"LmMnBUFSTAT", 0x92, 16, MD(0) }, + {"LmMnXMTHDRSIZE",0x92, 8, MD(1) }, + {"LmMnXMTSIZE", 0x93, 8, MD(1) }, + {"LmMnTGTXFRCNT", 0x94, 32, MD(0) }, + {"LmMnEXPROFS", 0x98, 32, MD(0) }, + {"LmMnXMTROFS", 0x98, 32, MD(1) }, + {"LmMnRCVROFS", 0x9C, 32, MD(0) }, + {"LmCONCTL", 0xA0, 16, MODE_COMMON }, + {"LmBITLTIMER", 0xA2, 16, MODE_COMMON }, + {"LmWWNLOW", 0xA8, 32, MODE_COMMON }, + {"LmWWNHIGH", 0xAC, 32, MODE_COMMON }, + {"LmMnFRMERR", 0xB0, 32, MD(0) }, + {"LmMnFRMERREN", 0xB4, 32, MD(0) }, + {"LmAWTIMER", 0xB8, 16, MODE_COMMON }, + {"LmAWTCTL", 0xBA, 8, MODE_COMMON }, + {"LmMnHDRCMPS", 0xC0, 32, MD(0) }, + {"LmMnXMTSTAT", 0xC4, 8, MD(1) }, + {"LmHWTSTATEN", 0xC5, 8, MODE_COMMON }, + {"LmMnRRDYRC", 0xC6, 8, MD(0) }, + {"LmMnRRDYTC", 0xC6, 8, MD(1) }, + {"LmHWTSTAT", 0xC7, 8, MODE_COMMON }, + {"LmMnDATABUFADR",0xC8, 16, MD(0)|MD(1) }, + {"LmDWSSTATUS", 0xCB, 8, MODE_COMMON }, + {"LmMnACTSTAT", 0xCE, 16, MD(0)|MD(1) }, + {"LmMnREQSCB", 0xD2, 16, MD(0)|MD(1) }, + {"LmXXXPRIM", 0xD4, 32, MODE_COMMON }, + {"LmRCVASTAT", 0xD9, 8, MODE_COMMON }, + {"LmINTDIS1", 0xDA, 8, MODE_COMMON }, + {"LmPSTORESEL", 0xDB, 8, MODE_COMMON }, + {"LmPSTORE", 0xDC, 32, MODE_COMMON }, + {"LmPRIMSTAT0EN", 0xE0, 32, MODE_COMMON }, + {"LmPRIMSTAT1EN", 0xE4, 32, MODE_COMMON }, + {"LmDONETCTL", 0xF2, 16, MODE_COMMON }, + {NULL, 0, 0, 0 } +}; +/* +static struct lseq_cio_regs LSEQmOOBREGS[] = { + {"OOB_BFLTR" ,0x100, 8, MD(5)}, + {"OOB_INIT_MIN" ,0x102,16, MD(5)}, + {"OOB_INIT_MAX" ,0x104,16, MD(5)}, + {"OOB_INIT_NEG" ,0x106,16, MD(5)}, + {"OOB_SAS_MIN" ,0x108,16, MD(5)}, + {"OOB_SAS_MAX" ,0x10A,16, MD(5)}, + {"OOB_SAS_NEG" ,0x10C,16, MD(5)}, + {"OOB_WAKE_MIN" ,0x10E,16, MD(5)}, + {"OOB_WAKE_MAX" ,0x110,16, MD(5)}, + {"OOB_WAKE_NEG" ,0x112,16, MD(5)}, + {"OOB_IDLE_MAX" ,0x114,16, MD(5)}, + {"OOB_BURST_MAX" ,0x116,16, MD(5)}, + {"OOB_XMIT_BURST" ,0x118, 8, MD(5)}, + {"OOB_SEND_PAIRS" ,0x119, 8, MD(5)}, + {"OOB_INIT_IDLE" ,0x11A, 8, MD(5)}, + {"OOB_INIT_NEGO" ,0x11C, 8, MD(5)}, + {"OOB_SAS_IDLE" ,0x11E, 8, MD(5)}, + {"OOB_SAS_NEGO" ,0x120, 8, MD(5)}, + {"OOB_WAKE_IDLE" ,0x122, 8, MD(5)}, + {"OOB_WAKE_NEGO" ,0x124, 8, MD(5)}, + {"OOB_DATA_KBITS" ,0x126, 8, MD(5)}, + {"OOB_BURST_DATA" ,0x128,32, MD(5)}, + {"OOB_ALIGN_0_DATA" ,0x12C,32, MD(5)}, + {"OOB_ALIGN_1_DATA" ,0x130,32, MD(5)}, + {"OOB_SYNC_DATA" ,0x134,32, MD(5)}, + {"OOB_D10_2_DATA" ,0x138,32, MD(5)}, + {"OOB_PHY_RST_CNT" ,0x13C,32, MD(5)}, + {"OOB_SIG_GEN" ,0x140, 8, MD(5)}, + {"OOB_XMIT" ,0x141, 8, MD(5)}, + {"FUNCTION_MAKS" ,0x142, 8, MD(5)}, + {"OOB_MODE" ,0x143, 8, MD(5)}, + {"CURRENT_STATUS" ,0x144, 8, MD(5)}, + {"SPEED_MASK" ,0x145, 8, MD(5)}, + {"PRIM_COUNT" ,0x146, 8, MD(5)}, + {"OOB_SIGNALS" ,0x148, 8, MD(5)}, + {"OOB_DATA_DET" ,0x149, 8, MD(5)}, + {"OOB_TIME_OUT" ,0x14C, 8, MD(5)}, + {"OOB_TIMER_ENABLE" ,0x14D, 8, MD(5)}, + {"OOB_STATUS" ,0x14E, 8, MD(5)}, + {"HOT_PLUG_DELAY" ,0x150, 8, MD(5)}, + {"RCD_DELAY" ,0x151, 8, MD(5)}, + {"COMSAS_TIMER" ,0x152, 8, MD(5)}, + {"SNTT_DELAY" ,0x153, 8, MD(5)}, + {"SPD_CHNG_DELAY" ,0x154, 8, MD(5)}, + {"SNLT_DELAY" ,0x155, 8, MD(5)}, + {"SNWT_DELAY" ,0x156, 8, MD(5)}, + {"ALIGN_DELAY" ,0x157, 8, MD(5)}, + {"INT_ENABLE_0" ,0x158, 8, MD(5)}, + {"INT_ENABLE_1" ,0x159, 8, MD(5)}, + {"INT_ENABLE_2" ,0x15A, 8, MD(5)}, + {"INT_ENABLE_3" ,0x15B, 8, MD(5)}, + {"OOB_TEST_REG" ,0x15C, 8, MD(5)}, + {"PHY_CONTROL_0" ,0x160, 8, MD(5)}, + {"PHY_CONTROL_1" ,0x161, 8, MD(5)}, + {"PHY_CONTROL_2" ,0x162, 8, MD(5)}, + {"PHY_CONTROL_3" ,0x163, 8, MD(5)}, + {"PHY_OOB_CAL_TX" ,0x164, 8, MD(5)}, + {"PHY_OOB_CAL_RX" ,0x165, 8, MD(5)}, + {"OOB_PHY_CAL_TX" ,0x166, 8, MD(5)}, + {"OOB_PHY_CAL_RX" ,0x167, 8, MD(5)}, + {"PHY_CONTROL_4" ,0x168, 8, MD(5)}, + {"PHY_TEST" ,0x169, 8, MD(5)}, + {"PHY_PWR_CTL" ,0x16A, 8, MD(5)}, + {"PHY_PWR_DELAY" ,0x16B, 8, MD(5)}, + {"OOB_SM_CON" ,0x16C, 8, MD(5)}, + {"ADDR_TRAP_1" ,0x16D, 8, MD(5)}, + {"ADDR_NEXT_1" ,0x16E, 8, MD(5)}, + {"NEXT_ST_1" ,0x16F, 8, MD(5)}, + {"OOB_SM_STATE" ,0x170, 8, MD(5)}, + {"ADDR_TRAP_2" ,0x171, 8, MD(5)}, + {"ADDR_NEXT_2" ,0x172, 8, MD(5)}, + {"NEXT_ST_2" ,0x173, 8, MD(5)}, + {NULL, 0, 0, 0 } +}; +*/ +#define STR_8BIT " %30s[0x%04x]:0x%02x\n" +#define STR_16BIT " %30s[0x%04x]:0x%04x\n" +#define STR_32BIT " %30s[0x%04x]:0x%08x\n" +#define STR_64BIT " %30s[0x%04x]:0x%llx\n" + +#define PRINT_REG_8bit(_ha, _n, _r) asd_printk(STR_8BIT, #_n, _n, \ + asd_read_reg_byte(_ha, _r)) +#define PRINT_REG_16bit(_ha, _n, _r) asd_printk(STR_16BIT, #_n, _n, \ + asd_read_reg_word(_ha, _r)) +#define PRINT_REG_32bit(_ha, _n, _r) asd_printk(STR_32BIT, #_n, _n, \ + asd_read_reg_dword(_ha, _r)) + +#define PRINT_CREG_8bit(_ha, _n) asd_printk(STR_8BIT, #_n, _n, \ + asd_read_reg_byte(_ha, C##_n)) +#define PRINT_CREG_16bit(_ha, _n) asd_printk(STR_16BIT, #_n, _n, \ + asd_read_reg_word(_ha, C##_n)) +#define PRINT_CREG_32bit(_ha, _n) asd_printk(STR_32BIT, #_n, _n, \ + asd_read_reg_dword(_ha, C##_n)) + +#define MSTR_8BIT " Mode:%02d %30s[0x%04x]:0x%02x\n" +#define MSTR_16BIT " Mode:%02d %30s[0x%04x]:0x%04x\n" +#define MSTR_32BIT " Mode:%02d %30s[0x%04x]:0x%08x\n" + +#define PRINT_MREG_8bit(_ha, _m, _n, _r) asd_printk(MSTR_8BIT, _m, #_n, _n, \ + asd_read_reg_byte(_ha, _r)) +#define PRINT_MREG_16bit(_ha, _m, _n, _r) asd_printk(MSTR_16BIT, _m, #_n, _n, \ + asd_read_reg_word(_ha, _r)) +#define PRINT_MREG_32bit(_ha, _m, _n, _r) asd_printk(MSTR_32BIT, _m, #_n, _n, \ + asd_read_reg_dword(_ha, _r)) + +/* can also be used for MD when the register is mode aware already */ +#define PRINT_MIS_byte(_ha, _n) asd_printk(STR_8BIT, #_n,CSEQ_##_n-CMAPPEDSCR,\ + asd_read_reg_byte(_ha, CSEQ_##_n)) +#define PRINT_MIS_word(_ha, _n) asd_printk(STR_16BIT,#_n,CSEQ_##_n-CMAPPEDSCR,\ + asd_read_reg_word(_ha, CSEQ_##_n)) +#define PRINT_MIS_dword(_ha, _n) \ + asd_printk(STR_32BIT,#_n,CSEQ_##_n-CMAPPEDSCR,\ + asd_read_reg_dword(_ha, CSEQ_##_n)) +#define PRINT_MIS_qword(_ha, _n) \ + asd_printk(STR_64BIT, #_n,CSEQ_##_n-CMAPPEDSCR, \ + (unsigned long long)(((u64)asd_read_reg_dword(_ha, CSEQ_##_n)) \ + | (((u64)asd_read_reg_dword(_ha, (CSEQ_##_n)+4))<<32))) + +#define CMDP_REG(_n, _m) (_m*(CSEQ_PAGE_SIZE*2)+CSEQ_##_n) +#define PRINT_CMDP_word(_ha, _n) \ +asd_printk("%20s 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x\n", \ + #_n, \ + asd_read_reg_word(_ha, CMDP_REG(_n, 0)), \ + asd_read_reg_word(_ha, CMDP_REG(_n, 1)), \ + asd_read_reg_word(_ha, CMDP_REG(_n, 2)), \ + asd_read_reg_word(_ha, CMDP_REG(_n, 3)), \ + asd_read_reg_word(_ha, CMDP_REG(_n, 4)), \ + asd_read_reg_word(_ha, CMDP_REG(_n, 5)), \ + asd_read_reg_word(_ha, CMDP_REG(_n, 6)), \ + asd_read_reg_word(_ha, CMDP_REG(_n, 7))) + +#define PRINT_CMDP_byte(_ha, _n) \ +asd_printk("%20s 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x\n", \ + #_n, \ + asd_read_reg_byte(_ha, CMDP_REG(_n, 0)), \ + asd_read_reg_byte(_ha, CMDP_REG(_n, 1)), \ + asd_read_reg_byte(_ha, CMDP_REG(_n, 2)), \ + asd_read_reg_byte(_ha, CMDP_REG(_n, 3)), \ + asd_read_reg_byte(_ha, CMDP_REG(_n, 4)), \ + asd_read_reg_byte(_ha, CMDP_REG(_n, 5)), \ + asd_read_reg_byte(_ha, CMDP_REG(_n, 6)), \ + asd_read_reg_byte(_ha, CMDP_REG(_n, 7))) + +static void asd_dump_cseq_state(struct asd_ha_struct *asd_ha) +{ + int mode; + + asd_printk("CSEQ STATE\n"); + + asd_printk("ARP2 REGISTERS\n"); + + PRINT_CREG_32bit(asd_ha, ARP2CTL); + PRINT_CREG_32bit(asd_ha, ARP2INT); + PRINT_CREG_32bit(asd_ha, ARP2INTEN); + PRINT_CREG_8bit(asd_ha, MODEPTR); + PRINT_CREG_8bit(asd_ha, ALTMODE); + PRINT_CREG_8bit(asd_ha, FLAG); + PRINT_CREG_8bit(asd_ha, ARP2INTCTL); + PRINT_CREG_16bit(asd_ha, STACK); + PRINT_CREG_16bit(asd_ha, PRGMCNT); + PRINT_CREG_16bit(asd_ha, ACCUM); + PRINT_CREG_16bit(asd_ha, SINDEX); + PRINT_CREG_16bit(asd_ha, DINDEX); + PRINT_CREG_8bit(asd_ha, SINDIR); + PRINT_CREG_8bit(asd_ha, DINDIR); + PRINT_CREG_8bit(asd_ha, JUMLDIR); + PRINT_CREG_8bit(asd_ha, ARP2HALTCODE); + PRINT_CREG_16bit(asd_ha, CURRADDR); + PRINT_CREG_16bit(asd_ha, LASTADDR); + PRINT_CREG_16bit(asd_ha, NXTLADDR); + + asd_printk("IOP REGISTERS\n"); + + PRINT_REG_32bit(asd_ha, BISTCTL1, CBISTCTL); + PRINT_CREG_32bit(asd_ha, MAPPEDSCR); + + asd_printk("CIO REGISTERS\n"); + + for (mode = 0; mode < 9; mode++) + PRINT_MREG_16bit(asd_ha, mode, MnSCBPTR, CMnSCBPTR(mode)); + PRINT_MREG_16bit(asd_ha, 15, MnSCBPTR, CMnSCBPTR(15)); + + for (mode = 0; mode < 9; mode++) + PRINT_MREG_16bit(asd_ha, mode, MnDDBPTR, CMnDDBPTR(mode)); + PRINT_MREG_16bit(asd_ha, 15, MnDDBPTR, CMnDDBPTR(15)); + + for (mode = 0; mode < 8; mode++) + PRINT_MREG_32bit(asd_ha, mode, MnREQMBX, CMnREQMBX(mode)); + for (mode = 0; mode < 8; mode++) + PRINT_MREG_32bit(asd_ha, mode, MnRSPMBX, CMnRSPMBX(mode)); + for (mode = 0; mode < 8; mode++) + PRINT_MREG_32bit(asd_ha, mode, MnINT, CMnINT(mode)); + for (mode = 0; mode < 8; mode++) + PRINT_MREG_32bit(asd_ha, mode, MnINTEN, CMnINTEN(mode)); + + PRINT_CREG_8bit(asd_ha, SCRATCHPAGE); + for (mode = 0; mode < 8; mode++) + PRINT_MREG_8bit(asd_ha, mode, MnSCRATCHPAGE, + CMnSCRATCHPAGE(mode)); + + PRINT_REG_32bit(asd_ha, CLINKCON, CLINKCON); + PRINT_REG_8bit(asd_ha, CCONMSK, CCONMSK); + PRINT_REG_8bit(asd_ha, CCONEXIST, CCONEXIST); + PRINT_REG_16bit(asd_ha, CCONMODE, CCONMODE); + PRINT_REG_32bit(asd_ha, CTIMERCALC, CTIMERCALC); + PRINT_REG_8bit(asd_ha, CINTDIS, CINTDIS); + + asd_printk("SCRATCH MEMORY\n"); + + asd_printk("MIP 4 >>>>>\n"); + PRINT_MIS_word(asd_ha, Q_EXE_HEAD); + PRINT_MIS_word(asd_ha, Q_EXE_TAIL); + PRINT_MIS_word(asd_ha, Q_DONE_HEAD); + PRINT_MIS_word(asd_ha, Q_DONE_TAIL); + PRINT_MIS_word(asd_ha, Q_SEND_HEAD); + PRINT_MIS_word(asd_ha, Q_SEND_TAIL); + PRINT_MIS_word(asd_ha, Q_DMA2CHIM_HEAD); + PRINT_MIS_word(asd_ha, Q_DMA2CHIM_TAIL); + PRINT_MIS_word(asd_ha, Q_COPY_HEAD); + PRINT_MIS_word(asd_ha, Q_COPY_TAIL); + PRINT_MIS_word(asd_ha, REG0); + PRINT_MIS_word(asd_ha, REG1); + PRINT_MIS_dword(asd_ha, REG2); + PRINT_MIS_byte(asd_ha, LINK_CTL_Q_MAP); + PRINT_MIS_byte(asd_ha, MAX_CSEQ_MODE); + PRINT_MIS_byte(asd_ha, FREE_LIST_HACK_COUNT); + + asd_printk("MIP 5 >>>>\n"); + PRINT_MIS_qword(asd_ha, EST_NEXUS_REQ_QUEUE); + PRINT_MIS_qword(asd_ha, EST_NEXUS_REQ_COUNT); + PRINT_MIS_word(asd_ha, Q_EST_NEXUS_HEAD); + PRINT_MIS_word(asd_ha, Q_EST_NEXUS_TAIL); + PRINT_MIS_word(asd_ha, NEED_EST_NEXUS_SCB); + PRINT_MIS_byte(asd_ha, EST_NEXUS_REQ_HEAD); + PRINT_MIS_byte(asd_ha, EST_NEXUS_REQ_TAIL); + PRINT_MIS_byte(asd_ha, EST_NEXUS_SCB_OFFSET); + + asd_printk("MIP 6 >>>>\n"); + PRINT_MIS_word(asd_ha, INT_ROUT_RET_ADDR0); + PRINT_MIS_word(asd_ha, INT_ROUT_RET_ADDR1); + PRINT_MIS_word(asd_ha, INT_ROUT_SCBPTR); + PRINT_MIS_byte(asd_ha, INT_ROUT_MODE); + PRINT_MIS_byte(asd_ha, ISR_SCRATCH_FLAGS); + PRINT_MIS_word(asd_ha, ISR_SAVE_SINDEX); + PRINT_MIS_word(asd_ha, ISR_SAVE_DINDEX); + PRINT_MIS_word(asd_ha, Q_MONIRTT_HEAD); + PRINT_MIS_word(asd_ha, Q_MONIRTT_TAIL); + PRINT_MIS_byte(asd_ha, FREE_SCB_MASK); + PRINT_MIS_word(asd_ha, BUILTIN_FREE_SCB_HEAD); + PRINT_MIS_word(asd_ha, BUILTIN_FREE_SCB_TAIL); + PRINT_MIS_word(asd_ha, EXTENDED_FREE_SCB_HEAD); + PRINT_MIS_word(asd_ha, EXTENDED_FREE_SCB_TAIL); + + asd_printk("MIP 7 >>>>\n"); + PRINT_MIS_qword(asd_ha, EMPTY_REQ_QUEUE); + PRINT_MIS_qword(asd_ha, EMPTY_REQ_COUNT); + PRINT_MIS_word(asd_ha, Q_EMPTY_HEAD); + PRINT_MIS_word(asd_ha, Q_EMPTY_TAIL); + PRINT_MIS_word(asd_ha, NEED_EMPTY_SCB); + PRINT_MIS_byte(asd_ha, EMPTY_REQ_HEAD); + PRINT_MIS_byte(asd_ha, EMPTY_REQ_TAIL); + PRINT_MIS_byte(asd_ha, EMPTY_SCB_OFFSET); + PRINT_MIS_word(asd_ha, PRIMITIVE_DATA); + PRINT_MIS_dword(asd_ha, TIMEOUT_CONST); + + asd_printk("MDP 0 >>>>\n"); + asd_printk("%-20s %6s %6s %6s %6s %6s %6s %6s %6s\n", + "Mode: ", "0", "1", "2", "3", "4", "5", "6", "7"); + PRINT_CMDP_word(asd_ha, LRM_SAVE_SINDEX); + PRINT_CMDP_word(asd_ha, LRM_SAVE_SCBPTR); + PRINT_CMDP_word(asd_ha, Q_LINK_HEAD); + PRINT_CMDP_word(asd_ha, Q_LINK_TAIL); + PRINT_CMDP_byte(asd_ha, LRM_SAVE_SCRPAGE); + + asd_printk("MDP 0 Mode 8 >>>>\n"); + PRINT_MIS_word(asd_ha, RET_ADDR); + PRINT_MIS_word(asd_ha, RET_SCBPTR); + PRINT_MIS_word(asd_ha, SAVE_SCBPTR); + PRINT_MIS_word(asd_ha, EMPTY_TRANS_CTX); + PRINT_MIS_word(asd_ha, RESP_LEN); + PRINT_MIS_word(asd_ha, TMF_SCBPTR); + PRINT_MIS_word(asd_ha, GLOBAL_PREV_SCB); + PRINT_MIS_word(asd_ha, GLOBAL_HEAD); + PRINT_MIS_word(asd_ha, CLEAR_LU_HEAD); + PRINT_MIS_byte(asd_ha, TMF_OPCODE); + PRINT_MIS_byte(asd_ha, SCRATCH_FLAGS); + PRINT_MIS_word(asd_ha, HSB_SITE); + PRINT_MIS_word(asd_ha, FIRST_INV_SCB_SITE); + PRINT_MIS_word(asd_ha, FIRST_INV_DDB_SITE); + + asd_printk("MDP 1 Mode 8 >>>>\n"); + PRINT_MIS_qword(asd_ha, LUN_TO_CLEAR); + PRINT_MIS_qword(asd_ha, LUN_TO_CHECK); + + asd_printk("MDP 2 Mode 8 >>>>\n"); + PRINT_MIS_qword(asd_ha, HQ_NEW_POINTER); + PRINT_MIS_qword(asd_ha, HQ_DONE_BASE); + PRINT_MIS_dword(asd_ha, HQ_DONE_POINTER); + PRINT_MIS_byte(asd_ha, HQ_DONE_PASS); +} + +#define PRINT_LREG_8bit(_h, _lseq, _n) \ + asd_printk(STR_8BIT, #_n, _n, asd_read_reg_byte(_h, Lm##_n(_lseq))) +#define PRINT_LREG_16bit(_h, _lseq, _n) \ + asd_printk(STR_16BIT, #_n, _n, asd_read_reg_word(_h, Lm##_n(_lseq))) +#define PRINT_LREG_32bit(_h, _lseq, _n) \ + asd_printk(STR_32BIT, #_n, _n, asd_read_reg_dword(_h, Lm##_n(_lseq))) + +#define PRINT_LMIP_byte(_h, _lseq, _n) \ + asd_printk(STR_8BIT, #_n, LmSEQ_##_n(_lseq)-LmSCRATCH(_lseq), \ + asd_read_reg_byte(_h, LmSEQ_##_n(_lseq))) +#define PRINT_LMIP_word(_h, _lseq, _n) \ + asd_printk(STR_16BIT, #_n, LmSEQ_##_n(_lseq)-LmSCRATCH(_lseq), \ + asd_read_reg_word(_h, LmSEQ_##_n(_lseq))) +#define PRINT_LMIP_dword(_h, _lseq, _n) \ + asd_printk(STR_32BIT, #_n, LmSEQ_##_n(_lseq)-LmSCRATCH(_lseq), \ + asd_read_reg_dword(_h, LmSEQ_##_n(_lseq))) +#define PRINT_LMIP_qword(_h, _lseq, _n) \ + asd_printk(STR_64BIT, #_n, LmSEQ_##_n(_lseq)-LmSCRATCH(_lseq), \ + (unsigned long long)(((unsigned long long) \ + asd_read_reg_dword(_h, LmSEQ_##_n(_lseq))) \ + | (((unsigned long long) \ + asd_read_reg_dword(_h, LmSEQ_##_n(_lseq)+4))<<32))) + +static void asd_print_lseq_cio_reg(struct asd_ha_struct *asd_ha, + u32 lseq_cio_addr, int i) +{ + switch (LSEQmCIOREGS[i].width) { + case 8: + asd_printk("%20s[0x%x]: 0x%02x\n", LSEQmCIOREGS[i].name, + LSEQmCIOREGS[i].offs, + asd_read_reg_byte(asd_ha, lseq_cio_addr + + LSEQmCIOREGS[i].offs)); + + break; + case 16: + asd_printk("%20s[0x%x]: 0x%04x\n", LSEQmCIOREGS[i].name, + LSEQmCIOREGS[i].offs, + asd_read_reg_word(asd_ha, lseq_cio_addr + + LSEQmCIOREGS[i].offs)); + + break; + case 32: + asd_printk("%20s[0x%x]: 0x%08x\n", LSEQmCIOREGS[i].name, + LSEQmCIOREGS[i].offs, + asd_read_reg_dword(asd_ha, lseq_cio_addr + + LSEQmCIOREGS[i].offs)); + break; + } +} + +static void asd_dump_lseq_state(struct asd_ha_struct *asd_ha, int lseq) +{ + u32 moffs; + int mode; + + asd_printk("LSEQ %d STATE\n", lseq); + + asd_printk("LSEQ%d: ARP2 REGISTERS\n", lseq); + PRINT_LREG_32bit(asd_ha, lseq, ARP2CTL); + PRINT_LREG_32bit(asd_ha, lseq, ARP2INT); + PRINT_LREG_32bit(asd_ha, lseq, ARP2INTEN); + PRINT_LREG_8bit(asd_ha, lseq, MODEPTR); + PRINT_LREG_8bit(asd_ha, lseq, ALTMODE); + PRINT_LREG_8bit(asd_ha, lseq, FLAG); + PRINT_LREG_8bit(asd_ha, lseq, ARP2INTCTL); + PRINT_LREG_16bit(asd_ha, lseq, STACK); + PRINT_LREG_16bit(asd_ha, lseq, PRGMCNT); + PRINT_LREG_16bit(asd_ha, lseq, ACCUM); + PRINT_LREG_16bit(asd_ha, lseq, SINDEX); + PRINT_LREG_16bit(asd_ha, lseq, DINDEX); + PRINT_LREG_8bit(asd_ha, lseq, SINDIR); + PRINT_LREG_8bit(asd_ha, lseq, DINDIR); + PRINT_LREG_8bit(asd_ha, lseq, JUMLDIR); + PRINT_LREG_8bit(asd_ha, lseq, ARP2HALTCODE); + PRINT_LREG_16bit(asd_ha, lseq, CURRADDR); + PRINT_LREG_16bit(asd_ha, lseq, LASTADDR); + PRINT_LREG_16bit(asd_ha, lseq, NXTLADDR); + + asd_printk("LSEQ%d: IOP REGISTERS\n", lseq); + + PRINT_LREG_32bit(asd_ha, lseq, MODECTL); + PRINT_LREG_32bit(asd_ha, lseq, DBGMODE); + PRINT_LREG_32bit(asd_ha, lseq, CONTROL); + PRINT_REG_32bit(asd_ha, BISTCTL0, LmBISTCTL0(lseq)); + PRINT_REG_32bit(asd_ha, BISTCTL1, LmBISTCTL1(lseq)); + + asd_printk("LSEQ%d: CIO REGISTERS\n", lseq); + asd_printk("Mode common:\n"); + + for (mode = 0; mode < 8; mode++) { + u32 lseq_cio_addr = LmSEQ_PHY_BASE(mode, lseq); + int i; + + for (i = 0; LSEQmCIOREGS[i].name; i++) + if (LSEQmCIOREGS[i].mode == MODE_COMMON) + asd_print_lseq_cio_reg(asd_ha,lseq_cio_addr,i); + } + + asd_printk("Mode unique:\n"); + for (mode = 0; mode < 8; mode++) { + u32 lseq_cio_addr = LmSEQ_PHY_BASE(mode, lseq); + int i; + + asd_printk("Mode %d\n", mode); + for (i = 0; LSEQmCIOREGS[i].name; i++) { + if (!(LSEQmCIOREGS[i].mode & (1 << mode))) + continue; + asd_print_lseq_cio_reg(asd_ha, lseq_cio_addr, i); + } + } + + asd_printk("SCRATCH MEMORY\n"); + + asd_printk("LSEQ%d MIP 0 >>>>\n", lseq); + PRINT_LMIP_word(asd_ha, lseq, Q_TGTXFR_HEAD); + PRINT_LMIP_word(asd_ha, lseq, Q_TGTXFR_TAIL); + PRINT_LMIP_byte(asd_ha, lseq, LINK_NUMBER); + PRINT_LMIP_byte(asd_ha, lseq, SCRATCH_FLAGS); + PRINT_LMIP_qword(asd_ha, lseq, CONNECTION_STATE); + PRINT_LMIP_word(asd_ha, lseq, CONCTL); + PRINT_LMIP_byte(asd_ha, lseq, CONSTAT); + PRINT_LMIP_byte(asd_ha, lseq, CONNECTION_MODES); + PRINT_LMIP_word(asd_ha, lseq, REG1_ISR); + PRINT_LMIP_word(asd_ha, lseq, REG2_ISR); + PRINT_LMIP_word(asd_ha, lseq, REG3_ISR); + PRINT_LMIP_qword(asd_ha, lseq,REG0_ISR); + + asd_printk("LSEQ%d MIP 1 >>>>\n", lseq); + PRINT_LMIP_word(asd_ha, lseq, EST_NEXUS_SCBPTR0); + PRINT_LMIP_word(asd_ha, lseq, EST_NEXUS_SCBPTR1); + PRINT_LMIP_word(asd_ha, lseq, EST_NEXUS_SCBPTR2); + PRINT_LMIP_word(asd_ha, lseq, EST_NEXUS_SCBPTR3); + PRINT_LMIP_byte(asd_ha, lseq, EST_NEXUS_SCB_OPCODE0); + PRINT_LMIP_byte(asd_ha, lseq, EST_NEXUS_SCB_OPCODE1); + PRINT_LMIP_byte(asd_ha, lseq, EST_NEXUS_SCB_OPCODE2); + PRINT_LMIP_byte(asd_ha, lseq, EST_NEXUS_SCB_OPCODE3); + PRINT_LMIP_byte(asd_ha, lseq, EST_NEXUS_SCB_HEAD); + PRINT_LMIP_byte(asd_ha, lseq, EST_NEXUS_SCB_TAIL); + PRINT_LMIP_byte(asd_ha, lseq, EST_NEXUS_BUF_AVAIL); + PRINT_LMIP_dword(asd_ha, lseq, TIMEOUT_CONST); + PRINT_LMIP_word(asd_ha, lseq, ISR_SAVE_SINDEX); + PRINT_LMIP_word(asd_ha, lseq, ISR_SAVE_DINDEX); + + asd_printk("LSEQ%d MIP 2 >>>>\n", lseq); + PRINT_LMIP_word(asd_ha, lseq, EMPTY_SCB_PTR0); + PRINT_LMIP_word(asd_ha, lseq, EMPTY_SCB_PTR1); + PRINT_LMIP_word(asd_ha, lseq, EMPTY_SCB_PTR2); + PRINT_LMIP_word(asd_ha, lseq, EMPTY_SCB_PTR3); + PRINT_LMIP_byte(asd_ha, lseq, EMPTY_SCB_OPCD0); + PRINT_LMIP_byte(asd_ha, lseq, EMPTY_SCB_OPCD1); + PRINT_LMIP_byte(asd_ha, lseq, EMPTY_SCB_OPCD2); + PRINT_LMIP_byte(asd_ha, lseq, EMPTY_SCB_OPCD3); + PRINT_LMIP_byte(asd_ha, lseq, EMPTY_SCB_HEAD); + PRINT_LMIP_byte(asd_ha, lseq, EMPTY_SCB_TAIL); + PRINT_LMIP_byte(asd_ha, lseq, EMPTY_BUFS_AVAIL); + + asd_printk("LSEQ%d MIP 3 >>>>\n", lseq); + PRINT_LMIP_dword(asd_ha, lseq, DEV_PRES_TMR_TOUT_CONST); + PRINT_LMIP_dword(asd_ha, lseq, SATA_INTERLOCK_TIMEOUT); + PRINT_LMIP_dword(asd_ha, lseq, SRST_ASSERT_TIMEOUT); + PRINT_LMIP_dword(asd_ha, lseq, RCV_FIS_TIMEOUT); + PRINT_LMIP_dword(asd_ha, lseq, ONE_MILLISEC_TIMEOUT); + PRINT_LMIP_dword(asd_ha, lseq, TEN_MS_COMINIT_TIMEOUT); + PRINT_LMIP_dword(asd_ha, lseq, SMP_RCV_TIMEOUT); + + for (mode = 0; mode < 3; mode++) { + asd_printk("LSEQ%d MDP 0 MODE %d >>>>\n", lseq, mode); + moffs = mode * LSEQ_MODE_SCRATCH_SIZE; + + asd_printk(STR_16BIT, "RET_ADDR", 0, + asd_read_reg_word(asd_ha, LmSEQ_RET_ADDR(lseq) + + moffs)); + asd_printk(STR_16BIT, "REG0_MODE", 2, + asd_read_reg_word(asd_ha, LmSEQ_REG0_MODE(lseq) + + moffs)); + asd_printk(STR_16BIT, "MODE_FLAGS", 4, + asd_read_reg_word(asd_ha, LmSEQ_MODE_FLAGS(lseq) + + moffs)); + asd_printk(STR_16BIT, "RET_ADDR2", 0x6, + asd_read_reg_word(asd_ha, LmSEQ_RET_ADDR2(lseq) + + moffs)); + asd_printk(STR_16BIT, "RET_ADDR1", 0x8, + asd_read_reg_word(asd_ha, LmSEQ_RET_ADDR1(lseq) + + moffs)); + asd_printk(STR_8BIT, "OPCODE_TO_CSEQ", 0xB, + asd_read_reg_byte(asd_ha, LmSEQ_OPCODE_TO_CSEQ(lseq) + + moffs)); + asd_printk(STR_16BIT, "DATA_TO_CSEQ", 0xC, + asd_read_reg_word(asd_ha, LmSEQ_DATA_TO_CSEQ(lseq) + + moffs)); + } + + asd_printk("LSEQ%d MDP 0 MODE 5 >>>>\n", lseq); + moffs = LSEQ_MODE5_PAGE0_OFFSET; + asd_printk(STR_16BIT, "RET_ADDR", 0, + asd_read_reg_word(asd_ha, LmSEQ_RET_ADDR(lseq) + moffs)); + asd_printk(STR_16BIT, "REG0_MODE", 2, + asd_read_reg_word(asd_ha, LmSEQ_REG0_MODE(lseq) + moffs)); + asd_printk(STR_16BIT, "MODE_FLAGS", 4, + asd_read_reg_word(asd_ha, LmSEQ_MODE_FLAGS(lseq) + moffs)); + asd_printk(STR_16BIT, "RET_ADDR2", 0x6, + asd_read_reg_word(asd_ha, LmSEQ_RET_ADDR2(lseq) + moffs)); + asd_printk(STR_16BIT, "RET_ADDR1", 0x8, + asd_read_reg_word(asd_ha, LmSEQ_RET_ADDR1(lseq) + moffs)); + asd_printk(STR_8BIT, "OPCODE_TO_CSEQ", 0xB, + asd_read_reg_byte(asd_ha, LmSEQ_OPCODE_TO_CSEQ(lseq) + moffs)); + asd_printk(STR_16BIT, "DATA_TO_CSEQ", 0xC, + asd_read_reg_word(asd_ha, LmSEQ_DATA_TO_CSEQ(lseq) + moffs)); + + asd_printk("LSEQ%d MDP 0 MODE 0 >>>>\n", lseq); + PRINT_LMIP_word(asd_ha, lseq, FIRST_INV_DDB_SITE); + PRINT_LMIP_word(asd_ha, lseq, EMPTY_TRANS_CTX); + PRINT_LMIP_word(asd_ha, lseq, RESP_LEN); + PRINT_LMIP_word(asd_ha, lseq, FIRST_INV_SCB_SITE); + PRINT_LMIP_dword(asd_ha, lseq, INTEN_SAVE); + PRINT_LMIP_byte(asd_ha, lseq, LINK_RST_FRM_LEN); + PRINT_LMIP_byte(asd_ha, lseq, LINK_RST_PROTOCOL); + PRINT_LMIP_byte(asd_ha, lseq, RESP_STATUS); + PRINT_LMIP_byte(asd_ha, lseq, LAST_LOADED_SGE); + PRINT_LMIP_byte(asd_ha, lseq, SAVE_SCBPTR); + + asd_printk("LSEQ%d MDP 0 MODE 1 >>>>\n", lseq); + PRINT_LMIP_word(asd_ha, lseq, Q_XMIT_HEAD); + PRINT_LMIP_word(asd_ha, lseq, M1_EMPTY_TRANS_CTX); + PRINT_LMIP_word(asd_ha, lseq, INI_CONN_TAG); + PRINT_LMIP_byte(asd_ha, lseq, FAILED_OPEN_STATUS); + PRINT_LMIP_byte(asd_ha, lseq, XMIT_REQUEST_TYPE); + PRINT_LMIP_byte(asd_ha, lseq, M1_RESP_STATUS); + PRINT_LMIP_byte(asd_ha, lseq, M1_LAST_LOADED_SGE); + PRINT_LMIP_word(asd_ha, lseq, M1_SAVE_SCBPTR); + + asd_printk("LSEQ%d MDP 0 MODE 2 >>>>\n", lseq); + PRINT_LMIP_word(asd_ha, lseq, PORT_COUNTER); + PRINT_LMIP_word(asd_ha, lseq, PM_TABLE_PTR); + PRINT_LMIP_word(asd_ha, lseq, SATA_INTERLOCK_TMR_SAVE); + PRINT_LMIP_word(asd_ha, lseq, IP_BITL); + PRINT_LMIP_word(asd_ha, lseq, COPY_SMP_CONN_TAG); + PRINT_LMIP_byte(asd_ha, lseq, P0M2_OFFS1AH); + + asd_printk("LSEQ%d MDP 0 MODE 4/5 >>>>\n", lseq); + PRINT_LMIP_byte(asd_ha, lseq, SAVED_OOB_STATUS); + PRINT_LMIP_byte(asd_ha, lseq, SAVED_OOB_MODE); + PRINT_LMIP_word(asd_ha, lseq, Q_LINK_HEAD); + PRINT_LMIP_byte(asd_ha, lseq, LINK_RST_ERR); + PRINT_LMIP_byte(asd_ha, lseq, SAVED_OOB_SIGNALS); + PRINT_LMIP_byte(asd_ha, lseq, SAS_RESET_MODE); + PRINT_LMIP_byte(asd_ha, lseq, LINK_RESET_RETRY_COUNT); + PRINT_LMIP_byte(asd_ha, lseq, NUM_LINK_RESET_RETRIES); + PRINT_LMIP_word(asd_ha, lseq, OOB_INT_ENABLES); + PRINT_LMIP_word(asd_ha, lseq, NOTIFY_TIMER_TIMEOUT); + PRINT_LMIP_word(asd_ha, lseq, NOTIFY_TIMER_DOWN_COUNT); + + asd_printk("LSEQ%d MDP 1 MODE 0 >>>>\n", lseq); + PRINT_LMIP_qword(asd_ha, lseq, SG_LIST_PTR_ADDR0); + PRINT_LMIP_qword(asd_ha, lseq, SG_LIST_PTR_ADDR1); + + asd_printk("LSEQ%d MDP 1 MODE 1 >>>>\n", lseq); + PRINT_LMIP_qword(asd_ha, lseq, M1_SG_LIST_PTR_ADDR0); + PRINT_LMIP_qword(asd_ha, lseq, M1_SG_LIST_PTR_ADDR1); + + asd_printk("LSEQ%d MDP 1 MODE 2 >>>>\n", lseq); + PRINT_LMIP_dword(asd_ha, lseq, INVALID_DWORD_COUNT); + PRINT_LMIP_dword(asd_ha, lseq, DISPARITY_ERROR_COUNT); + PRINT_LMIP_dword(asd_ha, lseq, LOSS_OF_SYNC_COUNT); + + asd_printk("LSEQ%d MDP 1 MODE 4/5 >>>>\n", lseq); + PRINT_LMIP_dword(asd_ha, lseq, FRAME_TYPE_MASK); + PRINT_LMIP_dword(asd_ha, lseq, HASHED_SRC_ADDR_MASK_PRINT); + PRINT_LMIP_byte(asd_ha, lseq, NUM_FILL_BYTES_MASK); + PRINT_LMIP_word(asd_ha, lseq, TAG_MASK); + PRINT_LMIP_word(asd_ha, lseq, TARGET_PORT_XFER_TAG); + PRINT_LMIP_dword(asd_ha, lseq, DATA_OFFSET); + + asd_printk("LSEQ%d MDP 2 MODE 0 >>>>\n", lseq); + PRINT_LMIP_dword(asd_ha, lseq, SMP_RCV_TIMER_TERM_TS); + PRINT_LMIP_byte(asd_ha, lseq, DEVICE_BITS); + PRINT_LMIP_word(asd_ha, lseq, SDB_DDB); + PRINT_LMIP_word(asd_ha, lseq, SDB_NUM_TAGS); + PRINT_LMIP_word(asd_ha, lseq, SDB_CURR_TAG); + + asd_printk("LSEQ%d MDP 2 MODE 1 >>>>\n", lseq); + PRINT_LMIP_qword(asd_ha, lseq, TX_ID_ADDR_FRAME); + PRINT_LMIP_dword(asd_ha, lseq, OPEN_TIMER_TERM_TS); + PRINT_LMIP_dword(asd_ha, lseq, SRST_AS_TIMER_TERM_TS); + PRINT_LMIP_dword(asd_ha, lseq, LAST_LOADED_SG_EL); + + asd_printk("LSEQ%d MDP 2 MODE 2 >>>>\n", lseq); + PRINT_LMIP_dword(asd_ha, lseq, CLOSE_TIMER_TERM_TS); + PRINT_LMIP_dword(asd_ha, lseq, BREAK_TIMER_TERM_TS); + PRINT_LMIP_dword(asd_ha, lseq, DWS_RESET_TIMER_TERM_TS); + PRINT_LMIP_dword(asd_ha, lseq, SATA_INTERLOCK_TIMER_TERM_TS); + PRINT_LMIP_dword(asd_ha, lseq, MCTL_TIMER_TERM_TS); + + asd_printk("LSEQ%d MDP 2 MODE 4/5 >>>>\n", lseq); + PRINT_LMIP_dword(asd_ha, lseq, COMINIT_TIMER_TERM_TS); + PRINT_LMIP_dword(asd_ha, lseq, RCV_ID_TIMER_TERM_TS); + PRINT_LMIP_dword(asd_ha, lseq, RCV_FIS_TIMER_TERM_TS); + PRINT_LMIP_dword(asd_ha, lseq, DEV_PRES_TIMER_TERM_TS); +} + +/** + * asd_dump_ddb_site -- dump a CSEQ DDB site + * @asd_ha: pointer to host adapter structure + * @site_no: site number of interest + */ +void asd_dump_target_ddb(struct asd_ha_struct *asd_ha, u16 site_no) +{ + if (site_no >= asd_ha->hw_prof.max_ddbs) + return; + +#define DDB_FIELDB(__name) \ + asd_ddbsite_read_byte(asd_ha, site_no, \ + offsetof(struct asd_ddb_ssp_smp_target_port, __name)) +#define DDB2_FIELDB(__name) \ + asd_ddbsite_read_byte(asd_ha, site_no, \ + offsetof(struct asd_ddb_stp_sata_target_port, __name)) +#define DDB_FIELDW(__name) \ + asd_ddbsite_read_word(asd_ha, site_no, \ + offsetof(struct asd_ddb_ssp_smp_target_port, __name)) + +#define DDB_FIELDD(__name) \ + asd_ddbsite_read_dword(asd_ha, site_no, \ + offsetof(struct asd_ddb_ssp_smp_target_port, __name)) + + asd_printk("DDB: 0x%02x\n", site_no); + asd_printk("conn_type: 0x%02x\n", DDB_FIELDB(conn_type)); + asd_printk("conn_rate: 0x%02x\n", DDB_FIELDB(conn_rate)); + asd_printk("init_conn_tag: 0x%04x\n", be16_to_cpu(DDB_FIELDW(init_conn_tag))); + asd_printk("send_queue_head: 0x%04x\n", be16_to_cpu(DDB_FIELDW(send_queue_head))); + asd_printk("sq_suspended: 0x%02x\n", DDB_FIELDB(sq_suspended)); + asd_printk("DDB Type: 0x%02x\n", DDB_FIELDB(ddb_type)); + asd_printk("AWT Default: 0x%04x\n", DDB_FIELDW(awt_def)); + asd_printk("compat_features: 0x%02x\n", DDB_FIELDB(compat_features)); + asd_printk("Pathway Blocked Count: 0x%02x\n", + DDB_FIELDB(pathway_blocked_count)); + asd_printk("arb_wait_time: 0x%04x\n", DDB_FIELDW(arb_wait_time)); + asd_printk("more_compat_features: 0x%08x\n", + DDB_FIELDD(more_compat_features)); + asd_printk("Conn Mask: 0x%02x\n", DDB_FIELDB(conn_mask)); + asd_printk("flags: 0x%02x\n", DDB_FIELDB(flags)); + asd_printk("flags2: 0x%02x\n", DDB2_FIELDB(flags2)); + asd_printk("ExecQ Tail: 0x%04x\n",DDB_FIELDW(exec_queue_tail)); + asd_printk("SendQ Tail: 0x%04x\n",DDB_FIELDW(send_queue_tail)); + asd_printk("Active Task Count: 0x%04x\n", + DDB_FIELDW(active_task_count)); + asd_printk("ITNL Reason: 0x%02x\n", DDB_FIELDB(itnl_reason)); + asd_printk("ITNL Timeout Const: 0x%04x\n", DDB_FIELDW(itnl_timeout)); + asd_printk("ITNL timestamp: 0x%08x\n", DDB_FIELDD(itnl_timestamp)); +} + +void asd_dump_ddb_0(struct asd_ha_struct *asd_ha) +{ +#define DDB0_FIELDB(__name) \ + asd_ddbsite_read_byte(asd_ha, 0, \ + offsetof(struct asd_ddb_seq_shared, __name)) +#define DDB0_FIELDW(__name) \ + asd_ddbsite_read_word(asd_ha, 0, \ + offsetof(struct asd_ddb_seq_shared, __name)) + +#define DDB0_FIELDD(__name) \ + asd_ddbsite_read_dword(asd_ha,0 , \ + offsetof(struct asd_ddb_seq_shared, __name)) + +#define DDB0_FIELDA(__name, _o) \ + asd_ddbsite_read_byte(asd_ha, 0, \ + offsetof(struct asd_ddb_seq_shared, __name)+_o) + + + asd_printk("DDB: 0\n"); + asd_printk("q_free_ddb_head:%04x\n", DDB0_FIELDW(q_free_ddb_head)); + asd_printk("q_free_ddb_tail:%04x\n", DDB0_FIELDW(q_free_ddb_tail)); + asd_printk("q_free_ddb_cnt:%04x\n", DDB0_FIELDW(q_free_ddb_cnt)); + asd_printk("q_used_ddb_head:%04x\n", DDB0_FIELDW(q_used_ddb_head)); + asd_printk("q_used_ddb_tail:%04x\n", DDB0_FIELDW(q_used_ddb_tail)); + asd_printk("shared_mem_lock:%04x\n", DDB0_FIELDW(shared_mem_lock)); + asd_printk("smp_conn_tag:%04x\n", DDB0_FIELDW(smp_conn_tag)); + asd_printk("est_nexus_buf_cnt:%04x\n", DDB0_FIELDW(est_nexus_buf_cnt)); + asd_printk("est_nexus_buf_thresh:%04x\n", + DDB0_FIELDW(est_nexus_buf_thresh)); + asd_printk("conn_not_active:%02x\n", DDB0_FIELDB(conn_not_active)); + asd_printk("phy_is_up:%02x\n", DDB0_FIELDB(phy_is_up)); + asd_printk("port_map_by_links:%02x %02x %02x %02x " + "%02x %02x %02x %02x\n", + DDB0_FIELDA(port_map_by_links, 0), + DDB0_FIELDA(port_map_by_links, 1), + DDB0_FIELDA(port_map_by_links, 2), + DDB0_FIELDA(port_map_by_links, 3), + DDB0_FIELDA(port_map_by_links, 4), + DDB0_FIELDA(port_map_by_links, 5), + DDB0_FIELDA(port_map_by_links, 6), + DDB0_FIELDA(port_map_by_links, 7)); +} + +static void asd_dump_scb_site(struct asd_ha_struct *asd_ha, u16 site_no) +{ + +#define SCB_FIELDB(__name) \ + asd_scbsite_read_byte(asd_ha, site_no, sizeof(struct scb_header) \ + + offsetof(struct initiate_ssp_task, __name)) +#define SCB_FIELDW(__name) \ + asd_scbsite_read_word(asd_ha, site_no, sizeof(struct scb_header) \ + + offsetof(struct initiate_ssp_task, __name)) +#define SCB_FIELDD(__name) \ + asd_scbsite_read_dword(asd_ha, site_no, sizeof(struct scb_header) \ + + offsetof(struct initiate_ssp_task, __name)) + + asd_printk("Total Xfer Len: 0x%08x.\n", SCB_FIELDD(total_xfer_len)); + asd_printk("Frame Type: 0x%02x.\n", SCB_FIELDB(ssp_frame.frame_type)); + asd_printk("Tag: 0x%04x.\n", SCB_FIELDW(ssp_frame.tag)); + asd_printk("Target Port Xfer Tag: 0x%04x.\n", + SCB_FIELDW(ssp_frame.tptt)); + asd_printk("Data Offset: 0x%08x.\n", SCB_FIELDW(ssp_frame.data_offs)); + asd_printk("Retry Count: 0x%02x.\n", SCB_FIELDB(retry_count)); +} + +/** + * asd_dump_scb_sites -- dump currently used CSEQ SCB sites + * @asd_ha: pointer to host adapter struct + */ +void asd_dump_scb_sites(struct asd_ha_struct *asd_ha) +{ + u16 site_no; + + for (site_no = 0; site_no < asd_ha->hw_prof.max_scbs; site_no++) { + u8 opcode; + + if (!SCB_SITE_VALID(site_no)) + continue; + + /* We are only interested in SCB sites currently used. + */ + opcode = asd_scbsite_read_byte(asd_ha, site_no, + offsetof(struct scb_header, + opcode)); + if (opcode == 0xFF) + continue; + + asd_printk("\nSCB: 0x%x\n", site_no); + asd_dump_scb_site(asd_ha, site_no); + } +} + +/** + * ads_dump_seq_state -- dump CSEQ and LSEQ states + * @asd_ha: pointer to host adapter structure + * @lseq_mask: mask of LSEQs of interest + */ +void asd_dump_seq_state(struct asd_ha_struct *asd_ha, u8 lseq_mask) +{ + int lseq; + + asd_dump_cseq_state(asd_ha); + + if (lseq_mask != 0) + for_each_sequencer(lseq_mask, lseq_mask, lseq) + asd_dump_lseq_state(asd_ha, lseq); +} + +void asd_dump_frame_rcvd(struct asd_phy *phy, + struct done_list_struct *dl) +{ + unsigned long flags; + int i; + + switch ((dl->status_block[1] & 0x70) >> 3) { + case SAS_PROTO_STP: + ASD_DPRINTK("STP proto device-to-host FIS:\n"); + break; + default: + case SAS_PROTO_SSP: + ASD_DPRINTK("SAS proto IDENTIFY:\n"); + break; + } + spin_lock_irqsave(&phy->sas_phy.frame_rcvd_lock, flags); + for (i = 0; i < phy->sas_phy.frame_rcvd_size; i+=4) + ASD_DPRINTK("%02x: %02x %02x %02x %02x\n", + i, + phy->frame_rcvd[i], + phy->frame_rcvd[i+1], + phy->frame_rcvd[i+2], + phy->frame_rcvd[i+3]); + spin_unlock_irqrestore(&phy->sas_phy.frame_rcvd_lock, flags); +} + +static inline void asd_dump_scb(struct asd_ascb *ascb, int ind) +{ + asd_printk("scb%d: vaddr: 0x%p, dma_handle: 0x%llx, next: 0x%llx, " + "index:%d, opcode:0x%02x\n", + ind, ascb->dma_scb.vaddr, + (unsigned long long)ascb->dma_scb.dma_handle, + (unsigned long long) + le64_to_cpu(ascb->scb->header.next_scb), + le16_to_cpu(ascb->scb->header.index), + ascb->scb->header.opcode); +} + +void asd_dump_scb_list(struct asd_ascb *ascb, int num) +{ + int i = 0; + + asd_printk("dumping %d scbs:\n", num); + + asd_dump_scb(ascb, i++); + --num; + + if (num > 0 && !list_empty(&ascb->list)) { + struct list_head *el; + + list_for_each(el, &ascb->list) { + struct asd_ascb *s = list_entry(el, struct asd_ascb, + list); + asd_dump_scb(s, i++); + if (--num <= 0) + break; + } + } +} + +#endif /* ASD_DEBUG */ diff --git a/drivers/scsi/aic94xx/aic94xx_dump.h b/drivers/scsi/aic94xx/aic94xx_dump.h new file mode 100644 index 000000000000..0c388e7da6bb --- /dev/null +++ b/drivers/scsi/aic94xx/aic94xx_dump.h @@ -0,0 +1,52 @@ +/* + * Aic94xx SAS/SATA driver dump header file. + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This file is part of the aic94xx driver. + * + * The aic94xx driver is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2 of the + * License. + * + * The aic94xx driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with the aic94xx driver; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _AIC94XX_DUMP_H_ +#define _AIC94XX_DUMP_H_ + +#ifdef ASD_DEBUG + +void asd_dump_ddb_0(struct asd_ha_struct *asd_ha); +void asd_dump_target_ddb(struct asd_ha_struct *asd_ha, u16 site_no); +void asd_dump_scb_sites(struct asd_ha_struct *asd_ha); +void asd_dump_seq_state(struct asd_ha_struct *asd_ha, u8 lseq_mask); +void asd_dump_frame_rcvd(struct asd_phy *phy, + struct done_list_struct *dl); +void asd_dump_scb_list(struct asd_ascb *ascb, int num); +#else /* ASD_DEBUG */ + +static inline void asd_dump_ddb_0(struct asd_ha_struct *asd_ha) { } +static inline void asd_dump_target_ddb(struct asd_ha_struct *asd_ha, + u16 site_no) { } +static inline void asd_dump_scb_sites(struct asd_ha_struct *asd_ha) { } +static inline void asd_dump_seq_state(struct asd_ha_struct *asd_ha, + u8 lseq_mask) { } +static inline void asd_dump_frame_rcvd(struct asd_phy *phy, + struct done_list_struct *dl) { } +static inline void asd_dump_scb_list(struct asd_ascb *ascb, int num) { } +#endif /* ASD_DEBUG */ + +#endif /* _AIC94XX_DUMP_H_ */ diff --git a/drivers/scsi/aic94xx/aic94xx_hwi.c b/drivers/scsi/aic94xx/aic94xx_hwi.c new file mode 100644 index 000000000000..075cea85b56b --- /dev/null +++ b/drivers/scsi/aic94xx/aic94xx_hwi.c @@ -0,0 +1,1376 @@ +/* + * Aic94xx SAS/SATA driver hardware interface. + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This file is part of the aic94xx driver. + * + * The aic94xx driver is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2 of the + * License. + * + * The aic94xx driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with the aic94xx driver; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include + +#include "aic94xx.h" +#include "aic94xx_reg.h" +#include "aic94xx_hwi.h" +#include "aic94xx_seq.h" +#include "aic94xx_dump.h" + +u32 MBAR0_SWB_SIZE; + +/* ---------- Initialization ---------- */ + +static void asd_get_user_sas_addr(struct asd_ha_struct *asd_ha) +{ + extern char sas_addr_str[]; + /* If the user has specified a WWN it overrides other settings + */ + if (sas_addr_str[0] != '\0') + asd_destringify_sas_addr(asd_ha->hw_prof.sas_addr, + sas_addr_str); + else if (asd_ha->hw_prof.sas_addr[0] != 0) + asd_stringify_sas_addr(sas_addr_str, asd_ha->hw_prof.sas_addr); +} + +static void asd_propagate_sas_addr(struct asd_ha_struct *asd_ha) +{ + int i; + + for (i = 0; i < ASD_MAX_PHYS; i++) { + if (asd_ha->hw_prof.phy_desc[i].sas_addr[0] == 0) + continue; + /* Set a phy's address only if it has none. + */ + ASD_DPRINTK("setting phy%d addr to %llx\n", i, + SAS_ADDR(asd_ha->hw_prof.sas_addr)); + memcpy(asd_ha->hw_prof.phy_desc[i].sas_addr, + asd_ha->hw_prof.sas_addr, SAS_ADDR_SIZE); + } +} + +/* ---------- PHY initialization ---------- */ + +static void asd_init_phy_identify(struct asd_phy *phy) +{ + phy->identify_frame = phy->id_frm_tok->vaddr; + + memset(phy->identify_frame, 0, sizeof(*phy->identify_frame)); + + phy->identify_frame->dev_type = SAS_END_DEV; + if (phy->sas_phy.role & PHY_ROLE_INITIATOR) + phy->identify_frame->initiator_bits = phy->sas_phy.iproto; + if (phy->sas_phy.role & PHY_ROLE_TARGET) + phy->identify_frame->target_bits = phy->sas_phy.tproto; + memcpy(phy->identify_frame->sas_addr, phy->phy_desc->sas_addr, + SAS_ADDR_SIZE); + phy->identify_frame->phy_id = phy->sas_phy.id; +} + +static int asd_init_phy(struct asd_phy *phy) +{ + struct asd_ha_struct *asd_ha = phy->sas_phy.ha->lldd_ha; + struct asd_sas_phy *sas_phy = &phy->sas_phy; + + sas_phy->enabled = 1; + sas_phy->class = SAS; + sas_phy->iproto = SAS_PROTO_ALL; + sas_phy->tproto = 0; + sas_phy->type = PHY_TYPE_PHYSICAL; + sas_phy->role = PHY_ROLE_INITIATOR; + sas_phy->oob_mode = OOB_NOT_CONNECTED; + sas_phy->linkrate = PHY_LINKRATE_NONE; + + phy->id_frm_tok = asd_alloc_coherent(asd_ha, + sizeof(*phy->identify_frame), + GFP_KERNEL); + if (!phy->id_frm_tok) { + asd_printk("no mem for IDENTIFY for phy%d\n", sas_phy->id); + return -ENOMEM; + } else + asd_init_phy_identify(phy); + + memset(phy->frame_rcvd, 0, sizeof(phy->frame_rcvd)); + + return 0; +} + +static int asd_init_phys(struct asd_ha_struct *asd_ha) +{ + u8 i; + u8 phy_mask = asd_ha->hw_prof.enabled_phys; + + for (i = 0; i < ASD_MAX_PHYS; i++) { + struct asd_phy *phy = &asd_ha->phys[i]; + + phy->phy_desc = &asd_ha->hw_prof.phy_desc[i]; + + phy->sas_phy.enabled = 0; + phy->sas_phy.id = i; + phy->sas_phy.sas_addr = &phy->phy_desc->sas_addr[0]; + phy->sas_phy.frame_rcvd = &phy->frame_rcvd[0]; + phy->sas_phy.ha = &asd_ha->sas_ha; + phy->sas_phy.lldd_phy = phy; + } + + /* Now enable and initialize only the enabled phys. */ + for_each_phy(phy_mask, phy_mask, i) { + int err = asd_init_phy(&asd_ha->phys[i]); + if (err) + return err; + } + + return 0; +} + +/* ---------- Sliding windows ---------- */ + +static int asd_init_sw(struct asd_ha_struct *asd_ha) +{ + struct pci_dev *pcidev = asd_ha->pcidev; + int err; + u32 v; + + /* Unlock MBARs */ + err = pci_read_config_dword(pcidev, PCI_CONF_MBAR_KEY, &v); + if (err) { + asd_printk("couldn't access conf. space of %s\n", + pci_name(pcidev)); + goto Err; + } + if (v) + err = pci_write_config_dword(pcidev, PCI_CONF_MBAR_KEY, v); + if (err) { + asd_printk("couldn't write to MBAR_KEY of %s\n", + pci_name(pcidev)); + goto Err; + } + + /* Set sliding windows A, B and C to point to proper internal + * memory regions. + */ + pci_write_config_dword(pcidev, PCI_CONF_MBAR0_SWA, REG_BASE_ADDR); + pci_write_config_dword(pcidev, PCI_CONF_MBAR0_SWB, + REG_BASE_ADDR_CSEQCIO); + pci_write_config_dword(pcidev, PCI_CONF_MBAR0_SWC, REG_BASE_ADDR_EXSI); + asd_ha->io_handle[0].swa_base = REG_BASE_ADDR; + asd_ha->io_handle[0].swb_base = REG_BASE_ADDR_CSEQCIO; + asd_ha->io_handle[0].swc_base = REG_BASE_ADDR_EXSI; + MBAR0_SWB_SIZE = asd_ha->io_handle[0].len - 0x80; + if (!asd_ha->iospace) { + /* MBAR1 will point to OCM (On Chip Memory) */ + pci_write_config_dword(pcidev, PCI_CONF_MBAR1, OCM_BASE_ADDR); + asd_ha->io_handle[1].swa_base = OCM_BASE_ADDR; + } + spin_lock_init(&asd_ha->iolock); +Err: + return err; +} + +/* ---------- SCB initialization ---------- */ + +/** + * asd_init_scbs - manually allocate the first SCB. + * @asd_ha: pointer to host adapter structure + * + * This allocates the very first SCB which would be sent to the + * sequencer for execution. Its bus address is written to + * CSEQ_Q_NEW_POINTER, mode page 2, mode 8. Since the bus address of + * the _next_ scb to be DMA-ed to the host adapter is read from the last + * SCB DMA-ed to the host adapter, we have to always stay one step + * ahead of the sequencer and keep one SCB already allocated. + */ +static int asd_init_scbs(struct asd_ha_struct *asd_ha) +{ + struct asd_seq_data *seq = &asd_ha->seq; + int bitmap_bytes; + + /* allocate the index array and bitmap */ + asd_ha->seq.tc_index_bitmap_bits = asd_ha->hw_prof.max_scbs; + asd_ha->seq.tc_index_array = kzalloc(asd_ha->seq.tc_index_bitmap_bits* + sizeof(void *), GFP_KERNEL); + if (!asd_ha->seq.tc_index_array) + return -ENOMEM; + + bitmap_bytes = (asd_ha->seq.tc_index_bitmap_bits+7)/8; + bitmap_bytes = BITS_TO_LONGS(bitmap_bytes*8)*sizeof(unsigned long); + asd_ha->seq.tc_index_bitmap = kzalloc(bitmap_bytes, GFP_KERNEL); + if (!asd_ha->seq.tc_index_bitmap) + return -ENOMEM; + + spin_lock_init(&seq->tc_index_lock); + + seq->next_scb.size = sizeof(struct scb); + seq->next_scb.vaddr = dma_pool_alloc(asd_ha->scb_pool, GFP_KERNEL, + &seq->next_scb.dma_handle); + if (!seq->next_scb.vaddr) { + kfree(asd_ha->seq.tc_index_bitmap); + kfree(asd_ha->seq.tc_index_array); + asd_ha->seq.tc_index_bitmap = NULL; + asd_ha->seq.tc_index_array = NULL; + return -ENOMEM; + } + + seq->pending = 0; + spin_lock_init(&seq->pend_q_lock); + INIT_LIST_HEAD(&seq->pend_q); + + return 0; +} + +static inline void asd_get_max_scb_ddb(struct asd_ha_struct *asd_ha) +{ + asd_ha->hw_prof.max_scbs = asd_get_cmdctx_size(asd_ha)/ASD_SCB_SIZE; + asd_ha->hw_prof.max_ddbs = asd_get_devctx_size(asd_ha)/ASD_DDB_SIZE; + ASD_DPRINTK("max_scbs:%d, max_ddbs:%d\n", + asd_ha->hw_prof.max_scbs, + asd_ha->hw_prof.max_ddbs); +} + +/* ---------- Done List initialization ---------- */ + +static void asd_dl_tasklet_handler(unsigned long); + +static int asd_init_dl(struct asd_ha_struct *asd_ha) +{ + asd_ha->seq.actual_dl + = asd_alloc_coherent(asd_ha, + ASD_DL_SIZE * sizeof(struct done_list_struct), + GFP_KERNEL); + if (!asd_ha->seq.actual_dl) + return -ENOMEM; + asd_ha->seq.dl = asd_ha->seq.actual_dl->vaddr; + asd_ha->seq.dl_toggle = ASD_DEF_DL_TOGGLE; + asd_ha->seq.dl_next = 0; + tasklet_init(&asd_ha->seq.dl_tasklet, asd_dl_tasklet_handler, + (unsigned long) asd_ha); + + return 0; +} + +/* ---------- EDB and ESCB init ---------- */ + +static int asd_alloc_edbs(struct asd_ha_struct *asd_ha, unsigned int gfp_flags) +{ + struct asd_seq_data *seq = &asd_ha->seq; + int i; + + seq->edb_arr = kmalloc(seq->num_edbs*sizeof(*seq->edb_arr), gfp_flags); + if (!seq->edb_arr) + return -ENOMEM; + + for (i = 0; i < seq->num_edbs; i++) { + seq->edb_arr[i] = asd_alloc_coherent(asd_ha, ASD_EDB_SIZE, + gfp_flags); + if (!seq->edb_arr[i]) + goto Err_unroll; + memset(seq->edb_arr[i]->vaddr, 0, ASD_EDB_SIZE); + } + + ASD_DPRINTK("num_edbs:%d\n", seq->num_edbs); + + return 0; + +Err_unroll: + for (i-- ; i >= 0; i--) + asd_free_coherent(asd_ha, seq->edb_arr[i]); + kfree(seq->edb_arr); + seq->edb_arr = NULL; + + return -ENOMEM; +} + +static int asd_alloc_escbs(struct asd_ha_struct *asd_ha, + unsigned int gfp_flags) +{ + struct asd_seq_data *seq = &asd_ha->seq; + struct asd_ascb *escb; + int i, escbs; + + seq->escb_arr = kmalloc(seq->num_escbs*sizeof(*seq->escb_arr), + gfp_flags); + if (!seq->escb_arr) + return -ENOMEM; + + escbs = seq->num_escbs; + escb = asd_ascb_alloc_list(asd_ha, &escbs, gfp_flags); + if (!escb) { + asd_printk("couldn't allocate list of escbs\n"); + goto Err; + } + seq->num_escbs -= escbs; /* subtract what was not allocated */ + ASD_DPRINTK("num_escbs:%d\n", seq->num_escbs); + + for (i = 0; i < seq->num_escbs; i++, escb = list_entry(escb->list.next, + struct asd_ascb, + list)) { + seq->escb_arr[i] = escb; + escb->scb->header.opcode = EMPTY_SCB; + } + + return 0; +Err: + kfree(seq->escb_arr); + seq->escb_arr = NULL; + return -ENOMEM; + +} + +static void asd_assign_edbs2escbs(struct asd_ha_struct *asd_ha) +{ + struct asd_seq_data *seq = &asd_ha->seq; + int i, k, z = 0; + + for (i = 0; i < seq->num_escbs; i++) { + struct asd_ascb *ascb = seq->escb_arr[i]; + struct empty_scb *escb = &ascb->scb->escb; + + ascb->edb_index = z; + + escb->num_valid = ASD_EDBS_PER_SCB; + + for (k = 0; k < ASD_EDBS_PER_SCB; k++) { + struct sg_el *eb = &escb->eb[k]; + struct asd_dma_tok *edb = seq->edb_arr[z++]; + + memset(eb, 0, sizeof(*eb)); + eb->bus_addr = cpu_to_le64(((u64) edb->dma_handle)); + eb->size = cpu_to_le32(((u32) edb->size)); + } + } +} + +/** + * asd_init_escbs -- allocate and initialize empty scbs + * @asd_ha: pointer to host adapter structure + * + * An empty SCB has sg_elements of ASD_EDBS_PER_SCB (7) buffers. + * They transport sense data, etc. + */ +static int asd_init_escbs(struct asd_ha_struct *asd_ha) +{ + struct asd_seq_data *seq = &asd_ha->seq; + int err = 0; + + /* Allocate two empty data buffers (edb) per sequencer. */ + int edbs = 2*(1+asd_ha->hw_prof.num_phys); + + seq->num_escbs = (edbs+ASD_EDBS_PER_SCB-1)/ASD_EDBS_PER_SCB; + seq->num_edbs = seq->num_escbs * ASD_EDBS_PER_SCB; + + err = asd_alloc_edbs(asd_ha, GFP_KERNEL); + if (err) { + asd_printk("couldn't allocate edbs\n"); + return err; + } + + err = asd_alloc_escbs(asd_ha, GFP_KERNEL); + if (err) { + asd_printk("couldn't allocate escbs\n"); + return err; + } + + asd_assign_edbs2escbs(asd_ha); + /* In order to insure that normal SCBs do not overfill sequencer + * memory and leave no space for escbs (halting condition), + * we increment pending here by the number of escbs. However, + * escbs are never pending. + */ + seq->pending = seq->num_escbs; + seq->can_queue = 1 + (asd_ha->hw_prof.max_scbs - seq->pending)/2; + + return 0; +} + +/* ---------- HW initialization ---------- */ + +/** + * asd_chip_hardrst -- hard reset the chip + * @asd_ha: pointer to host adapter structure + * + * This takes 16 cycles and is synchronous to CFCLK, which runs + * at 200 MHz, so this should take at most 80 nanoseconds. + */ +int asd_chip_hardrst(struct asd_ha_struct *asd_ha) +{ + int i; + int count = 100; + u32 reg; + + for (i = 0 ; i < 4 ; i++) { + asd_write_reg_dword(asd_ha, COMBIST, HARDRST); + } + + do { + udelay(1); + reg = asd_read_reg_dword(asd_ha, CHIMINT); + if (reg & HARDRSTDET) { + asd_write_reg_dword(asd_ha, CHIMINT, + HARDRSTDET|PORRSTDET); + return 0; + } + } while (--count > 0); + + return -ENODEV; +} + +/** + * asd_init_chip -- initialize the chip + * @asd_ha: pointer to host adapter structure + * + * Hard resets the chip, disables HA interrupts, downloads the sequnecer + * microcode and starts the sequencers. The caller has to explicitly + * enable HA interrupts with asd_enable_ints(asd_ha). + */ +static int asd_init_chip(struct asd_ha_struct *asd_ha) +{ + int err; + + err = asd_chip_hardrst(asd_ha); + if (err) { + asd_printk("couldn't hard reset %s\n", + pci_name(asd_ha->pcidev)); + goto out; + } + + asd_disable_ints(asd_ha); + + err = asd_init_seqs(asd_ha); + if (err) { + asd_printk("couldn't init seqs for %s\n", + pci_name(asd_ha->pcidev)); + goto out; + } + + err = asd_start_seqs(asd_ha); + if (err) { + asd_printk("coudln't start seqs for %s\n", + pci_name(asd_ha->pcidev)); + goto out; + } +out: + return err; +} + +#define MAX_DEVS ((OCM_MAX_SIZE) / (ASD_DDB_SIZE)) + +static int max_devs = 0; +module_param_named(max_devs, max_devs, int, S_IRUGO); +MODULE_PARM_DESC(max_devs, "\n" + "\tMaximum number of SAS devices to support (not LUs).\n" + "\tDefault: 2176, Maximum: 65663.\n"); + +static int max_cmnds = 0; +module_param_named(max_cmnds, max_cmnds, int, S_IRUGO); +MODULE_PARM_DESC(max_cmnds, "\n" + "\tMaximum number of commands queuable.\n" + "\tDefault: 512, Maximum: 66047.\n"); + +static void asd_extend_devctx_ocm(struct asd_ha_struct *asd_ha) +{ + unsigned long dma_addr = OCM_BASE_ADDR; + u32 d; + + dma_addr -= asd_ha->hw_prof.max_ddbs * ASD_DDB_SIZE; + asd_write_reg_addr(asd_ha, DEVCTXBASE, (dma_addr_t) dma_addr); + d = asd_read_reg_dword(asd_ha, CTXDOMAIN); + d |= 4; + asd_write_reg_dword(asd_ha, CTXDOMAIN, d); + asd_ha->hw_prof.max_ddbs += MAX_DEVS; +} + +static int asd_extend_devctx(struct asd_ha_struct *asd_ha) +{ + dma_addr_t dma_handle; + unsigned long dma_addr; + u32 d; + int size; + + asd_extend_devctx_ocm(asd_ha); + + asd_ha->hw_prof.ddb_ext = NULL; + if (max_devs <= asd_ha->hw_prof.max_ddbs || max_devs > 0xFFFF) { + max_devs = asd_ha->hw_prof.max_ddbs; + return 0; + } + + size = (max_devs - asd_ha->hw_prof.max_ddbs + 1) * ASD_DDB_SIZE; + + asd_ha->hw_prof.ddb_ext = asd_alloc_coherent(asd_ha, size, GFP_KERNEL); + if (!asd_ha->hw_prof.ddb_ext) { + asd_printk("couldn't allocate memory for %d devices\n", + max_devs); + max_devs = asd_ha->hw_prof.max_ddbs; + return -ENOMEM; + } + dma_handle = asd_ha->hw_prof.ddb_ext->dma_handle; + dma_addr = ALIGN((unsigned long) dma_handle, ASD_DDB_SIZE); + dma_addr -= asd_ha->hw_prof.max_ddbs * ASD_DDB_SIZE; + dma_handle = (dma_addr_t) dma_addr; + asd_write_reg_addr(asd_ha, DEVCTXBASE, dma_handle); + d = asd_read_reg_dword(asd_ha, CTXDOMAIN); + d &= ~4; + asd_write_reg_dword(asd_ha, CTXDOMAIN, d); + + asd_ha->hw_prof.max_ddbs = max_devs; + + return 0; +} + +static int asd_extend_cmdctx(struct asd_ha_struct *asd_ha) +{ + dma_addr_t dma_handle; + unsigned long dma_addr; + u32 d; + int size; + + asd_ha->hw_prof.scb_ext = NULL; + if (max_cmnds <= asd_ha->hw_prof.max_scbs || max_cmnds > 0xFFFF) { + max_cmnds = asd_ha->hw_prof.max_scbs; + return 0; + } + + size = (max_cmnds - asd_ha->hw_prof.max_scbs + 1) * ASD_SCB_SIZE; + + asd_ha->hw_prof.scb_ext = asd_alloc_coherent(asd_ha, size, GFP_KERNEL); + if (!asd_ha->hw_prof.scb_ext) { + asd_printk("couldn't allocate memory for %d commands\n", + max_cmnds); + max_cmnds = asd_ha->hw_prof.max_scbs; + return -ENOMEM; + } + dma_handle = asd_ha->hw_prof.scb_ext->dma_handle; + dma_addr = ALIGN((unsigned long) dma_handle, ASD_SCB_SIZE); + dma_addr -= asd_ha->hw_prof.max_scbs * ASD_SCB_SIZE; + dma_handle = (dma_addr_t) dma_addr; + asd_write_reg_addr(asd_ha, CMDCTXBASE, dma_handle); + d = asd_read_reg_dword(asd_ha, CTXDOMAIN); + d &= ~1; + asd_write_reg_dword(asd_ha, CTXDOMAIN, d); + + asd_ha->hw_prof.max_scbs = max_cmnds; + + return 0; +} + +/** + * asd_init_ctxmem -- initialize context memory + * asd_ha: pointer to host adapter structure + * + * This function sets the maximum number of SCBs and + * DDBs which can be used by the sequencer. This is normally + * 512 and 128 respectively. If support for more SCBs or more DDBs + * is required then CMDCTXBASE, DEVCTXBASE and CTXDOMAIN are + * initialized here to extend context memory to point to host memory, + * thus allowing unlimited support for SCBs and DDBs -- only limited + * by host memory. + */ +static int asd_init_ctxmem(struct asd_ha_struct *asd_ha) +{ + int bitmap_bytes; + + asd_get_max_scb_ddb(asd_ha); + asd_extend_devctx(asd_ha); + asd_extend_cmdctx(asd_ha); + + /* The kernel wants bitmaps to be unsigned long sized. */ + bitmap_bytes = (asd_ha->hw_prof.max_ddbs+7)/8; + bitmap_bytes = BITS_TO_LONGS(bitmap_bytes*8)*sizeof(unsigned long); + asd_ha->hw_prof.ddb_bitmap = kzalloc(bitmap_bytes, GFP_KERNEL); + if (!asd_ha->hw_prof.ddb_bitmap) + return -ENOMEM; + spin_lock_init(&asd_ha->hw_prof.ddb_lock); + + return 0; +} + +int asd_init_hw(struct asd_ha_struct *asd_ha) +{ + int err; + u32 v; + + err = asd_init_sw(asd_ha); + if (err) + return err; + + err = pci_read_config_dword(asd_ha->pcidev, PCIC_HSTPCIX_CNTRL, &v); + if (err) { + asd_printk("couldn't read PCIC_HSTPCIX_CNTRL of %s\n", + pci_name(asd_ha->pcidev)); + return err; + } + pci_write_config_dword(asd_ha->pcidev, PCIC_HSTPCIX_CNTRL, + v | SC_TMR_DIS); + if (err) { + asd_printk("couldn't disable split completion timer of %s\n", + pci_name(asd_ha->pcidev)); + return err; + } + + err = asd_read_ocm(asd_ha); + if (err) { + asd_printk("couldn't read ocm(%d)\n", err); + /* While suspicios, it is not an error that we + * couldn't read the OCM. */ + } + + err = asd_read_flash(asd_ha); + if (err) { + asd_printk("couldn't read flash(%d)\n", err); + /* While suspicios, it is not an error that we + * couldn't read FLASH memory. + */ + } + + asd_init_ctxmem(asd_ha); + + asd_get_user_sas_addr(asd_ha); + if (!asd_ha->hw_prof.sas_addr[0]) { + asd_printk("No SAS Address provided for %s\n", + pci_name(asd_ha->pcidev)); + err = -ENODEV; + goto Out; + } + + asd_propagate_sas_addr(asd_ha); + + err = asd_init_phys(asd_ha); + if (err) { + asd_printk("couldn't initialize phys for %s\n", + pci_name(asd_ha->pcidev)); + goto Out; + } + + err = asd_init_scbs(asd_ha); + if (err) { + asd_printk("couldn't initialize scbs for %s\n", + pci_name(asd_ha->pcidev)); + goto Out; + } + + err = asd_init_dl(asd_ha); + if (err) { + asd_printk("couldn't initialize the done list:%d\n", + err); + goto Out; + } + + err = asd_init_escbs(asd_ha); + if (err) { + asd_printk("couldn't initialize escbs\n"); + goto Out; + } + + err = asd_init_chip(asd_ha); + if (err) { + asd_printk("couldn't init the chip\n"); + goto Out; + } +Out: + return err; +} + +/* ---------- Chip reset ---------- */ + +/** + * asd_chip_reset -- reset the host adapter, etc + * @asd_ha: pointer to host adapter structure of interest + * + * Called from the ISR. Hard reset the chip. Let everything + * timeout. This should be no different than hot-unplugging the + * host adapter. Once everything times out we'll init the chip with + * a call to asd_init_chip() and enable interrupts with asd_enable_ints(). + * XXX finish. + */ +static void asd_chip_reset(struct asd_ha_struct *asd_ha) +{ + struct sas_ha_struct *sas_ha = &asd_ha->sas_ha; + + ASD_DPRINTK("chip reset for %s\n", pci_name(asd_ha->pcidev)); + asd_chip_hardrst(asd_ha); + sas_ha->notify_ha_event(sas_ha, HAE_RESET); +} + +/* ---------- Done List Routines ---------- */ + +static void asd_dl_tasklet_handler(unsigned long data) +{ + struct asd_ha_struct *asd_ha = (struct asd_ha_struct *) data; + struct asd_seq_data *seq = &asd_ha->seq; + unsigned long flags; + + while (1) { + struct done_list_struct *dl = &seq->dl[seq->dl_next]; + struct asd_ascb *ascb; + + if ((dl->toggle & DL_TOGGLE_MASK) != seq->dl_toggle) + break; + + /* find the aSCB */ + spin_lock_irqsave(&seq->tc_index_lock, flags); + ascb = asd_tc_index_find(seq, (int)le16_to_cpu(dl->index)); + spin_unlock_irqrestore(&seq->tc_index_lock, flags); + if (unlikely(!ascb)) { + ASD_DPRINTK("BUG:sequencer:dl:no ascb?!\n"); + goto next_1; + } else if (ascb->scb->header.opcode == EMPTY_SCB) { + goto out; + } else if (!ascb->uldd_timer && !del_timer(&ascb->timer)) { + goto next_1; + } + spin_lock_irqsave(&seq->pend_q_lock, flags); + list_del_init(&ascb->list); + seq->pending--; + spin_unlock_irqrestore(&seq->pend_q_lock, flags); + out: + ascb->tasklet_complete(ascb, dl); + + next_1: + seq->dl_next = (seq->dl_next + 1) & (ASD_DL_SIZE-1); + if (!seq->dl_next) + seq->dl_toggle ^= DL_TOGGLE_MASK; + } +} + +/* ---------- Interrupt Service Routines ---------- */ + +/** + * asd_process_donelist_isr -- schedule processing of done list entries + * @asd_ha: pointer to host adapter structure + */ +static inline void asd_process_donelist_isr(struct asd_ha_struct *asd_ha) +{ + tasklet_schedule(&asd_ha->seq.dl_tasklet); +} + +/** + * asd_com_sas_isr -- process device communication interrupt (COMINT) + * @asd_ha: pointer to host adapter structure + */ +static inline void asd_com_sas_isr(struct asd_ha_struct *asd_ha) +{ + u32 comstat = asd_read_reg_dword(asd_ha, COMSTAT); + + /* clear COMSTAT int */ + asd_write_reg_dword(asd_ha, COMSTAT, 0xFFFFFFFF); + + if (comstat & CSBUFPERR) { + asd_printk("%s: command/status buffer dma parity error\n", + pci_name(asd_ha->pcidev)); + } else if (comstat & CSERR) { + int i; + u32 dmaerr = asd_read_reg_dword(asd_ha, DMAERR); + dmaerr &= 0xFF; + asd_printk("%s: command/status dma error, DMAERR: 0x%02x, " + "CSDMAADR: 0x%04x, CSDMAADR+4: 0x%04x\n", + pci_name(asd_ha->pcidev), + dmaerr, + asd_read_reg_dword(asd_ha, CSDMAADR), + asd_read_reg_dword(asd_ha, CSDMAADR+4)); + asd_printk("CSBUFFER:\n"); + for (i = 0; i < 8; i++) { + asd_printk("%08x %08x %08x %08x\n", + asd_read_reg_dword(asd_ha, CSBUFFER), + asd_read_reg_dword(asd_ha, CSBUFFER+4), + asd_read_reg_dword(asd_ha, CSBUFFER+8), + asd_read_reg_dword(asd_ha, CSBUFFER+12)); + } + asd_dump_seq_state(asd_ha, 0); + } else if (comstat & OVLYERR) { + u32 dmaerr = asd_read_reg_dword(asd_ha, DMAERR); + dmaerr = (dmaerr >> 8) & 0xFF; + asd_printk("%s: overlay dma error:0x%x\n", + pci_name(asd_ha->pcidev), + dmaerr); + } + asd_chip_reset(asd_ha); +} + +static inline void asd_arp2_err(struct asd_ha_struct *asd_ha, u32 dchstatus) +{ + static const char *halt_code[256] = { + "UNEXPECTED_INTERRUPT0", + "UNEXPECTED_INTERRUPT1", + "UNEXPECTED_INTERRUPT2", + "UNEXPECTED_INTERRUPT3", + "UNEXPECTED_INTERRUPT4", + "UNEXPECTED_INTERRUPT5", + "UNEXPECTED_INTERRUPT6", + "UNEXPECTED_INTERRUPT7", + "UNEXPECTED_INTERRUPT8", + "UNEXPECTED_INTERRUPT9", + "UNEXPECTED_INTERRUPT10", + [11 ... 19] = "unknown[11,19]", + "NO_FREE_SCB_AVAILABLE", + "INVALID_SCB_OPCODE", + "INVALID_MBX_OPCODE", + "INVALID_ATA_STATE", + "ATA_QUEUE_FULL", + "ATA_TAG_TABLE_FAULT", + "ATA_TAG_MASK_FAULT", + "BAD_LINK_QUEUE_STATE", + "DMA2CHIM_QUEUE_ERROR", + "EMPTY_SCB_LIST_FULL", + "unknown[30]", + "IN_USE_SCB_ON_FREE_LIST", + "BAD_OPEN_WAIT_STATE", + "INVALID_STP_AFFILIATION", + "unknown[34]", + "EXEC_QUEUE_ERROR", + "TOO_MANY_EMPTIES_NEEDED", + "EMPTY_REQ_QUEUE_ERROR", + "Q_MONIRTT_MGMT_ERROR", + "TARGET_MODE_FLOW_ERROR", + "DEVICE_QUEUE_NOT_FOUND", + "START_IRTT_TIMER_ERROR", + "ABORT_TASK_ILLEGAL_REQ", + [43 ... 255] = "unknown[43,255]" + }; + + if (dchstatus & CSEQINT) { + u32 arp2int = asd_read_reg_dword(asd_ha, CARP2INT); + + if (arp2int & (ARP2WAITTO|ARP2ILLOPC|ARP2PERR|ARP2CIOPERR)) { + asd_printk("%s: CSEQ arp2int:0x%x\n", + pci_name(asd_ha->pcidev), + arp2int); + } else if (arp2int & ARP2HALTC) + asd_printk("%s: CSEQ halted: %s\n", + pci_name(asd_ha->pcidev), + halt_code[(arp2int>>16)&0xFF]); + else + asd_printk("%s: CARP2INT:0x%x\n", + pci_name(asd_ha->pcidev), + arp2int); + } + if (dchstatus & LSEQINT_MASK) { + int lseq; + u8 lseq_mask = dchstatus & LSEQINT_MASK; + + for_each_sequencer(lseq_mask, lseq_mask, lseq) { + u32 arp2int = asd_read_reg_dword(asd_ha, + LmARP2INT(lseq)); + if (arp2int & (ARP2WAITTO | ARP2ILLOPC | ARP2PERR + | ARP2CIOPERR)) { + asd_printk("%s: LSEQ%d arp2int:0x%x\n", + pci_name(asd_ha->pcidev), + lseq, arp2int); + /* XXX we should only do lseq reset */ + } else if (arp2int & ARP2HALTC) + asd_printk("%s: LSEQ%d halted: %s\n", + pci_name(asd_ha->pcidev), + lseq,halt_code[(arp2int>>16)&0xFF]); + else + asd_printk("%s: LSEQ%d ARP2INT:0x%x\n", + pci_name(asd_ha->pcidev), lseq, + arp2int); + } + } + asd_chip_reset(asd_ha); +} + +/** + * asd_dch_sas_isr -- process device channel interrupt (DEVINT) + * @asd_ha: pointer to host adapter structure + */ +static inline void asd_dch_sas_isr(struct asd_ha_struct *asd_ha) +{ + u32 dchstatus = asd_read_reg_dword(asd_ha, DCHSTATUS); + + if (dchstatus & CFIFTOERR) { + asd_printk("%s: CFIFTOERR\n", pci_name(asd_ha->pcidev)); + asd_chip_reset(asd_ha); + } else + asd_arp2_err(asd_ha, dchstatus); +} + +/** + * ads_rbi_exsi_isr -- process external system interface interrupt (INITERR) + * @asd_ha: pointer to host adapter structure + */ +static inline void asd_rbi_exsi_isr(struct asd_ha_struct *asd_ha) +{ + u32 stat0r = asd_read_reg_dword(asd_ha, ASISTAT0R); + + if (!(stat0r & ASIERR)) { + asd_printk("hmm, EXSI interrupted but no error?\n"); + return; + } + + if (stat0r & ASIFMTERR) { + asd_printk("ASI SEEPROM format error for %s\n", + pci_name(asd_ha->pcidev)); + } else if (stat0r & ASISEECHKERR) { + u32 stat1r = asd_read_reg_dword(asd_ha, ASISTAT1R); + asd_printk("ASI SEEPROM checksum 0x%x error for %s\n", + stat1r & CHECKSUM_MASK, + pci_name(asd_ha->pcidev)); + } else { + u32 statr = asd_read_reg_dword(asd_ha, ASIERRSTATR); + + if (!(statr & CPI2ASIMSTERR_MASK)) { + ASD_DPRINTK("hmm, ASIERR?\n"); + return; + } else { + u32 addr = asd_read_reg_dword(asd_ha, ASIERRADDR); + u32 data = asd_read_reg_dword(asd_ha, ASIERRDATAR); + + asd_printk("%s: CPI2 xfer err: addr: 0x%x, wdata: 0x%x, " + "count: 0x%x, byteen: 0x%x, targerr: 0x%x " + "master id: 0x%x, master err: 0x%x\n", + pci_name(asd_ha->pcidev), + addr, data, + (statr & CPI2ASIBYTECNT_MASK) >> 16, + (statr & CPI2ASIBYTEEN_MASK) >> 12, + (statr & CPI2ASITARGERR_MASK) >> 8, + (statr & CPI2ASITARGMID_MASK) >> 4, + (statr & CPI2ASIMSTERR_MASK)); + } + } + asd_chip_reset(asd_ha); +} + +/** + * asd_hst_pcix_isr -- process host interface interrupts + * @asd_ha: pointer to host adapter structure + * + * Asserted on PCIX errors: target abort, etc. + */ +static inline void asd_hst_pcix_isr(struct asd_ha_struct *asd_ha) +{ + u16 status; + u32 pcix_status; + u32 ecc_status; + + pci_read_config_word(asd_ha->pcidev, PCI_STATUS, &status); + pci_read_config_dword(asd_ha->pcidev, PCIX_STATUS, &pcix_status); + pci_read_config_dword(asd_ha->pcidev, ECC_CTRL_STAT, &ecc_status); + + if (status & PCI_STATUS_DETECTED_PARITY) + asd_printk("parity error for %s\n", pci_name(asd_ha->pcidev)); + else if (status & PCI_STATUS_REC_MASTER_ABORT) + asd_printk("master abort for %s\n", pci_name(asd_ha->pcidev)); + else if (status & PCI_STATUS_REC_TARGET_ABORT) + asd_printk("target abort for %s\n", pci_name(asd_ha->pcidev)); + else if (status & PCI_STATUS_PARITY) + asd_printk("data parity for %s\n", pci_name(asd_ha->pcidev)); + else if (pcix_status & RCV_SCE) { + asd_printk("received split completion error for %s\n", + pci_name(asd_ha->pcidev)); + pci_write_config_dword(asd_ha->pcidev,PCIX_STATUS,pcix_status); + /* XXX: Abort task? */ + return; + } else if (pcix_status & UNEXP_SC) { + asd_printk("unexpected split completion for %s\n", + pci_name(asd_ha->pcidev)); + pci_write_config_dword(asd_ha->pcidev,PCIX_STATUS,pcix_status); + /* ignore */ + return; + } else if (pcix_status & SC_DISCARD) + asd_printk("split completion discarded for %s\n", + pci_name(asd_ha->pcidev)); + else if (ecc_status & UNCOR_ECCERR) + asd_printk("uncorrectable ECC error for %s\n", + pci_name(asd_ha->pcidev)); + asd_chip_reset(asd_ha); +} + +/** + * asd_hw_isr -- host adapter interrupt service routine + * @irq: ignored + * @dev_id: pointer to host adapter structure + * @regs: ignored + * + * The ISR processes done list entries and level 3 error handling. + */ +irqreturn_t asd_hw_isr(int irq, void *dev_id, struct pt_regs *regs) +{ + struct asd_ha_struct *asd_ha = dev_id; + u32 chimint = asd_read_reg_dword(asd_ha, CHIMINT); + + if (!chimint) + return IRQ_NONE; + + asd_write_reg_dword(asd_ha, CHIMINT, chimint); + (void) asd_read_reg_dword(asd_ha, CHIMINT); + + if (chimint & DLAVAIL) + asd_process_donelist_isr(asd_ha); + if (chimint & COMINT) + asd_com_sas_isr(asd_ha); + if (chimint & DEVINT) + asd_dch_sas_isr(asd_ha); + if (chimint & INITERR) + asd_rbi_exsi_isr(asd_ha); + if (chimint & HOSTERR) + asd_hst_pcix_isr(asd_ha); + + return IRQ_HANDLED; +} + +/* ---------- SCB handling ---------- */ + +static inline struct asd_ascb *asd_ascb_alloc(struct asd_ha_struct *asd_ha, + unsigned int gfp_flags) +{ + extern kmem_cache_t *asd_ascb_cache; + struct asd_seq_data *seq = &asd_ha->seq; + struct asd_ascb *ascb; + unsigned long flags; + + ascb = kmem_cache_alloc(asd_ascb_cache, gfp_flags); + + if (ascb) { + memset(ascb, 0, sizeof(*ascb)); + ascb->dma_scb.size = sizeof(struct scb); + ascb->dma_scb.vaddr = dma_pool_alloc(asd_ha->scb_pool, + gfp_flags, + &ascb->dma_scb.dma_handle); + if (!ascb->dma_scb.vaddr) { + kmem_cache_free(asd_ascb_cache, ascb); + return NULL; + } + memset(ascb->dma_scb.vaddr, 0, sizeof(struct scb)); + asd_init_ascb(asd_ha, ascb); + + spin_lock_irqsave(&seq->tc_index_lock, flags); + ascb->tc_index = asd_tc_index_get(seq, ascb); + spin_unlock_irqrestore(&seq->tc_index_lock, flags); + if (ascb->tc_index == -1) + goto undo; + + ascb->scb->header.index = cpu_to_le16((u16)ascb->tc_index); + } + + return ascb; +undo: + dma_pool_free(asd_ha->scb_pool, ascb->dma_scb.vaddr, + ascb->dma_scb.dma_handle); + kmem_cache_free(asd_ascb_cache, ascb); + ASD_DPRINTK("no index for ascb\n"); + return NULL; +} + +/** + * asd_ascb_alloc_list -- allocate a list of aSCBs + * @asd_ha: pointer to host adapter structure + * @num: pointer to integer number of aSCBs + * @gfp_flags: GFP_ flags. + * + * This is the only function which is used to allocate aSCBs. + * It can allocate one or many. If more than one, then they form + * a linked list in two ways: by their list field of the ascb struct + * and by the next_scb field of the scb_header. + * + * Returns NULL if no memory was available, else pointer to a list + * of ascbs. When this function returns, @num would be the number + * of SCBs which were not able to be allocated, 0 if all requested + * were able to be allocated. + */ +struct asd_ascb *asd_ascb_alloc_list(struct asd_ha_struct + *asd_ha, int *num, + unsigned int gfp_flags) +{ + struct asd_ascb *first = NULL; + + for ( ; *num > 0; --*num) { + struct asd_ascb *ascb = asd_ascb_alloc(asd_ha, gfp_flags); + + if (!ascb) + break; + else if (!first) + first = ascb; + else { + struct asd_ascb *last = list_entry(first->list.prev, + struct asd_ascb, + list); + list_add_tail(&ascb->list, &first->list); + last->scb->header.next_scb = + cpu_to_le64(((u64)ascb->dma_scb.dma_handle)); + } + } + + return first; +} + +/** + * asd_swap_head_scb -- swap the head scb + * @asd_ha: pointer to host adapter structure + * @ascb: pointer to the head of an ascb list + * + * The sequencer knows the DMA address of the next SCB to be DMAed to + * the host adapter, from initialization or from the last list DMAed. + * seq->next_scb keeps the address of this SCB. The sequencer will + * DMA to the host adapter this list of SCBs. But the head (first + * element) of this list is not known to the sequencer. Here we swap + * the head of the list with the known SCB (memcpy()). + * Only one memcpy() is required per list so it is in our interest + * to keep the list of SCB as long as possible so that the ratio + * of number of memcpy calls to the number of SCB DMA-ed is as small + * as possible. + * + * LOCKING: called with the pending list lock held. + */ +static inline void asd_swap_head_scb(struct asd_ha_struct *asd_ha, + struct asd_ascb *ascb) +{ + struct asd_seq_data *seq = &asd_ha->seq; + struct asd_ascb *last = list_entry(ascb->list.prev, + struct asd_ascb, + list); + struct asd_dma_tok t = ascb->dma_scb; + + memcpy(seq->next_scb.vaddr, ascb->scb, sizeof(*ascb->scb)); + ascb->dma_scb = seq->next_scb; + ascb->scb = ascb->dma_scb.vaddr; + seq->next_scb = t; + last->scb->header.next_scb = + cpu_to_le64(((u64)seq->next_scb.dma_handle)); +} + +/** + * asd_start_timers -- (add and) start timers of SCBs + * @list: pointer to struct list_head of the scbs + * @to: timeout in jiffies + * + * If an SCB in the @list has no timer function, assign the default + * one, then start the timer of the SCB. This function is + * intended to be called from asd_post_ascb_list(), just prior to + * posting the SCBs to the sequencer. + */ +static inline void asd_start_scb_timers(struct list_head *list) +{ + struct asd_ascb *ascb; + list_for_each_entry(ascb, list, list) { + if (!ascb->uldd_timer) { + ascb->timer.data = (unsigned long) ascb; + ascb->timer.function = asd_ascb_timedout; + ascb->timer.expires = jiffies + AIC94XX_SCB_TIMEOUT; + add_timer(&ascb->timer); + } + } +} + +/** + * asd_post_ascb_list -- post a list of 1 or more aSCBs to the host adapter + * @asd_ha: pointer to a host adapter structure + * @ascb: pointer to the first aSCB in the list + * @num: number of aSCBs in the list (to be posted) + * + * See queueing comment in asd_post_escb_list(). + * + * Additional note on queuing: In order to minimize the ratio of memcpy() + * to the number of ascbs sent, we try to batch-send as many ascbs as possible + * in one go. + * Two cases are possible: + * A) can_queue >= num, + * B) can_queue < num. + * Case A: we can send the whole batch at once. Increment "pending" + * in the beginning of this function, when it is checked, in order to + * eliminate races when this function is called by multiple processes. + * Case B: should never happen if the managing layer considers + * lldd_queue_size. + */ +int asd_post_ascb_list(struct asd_ha_struct *asd_ha, struct asd_ascb *ascb, + int num) +{ + unsigned long flags; + LIST_HEAD(list); + int can_queue; + + spin_lock_irqsave(&asd_ha->seq.pend_q_lock, flags); + can_queue = asd_ha->hw_prof.max_scbs - asd_ha->seq.pending; + if (can_queue >= num) + asd_ha->seq.pending += num; + else + can_queue = 0; + + if (!can_queue) { + spin_unlock_irqrestore(&asd_ha->seq.pend_q_lock, flags); + asd_printk("%s: scb queue full\n", pci_name(asd_ha->pcidev)); + return -SAS_QUEUE_FULL; + } + + asd_swap_head_scb(asd_ha, ascb); + + __list_add(&list, ascb->list.prev, &ascb->list); + + asd_start_scb_timers(&list); + + asd_ha->seq.scbpro += num; + list_splice_init(&list, asd_ha->seq.pend_q.prev); + asd_write_reg_dword(asd_ha, SCBPRO, (u32)asd_ha->seq.scbpro); + spin_unlock_irqrestore(&asd_ha->seq.pend_q_lock, flags); + + return 0; +} + +/** + * asd_post_escb_list -- post a list of 1 or more empty scb + * @asd_ha: pointer to a host adapter structure + * @ascb: pointer to the first empty SCB in the list + * @num: number of aSCBs in the list (to be posted) + * + * This is essentially the same as asd_post_ascb_list, but we do not + * increment pending, add those to the pending list or get indexes. + * See asd_init_escbs() and asd_init_post_escbs(). + * + * Since sending a list of ascbs is a superset of sending a single + * ascb, this function exists to generalize this. More specifically, + * when sending a list of those, we want to do only a _single_ + * memcpy() at swap head, as opposed to for each ascb sent (in the + * case of sending them one by one). That is, we want to minimize the + * ratio of memcpy() operations to the number of ascbs sent. The same + * logic applies to asd_post_ascb_list(). + */ +int asd_post_escb_list(struct asd_ha_struct *asd_ha, struct asd_ascb *ascb, + int num) +{ + unsigned long flags; + + spin_lock_irqsave(&asd_ha->seq.pend_q_lock, flags); + asd_swap_head_scb(asd_ha, ascb); + asd_ha->seq.scbpro += num; + asd_write_reg_dword(asd_ha, SCBPRO, (u32)asd_ha->seq.scbpro); + spin_unlock_irqrestore(&asd_ha->seq.pend_q_lock, flags); + + return 0; +} + +/* ---------- LED ---------- */ + +/** + * asd_turn_led -- turn on/off an LED + * @asd_ha: pointer to host adapter structure + * @phy_id: the PHY id whose LED we want to manupulate + * @op: 1 to turn on, 0 to turn off + */ +void asd_turn_led(struct asd_ha_struct *asd_ha, int phy_id, int op) +{ + if (phy_id < ASD_MAX_PHYS) { + u32 v = asd_read_reg_dword(asd_ha, LmCONTROL(phy_id)); + if (op) + v |= LEDPOL; + else + v &= ~LEDPOL; + asd_write_reg_dword(asd_ha, LmCONTROL(phy_id), v); + } +} + +/** + * asd_control_led -- enable/disable an LED on the board + * @asd_ha: pointer to host adapter structure + * @phy_id: integer, the phy id + * @op: integer, 1 to enable, 0 to disable the LED + * + * First we output enable the LED, then we set the source + * to be an external module. + */ +void asd_control_led(struct asd_ha_struct *asd_ha, int phy_id, int op) +{ + if (phy_id < ASD_MAX_PHYS) { + u32 v; + + v = asd_read_reg_dword(asd_ha, GPIOOER); + if (op) + v |= (1 << phy_id); + else + v &= ~(1 << phy_id); + asd_write_reg_dword(asd_ha, GPIOOER, v); + + v = asd_read_reg_dword(asd_ha, GPIOCNFGR); + if (op) + v |= (1 << phy_id); + else + v &= ~(1 << phy_id); + asd_write_reg_dword(asd_ha, GPIOCNFGR, v); + } +} + +/* ---------- PHY enable ---------- */ + +static int asd_enable_phy(struct asd_ha_struct *asd_ha, int phy_id) +{ + struct asd_phy *phy = &asd_ha->phys[phy_id]; + + asd_write_reg_byte(asd_ha, LmSEQ_OOB_REG(phy_id, INT_ENABLE_2), 0); + asd_write_reg_byte(asd_ha, LmSEQ_OOB_REG(phy_id, HOT_PLUG_DELAY), + HOTPLUG_DELAY_TIMEOUT); + + /* Get defaults from manuf. sector */ + /* XXX we need defaults for those in case MS is broken. */ + asd_write_reg_byte(asd_ha, LmSEQ_OOB_REG(phy_id, PHY_CONTROL_0), + phy->phy_desc->phy_control_0); + asd_write_reg_byte(asd_ha, LmSEQ_OOB_REG(phy_id, PHY_CONTROL_1), + phy->phy_desc->phy_control_1); + asd_write_reg_byte(asd_ha, LmSEQ_OOB_REG(phy_id, PHY_CONTROL_2), + phy->phy_desc->phy_control_2); + asd_write_reg_byte(asd_ha, LmSEQ_OOB_REG(phy_id, PHY_CONTROL_3), + phy->phy_desc->phy_control_3); + + asd_write_reg_dword(asd_ha, LmSEQ_TEN_MS_COMINIT_TIMEOUT(phy_id), + ASD_COMINIT_TIMEOUT); + + asd_write_reg_addr(asd_ha, LmSEQ_TX_ID_ADDR_FRAME(phy_id), + phy->id_frm_tok->dma_handle); + + asd_control_led(asd_ha, phy_id, 1); + + return 0; +} + +int asd_enable_phys(struct asd_ha_struct *asd_ha, const u8 phy_mask) +{ + u8 phy_m; + u8 i; + int num = 0, k; + struct asd_ascb *ascb; + struct asd_ascb *ascb_list; + + if (!phy_mask) { + asd_printk("%s called with phy_mask of 0!?\n", __FUNCTION__); + return 0; + } + + for_each_phy(phy_mask, phy_m, i) { + num++; + asd_enable_phy(asd_ha, i); + } + + k = num; + ascb_list = asd_ascb_alloc_list(asd_ha, &k, GFP_KERNEL); + if (!ascb_list) { + asd_printk("no memory for control phy ascb list\n"); + return -ENOMEM; + } + num -= k; + + ascb = ascb_list; + for_each_phy(phy_mask, phy_m, i) { + asd_build_control_phy(ascb, i, ENABLE_PHY); + ascb = list_entry(ascb->list.next, struct asd_ascb, list); + } + ASD_DPRINTK("posting %d control phy scbs\n", num); + k = asd_post_ascb_list(asd_ha, ascb_list, num); + if (k) + asd_ascb_free_list(ascb_list); + + return k; +} diff --git a/drivers/scsi/aic94xx/aic94xx_hwi.h b/drivers/scsi/aic94xx/aic94xx_hwi.h new file mode 100644 index 000000000000..c7d505388fed --- /dev/null +++ b/drivers/scsi/aic94xx/aic94xx_hwi.h @@ -0,0 +1,397 @@ +/* + * Aic94xx SAS/SATA driver hardware interface header file. + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This file is part of the aic94xx driver. + * + * The aic94xx driver is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2 of the + * License. + * + * The aic94xx driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with the aic94xx driver; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _AIC94XX_HWI_H_ +#define _AIC94XX_HWI_H_ + +#include +#include +#include + +#include + +#include "aic94xx.h" +#include "aic94xx_sas.h" + +/* Define ASD_MAX_PHYS to the maximum phys ever. Currently 8. */ +#define ASD_MAX_PHYS 8 +#define ASD_PCBA_SN_SIZE 12 + +/* Those are to be further named properly, the "RAZORx" part, and + * subsequently included in include/linux/pci_ids.h. + */ +#define PCI_DEVICE_ID_ADAPTEC2_RAZOR10 0x410 +#define PCI_DEVICE_ID_ADAPTEC2_RAZOR12 0x412 +#define PCI_DEVICE_ID_ADAPTEC2_RAZOR1E 0x41E +#define PCI_DEVICE_ID_ADAPTEC2_RAZOR30 0x430 +#define PCI_DEVICE_ID_ADAPTEC2_RAZOR32 0x432 +#define PCI_DEVICE_ID_ADAPTEC2_RAZOR3E 0x43E +#define PCI_DEVICE_ID_ADAPTEC2_RAZOR3F 0x43F + +struct asd_ha_addrspace { + void __iomem *addr; + unsigned long start; /* pci resource start */ + unsigned long len; /* pci resource len */ + unsigned long flags; /* pci resource flags */ + + /* addresses internal to the host adapter */ + u32 swa_base; /* mmspace 1 (MBAR1) uses this only */ + u32 swb_base; + u32 swc_base; +}; + +struct bios_struct { + int present; + u8 maj; + u8 min; + u32 bld; +}; + +struct unit_element_struct { + u16 num; + u16 size; + void *area; +}; + +struct flash_struct { + u32 bar; + int present; + int wide; + u8 manuf; + u8 dev_id; + u8 sec_prot; + + u32 dir_offs; +}; + +struct asd_phy_desc { + /* From CTRL-A settings, then set to what is appropriate */ + u8 sas_addr[SAS_ADDR_SIZE]; + u8 max_sas_lrate; + u8 min_sas_lrate; + u8 max_sata_lrate; + u8 min_sata_lrate; + u8 flags; +#define ASD_CRC_DIS 1 +#define ASD_SATA_SPINUP_HOLD 2 + + u8 phy_control_0; /* mode 5 reg 0x160 */ + u8 phy_control_1; /* mode 5 reg 0x161 */ + u8 phy_control_2; /* mode 5 reg 0x162 */ + u8 phy_control_3; /* mode 5 reg 0x163 */ +}; + +struct asd_dma_tok { + void *vaddr; + dma_addr_t dma_handle; + size_t size; +}; + +struct hw_profile { + struct bios_struct bios; + struct unit_element_struct ue; + struct flash_struct flash; + + u8 sas_addr[SAS_ADDR_SIZE]; + char pcba_sn[ASD_PCBA_SN_SIZE+1]; + + u8 enabled_phys; /* mask of enabled phys */ + struct asd_phy_desc phy_desc[ASD_MAX_PHYS]; + u32 max_scbs; /* absolute sequencer scb queue size */ + struct asd_dma_tok *scb_ext; + u32 max_ddbs; + struct asd_dma_tok *ddb_ext; + + spinlock_t ddb_lock; + void *ddb_bitmap; + + int num_phys; /* ENABLEABLE */ + int max_phys; /* REPORTED + ENABLEABLE */ + + unsigned addr_range; /* max # of addrs; max # of possible ports */ + unsigned port_name_base; + unsigned dev_name_base; + unsigned sata_name_base; +}; + +struct asd_ascb { + struct list_head list; + struct asd_ha_struct *ha; + + struct scb *scb; /* equals dma_scb->vaddr */ + struct asd_dma_tok dma_scb; + struct asd_dma_tok *sg_arr; + + void (*tasklet_complete)(struct asd_ascb *, struct done_list_struct *); + u8 uldd_timer:1; + + /* internally generated command */ + struct timer_list timer; + struct completion completion; + u8 tag_valid:1; + __be16 tag; /* error recovery only */ + + /* If this is an Empty SCB, index of first edb in seq->edb_arr. */ + int edb_index; + + /* Used by the timer timeout function. */ + int tc_index; + + void *uldd_task; +}; + +#define ASD_DL_SIZE_BITS 0x8 +#define ASD_DL_SIZE (1<<(2+ASD_DL_SIZE_BITS)) +#define ASD_DEF_DL_TOGGLE 0x01 + +struct asd_seq_data { + spinlock_t pend_q_lock; + u16 scbpro; + int pending; + struct list_head pend_q; + int can_queue; /* per adapter */ + struct asd_dma_tok next_scb; /* next scb to be delivered to CSEQ */ + + spinlock_t tc_index_lock; + void **tc_index_array; + void *tc_index_bitmap; + int tc_index_bitmap_bits; + + struct tasklet_struct dl_tasklet; + struct done_list_struct *dl; /* array of done list entries, equals */ + struct asd_dma_tok *actual_dl; /* actual_dl->vaddr */ + int dl_toggle; + int dl_next; + + int num_edbs; + struct asd_dma_tok **edb_arr; + int num_escbs; + struct asd_ascb **escb_arr; /* array of pointers to escbs */ +}; + +/* This is the Host Adapter structure. It describes the hardware + * SAS adapter. + */ +struct asd_ha_struct { + struct pci_dev *pcidev; + const char *name; + + struct sas_ha_struct sas_ha; + + u8 revision_id; + + int iospace; + spinlock_t iolock; + struct asd_ha_addrspace io_handle[2]; + + struct hw_profile hw_prof; + + struct asd_phy phys[ASD_MAX_PHYS]; + struct asd_sas_port ports[ASD_MAX_PHYS]; + + struct dma_pool *scb_pool; + + struct asd_seq_data seq; /* sequencer related */ +}; + +/* ---------- Common macros ---------- */ + +#define ASD_BUSADDR_LO(__dma_handle) ((u32)(__dma_handle)) +#define ASD_BUSADDR_HI(__dma_handle) (((sizeof(dma_addr_t))==8) \ + ? ((u32)((__dma_handle) >> 32)) \ + : ((u32)0)) + +#define dev_to_asd_ha(__dev) pci_get_drvdata(to_pci_dev(__dev)) +#define SCB_SITE_VALID(__site_no) (((__site_no) & 0xF0FF) != 0x00FF \ + && ((__site_no) & 0xF0FF) > 0x001F) +/* For each bit set in __lseq_mask, set __lseq to equal the bit + * position of the set bit and execute the statement following. + * __mc is the temporary mask, used as a mask "counter". + */ +#define for_each_sequencer(__lseq_mask, __mc, __lseq) \ + for ((__mc)=(__lseq_mask),(__lseq)=0;(__mc)!=0;(__lseq++),(__mc)>>=1)\ + if (((__mc) & 1)) +#define for_each_phy(__lseq_mask, __mc, __lseq) \ + for ((__mc)=(__lseq_mask),(__lseq)=0;(__mc)!=0;(__lseq++),(__mc)>>=1)\ + if (((__mc) & 1)) + +#define PHY_ENABLED(_HA, _I) ((_HA)->hw_prof.enabled_phys & (1<<(_I))) + +/* ---------- DMA allocs ---------- */ + +static inline struct asd_dma_tok *asd_dmatok_alloc(unsigned int flags) +{ + return kmem_cache_alloc(asd_dma_token_cache, flags); +} + +static inline void asd_dmatok_free(struct asd_dma_tok *token) +{ + kmem_cache_free(asd_dma_token_cache, token); +} + +static inline struct asd_dma_tok *asd_alloc_coherent(struct asd_ha_struct * + asd_ha, size_t size, + unsigned int flags) +{ + struct asd_dma_tok *token = asd_dmatok_alloc(flags); + if (token) { + token->size = size; + token->vaddr = dma_alloc_coherent(&asd_ha->pcidev->dev, + token->size, + &token->dma_handle, + flags); + if (!token->vaddr) { + asd_dmatok_free(token); + token = NULL; + } + } + return token; +} + +static inline void asd_free_coherent(struct asd_ha_struct *asd_ha, + struct asd_dma_tok *token) +{ + if (token) { + dma_free_coherent(&asd_ha->pcidev->dev, token->size, + token->vaddr, token->dma_handle); + asd_dmatok_free(token); + } +} + +static inline void asd_init_ascb(struct asd_ha_struct *asd_ha, + struct asd_ascb *ascb) +{ + INIT_LIST_HEAD(&ascb->list); + ascb->scb = ascb->dma_scb.vaddr; + ascb->ha = asd_ha; + ascb->timer.function = NULL; + init_timer(&ascb->timer); + ascb->tc_index = -1; + init_completion(&ascb->completion); +} + +/* Must be called with the tc_index_lock held! + */ +static inline void asd_tc_index_release(struct asd_seq_data *seq, int index) +{ + seq->tc_index_array[index] = NULL; + clear_bit(index, seq->tc_index_bitmap); +} + +/* Must be called with the tc_index_lock held! + */ +static inline int asd_tc_index_get(struct asd_seq_data *seq, void *ptr) +{ + int index; + + index = find_first_zero_bit(seq->tc_index_bitmap, + seq->tc_index_bitmap_bits); + if (index == seq->tc_index_bitmap_bits) + return -1; + + seq->tc_index_array[index] = ptr; + set_bit(index, seq->tc_index_bitmap); + + return index; +} + +/* Must be called with the tc_index_lock held! + */ +static inline void *asd_tc_index_find(struct asd_seq_data *seq, int index) +{ + return seq->tc_index_array[index]; +} + +/** + * asd_ascb_free -- free a single aSCB after is has completed + * @ascb: pointer to the aSCB of interest + * + * This frees an aSCB after it has been executed/completed by + * the sequencer. + */ +static inline void asd_ascb_free(struct asd_ascb *ascb) +{ + if (ascb) { + struct asd_ha_struct *asd_ha = ascb->ha; + unsigned long flags; + + BUG_ON(!list_empty(&ascb->list)); + spin_lock_irqsave(&ascb->ha->seq.tc_index_lock, flags); + asd_tc_index_release(&ascb->ha->seq, ascb->tc_index); + spin_unlock_irqrestore(&ascb->ha->seq.tc_index_lock, flags); + dma_pool_free(asd_ha->scb_pool, ascb->dma_scb.vaddr, + ascb->dma_scb.dma_handle); + kmem_cache_free(asd_ascb_cache, ascb); + } +} + +/** + * asd_ascb_list_free -- free a list of ascbs + * @ascb_list: a list of ascbs + * + * This function will free a list of ascbs allocated by asd_ascb_alloc_list. + * It is used when say the scb queueing function returned QUEUE_FULL, + * and we do not need the ascbs any more. + */ +static inline void asd_ascb_free_list(struct asd_ascb *ascb_list) +{ + LIST_HEAD(list); + struct list_head *n, *pos; + + __list_add(&list, ascb_list->list.prev, &ascb_list->list); + list_for_each_safe(pos, n, &list) { + list_del_init(pos); + asd_ascb_free(list_entry(pos, struct asd_ascb, list)); + } +} + +/* ---------- Function declarations ---------- */ + +int asd_init_hw(struct asd_ha_struct *asd_ha); +irqreturn_t asd_hw_isr(int irq, void *dev_id, struct pt_regs *regs); + + +struct asd_ascb *asd_ascb_alloc_list(struct asd_ha_struct + *asd_ha, int *num, + unsigned int gfp_mask); + +int asd_post_ascb_list(struct asd_ha_struct *asd_ha, struct asd_ascb *ascb, + int num); +int asd_post_escb_list(struct asd_ha_struct *asd_ha, struct asd_ascb *ascb, + int num); + +int asd_init_post_escbs(struct asd_ha_struct *asd_ha); +void asd_build_control_phy(struct asd_ascb *ascb, int phy_id, u8 subfunc); +void asd_control_led(struct asd_ha_struct *asd_ha, int phy_id, int op); +void asd_turn_led(struct asd_ha_struct *asd_ha, int phy_id, int op); +int asd_enable_phys(struct asd_ha_struct *asd_ha, const u8 phy_mask); +void asd_build_initiate_link_adm_task(struct asd_ascb *ascb, int phy_id, + u8 subfunc); + +void asd_ascb_timedout(unsigned long data); +int asd_chip_hardrst(struct asd_ha_struct *asd_ha); + +#endif diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c new file mode 100644 index 000000000000..3ec2e46f80c6 --- /dev/null +++ b/drivers/scsi/aic94xx/aic94xx_init.c @@ -0,0 +1,860 @@ +/* + * Aic94xx SAS/SATA driver initialization. + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This file is part of the aic94xx driver. + * + * The aic94xx driver is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2 of the + * License. + * + * The aic94xx driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with the aic94xx driver; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include "aic94xx.h" +#include "aic94xx_reg.h" +#include "aic94xx_hwi.h" +#include "aic94xx_seq.h" + +/* The format is "version.release.patchlevel" */ +#define ASD_DRIVER_VERSION "1.0.2" + +static int use_msi = 0; +module_param_named(use_msi, use_msi, int, S_IRUGO); +MODULE_PARM_DESC(use_msi, "\n" + "\tEnable(1) or disable(0) using PCI MSI.\n" + "\tDefault: 0"); + +static int lldd_max_execute_num = 0; +module_param_named(collector, lldd_max_execute_num, int, S_IRUGO); +MODULE_PARM_DESC(collector, "\n" + "\tIf greater than one, tells the SAS Layer to run in Task Collector\n" + "\tMode. If 1 or 0, tells the SAS Layer to run in Direct Mode.\n" + "\tThe aic94xx SAS LLDD supports both modes.\n" + "\tDefault: 0 (Direct Mode).\n"); + +char sas_addr_str[2*SAS_ADDR_SIZE + 1] = ""; + +static struct scsi_transport_template *aic94xx_transport_template; + +static struct scsi_host_template aic94xx_sht = { + .module = THIS_MODULE, + /* .name is initialized */ + .name = "aic94xx", + .queuecommand = sas_queuecommand, + .target_alloc = sas_target_alloc, + .slave_configure = sas_slave_configure, + .slave_destroy = sas_slave_destroy, + .change_queue_depth = sas_change_queue_depth, + .change_queue_type = sas_change_queue_type, + .bios_param = sas_bios_param, + .can_queue = 1, + .cmd_per_lun = 1, + .this_id = -1, + .sg_tablesize = SG_ALL, + .max_sectors = SCSI_DEFAULT_MAX_SECTORS, + .use_clustering = ENABLE_CLUSTERING, +}; + +static int __devinit asd_map_memio(struct asd_ha_struct *asd_ha) +{ + int err, i; + struct asd_ha_addrspace *io_handle; + + asd_ha->iospace = 0; + for (i = 0; i < 3; i += 2) { + io_handle = &asd_ha->io_handle[i==0?0:1]; + io_handle->start = pci_resource_start(asd_ha->pcidev, i); + io_handle->len = pci_resource_len(asd_ha->pcidev, i); + io_handle->flags = pci_resource_flags(asd_ha->pcidev, i); + err = -ENODEV; + if (!io_handle->start || !io_handle->len) { + asd_printk("MBAR%d start or length for %s is 0.\n", + i==0?0:1, pci_name(asd_ha->pcidev)); + goto Err; + } + err = pci_request_region(asd_ha->pcidev, i, ASD_DRIVER_NAME); + if (err) { + asd_printk("couldn't reserve memory region for %s\n", + pci_name(asd_ha->pcidev)); + goto Err; + } + if (io_handle->flags & IORESOURCE_CACHEABLE) + io_handle->addr = ioremap(io_handle->start, + io_handle->len); + else + io_handle->addr = ioremap_nocache(io_handle->start, + io_handle->len); + if (!io_handle->addr) { + asd_printk("couldn't map MBAR%d of %s\n", i==0?0:1, + pci_name(asd_ha->pcidev)); + goto Err_unreq; + } + } + + return 0; +Err_unreq: + pci_release_region(asd_ha->pcidev, i); +Err: + if (i > 0) { + io_handle = &asd_ha->io_handle[0]; + iounmap(io_handle->addr); + pci_release_region(asd_ha->pcidev, 0); + } + return err; +} + +static void __devexit asd_unmap_memio(struct asd_ha_struct *asd_ha) +{ + struct asd_ha_addrspace *io_handle; + + io_handle = &asd_ha->io_handle[1]; + iounmap(io_handle->addr); + pci_release_region(asd_ha->pcidev, 2); + + io_handle = &asd_ha->io_handle[0]; + iounmap(io_handle->addr); + pci_release_region(asd_ha->pcidev, 0); +} + +static int __devinit asd_map_ioport(struct asd_ha_struct *asd_ha) +{ + int i = PCI_IOBAR_OFFSET, err; + struct asd_ha_addrspace *io_handle = &asd_ha->io_handle[0]; + + asd_ha->iospace = 1; + io_handle->start = pci_resource_start(asd_ha->pcidev, i); + io_handle->len = pci_resource_len(asd_ha->pcidev, i); + io_handle->flags = pci_resource_flags(asd_ha->pcidev, i); + io_handle->addr = (void __iomem *) io_handle->start; + if (!io_handle->start || !io_handle->len) { + asd_printk("couldn't get IO ports for %s\n", + pci_name(asd_ha->pcidev)); + return -ENODEV; + } + err = pci_request_region(asd_ha->pcidev, i, ASD_DRIVER_NAME); + if (err) { + asd_printk("couldn't reserve io space for %s\n", + pci_name(asd_ha->pcidev)); + } + + return err; +} + +static void __devexit asd_unmap_ioport(struct asd_ha_struct *asd_ha) +{ + pci_release_region(asd_ha->pcidev, PCI_IOBAR_OFFSET); +} + +static int __devinit asd_map_ha(struct asd_ha_struct *asd_ha) +{ + int err; + u16 cmd_reg; + + err = pci_read_config_word(asd_ha->pcidev, PCI_COMMAND, &cmd_reg); + if (err) { + asd_printk("couldn't read command register of %s\n", + pci_name(asd_ha->pcidev)); + goto Err; + } + + err = -ENODEV; + if (cmd_reg & PCI_COMMAND_MEMORY) { + if ((err = asd_map_memio(asd_ha))) + goto Err; + } else if (cmd_reg & PCI_COMMAND_IO) { + if ((err = asd_map_ioport(asd_ha))) + goto Err; + asd_printk("%s ioport mapped -- upgrade your hardware\n", + pci_name(asd_ha->pcidev)); + } else { + asd_printk("no proper device access to %s\n", + pci_name(asd_ha->pcidev)); + goto Err; + } + + return 0; +Err: + return err; +} + +static void __devexit asd_unmap_ha(struct asd_ha_struct *asd_ha) +{ + if (asd_ha->iospace) + asd_unmap_ioport(asd_ha); + else + asd_unmap_memio(asd_ha); +} + +static const char *asd_dev_rev[30] = { + [0] = "A0", + [1] = "A1", + [8] = "B0", +}; + +static int __devinit asd_common_setup(struct asd_ha_struct *asd_ha) +{ + int err, i; + + err = pci_read_config_byte(asd_ha->pcidev, PCI_REVISION_ID, + &asd_ha->revision_id); + if (err) { + asd_printk("couldn't read REVISION ID register of %s\n", + pci_name(asd_ha->pcidev)); + goto Err; + } + err = -ENODEV; + if (asd_ha->revision_id < AIC9410_DEV_REV_B0) { + asd_printk("%s is revision %s (%X), which is not supported\n", + pci_name(asd_ha->pcidev), + asd_dev_rev[asd_ha->revision_id], + asd_ha->revision_id); + goto Err; + } + /* Provide some sane default values. */ + asd_ha->hw_prof.max_scbs = 512; + asd_ha->hw_prof.max_ddbs = 128; + asd_ha->hw_prof.num_phys = ASD_MAX_PHYS; + /* All phys are enabled, by default. */ + asd_ha->hw_prof.enabled_phys = 0xFF; + for (i = 0; i < ASD_MAX_PHYS; i++) { + asd_ha->hw_prof.phy_desc[i].max_sas_lrate = PHY_LINKRATE_3; + asd_ha->hw_prof.phy_desc[i].min_sas_lrate = PHY_LINKRATE_1_5; + asd_ha->hw_prof.phy_desc[i].max_sata_lrate= PHY_LINKRATE_1_5; + asd_ha->hw_prof.phy_desc[i].min_sata_lrate= PHY_LINKRATE_1_5; + } + + return 0; +Err: + return err; +} + +static int __devinit asd_aic9410_setup(struct asd_ha_struct *asd_ha) +{ + int err = asd_common_setup(asd_ha); + + if (err) + return err; + + asd_ha->hw_prof.addr_range = 8; + asd_ha->hw_prof.port_name_base = 0; + asd_ha->hw_prof.dev_name_base = 8; + asd_ha->hw_prof.sata_name_base = 16; + + return 0; +} + +static int __devinit asd_aic9405_setup(struct asd_ha_struct *asd_ha) +{ + int err = asd_common_setup(asd_ha); + + if (err) + return err; + + asd_ha->hw_prof.addr_range = 4; + asd_ha->hw_prof.port_name_base = 0; + asd_ha->hw_prof.dev_name_base = 4; + asd_ha->hw_prof.sata_name_base = 8; + + return 0; +} + +static ssize_t asd_show_dev_rev(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct asd_ha_struct *asd_ha = dev_to_asd_ha(dev); + return snprintf(buf, PAGE_SIZE, "%s\n", + asd_dev_rev[asd_ha->revision_id]); +} +static DEVICE_ATTR(revision, S_IRUGO, asd_show_dev_rev, NULL); + +static ssize_t asd_show_dev_bios_build(struct device *dev, + struct device_attribute *attr,char *buf) +{ + struct asd_ha_struct *asd_ha = dev_to_asd_ha(dev); + return snprintf(buf, PAGE_SIZE, "%d\n", asd_ha->hw_prof.bios.bld); +} +static DEVICE_ATTR(bios_build, S_IRUGO, asd_show_dev_bios_build, NULL); + +static ssize_t asd_show_dev_pcba_sn(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct asd_ha_struct *asd_ha = dev_to_asd_ha(dev); + return snprintf(buf, PAGE_SIZE, "%s\n", asd_ha->hw_prof.pcba_sn); +} +static DEVICE_ATTR(pcba_sn, S_IRUGO, asd_show_dev_pcba_sn, NULL); + +static void asd_create_dev_attrs(struct asd_ha_struct *asd_ha) +{ + device_create_file(&asd_ha->pcidev->dev, &dev_attr_revision); + device_create_file(&asd_ha->pcidev->dev, &dev_attr_bios_build); + device_create_file(&asd_ha->pcidev->dev, &dev_attr_pcba_sn); +} + +static void asd_remove_dev_attrs(struct asd_ha_struct *asd_ha) +{ + device_remove_file(&asd_ha->pcidev->dev, &dev_attr_revision); + device_remove_file(&asd_ha->pcidev->dev, &dev_attr_bios_build); + device_remove_file(&asd_ha->pcidev->dev, &dev_attr_pcba_sn); +} + +/* The first entry, 0, is used for dynamic ids, the rest for devices + * we know about. + */ +static struct asd_pcidev_struct { + const char * name; + int (*setup)(struct asd_ha_struct *asd_ha); +} asd_pcidev_data[] = { + /* Id 0 is used for dynamic ids. */ + { .name = "Adaptec AIC-94xx SAS/SATA Host Adapter", + .setup = asd_aic9410_setup + }, + { .name = "Adaptec AIC-9410W SAS/SATA Host Adapter", + .setup = asd_aic9410_setup + }, + { .name = "Adaptec AIC-9405W SAS/SATA Host Adapter", + .setup = asd_aic9405_setup + }, +}; + +static inline int asd_create_ha_caches(struct asd_ha_struct *asd_ha) +{ + asd_ha->scb_pool = dma_pool_create(ASD_DRIVER_NAME "_scb_pool", + &asd_ha->pcidev->dev, + sizeof(struct scb), + 8, 0); + if (!asd_ha->scb_pool) { + asd_printk("couldn't create scb pool\n"); + return -ENOMEM; + } + + return 0; +} + +/** + * asd_free_edbs -- free empty data buffers + * asd_ha: pointer to host adapter structure + */ +static inline void asd_free_edbs(struct asd_ha_struct *asd_ha) +{ + struct asd_seq_data *seq = &asd_ha->seq; + int i; + + for (i = 0; i < seq->num_edbs; i++) + asd_free_coherent(asd_ha, seq->edb_arr[i]); + kfree(seq->edb_arr); + seq->edb_arr = NULL; +} + +static inline void asd_free_escbs(struct asd_ha_struct *asd_ha) +{ + struct asd_seq_data *seq = &asd_ha->seq; + int i; + + for (i = 0; i < seq->num_escbs; i++) { + if (!list_empty(&seq->escb_arr[i]->list)) + list_del_init(&seq->escb_arr[i]->list); + + asd_ascb_free(seq->escb_arr[i]); + } + kfree(seq->escb_arr); + seq->escb_arr = NULL; +} + +static inline void asd_destroy_ha_caches(struct asd_ha_struct *asd_ha) +{ + int i; + + if (asd_ha->hw_prof.ddb_ext) + asd_free_coherent(asd_ha, asd_ha->hw_prof.ddb_ext); + if (asd_ha->hw_prof.scb_ext) + asd_free_coherent(asd_ha, asd_ha->hw_prof.scb_ext); + + if (asd_ha->hw_prof.ddb_bitmap) + kfree(asd_ha->hw_prof.ddb_bitmap); + asd_ha->hw_prof.ddb_bitmap = NULL; + + for (i = 0; i < ASD_MAX_PHYS; i++) { + struct asd_phy *phy = &asd_ha->phys[i]; + + asd_free_coherent(asd_ha, phy->id_frm_tok); + } + if (asd_ha->seq.escb_arr) + asd_free_escbs(asd_ha); + if (asd_ha->seq.edb_arr) + asd_free_edbs(asd_ha); + if (asd_ha->hw_prof.ue.area) { + kfree(asd_ha->hw_prof.ue.area); + asd_ha->hw_prof.ue.area = NULL; + } + if (asd_ha->seq.tc_index_array) { + kfree(asd_ha->seq.tc_index_array); + kfree(asd_ha->seq.tc_index_bitmap); + asd_ha->seq.tc_index_array = NULL; + asd_ha->seq.tc_index_bitmap = NULL; + } + if (asd_ha->seq.actual_dl) { + asd_free_coherent(asd_ha, asd_ha->seq.actual_dl); + asd_ha->seq.actual_dl = NULL; + asd_ha->seq.dl = NULL; + } + if (asd_ha->seq.next_scb.vaddr) { + dma_pool_free(asd_ha->scb_pool, asd_ha->seq.next_scb.vaddr, + asd_ha->seq.next_scb.dma_handle); + asd_ha->seq.next_scb.vaddr = NULL; + } + dma_pool_destroy(asd_ha->scb_pool); + asd_ha->scb_pool = NULL; +} + +kmem_cache_t *asd_dma_token_cache; +kmem_cache_t *asd_ascb_cache; + +static int asd_create_global_caches(void) +{ + if (!asd_dma_token_cache) { + asd_dma_token_cache + = kmem_cache_create(ASD_DRIVER_NAME "_dma_token", + sizeof(struct asd_dma_tok), + 0, + SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!asd_dma_token_cache) { + asd_printk("couldn't create dma token cache\n"); + return -ENOMEM; + } + } + + if (!asd_ascb_cache) { + asd_ascb_cache = kmem_cache_create(ASD_DRIVER_NAME "_ascb", + sizeof(struct asd_ascb), + 0, + SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!asd_ascb_cache) { + asd_printk("couldn't create ascb cache\n"); + goto Err; + } + } + + return 0; +Err: + kmem_cache_destroy(asd_dma_token_cache); + asd_dma_token_cache = NULL; + return -ENOMEM; +} + +static void asd_destroy_global_caches(void) +{ + if (asd_dma_token_cache) + kmem_cache_destroy(asd_dma_token_cache); + asd_dma_token_cache = NULL; + + if (asd_ascb_cache) + kmem_cache_destroy(asd_ascb_cache); + asd_ascb_cache = NULL; +} + +static int asd_register_sas_ha(struct asd_ha_struct *asd_ha) +{ + int i; + struct asd_sas_phy **sas_phys = + kmalloc(ASD_MAX_PHYS * sizeof(struct asd_sas_phy), GFP_KERNEL); + struct asd_sas_port **sas_ports = + kmalloc(ASD_MAX_PHYS * sizeof(struct asd_sas_port), GFP_KERNEL); + + if (!sas_phys || !sas_ports) { + kfree(sas_phys); + kfree(sas_ports); + return -ENOMEM; + } + + asd_ha->sas_ha.sas_ha_name = (char *) asd_ha->name; + asd_ha->sas_ha.lldd_module = THIS_MODULE; + asd_ha->sas_ha.sas_addr = &asd_ha->hw_prof.sas_addr[0]; + + for (i = 0; i < ASD_MAX_PHYS; i++) { + sas_phys[i] = &asd_ha->phys[i].sas_phy; + sas_ports[i] = &asd_ha->ports[i]; + } + + asd_ha->sas_ha.sas_phy = sas_phys; + asd_ha->sas_ha.sas_port= sas_ports; + asd_ha->sas_ha.num_phys= ASD_MAX_PHYS; + + asd_ha->sas_ha.lldd_queue_size = asd_ha->seq.can_queue; + + return sas_register_ha(&asd_ha->sas_ha); +} + +static int asd_unregister_sas_ha(struct asd_ha_struct *asd_ha) +{ + int err; + + err = sas_unregister_ha(&asd_ha->sas_ha); + + sas_remove_host(asd_ha->sas_ha.core.shost); + scsi_remove_host(asd_ha->sas_ha.core.shost); + scsi_host_put(asd_ha->sas_ha.core.shost); + + kfree(asd_ha->sas_ha.sas_phy); + kfree(asd_ha->sas_ha.sas_port); + + return err; +} + +static int __devinit asd_pci_probe(struct pci_dev *dev, + const struct pci_device_id *id) +{ + struct asd_pcidev_struct *asd_dev; + unsigned asd_id = (unsigned) id->driver_data; + struct asd_ha_struct *asd_ha; + struct Scsi_Host *shost; + int err; + + if (asd_id >= ARRAY_SIZE(asd_pcidev_data)) { + asd_printk("wrong driver_data in PCI table\n"); + return -ENODEV; + } + + if ((err = pci_enable_device(dev))) { + asd_printk("couldn't enable device %s\n", pci_name(dev)); + return err; + } + + pci_set_master(dev); + + err = -ENOMEM; + + shost = scsi_host_alloc(&aic94xx_sht, sizeof(void *)); + if (!shost) + goto Err; + + asd_dev = &asd_pcidev_data[asd_id]; + + asd_ha = kzalloc(sizeof(*asd_ha), GFP_KERNEL); + if (!asd_ha) { + asd_printk("out of memory\n"); + goto Err; + } + asd_ha->pcidev = dev; + asd_ha->sas_ha.pcidev = asd_ha->pcidev; + asd_ha->sas_ha.lldd_ha = asd_ha; + + asd_ha->name = asd_dev->name; + asd_printk("found %s, device %s\n", asd_ha->name, pci_name(dev)); + + SHOST_TO_SAS_HA(shost) = &asd_ha->sas_ha; + asd_ha->sas_ha.core.shost = shost; + shost->transportt = aic94xx_transport_template; + shost->max_id = ~0; + shost->max_lun = ~0; + shost->max_cmd_len = 16; + + err = scsi_add_host(shost, &dev->dev); + if (err) { + scsi_host_put(shost); + goto Err_free; + } + + + + err = asd_dev->setup(asd_ha); + if (err) + goto Err_free; + + err = -ENODEV; + if (!pci_set_dma_mask(dev, DMA_64BIT_MASK) + && !pci_set_consistent_dma_mask(dev, DMA_64BIT_MASK)) + ; + else if (!pci_set_dma_mask(dev, DMA_32BIT_MASK) + && !pci_set_consistent_dma_mask(dev, DMA_32BIT_MASK)) + ; + else { + asd_printk("no suitable DMA mask for %s\n", pci_name(dev)); + goto Err_free; + } + + pci_set_drvdata(dev, asd_ha); + + err = asd_map_ha(asd_ha); + if (err) + goto Err_free; + + err = asd_create_ha_caches(asd_ha); + if (err) + goto Err_unmap; + + err = asd_init_hw(asd_ha); + if (err) + goto Err_free_cache; + + asd_printk("device %s: SAS addr %llx, PCBA SN %s, %d phys, %d enabled " + "phys, flash %s, BIOS %s%d\n", + pci_name(dev), SAS_ADDR(asd_ha->hw_prof.sas_addr), + asd_ha->hw_prof.pcba_sn, asd_ha->hw_prof.max_phys, + asd_ha->hw_prof.num_phys, + asd_ha->hw_prof.flash.present ? "present" : "not present", + asd_ha->hw_prof.bios.present ? "build " : "not present", + asd_ha->hw_prof.bios.bld); + + if (use_msi) + pci_enable_msi(asd_ha->pcidev); + + err = request_irq(asd_ha->pcidev->irq, asd_hw_isr, SA_SHIRQ, + ASD_DRIVER_NAME, asd_ha); + if (err) { + asd_printk("couldn't get irq %d for %s\n", + asd_ha->pcidev->irq, pci_name(asd_ha->pcidev)); + goto Err_irq; + } + asd_enable_ints(asd_ha); + + err = asd_init_post_escbs(asd_ha); + if (err) { + asd_printk("couldn't post escbs for %s\n", + pci_name(asd_ha->pcidev)); + goto Err_escbs; + } + ASD_DPRINTK("escbs posted\n"); + + asd_create_dev_attrs(asd_ha); + + err = asd_register_sas_ha(asd_ha); + if (err) + goto Err_reg_sas; + + err = asd_enable_phys(asd_ha, asd_ha->hw_prof.enabled_phys); + if (err) { + asd_printk("coudln't enable phys, err:%d\n", err); + goto Err_en_phys; + } + ASD_DPRINTK("enabled phys\n"); + /* give the phy enabling interrupt event time to come in (1s + * is empirically about all it takes) */ + ssleep(1); + /* Wait for discovery to finish */ + scsi_flush_work(asd_ha->sas_ha.core.shost); + + return 0; +Err_en_phys: + asd_unregister_sas_ha(asd_ha); +Err_reg_sas: + asd_remove_dev_attrs(asd_ha); +Err_escbs: + asd_disable_ints(asd_ha); + free_irq(dev->irq, asd_ha); +Err_irq: + if (use_msi) + pci_disable_msi(dev); + asd_chip_hardrst(asd_ha); +Err_free_cache: + asd_destroy_ha_caches(asd_ha); +Err_unmap: + asd_unmap_ha(asd_ha); +Err_free: + kfree(asd_ha); + scsi_remove_host(shost); +Err: + pci_disable_device(dev); + return err; +} + +static void asd_free_queues(struct asd_ha_struct *asd_ha) +{ + unsigned long flags; + LIST_HEAD(pending); + struct list_head *n, *pos; + + spin_lock_irqsave(&asd_ha->seq.pend_q_lock, flags); + asd_ha->seq.pending = 0; + list_splice_init(&asd_ha->seq.pend_q, &pending); + spin_unlock_irqrestore(&asd_ha->seq.pend_q_lock, flags); + + if (!list_empty(&pending)) + ASD_DPRINTK("Uh-oh! Pending is not empty!\n"); + + list_for_each_safe(pos, n, &pending) { + struct asd_ascb *ascb = list_entry(pos, struct asd_ascb, list); + list_del_init(pos); + ASD_DPRINTK("freeing from pending\n"); + asd_ascb_free(ascb); + } +} + +static void asd_turn_off_leds(struct asd_ha_struct *asd_ha) +{ + u8 phy_mask = asd_ha->hw_prof.enabled_phys; + u8 i; + + for_each_phy(phy_mask, phy_mask, i) { + asd_turn_led(asd_ha, i, 0); + asd_control_led(asd_ha, i, 0); + } +} + +static void __devexit asd_pci_remove(struct pci_dev *dev) +{ + struct asd_ha_struct *asd_ha = pci_get_drvdata(dev); + + if (!asd_ha) + return; + + asd_unregister_sas_ha(asd_ha); + + asd_disable_ints(asd_ha); + + asd_remove_dev_attrs(asd_ha); + + /* XXX more here as needed */ + + free_irq(dev->irq, asd_ha); + if (use_msi) + pci_disable_msi(asd_ha->pcidev); + asd_turn_off_leds(asd_ha); + asd_chip_hardrst(asd_ha); + asd_free_queues(asd_ha); + asd_destroy_ha_caches(asd_ha); + asd_unmap_ha(asd_ha); + kfree(asd_ha); + pci_disable_device(dev); + return; +} + +static ssize_t asd_version_show(struct device_driver *driver, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%s\n", ASD_DRIVER_VERSION); +} +static DRIVER_ATTR(version, S_IRUGO, asd_version_show, NULL); + +static void asd_create_driver_attrs(struct device_driver *driver) +{ + driver_create_file(driver, &driver_attr_version); +} + +static void asd_remove_driver_attrs(struct device_driver *driver) +{ + driver_remove_file(driver, &driver_attr_version); +} + +static struct sas_domain_function_template aic94xx_transport_functions = { + .lldd_port_formed = asd_update_port_links, + + .lldd_dev_found = asd_dev_found, + .lldd_dev_gone = asd_dev_gone, + + .lldd_execute_task = asd_execute_task, + + .lldd_abort_task = asd_abort_task, + .lldd_abort_task_set = asd_abort_task_set, + .lldd_clear_aca = asd_clear_aca, + .lldd_clear_task_set = asd_clear_task_set, + .lldd_I_T_nexus_reset = NULL, + .lldd_lu_reset = asd_lu_reset, + .lldd_query_task = asd_query_task, + + .lldd_clear_nexus_port = asd_clear_nexus_port, + .lldd_clear_nexus_ha = asd_clear_nexus_ha, + + .lldd_control_phy = asd_control_phy, +}; + +static const struct pci_device_id aic94xx_pci_table[] __devinitdata = { + {PCI_DEVICE(PCI_VENDOR_ID_ADAPTEC2, PCI_DEVICE_ID_ADAPTEC2_RAZOR10), + 0, 0, 1}, + {PCI_DEVICE(PCI_VENDOR_ID_ADAPTEC2, PCI_DEVICE_ID_ADAPTEC2_RAZOR12), + 0, 0, 1}, + {PCI_DEVICE(PCI_VENDOR_ID_ADAPTEC2, PCI_DEVICE_ID_ADAPTEC2_RAZOR1E), + 0, 0, 1}, + {PCI_DEVICE(PCI_VENDOR_ID_ADAPTEC2, PCI_DEVICE_ID_ADAPTEC2_RAZOR30), + 0, 0, 2}, + {PCI_DEVICE(PCI_VENDOR_ID_ADAPTEC2, PCI_DEVICE_ID_ADAPTEC2_RAZOR32), + 0, 0, 2}, + {PCI_DEVICE(PCI_VENDOR_ID_ADAPTEC2, PCI_DEVICE_ID_ADAPTEC2_RAZOR3E), + 0, 0, 2}, + {PCI_DEVICE(PCI_VENDOR_ID_ADAPTEC2, PCI_DEVICE_ID_ADAPTEC2_RAZOR3F), + 0, 0, 2}, + {} +}; + +MODULE_DEVICE_TABLE(pci, aic94xx_pci_table); + +static struct pci_driver aic94xx_pci_driver = { + .name = ASD_DRIVER_NAME, + .id_table = aic94xx_pci_table, + .probe = asd_pci_probe, + .remove = __devexit_p(asd_pci_remove), +}; + +static int __init aic94xx_init(void) +{ + int err; + + + asd_printk("%s version %s loaded\n", ASD_DRIVER_DESCRIPTION, + ASD_DRIVER_VERSION); + + err = asd_create_global_caches(); + if (err) + return err; + + aic94xx_transport_template = + sas_domain_attach_transport(&aic94xx_transport_functions); + if (err) + goto out_destroy_caches; + + err = pci_register_driver(&aic94xx_pci_driver); + if (err) + goto out_release_transport; + + asd_create_driver_attrs(&aic94xx_pci_driver.driver); + + return err; + + out_release_transport: + sas_release_transport(aic94xx_transport_template); + out_destroy_caches: + asd_destroy_global_caches(); + + return err; +} + +static void __exit aic94xx_exit(void) +{ + asd_remove_driver_attrs(&aic94xx_pci_driver.driver); + pci_unregister_driver(&aic94xx_pci_driver); + sas_release_transport(aic94xx_transport_template); + asd_destroy_global_caches(); + asd_printk("%s version %s unloaded\n", ASD_DRIVER_DESCRIPTION, + ASD_DRIVER_VERSION); +} + +module_init(aic94xx_init); +module_exit(aic94xx_exit); + +MODULE_AUTHOR("Luben Tuikov "); +MODULE_DESCRIPTION(ASD_DRIVER_DESCRIPTION); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(ASD_DRIVER_VERSION); diff --git a/drivers/scsi/aic94xx/aic94xx_reg.c b/drivers/scsi/aic94xx/aic94xx_reg.c new file mode 100644 index 000000000000..f210dac3203d --- /dev/null +++ b/drivers/scsi/aic94xx/aic94xx_reg.c @@ -0,0 +1,332 @@ +/* + * Aic94xx SAS/SATA driver register access. + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This file is part of the aic94xx driver. + * + * The aic94xx driver is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2 of the + * License. + * + * The aic94xx driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with the aic94xx driver; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include "aic94xx_reg.h" +#include "aic94xx.h" + +/* Writing to device address space. + * Offset comes before value to remind that the operation of + * this function is *offs = val. + */ +static inline void asd_write_byte(struct asd_ha_struct *asd_ha, + unsigned long offs, u8 val) +{ + if (unlikely(asd_ha->iospace)) + outb(val, + (unsigned long)asd_ha->io_handle[0].addr + (offs & 0xFF)); + else + writeb(val, asd_ha->io_handle[0].addr + offs); + wmb(); +} + +static inline void asd_write_word(struct asd_ha_struct *asd_ha, + unsigned long offs, u16 val) +{ + if (unlikely(asd_ha->iospace)) + outw(val, + (unsigned long)asd_ha->io_handle[0].addr + (offs & 0xFF)); + else + writew(val, asd_ha->io_handle[0].addr + offs); + wmb(); +} + +static inline void asd_write_dword(struct asd_ha_struct *asd_ha, + unsigned long offs, u32 val) +{ + if (unlikely(asd_ha->iospace)) + outl(val, + (unsigned long)asd_ha->io_handle[0].addr + (offs & 0xFF)); + else + writel(val, asd_ha->io_handle[0].addr + offs); + wmb(); +} + +/* Reading from device address space. + */ +static inline u8 asd_read_byte(struct asd_ha_struct *asd_ha, + unsigned long offs) +{ + u8 val; + if (unlikely(asd_ha->iospace)) + val = inb((unsigned long) asd_ha->io_handle[0].addr + + (offs & 0xFF)); + else + val = readb(asd_ha->io_handle[0].addr + offs); + rmb(); + return val; +} + +static inline u16 asd_read_word(struct asd_ha_struct *asd_ha, + unsigned long offs) +{ + u16 val; + if (unlikely(asd_ha->iospace)) + val = inw((unsigned long)asd_ha->io_handle[0].addr + + (offs & 0xFF)); + else + val = readw(asd_ha->io_handle[0].addr + offs); + rmb(); + return val; +} + +static inline u32 asd_read_dword(struct asd_ha_struct *asd_ha, + unsigned long offs) +{ + u32 val; + if (unlikely(asd_ha->iospace)) + val = inl((unsigned long) asd_ha->io_handle[0].addr + + (offs & 0xFF)); + else + val = readl(asd_ha->io_handle[0].addr + offs); + rmb(); + return val; +} + +static inline u32 asd_mem_offs_swa(void) +{ + return 0; +} + +static inline u32 asd_mem_offs_swc(void) +{ + return asd_mem_offs_swa() + MBAR0_SWA_SIZE; +} + +static inline u32 asd_mem_offs_swb(void) +{ + return asd_mem_offs_swc() + MBAR0_SWC_SIZE + 0x20; +} + +/* We know that the register wanted is in the range + * of the sliding window. + */ +#define ASD_READ_SW(ww, type, ord) \ +static inline type asd_read_##ww##_##ord (struct asd_ha_struct *asd_ha,\ + u32 reg) \ +{ \ + struct asd_ha_addrspace *io_handle = &asd_ha->io_handle[0]; \ + u32 map_offs=(reg - io_handle-> ww##_base )+asd_mem_offs_##ww ();\ + return asd_read_##ord (asd_ha, (unsigned long) map_offs); \ +} + +#define ASD_WRITE_SW(ww, type, ord) \ +static inline void asd_write_##ww##_##ord (struct asd_ha_struct *asd_ha,\ + u32 reg, type val) \ +{ \ + struct asd_ha_addrspace *io_handle = &asd_ha->io_handle[0]; \ + u32 map_offs=(reg - io_handle-> ww##_base )+asd_mem_offs_##ww ();\ + asd_write_##ord (asd_ha, (unsigned long) map_offs, val); \ +} + +ASD_READ_SW(swa, u8, byte); +ASD_READ_SW(swa, u16, word); +ASD_READ_SW(swa, u32, dword); + +ASD_READ_SW(swb, u8, byte); +ASD_READ_SW(swb, u16, word); +ASD_READ_SW(swb, u32, dword); + +ASD_READ_SW(swc, u8, byte); +ASD_READ_SW(swc, u16, word); +ASD_READ_SW(swc, u32, dword); + +ASD_WRITE_SW(swa, u8, byte); +ASD_WRITE_SW(swa, u16, word); +ASD_WRITE_SW(swa, u32, dword); + +ASD_WRITE_SW(swb, u8, byte); +ASD_WRITE_SW(swb, u16, word); +ASD_WRITE_SW(swb, u32, dword); + +ASD_WRITE_SW(swc, u8, byte); +ASD_WRITE_SW(swc, u16, word); +ASD_WRITE_SW(swc, u32, dword); + +/* + * A word about sliding windows: + * MBAR0 is divided into sliding windows A, C and B, in that order. + * SWA starts at offset 0 of MBAR0, up to 0x57, with size 0x58 bytes. + * SWC starts at offset 0x58 of MBAR0, up to 0x60, with size 0x8 bytes. + * From 0x60 to 0x7F, we have a copy of PCI config space 0x60-0x7F. + * SWB starts at offset 0x80 of MBAR0 and extends to the end of MBAR0. + * See asd_init_sw() in aic94xx_hwi.c + * + * We map the most common registers we'd access of the internal 4GB + * host adapter memory space. If a register/internal memory location + * is wanted which is not mapped, we slide SWB, by paging it, + * see asd_move_swb() in aic94xx_reg.c. + */ + +/** + * asd_move_swb -- move sliding window B + * @asd_ha: pointer to host adapter structure + * @reg: register desired to be within range of the new window + */ +static inline void asd_move_swb(struct asd_ha_struct *asd_ha, u32 reg) +{ + u32 base = reg & ~(MBAR0_SWB_SIZE-1); + pci_write_config_dword(asd_ha->pcidev, PCI_CONF_MBAR0_SWB, base); + asd_ha->io_handle[0].swb_base = base; +} + +static void __asd_write_reg_byte(struct asd_ha_struct *asd_ha, u32 reg, u8 val) +{ + struct asd_ha_addrspace *io_handle=&asd_ha->io_handle[0]; + BUG_ON(reg >= 0xC0000000 || reg < ALL_BASE_ADDR); + if (io_handle->swa_base <= reg + && reg < io_handle->swa_base + MBAR0_SWA_SIZE) + asd_write_swa_byte (asd_ha, reg,val); + else if (io_handle->swb_base <= reg + && reg < io_handle->swb_base + MBAR0_SWB_SIZE) + asd_write_swb_byte (asd_ha, reg, val); + else if (io_handle->swc_base <= reg + && reg < io_handle->swc_base + MBAR0_SWC_SIZE) + asd_write_swc_byte (asd_ha, reg, val); + else { + /* Ok, we have to move SWB */ + asd_move_swb(asd_ha, reg); + asd_write_swb_byte (asd_ha, reg, val); + } +} + +#define ASD_WRITE_REG(type, ord) \ +void asd_write_reg_##ord (struct asd_ha_struct *asd_ha, u32 reg, type val)\ +{ \ + struct asd_ha_addrspace *io_handle=&asd_ha->io_handle[0]; \ + unsigned long flags; \ + BUG_ON(reg >= 0xC0000000 || reg < ALL_BASE_ADDR); \ + spin_lock_irqsave(&asd_ha->iolock, flags); \ + if (io_handle->swa_base <= reg \ + && reg < io_handle->swa_base + MBAR0_SWA_SIZE) \ + asd_write_swa_##ord (asd_ha, reg,val); \ + else if (io_handle->swb_base <= reg \ + && reg < io_handle->swb_base + MBAR0_SWB_SIZE) \ + asd_write_swb_##ord (asd_ha, reg, val); \ + else if (io_handle->swc_base <= reg \ + && reg < io_handle->swc_base + MBAR0_SWC_SIZE) \ + asd_write_swc_##ord (asd_ha, reg, val); \ + else { \ + /* Ok, we have to move SWB */ \ + asd_move_swb(asd_ha, reg); \ + asd_write_swb_##ord (asd_ha, reg, val); \ + } \ + spin_unlock_irqrestore(&asd_ha->iolock, flags); \ +} + +ASD_WRITE_REG(u8, byte); +ASD_WRITE_REG(u16,word); +ASD_WRITE_REG(u32,dword); + +static u8 __asd_read_reg_byte(struct asd_ha_struct *asd_ha, u32 reg) +{ + struct asd_ha_addrspace *io_handle=&asd_ha->io_handle[0]; + u8 val; + BUG_ON(reg >= 0xC0000000 || reg < ALL_BASE_ADDR); + if (io_handle->swa_base <= reg + && reg < io_handle->swa_base + MBAR0_SWA_SIZE) + val = asd_read_swa_byte (asd_ha, reg); + else if (io_handle->swb_base <= reg + && reg < io_handle->swb_base + MBAR0_SWB_SIZE) + val = asd_read_swb_byte (asd_ha, reg); + else if (io_handle->swc_base <= reg + && reg < io_handle->swc_base + MBAR0_SWC_SIZE) + val = asd_read_swc_byte (asd_ha, reg); + else { + /* Ok, we have to move SWB */ + asd_move_swb(asd_ha, reg); + val = asd_read_swb_byte (asd_ha, reg); + } + return val; +} + +#define ASD_READ_REG(type, ord) \ +type asd_read_reg_##ord (struct asd_ha_struct *asd_ha, u32 reg) \ +{ \ + struct asd_ha_addrspace *io_handle=&asd_ha->io_handle[0]; \ + type val; \ + unsigned long flags; \ + BUG_ON(reg >= 0xC0000000 || reg < ALL_BASE_ADDR); \ + spin_lock_irqsave(&asd_ha->iolock, flags); \ + if (io_handle->swa_base <= reg \ + && reg < io_handle->swa_base + MBAR0_SWA_SIZE) \ + val = asd_read_swa_##ord (asd_ha, reg); \ + else if (io_handle->swb_base <= reg \ + && reg < io_handle->swb_base + MBAR0_SWB_SIZE) \ + val = asd_read_swb_##ord (asd_ha, reg); \ + else if (io_handle->swc_base <= reg \ + && reg < io_handle->swc_base + MBAR0_SWC_SIZE) \ + val = asd_read_swc_##ord (asd_ha, reg); \ + else { \ + /* Ok, we have to move SWB */ \ + asd_move_swb(asd_ha, reg); \ + val = asd_read_swb_##ord (asd_ha, reg); \ + } \ + spin_unlock_irqrestore(&asd_ha->iolock, flags); \ + return val; \ +} + +ASD_READ_REG(u8, byte); +ASD_READ_REG(u16,word); +ASD_READ_REG(u32,dword); + +/** + * asd_read_reg_string -- read a string of bytes from io space memory + * @asd_ha: pointer to host adapter structure + * @dst: pointer to a destination buffer where data will be written to + * @offs: start offset (register) to read from + * @count: number of bytes to read + */ +void asd_read_reg_string(struct asd_ha_struct *asd_ha, void *dst, + u32 offs, int count) +{ + u8 *p = dst; + unsigned long flags; + + spin_lock_irqsave(&asd_ha->iolock, flags); + for ( ; count > 0; count--, offs++, p++) + *p = __asd_read_reg_byte(asd_ha, offs); + spin_unlock_irqrestore(&asd_ha->iolock, flags); +} + +/** + * asd_write_reg_string -- write a string of bytes to io space memory + * @asd_ha: pointer to host adapter structure + * @src: pointer to source buffer where data will be read from + * @offs: start offset (register) to write to + * @count: number of bytes to write + */ +void asd_write_reg_string(struct asd_ha_struct *asd_ha, void *src, + u32 offs, int count) +{ + u8 *p = src; + unsigned long flags; + + spin_lock_irqsave(&asd_ha->iolock, flags); + for ( ; count > 0; count--, offs++, p++) + __asd_write_reg_byte(asd_ha, offs, *p); + spin_unlock_irqrestore(&asd_ha->iolock, flags); +} diff --git a/drivers/scsi/aic94xx/aic94xx_reg.h b/drivers/scsi/aic94xx/aic94xx_reg.h new file mode 100644 index 000000000000..2279307fd27e --- /dev/null +++ b/drivers/scsi/aic94xx/aic94xx_reg.h @@ -0,0 +1,302 @@ +/* + * Aic94xx SAS/SATA driver hardware registers definitions. + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This file is part of the aic94xx driver. + * + * The aic94xx driver is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2 of the + * License. + * + * The aic94xx driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with the aic94xx driver; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _AIC94XX_REG_H_ +#define _AIC94XX_REG_H_ + +#include +#include "aic94xx_hwi.h" + +/* Values */ +#define AIC9410_DEV_REV_B0 0x8 + +/* MBAR0, SWA, SWB, SWC, internal memory space addresses */ +#define REG_BASE_ADDR 0xB8000000 +#define REG_BASE_ADDR_CSEQCIO 0xB8002000 +#define REG_BASE_ADDR_EXSI 0xB8042800 + +#define MBAR0_SWA_SIZE 0x58 +extern u32 MBAR0_SWB_SIZE; +#define MBAR0_SWC_SIZE 0x8 + +/* MBAR1, points to On Chip Memory */ +#define OCM_BASE_ADDR 0xA0000000 +#define OCM_MAX_SIZE 0x20000 + +/* Smallest address possible to reference */ +#define ALL_BASE_ADDR OCM_BASE_ADDR + +/* PCI configuration space registers */ +#define PCI_IOBAR_OFFSET 4 + +#define PCI_CONF_MBAR1 0x6C +#define PCI_CONF_MBAR0_SWA 0x70 +#define PCI_CONF_MBAR0_SWB 0x74 +#define PCI_CONF_MBAR0_SWC 0x78 +#define PCI_CONF_MBAR_KEY 0x7C +#define PCI_CONF_FLSH_BAR 0xB8 + +#include "aic94xx_reg_def.h" + +u8 asd_read_reg_byte(struct asd_ha_struct *asd_ha, u32 reg); +u16 asd_read_reg_word(struct asd_ha_struct *asd_ha, u32 reg); +u32 asd_read_reg_dword(struct asd_ha_struct *asd_ha, u32 reg); + +void asd_write_reg_byte(struct asd_ha_struct *asd_ha, u32 reg, u8 val); +void asd_write_reg_word(struct asd_ha_struct *asd_ha, u32 reg, u16 val); +void asd_write_reg_dword(struct asd_ha_struct *asd_ha, u32 reg, u32 val); + +void asd_read_reg_string(struct asd_ha_struct *asd_ha, void *dst, + u32 offs, int count); +void asd_write_reg_string(struct asd_ha_struct *asd_ha, void *src, + u32 offs, int count); + +#define ASD_READ_OCM(type, ord, S) \ +static inline type asd_read_ocm_##ord (struct asd_ha_struct *asd_ha, \ + u32 offs) \ +{ \ + struct asd_ha_addrspace *io_handle = &asd_ha->io_handle[1]; \ + type val = read##S (io_handle->addr + (unsigned long) offs); \ + rmb(); \ + return val; \ +} + +ASD_READ_OCM(u8, byte, b); +ASD_READ_OCM(u16,word, w); +ASD_READ_OCM(u32,dword,l); + +#define ASD_WRITE_OCM(type, ord, S) \ +static inline void asd_write_ocm_##ord (struct asd_ha_struct *asd_ha, \ + u32 offs, type val) \ +{ \ + struct asd_ha_addrspace *io_handle = &asd_ha->io_handle[1]; \ + write##S (val, io_handle->addr + (unsigned long) offs); \ + return; \ +} + +ASD_WRITE_OCM(u8, byte, b); +ASD_WRITE_OCM(u16,word, w); +ASD_WRITE_OCM(u32,dword,l); + +#define ASD_DDBSITE_READ(type, ord) \ +static inline type asd_ddbsite_read_##ord (struct asd_ha_struct *asd_ha, \ + u16 ddb_site_no, \ + u16 offs) \ +{ \ + asd_write_reg_word(asd_ha, ALTCIOADR, MnDDB_SITE + offs); \ + asd_write_reg_word(asd_ha, ADDBPTR, ddb_site_no); \ + return asd_read_reg_##ord (asd_ha, CTXACCESS); \ +} + +ASD_DDBSITE_READ(u32, dword); +ASD_DDBSITE_READ(u16, word); + +static inline u8 asd_ddbsite_read_byte(struct asd_ha_struct *asd_ha, + u16 ddb_site_no, + u16 offs) +{ + if (offs & 1) + return asd_ddbsite_read_word(asd_ha, ddb_site_no, + offs & ~1) >> 8; + else + return asd_ddbsite_read_word(asd_ha, ddb_site_no, + offs) & 0xFF; +} + + +#define ASD_DDBSITE_WRITE(type, ord) \ +static inline void asd_ddbsite_write_##ord (struct asd_ha_struct *asd_ha, \ + u16 ddb_site_no, \ + u16 offs, type val) \ +{ \ + asd_write_reg_word(asd_ha, ALTCIOADR, MnDDB_SITE + offs); \ + asd_write_reg_word(asd_ha, ADDBPTR, ddb_site_no); \ + asd_write_reg_##ord (asd_ha, CTXACCESS, val); \ +} + +ASD_DDBSITE_WRITE(u32, dword); +ASD_DDBSITE_WRITE(u16, word); + +static inline void asd_ddbsite_write_byte(struct asd_ha_struct *asd_ha, + u16 ddb_site_no, + u16 offs, u8 val) +{ + u16 base = offs & ~1; + u16 rval = asd_ddbsite_read_word(asd_ha, ddb_site_no, base); + if (offs & 1) + rval = (val << 8) | (rval & 0xFF); + else + rval = (rval & 0xFF00) | val; + asd_ddbsite_write_word(asd_ha, ddb_site_no, base, rval); +} + + +#define ASD_SCBSITE_READ(type, ord) \ +static inline type asd_scbsite_read_##ord (struct asd_ha_struct *asd_ha, \ + u16 scb_site_no, \ + u16 offs) \ +{ \ + asd_write_reg_word(asd_ha, ALTCIOADR, MnSCB_SITE + offs); \ + asd_write_reg_word(asd_ha, ASCBPTR, scb_site_no); \ + return asd_read_reg_##ord (asd_ha, CTXACCESS); \ +} + +ASD_SCBSITE_READ(u32, dword); +ASD_SCBSITE_READ(u16, word); + +static inline u8 asd_scbsite_read_byte(struct asd_ha_struct *asd_ha, + u16 scb_site_no, + u16 offs) +{ + if (offs & 1) + return asd_scbsite_read_word(asd_ha, scb_site_no, + offs & ~1) >> 8; + else + return asd_scbsite_read_word(asd_ha, scb_site_no, + offs) & 0xFF; +} + + +#define ASD_SCBSITE_WRITE(type, ord) \ +static inline void asd_scbsite_write_##ord (struct asd_ha_struct *asd_ha, \ + u16 scb_site_no, \ + u16 offs, type val) \ +{ \ + asd_write_reg_word(asd_ha, ALTCIOADR, MnSCB_SITE + offs); \ + asd_write_reg_word(asd_ha, ASCBPTR, scb_site_no); \ + asd_write_reg_##ord (asd_ha, CTXACCESS, val); \ +} + +ASD_SCBSITE_WRITE(u32, dword); +ASD_SCBSITE_WRITE(u16, word); + +static inline void asd_scbsite_write_byte(struct asd_ha_struct *asd_ha, + u16 scb_site_no, + u16 offs, u8 val) +{ + u16 base = offs & ~1; + u16 rval = asd_scbsite_read_word(asd_ha, scb_site_no, base); + if (offs & 1) + rval = (val << 8) | (rval & 0xFF); + else + rval = (rval & 0xFF00) | val; + asd_scbsite_write_word(asd_ha, scb_site_no, base, rval); +} + +/** + * asd_ddbsite_update_word -- atomically update a word in a ddb site + * @asd_ha: pointer to host adapter structure + * @ddb_site_no: the DDB site number + * @offs: the offset into the DDB + * @oldval: old value found in that offset + * @newval: the new value to replace it + * + * This function is used when the sequencers are running and we need to + * update a DDB site atomically without expensive pausing and upausing + * of the sequencers and accessing the DDB site through the CIO bus. + * + * Return 0 on success; -EFAULT on parity error; -EAGAIN if the old value + * is different than the current value at that offset. + */ +static inline int asd_ddbsite_update_word(struct asd_ha_struct *asd_ha, + u16 ddb_site_no, u16 offs, + u16 oldval, u16 newval) +{ + u8 done; + u16 oval = asd_ddbsite_read_word(asd_ha, ddb_site_no, offs); + if (oval != oldval) + return -EAGAIN; + asd_write_reg_word(asd_ha, AOLDDATA, oldval); + asd_write_reg_word(asd_ha, ANEWDATA, newval); + do { + done = asd_read_reg_byte(asd_ha, ATOMICSTATCTL); + } while (!(done & ATOMICDONE)); + if (done & ATOMICERR) + return -EFAULT; /* parity error */ + else if (done & ATOMICWIN) + return 0; /* success */ + else + return -EAGAIN; /* oldval different than current value */ +} + +static inline int asd_ddbsite_update_byte(struct asd_ha_struct *asd_ha, + u16 ddb_site_no, u16 offs, + u8 _oldval, u8 _newval) +{ + u16 base = offs & ~1; + u16 oval; + u16 nval = asd_ddbsite_read_word(asd_ha, ddb_site_no, base); + if (offs & 1) { + if ((nval >> 8) != _oldval) + return -EAGAIN; + nval = (_newval << 8) | (nval & 0xFF); + oval = (_oldval << 8) | (nval & 0xFF); + } else { + if ((nval & 0xFF) != _oldval) + return -EAGAIN; + nval = (nval & 0xFF00) | _newval; + oval = (nval & 0xFF00) | _oldval; + } + return asd_ddbsite_update_word(asd_ha, ddb_site_no, base, oval, nval); +} + +static inline void asd_write_reg_addr(struct asd_ha_struct *asd_ha, u32 reg, + dma_addr_t dma_handle) +{ + asd_write_reg_dword(asd_ha, reg, ASD_BUSADDR_LO(dma_handle)); + asd_write_reg_dword(asd_ha, reg+4, ASD_BUSADDR_HI(dma_handle)); +} + +static inline u32 asd_get_cmdctx_size(struct asd_ha_struct *asd_ha) +{ + /* DCHREVISION returns 0, possibly broken */ + u32 ctxmemsize = asd_read_reg_dword(asd_ha, LmMnINT(0,0)) & CTXMEMSIZE; + return ctxmemsize ? 65536 : 32768; +} + +static inline u32 asd_get_devctx_size(struct asd_ha_struct *asd_ha) +{ + u32 ctxmemsize = asd_read_reg_dword(asd_ha, LmMnINT(0,0)) & CTXMEMSIZE; + return ctxmemsize ? 8192 : 4096; +} + +static inline void asd_disable_ints(struct asd_ha_struct *asd_ha) +{ + asd_write_reg_dword(asd_ha, CHIMINTEN, RST_CHIMINTEN); +} + +static inline void asd_enable_ints(struct asd_ha_struct *asd_ha) +{ + /* Enable COM SAS interrupt on errors, COMSTAT */ + asd_write_reg_dword(asd_ha, COMSTATEN, + EN_CSBUFPERR | EN_CSERR | EN_OVLYERR); + /* Enable DCH SAS CFIFTOERR */ + asd_write_reg_dword(asd_ha, DCHSTATUS, EN_CFIFTOERR); + /* Enable Host Device interrupts */ + asd_write_reg_dword(asd_ha, CHIMINTEN, SET_CHIMINTEN); +} + +#endif diff --git a/drivers/scsi/aic94xx/aic94xx_reg_def.h b/drivers/scsi/aic94xx/aic94xx_reg_def.h new file mode 100644 index 000000000000..b79f45f3ad47 --- /dev/null +++ b/drivers/scsi/aic94xx/aic94xx_reg_def.h @@ -0,0 +1,2398 @@ +/* + * Aic94xx SAS/SATA driver hardware registers defintions. + * + * Copyright (C) 2004 Adaptec, Inc. All rights reserved. + * Copyright (C) 2004 David Chaw + * Copyright (C) 2005 Luben Tuikov + * + * Luben Tuikov: Some register value updates to make it work with the window + * agnostic register r/w functions. Some register corrections, sizes, + * etc. + * + * This file is licensed under GPLv2. + * + * This file is part of the aic94xx driver. + * + * The aic94xx driver is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2 of the + * License. + * + * The aic94xx driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with the aic94xx driver; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * $Id: //depot/aic94xx/aic94xx_reg_def.h#27 $ + * + */ + +#ifndef _ADP94XX_REG_DEF_H_ +#define _ADP94XX_REG_DEF_H_ + +/* + * Common definitions. + */ +#define CSEQ_MODE_PAGE_SIZE 0x200 /* CSEQ mode page size */ +#define LmSEQ_MODE_PAGE_SIZE 0x200 /* LmSEQ mode page size */ +#define LmSEQ_HOST_REG_SIZE 0x4000 /* LmSEQ Host Register size */ + +/********************* COM_SAS registers definition *************************/ + +/* The base is REG_BASE_ADDR, defined in aic94xx_reg.h. + */ + +/* + * CHIM Registers, Address Range : (0x00-0xFF) + */ +#define COMBIST (REG_BASE_ADDR + 0x00) + +/* bits 31:24 */ +#define L7BLKRST 0x80000000 +#define L6BLKRST 0x40000000 +#define L5BLKRST 0x20000000 +#define L4BLKRST 0x10000000 +#define L3BLKRST 0x08000000 +#define L2BLKRST 0x04000000 +#define L1BLKRST 0x02000000 +#define L0BLKRST 0x01000000 +#define LmBLKRST 0xFF000000 +#define LmBLKRST_COMBIST(phyid) (1 << (24 + phyid)) + +#define OCMBLKRST 0x00400000 +#define CTXMEMBLKRST 0x00200000 +#define CSEQBLKRST 0x00100000 +#define EXSIBLKRST 0x00040000 +#define DPIBLKRST 0x00020000 +#define DFIFBLKRST 0x00010000 +#define HARDRST 0x00000200 +#define COMBLKRST 0x00000100 +#define FRCDFPERR 0x00000080 +#define FRCCIOPERR 0x00000020 +#define FRCBISTERR 0x00000010 +#define COMBISTEN 0x00000004 +#define COMBISTDONE 0x00000002 /* ro */ +#define COMBISTFAIL 0x00000001 /* ro */ + +#define COMSTAT (REG_BASE_ADDR + 0x04) + +#define REQMBXREAD 0x00000040 +#define RSPMBXAVAIL 0x00000020 +#define CSBUFPERR 0x00000008 +#define OVLYERR 0x00000004 +#define CSERR 0x00000002 +#define OVLYDMADONE 0x00000001 + +#define COMSTAT_MASK (REQMBXREAD | RSPMBXAVAIL | \ + CSBUFPERR | OVLYERR | CSERR |\ + OVLYDMADONE) + +#define COMSTATEN (REG_BASE_ADDR + 0x08) + +#define EN_REQMBXREAD 0x00000040 +#define EN_RSPMBXAVAIL 0x00000020 +#define EN_CSBUFPERR 0x00000008 +#define EN_OVLYERR 0x00000004 +#define EN_CSERR 0x00000002 +#define EN_OVLYDONE 0x00000001 + +#define SCBPRO (REG_BASE_ADDR + 0x0C) + +#define SCBCONS_MASK 0xFFFF0000 +#define SCBPRO_MASK 0x0000FFFF + +#define CHIMREQMBX (REG_BASE_ADDR + 0x10) + +#define CHIMRSPMBX (REG_BASE_ADDR + 0x14) + +#define CHIMINT (REG_BASE_ADDR + 0x18) + +#define EXT_INT0 0x00000800 +#define EXT_INT1 0x00000400 +#define PORRSTDET 0x00000200 +#define HARDRSTDET 0x00000100 +#define DLAVAILQ 0x00000080 /* ro */ +#define HOSTERR 0x00000040 +#define INITERR 0x00000020 +#define DEVINT 0x00000010 +#define COMINT 0x00000008 +#define DEVTIMER2 0x00000004 +#define DEVTIMER1 0x00000002 +#define DLAVAIL 0x00000001 + +#define CHIMINT_MASK (HOSTERR | INITERR | DEVINT | COMINT |\ + DEVTIMER2 | DEVTIMER1 | DLAVAIL) + +#define DEVEXCEPT_MASK (HOSTERR | INITERR | DEVINT | COMINT) + +#define CHIMINTEN (REG_BASE_ADDR + 0x1C) + +#define RST_EN_EXT_INT1 0x01000000 +#define RST_EN_EXT_INT0 0x00800000 +#define RST_EN_HOSTERR 0x00400000 +#define RST_EN_INITERR 0x00200000 +#define RST_EN_DEVINT 0x00100000 +#define RST_EN_COMINT 0x00080000 +#define RST_EN_DEVTIMER2 0x00040000 +#define RST_EN_DEVTIMER1 0x00020000 +#define RST_EN_DLAVAIL 0x00010000 +#define SET_EN_EXT_INT1 0x00000100 +#define SET_EN_EXT_INT0 0x00000080 +#define SET_EN_HOSTERR 0x00000040 +#define SET_EN_INITERR 0x00000020 +#define SET_EN_DEVINT 0x00000010 +#define SET_EN_COMINT 0x00000008 +#define SET_EN_DEVTIMER2 0x00000004 +#define SET_EN_DEVTIMER1 0x00000002 +#define SET_EN_DLAVAIL 0x00000001 + +#define RST_CHIMINTEN (RST_EN_HOSTERR | RST_EN_INITERR | \ + RST_EN_DEVINT | RST_EN_COMINT | \ + RST_EN_DEVTIMER2 | RST_EN_DEVTIMER1 |\ + RST_EN_DLAVAIL) + +#define SET_CHIMINTEN (SET_EN_HOSTERR | SET_EN_INITERR |\ + SET_EN_DEVINT | SET_EN_COMINT |\ + SET_EN_DLAVAIL) + +#define OVLYDMACTL (REG_BASE_ADDR + 0x20) + +#define OVLYADR_MASK 0x07FF0000 +#define OVLYLSEQ_MASK 0x0000FF00 +#define OVLYCSEQ 0x00000080 +#define OVLYHALTERR 0x00000040 +#define PIOCMODE 0x00000020 +#define RESETOVLYDMA 0x00000008 /* wo */ +#define STARTOVLYDMA 0x00000004 +#define STOPOVLYDMA 0x00000002 /* wo */ +#define OVLYDMAACT 0x00000001 /* ro */ + +#define OVLYDMACNT (REG_BASE_ADDR + 0x24) + +#define OVLYDOMAIN1 0x20000000 /* ro */ +#define OVLYDOMAIN0 0x10000000 +#define OVLYBUFADR_MASK 0x007F0000 +#define OVLYDMACNT_MASK 0x00003FFF + +#define OVLYDMAADR (REG_BASE_ADDR + 0x28) + +#define DMAERR (REG_BASE_ADDR + 0x30) + +#define OVLYERRSTAT_MASK 0x0000FF00 /* ro */ +#define CSERRSTAT_MASK 0x000000FF /* ro */ + +#define SPIODATA (REG_BASE_ADDR + 0x34) + +/* 0x38 - 0x3C are reserved */ + +#define T1CNTRLR (REG_BASE_ADDR + 0x40) + +#define T1DONE 0x00010000 /* ro */ +#define TIMER64 0x00000400 +#define T1ENABLE 0x00000200 +#define T1RELOAD 0x00000100 +#define T1PRESCALER_MASK 0x00000003 + +#define T1CMPR (REG_BASE_ADDR + 0x44) + +#define T1CNTR (REG_BASE_ADDR + 0x48) + +#define T2CNTRLR (REG_BASE_ADDR + 0x4C) + +#define T2DONE 0x00010000 /* ro */ +#define T2ENABLE 0x00000200 +#define T2RELOAD 0x00000100 +#define T2PRESCALER_MASK 0x00000003 + +#define T2CMPR (REG_BASE_ADDR + 0x50) + +#define T2CNTR (REG_BASE_ADDR + 0x54) + +/* 0x58h - 0xFCh are reserved */ + +/* + * DCH_SAS Registers, Address Range : (0x800-0xFFF) + */ +#define CMDCTXBASE (REG_BASE_ADDR + 0x800) + +#define DEVCTXBASE (REG_BASE_ADDR + 0x808) + +#define CTXDOMAIN (REG_BASE_ADDR + 0x810) + +#define DEVCTXDOMAIN1 0x00000008 /* ro */ +#define DEVCTXDOMAIN0 0x00000004 +#define CMDCTXDOMAIN1 0x00000002 /* ro */ +#define CMDCTXDOMAIN0 0x00000001 + +#define DCHCTL (REG_BASE_ADDR + 0x814) + +#define OCMBISTREPAIR 0x00080000 +#define OCMBISTEN 0x00040000 +#define OCMBISTDN 0x00020000 /* ro */ +#define OCMBISTFAIL 0x00010000 /* ro */ +#define DDBBISTEN 0x00004000 +#define DDBBISTDN 0x00002000 /* ro */ +#define DDBBISTFAIL 0x00001000 /* ro */ +#define SCBBISTEN 0x00000400 +#define SCBBISTDN 0x00000200 /* ro */ +#define SCBBISTFAIL 0x00000100 /* ro */ + +#define MEMSEL_MASK 0x000000E0 +#define MEMSEL_CCM_LSEQ 0x00000000 +#define MEMSEL_CCM_IOP 0x00000020 +#define MEMSEL_CCM_SASCTL 0x00000040 +#define MEMSEL_DCM_LSEQ 0x00000060 +#define MEMSEL_DCM_IOP 0x00000080 +#define MEMSEL_OCM 0x000000A0 + +#define FRCERR 0x00000010 +#define AUTORLS 0x00000001 + +#define DCHREVISION (REG_BASE_ADDR + 0x818) + +#define DCHREVISION_MASK 0x000000FF + +#define DCHSTATUS (REG_BASE_ADDR + 0x81C) + +#define EN_CFIFTOERR 0x00020000 +#define CFIFTOERR 0x00000200 +#define CSEQINT 0x00000100 /* ro */ +#define LSEQ7INT 0x00000080 /* ro */ +#define LSEQ6INT 0x00000040 /* ro */ +#define LSEQ5INT 0x00000020 /* ro */ +#define LSEQ4INT 0x00000010 /* ro */ +#define LSEQ3INT 0x00000008 /* ro */ +#define LSEQ2INT 0x00000004 /* ro */ +#define LSEQ1INT 0x00000002 /* ro */ +#define LSEQ0INT 0x00000001 /* ro */ + +#define LSEQINT_MASK (LSEQ7INT | LSEQ6INT | LSEQ5INT |\ + LSEQ4INT | LSEQ3INT | LSEQ2INT |\ + LSEQ1INT | LSEQ0INT) + +#define DCHDFIFDEBUG (REG_BASE_ADDR + 0x820) +#define ENFAIRMST 0x00FF0000 +#define DISWRMST9 0x00000200 +#define DISWRMST8 0x00000100 +#define DISRDMST 0x000000FF + +#define ATOMICSTATCTL (REG_BASE_ADDR + 0x824) +/* 8 bit wide */ +#define AUTOINC 0x80 +#define ATOMICERR 0x04 +#define ATOMICWIN 0x02 +#define ATOMICDONE 0x01 + + +#define ALTCIOADR (REG_BASE_ADDR + 0x828) +/* 16 bit; bits 8:0 define CIO addr space of CSEQ */ + +#define ASCBPTR (REG_BASE_ADDR + 0x82C) +/* 16 bit wide */ + +#define ADDBPTR (REG_BASE_ADDR + 0x82E) +/* 16 bit wide */ + +#define ANEWDATA (REG_BASE_ADDR + 0x830) +/* 16 bit */ + +#define AOLDDATA (REG_BASE_ADDR + 0x834) +/* 16 bit */ + +#define CTXACCESS (REG_BASE_ADDR + 0x838) +/* 32 bit */ + +/* 0x83Ch - 0xFFCh are reserved */ + +/* + * ARP2 External Processor Registers, Address Range : (0x00-0x1F) + */ +#define ARP2CTL 0x00 + +#define FRCSCRPERR 0x00040000 +#define FRCARP2PERR 0x00020000 +#define FRCARP2ILLOPC 0x00010000 +#define ENWAITTO 0x00008000 +#define PERRORDIS 0x00004000 +#define FAILDIS 0x00002000 +#define CIOPERRDIS 0x00001000 +#define BREAKEN3 0x00000800 +#define BREAKEN2 0x00000400 +#define BREAKEN1 0x00000200 +#define BREAKEN0 0x00000100 +#define EPAUSE 0x00000008 +#define PAUSED 0x00000004 /* ro */ +#define STEP 0x00000002 +#define ARP2RESET 0x00000001 /* wo */ + +#define ARP2INT 0x04 + +#define HALTCODE_MASK 0x00FF0000 /* ro */ +#define ARP2WAITTO 0x00000100 +#define ARP2HALTC 0x00000080 +#define ARP2ILLOPC 0x00000040 +#define ARP2PERR 0x00000020 +#define ARP2CIOPERR 0x00000010 +#define ARP2BREAK3 0x00000008 +#define ARP2BREAK2 0x00000004 +#define ARP2BREAK1 0x00000002 +#define ARP2BREAK0 0x00000001 + +#define ARP2INTEN 0x08 + +#define EN_ARP2WAITTO 0x00000100 +#define EN_ARP2HALTC 0x00000080 +#define EN_ARP2ILLOPC 0x00000040 +#define EN_ARP2PERR 0x00000020 +#define EN_ARP2CIOPERR 0x00000010 +#define EN_ARP2BREAK3 0x00000008 +#define EN_ARP2BREAK2 0x00000004 +#define EN_ARP2BREAK1 0x00000002 +#define EN_ARP2BREAK0 0x00000001 + +#define ARP2BREAKADR01 0x0C + +#define BREAKADR1_MASK 0x0FFF0000 +#define BREAKADR0_MASK 0x00000FFF + +#define ARP2BREAKADR23 0x10 + +#define BREAKADR3_MASK 0x0FFF0000 +#define BREAKADR2_MASK 0x00000FFF + +/* 0x14h - 0x1Ch are reserved */ + +/* + * ARP2 Registers, Address Range : (0x00-0x1F) + * The definitions have the same address offset for CSEQ and LmSEQ + * CIO Bus Registers. + */ +#define MODEPTR 0x00 + +#define DSTMODE 0xF0 +#define SRCMODE 0x0F + +#define ALTMODE 0x01 + +#define ALTDMODE 0xF0 +#define ALTSMODE 0x0F + +#define ATOMICXCHG 0x02 + +#define FLAG 0x04 + +#define INTCODE_MASK 0xF0 +#define ALTMODEV2 0x04 +#define CARRY_INT 0x02 +#define CARRY 0x01 + +#define ARP2INTCTL 0x05 + +#define PAUSEDIS 0x80 +#define RSTINTCTL 0x40 +#define POPALTMODE 0x08 +#define ALTMODEV 0x04 +#define INTMASK 0x02 +#define IRET 0x01 + +#define STACK 0x06 + +#define FUNCTION1 0x07 + +#define PRGMCNT 0x08 + +#define ACCUM 0x0A + +#define SINDEX 0x0C + +#define DINDEX 0x0E + +#define ALLONES 0x10 + +#define ALLZEROS 0x11 + +#define SINDIR 0x12 + +#define DINDIR 0x13 + +#define JUMLDIR 0x14 + +#define ARP2HALTCODE 0x15 + +#define CURRADDR 0x16 + +#define LASTADDR 0x18 + +#define NXTLADDR 0x1A + +#define DBGPORTPTR 0x1C + +#define DBGPORT 0x1D + +/* + * CIO Registers. + * The definitions have the same address offset for CSEQ and LmSEQ + * CIO Bus Registers. + */ +#define MnSCBPTR 0x20 + +#define MnDDBPTR 0x22 + +#define SCRATCHPAGE 0x24 + +#define MnSCRATCHPAGE 0x25 + +#define SCRATCHPAGESV 0x26 + +#define MnSCRATCHPAGESV 0x27 + +#define MnDMAERRS 0x46 + +#define MnSGDMAERRS 0x47 + +#define MnSGBUF 0x53 + +#define MnSGDMASTAT 0x5b + +#define MnDDMACTL 0x5c /* RAZOR.rspec.fm rev 1.5 is wrong */ + +#define MnDDMASTAT 0x5d /* RAZOR.rspec.fm rev 1.5 is wrong */ + +#define MnDDMAMODE 0x5e /* RAZOR.rspec.fm rev 1.5 is wrong */ + +#define MnDMAENG 0x60 + +#define MnPIPECTL 0x61 + +#define MnSGBADR 0x65 + +#define MnSCB_SITE 0x100 + +#define MnDDB_SITE 0x180 + +/* + * The common definitions below have the same address offset for both + * CSEQ and LmSEQ. + */ +#define BISTCTL0 0x4C + +#define BISTCTL1 0x50 + +#define MAPPEDSCR 0x800 + +/* + * CSEQ Host Register, Address Range : (0x000-0xFFC) + */ +#define CSEQ_HOST_REG_BASE_ADR 0xB8001000 + +#define CARP2CTL (CSEQ_HOST_REG_BASE_ADR + ARP2CTL) + +#define CARP2INT (CSEQ_HOST_REG_BASE_ADR + ARP2INT) + +#define CARP2INTEN (CSEQ_HOST_REG_BASE_ADR + ARP2INTEN) + +#define CARP2BREAKADR01 (CSEQ_HOST_REG_BASE_ADR+ARP2BREAKADR01) + +#define CARP2BREAKADR23 (CSEQ_HOST_REG_BASE_ADR+ARP2BREAKADR23) + +#define CBISTCTL (CSEQ_HOST_REG_BASE_ADR + BISTCTL1) + +#define CSEQRAMBISTEN 0x00000040 +#define CSEQRAMBISTDN 0x00000020 /* ro */ +#define CSEQRAMBISTFAIL 0x00000010 /* ro */ +#define CSEQSCRBISTEN 0x00000004 +#define CSEQSCRBISTDN 0x00000002 /* ro */ +#define CSEQSCRBISTFAIL 0x00000001 /* ro */ + +#define CMAPPEDSCR (CSEQ_HOST_REG_BASE_ADR + MAPPEDSCR) + +/* + * CSEQ CIO Bus Registers, Address Range : (0x0000-0x1FFC) + * 16 modes, each mode is 512 bytes. + * Unless specified, the register should valid for all modes. + */ +#define CSEQ_CIO_REG_BASE_ADR REG_BASE_ADDR_CSEQCIO + +#define CSEQm_CIO_REG(Mode, Reg) \ + (CSEQ_CIO_REG_BASE_ADR + \ + ((u32) (Mode) * CSEQ_MODE_PAGE_SIZE) + (u32) (Reg)) + +#define CMODEPTR (CSEQ_CIO_REG_BASE_ADR + MODEPTR) + +#define CALTMODE (CSEQ_CIO_REG_BASE_ADR + ALTMODE) + +#define CATOMICXCHG (CSEQ_CIO_REG_BASE_ADR + ATOMICXCHG) + +#define CFLAG (CSEQ_CIO_REG_BASE_ADR + FLAG) + +#define CARP2INTCTL (CSEQ_CIO_REG_BASE_ADR + ARP2INTCTL) + +#define CSTACK (CSEQ_CIO_REG_BASE_ADR + STACK) + +#define CFUNCTION1 (CSEQ_CIO_REG_BASE_ADR + FUNCTION1) + +#define CPRGMCNT (CSEQ_CIO_REG_BASE_ADR + PRGMCNT) + +#define CACCUM (CSEQ_CIO_REG_BASE_ADR + ACCUM) + +#define CSINDEX (CSEQ_CIO_REG_BASE_ADR + SINDEX) + +#define CDINDEX (CSEQ_CIO_REG_BASE_ADR + DINDEX) + +#define CALLONES (CSEQ_CIO_REG_BASE_ADR + ALLONES) + +#define CALLZEROS (CSEQ_CIO_REG_BASE_ADR + ALLZEROS) + +#define CSINDIR (CSEQ_CIO_REG_BASE_ADR + SINDIR) + +#define CDINDIR (CSEQ_CIO_REG_BASE_ADR + DINDIR) + +#define CJUMLDIR (CSEQ_CIO_REG_BASE_ADR + JUMLDIR) + +#define CARP2HALTCODE (CSEQ_CIO_REG_BASE_ADR + ARP2HALTCODE) + +#define CCURRADDR (CSEQ_CIO_REG_BASE_ADR + CURRADDR) + +#define CLASTADDR (CSEQ_CIO_REG_BASE_ADR + LASTADDR) + +#define CNXTLADDR (CSEQ_CIO_REG_BASE_ADR + NXTLADDR) + +#define CDBGPORTPTR (CSEQ_CIO_REG_BASE_ADR + DBGPORTPTR) + +#define CDBGPORT (CSEQ_CIO_REG_BASE_ADR + DBGPORT) + +#define CSCRATCHPAGE (CSEQ_CIO_REG_BASE_ADR + SCRATCHPAGE) + +#define CMnSCBPTR(Mode) CSEQm_CIO_REG(Mode, MnSCBPTR) + +#define CMnDDBPTR(Mode) CSEQm_CIO_REG(Mode, MnDDBPTR) + +#define CMnSCRATCHPAGE(Mode) CSEQm_CIO_REG(Mode, MnSCRATCHPAGE) + +#define CLINKCON (CSEQ_CIO_REG_BASE_ADR + 0x28) + +#define CCIOAACESS (CSEQ_CIO_REG_BASE_ADR + 0x2C) + +/* mode 0-7 */ +#define MnREQMBX 0x30 +#define CMnREQMBX(Mode) CSEQm_CIO_REG(Mode, 0x30) + +/* mode 8 */ +#define CSEQCON CSEQm_CIO_REG(8, 0x30) + +/* mode 0-7 */ +#define MnRSPMBX 0x34 +#define CMnRSPMBX(Mode) CSEQm_CIO_REG(Mode, 0x34) + +/* mode 8 */ +#define CSEQCOMCTL CSEQm_CIO_REG(8, 0x34) + +/* mode 8 */ +#define CSEQCOMSTAT CSEQm_CIO_REG(8, 0x35) + +/* mode 8 */ +#define CSEQCOMINTEN CSEQm_CIO_REG(8, 0x36) + +/* mode 8 */ +#define CSEQCOMDMACTL CSEQm_CIO_REG(8, 0x37) + +#define CSHALTERR 0x10 +#define RESETCSDMA 0x08 /* wo */ +#define STARTCSDMA 0x04 +#define STOPCSDMA 0x02 /* wo */ +#define CSDMAACT 0x01 /* ro */ + +/* mode 0-7 */ +#define MnINT 0x38 +#define CMnINT(Mode) CSEQm_CIO_REG(Mode, 0x38) + +#define CMnREQMBXE 0x02 +#define CMnRSPMBXF 0x01 +#define CMnINT_MASK 0x00000003 + +/* mode 8 */ +#define CSEQREQMBX CSEQm_CIO_REG(8, 0x38) + +/* mode 0-7 */ +#define MnINTEN 0x3C +#define CMnINTEN(Mode) CSEQm_CIO_REG(Mode, 0x3C) + +#define EN_CMnRSPMBXF 0x01 + +/* mode 8 */ +#define CSEQRSPMBX CSEQm_CIO_REG(8, 0x3C) + +/* mode 8 */ +#define CSDMAADR CSEQm_CIO_REG(8, 0x40) + +/* mode 8 */ +#define CSDMACNT CSEQm_CIO_REG(8, 0x48) + +/* mode 8 */ +#define CSEQDLCTL CSEQm_CIO_REG(8, 0x4D) + +#define DONELISTEND 0x10 +#define DONELISTSIZE_MASK 0x0F +#define DONELISTSIZE_8ELEM 0x01 +#define DONELISTSIZE_16ELEM 0x02 +#define DONELISTSIZE_32ELEM 0x03 +#define DONELISTSIZE_64ELEM 0x04 +#define DONELISTSIZE_128ELEM 0x05 +#define DONELISTSIZE_256ELEM 0x06 +#define DONELISTSIZE_512ELEM 0x07 +#define DONELISTSIZE_1024ELEM 0x08 +#define DONELISTSIZE_2048ELEM 0x09 +#define DONELISTSIZE_4096ELEM 0x0A +#define DONELISTSIZE_8192ELEM 0x0B +#define DONELISTSIZE_16384ELEM 0x0C + +/* mode 8 */ +#define CSEQDLOFFS CSEQm_CIO_REG(8, 0x4E) + +/* mode 11 */ +#define CM11INTVEC0 CSEQm_CIO_REG(11, 0x50) + +/* mode 11 */ +#define CM11INTVEC1 CSEQm_CIO_REG(11, 0x52) + +/* mode 11 */ +#define CM11INTVEC2 CSEQm_CIO_REG(11, 0x54) + +#define CCONMSK (CSEQ_CIO_REG_BASE_ADR + 0x60) + +#define CCONEXIST (CSEQ_CIO_REG_BASE_ADR + 0x61) + +#define CCONMODE (CSEQ_CIO_REG_BASE_ADR + 0x62) + +#define CTIMERCALC (CSEQ_CIO_REG_BASE_ADR + 0x64) + +#define CINTDIS (CSEQ_CIO_REG_BASE_ADR + 0x68) + +/* mode 8, 32x32 bits, 128 bytes of mapped buffer */ +#define CSBUFFER CSEQm_CIO_REG(8, 0x80) + +#define CSCRATCH (CSEQ_CIO_REG_BASE_ADR + 0x1C0) + +/* mode 0-8 */ +#define CMnSCRATCH(Mode) CSEQm_CIO_REG(Mode, 0x1E0) + +/* + * CSEQ Mapped Instruction RAM Page, Address Range : (0x0000-0x1FFC) + */ +#define CSEQ_RAM_REG_BASE_ADR 0xB8004000 + +/* + * The common definitions below have the same address offset for all the Link + * sequencers. + */ +#define MODECTL 0x40 + +#define DBGMODE 0x44 + +#define CONTROL 0x48 +#define LEDTIMER 0x00010000 +#define LEDTIMERS_10us 0x00000000 +#define LEDTIMERS_1ms 0x00000800 +#define LEDTIMERS_100ms 0x00001000 +#define LEDMODE_TXRX 0x00000000 +#define LEDMODE_CONNECTED 0x00000200 +#define LEDPOL 0x00000100 + +#define LSEQRAM 0x1000 + +/* + * LmSEQ Host Registers, Address Range : (0x0000-0x3FFC) + */ +#define LSEQ0_HOST_REG_BASE_ADR 0xB8020000 +#define LSEQ1_HOST_REG_BASE_ADR 0xB8024000 +#define LSEQ2_HOST_REG_BASE_ADR 0xB8028000 +#define LSEQ3_HOST_REG_BASE_ADR 0xB802C000 +#define LSEQ4_HOST_REG_BASE_ADR 0xB8030000 +#define LSEQ5_HOST_REG_BASE_ADR 0xB8034000 +#define LSEQ6_HOST_REG_BASE_ADR 0xB8038000 +#define LSEQ7_HOST_REG_BASE_ADR 0xB803C000 + +#define LmARP2CTL(LinkNum) (LSEQ0_HOST_REG_BASE_ADR + \ + ((LinkNum)*LmSEQ_HOST_REG_SIZE) + \ + ARP2CTL) + +#define LmARP2INT(LinkNum) (LSEQ0_HOST_REG_BASE_ADR + \ + ((LinkNum)*LmSEQ_HOST_REG_SIZE) + \ + ARP2INT) + +#define LmARP2INTEN(LinkNum) (LSEQ0_HOST_REG_BASE_ADR + \ + ((LinkNum)*LmSEQ_HOST_REG_SIZE) + \ + ARP2INTEN) + +#define LmDBGMODE(LinkNum) (LSEQ0_HOST_REG_BASE_ADR + \ + ((LinkNum)*LmSEQ_HOST_REG_SIZE) + \ + DBGMODE) + +#define LmCONTROL(LinkNum) (LSEQ0_HOST_REG_BASE_ADR + \ + ((LinkNum)*LmSEQ_HOST_REG_SIZE) + \ + CONTROL) + +#define LmARP2BREAKADR01(LinkNum) (LSEQ0_HOST_REG_BASE_ADR + \ + ((LinkNum)*LmSEQ_HOST_REG_SIZE) + \ + ARP2BREAKADR01) + +#define LmARP2BREAKADR23(LinkNum) (LSEQ0_HOST_REG_BASE_ADR + \ + ((LinkNum)*LmSEQ_HOST_REG_SIZE) + \ + ARP2BREAKADR23) + +#define LmMODECTL(LinkNum) (LSEQ0_HOST_REG_BASE_ADR + \ + ((LinkNum)*LmSEQ_HOST_REG_SIZE) + \ + MODECTL) + +#define LmAUTODISCI 0x08000000 +#define LmDSBLBITLT 0x04000000 +#define LmDSBLANTT 0x02000000 +#define LmDSBLCRTT 0x01000000 +#define LmDSBLCONT 0x00000100 +#define LmPRIMODE 0x00000080 +#define LmDSBLHOLD 0x00000040 +#define LmDISACK 0x00000020 +#define LmBLIND48 0x00000010 +#define LmRCVMODE_MASK 0x0000000C +#define LmRCVMODE_PLD 0x00000000 +#define LmRCVMODE_HPC 0x00000004 + +#define LmDBGMODE(LinkNum) (LSEQ0_HOST_REG_BASE_ADR + \ + ((LinkNum)*LmSEQ_HOST_REG_SIZE) + \ + DBGMODE) + +#define LmFRCPERR 0x80000000 +#define LmMEMSEL_MASK 0x30000000 +#define LmFRCRBPERR 0x00000000 +#define LmFRCTBPERR 0x10000000 +#define LmFRCSGBPERR 0x20000000 +#define LmFRCARBPERR 0x30000000 +#define LmRCVIDW 0x00080000 +#define LmINVDWERR 0x00040000 +#define LmRCVDISP 0x00004000 +#define LmDISPERR 0x00002000 +#define LmDSBLDSCR 0x00000800 +#define LmDSBLSCR 0x00000400 +#define LmFRCNAK 0x00000200 +#define LmFRCROFS 0x00000100 +#define LmFRCCRC 0x00000080 +#define LmFRMTYPE_MASK 0x00000070 +#define LmSG_DATA 0x00000000 +#define LmSG_COMMAND 0x00000010 +#define LmSG_TASK 0x00000020 +#define LmSG_TGTXFER 0x00000030 +#define LmSG_RESPONSE 0x00000040 +#define LmSG_IDENADDR 0x00000050 +#define LmSG_OPENADDR 0x00000060 +#define LmDISCRCGEN 0x00000008 +#define LmDISCRCCHK 0x00000004 +#define LmSSXMTFRM 0x00000002 +#define LmSSRCVFRM 0x00000001 + +#define LmCONTROL(LinkNum) (LSEQ0_HOST_REG_BASE_ADR + \ + ((LinkNum)*LmSEQ_HOST_REG_SIZE) + \ + CONTROL) + +#define LmSTEPXMTFRM 0x00000002 +#define LmSTEPRCVFRM 0x00000001 + +#define LmBISTCTL0(LinkNum) (LSEQ0_HOST_REG_BASE_ADR + \ + ((LinkNum)*LmSEQ_HOST_REG_SIZE) + \ + BISTCTL0) + +#define ARBBISTEN 0x40000000 +#define ARBBISTDN 0x20000000 /* ro */ +#define ARBBISTFAIL 0x10000000 /* ro */ +#define TBBISTEN 0x00000400 +#define TBBISTDN 0x00000200 /* ro */ +#define TBBISTFAIL 0x00000100 /* ro */ +#define RBBISTEN 0x00000040 +#define RBBISTDN 0x00000020 /* ro */ +#define RBBISTFAIL 0x00000010 /* ro */ +#define SGBISTEN 0x00000004 +#define SGBISTDN 0x00000002 /* ro */ +#define SGBISTFAIL 0x00000001 /* ro */ + +#define LmBISTCTL1(LinkNum) (LSEQ0_HOST_REG_BASE_ADR + \ + ((LinkNum)*LmSEQ_HOST_REG_SIZE) +\ + BISTCTL1) + +#define LmRAMPAGE1 0x00000200 +#define LmRAMPAGE0 0x00000100 +#define LmIMEMBISTEN 0x00000040 +#define LmIMEMBISTDN 0x00000020 /* ro */ +#define LmIMEMBISTFAIL 0x00000010 /* ro */ +#define LmSCRBISTEN 0x00000004 +#define LmSCRBISTDN 0x00000002 /* ro */ +#define LmSCRBISTFAIL 0x00000001 /* ro */ +#define LmRAMPAGE (LmRAMPAGE1 + LmRAMPAGE0) +#define LmRAMPAGE_LSHIFT 0x8 + +#define LmSCRATCH(LinkNum) (LSEQ0_HOST_REG_BASE_ADR + \ + ((LinkNum) * LmSEQ_HOST_REG_SIZE) +\ + MAPPEDSCR) + +#define LmSEQRAM(LinkNum) (LSEQ0_HOST_REG_BASE_ADR + \ + ((LinkNum) * LmSEQ_HOST_REG_SIZE) +\ + LSEQRAM) + +/* + * LmSEQ CIO Bus Register, Address Range : (0x0000-0xFFC) + * 8 modes, each mode is 512 bytes. + * Unless specified, the register should valid for all modes. + */ +#define LmSEQ_CIOBUS_REG_BASE 0x2000 + +#define LmSEQ_PHY_BASE(Mode, LinkNum) \ + (LSEQ0_HOST_REG_BASE_ADR + \ + (LmSEQ_HOST_REG_SIZE * (u32) (LinkNum)) + \ + LmSEQ_CIOBUS_REG_BASE + \ + ((u32) (Mode) * LmSEQ_MODE_PAGE_SIZE)) + +#define LmSEQ_PHY_REG(Mode, LinkNum, Reg) \ + (LmSEQ_PHY_BASE(Mode, LinkNum) + (u32) (Reg)) + +#define LmMODEPTR(LinkNum) LmSEQ_PHY_REG(0, LinkNum, MODEPTR) + +#define LmALTMODE(LinkNum) LmSEQ_PHY_REG(0, LinkNum, ALTMODE) + +#define LmATOMICXCHG(LinkNum) LmSEQ_PHY_REG(0, LinkNum, ATOMICXCHG) + +#define LmFLAG(LinkNum) LmSEQ_PHY_REG(0, LinkNum, FLAG) + +#define LmARP2INTCTL(LinkNum) LmSEQ_PHY_REG(0, LinkNum, ARP2INTCTL) + +#define LmSTACK(LinkNum) LmSEQ_PHY_REG(0, LinkNum, STACK) + +#define LmFUNCTION1(LinkNum) LmSEQ_PHY_REG(0, LinkNum, FUNCTION1) + +#define LmPRGMCNT(LinkNum) LmSEQ_PHY_REG(0, LinkNum, PRGMCNT) + +#define LmACCUM(LinkNum) LmSEQ_PHY_REG(0, LinkNum, ACCUM) + +#define LmSINDEX(LinkNum) LmSEQ_PHY_REG(0, LinkNum, SINDEX) + +#define LmDINDEX(LinkNum) LmSEQ_PHY_REG(0, LinkNum, DINDEX) + +#define LmALLONES(LinkNum) LmSEQ_PHY_REG(0, LinkNum, ALLONES) + +#define LmALLZEROS(LinkNum) LmSEQ_PHY_REG(0, LinkNum, ALLZEROS) + +#define LmSINDIR(LinkNum) LmSEQ_PHY_REG(0, LinkNum, SINDIR) + +#define LmDINDIR(LinkNum) LmSEQ_PHY_REG(0, LinkNum, DINDIR) + +#define LmJUMLDIR(LinkNum) LmSEQ_PHY_REG(0, LinkNum, JUMLDIR) + +#define LmARP2HALTCODE(LinkNum) LmSEQ_PHY_REG(0, LinkNum, ARP2HALTCODE) + +#define LmCURRADDR(LinkNum) LmSEQ_PHY_REG(0, LinkNum, CURRADDR) + +#define LmLASTADDR(LinkNum) LmSEQ_PHY_REG(0, LinkNum, LASTADDR) + +#define LmNXTLADDR(LinkNum) LmSEQ_PHY_REG(0, LinkNum, NXTLADDR) + +#define LmDBGPORTPTR(LinkNum) LmSEQ_PHY_REG(0, LinkNum, DBGPORTPTR) + +#define LmDBGPORT(LinkNum) LmSEQ_PHY_REG(0, LinkNum, DBGPORT) + +#define LmSCRATCHPAGE(LinkNum) LmSEQ_PHY_REG(0, LinkNum, SCRATCHPAGE) + +#define LmMnSCRATCHPAGE(LinkNum, Mode) LmSEQ_PHY_REG(Mode, LinkNum, \ + MnSCRATCHPAGE) + +#define LmTIMERCALC(LinkNum) LmSEQ_PHY_REG(0, LinkNum, 0x28) + +#define LmREQMBX(LinkNum) LmSEQ_PHY_REG(0, LinkNum, 0x30) + +#define LmRSPMBX(LinkNum) LmSEQ_PHY_REG(0, LinkNum, 0x34) + +#define LmMnINT(LinkNum, Mode) LmSEQ_PHY_REG(Mode, LinkNum, 0x38) + +#define CTXMEMSIZE 0x80000000 /* ro */ +#define LmACKREQ 0x08000000 +#define LmNAKREQ 0x04000000 +#define LmMnXMTERR 0x02000000 +#define LmM5OOBSVC 0x01000000 +#define LmHWTINT 0x00800000 +#define LmMnCTXDONE 0x00100000 +#define LmM2REQMBXF 0x00080000 +#define LmM2RSPMBXE 0x00040000 +#define LmMnDMAERR 0x00020000 +#define LmRCVPRIM 0x00010000 +#define LmRCVERR 0x00008000 +#define LmADDRRCV 0x00004000 +#define LmMnHDRMISS 0x00002000 +#define LmMnWAITSCB 0x00001000 +#define LmMnRLSSCB 0x00000800 +#define LmMnSAVECTX 0x00000400 +#define LmMnFETCHSG 0x00000200 +#define LmMnLOADCTX 0x00000100 +#define LmMnCFGICL 0x00000080 +#define LmMnCFGSATA 0x00000040 +#define LmMnCFGEXPSATA 0x00000020 +#define LmMnCFGCMPLT 0x00000010 +#define LmMnCFGRBUF 0x00000008 +#define LmMnSAVETTR 0x00000004 +#define LmMnCFGRDAT 0x00000002 +#define LmMnCFGHDR 0x00000001 + +#define LmMnINTEN(LinkNum, Mode) LmSEQ_PHY_REG(Mode, LinkNum, 0x3C) + +#define EN_LmACKREQ 0x08000000 +#define EN_LmNAKREQ 0x04000000 +#define EN_LmMnXMTERR 0x02000000 +#define EN_LmM5OOBSVC 0x01000000 +#define EN_LmHWTINT 0x00800000 +#define EN_LmMnCTXDONE 0x00100000 +#define EN_LmM2REQMBXF 0x00080000 +#define EN_LmM2RSPMBXE 0x00040000 +#define EN_LmMnDMAERR 0x00020000 +#define EN_LmRCVPRIM 0x00010000 +#define EN_LmRCVERR 0x00008000 +#define EN_LmADDRRCV 0x00004000 +#define EN_LmMnHDRMISS 0x00002000 +#define EN_LmMnWAITSCB 0x00001000 +#define EN_LmMnRLSSCB 0x00000800 +#define EN_LmMnSAVECTX 0x00000400 +#define EN_LmMnFETCHSG 0x00000200 +#define EN_LmMnLOADCTX 0x00000100 +#define EN_LmMnCFGICL 0x00000080 +#define EN_LmMnCFGSATA 0x00000040 +#define EN_LmMnCFGEXPSATA 0x00000020 +#define EN_LmMnCFGCMPLT 0x00000010 +#define EN_LmMnCFGRBUF 0x00000008 +#define EN_LmMnSAVETTR 0x00000004 +#define EN_LmMnCFGRDAT 0x00000002 +#define EN_LmMnCFGHDR 0x00000001 + +#define LmM0INTEN_MASK (EN_LmMnCFGCMPLT | EN_LmMnCFGRBUF | \ + EN_LmMnSAVETTR | EN_LmMnCFGRDAT | \ + EN_LmMnCFGHDR | EN_LmRCVERR | \ + EN_LmADDRRCV | EN_LmMnHDRMISS | \ + EN_LmMnRLSSCB | EN_LmMnSAVECTX | \ + EN_LmMnFETCHSG | EN_LmMnLOADCTX | \ + EN_LmHWTINT | EN_LmMnCTXDONE | \ + EN_LmRCVPRIM | EN_LmMnCFGSATA | \ + EN_LmMnCFGEXPSATA | EN_LmMnDMAERR) + +#define LmM1INTEN_MASK (EN_LmMnCFGCMPLT | EN_LmADDRRCV | \ + EN_LmMnRLSSCB | EN_LmMnSAVECTX | \ + EN_LmMnFETCHSG | EN_LmMnLOADCTX | \ + EN_LmMnXMTERR | EN_LmHWTINT | \ + EN_LmMnCTXDONE | EN_LmRCVPRIM | \ + EN_LmRCVERR | EN_LmMnDMAERR) + +#define LmM2INTEN_MASK (EN_LmADDRRCV | EN_LmHWTINT | \ + EN_LmM2REQMBXF | EN_LmRCVPRIM | \ + EN_LmRCVERR) + +#define LmM5INTEN_MASK (EN_LmADDRRCV | EN_LmM5OOBSVC | \ + EN_LmHWTINT | EN_LmRCVPRIM | \ + EN_LmRCVERR) + +#define LmXMTPRIMD(LinkNum) LmSEQ_PHY_REG(0, LinkNum, 0x40) + +#define LmXMTPRIMCS(LinkNum) LmSEQ_PHY_REG(0, LinkNum, 0x44) + +#define LmCONSTAT(LinkNum) LmSEQ_PHY_REG(0, LinkNum, 0x45) + +#define LmMnDMAERRS(LinkNum, Mode) LmSEQ_PHY_REG(Mode, LinkNum, 0x46) + +#define LmMnSGDMAERRS(LinkNum, Mode) LmSEQ_PHY_REG(Mode, LinkNum, 0x47) + +#define LmM0EXPHDRP(LinkNum) LmSEQ_PHY_REG(0, LinkNum, 0x48) + +#define LmM1SASALIGN(LinkNum) LmSEQ_PHY_REG(1, LinkNum, 0x48) +#define SAS_ALIGN_DEFAULT 0xFF + +#define LmM0MSKHDRP(LinkNum) LmSEQ_PHY_REG(0, LinkNum, 0x49) + +#define LmM1STPALIGN(LinkNum) LmSEQ_PHY_REG(1, LinkNum, 0x49) +#define STP_ALIGN_DEFAULT 0x1F + +#define LmM0RCVHDRP(LinkNum) LmSEQ_PHY_REG(0, LinkNum, 0x4A) + +#define LmM1XMTHDRP(LinkNum) LmSEQ_PHY_REG(1, LinkNum, 0x4A) + +#define LmM0ICLADR(LinkNum) LmSEQ_PHY_REG(0, LinkNum, 0x4B) + +#define LmM1ALIGNMODE(LinkNum) LmSEQ_PHY_REG(1, LinkNum, 0x4B) + +#define LmDISALIGN 0x20 +#define LmROTSTPALIGN 0x10 +#define LmSTPALIGN 0x08 +#define LmROTNOTIFY 0x04 +#define LmDUALALIGN 0x02 +#define LmROTALIGN 0x01 + +#define LmM0EXPRCVNT(LinkNum) LmSEQ_PHY_REG(0, LinkNum, 0x4C) + +#define LmM1XMTCNT(LinkNum) LmSEQ_PHY_REG(1, LinkNum, 0x4C) + +#define LmMnBUFSTAT(LinkNum, Mode) LmSEQ_PHY_REG(Mode, LinkNum, 0x4E) + +#define LmMnBUFPERR 0x01 + +/* mode 0-1 */ +#define LmMnXFRLVL(LinkNum, Mode) LmSEQ_PHY_REG(Mode, LinkNum, 0x59) + +#define LmMnXFRLVL_128 0x05 +#define LmMnXFRLVL_256 0x04 +#define LmMnXFRLVL_512 0x03 +#define LmMnXFRLVL_1024 0x02 +#define LmMnXFRLVL_1536 0x01 +#define LmMnXFRLVL_2048 0x00 + + /* mode 0-1 */ +#define LmMnSGDMACTL(LinkNum, Mode) LmSEQ_PHY_REG(Mode, LinkNum, 0x5A) + +#define LmMnRESETSG 0x04 +#define LmMnSTOPSG 0x02 +#define LmMnSTARTSG 0x01 + +/* mode 0-1 */ +#define LmMnSGDMASTAT(LinkNum, Mode) LmSEQ_PHY_REG(Mode, LinkNum, 0x5B) + +/* mode 0-1 */ +#define LmMnDDMACTL(LinkNum, Mode) LmSEQ_PHY_REG(Mode, LinkNum, 0x5C) + +#define LmMnFLUSH 0x40 /* wo */ +#define LmMnRLSRTRY 0x20 /* wo */ +#define LmMnDISCARD 0x10 /* wo */ +#define LmMnRESETDAT 0x08 /* wo */ +#define LmMnSUSDAT 0x04 /* wo */ +#define LmMnSTOPDAT 0x02 /* wo */ +#define LmMnSTARTDAT 0x01 /* wo */ + +/* mode 0-1 */ +#define LmMnDDMASTAT(LinkNum, Mode) LmSEQ_PHY_REG(Mode, LinkNum, 0x5D) + +#define LmMnDPEMPTY 0x80 +#define LmMnFLUSHING 0x40 +#define LmMnDDMAREQ 0x20 +#define LmMnHDMAREQ 0x10 +#define LmMnDATFREE 0x08 +#define LmMnDATSUS 0x04 +#define LmMnDATACT 0x02 +#define LmMnDATEN 0x01 + +/* mode 0-1 */ +#define LmMnDDMAMODE(LinkNum, Mode) LmSEQ_PHY_REG(Mode, LinkNum, 0x5E) + +#define LmMnDMATYPE_NORMAL 0x0000 +#define LmMnDMATYPE_HOST_ONLY_TX 0x0001 +#define LmMnDMATYPE_DEVICE_ONLY_TX 0x0002 +#define LmMnDMATYPE_INVALID 0x0003 +#define LmMnDMATYPE_MASK 0x0003 + +#define LmMnDMAWRAP 0x0004 +#define LmMnBITBUCKET 0x0008 +#define LmMnDISHDR 0x0010 +#define LmMnSTPCRC 0x0020 +#define LmXTEST 0x0040 +#define LmMnDISCRC 0x0080 +#define LmMnENINTLK 0x0100 +#define LmMnADDRFRM 0x0400 +#define LmMnENXMTCRC 0x0800 + +/* mode 0-1 */ +#define LmMnXFRCNT(LinkNum, Mode) LmSEQ_PHY_REG(Mode, LinkNum, 0x70) + +/* mode 0-1 */ +#define LmMnDPSEL(LinkNum, Mode) LmSEQ_PHY_REG(Mode, LinkNum, 0x7B) +#define LmMnDPSEL_MASK 0x07 +#define LmMnEOLPRE 0x40 +#define LmMnEOSPRE 0x80 + +/* Registers used in conjunction with LmMnDPSEL and LmMnDPACC registers */ +/* Receive Mode n = 0 */ +#define LmMnHRADDR 0x00 +#define LmMnHBYTECNT 0x01 +#define LmMnHREWIND 0x02 +#define LmMnDWADDR 0x03 +#define LmMnDSPACECNT 0x04 +#define LmMnDFRMSIZE 0x05 + +/* Registers used in conjunction with LmMnDPSEL and LmMnDPACC registers */ +/* Transmit Mode n = 1 */ +#define LmMnHWADDR 0x00 +#define LmMnHSPACECNT 0x01 +/* #define LmMnHREWIND 0x02 */ +#define LmMnDRADDR 0x03 +#define LmMnDBYTECNT 0x04 +/* #define LmMnDFRMSIZE 0x05 */ + +/* mode 0-1 */ +#define LmMnDPACC(LinkNum, Mode) LmSEQ_PHY_REG(Mode, LinkNum, 0x78) +#define LmMnDPACC_MASK 0x00FFFFFF + +/* mode 0-1 */ +#define LmMnHOLDLVL(LinkNum, Mode) LmSEQ_PHY_REG(Mode, LinkNum, 0x7D) + +#define LmPRMSTAT0(LinkNum) LmSEQ_PHY_REG(0, LinkNum, 0x80) +#define LmPRMSTAT0BYTE0 0x80 +#define LmPRMSTAT0BYTE1 0x81 +#define LmPRMSTAT0BYTE2 0x82 +#define LmPRMSTAT0BYTE3 0x83 + +#define LmFRAMERCVD 0x80000000 +#define LmXFRRDYRCVD 0x40000000 +#define LmUNKNOWNP 0x20000000 +#define LmBREAK 0x10000000 +#define LmDONE 0x08000000 +#define LmOPENACPT 0x04000000 +#define LmOPENRJCT 0x02000000 +#define LmOPENRTRY 0x01000000 +#define LmCLOSERV1 0x00800000 +#define LmCLOSERV0 0x00400000 +#define LmCLOSENORM 0x00200000 +#define LmCLOSECLAF 0x00100000 +#define LmNOTIFYRV2 0x00080000 +#define LmNOTIFYRV1 0x00040000 +#define LmNOTIFYRV0 0x00020000 +#define LmNOTIFYSPIN 0x00010000 +#define LmBROADRV4 0x00008000 +#define LmBROADRV3 0x00004000 +#define LmBROADRV2 0x00002000 +#define LmBROADRV1 0x00001000 +#define LmBROADSES 0x00000800 +#define LmBROADRVCH1 0x00000400 +#define LmBROADRVCH0 0x00000200 +#define LmBROADCH 0x00000100 +#define LmAIPRVWP 0x00000080 +#define LmAIPWP 0x00000040 +#define LmAIPWD 0x00000020 +#define LmAIPWC 0x00000010 +#define LmAIPRV2 0x00000008 +#define LmAIPRV1 0x00000004 +#define LmAIPRV0 0x00000002 +#define LmAIPNRML 0x00000001 + +#define LmBROADCAST_MASK (LmBROADCH | LmBROADRVCH0 | \ + LmBROADRVCH1) + +#define LmPRMSTAT1(LinkNum) LmSEQ_PHY_REG(0, LinkNum, 0x84) +#define LmPRMSTAT1BYTE0 0x84 +#define LmPRMSTAT1BYTE1 0x85 +#define LmPRMSTAT1BYTE2 0x86 +#define LmPRMSTAT1BYTE3 0x87 + +#define LmFRMRCVDSTAT 0x80000000 +#define LmBREAK_DET 0x04000000 +#define LmCLOSE_DET 0x02000000 +#define LmDONE_DET 0x01000000 +#define LmXRDY 0x00040000 +#define LmSYNCSRST 0x00020000 +#define LmSYNC 0x00010000 +#define LmXHOLD 0x00008000 +#define LmRRDY 0x00004000 +#define LmHOLD 0x00002000 +#define LmROK 0x00001000 +#define LmRIP 0x00000800 +#define LmCRBLK 0x00000400 +#define LmACK 0x00000200 +#define LmNAK 0x00000100 +#define LmHARDRST 0x00000080 +#define LmERROR 0x00000040 +#define LmRERR 0x00000020 +#define LmPMREQP 0x00000010 +#define LmPMREQS 0x00000008 +#define LmPMACK 0x00000004 +#define LmPMNAK 0x00000002 +#define LmDMAT 0x00000001 + +/* mode 1 */ +#define LmMnSATAFS(LinkNum, Mode) LmSEQ_PHY_REG(Mode, LinkNum, 0x7E) +#define LmMnXMTSIZE(LinkNum, Mode) LmSEQ_PHY_REG(Mode, LinkNum, 0x93) + +/* mode 0 */ +#define LmMnFRMERR(LinkNum, Mode) LmSEQ_PHY_REG(Mode, LinkNum, 0xB0) + +#define LmACRCERR 0x00000800 +#define LmPHYOVRN 0x00000400 +#define LmOBOVRN 0x00000200 +#define LmMnZERODATA 0x00000100 +#define LmSATAINTLK 0x00000080 +#define LmMnCRCERR 0x00000020 +#define LmRRDYOVRN 0x00000010 +#define LmMISSSOAF 0x00000008 +#define LmMISSSOF 0x00000004 +#define LmMISSEOAF 0x00000002 +#define LmMISSEOF 0x00000001 + +#define LmFRMERREN(LinkNum) LmSEQ_PHY_REG(0, LinkNum, 0xB4) + +#define EN_LmACRCERR 0x00000800 +#define EN_LmPHYOVRN 0x00000400 +#define EN_LmOBOVRN 0x00000200 +#define EN_LmMnZERODATA 0x00000100 +#define EN_LmSATAINTLK 0x00000080 +#define EN_LmFRMBAD 0x00000040 +#define EN_LmMnCRCERR 0x00000020 +#define EN_LmRRDYOVRN 0x00000010 +#define EN_LmMISSSOAF 0x00000008 +#define EN_LmMISSSOF 0x00000004 +#define EN_LmMISSEOAF 0x00000002 +#define EN_LmMISSEOF 0x00000001 + +#define LmFRMERREN_MASK (EN_LmSATAINTLK | EN_LmMnCRCERR | \ + EN_LmRRDYOVRN | EN_LmMISSSOF | \ + EN_LmMISSEOAF | EN_LmMISSEOF | \ + EN_LmACRCERR | LmPHYOVRN | \ + EN_LmOBOVRN | EN_LmMnZERODATA) + +#define LmHWTSTATEN(LinkNum) LmSEQ_PHY_REG(0, LinkNum, 0xC5) + +#define EN_LmDONETO 0x80 +#define EN_LmINVDISP 0x40 +#define EN_LmINVDW 0x20 +#define EN_LmDWSEVENT 0x08 +#define EN_LmCRTTTO 0x04 +#define EN_LmANTTTO 0x02 +#define EN_LmBITLTTO 0x01 + +#define LmHWTSTATEN_MASK (EN_LmINVDISP | EN_LmINVDW | \ + EN_LmDWSEVENT | EN_LmCRTTTO | \ + EN_LmANTTTO | EN_LmDONETO | \ + EN_LmBITLTTO) + +#define LmHWTSTAT(LinkNum) LmSEQ_PHY_REG(0, LinkNum, 0xC7) + +#define LmDONETO 0x80 +#define LmINVDISP 0x40 +#define LmINVDW 0x20 +#define LmDWSEVENT 0x08 +#define LmCRTTTO 0x04 +#define LmANTTTO 0x02 +#define LmBITLTTO 0x01 + +#define LmMnDATABUFADR(LinkNum, Mode) LmSEQ_PHY_REG(Mode, LinkNum, 0xC8) +#define LmDATABUFADR_MASK 0x0FFF + +#define LmMnDATABUF(LinkNum, Mode) LmSEQ_PHY_REG(Mode, LinkNum, 0xCA) + +#define LmPRIMSTAT0EN(LinkNum) LmSEQ_PHY_REG(0, LinkNum, 0xE0) + +#define EN_LmUNKNOWNP 0x20000000 +#define EN_LmBREAK 0x10000000 +#define EN_LmDONE 0x08000000 +#define EN_LmOPENACPT 0x04000000 +#define EN_LmOPENRJCT 0x02000000 +#define EN_LmOPENRTRY 0x01000000 +#define EN_LmCLOSERV1 0x00800000 +#define EN_LmCLOSERV0 0x00400000 +#define EN_LmCLOSENORM 0x00200000 +#define EN_LmCLOSECLAF 0x00100000 +#define EN_LmNOTIFYRV2 0x00080000 +#define EN_LmNOTIFYRV1 0x00040000 +#define EN_LmNOTIFYRV0 0x00020000 +#define EN_LmNOTIFYSPIN 0x00010000 +#define EN_LmBROADRV4 0x00008000 +#define EN_LmBROADRV3 0x00004000 +#define EN_LmBROADRV2 0x00002000 +#define EN_LmBROADRV1 0x00001000 +#define EN_LmBROADRV0 0x00000800 +#define EN_LmBROADRVCH1 0x00000400 +#define EN_LmBROADRVCH0 0x00000200 +#define EN_LmBROADCH 0x00000100 +#define EN_LmAIPRVWP 0x00000080 +#define EN_LmAIPWP 0x00000040 +#define EN_LmAIPWD 0x00000020 +#define EN_LmAIPWC 0x00000010 +#define EN_LmAIPRV2 0x00000008 +#define EN_LmAIPRV1 0x00000004 +#define EN_LmAIPRV0 0x00000002 +#define EN_LmAIPNRML 0x00000001 + +#define LmPRIMSTAT0EN_MASK (EN_LmBREAK | \ + EN_LmDONE | EN_LmOPENACPT | \ + EN_LmOPENRJCT | EN_LmOPENRTRY | \ + EN_LmCLOSERV1 | EN_LmCLOSERV0 | \ + EN_LmCLOSENORM | EN_LmCLOSECLAF | \ + EN_LmBROADRV4 | EN_LmBROADRV3 | \ + EN_LmBROADRV2 | EN_LmBROADRV1 | \ + EN_LmBROADRV0 | EN_LmBROADRVCH1 | \ + EN_LmBROADRVCH0 | EN_LmBROADCH | \ + EN_LmAIPRVWP | EN_LmAIPWP | \ + EN_LmAIPWD | EN_LmAIPWC | \ + EN_LmAIPRV2 | EN_LmAIPRV1 | \ + EN_LmAIPRV0 | EN_LmAIPNRML) + +#define LmPRIMSTAT1EN(LinkNum) LmSEQ_PHY_REG(0, LinkNum, 0xE4) + +#define EN_LmXRDY 0x00040000 +#define EN_LmSYNCSRST 0x00020000 +#define EN_LmSYNC 0x00010000 +#define EN_LmXHOLD 0x00008000 +#define EN_LmRRDY 0x00004000 +#define EN_LmHOLD 0x00002000 +#define EN_LmROK 0x00001000 +#define EN_LmRIP 0x00000800 +#define EN_LmCRBLK 0x00000400 +#define EN_LmACK 0x00000200 +#define EN_LmNAK 0x00000100 +#define EN_LmHARDRST 0x00000080 +#define EN_LmERROR 0x00000040 +#define EN_LmRERR 0x00000020 +#define EN_LmPMREQP 0x00000010 +#define EN_LmPMREQS 0x00000008 +#define EN_LmPMACK 0x00000004 +#define EN_LmPMNAK 0x00000002 +#define EN_LmDMAT 0x00000001 + +#define LmPRIMSTAT1EN_MASK (EN_LmHARDRST | \ + EN_LmSYNCSRST | \ + EN_LmPMREQP | EN_LmPMREQS | \ + EN_LmPMACK | EN_LmPMNAK) + +#define LmSMSTATE(LinkNum) LmSEQ_PHY_REG(0, LinkNum, 0xE8) + +#define LmSMSTATEBRK(LinkNum) LmSEQ_PHY_REG(0, LinkNum, 0xEC) + +#define LmSMDBGCTL(LinkNum) LmSEQ_PHY_REG(0, LinkNum, 0xF0) + + +/* + * LmSEQ CIO Bus Mode 3 Register. + * Mode 3: Configuration and Setup, IOP Context SCB. + */ +#define LmM3SATATIMER(LinkNum) LmSEQ_PHY_REG(3, LinkNum, 0x48) + +#define LmM3INTVEC0(LinkNum) LmSEQ_PHY_REG(3, LinkNum, 0x90) + +#define LmM3INTVEC1(LinkNum) LmSEQ_PHY_REG(3, LinkNum, 0x92) + +#define LmM3INTVEC2(LinkNum) LmSEQ_PHY_REG(3, LinkNum, 0x94) + +#define LmM3INTVEC3(LinkNum) LmSEQ_PHY_REG(3, LinkNum, 0x96) + +#define LmM3INTVEC4(LinkNum) LmSEQ_PHY_REG(3, LinkNum, 0x98) + +#define LmM3INTVEC5(LinkNum) LmSEQ_PHY_REG(3, LinkNum, 0x9A) + +#define LmM3INTVEC6(LinkNum) LmSEQ_PHY_REG(3, LinkNum, 0x9C) + +#define LmM3INTVEC7(LinkNum) LmSEQ_PHY_REG(3, LinkNum, 0x9E) + +#define LmM3INTVEC8(LinkNum) LmSEQ_PHY_REG(3, LinkNum, 0xA4) + +#define LmM3INTVEC9(LinkNum) LmSEQ_PHY_REG(3, LinkNum, 0xA6) + +#define LmM3INTVEC10(LinkNum) LmSEQ_PHY_REG(3, LinkNum, 0xB0) + +#define LmM3FRMGAP(LinkNum) LmSEQ_PHY_REG(3, LinkNum, 0xB4) + +#define LmBITL_TIMER(LinkNum) LmSEQ_PHY_REG(0, LinkNum, 0xA2) + +#define LmWWN(LinkNum) LmSEQ_PHY_REG(0, LinkNum, 0xA8) + + +/* + * LmSEQ CIO Bus Mode 5 Registers. + * Mode 5: Phy/OOB Control and Status. + */ +#define LmSEQ_OOB_REG(phy_id, reg) LmSEQ_PHY_REG(5, (phy_id), (reg)) + +#define OOB_BFLTR 0x100 + +#define BFLTR_THR_MASK 0xF0 +#define BFLTR_TC_MASK 0x0F + +#define OOB_INIT_MIN 0x102 + +#define OOB_INIT_MAX 0x104 + +#define OOB_INIT_NEG 0x106 + +#define OOB_SAS_MIN 0x108 + +#define OOB_SAS_MAX 0x10A + +#define OOB_SAS_NEG 0x10C + +#define OOB_WAKE_MIN 0x10E + +#define OOB_WAKE_MAX 0x110 + +#define OOB_WAKE_NEG 0x112 + +#define OOB_IDLE_MAX 0x114 + +#define OOB_BURST_MAX 0x116 + +#define OOB_DATA_KBITS 0x126 + +#define OOB_ALIGN_0_DATA 0x12C + +#define OOB_ALIGN_1_DATA 0x130 + +#define D10_2_DATA_k 0x00 +#define SYNC_DATA_k 0x02 +#define ALIGN_1_DATA_k 0x04 +#define ALIGN_0_DATA_k 0x08 +#define BURST_DATA_k 0x10 + +#define OOB_PHY_RESET_COUNT 0x13C + +#define OOB_SIG_GEN 0x140 + +#define START_OOB 0x80 +#define START_DWS 0x40 +#define ALIGN_CNT3 0x30 +#define ALIGN_CNT2 0x20 +#define ALIGN_CNT1 0x10 +#define ALIGN_CNT4 0x00 +#define STOP_DWS 0x08 +#define SEND_COMSAS 0x04 +#define SEND_COMINIT 0x02 +#define SEND_COMWAKE 0x01 + +#define OOB_XMIT 0x141 + +#define TX_ENABLE 0x80 +#define XMIT_OOB_BURST 0x10 +#define XMIT_D10_2 0x08 +#define XMIT_SYNC 0x04 +#define XMIT_ALIGN_1 0x02 +#define XMIT_ALIGN_0 0x01 + +#define FUNCTION_MASK 0x142 + +#define SAS_MODE_DIS 0x80 +#define SATA_MODE_DIS 0x40 +#define SPINUP_HOLD_DIS 0x20 +#define HOT_PLUG_DIS 0x10 +#define SATA_PS_DIS 0x08 +#define FUNCTION_MASK_DEFAULT (SPINUP_HOLD_DIS | SATA_PS_DIS) + +#define OOB_MODE 0x143 + +#define SAS_MODE 0x80 +#define SATA_MODE 0x40 +#define SLOW_CLK 0x20 +#define FORCE_XMIT_15 0x08 +#define PHY_SPEED_60 0x04 +#define PHY_SPEED_30 0x02 +#define PHY_SPEED_15 0x01 + +#define CURRENT_STATUS 0x144 + +#define CURRENT_OOB_DONE 0x80 +#define CURRENT_LOSS_OF_SIGNAL 0x40 +#define CURRENT_SPINUP_HOLD 0x20 +#define CURRENT_HOT_PLUG_CNCT 0x10 +#define CURRENT_GTO_TIMEOUT 0x08 +#define CURRENT_OOB_TIMEOUT 0x04 +#define CURRENT_DEVICE_PRESENT 0x02 +#define CURRENT_OOB_ERROR 0x01 + +#define CURRENT_OOB1_ERROR (CURRENT_HOT_PLUG_CNCT | \ + CURRENT_GTO_TIMEOUT) + +#define CURRENT_OOB2_ERROR (CURRENT_HOT_PLUG_CNCT | \ + CURRENT_OOB_ERROR) + +#define DEVICE_ADDED_W_CNT (CURRENT_OOB_DONE | \ + CURRENT_HOT_PLUG_CNCT | \ + CURRENT_DEVICE_PRESENT) + +#define DEVICE_ADDED_WO_CNT (CURRENT_OOB_DONE | \ + CURRENT_DEVICE_PRESENT) + +#define DEVICE_REMOVED CURRENT_LOSS_OF_SIGNAL + +#define CURRENT_PHY_MASK (CURRENT_OOB_DONE | \ + CURRENT_LOSS_OF_SIGNAL | \ + CURRENT_SPINUP_HOLD | \ + CURRENT_HOT_PLUG_CNCT | \ + CURRENT_GTO_TIMEOUT | \ + CURRENT_DEVICE_PRESENT | \ + CURRENT_OOB_ERROR ) + +#define CURRENT_ERR_MASK (CURRENT_LOSS_OF_SIGNAL | \ + CURRENT_GTO_TIMEOUT | \ + CURRENT_OOB_TIMEOUT | \ + CURRENT_OOB_ERROR ) + +#define SPEED_MASK 0x145 + +#define SATA_SPEED_30_DIS 0x10 +#define SATA_SPEED_15_DIS 0x08 +#define SAS_SPEED_60_DIS 0x04 +#define SAS_SPEED_30_DIS 0x02 +#define SAS_SPEED_15_DIS 0x01 +#define SAS_SPEED_MASK_DEFAULT 0x00 + +#define OOB_TIMER_ENABLE 0x14D + +#define HOT_PLUG_EN 0x80 +#define RCD_EN 0x40 +#define COMTIMER_EN 0x20 +#define SNTT_EN 0x10 +#define SNLT_EN 0x04 +#define SNWT_EN 0x02 +#define ALIGN_EN 0x01 + +#define OOB_STATUS 0x14E + +#define OOB_DONE 0x80 +#define LOSS_OF_SIGNAL 0x40 /* ro */ +#define SPINUP_HOLD 0x20 +#define HOT_PLUG_CNCT 0x10 /* ro */ +#define GTO_TIMEOUT 0x08 /* ro */ +#define OOB_TIMEOUT 0x04 /* ro */ +#define DEVICE_PRESENT 0x02 /* ro */ +#define OOB_ERROR 0x01 /* ro */ + +#define OOB_STATUS_ERROR_MASK (LOSS_OF_SIGNAL | GTO_TIMEOUT | \ + OOB_TIMEOUT | OOB_ERROR) + +#define OOB_STATUS_CLEAR 0x14F + +#define OOB_DONE_CLR 0x80 +#define LOSS_OF_SIGNAL_CLR 0x40 +#define SPINUP_HOLD_CLR 0x20 +#define HOT_PLUG_CNCT_CLR 0x10 +#define GTO_TIMEOUT_CLR 0x08 +#define OOB_TIMEOUT_CLR 0x04 +#define OOB_ERROR_CLR 0x01 + +#define HOT_PLUG_DELAY 0x150 +/* In 5 ms units. 20 = 100 ms. */ +#define HOTPLUG_DELAY_TIMEOUT 20 + + +#define INT_ENABLE_2 0x15A + +#define OOB_DONE_EN 0x80 +#define LOSS_OF_SIGNAL_EN 0x40 +#define SPINUP_HOLD_EN 0x20 +#define HOT_PLUG_CNCT_EN 0x10 +#define GTO_TIMEOUT_EN 0x08 +#define OOB_TIMEOUT_EN 0x04 +#define DEVICE_PRESENT_EN 0x02 +#define OOB_ERROR_EN 0x01 + +#define PHY_CONTROL_0 0x160 + +#define PHY_LOWPWREN_TX 0x80 +#define PHY_LOWPWREN_RX 0x40 +#define SPARE_REG_160_B5 0x20 +#define OFFSET_CANCEL_RX 0x10 + +/* bits 3:2 */ +#define PHY_RXCOMCENTER_60V 0x00 +#define PHY_RXCOMCENTER_70V 0x04 +#define PHY_RXCOMCENTER_80V 0x08 +#define PHY_RXCOMCENTER_90V 0x0C +#define PHY_RXCOMCENTER_MASK 0x0C + +#define PHY_RESET 0x02 +#define SAS_DEFAULT_SEL 0x01 + +#define PHY_CONTROL_1 0x161 + +/* bits 2:0 */ +#define SATA_PHY_DETLEVEL_50mv 0x00 +#define SATA_PHY_DETLEVEL_75mv 0x01 +#define SATA_PHY_DETLEVEL_100mv 0x02 +#define SATA_PHY_DETLEVEL_125mv 0x03 +#define SATA_PHY_DETLEVEL_150mv 0x04 +#define SATA_PHY_DETLEVEL_175mv 0x05 +#define SATA_PHY_DETLEVEL_200mv 0x06 +#define SATA_PHY_DETLEVEL_225mv 0x07 +#define SATA_PHY_DETLEVEL_MASK 0x07 + +/* bits 5:3 */ +#define SAS_PHY_DETLEVEL_50mv 0x00 +#define SAS_PHY_DETLEVEL_75mv 0x08 +#define SAS_PHY_DETLEVEL_100mv 0x10 +#define SAS_PHY_DETLEVEL_125mv 0x11 +#define SAS_PHY_DETLEVEL_150mv 0x20 +#define SAS_PHY_DETLEVEL_175mv 0x21 +#define SAS_PHY_DETLEVEL_200mv 0x30 +#define SAS_PHY_DETLEVEL_225mv 0x31 +#define SAS_PHY_DETLEVEL_MASK 0x38 + +#define PHY_CONTROL_2 0x162 + +/* bits 7:5 */ +#define SATA_PHY_DRV_400mv 0x00 +#define SATA_PHY_DRV_450mv 0x20 +#define SATA_PHY_DRV_500mv 0x40 +#define SATA_PHY_DRV_550mv 0x60 +#define SATA_PHY_DRV_600mv 0x80 +#define SATA_PHY_DRV_650mv 0xA0 +#define SATA_PHY_DRV_725mv 0xC0 +#define SATA_PHY_DRV_800mv 0xE0 +#define SATA_PHY_DRV_MASK 0xE0 + +/* bits 4:3 */ +#define SATA_PREEMP_0 0x00 +#define SATA_PREEMP_1 0x08 +#define SATA_PREEMP_2 0x10 +#define SATA_PREEMP_3 0x18 +#define SATA_PREEMP_MASK 0x18 + +#define SATA_CMSH1P5 0x04 + +/* bits 1:0 */ +#define SATA_SLEW_0 0x00 +#define SATA_SLEW_1 0x01 +#define SATA_SLEW_2 0x02 +#define SATA_SLEW_3 0x03 +#define SATA_SLEW_MASK 0x03 + +#define PHY_CONTROL_3 0x163 + +/* bits 7:5 */ +#define SAS_PHY_DRV_400mv 0x00 +#define SAS_PHY_DRV_450mv 0x20 +#define SAS_PHY_DRV_500mv 0x40 +#define SAS_PHY_DRV_550mv 0x60 +#define SAS_PHY_DRV_600mv 0x80 +#define SAS_PHY_DRV_650mv 0xA0 +#define SAS_PHY_DRV_725mv 0xC0 +#define SAS_PHY_DRV_800mv 0xE0 +#define SAS_PHY_DRV_MASK 0xE0 + +/* bits 4:3 */ +#define SAS_PREEMP_0 0x00 +#define SAS_PREEMP_1 0x08 +#define SAS_PREEMP_2 0x10 +#define SAS_PREEMP_3 0x18 +#define SAS_PREEMP_MASK 0x18 + +#define SAS_CMSH1P5 0x04 + +/* bits 1:0 */ +#define SAS_SLEW_0 0x00 +#define SAS_SLEW_1 0x01 +#define SAS_SLEW_2 0x02 +#define SAS_SLEW_3 0x03 +#define SAS_SLEW_MASK 0x03 + +#define PHY_CONTROL_4 0x168 + +#define PHY_DONE_CAL_TX 0x80 +#define PHY_DONE_CAL_RX 0x40 +#define RX_TERM_LOAD_DIS 0x20 +#define TX_TERM_LOAD_DIS 0x10 +#define AUTO_TERM_CAL_DIS 0x08 +#define PHY_SIGDET_FLTR_EN 0x04 +#define OSC_FREQ 0x02 +#define PHY_START_CAL 0x01 + +/* + * HST_PCIX2 Registers, Addresss Range: (0x00-0xFC) + */ +#define PCIX_REG_BASE_ADR 0xB8040000 + +#define PCIC_VENDOR_ID 0x00 + +#define PCIC_DEVICE_ID 0x02 + +#define PCIC_COMMAND 0x04 + +#define INT_DIS 0x0400 +#define FBB_EN 0x0200 /* ro */ +#define SERR_EN 0x0100 +#define STEP_EN 0x0080 /* ro */ +#define PERR_EN 0x0040 +#define VGA_EN 0x0020 /* ro */ +#define MWI_EN 0x0010 +#define SPC_EN 0x0008 +#define MST_EN 0x0004 +#define MEM_EN 0x0002 +#define IO_EN 0x0001 + +#define PCIC_STATUS 0x06 + +#define PERR_DET 0x8000 +#define SERR_GEN 0x4000 +#define MABT_DET 0x2000 +#define TABT_DET 0x1000 +#define TABT_GEN 0x0800 +#define DPERR_DET 0x0100 +#define CAP_LIST 0x0010 +#define INT_STAT 0x0008 + +#define PCIC_DEVREV_ID 0x08 + +#define PCIC_CLASS_CODE 0x09 + +#define PCIC_CACHELINE_SIZE 0x0C + +#define PCIC_MBAR0 0x10 + +#define PCIC_MBAR0_OFFSET 0 + +#define PCIC_MBAR1 0x18 + +#define PCIC_MBAR1_OFFSET 2 + +#define PCIC_IOBAR 0x20 + +#define PCIC_IOBAR_OFFSET 4 + +#define PCIC_SUBVENDOR_ID 0x2C + +#define PCIC_SUBSYTEM_ID 0x2E + +#define PCIX_STATUS 0x44 +#define RCV_SCE 0x20000000 +#define UNEXP_SC 0x00080000 +#define SC_DISCARD 0x00040000 + +#define ECC_CTRL_STAT 0x48 +#define UNCOR_ECCERR 0x00000008 + +#define PCIC_PM_CSR 0x5C + +#define PWR_STATE_D0 0 +#define PWR_STATE_D1 1 /* not supported */ +#define PWR_STATE_D2 2 /* not supported */ +#define PWR_STATE_D3 3 + +#define PCIC_BASE1 0x6C /* internal use only */ + +#define BASE1_RSVD 0xFFFFFFF8 + +#define PCIC_BASEA 0x70 /* internal use only */ + +#define BASEA_RSVD 0xFFFFFFC0 +#define BASEA_START 0 + +#define PCIC_BASEB 0x74 /* internal use only */ + +#define BASEB_RSVD 0xFFFFFF80 +#define BASEB_IOMAP_MASK 0x7F +#define BASEB_START 0x80 + +#define PCIC_BASEC 0x78 /* internal use only */ + +#define BASEC_RSVD 0xFFFFFFFC +#define BASEC_MASK 0x03 +#define BASEC_START 0x58 + +#define PCIC_MBAR_KEY 0x7C /* internal use only */ + +#define MBAR_KEY_MASK 0xFFFFFFFF + +#define PCIC_HSTPCIX_CNTRL 0xA0 + +#define REWIND_DIS 0x0800 +#define SC_TMR_DIS 0x04000000 + +#define PCIC_MBAR0_MASK 0xA8 +#define PCIC_MBAR0_SIZE_MASK 0x1FFFE000 +#define PCIC_MBAR0_SIZE_SHIFT 13 +#define PCIC_MBAR0_SIZE(val) \ + (((val) & PCIC_MBAR0_SIZE_MASK) >> PCIC_MBAR0_SIZE_SHIFT) + +#define PCIC_FLASH_MBAR 0xB8 + +#define PCIC_INTRPT_STAT 0xD4 + +#define PCIC_TP_CTRL 0xFC + +/* + * EXSI Registers, Addresss Range: (0x00-0xFC) + */ +#define EXSI_REG_BASE_ADR REG_BASE_ADDR_EXSI + +#define EXSICNFGR (EXSI_REG_BASE_ADR + 0x00) + +#define OCMINITIALIZED 0x80000000 +#define ASIEN 0x00400000 +#define HCMODE 0x00200000 +#define PCIDEF 0x00100000 +#define COMSTOCK 0x00080000 +#define SEEPROMEND 0x00040000 +#define MSTTIMEN 0x00020000 +#define XREGEX 0x00000200 +#define NVRAMW 0x00000100 +#define NVRAMEX 0x00000080 +#define SRAMW 0x00000040 +#define SRAMEX 0x00000020 +#define FLASHW 0x00000010 +#define FLASHEX 0x00000008 +#define SEEPROMCFG 0x00000004 +#define SEEPROMTYP 0x00000002 +#define SEEPROMEX 0x00000001 + + +#define EXSICNTRLR (EXSI_REG_BASE_ADR + 0x04) + +#define MODINT_EN 0x00000001 + + +#define PMSTATR (EXSI_REG_BASE_ADR + 0x10) + +#define FLASHRST 0x00000002 +#define FLASHRDY 0x00000001 + + +#define FLCNFGR (EXSI_REG_BASE_ADR + 0x14) + +#define FLWEH_MASK 0x30000000 +#define FLWESU_MASK 0x0C000000 +#define FLWEPW_MASK 0x03F00000 +#define FLOEH_MASK 0x000C0000 +#define FLOESU_MASK 0x00030000 +#define FLOEPW_MASK 0x0000FC00 +#define FLCSH_MASK 0x00000300 +#define FLCSSU_MASK 0x000000C0 +#define FLCSPW_MASK 0x0000003F + +#define SRCNFGR (EXSI_REG_BASE_ADR + 0x18) + +#define SRWEH_MASK 0x30000000 +#define SRWESU_MASK 0x0C000000 +#define SRWEPW_MASK 0x03F00000 + +#define SROEH_MASK 0x000C0000 +#define SROESU_MASK 0x00030000 +#define SROEPW_MASK 0x0000FC00 +#define SRCSH_MASK 0x00000300 +#define SRCSSU_MASK 0x000000C0 +#define SRCSPW_MASK 0x0000003F + +#define NVCNFGR (EXSI_REG_BASE_ADR + 0x1C) + +#define NVWEH_MASK 0x30000000 +#define NVWESU_MASK 0x0C000000 +#define NVWEPW_MASK 0x03F00000 +#define NVOEH_MASK 0x000C0000 +#define NVOESU_MASK 0x00030000 +#define NVOEPW_MASK 0x0000FC00 +#define NVCSH_MASK 0x00000300 +#define NVCSSU_MASK 0x000000C0 +#define NVCSPW_MASK 0x0000003F + +#define XRCNFGR (EXSI_REG_BASE_ADR + 0x20) + +#define XRWEH_MASK 0x30000000 +#define XRWESU_MASK 0x0C000000 +#define XRWEPW_MASK 0x03F00000 +#define XROEH_MASK 0x000C0000 +#define XROESU_MASK 0x00030000 +#define XROEPW_MASK 0x0000FC00 +#define XRCSH_MASK 0x00000300 +#define XRCSSU_MASK 0x000000C0 +#define XRCSPW_MASK 0x0000003F + +#define XREGADDR (EXSI_REG_BASE_ADR + 0x24) + +#define XRADDRINCEN 0x80000000 +#define XREGADD_MASK 0x007FFFFF + + +#define XREGDATAR (EXSI_REG_BASE_ADR + 0x28) + +#define XREGDATA_MASK 0x0000FFFF + +#define GPIOOER (EXSI_REG_BASE_ADR + 0x40) + +#define GPIOODENR (EXSI_REG_BASE_ADR + 0x44) + +#define GPIOINVR (EXSI_REG_BASE_ADR + 0x48) + +#define GPIODATAOR (EXSI_REG_BASE_ADR + 0x4C) + +#define GPIODATAIR (EXSI_REG_BASE_ADR + 0x50) + +#define GPIOCNFGR (EXSI_REG_BASE_ADR + 0x54) + +#define GPIO_EXTSRC 0x00000001 + +#define SCNTRLR (EXSI_REG_BASE_ADR + 0xA0) + +#define SXFERDONE 0x00000100 +#define SXFERCNT_MASK 0x000000E0 +#define SCMDTYP_MASK 0x0000001C +#define SXFERSTART 0x00000002 +#define SXFEREN 0x00000001 + +#define SRATER (EXSI_REG_BASE_ADR + 0xA4) + +#define SADDRR (EXSI_REG_BASE_ADR + 0xA8) + +#define SADDR_MASK 0x0000FFFF + +#define SDATAOR (EXSI_REG_BASE_ADR + 0xAC) + +#define SDATAOR0 (EXSI_REG_BASE_ADR + 0xAC) +#define SDATAOR1 (EXSI_REG_BASE_ADR + 0xAD) +#define SDATAOR2 (EXSI_REG_BASE_ADR + 0xAE) +#define SDATAOR3 (EXSI_REG_BASE_ADR + 0xAF) + +#define SDATAIR (EXSI_REG_BASE_ADR + 0xB0) + +#define SDATAIR0 (EXSI_REG_BASE_ADR + 0xB0) +#define SDATAIR1 (EXSI_REG_BASE_ADR + 0xB1) +#define SDATAIR2 (EXSI_REG_BASE_ADR + 0xB2) +#define SDATAIR3 (EXSI_REG_BASE_ADR + 0xB3) + +#define ASISTAT0R (EXSI_REG_BASE_ADR + 0xD0) +#define ASIFMTERR 0x00000400 +#define ASISEECHKERR 0x00000200 +#define ASIERR 0x00000100 + +#define ASISTAT1R (EXSI_REG_BASE_ADR + 0xD4) +#define CHECKSUM_MASK 0x0000FFFF + +#define ASIERRADDR (EXSI_REG_BASE_ADR + 0xD8) +#define ASIERRDATAR (EXSI_REG_BASE_ADR + 0xDC) +#define ASIERRSTATR (EXSI_REG_BASE_ADR + 0xE0) +#define CPI2ASIBYTECNT_MASK 0x00070000 +#define CPI2ASIBYTEEN_MASK 0x0000F000 +#define CPI2ASITARGERR_MASK 0x00000F00 +#define CPI2ASITARGMID_MASK 0x000000F0 +#define CPI2ASIMSTERR_MASK 0x0000000F + +/* + * XSRAM, External SRAM (DWord and any BE pattern accessible) + */ +#define XSRAM_REG_BASE_ADDR 0xB8100000 +#define XSRAM_SIZE 0x100000 + +/* + * NVRAM Registers, Address Range: (0x00000 - 0x3FFFF). + */ +#define NVRAM_REG_BASE_ADR 0xBF800000 +#define NVRAM_MAX_BASE_ADR 0x003FFFFF + +/* OCM base address */ +#define OCM_BASE_ADDR 0xA0000000 +#define OCM_MAX_SIZE 0x20000 + +/* + * Sequencers (Central and Link) Scratch RAM page definitions. + */ + +/* + * The Central Management Sequencer (CSEQ) Scratch Memory is a 1024 + * byte memory. It is dword accessible and has byte parity + * protection. The CSEQ accesses it in 32 byte windows, either as mode + * dependent or mode independent memory. Each mode has 96 bytes, + * (three 32 byte pages 0-2, not contiguous), leaving 128 bytes of + * Mode Independent memory (four 32 byte pages 3-7). Note that mode + * dependent scratch memory, Mode 8, page 0-3 overlaps mode + * independent scratch memory, pages 0-3. + * - 896 bytes of mode dependent scratch, 96 bytes per Modes 0-7, and + * 128 bytes in mode 8, + * - 259 bytes of mode independent scratch, common to modes 0-15. + * + * Sequencer scratch RAM is 1024 bytes. This scratch memory is + * divided into mode dependent and mode independent scratch with this + * memory further subdivided into pages of size 32 bytes. There are 5 + * pages (160 bytes) of mode independent scratch and 3 pages of + * dependent scratch memory for modes 0-7 (768 bytes). Mode 8 pages + * 0-2 dependent scratch overlap with pages 0-2 of mode independent + * scratch memory. + * + * The host accesses this scratch in a different manner from the + * central sequencer. The sequencer has to use CSEQ registers CSCRPAGE + * and CMnSCRPAGE to access the scratch memory. A flat mapping of the + * scratch memory is avaliable for software convenience and to prevent + * corruption while the sequencer is running. This memory is mapped + * onto addresses 800h - BFFh, total of 400h bytes. + * + * These addresses are mapped as follows: + * + * 800h-83Fh Mode Dependent Scratch Mode 0 Pages 0-1 + * 840h-87Fh Mode Dependent Scratch Mode 1 Pages 0-1 + * 880h-8BFh Mode Dependent Scratch Mode 2 Pages 0-1 + * 8C0h-8FFh Mode Dependent Scratch Mode 3 Pages 0-1 + * 900h-93Fh Mode Dependent Scratch Mode 4 Pages 0-1 + * 940h-97Fh Mode Dependent Scratch Mode 5 Pages 0-1 + * 980h-9BFh Mode Dependent Scratch Mode 6 Pages 0-1 + * 9C0h-9FFh Mode Dependent Scratch Mode 7 Pages 0-1 + * A00h-A5Fh Mode Dependent Scratch Mode 8 Pages 0-2 + * Mode Independent Scratch Pages 0-2 + * A60h-A7Fh Mode Dependent Scratch Mode 8 Page 3 + * Mode Independent Scratch Page 3 + * A80h-AFFh Mode Independent Scratch Pages 4-7 + * B00h-B1Fh Mode Dependent Scratch Mode 0 Page 2 + * B20h-B3Fh Mode Dependent Scratch Mode 1 Page 2 + * B40h-B5Fh Mode Dependent Scratch Mode 2 Page 2 + * B60h-B7Fh Mode Dependent Scratch Mode 3 Page 2 + * B80h-B9Fh Mode Dependent Scratch Mode 4 Page 2 + * BA0h-BBFh Mode Dependent Scratch Mode 5 Page 2 + * BC0h-BDFh Mode Dependent Scratch Mode 6 Page 2 + * BE0h-BFFh Mode Dependent Scratch Mode 7 Page 2 + */ + +/* General macros */ +#define CSEQ_PAGE_SIZE 32 /* Scratch page size (in bytes) */ + +/* All macros start with offsets from base + 0x800 (CMAPPEDSCR). + * Mode dependent scratch page 0, mode 0. + * For modes 1-7 you have to do arithmetic. */ +#define CSEQ_LRM_SAVE_SINDEX (CMAPPEDSCR + 0x0000) +#define CSEQ_LRM_SAVE_SCBPTR (CMAPPEDSCR + 0x0002) +#define CSEQ_Q_LINK_HEAD (CMAPPEDSCR + 0x0004) +#define CSEQ_Q_LINK_TAIL (CMAPPEDSCR + 0x0006) +#define CSEQ_LRM_SAVE_SCRPAGE (CMAPPEDSCR + 0x0008) + +/* Mode dependent scratch page 0 mode 8 macros. */ +#define CSEQ_RET_ADDR (CMAPPEDSCR + 0x0200) +#define CSEQ_RET_SCBPTR (CMAPPEDSCR + 0x0202) +#define CSEQ_SAVE_SCBPTR (CMAPPEDSCR + 0x0204) +#define CSEQ_EMPTY_TRANS_CTX (CMAPPEDSCR + 0x0206) +#define CSEQ_RESP_LEN (CMAPPEDSCR + 0x0208) +#define CSEQ_TMF_SCBPTR (CMAPPEDSCR + 0x020A) +#define CSEQ_GLOBAL_PREV_SCB (CMAPPEDSCR + 0x020C) +#define CSEQ_GLOBAL_HEAD (CMAPPEDSCR + 0x020E) +#define CSEQ_CLEAR_LU_HEAD (CMAPPEDSCR + 0x0210) +#define CSEQ_TMF_OPCODE (CMAPPEDSCR + 0x0212) +#define CSEQ_SCRATCH_FLAGS (CMAPPEDSCR + 0x0213) +#define CSEQ_HSB_SITE (CMAPPEDSCR + 0x021A) +#define CSEQ_FIRST_INV_SCB_SITE (CMAPPEDSCR + 0x021C) +#define CSEQ_FIRST_INV_DDB_SITE (CMAPPEDSCR + 0x021E) + +/* Mode dependent scratch page 1 mode 8 macros. */ +#define CSEQ_LUN_TO_CLEAR (CMAPPEDSCR + 0x0220) +#define CSEQ_LUN_TO_CHECK (CMAPPEDSCR + 0x0228) + +/* Mode dependent scratch page 2 mode 8 macros */ +#define CSEQ_HQ_NEW_POINTER (CMAPPEDSCR + 0x0240) +#define CSEQ_HQ_DONE_BASE (CMAPPEDSCR + 0x0248) +#define CSEQ_HQ_DONE_POINTER (CMAPPEDSCR + 0x0250) +#define CSEQ_HQ_DONE_PASS (CMAPPEDSCR + 0x0254) + +/* Mode independent scratch page 4 macros. */ +#define CSEQ_Q_EXE_HEAD (CMAPPEDSCR + 0x0280) +#define CSEQ_Q_EXE_TAIL (CMAPPEDSCR + 0x0282) +#define CSEQ_Q_DONE_HEAD (CMAPPEDSCR + 0x0284) +#define CSEQ_Q_DONE_TAIL (CMAPPEDSCR + 0x0286) +#define CSEQ_Q_SEND_HEAD (CMAPPEDSCR + 0x0288) +#define CSEQ_Q_SEND_TAIL (CMAPPEDSCR + 0x028A) +#define CSEQ_Q_DMA2CHIM_HEAD (CMAPPEDSCR + 0x028C) +#define CSEQ_Q_DMA2CHIM_TAIL (CMAPPEDSCR + 0x028E) +#define CSEQ_Q_COPY_HEAD (CMAPPEDSCR + 0x0290) +#define CSEQ_Q_COPY_TAIL (CMAPPEDSCR + 0x0292) +#define CSEQ_REG0 (CMAPPEDSCR + 0x0294) +#define CSEQ_REG1 (CMAPPEDSCR + 0x0296) +#define CSEQ_REG2 (CMAPPEDSCR + 0x0298) +#define CSEQ_LINK_CTL_Q_MAP (CMAPPEDSCR + 0x029C) +#define CSEQ_MAX_CSEQ_MODE (CMAPPEDSCR + 0x029D) +#define CSEQ_FREE_LIST_HACK_COUNT (CMAPPEDSCR + 0x029E) + +/* Mode independent scratch page 5 macros. */ +#define CSEQ_EST_NEXUS_REQ_QUEUE (CMAPPEDSCR + 0x02A0) +#define CSEQ_EST_NEXUS_REQ_COUNT (CMAPPEDSCR + 0x02A8) +#define CSEQ_Q_EST_NEXUS_HEAD (CMAPPEDSCR + 0x02B0) +#define CSEQ_Q_EST_NEXUS_TAIL (CMAPPEDSCR + 0x02B2) +#define CSEQ_NEED_EST_NEXUS_SCB (CMAPPEDSCR + 0x02B4) +#define CSEQ_EST_NEXUS_REQ_HEAD (CMAPPEDSCR + 0x02B6) +#define CSEQ_EST_NEXUS_REQ_TAIL (CMAPPEDSCR + 0x02B7) +#define CSEQ_EST_NEXUS_SCB_OFFSET (CMAPPEDSCR + 0x02B8) + +/* Mode independent scratch page 6 macros. */ +#define CSEQ_INT_ROUT_RET_ADDR0 (CMAPPEDSCR + 0x02C0) +#define CSEQ_INT_ROUT_RET_ADDR1 (CMAPPEDSCR + 0x02C2) +#define CSEQ_INT_ROUT_SCBPTR (CMAPPEDSCR + 0x02C4) +#define CSEQ_INT_ROUT_MODE (CMAPPEDSCR + 0x02C6) +#define CSEQ_ISR_SCRATCH_FLAGS (CMAPPEDSCR + 0x02C7) +#define CSEQ_ISR_SAVE_SINDEX (CMAPPEDSCR + 0x02C8) +#define CSEQ_ISR_SAVE_DINDEX (CMAPPEDSCR + 0x02CA) +#define CSEQ_Q_MONIRTT_HEAD (CMAPPEDSCR + 0x02D0) +#define CSEQ_Q_MONIRTT_TAIL (CMAPPEDSCR + 0x02D2) +#define CSEQ_FREE_SCB_MASK (CMAPPEDSCR + 0x02D5) +#define CSEQ_BUILTIN_FREE_SCB_HEAD (CMAPPEDSCR + 0x02D6) +#define CSEQ_BUILTIN_FREE_SCB_TAIL (CMAPPEDSCR + 0x02D8) +#define CSEQ_EXTENDED_FREE_SCB_HEAD (CMAPPEDSCR + 0x02DA) +#define CSEQ_EXTENDED_FREE_SCB_TAIL (CMAPPEDSCR + 0x02DC) + +/* Mode independent scratch page 7 macros. */ +#define CSEQ_EMPTY_REQ_QUEUE (CMAPPEDSCR + 0x02E0) +#define CSEQ_EMPTY_REQ_COUNT (CMAPPEDSCR + 0x02E8) +#define CSEQ_Q_EMPTY_HEAD (CMAPPEDSCR + 0x02F0) +#define CSEQ_Q_EMPTY_TAIL (CMAPPEDSCR + 0x02F2) +#define CSEQ_NEED_EMPTY_SCB (CMAPPEDSCR + 0x02F4) +#define CSEQ_EMPTY_REQ_HEAD (CMAPPEDSCR + 0x02F6) +#define CSEQ_EMPTY_REQ_TAIL (CMAPPEDSCR + 0x02F7) +#define CSEQ_EMPTY_SCB_OFFSET (CMAPPEDSCR + 0x02F8) +#define CSEQ_PRIMITIVE_DATA (CMAPPEDSCR + 0x02FA) +#define CSEQ_TIMEOUT_CONST (CMAPPEDSCR + 0x02FC) + +/*************************************************************************** +* Link m Sequencer scratch RAM is 512 bytes. +* This scratch memory is divided into mode dependent and mode +* independent scratch with this memory further subdivided into +* pages of size 32 bytes. There are 4 pages (128 bytes) of +* mode independent scratch and 4 pages of dependent scratch +* memory for modes 0-2 (384 bytes). +* +* The host accesses this scratch in a different manner from the +* link sequencer. The sequencer has to use LSEQ registers +* LmSCRPAGE and LmMnSCRPAGE to access the scratch memory. A flat +* mapping of the scratch memory is avaliable for software +* convenience and to prevent corruption while the sequencer is +* running. This memory is mapped onto addresses 800h - 9FFh. +* +* These addresses are mapped as follows: +* +* 800h-85Fh Mode Dependent Scratch Mode 0 Pages 0-2 +* 860h-87Fh Mode Dependent Scratch Mode 0 Page 3 +* Mode Dependent Scratch Mode 5 Page 0 +* 880h-8DFh Mode Dependent Scratch Mode 1 Pages 0-2 +* 8E0h-8FFh Mode Dependent Scratch Mode 1 Page 3 +* Mode Dependent Scratch Mode 5 Page 1 +* 900h-95Fh Mode Dependent Scratch Mode 2 Pages 0-2 +* 960h-97Fh Mode Dependent Scratch Mode 2 Page 3 +* Mode Dependent Scratch Mode 5 Page 2 +* 980h-9DFh Mode Independent Scratch Pages 0-3 +* 9E0h-9FFh Mode Independent Scratch Page 3 +* Mode Dependent Scratch Mode 5 Page 3 +* +****************************************************************************/ +/* General macros */ +#define LSEQ_MODE_SCRATCH_SIZE 0x80 /* Size of scratch RAM per mode */ +#define LSEQ_PAGE_SIZE 0x20 /* Scratch page size (in bytes) */ +#define LSEQ_MODE5_PAGE0_OFFSET 0x60 + +/* Common mode dependent scratch page 0 macros for modes 0,1,2, and 5 */ +/* Indexed using LSEQ_MODE_SCRATCH_SIZE * mode, for modes 0,1,2. */ +#define LmSEQ_RET_ADDR(LinkNum) (LmSCRATCH(LinkNum) + 0x0000) +#define LmSEQ_REG0_MODE(LinkNum) (LmSCRATCH(LinkNum) + 0x0002) +#define LmSEQ_MODE_FLAGS(LinkNum) (LmSCRATCH(LinkNum) + 0x0004) + +/* Mode flag macros (byte 0) */ +#define SAS_SAVECTX_OCCURRED 0x80 +#define SAS_OOBSVC_OCCURRED 0x40 +#define SAS_OOB_DEVICE_PRESENT 0x20 +#define SAS_CFGHDR_OCCURRED 0x10 +#define SAS_RCV_INTS_ARE_DISABLED 0x08 +#define SAS_OOB_HOT_PLUG_CNCT 0x04 +#define SAS_AWAIT_OPEN_CONNECTION 0x02 +#define SAS_CFGCMPLT_OCCURRED 0x01 + +/* Mode flag macros (byte 1) */ +#define SAS_RLSSCB_OCCURRED 0x80 +#define SAS_FORCED_HEADER_MISS 0x40 + +#define LmSEQ_RET_ADDR2(LinkNum) (LmSCRATCH(LinkNum) + 0x0006) +#define LmSEQ_RET_ADDR1(LinkNum) (LmSCRATCH(LinkNum) + 0x0008) +#define LmSEQ_OPCODE_TO_CSEQ(LinkNum) (LmSCRATCH(LinkNum) + 0x000B) +#define LmSEQ_DATA_TO_CSEQ(LinkNum) (LmSCRATCH(LinkNum) + 0x000C) + +/* Mode dependent scratch page 0 macros for mode 0 (non-common) */ +/* Absolute offsets */ +#define LmSEQ_FIRST_INV_DDB_SITE(LinkNum) (LmSCRATCH(LinkNum) + 0x000E) +#define LmSEQ_EMPTY_TRANS_CTX(LinkNum) (LmSCRATCH(LinkNum) + 0x0010) +#define LmSEQ_RESP_LEN(LinkNum) (LmSCRATCH(LinkNum) + 0x0012) +#define LmSEQ_FIRST_INV_SCB_SITE(LinkNum) (LmSCRATCH(LinkNum) + 0x0014) +#define LmSEQ_INTEN_SAVE(LinkNum) (LmSCRATCH(LinkNum) + 0x0016) +#define LmSEQ_LINK_RST_FRM_LEN(LinkNum) (LmSCRATCH(LinkNum) + 0x001A) +#define LmSEQ_LINK_RST_PROTOCOL(LinkNum) (LmSCRATCH(LinkNum) + 0x001B) +#define LmSEQ_RESP_STATUS(LinkNum) (LmSCRATCH(LinkNum) + 0x001C) +#define LmSEQ_LAST_LOADED_SGE(LinkNum) (LmSCRATCH(LinkNum) + 0x001D) +#define LmSEQ_SAVE_SCBPTR(LinkNum) (LmSCRATCH(LinkNum) + 0x001E) + +/* Mode dependent scratch page 0 macros for mode 1 (non-common) */ +/* Absolute offsets */ +#define LmSEQ_Q_XMIT_HEAD(LinkNum) (LmSCRATCH(LinkNum) + 0x008E) +#define LmSEQ_M1_EMPTY_TRANS_CTX(LinkNum) (LmSCRATCH(LinkNum) + 0x0090) +#define LmSEQ_INI_CONN_TAG(LinkNum) (LmSCRATCH(LinkNum) + 0x0092) +#define LmSEQ_FAILED_OPEN_STATUS(LinkNum) (LmSCRATCH(LinkNum) + 0x009A) +#define LmSEQ_XMIT_REQUEST_TYPE(LinkNum) (LmSCRATCH(LinkNum) + 0x009B) +#define LmSEQ_M1_RESP_STATUS(LinkNum) (LmSCRATCH(LinkNum) + 0x009C) +#define LmSEQ_M1_LAST_LOADED_SGE(LinkNum) (LmSCRATCH(LinkNum) + 0x009D) +#define LmSEQ_M1_SAVE_SCBPTR(LinkNum) (LmSCRATCH(LinkNum) + 0x009E) + +/* Mode dependent scratch page 0 macros for mode 2 (non-common) */ +#define LmSEQ_PORT_COUNTER(LinkNum) (LmSCRATCH(LinkNum) + 0x010E) +#define LmSEQ_PM_TABLE_PTR(LinkNum) (LmSCRATCH(LinkNum) + 0x0110) +#define LmSEQ_SATA_INTERLOCK_TMR_SAVE(LinkNum) (LmSCRATCH(LinkNum) + 0x0112) +#define LmSEQ_IP_BITL(LinkNum) (LmSCRATCH(LinkNum) + 0x0114) +#define LmSEQ_COPY_SMP_CONN_TAG(LinkNum) (LmSCRATCH(LinkNum) + 0x0116) +#define LmSEQ_P0M2_OFFS1AH(LinkNum) (LmSCRATCH(LinkNum) + 0x011A) + +/* Mode dependent scratch page 0 macros for modes 4/5 (non-common) */ +/* Absolute offsets */ +#define LmSEQ_SAVED_OOB_STATUS(LinkNum) (LmSCRATCH(LinkNum) + 0x006E) +#define LmSEQ_SAVED_OOB_MODE(LinkNum) (LmSCRATCH(LinkNum) + 0x006F) +#define LmSEQ_Q_LINK_HEAD(LinkNum) (LmSCRATCH(LinkNum) + 0x0070) +#define LmSEQ_LINK_RST_ERR(LinkNum) (LmSCRATCH(LinkNum) + 0x0072) +#define LmSEQ_SAVED_OOB_SIGNALS(LinkNum) (LmSCRATCH(LinkNum) + 0x0073) +#define LmSEQ_SAS_RESET_MODE(LinkNum) (LmSCRATCH(LinkNum) + 0x0074) +#define LmSEQ_LINK_RESET_RETRY_COUNT(LinkNum) (LmSCRATCH(LinkNum) + 0x0075) +#define LmSEQ_NUM_LINK_RESET_RETRIES(LinkNum) (LmSCRATCH(LinkNum) + 0x0076) +#define LmSEQ_OOB_INT_ENABLES(LinkNum) (LmSCRATCH(LinkNum) + 0x007A) +#define LmSEQ_NOTIFY_TIMER_TIMEOUT(LinkNum) (LmSCRATCH(LinkNum) + 0x007C) +#define LmSEQ_NOTIFY_TIMER_DOWN_COUNT(LinkNum) (LmSCRATCH(LinkNum) + 0x007E) + +/* Mode dependent scratch page 1, mode 0 and mode 1 */ +#define LmSEQ_SG_LIST_PTR_ADDR0(LinkNum) (LmSCRATCH(LinkNum) + 0x0020) +#define LmSEQ_SG_LIST_PTR_ADDR1(LinkNum) (LmSCRATCH(LinkNum) + 0x0030) +#define LmSEQ_M1_SG_LIST_PTR_ADDR0(LinkNum) (LmSCRATCH(LinkNum) + 0x00A0) +#define LmSEQ_M1_SG_LIST_PTR_ADDR1(LinkNum) (LmSCRATCH(LinkNum) + 0x00B0) + +/* Mode dependent scratch page 1 macros for mode 2 */ +/* Absolute offsets */ +#define LmSEQ_INVALID_DWORD_COUNT(LinkNum) (LmSCRATCH(LinkNum) + 0x0120) +#define LmSEQ_DISPARITY_ERROR_COUNT(LinkNum) (LmSCRATCH(LinkNum) + 0x0124) +#define LmSEQ_LOSS_OF_SYNC_COUNT(LinkNum) (LmSCRATCH(LinkNum) + 0x0128) + +/* Mode dependent scratch page 1 macros for mode 4/5 */ +#define LmSEQ_FRAME_TYPE_MASK(LinkNum) (LmSCRATCH(LinkNum) + 0x00E0) +#define LmSEQ_HASHED_DEST_ADDR_MASK(LinkNum) (LmSCRATCH(LinkNum) + 0x00E1) +#define LmSEQ_HASHED_SRC_ADDR_MASK_PRINT(LinkNum) (LmSCRATCH(LinkNum) + 0x00E4) +#define LmSEQ_HASHED_SRC_ADDR_MASK(LinkNum) (LmSCRATCH(LinkNum) + 0x00E5) +#define LmSEQ_NUM_FILL_BYTES_MASK(LinkNum) (LmSCRATCH(LinkNum) + 0x00EB) +#define LmSEQ_TAG_MASK(LinkNum) (LmSCRATCH(LinkNum) + 0x00F0) +#define LmSEQ_TARGET_PORT_XFER_TAG(LinkNum) (LmSCRATCH(LinkNum) + 0x00F2) +#define LmSEQ_DATA_OFFSET(LinkNum) (LmSCRATCH(LinkNum) + 0x00F4) + +/* Mode dependent scratch page 2 macros for mode 0 */ +/* Absolute offsets */ +#define LmSEQ_SMP_RCV_TIMER_TERM_TS(LinkNum) (LmSCRATCH(LinkNum) + 0x0040) +#define LmSEQ_DEVICE_BITS(LinkNum) (LmSCRATCH(LinkNum) + 0x005B) +#define LmSEQ_SDB_DDB(LinkNum) (LmSCRATCH(LinkNum) + 0x005C) +#define LmSEQ_SDB_NUM_TAGS(LinkNum) (LmSCRATCH(LinkNum) + 0x005E) +#define LmSEQ_SDB_CURR_TAG(LinkNum) (LmSCRATCH(LinkNum) + 0x005F) + +/* Mode dependent scratch page 2 macros for mode 1 */ +/* Absolute offsets */ +/* byte 0 bits 1-0 are domain select. */ +#define LmSEQ_TX_ID_ADDR_FRAME(LinkNum) (LmSCRATCH(LinkNum) + 0x00C0) +#define LmSEQ_OPEN_TIMER_TERM_TS(LinkNum) (LmSCRATCH(LinkNum) + 0x00C8) +#define LmSEQ_SRST_AS_TIMER_TERM_TS(LinkNum) (LmSCRATCH(LinkNum) + 0x00CC) +#define LmSEQ_LAST_LOADED_SG_EL(LinkNum) (LmSCRATCH(LinkNum) + 0x00D4) + +/* Mode dependent scratch page 2 macros for mode 2 */ +/* Absolute offsets */ +#define LmSEQ_STP_SHUTDOWN_TIMER_TERM_TS(LinkNum) (LmSCRATCH(LinkNum) + 0x0140) +#define LmSEQ_CLOSE_TIMER_TERM_TS(LinkNum) (LmSCRATCH(LinkNum) + 0x0144) +#define LmSEQ_BREAK_TIMER_TERM_TS(LinkNum) (LmSCRATCH(LinkNum) + 0x0148) +#define LmSEQ_DWS_RESET_TIMER_TERM_TS(LinkNum) (LmSCRATCH(LinkNum) + 0x014C) +#define LmSEQ_SATA_INTERLOCK_TIMER_TERM_TS(LinkNum) \ + (LmSCRATCH(LinkNum) + 0x0150) +#define LmSEQ_MCTL_TIMER_TERM_TS(LinkNum) (LmSCRATCH(LinkNum) + 0x0154) + +/* Mode dependent scratch page 2 macros for mode 5 */ +#define LmSEQ_COMINIT_TIMER_TERM_TS(LinkNum) (LmSCRATCH(LinkNum) + 0x0160) +#define LmSEQ_RCV_ID_TIMER_TERM_TS(LinkNum) (LmSCRATCH(LinkNum) + 0x0164) +#define LmSEQ_RCV_FIS_TIMER_TERM_TS(LinkNum) (LmSCRATCH(LinkNum) + 0x0168) +#define LmSEQ_DEV_PRES_TIMER_TERM_TS(LinkNum) (LmSCRATCH(LinkNum) + 0x016C) + +/* Mode dependent scratch page 3 macros for modes 0 and 1 */ +/* None defined */ + +/* Mode dependent scratch page 3 macros for modes 2 and 5 */ +/* None defined */ + +/* Mode Independent Scratch page 0 macros. */ +#define LmSEQ_Q_TGTXFR_HEAD(LinkNum) (LmSCRATCH(LinkNum) + 0x0180) +#define LmSEQ_Q_TGTXFR_TAIL(LinkNum) (LmSCRATCH(LinkNum) + 0x0182) +#define LmSEQ_LINK_NUMBER(LinkNum) (LmSCRATCH(LinkNum) + 0x0186) +#define LmSEQ_SCRATCH_FLAGS(LinkNum) (LmSCRATCH(LinkNum) + 0x0187) +/* + * Currently only bit 0, SAS_DWSAQD, is used. + */ +#define SAS_DWSAQD 0x01 /* + * DWSSTATUS: DWSAQD + * bit las read in ISR. + */ +#define LmSEQ_CONNECTION_STATE(LinkNum) (LmSCRATCH(LinkNum) + 0x0188) +/* Connection states (byte 0) */ +#define SAS_WE_OPENED_CS 0x01 +#define SAS_DEVICE_OPENED_CS 0x02 +#define SAS_WE_SENT_DONE_CS 0x04 +#define SAS_DEVICE_SENT_DONE_CS 0x08 +#define SAS_WE_SENT_CLOSE_CS 0x10 +#define SAS_DEVICE_SENT_CLOSE_CS 0x20 +#define SAS_WE_SENT_BREAK_CS 0x40 +#define SAS_DEVICE_SENT_BREAK_CS 0x80 +/* Connection states (byte 1) */ +#define SAS_OPN_TIMEOUT_OR_OPN_RJCT_CS 0x01 +#define SAS_AIP_RECEIVED_CS 0x02 +#define SAS_CREDIT_TIMEOUT_OCCURRED_CS 0x04 +#define SAS_ACKNAK_TIMEOUT_OCCURRED_CS 0x08 +#define SAS_SMPRSP_TIMEOUT_OCCURRED_CS 0x10 +#define SAS_DONE_TIMEOUT_OCCURRED_CS 0x20 +/* Connection states (byte 2) */ +#define SAS_SMP_RESPONSE_RECEIVED_CS 0x01 +#define SAS_INTLK_TIMEOUT_OCCURRED_CS 0x02 +#define SAS_DEVICE_SENT_DMAT_CS 0x04 +#define SAS_DEVICE_SENT_SYNCSRST_CS 0x08 +#define SAS_CLEARING_AFFILIATION_CS 0x20 +#define SAS_RXTASK_ACTIVE_CS 0x40 +#define SAS_TXTASK_ACTIVE_CS 0x80 +/* Connection states (byte 3) */ +#define SAS_PHY_LOSS_OF_SIGNAL_CS 0x01 +#define SAS_DWS_TIMER_EXPIRED_CS 0x02 +#define SAS_LINK_RESET_NOT_COMPLETE_CS 0x04 +#define SAS_PHY_DISABLED_CS 0x08 +#define SAS_LINK_CTL_TASK_ACTIVE_CS 0x10 +#define SAS_PHY_EVENT_TASK_ACTIVE_CS 0x20 +#define SAS_DEVICE_SENT_ID_FRAME_CS 0x40 +#define SAS_DEVICE_SENT_REG_FIS_CS 0x40 +#define SAS_DEVICE_SENT_HARD_RESET_CS 0x80 +#define SAS_PHY_IS_DOWN_FLAGS (SAS_PHY_LOSS_OF_SIGNAL_CS|\ + SAS_DWS_TIMER_EXPIRED_CS |\ + SAS_LINK_RESET_NOT_COMPLETE_CS|\ + SAS_PHY_DISABLED_CS) + +#define SAS_LINK_CTL_PHY_EVENT_FLAGS (SAS_LINK_CTL_TASK_ACTIVE_CS |\ + SAS_PHY_EVENT_TASK_ACTIVE_CS |\ + SAS_DEVICE_SENT_ID_FRAME_CS |\ + SAS_DEVICE_SENT_HARD_RESET_CS) + +#define LmSEQ_CONCTL(LinkNum) (LmSCRATCH(LinkNum) + 0x018C) +#define LmSEQ_CONSTAT(LinkNum) (LmSCRATCH(LinkNum) + 0x018E) +#define LmSEQ_CONNECTION_MODES(LinkNum) (LmSCRATCH(LinkNum) + 0x018F) +#define LmSEQ_REG1_ISR(LinkNum) (LmSCRATCH(LinkNum) + 0x0192) +#define LmSEQ_REG2_ISR(LinkNum) (LmSCRATCH(LinkNum) + 0x0194) +#define LmSEQ_REG3_ISR(LinkNum) (LmSCRATCH(LinkNum) + 0x0196) +#define LmSEQ_REG0_ISR(LinkNum) (LmSCRATCH(LinkNum) + 0x0198) + +/* Mode independent scratch page 1 macros. */ +#define LmSEQ_EST_NEXUS_SCBPTR0(LinkNum) (LmSCRATCH(LinkNum) + 0x01A0) +#define LmSEQ_EST_NEXUS_SCBPTR1(LinkNum) (LmSCRATCH(LinkNum) + 0x01A2) +#define LmSEQ_EST_NEXUS_SCBPTR2(LinkNum) (LmSCRATCH(LinkNum) + 0x01A4) +#define LmSEQ_EST_NEXUS_SCBPTR3(LinkNum) (LmSCRATCH(LinkNum) + 0x01A6) +#define LmSEQ_EST_NEXUS_SCB_OPCODE0(LinkNum) (LmSCRATCH(LinkNum) + 0x01A8) +#define LmSEQ_EST_NEXUS_SCB_OPCODE1(LinkNum) (LmSCRATCH(LinkNum) + 0x01A9) +#define LmSEQ_EST_NEXUS_SCB_OPCODE2(LinkNum) (LmSCRATCH(LinkNum) + 0x01AA) +#define LmSEQ_EST_NEXUS_SCB_OPCODE3(LinkNum) (LmSCRATCH(LinkNum) + 0x01AB) +#define LmSEQ_EST_NEXUS_SCB_HEAD(LinkNum) (LmSCRATCH(LinkNum) + 0x01AC) +#define LmSEQ_EST_NEXUS_SCB_TAIL(LinkNum) (LmSCRATCH(LinkNum) + 0x01AD) +#define LmSEQ_EST_NEXUS_BUF_AVAIL(LinkNum) (LmSCRATCH(LinkNum) + 0x01AE) +#define LmSEQ_TIMEOUT_CONST(LinkNum) (LmSCRATCH(LinkNum) + 0x01B8) +#define LmSEQ_ISR_SAVE_SINDEX(LinkNum) (LmSCRATCH(LinkNum) + 0x01BC) +#define LmSEQ_ISR_SAVE_DINDEX(LinkNum) (LmSCRATCH(LinkNum) + 0x01BE) + +/* Mode independent scratch page 2 macros. */ +#define LmSEQ_EMPTY_SCB_PTR0(LinkNum) (LmSCRATCH(LinkNum) + 0x01C0) +#define LmSEQ_EMPTY_SCB_PTR1(LinkNum) (LmSCRATCH(LinkNum) + 0x01C2) +#define LmSEQ_EMPTY_SCB_PTR2(LinkNum) (LmSCRATCH(LinkNum) + 0x01C4) +#define LmSEQ_EMPTY_SCB_PTR3(LinkNum) (LmSCRATCH(LinkNum) + 0x01C6) +#define LmSEQ_EMPTY_SCB_OPCD0(LinkNum) (LmSCRATCH(LinkNum) + 0x01C8) +#define LmSEQ_EMPTY_SCB_OPCD1(LinkNum) (LmSCRATCH(LinkNum) + 0x01C9) +#define LmSEQ_EMPTY_SCB_OPCD2(LinkNum) (LmSCRATCH(LinkNum) + 0x01CA) +#define LmSEQ_EMPTY_SCB_OPCD3(LinkNum) (LmSCRATCH(LinkNum) + 0x01CB) +#define LmSEQ_EMPTY_SCB_HEAD(LinkNum) (LmSCRATCH(LinkNum) + 0x01CC) +#define LmSEQ_EMPTY_SCB_TAIL(LinkNum) (LmSCRATCH(LinkNum) + 0x01CD) +#define LmSEQ_EMPTY_BUFS_AVAIL(LinkNum) (LmSCRATCH(LinkNum) + 0x01CE) +#define LmSEQ_ATA_SCR_REGS(LinkNum) (LmSCRATCH(LinkNum) + 0x01D4) + +/* Mode independent scratch page 3 macros. */ +#define LmSEQ_DEV_PRES_TMR_TOUT_CONST(LinkNum) (LmSCRATCH(LinkNum) + 0x01E0) +#define LmSEQ_SATA_INTERLOCK_TIMEOUT(LinkNum) (LmSCRATCH(LinkNum) + 0x01E4) +#define LmSEQ_STP_SHUTDOWN_TIMEOUT(LinkNum) (LmSCRATCH(LinkNum) + 0x01E8) +#define LmSEQ_SRST_ASSERT_TIMEOUT(LinkNum) (LmSCRATCH(LinkNum) + 0x01EC) +#define LmSEQ_RCV_FIS_TIMEOUT(LinkNum) (LmSCRATCH(LinkNum) + 0x01F0) +#define LmSEQ_ONE_MILLISEC_TIMEOUT(LinkNum) (LmSCRATCH(LinkNum) + 0x01F4) +#define LmSEQ_TEN_MS_COMINIT_TIMEOUT(LinkNum) (LmSCRATCH(LinkNum) + 0x01F8) +#define LmSEQ_SMP_RCV_TIMEOUT(LinkNum) (LmSCRATCH(LinkNum) + 0x01FC) + +#endif diff --git a/drivers/scsi/aic94xx/aic94xx_sas.h b/drivers/scsi/aic94xx/aic94xx_sas.h new file mode 100644 index 000000000000..64d231712345 --- /dev/null +++ b/drivers/scsi/aic94xx/aic94xx_sas.h @@ -0,0 +1,785 @@ +/* + * Aic94xx SAS/SATA driver SAS definitions and hardware interface header file. + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This file is part of the aic94xx driver. + * + * The aic94xx driver is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2 of the + * License. + * + * The aic94xx driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with the aic94xx driver; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _AIC94XX_SAS_H_ +#define _AIC94XX_SAS_H_ + +#include + +/* ---------- DDBs ---------- */ +/* DDBs are device descriptor blocks which describe a device in the + * domain that this sequencer can maintain low-level connections for + * us. They are be 64 bytes. + */ + +struct asd_ddb_ssp_smp_target_port { + u8 conn_type; /* byte 0 */ +#define DDB_TP_CONN_TYPE 0x81 /* Initiator port and addr frame type 0x01 */ + + u8 conn_rate; + __be16 init_conn_tag; + u8 dest_sas_addr[8]; /* bytes 4-11 */ + + __le16 send_queue_head; + u8 sq_suspended; + u8 ddb_type; /* DDB_TYPE_TARGET */ +#define DDB_TYPE_UNUSED 0xFF +#define DDB_TYPE_TARGET 0xFE +#define DDB_TYPE_INITIATOR 0xFD +#define DDB_TYPE_PM_PORT 0xFC + + __le16 _r_a; + __be16 awt_def; + + u8 compat_features; /* byte 20 */ + u8 pathway_blocked_count; + __be16 arb_wait_time; + __be32 more_compat_features; /* byte 24 */ + + u8 conn_mask; + u8 flags; /* concurrent conn:2,2 and open:0(1) */ +#define CONCURRENT_CONN_SUPP 0x04 +#define OPEN_REQUIRED 0x01 + + u16 _r_b; + __le16 exec_queue_tail; + __le16 send_queue_tail; + __le16 sister_ddb; + + __le16 _r_c; + + u8 max_concurrent_conn; + u8 num_concurrent_conn; + u8 num_contexts; + + u8 _r_d; + + __le16 active_task_count; + + u8 _r_e[9]; + + u8 itnl_reason; /* I_T nexus loss reason */ + + __le16 _r_f; + + __le16 itnl_timeout; +#define ITNL_TIMEOUT_CONST 0x7D0 /* 2 seconds */ + + __le32 itnl_timestamp; +} __attribute__ ((packed)); + +struct asd_ddb_stp_sata_target_port { + u8 conn_type; /* byte 0 */ + u8 conn_rate; + __be16 init_conn_tag; + u8 dest_sas_addr[8]; /* bytes 4-11 */ + + __le16 send_queue_head; + u8 sq_suspended; + u8 ddb_type; /* DDB_TYPE_TARGET */ + + __le16 _r_a; + + __be16 awt_def; + u8 compat_features; /* byte 20 */ + u8 pathway_blocked_count; + __be16 arb_wait_time; + __be32 more_compat_features; /* byte 24 */ + + u8 conn_mask; + u8 flags; /* concurrent conn:2,2 and open:0(1) */ +#define SATA_MULTIPORT 0x80 +#define SUPPORTS_AFFIL 0x40 +#define STP_AFFIL_POL 0x20 + + u8 _r_b; + u8 flags2; /* STP close policy:0 */ +#define STP_CL_POL_NO_TX 0x00 +#define STP_CL_POL_BTW_CMDS 0x01 + + __le16 exec_queue_tail; + __le16 send_queue_tail; + __le16 sister_ddb; + __le16 ata_cmd_scbptr; + __le32 sata_tag_alloc_mask; + __le16 active_task_count; + __le16 _r_c; + __le32 sata_sactive; + u8 num_sata_tags; + u8 sata_status; + u8 sata_ending_status; + u8 itnl_reason; /* I_T nexus loss reason */ + __le16 ncq_data_scb_ptr; + __le16 itnl_timeout; + __le32 itnl_timestamp; +} __attribute__ ((packed)); + +/* This struct asd_ddb_init_port, describes the device descriptor block + * of an initiator port (when the sequencer is operating in target mode). + * Bytes [0,11] and [20,27] are from the OPEN address frame. + * The sequencer allocates an initiator port DDB entry. + */ +struct asd_ddb_init_port { + u8 conn_type; /* byte 0 */ + u8 conn_rate; + __be16 init_conn_tag; /* BE */ + u8 dest_sas_addr[8]; + __le16 send_queue_head; /* LE, byte 12 */ + u8 sq_suspended; + u8 ddb_type; /* DDB_TYPE_INITIATOR */ + __le16 _r_a; + __be16 awt_def; /* BE */ + u8 compat_features; + u8 pathway_blocked_count; + __be16 arb_wait_time; /* BE */ + __be32 more_compat_features; /* BE */ + u8 conn_mask; + u8 flags; /* == 5 */ + u16 _r_b; + __le16 exec_queue_tail; /* execution queue tail */ + __le16 send_queue_tail; + __le16 sister_ddb; + __le16 init_resp_timeout; /* initiator response timeout */ + __le32 _r_c; + __le16 active_tasks; /* active task count */ + __le16 init_list; /* initiator list link pointer */ + __le32 _r_d; + u8 max_conn_to[3]; /* from Conn-Disc mode page, in us, LE */ + u8 itnl_reason; /* I_T nexus loss reason */ + __le16 bus_inact_to; /* from Conn-Disc mode page, in 100 us, LE */ + __le16 itnl_to; /* from the Protocol Specific Port Ctrl MP */ + __le32 itnl_timestamp; +} __attribute__ ((packed)); + +/* This struct asd_ddb_sata_tag, describes a look-up table to be used + * by the sequencers. SATA II, IDENTIFY DEVICE data, word 76, bit 8: + * NCQ support. This table is used by the sequencers to find the + * corresponding SCB, given a SATA II tag value. + */ +struct asd_ddb_sata_tag { + __le16 scb_pointer[32]; +} __attribute__ ((packed)); + +/* This struct asd_ddb_sata_pm_table, describes a port number to + * connection handle look-up table. SATA targets attached to a port + * multiplier require a 4-bit port number value. There is one DDB + * entry of this type for each SATA port multiplier (sister DDB). + * Given a SATA PM port number, this table gives us the SATA PM Port + * DDB of the SATA port multiplier port (i.e. the SATA target + * discovered on the port). + */ +struct asd_ddb_sata_pm_table { + __le16 ddb_pointer[16]; + __le16 _r_a[16]; +} __attribute__ ((packed)); + +/* This struct asd_ddb_sata_pm_port, describes the SATA port multiplier + * port format DDB. + */ +struct asd_ddb_sata_pm_port { + u8 _r_a[15]; + u8 ddb_type; + u8 _r_b[13]; + u8 pm_port_flags; +#define PM_PORT_MASK 0xF0 +#define PM_PORT_SET 0x02 + u8 _r_c[6]; + __le16 sister_ddb; + __le16 ata_cmd_scbptr; + __le32 sata_tag_alloc_mask; + __le16 active_task_count; + __le16 parent_ddb; + __le32 sata_sactive; + u8 num_sata_tags; + u8 sata_status; + u8 sata_ending_status; + u8 _r_d[9]; +} __attribute__ ((packed)); + +/* This struct asd_ddb_seq_shared, describes a DDB shared by the + * central and link sequencers. port_map_by_links is indexed phy + * number [0,7]; each byte is a bit mask of all the phys that are in + * the same port as the indexed phy. + */ +struct asd_ddb_seq_shared { + __le16 q_free_ddb_head; + __le16 q_free_ddb_tail; + __le16 q_free_ddb_cnt; + __le16 q_used_ddb_head; + __le16 q_used_ddb_tail; + __le16 shared_mem_lock; + __le16 smp_conn_tag; + __le16 est_nexus_buf_cnt; + __le16 est_nexus_buf_thresh; + u32 _r_a; + u8 settable_max_contexts; + u8 _r_b[23]; + u8 conn_not_active; + u8 phy_is_up; + u8 _r_c[8]; + u8 port_map_by_links[8]; +} __attribute__ ((packed)); + +/* ---------- SG Element ---------- */ + +/* This struct sg_el, describes the hardware scatter gather buffer + * element. All entries are little endian. In an SCB, there are 2 of + * this, plus one more, called a link element of this indicating a + * sublist if needed. + * + * A link element has only the bus address set and the flags (DS) bit + * valid. The bus address points to the start of the sublist. + * + * If a sublist is needed, then that sublist should also include the 2 + * sg_el embedded in the SCB, in which case next_sg_offset is 32, + * since sizeof(sg_el) = 16; EOS should be 1 and EOL 0 in this case. + */ +struct sg_el { + __le64 bus_addr; + __le32 size; + __le16 _r; + u8 next_sg_offs; + u8 flags; +#define ASD_SG_EL_DS_MASK 0x30 +#define ASD_SG_EL_DS_OCM 0x10 +#define ASD_SG_EL_DS_HM 0x00 +#define ASD_SG_EL_LIST_MASK 0xC0 +#define ASD_SG_EL_LIST_EOL 0x40 +#define ASD_SG_EL_LIST_EOS 0x80 +} __attribute__ ((packed)); + +/* ---------- SCBs ---------- */ + +/* An SCB (sequencer control block) is comprised of a common header + * and a task part, for a total of 128 bytes. All fields are in LE + * order, unless otherwise noted. + */ + +/* This struct scb_header, defines the SCB header format. + */ +struct scb_header { + __le64 next_scb; + __le16 index; /* transaction context */ + u8 opcode; +} __attribute__ ((packed)); + +/* SCB opcodes: Execution queue + */ +#define INITIATE_SSP_TASK 0x00 +#define INITIATE_LONG_SSP_TASK 0x01 +#define INITIATE_BIDIR_SSP_TASK 0x02 +#define ABORT_TASK 0x03 +#define INITIATE_SSP_TMF 0x04 +#define SSP_TARG_GET_DATA 0x05 +#define SSP_TARG_GET_DATA_GOOD 0x06 +#define SSP_TARG_SEND_RESP 0x07 +#define QUERY_SSP_TASK 0x08 +#define INITIATE_ATA_TASK 0x09 +#define INITIATE_ATAPI_TASK 0x0a +#define CONTROL_ATA_DEV 0x0b +#define INITIATE_SMP_TASK 0x0c +#define SMP_TARG_SEND_RESP 0x0f + +/* SCB opcodes: Send Queue + */ +#define SSP_TARG_SEND_DATA 0x40 +#define SSP_TARG_SEND_DATA_GOOD 0x41 + +/* SCB opcodes: Link Queue + */ +#define CONTROL_PHY 0x80 +#define SEND_PRIMITIVE 0x81 +#define INITIATE_LINK_ADM_TASK 0x82 + +/* SCB opcodes: other + */ +#define EMPTY_SCB 0xc0 +#define INITIATE_SEQ_ADM_TASK 0xc1 +#define EST_ICL_TARG_WINDOW 0xc2 +#define COPY_MEM 0xc3 +#define CLEAR_NEXUS 0xc4 +#define INITIATE_DDB_ADM_TASK 0xc6 +#define ESTABLISH_NEXUS_ESCB 0xd0 + +#define LUN_SIZE 8 + +/* See SAS spec, task IU + */ +struct ssp_task_iu { + u8 lun[LUN_SIZE]; /* BE */ + u16 _r_a; + u8 tmf; + u8 _r_b; + __be16 tag; /* BE */ + u8 _r_c[14]; +} __attribute__ ((packed)); + +/* See SAS spec, command IU + */ +struct ssp_command_iu { + u8 lun[LUN_SIZE]; + u8 _r_a; + u8 efb_prio_attr; /* enable first burst, task prio & attr */ +#define EFB_MASK 0x80 +#define TASK_PRIO_MASK 0x78 +#define TASK_ATTR_MASK 0x07 + + u8 _r_b; + u8 add_cdb_len; /* in dwords, since bit 0,1 are reserved */ + union { + u8 cdb[16]; + struct { + __le64 long_cdb_addr; /* bus address, LE */ + __le32 long_cdb_size; /* LE */ + u8 _r_c[3]; + u8 eol_ds; /* eol:6,6, ds:5,4 */ + } long_cdb; /* sequencer extension */ + }; +} __attribute__ ((packed)); + +struct xfer_rdy_iu { + __be32 requested_offset; /* BE */ + __be32 write_data_len; /* BE */ + __be32 _r_a; +} __attribute__ ((packed)); + +/* ---------- SCB tasks ---------- */ + +/* This is both ssp_task and long_ssp_task + */ +struct initiate_ssp_task { + u8 proto_conn_rate; /* proto:6,4, conn_rate:3,0 */ + __le32 total_xfer_len; + struct ssp_frame_hdr ssp_frame; + struct ssp_command_iu ssp_cmd; + __le16 sister_scb; /* 0xFFFF */ + __le16 conn_handle; /* index to DDB for the intended target */ + u8 data_dir; /* :1,0 */ +#define DATA_DIR_NONE 0x00 +#define DATA_DIR_IN 0x01 +#define DATA_DIR_OUT 0x02 +#define DATA_DIR_BYRECIPIENT 0x03 + + u8 _r_a; + u8 retry_count; + u8 _r_b[5]; + struct sg_el sg_element[3]; /* 2 real and 1 link */ +} __attribute__ ((packed)); + +/* This defines both ata_task and atapi_task. + * ata: C bit of FIS should be 1, + * atapi: C bit of FIS should be 1, and command register should be 0xA0, + * to indicate a packet command. + */ +struct initiate_ata_task { + u8 proto_conn_rate; + __le32 total_xfer_len; + struct host_to_dev_fis fis; + __le32 data_offs; + u8 atapi_packet[16]; + u8 _r_a[12]; + __le16 sister_scb; + __le16 conn_handle; + u8 ata_flags; /* CSMI:6,6, DTM:4,4, QT:3,3, data dir:1,0 */ +#define CSMI_TASK 0x40 +#define DATA_XFER_MODE_DMA 0x10 +#define ATA_Q_TYPE_MASK 0x08 +#define ATA_Q_TYPE_UNTAGGED 0x00 +#define ATA_Q_TYPE_NCQ 0x08 + + u8 _r_b; + u8 retry_count; + u8 _r_c; + u8 flags; +#define STP_AFFIL_POLICY 0x20 +#define SET_AFFIL_POLICY 0x10 +#define RET_PARTIAL_SGLIST 0x02 + + u8 _r_d[3]; + struct sg_el sg_element[3]; +} __attribute__ ((packed)); + +struct initiate_smp_task { + u8 proto_conn_rate; + u8 _r_a[40]; + struct sg_el smp_req; + __le16 sister_scb; + __le16 conn_handle; + u8 _r_c[8]; + struct sg_el smp_resp; + u8 _r_d[32]; +} __attribute__ ((packed)); + +struct control_phy { + u8 phy_id; + u8 sub_func; +#define DISABLE_PHY 0x00 +#define ENABLE_PHY 0x01 +#define RELEASE_SPINUP_HOLD 0x02 +#define ENABLE_PHY_NO_SAS_OOB 0x03 +#define ENABLE_PHY_NO_SATA_OOB 0x04 +#define PHY_NO_OP 0x05 +#define EXECUTE_HARD_RESET 0x81 + + u8 func_mask; + u8 speed_mask; + u8 hot_plug_delay; + u8 port_type; + u8 flags; +#define DEV_PRES_TIMER_OVERRIDE_ENABLE 0x01 +#define DISABLE_PHY_IF_OOB_FAILS 0x02 + + __le32 timeout_override; + u8 link_reset_retries; + u8 _r_a[47]; + __le16 conn_handle; + u8 _r_b[56]; +} __attribute__ ((packed)); + +struct control_ata_dev { + u8 proto_conn_rate; + __le32 _r_a; + struct host_to_dev_fis fis; + u8 _r_b[32]; + __le16 sister_scb; + __le16 conn_handle; + u8 ata_flags; /* 0 */ + u8 _r_c[55]; +} __attribute__ ((packed)); + +struct empty_scb { + u8 num_valid; + __le32 _r_a; +#define ASD_EDBS_PER_SCB 7 +/* header+data+CRC+DMA suffix data */ +#define ASD_EDB_SIZE (24+1024+4+16) + struct sg_el eb[ASD_EDBS_PER_SCB]; +#define ELEMENT_NOT_VALID 0xC0 +} __attribute__ ((packed)); + +struct initiate_link_adm { + u8 phy_id; + u8 sub_func; +#define GET_LINK_ERROR_COUNT 0x00 +#define RESET_LINK_ERROR_COUNT 0x01 +#define ENABLE_NOTIFY_SPINUP_INTS 0x02 + + u8 _r_a[57]; + __le16 conn_handle; + u8 _r_b[56]; +} __attribute__ ((packed)); + +struct copy_memory { + u8 _r_a; + __le16 xfer_len; + __le16 _r_b; + __le64 src_busaddr; + u8 src_ds; /* See definition of sg_el */ + u8 _r_c[45]; + __le16 conn_handle; + __le64 _r_d; + __le64 dest_busaddr; + u8 dest_ds; /* See definition of sg_el */ + u8 _r_e[39]; +} __attribute__ ((packed)); + +struct abort_task { + u8 proto_conn_rate; + __le32 _r_a; + struct ssp_frame_hdr ssp_frame; + struct ssp_task_iu ssp_task; + __le16 sister_scb; + __le16 conn_handle; + u8 flags; /* ovrd_itnl_timer:3,3, suspend_data_trans:2,2 */ +#define SUSPEND_DATA_TRANS 0x04 + + u8 _r_b; + u8 retry_count; + u8 _r_c[5]; + __le16 index; /* Transaction context of task to be queried */ + __le16 itnl_to; + u8 _r_d[44]; +} __attribute__ ((packed)); + +struct clear_nexus { + u8 nexus; +#define NEXUS_ADAPTER 0x00 +#define NEXUS_PORT 0x01 +#define NEXUS_I_T 0x02 +#define NEXUS_I_T_L 0x03 +#define NEXUS_TAG 0x04 +#define NEXUS_TRANS_CX 0x05 +#define NEXUS_SATA_TAG 0x06 +#define NEXUS_T_L 0x07 +#define NEXUS_L 0x08 +#define NEXUS_T_TAG 0x09 + + __le32 _r_a; + u8 flags; +#define SUSPEND_TX 0x80 +#define RESUME_TX 0x40 +#define SEND_Q 0x04 +#define EXEC_Q 0x02 +#define NOTINQ 0x01 + + u8 _r_b[3]; + u8 conn_mask; + u8 _r_c[19]; + struct ssp_task_iu ssp_task; /* LUN and TAG */ + __le16 _r_d; + __le16 conn_handle; + __le64 _r_e; + __le16 index; /* Transaction context of task to be cleared */ + __le16 context; /* Clear nexus context */ + u8 _r_f[44]; +} __attribute__ ((packed)); + +struct initiate_ssp_tmf { + u8 proto_conn_rate; + __le32 _r_a; + struct ssp_frame_hdr ssp_frame; + struct ssp_task_iu ssp_task; + __le16 sister_scb; + __le16 conn_handle; + u8 flags; /* itnl override and suspend data tx */ +#define OVERRIDE_ITNL_TIMER 8 + + u8 _r_b; + u8 retry_count; + u8 _r_c[5]; + __le16 index; /* Transaction context of task to be queried */ + __le16 itnl_to; + u8 _r_d[44]; +} __attribute__ ((packed)); + +/* Transmits an arbitrary primitive on the link. + * Used for NOTIFY and BROADCAST. + */ +struct send_prim { + u8 phy_id; + u8 wait_transmit; /* :0,0 */ + u8 xmit_flags; +#define XMTPSIZE_MASK 0xF0 +#define XMTPSIZE_SINGLE 0x10 +#define XMTPSIZE_REPEATED 0x20 +#define XMTPSIZE_CONT 0x20 +#define XMTPSIZE_TRIPLE 0x30 +#define XMTPSIZE_REDUNDANT 0x60 +#define XMTPSIZE_INF 0 + +#define XMTCONTEN 0x04 +#define XMTPFRM 0x02 /* Transmit at the next frame boundary */ +#define XMTPIMM 0x01 /* Transmit immediately */ + + __le16 _r_a; + u8 prim[4]; /* K, D0, D1, D2 */ + u8 _r_b[50]; + __le16 conn_handle; + u8 _r_c[56]; +} __attribute__ ((packed)); + +/* This describes both SSP Target Get Data and SSP Target Get Data And + * Send Good Response SCBs. Used when the sequencer is operating in + * target mode... + */ +struct ssp_targ_get_data { + u8 proto_conn_rate; + __le32 total_xfer_len; + struct ssp_frame_hdr ssp_frame; + struct xfer_rdy_iu xfer_rdy; + u8 lun[LUN_SIZE]; + __le64 _r_a; + __le16 sister_scb; + __le16 conn_handle; + u8 data_dir; /* 01b */ + u8 _r_b; + u8 retry_count; + u8 _r_c[5]; + struct sg_el sg_element[3]; +} __attribute__ ((packed)); + +/* ---------- The actual SCB struct ---------- */ + +struct scb { + struct scb_header header; + union { + struct initiate_ssp_task ssp_task; + struct initiate_ata_task ata_task; + struct initiate_smp_task smp_task; + struct control_phy control_phy; + struct control_ata_dev control_ata_dev; + struct empty_scb escb; + struct initiate_link_adm link_adm; + struct copy_memory cp_mem; + struct abort_task abort_task; + struct clear_nexus clear_nexus; + struct initiate_ssp_tmf ssp_tmf; + }; +} __attribute__ ((packed)); + +/* ---------- Done List ---------- */ +/* The done list entry opcode field is defined below. + * The mnemonic encoding and meaning is as follows: + * TC - Task Complete, status was received and acknowledged + * TF - Task Failed, indicates an error prior to receiving acknowledgment + * for the command: + * - no conn, + * - NACK or R_ERR received in response to this command, + * - credit blocked or not available, or in the case of SMP request, + * - no SMP response was received. + * In these four cases it is known that the target didn't receive the + * command. + * TI - Task Interrupted, error after the command was acknowledged. It is + * known that the command was received by the target. + * TU - Task Unacked, command was transmitted but neither ACK (R_OK) nor NAK + * (R_ERR) was received due to loss of signal, broken connection, loss of + * dword sync or other reason. The application client should send the + * appropriate task query. + * TA - Task Aborted, see TF. + * _RESP - The completion includes an empty buffer containing status. + * TO - Timeout. + */ +#define TC_NO_ERROR 0x00 +#define TC_UNDERRUN 0x01 +#define TC_OVERRUN 0x02 +#define TF_OPEN_TO 0x03 +#define TF_OPEN_REJECT 0x04 +#define TI_BREAK 0x05 +#define TI_PROTO_ERR 0x06 +#define TC_SSP_RESP 0x07 +#define TI_PHY_DOWN 0x08 +#define TF_PHY_DOWN 0x09 +#define TC_LINK_ADM_RESP 0x0a +#define TC_CSMI 0x0b +#define TC_ATA_RESP 0x0c +#define TU_PHY_DOWN 0x0d +#define TU_BREAK 0x0e +#define TI_SATA_TO 0x0f +#define TI_NAK 0x10 +#define TC_CONTROL_PHY 0x11 +#define TF_BREAK 0x12 +#define TC_RESUME 0x13 +#define TI_ACK_NAK_TO 0x14 +#define TF_SMPRSP_TO 0x15 +#define TF_SMP_XMIT_RCV_ERR 0x16 +#define TC_PARTIAL_SG_LIST 0x17 +#define TU_ACK_NAK_TO 0x18 +#define TU_SATA_TO 0x19 +#define TF_NAK_RECV 0x1a +#define TA_I_T_NEXUS_LOSS 0x1b +#define TC_ATA_R_ERR_RECV 0x1c +#define TF_TMF_NO_CTX 0x1d +#define TA_ON_REQ 0x1e +#define TF_TMF_NO_TAG 0x1f +#define TF_TMF_TAG_FREE 0x20 +#define TF_TMF_TASK_DONE 0x21 +#define TF_TMF_NO_CONN_HANDLE 0x22 +#define TC_TASK_CLEARED 0x23 +#define TI_SYNCS_RECV 0x24 +#define TU_SYNCS_RECV 0x25 +#define TF_IRTT_TO 0x26 +#define TF_NO_SMP_CONN 0x27 +#define TF_IU_SHORT 0x28 +#define TF_DATA_OFFS_ERR 0x29 +#define TF_INV_CONN_HANDLE 0x2a +#define TF_REQUESTED_N_PENDING 0x2b + +/* 0xc1 - 0xc7: empty buffer received, + 0xd1 - 0xd7: establish nexus empty buffer received +*/ +/* This is the ESCB mask */ +#define ESCB_RECVD 0xC0 + + +/* This struct done_list_struct defines the done list entry. + * All fields are LE. + */ +struct done_list_struct { + __le16 index; /* aka transaction context */ + u8 opcode; + u8 status_block[4]; + u8 toggle; /* bit 0 */ +#define DL_TOGGLE_MASK 0x01 +} __attribute__ ((packed)); + +/* ---------- PHYS ---------- */ + +struct asd_phy { + struct asd_sas_phy sas_phy; + struct asd_phy_desc *phy_desc; /* hw profile */ + + struct sas_identify_frame *identify_frame; + struct asd_dma_tok *id_frm_tok; + + u8 frame_rcvd[ASD_EDB_SIZE]; +}; + + +#define ASD_SCB_SIZE sizeof(struct scb) +#define ASD_DDB_SIZE sizeof(struct asd_ddb_ssp_smp_target_port) + +/* Define this to 0 if you do not want NOTIFY (ENABLE SPINIP) sent. + * Default: 0x10 (it's a mask) + */ +#define ASD_NOTIFY_ENABLE_SPINUP 0x10 + +/* If enabled, set this to the interval between transmission + * of NOTIFY (ENABLE SPINUP). In units of 200 us. + */ +#define ASD_NOTIFY_TIMEOUT 2500 + +/* Initial delay after OOB, before we transmit NOTIFY (ENABLE SPINUP). + * If 0, transmit immediately. In milliseconds. + */ +#define ASD_NOTIFY_DOWN_COUNT 0 + +/* Device present timer timeout constant, 10 ms. */ +#define ASD_DEV_PRESENT_TIMEOUT 0x2710 + +#define ASD_SATA_INTERLOCK_TIMEOUT 0 + +/* How long to wait before shutting down an STP connection, unless + * an STP target sent frame(s). 50 usec. + * IGNORED by the sequencer (i.e. value 0 always). + */ +#define ASD_STP_SHUTDOWN_TIMEOUT 0x0 + +/* ATA soft reset timer timeout. 5 usec. */ +#define ASD_SRST_ASSERT_TIMEOUT 0x05 + +/* 31 sec */ +#define ASD_RCV_FIS_TIMEOUT 0x01D905C0 + +#define ASD_ONE_MILLISEC_TIMEOUT 0x03e8 + +/* COMINIT timer */ +#define ASD_TEN_MILLISEC_TIMEOUT 0x2710 +#define ASD_COMINIT_TIMEOUT ASD_TEN_MILLISEC_TIMEOUT + +/* 1 sec */ +#define ASD_SMP_RCV_TIMEOUT 0x000F4240 + +#endif diff --git a/drivers/scsi/aic94xx/aic94xx_scb.c b/drivers/scsi/aic94xx/aic94xx_scb.c new file mode 100644 index 000000000000..fc1b7438a913 --- /dev/null +++ b/drivers/scsi/aic94xx/aic94xx_scb.c @@ -0,0 +1,732 @@ +/* + * Aic94xx SAS/SATA driver SCB management. + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This file is part of the aic94xx driver. + * + * The aic94xx driver is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2 of the + * License. + * + * The aic94xx driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with the aic94xx driver; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +#include "aic94xx.h" +#include "aic94xx_reg.h" +#include "aic94xx_hwi.h" +#include "aic94xx_seq.h" + +#include "aic94xx_dump.h" + +/* ---------- EMPTY SCB ---------- */ + +#define DL_PHY_MASK 7 +#define BYTES_DMAED 0 +#define PRIMITIVE_RECVD 0x08 +#define PHY_EVENT 0x10 +#define LINK_RESET_ERROR 0x18 +#define TIMER_EVENT 0x20 +#define REQ_TASK_ABORT 0xF0 +#define REQ_DEVICE_RESET 0xF1 +#define SIGNAL_NCQ_ERROR 0xF2 +#define CLEAR_NCQ_ERROR 0xF3 + +#define PHY_EVENTS_STATUS (CURRENT_LOSS_OF_SIGNAL | CURRENT_OOB_DONE \ + | CURRENT_SPINUP_HOLD | CURRENT_GTO_TIMEOUT \ + | CURRENT_OOB_ERROR) + +static inline void get_lrate_mode(struct asd_phy *phy, u8 oob_mode) +{ + switch (oob_mode & 7) { + case PHY_SPEED_60: + /* FIXME: sas transport class doesn't have this */ + phy->sas_phy.linkrate = PHY_LINKRATE_6; + phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_6_0_GBPS; + break; + case PHY_SPEED_30: + phy->sas_phy.linkrate = PHY_LINKRATE_3; + phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_3_0_GBPS; + break; + case PHY_SPEED_15: + phy->sas_phy.linkrate = PHY_LINKRATE_1_5; + phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_1_5_GBPS; + break; + } + if (oob_mode & SAS_MODE) + phy->sas_phy.oob_mode = SAS_OOB_MODE; + else if (oob_mode & SATA_MODE) + phy->sas_phy.oob_mode = SATA_OOB_MODE; +} + +static inline void asd_phy_event_tasklet(struct asd_ascb *ascb, + struct done_list_struct *dl) +{ + struct asd_ha_struct *asd_ha = ascb->ha; + struct sas_ha_struct *sas_ha = &asd_ha->sas_ha; + int phy_id = dl->status_block[0] & DL_PHY_MASK; + struct asd_phy *phy = &asd_ha->phys[phy_id]; + + u8 oob_status = dl->status_block[1] & PHY_EVENTS_STATUS; + u8 oob_mode = dl->status_block[2]; + + switch (oob_status) { + case CURRENT_LOSS_OF_SIGNAL: + /* directly attached device was removed */ + ASD_DPRINTK("phy%d: device unplugged\n", phy_id); + asd_turn_led(asd_ha, phy_id, 0); + sas_phy_disconnected(&phy->sas_phy); + sas_ha->notify_phy_event(&phy->sas_phy, PHYE_LOSS_OF_SIGNAL); + break; + case CURRENT_OOB_DONE: + /* hot plugged device */ + asd_turn_led(asd_ha, phy_id, 1); + get_lrate_mode(phy, oob_mode); + ASD_DPRINTK("phy%d device plugged: lrate:0x%x, proto:0x%x\n", + phy_id, phy->sas_phy.linkrate, phy->sas_phy.iproto); + sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_DONE); + break; + case CURRENT_SPINUP_HOLD: + /* hot plug SATA, no COMWAKE sent */ + asd_turn_led(asd_ha, phy_id, 1); + sas_ha->notify_phy_event(&phy->sas_phy, PHYE_SPINUP_HOLD); + break; + case CURRENT_GTO_TIMEOUT: + case CURRENT_OOB_ERROR: + ASD_DPRINTK("phy%d error while OOB: oob status:0x%x\n", phy_id, + dl->status_block[1]); + asd_turn_led(asd_ha, phy_id, 0); + sas_phy_disconnected(&phy->sas_phy); + sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_ERROR); + break; + } +} + +/* If phys are enabled sparsely, this will do the right thing. */ +static inline unsigned ord_phy(struct asd_ha_struct *asd_ha, + struct asd_phy *phy) +{ + u8 enabled_mask = asd_ha->hw_prof.enabled_phys; + int i, k = 0; + + for_each_phy(enabled_mask, enabled_mask, i) { + if (&asd_ha->phys[i] == phy) + return k; + k++; + } + return 0; +} + +/** + * asd_get_attached_sas_addr -- extract/generate attached SAS address + * phy: pointer to asd_phy + * sas_addr: pointer to buffer where the SAS address is to be written + * + * This function extracts the SAS address from an IDENTIFY frame + * received. If OOB is SATA, then a SAS address is generated from the + * HA tables. + * + * LOCKING: the frame_rcvd_lock needs to be held since this parses the frame + * buffer. + */ +static inline void asd_get_attached_sas_addr(struct asd_phy *phy, u8 *sas_addr) +{ + if (phy->sas_phy.frame_rcvd[0] == 0x34 + && phy->sas_phy.oob_mode == SATA_OOB_MODE) { + struct asd_ha_struct *asd_ha = phy->sas_phy.ha->lldd_ha; + /* FIS device-to-host */ + u64 addr = be64_to_cpu(*(__be64 *)phy->phy_desc->sas_addr); + + addr += asd_ha->hw_prof.sata_name_base + ord_phy(asd_ha, phy); + *(__be64 *)sas_addr = cpu_to_be64(addr); + } else { + struct sas_identify_frame *idframe = + (void *) phy->sas_phy.frame_rcvd; + memcpy(sas_addr, idframe->sas_addr, SAS_ADDR_SIZE); + } +} + +static inline void asd_bytes_dmaed_tasklet(struct asd_ascb *ascb, + struct done_list_struct *dl, + int edb_id, int phy_id) +{ + unsigned long flags; + int edb_el = edb_id + ascb->edb_index; + struct asd_dma_tok *edb = ascb->ha->seq.edb_arr[edb_el]; + struct asd_phy *phy = &ascb->ha->phys[phy_id]; + struct sas_ha_struct *sas_ha = phy->sas_phy.ha; + u16 size = ((dl->status_block[3] & 7) << 8) | dl->status_block[2]; + + size = min(size, (u16) sizeof(phy->frame_rcvd)); + + spin_lock_irqsave(&phy->sas_phy.frame_rcvd_lock, flags); + memcpy(phy->sas_phy.frame_rcvd, edb->vaddr, size); + phy->sas_phy.frame_rcvd_size = size; + asd_get_attached_sas_addr(phy, phy->sas_phy.attached_sas_addr); + spin_unlock_irqrestore(&phy->sas_phy.frame_rcvd_lock, flags); + asd_dump_frame_rcvd(phy, dl); + sas_ha->notify_port_event(&phy->sas_phy, PORTE_BYTES_DMAED); +} + +static inline void asd_link_reset_err_tasklet(struct asd_ascb *ascb, + struct done_list_struct *dl, + int phy_id) +{ + struct asd_ha_struct *asd_ha = ascb->ha; + struct sas_ha_struct *sas_ha = &asd_ha->sas_ha; + struct asd_sas_phy *sas_phy = sas_ha->sas_phy[phy_id]; + u8 lr_error = dl->status_block[1]; + u8 retries_left = dl->status_block[2]; + + switch (lr_error) { + case 0: + ASD_DPRINTK("phy%d: Receive ID timer expired\n", phy_id); + break; + case 1: + ASD_DPRINTK("phy%d: Loss of signal\n", phy_id); + break; + case 2: + ASD_DPRINTK("phy%d: Loss of dword sync\n", phy_id); + break; + case 3: + ASD_DPRINTK("phy%d: Receive FIS timeout\n", phy_id); + break; + default: + ASD_DPRINTK("phy%d: unknown link reset error code: 0x%x\n", + phy_id, lr_error); + break; + } + + asd_turn_led(asd_ha, phy_id, 0); + sas_phy_disconnected(sas_phy); + sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); + + if (retries_left == 0) { + int num = 1; + struct asd_ascb *cp = asd_ascb_alloc_list(ascb->ha, &num, + GFP_ATOMIC); + if (!cp) { + asd_printk("%s: out of memory\n", __FUNCTION__); + goto out; + } + ASD_DPRINTK("phy%d: retries:0 performing link reset seq\n", + phy_id); + asd_build_control_phy(cp, phy_id, ENABLE_PHY); + if (asd_post_ascb_list(ascb->ha, cp, 1) != 0) + asd_ascb_free(cp); + } +out: + ; +} + +static inline void asd_primitive_rcvd_tasklet(struct asd_ascb *ascb, + struct done_list_struct *dl, + int phy_id) +{ + unsigned long flags; + struct sas_ha_struct *sas_ha = &ascb->ha->sas_ha; + struct asd_sas_phy *sas_phy = sas_ha->sas_phy[phy_id]; + u8 reg = dl->status_block[1]; + u32 cont = dl->status_block[2] << ((reg & 3)*8); + + reg &= ~3; + switch (reg) { + case LmPRMSTAT0BYTE0: + switch (cont) { + case LmBROADCH: + case LmBROADRVCH0: + case LmBROADRVCH1: + case LmBROADSES: + ASD_DPRINTK("phy%d: BROADCAST change received:%d\n", + phy_id, cont); + spin_lock_irqsave(&sas_phy->sas_prim_lock, flags); + sas_phy->sas_prim = ffs(cont); + spin_unlock_irqrestore(&sas_phy->sas_prim_lock, flags); + sas_ha->notify_port_event(sas_phy,PORTE_BROADCAST_RCVD); + break; + + case LmUNKNOWNP: + ASD_DPRINTK("phy%d: unknown BREAK\n", phy_id); + break; + + default: + ASD_DPRINTK("phy%d: primitive reg:0x%x, cont:0x%04x\n", + phy_id, reg, cont); + break; + } + break; + case LmPRMSTAT1BYTE0: + switch (cont) { + case LmHARDRST: + ASD_DPRINTK("phy%d: HARD_RESET primitive rcvd\n", + phy_id); + /* The sequencer disables all phys on that port. + * We have to re-enable the phys ourselves. */ + sas_ha->notify_port_event(sas_phy, PORTE_HARD_RESET); + break; + + default: + ASD_DPRINTK("phy%d: primitive reg:0x%x, cont:0x%04x\n", + phy_id, reg, cont); + break; + } + break; + default: + ASD_DPRINTK("unknown primitive register:0x%x\n", + dl->status_block[1]); + break; + } +} + +/** + * asd_invalidate_edb -- invalidate an EDB and if necessary post the ESCB + * @ascb: pointer to Empty SCB + * @edb_id: index [0,6] to the empty data buffer which is to be invalidated + * + * After an EDB has been invalidated, if all EDBs in this ESCB have been + * invalidated, the ESCB is posted back to the sequencer. + * Context is tasklet/IRQ. + */ +void asd_invalidate_edb(struct asd_ascb *ascb, int edb_id) +{ + struct asd_seq_data *seq = &ascb->ha->seq; + struct empty_scb *escb = &ascb->scb->escb; + struct sg_el *eb = &escb->eb[edb_id]; + struct asd_dma_tok *edb = seq->edb_arr[ascb->edb_index + edb_id]; + + memset(edb->vaddr, 0, ASD_EDB_SIZE); + eb->flags |= ELEMENT_NOT_VALID; + escb->num_valid--; + + if (escb->num_valid == 0) { + int i; + /* ASD_DPRINTK("reposting escb: vaddr: 0x%p, " + "dma_handle: 0x%08llx, next: 0x%08llx, " + "index:%d, opcode:0x%02x\n", + ascb->dma_scb.vaddr, + (u64)ascb->dma_scb.dma_handle, + le64_to_cpu(ascb->scb->header.next_scb), + le16_to_cpu(ascb->scb->header.index), + ascb->scb->header.opcode); + */ + escb->num_valid = ASD_EDBS_PER_SCB; + for (i = 0; i < ASD_EDBS_PER_SCB; i++) + escb->eb[i].flags = 0; + if (!list_empty(&ascb->list)) + list_del_init(&ascb->list); + i = asd_post_escb_list(ascb->ha, ascb, 1); + if (i) + asd_printk("couldn't post escb, err:%d\n", i); + } +} + +static void escb_tasklet_complete(struct asd_ascb *ascb, + struct done_list_struct *dl) +{ + struct asd_ha_struct *asd_ha = ascb->ha; + struct sas_ha_struct *sas_ha = &asd_ha->sas_ha; + int edb = (dl->opcode & DL_PHY_MASK) - 1; /* [0xc1,0xc7] -> [0,6] */ + u8 sb_opcode = dl->status_block[0]; + int phy_id = sb_opcode & DL_PHY_MASK; + struct asd_sas_phy *sas_phy = sas_ha->sas_phy[phy_id]; + + if (edb > 6 || edb < 0) { + ASD_DPRINTK("edb is 0x%x! dl->opcode is 0x%x\n", + edb, dl->opcode); + ASD_DPRINTK("sb_opcode : 0x%x, phy_id: 0x%x\n", + sb_opcode, phy_id); + ASD_DPRINTK("escb: vaddr: 0x%p, " + "dma_handle: 0x%llx, next: 0x%llx, " + "index:%d, opcode:0x%02x\n", + ascb->dma_scb.vaddr, + (unsigned long long)ascb->dma_scb.dma_handle, + (unsigned long long) + le64_to_cpu(ascb->scb->header.next_scb), + le16_to_cpu(ascb->scb->header.index), + ascb->scb->header.opcode); + } + + sb_opcode &= ~DL_PHY_MASK; + + switch (sb_opcode) { + case BYTES_DMAED: + ASD_DPRINTK("%s: phy%d: BYTES_DMAED\n", __FUNCTION__, phy_id); + asd_bytes_dmaed_tasklet(ascb, dl, edb, phy_id); + break; + case PRIMITIVE_RECVD: + ASD_DPRINTK("%s: phy%d: PRIMITIVE_RECVD\n", __FUNCTION__, + phy_id); + asd_primitive_rcvd_tasklet(ascb, dl, phy_id); + break; + case PHY_EVENT: + ASD_DPRINTK("%s: phy%d: PHY_EVENT\n", __FUNCTION__, phy_id); + asd_phy_event_tasklet(ascb, dl); + break; + case LINK_RESET_ERROR: + ASD_DPRINTK("%s: phy%d: LINK_RESET_ERROR\n", __FUNCTION__, + phy_id); + asd_link_reset_err_tasklet(ascb, dl, phy_id); + break; + case TIMER_EVENT: + ASD_DPRINTK("%s: phy%d: TIMER_EVENT, lost dw sync\n", + __FUNCTION__, phy_id); + asd_turn_led(asd_ha, phy_id, 0); + /* the device is gone */ + sas_phy_disconnected(sas_phy); + sas_ha->notify_port_event(sas_phy, PORTE_TIMER_EVENT); + break; + case REQ_TASK_ABORT: + ASD_DPRINTK("%s: phy%d: REQ_TASK_ABORT\n", __FUNCTION__, + phy_id); + break; + case REQ_DEVICE_RESET: + ASD_DPRINTK("%s: phy%d: REQ_DEVICE_RESET\n", __FUNCTION__, + phy_id); + break; + case SIGNAL_NCQ_ERROR: + ASD_DPRINTK("%s: phy%d: SIGNAL_NCQ_ERROR\n", __FUNCTION__, + phy_id); + break; + case CLEAR_NCQ_ERROR: + ASD_DPRINTK("%s: phy%d: CLEAR_NCQ_ERROR\n", __FUNCTION__, + phy_id); + break; + default: + ASD_DPRINTK("%s: phy%d: unknown event:0x%x\n", __FUNCTION__, + phy_id, sb_opcode); + ASD_DPRINTK("edb is 0x%x! dl->opcode is 0x%x\n", + edb, dl->opcode); + ASD_DPRINTK("sb_opcode : 0x%x, phy_id: 0x%x\n", + sb_opcode, phy_id); + ASD_DPRINTK("escb: vaddr: 0x%p, " + "dma_handle: 0x%llx, next: 0x%llx, " + "index:%d, opcode:0x%02x\n", + ascb->dma_scb.vaddr, + (unsigned long long)ascb->dma_scb.dma_handle, + (unsigned long long) + le64_to_cpu(ascb->scb->header.next_scb), + le16_to_cpu(ascb->scb->header.index), + ascb->scb->header.opcode); + + break; + } + + asd_invalidate_edb(ascb, edb); +} + +int asd_init_post_escbs(struct asd_ha_struct *asd_ha) +{ + struct asd_seq_data *seq = &asd_ha->seq; + int i; + + for (i = 0; i < seq->num_escbs; i++) + seq->escb_arr[i]->tasklet_complete = escb_tasklet_complete; + + ASD_DPRINTK("posting %d escbs\n", i); + return asd_post_escb_list(asd_ha, seq->escb_arr[0], seq->num_escbs); +} + +/* ---------- CONTROL PHY ---------- */ + +#define CONTROL_PHY_STATUS (CURRENT_DEVICE_PRESENT | CURRENT_OOB_DONE \ + | CURRENT_SPINUP_HOLD | CURRENT_GTO_TIMEOUT \ + | CURRENT_OOB_ERROR) + +/** + * control_phy_tasklet_complete -- tasklet complete for CONTROL PHY ascb + * @ascb: pointer to an ascb + * @dl: pointer to the done list entry + * + * This function completes a CONTROL PHY scb and frees the ascb. + * A note on LEDs: + * - an LED blinks if there is IO though it, + * - if a device is connected to the LED, it is lit, + * - if no device is connected to the LED, is is dimmed (off). + */ +static void control_phy_tasklet_complete(struct asd_ascb *ascb, + struct done_list_struct *dl) +{ + struct asd_ha_struct *asd_ha = ascb->ha; + struct scb *scb = ascb->scb; + struct control_phy *control_phy = &scb->control_phy; + u8 phy_id = control_phy->phy_id; + struct asd_phy *phy = &ascb->ha->phys[phy_id]; + + u8 status = dl->status_block[0]; + u8 oob_status = dl->status_block[1]; + u8 oob_mode = dl->status_block[2]; + /* u8 oob_signals= dl->status_block[3]; */ + + if (status != 0) { + ASD_DPRINTK("%s: phy%d status block opcode:0x%x\n", + __FUNCTION__, phy_id, status); + goto out; + } + + switch (control_phy->sub_func) { + case DISABLE_PHY: + asd_ha->hw_prof.enabled_phys &= ~(1 << phy_id); + asd_turn_led(asd_ha, phy_id, 0); + asd_control_led(asd_ha, phy_id, 0); + ASD_DPRINTK("%s: disable phy%d\n", __FUNCTION__, phy_id); + break; + + case ENABLE_PHY: + asd_control_led(asd_ha, phy_id, 1); + if (oob_status & CURRENT_OOB_DONE) { + asd_ha->hw_prof.enabled_phys |= (1 << phy_id); + get_lrate_mode(phy, oob_mode); + asd_turn_led(asd_ha, phy_id, 1); + ASD_DPRINTK("%s: phy%d, lrate:0x%x, proto:0x%x\n", + __FUNCTION__, phy_id,phy->sas_phy.linkrate, + phy->sas_phy.iproto); + } else if (oob_status & CURRENT_SPINUP_HOLD) { + asd_ha->hw_prof.enabled_phys |= (1 << phy_id); + asd_turn_led(asd_ha, phy_id, 1); + ASD_DPRINTK("%s: phy%d, spinup hold\n", __FUNCTION__, + phy_id); + } else if (oob_status & CURRENT_ERR_MASK) { + asd_turn_led(asd_ha, phy_id, 0); + ASD_DPRINTK("%s: phy%d: error: oob status:0x%02x\n", + __FUNCTION__, phy_id, oob_status); + } else if (oob_status & (CURRENT_HOT_PLUG_CNCT + | CURRENT_DEVICE_PRESENT)) { + asd_ha->hw_prof.enabled_phys |= (1 << phy_id); + asd_turn_led(asd_ha, phy_id, 1); + ASD_DPRINTK("%s: phy%d: hot plug or device present\n", + __FUNCTION__, phy_id); + } else { + asd_ha->hw_prof.enabled_phys |= (1 << phy_id); + asd_turn_led(asd_ha, phy_id, 0); + ASD_DPRINTK("%s: phy%d: no device present: " + "oob_status:0x%x\n", + __FUNCTION__, phy_id, oob_status); + } + break; + case RELEASE_SPINUP_HOLD: + case PHY_NO_OP: + case EXECUTE_HARD_RESET: + ASD_DPRINTK("%s: phy%d: sub_func:0x%x\n", __FUNCTION__, + phy_id, control_phy->sub_func); + /* XXX finish */ + break; + default: + ASD_DPRINTK("%s: phy%d: sub_func:0x%x?\n", __FUNCTION__, + phy_id, control_phy->sub_func); + break; + } +out: + asd_ascb_free(ascb); +} + +static inline void set_speed_mask(u8 *speed_mask, struct asd_phy_desc *pd) +{ + /* disable all speeds, then enable defaults */ + *speed_mask = SAS_SPEED_60_DIS | SAS_SPEED_30_DIS | SAS_SPEED_15_DIS + | SATA_SPEED_30_DIS | SATA_SPEED_15_DIS; + + switch (pd->max_sas_lrate) { + case PHY_LINKRATE_6: + *speed_mask &= ~SAS_SPEED_60_DIS; + default: + case PHY_LINKRATE_3: + *speed_mask &= ~SAS_SPEED_30_DIS; + case PHY_LINKRATE_1_5: + *speed_mask &= ~SAS_SPEED_15_DIS; + } + + switch (pd->min_sas_lrate) { + case PHY_LINKRATE_6: + *speed_mask |= SAS_SPEED_30_DIS; + case PHY_LINKRATE_3: + *speed_mask |= SAS_SPEED_15_DIS; + default: + case PHY_LINKRATE_1_5: + /* nothing to do */ + ; + } + + switch (pd->max_sata_lrate) { + case PHY_LINKRATE_3: + *speed_mask &= ~SATA_SPEED_30_DIS; + default: + case PHY_LINKRATE_1_5: + *speed_mask &= ~SATA_SPEED_15_DIS; + } + + switch (pd->min_sata_lrate) { + case PHY_LINKRATE_3: + *speed_mask |= SATA_SPEED_15_DIS; + default: + case PHY_LINKRATE_1_5: + /* nothing to do */ + ; + } +} + +/** + * asd_build_control_phy -- build a CONTROL PHY SCB + * @ascb: pointer to an ascb + * @phy_id: phy id to control, integer + * @subfunc: subfunction, what to actually to do the phy + * + * This function builds a CONTROL PHY scb. No allocation of any kind + * is performed. @ascb is allocated with the list function. + * The caller can override the ascb->tasklet_complete to point + * to its own callback function. It must call asd_ascb_free() + * at its tasklet complete function. + * See the default implementation. + */ +void asd_build_control_phy(struct asd_ascb *ascb, int phy_id, u8 subfunc) +{ + struct asd_phy *phy = &ascb->ha->phys[phy_id]; + struct scb *scb = ascb->scb; + struct control_phy *control_phy = &scb->control_phy; + + scb->header.opcode = CONTROL_PHY; + control_phy->phy_id = (u8) phy_id; + control_phy->sub_func = subfunc; + + switch (subfunc) { + case EXECUTE_HARD_RESET: /* 0x81 */ + case ENABLE_PHY: /* 0x01 */ + /* decide hot plug delay */ + control_phy->hot_plug_delay = HOTPLUG_DELAY_TIMEOUT; + + /* decide speed mask */ + set_speed_mask(&control_phy->speed_mask, phy->phy_desc); + + /* initiator port settings are in the hi nibble */ + if (phy->sas_phy.role == PHY_ROLE_INITIATOR) + control_phy->port_type = SAS_PROTO_ALL << 4; + else if (phy->sas_phy.role == PHY_ROLE_TARGET) + control_phy->port_type = SAS_PROTO_ALL; + else + control_phy->port_type = + (SAS_PROTO_ALL << 4) | SAS_PROTO_ALL; + + /* link reset retries, this should be nominal */ + control_phy->link_reset_retries = 10; + + case RELEASE_SPINUP_HOLD: /* 0x02 */ + /* decide the func_mask */ + control_phy->func_mask = FUNCTION_MASK_DEFAULT; + if (phy->phy_desc->flags & ASD_SATA_SPINUP_HOLD) + control_phy->func_mask &= ~SPINUP_HOLD_DIS; + else + control_phy->func_mask |= SPINUP_HOLD_DIS; + } + + control_phy->conn_handle = cpu_to_le16(0xFFFF); + + ascb->tasklet_complete = control_phy_tasklet_complete; +} + +/* ---------- INITIATE LINK ADM TASK ---------- */ + +static void link_adm_tasklet_complete(struct asd_ascb *ascb, + struct done_list_struct *dl) +{ + u8 opcode = dl->opcode; + struct initiate_link_adm *link_adm = &ascb->scb->link_adm; + u8 phy_id = link_adm->phy_id; + + if (opcode != TC_NO_ERROR) { + asd_printk("phy%d: link adm task 0x%x completed with error " + "0x%x\n", phy_id, link_adm->sub_func, opcode); + } + ASD_DPRINTK("phy%d: link adm task 0x%x: 0x%x\n", + phy_id, link_adm->sub_func, opcode); + + asd_ascb_free(ascb); +} + +void asd_build_initiate_link_adm_task(struct asd_ascb *ascb, int phy_id, + u8 subfunc) +{ + struct scb *scb = ascb->scb; + struct initiate_link_adm *link_adm = &scb->link_adm; + + scb->header.opcode = INITIATE_LINK_ADM_TASK; + + link_adm->phy_id = phy_id; + link_adm->sub_func = subfunc; + link_adm->conn_handle = cpu_to_le16(0xFFFF); + + ascb->tasklet_complete = link_adm_tasklet_complete; +} + +/* ---------- SCB timer ---------- */ + +/** + * asd_ascb_timedout -- called when a pending SCB's timer has expired + * @data: unsigned long, a pointer to the ascb in question + * + * This is the default timeout function which does the most necessary. + * Upper layers can implement their own timeout function, say to free + * resources they have with this SCB, and then call this one at the + * end of their timeout function. To do this, one should initialize + * the ascb->timer.{function, data, expires} prior to calling the post + * funcion. The timer is started by the post function. + */ +void asd_ascb_timedout(unsigned long data) +{ + struct asd_ascb *ascb = (void *) data; + struct asd_seq_data *seq = &ascb->ha->seq; + unsigned long flags; + + ASD_DPRINTK("scb:0x%x timed out\n", ascb->scb->header.opcode); + + spin_lock_irqsave(&seq->pend_q_lock, flags); + seq->pending--; + list_del_init(&ascb->list); + spin_unlock_irqrestore(&seq->pend_q_lock, flags); + + asd_ascb_free(ascb); +} + +/* ---------- CONTROL PHY ---------- */ + +/* Given the spec value, return a driver value. */ +static const int phy_func_table[] = { + [PHY_FUNC_NOP] = PHY_NO_OP, + [PHY_FUNC_LINK_RESET] = ENABLE_PHY, + [PHY_FUNC_HARD_RESET] = EXECUTE_HARD_RESET, + [PHY_FUNC_DISABLE] = DISABLE_PHY, + [PHY_FUNC_RELEASE_SPINUP_HOLD] = RELEASE_SPINUP_HOLD, +}; + +int asd_control_phy(struct asd_sas_phy *phy, enum phy_func func) +{ + struct asd_ha_struct *asd_ha = phy->ha->lldd_ha; + struct asd_ascb *ascb; + int res = 1; + + if (func == PHY_FUNC_CLEAR_ERROR_LOG) + return -ENOSYS; + + ascb = asd_ascb_alloc_list(asd_ha, &res, GFP_KERNEL); + if (!ascb) + return -ENOMEM; + + asd_build_control_phy(ascb, phy->id, phy_func_table[func]); + res = asd_post_ascb_list(asd_ha, ascb , 1); + if (res) + asd_ascb_free(ascb); + + return res; +} diff --git a/drivers/scsi/aic94xx/aic94xx_sds.c b/drivers/scsi/aic94xx/aic94xx_sds.c new file mode 100644 index 000000000000..eec1e0db0e0f --- /dev/null +++ b/drivers/scsi/aic94xx/aic94xx_sds.c @@ -0,0 +1,1136 @@ +/* + * Aic94xx SAS/SATA driver access to shared data structures and memory + * maps. + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This file is part of the aic94xx driver. + * + * The aic94xx driver is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2 of the + * License. + * + * The aic94xx driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with the aic94xx driver; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include + +#include "aic94xx.h" +#include "aic94xx_reg.h" + +/* ---------- OCM stuff ---------- */ + +struct asd_ocm_dir_ent { + u8 type; + u8 offs[3]; + u8 _r1; + u8 size[3]; +} __attribute__ ((packed)); + +struct asd_ocm_dir { + char sig[2]; + u8 _r1[2]; + u8 major; /* 0 */ + u8 minor; /* 0 */ + u8 _r2; + u8 num_de; + struct asd_ocm_dir_ent entry[15]; +} __attribute__ ((packed)); + +#define OCM_DE_OCM_DIR 0x00 +#define OCM_DE_WIN_DRVR 0x01 +#define OCM_DE_BIOS_CHIM 0x02 +#define OCM_DE_RAID_ENGN 0x03 +#define OCM_DE_BIOS_INTL 0x04 +#define OCM_DE_BIOS_CHIM_OSM 0x05 +#define OCM_DE_BIOS_CHIM_DYNAMIC 0x06 +#define OCM_DE_ADDC2C_RES0 0x07 +#define OCM_DE_ADDC2C_RES1 0x08 +#define OCM_DE_ADDC2C_RES2 0x09 +#define OCM_DE_ADDC2C_RES3 0x0A + +#define OCM_INIT_DIR_ENTRIES 5 +/*************************************************************************** +* OCM dircetory default +***************************************************************************/ +static struct asd_ocm_dir OCMDirInit = +{ + .sig = {0x4D, 0x4F}, /* signature */ + .num_de = OCM_INIT_DIR_ENTRIES, /* no. of directory entries */ +}; + +/*************************************************************************** +* OCM dircetory Entries default +***************************************************************************/ +static struct asd_ocm_dir_ent OCMDirEntriesInit[OCM_INIT_DIR_ENTRIES] = +{ + { + .type = (OCM_DE_ADDC2C_RES0), /* Entry type */ + .offs = {128}, /* Offset */ + .size = {0, 4}, /* size */ + }, + { + .type = (OCM_DE_ADDC2C_RES1), /* Entry type */ + .offs = {128, 4}, /* Offset */ + .size = {0, 4}, /* size */ + }, + { + .type = (OCM_DE_ADDC2C_RES2), /* Entry type */ + .offs = {128, 8}, /* Offset */ + .size = {0, 4}, /* size */ + }, + { + .type = (OCM_DE_ADDC2C_RES3), /* Entry type */ + .offs = {128, 12}, /* Offset */ + .size = {0, 4}, /* size */ + }, + { + .type = (OCM_DE_WIN_DRVR), /* Entry type */ + .offs = {128, 16}, /* Offset */ + .size = {128, 235, 1}, /* size */ + }, +}; + +struct asd_bios_chim_struct { + char sig[4]; + u8 major; /* 1 */ + u8 minor; /* 0 */ + u8 bios_major; + u8 bios_minor; + __le32 bios_build; + u8 flags; + u8 pci_slot; + __le16 ue_num; + __le16 ue_size; + u8 _r[14]; + /* The unit element array is right here. + */ +} __attribute__ ((packed)); + +/** + * asd_read_ocm_seg - read an on chip memory (OCM) segment + * @asd_ha: pointer to the host adapter structure + * @buffer: where to write the read data + * @offs: offset into OCM where to read from + * @size: how many bytes to read + * + * Return the number of bytes not read. Return 0 on success. + */ +static int asd_read_ocm_seg(struct asd_ha_struct *asd_ha, void *buffer, + u32 offs, int size) +{ + u8 *p = buffer; + if (unlikely(asd_ha->iospace)) + asd_read_reg_string(asd_ha, buffer, offs+OCM_BASE_ADDR, size); + else { + for ( ; size > 0; size--, offs++, p++) + *p = asd_read_ocm_byte(asd_ha, offs); + } + return size; +} + +static int asd_read_ocm_dir(struct asd_ha_struct *asd_ha, + struct asd_ocm_dir *dir, u32 offs) +{ + int err = asd_read_ocm_seg(asd_ha, dir, offs, sizeof(*dir)); + if (err) { + ASD_DPRINTK("couldn't read ocm segment\n"); + return err; + } + + if (dir->sig[0] != 'M' || dir->sig[1] != 'O') { + ASD_DPRINTK("no valid dir signature(%c%c) at start of OCM\n", + dir->sig[0], dir->sig[1]); + return -ENOENT; + } + if (dir->major != 0) { + asd_printk("unsupported major version of ocm dir:0x%x\n", + dir->major); + return -ENOENT; + } + dir->num_de &= 0xf; + return 0; +} + +/** + * asd_write_ocm_seg - write an on chip memory (OCM) segment + * @asd_ha: pointer to the host adapter structure + * @buffer: where to read the write data + * @offs: offset into OCM to write to + * @size: how many bytes to write + * + * Return the number of bytes not written. Return 0 on success. + */ +static void asd_write_ocm_seg(struct asd_ha_struct *asd_ha, void *buffer, + u32 offs, int size) +{ + u8 *p = buffer; + if (unlikely(asd_ha->iospace)) + asd_write_reg_string(asd_ha, buffer, offs+OCM_BASE_ADDR, size); + else { + for ( ; size > 0; size--, offs++, p++) + asd_write_ocm_byte(asd_ha, offs, *p); + } + return; +} + +#define THREE_TO_NUM(X) ((X)[0] | ((X)[1] << 8) | ((X)[2] << 16)) + +static int asd_find_dir_entry(struct asd_ocm_dir *dir, u8 type, + u32 *offs, u32 *size) +{ + int i; + struct asd_ocm_dir_ent *ent; + + for (i = 0; i < dir->num_de; i++) { + if (dir->entry[i].type == type) + break; + } + if (i >= dir->num_de) + return -ENOENT; + ent = &dir->entry[i]; + *offs = (u32) THREE_TO_NUM(ent->offs); + *size = (u32) THREE_TO_NUM(ent->size); + return 0; +} + +#define OCM_BIOS_CHIM_DE 2 +#define BC_BIOS_PRESENT 1 + +static int asd_get_bios_chim(struct asd_ha_struct *asd_ha, + struct asd_ocm_dir *dir) +{ + int err; + struct asd_bios_chim_struct *bc_struct; + u32 offs, size; + + err = asd_find_dir_entry(dir, OCM_BIOS_CHIM_DE, &offs, &size); + if (err) { + ASD_DPRINTK("couldn't find BIOS_CHIM dir ent\n"); + goto out; + } + err = -ENOMEM; + bc_struct = kmalloc(sizeof(*bc_struct), GFP_KERNEL); + if (!bc_struct) { + asd_printk("no memory for bios_chim struct\n"); + goto out; + } + err = asd_read_ocm_seg(asd_ha, (void *)bc_struct, offs, + sizeof(*bc_struct)); + if (err) { + ASD_DPRINTK("couldn't read ocm segment\n"); + goto out2; + } + if (strncmp(bc_struct->sig, "SOIB", 4) + && strncmp(bc_struct->sig, "IPSA", 4)) { + ASD_DPRINTK("BIOS_CHIM entry has no valid sig(%c%c%c%c)\n", + bc_struct->sig[0], bc_struct->sig[1], + bc_struct->sig[2], bc_struct->sig[3]); + err = -ENOENT; + goto out2; + } + if (bc_struct->major != 1) { + asd_printk("BIOS_CHIM unsupported major version:0x%x\n", + bc_struct->major); + err = -ENOENT; + goto out2; + } + if (bc_struct->flags & BC_BIOS_PRESENT) { + asd_ha->hw_prof.bios.present = 1; + asd_ha->hw_prof.bios.maj = bc_struct->bios_major; + asd_ha->hw_prof.bios.min = bc_struct->bios_minor; + asd_ha->hw_prof.bios.bld = le32_to_cpu(bc_struct->bios_build); + ASD_DPRINTK("BIOS present (%d,%d), %d\n", + asd_ha->hw_prof.bios.maj, + asd_ha->hw_prof.bios.min, + asd_ha->hw_prof.bios.bld); + } + asd_ha->hw_prof.ue.num = le16_to_cpu(bc_struct->ue_num); + asd_ha->hw_prof.ue.size= le16_to_cpu(bc_struct->ue_size); + ASD_DPRINTK("ue num:%d, ue size:%d\n", asd_ha->hw_prof.ue.num, + asd_ha->hw_prof.ue.size); + size = asd_ha->hw_prof.ue.num * asd_ha->hw_prof.ue.size; + if (size > 0) { + err = -ENOMEM; + asd_ha->hw_prof.ue.area = kmalloc(size, GFP_KERNEL); + if (!asd_ha->hw_prof.ue.area) + goto out2; + err = asd_read_ocm_seg(asd_ha, (void *)asd_ha->hw_prof.ue.area, + offs + sizeof(*bc_struct), size); + if (err) { + kfree(asd_ha->hw_prof.ue.area); + asd_ha->hw_prof.ue.area = NULL; + asd_ha->hw_prof.ue.num = 0; + asd_ha->hw_prof.ue.size = 0; + ASD_DPRINTK("couldn't read ue entries(%d)\n", err); + } + } +out2: + kfree(bc_struct); +out: + return err; +} + +static void +asd_hwi_initialize_ocm_dir (struct asd_ha_struct *asd_ha) +{ + int i; + + /* Zero OCM */ + for (i = 0; i < OCM_MAX_SIZE; i += 4) + asd_write_ocm_dword(asd_ha, i, 0); + + /* Write Dir */ + asd_write_ocm_seg(asd_ha, &OCMDirInit, 0, + sizeof(struct asd_ocm_dir)); + + /* Write Dir Entries */ + for (i = 0; i < OCM_INIT_DIR_ENTRIES; i++) + asd_write_ocm_seg(asd_ha, &OCMDirEntriesInit[i], + sizeof(struct asd_ocm_dir) + + (i * sizeof(struct asd_ocm_dir_ent)) + , sizeof(struct asd_ocm_dir_ent)); + +} + +static int +asd_hwi_check_ocm_access (struct asd_ha_struct *asd_ha) +{ + struct pci_dev *pcidev = asd_ha->pcidev; + u32 reg; + int err = 0; + u32 v; + + /* check if OCM has been initialized by BIOS */ + reg = asd_read_reg_dword(asd_ha, EXSICNFGR); + + if (!(reg & OCMINITIALIZED)) { + err = pci_read_config_dword(pcidev, PCIC_INTRPT_STAT, &v); + if (err) { + asd_printk("couldn't access PCIC_INTRPT_STAT of %s\n", + pci_name(pcidev)); + goto out; + } + + printk(KERN_INFO "OCM is not initialized by BIOS," + "reinitialize it and ignore it, current IntrptStatus" + "is 0x%x\n", v); + + if (v) + err = pci_write_config_dword(pcidev, + PCIC_INTRPT_STAT, v); + if (err) { + asd_printk("couldn't write PCIC_INTRPT_STAT of %s\n", + pci_name(pcidev)); + goto out; + } + + asd_hwi_initialize_ocm_dir(asd_ha); + + } +out: + return err; +} + +/** + * asd_read_ocm - read on chip memory (OCM) + * @asd_ha: pointer to the host adapter structure + */ +int asd_read_ocm(struct asd_ha_struct *asd_ha) +{ + int err; + struct asd_ocm_dir *dir; + + if (asd_hwi_check_ocm_access(asd_ha)) + return -1; + + dir = kmalloc(sizeof(*dir), GFP_KERNEL); + if (!dir) { + asd_printk("no memory for ocm dir\n"); + return -ENOMEM; + } + + err = asd_read_ocm_dir(asd_ha, dir, 0); + if (err) + goto out; + + err = asd_get_bios_chim(asd_ha, dir); +out: + kfree(dir); + return err; +} + +/* ---------- FLASH stuff ---------- */ + +#define FLASH_RESET 0xF0 +#define FLASH_MANUF_AMD 1 + +#define FLASH_SIZE 0x200000 +#define FLASH_DIR_COOKIE "*** ADAPTEC FLASH DIRECTORY *** " +#define FLASH_NEXT_ENTRY_OFFS 0x2000 +#define FLASH_MAX_DIR_ENTRIES 32 + +#define FLASH_DE_TYPE_MASK 0x3FFFFFFF +#define FLASH_DE_MS 0x120 +#define FLASH_DE_CTRL_A_USER 0xE0 + +struct asd_flash_de { + __le32 type; + __le32 offs; + __le32 pad_size; + __le32 image_size; + __le32 chksum; + u8 _r[12]; + u8 version[32]; +} __attribute__ ((packed)); + +struct asd_flash_dir { + u8 cookie[32]; + __le32 rev; /* 2 */ + __le32 chksum; + __le32 chksum_antidote; + __le32 bld; + u8 bld_id[32]; /* build id data */ + u8 ver_data[32]; /* date and time of build */ + __le32 ae_mask; + __le32 v_mask; + __le32 oc_mask; + u8 _r[20]; + struct asd_flash_de dir_entry[FLASH_MAX_DIR_ENTRIES]; +} __attribute__ ((packed)); + +struct asd_manuf_sec { + char sig[2]; /* 'S', 'M' */ + u16 offs_next; + u8 maj; /* 0 */ + u8 min; /* 0 */ + u16 chksum; + u16 size; + u8 _r[6]; + u8 sas_addr[SAS_ADDR_SIZE]; + u8 pcba_sn[ASD_PCBA_SN_SIZE]; + /* Here start the other segments */ + u8 linked_list[0]; +} __attribute__ ((packed)); + +struct asd_manuf_phy_desc { + u8 state; /* low 4 bits */ +#define MS_PHY_STATE_ENABLEABLE 0 +#define MS_PHY_STATE_REPORTED 1 +#define MS_PHY_STATE_HIDDEN 2 + u8 phy_id; + u16 _r; + u8 phy_control_0; /* mode 5 reg 0x160 */ + u8 phy_control_1; /* mode 5 reg 0x161 */ + u8 phy_control_2; /* mode 5 reg 0x162 */ + u8 phy_control_3; /* mode 5 reg 0x163 */ +} __attribute__ ((packed)); + +struct asd_manuf_phy_param { + char sig[2]; /* 'P', 'M' */ + u16 next; + u8 maj; /* 0 */ + u8 min; /* 2 */ + u8 num_phy_desc; /* 8 */ + u8 phy_desc_size; /* 8 */ + u8 _r[3]; + u8 usage_model_id; + u32 _r2; + struct asd_manuf_phy_desc phy_desc[ASD_MAX_PHYS]; +} __attribute__ ((packed)); + +#if 0 +static const char *asd_sb_type[] = { + "unknown", + "SGPIO", + [2 ... 0x7F] = "unknown", + [0x80] = "ADPT_I2C", + [0x81 ... 0xFF] = "VENDOR_UNIQUExx" +}; +#endif + +struct asd_ms_sb_desc { + u8 type; + u8 node_desc_index; + u8 conn_desc_index; + u8 _recvd[0]; +} __attribute__ ((packed)); + +#if 0 +static const char *asd_conn_type[] = { + [0 ... 7] = "unknown", + "SFF8470", + "SFF8482", + "SFF8484", + [0x80] = "PCIX_DAUGHTER0", + [0x81] = "SAS_DAUGHTER0", + [0x82 ... 0xFF] = "VENDOR_UNIQUExx" +}; + +static const char *asd_conn_location[] = { + "unknown", + "internal", + "external", + "board_to_board", +}; +#endif + +struct asd_ms_conn_desc { + u8 type; + u8 location; + u8 num_sideband_desc; + u8 size_sideband_desc; + u32 _resvd; + u8 name[16]; + struct asd_ms_sb_desc sb_desc[0]; +} __attribute__ ((packed)); + +struct asd_nd_phy_desc { + u8 vp_attch_type; + u8 attch_specific[0]; +} __attribute__ ((packed)); + +#if 0 +static const char *asd_node_type[] = { + "IOP", + "IO_CONTROLLER", + "EXPANDER", + "PORT_MULTIPLIER", + "PORT_MULTIPLEXER", + "MULTI_DROP_I2C_BUS", +}; +#endif + +struct asd_ms_node_desc { + u8 type; + u8 num_phy_desc; + u8 size_phy_desc; + u8 _resvd; + u8 name[16]; + struct asd_nd_phy_desc phy_desc[0]; +} __attribute__ ((packed)); + +struct asd_ms_conn_map { + char sig[2]; /* 'M', 'C' */ + __le16 next; + u8 maj; /* 0 */ + u8 min; /* 0 */ + __le16 cm_size; /* size of this struct */ + u8 num_conn; + u8 conn_size; + u8 num_nodes; + u8 usage_model_id; + u32 _resvd; + struct asd_ms_conn_desc conn_desc[0]; + struct asd_ms_node_desc node_desc[0]; +} __attribute__ ((packed)); + +struct asd_ctrla_phy_entry { + u8 sas_addr[SAS_ADDR_SIZE]; + u8 sas_link_rates; /* max in hi bits, min in low bits */ + u8 flags; + u8 sata_link_rates; + u8 _r[5]; +} __attribute__ ((packed)); + +struct asd_ctrla_phy_settings { + u8 id0; /* P'h'y */ + u8 _r; + u16 next; + u8 num_phys; /* number of PHYs in the PCI function */ + u8 _r2[3]; + struct asd_ctrla_phy_entry phy_ent[ASD_MAX_PHYS]; +} __attribute__ ((packed)); + +struct asd_ll_el { + u8 id0; + u8 id1; + __le16 next; + u8 something_here[0]; +} __attribute__ ((packed)); + +static int asd_poll_flash(struct asd_ha_struct *asd_ha) +{ + int c; + u8 d; + + for (c = 5000; c > 0; c--) { + d = asd_read_reg_byte(asd_ha, asd_ha->hw_prof.flash.bar); + d ^= asd_read_reg_byte(asd_ha, asd_ha->hw_prof.flash.bar); + if (!d) + return 0; + udelay(5); + } + return -ENOENT; +} + +static int asd_reset_flash(struct asd_ha_struct *asd_ha) +{ + int err; + + err = asd_poll_flash(asd_ha); + if (err) + return err; + asd_write_reg_byte(asd_ha, asd_ha->hw_prof.flash.bar, FLASH_RESET); + err = asd_poll_flash(asd_ha); + + return err; +} + +static inline int asd_read_flash_seg(struct asd_ha_struct *asd_ha, + void *buffer, u32 offs, int size) +{ + asd_read_reg_string(asd_ha, buffer, asd_ha->hw_prof.flash.bar+offs, + size); + return 0; +} + +/** + * asd_find_flash_dir - finds and reads the flash directory + * @asd_ha: pointer to the host adapter structure + * @flash_dir: pointer to flash directory structure + * + * If found, the flash directory segment will be copied to + * @flash_dir. Return 1 if found, 0 if not. + */ +static int asd_find_flash_dir(struct asd_ha_struct *asd_ha, + struct asd_flash_dir *flash_dir) +{ + u32 v; + for (v = 0; v < FLASH_SIZE; v += FLASH_NEXT_ENTRY_OFFS) { + asd_read_flash_seg(asd_ha, flash_dir, v, + sizeof(FLASH_DIR_COOKIE)-1); + if (memcmp(flash_dir->cookie, FLASH_DIR_COOKIE, + sizeof(FLASH_DIR_COOKIE)-1) == 0) { + asd_ha->hw_prof.flash.dir_offs = v; + asd_read_flash_seg(asd_ha, flash_dir, v, + sizeof(*flash_dir)); + return 1; + } + } + return 0; +} + +static int asd_flash_getid(struct asd_ha_struct *asd_ha) +{ + int err = 0; + u32 reg, inc; + + reg = asd_read_reg_dword(asd_ha, EXSICNFGR); + + if (!(reg & FLASHEX)) { + ASD_DPRINTK("flash doesn't exist\n"); + return -ENOENT; + } + if (pci_read_config_dword(asd_ha->pcidev, PCI_CONF_FLSH_BAR, + &asd_ha->hw_prof.flash.bar)) { + asd_printk("couldn't read PCI_CONF_FLSH_BAR of %s\n", + pci_name(asd_ha->pcidev)); + return -ENOENT; + } + asd_ha->hw_prof.flash.present = 1; + asd_ha->hw_prof.flash.wide = reg & FLASHW ? 1 : 0; + err = asd_reset_flash(asd_ha); + if (err) { + ASD_DPRINTK("couldn't reset flash(%d)\n", err); + return err; + } + /* Get flash info. This would most likely be AMD Am29LV family flash. + * First try the sequence for word mode. It is the same as for + * 008B (byte mode only), 160B (word mode) and 800D (word mode). + */ + reg = asd_ha->hw_prof.flash.bar; + inc = asd_ha->hw_prof.flash.wide ? 2 : 1; + asd_write_reg_byte(asd_ha, reg + 0x555, 0xAA); + asd_write_reg_byte(asd_ha, reg + 0x2AA, 0x55); + asd_write_reg_byte(asd_ha, reg + 0x555, 0x90); + asd_ha->hw_prof.flash.manuf = asd_read_reg_byte(asd_ha, reg); + asd_ha->hw_prof.flash.dev_id= asd_read_reg_byte(asd_ha,reg+inc); + asd_ha->hw_prof.flash.sec_prot = asd_read_reg_byte(asd_ha,reg+inc+inc); + /* Get out of autoselect mode. */ + err = asd_reset_flash(asd_ha); + + if (asd_ha->hw_prof.flash.manuf == FLASH_MANUF_AMD) { + ASD_DPRINTK("0Found FLASH(%d) manuf:%d, dev_id:0x%x, " + "sec_prot:%d\n", + asd_ha->hw_prof.flash.wide ? 16 : 8, + asd_ha->hw_prof.flash.manuf, + asd_ha->hw_prof.flash.dev_id, + asd_ha->hw_prof.flash.sec_prot); + return 0; + } + + /* Ok, try the sequence for byte mode of 160B and 800D. + * We may actually never need this. + */ + asd_write_reg_byte(asd_ha, reg + 0xAAA, 0xAA); + asd_write_reg_byte(asd_ha, reg + 0x555, 0x55); + asd_write_reg_byte(asd_ha, reg + 0xAAA, 0x90); + asd_ha->hw_prof.flash.manuf = asd_read_reg_byte(asd_ha, reg); + asd_ha->hw_prof.flash.dev_id = asd_read_reg_byte(asd_ha, reg + 2); + asd_ha->hw_prof.flash.sec_prot = asd_read_reg_byte(asd_ha, reg + 4); + err = asd_reset_flash(asd_ha); + + if (asd_ha->hw_prof.flash.manuf == FLASH_MANUF_AMD) { + ASD_DPRINTK("1Found FLASH(%d) manuf:%d, dev_id:0x%x, " + "sec_prot:%d\n", + asd_ha->hw_prof.flash.wide ? 16 : 8, + asd_ha->hw_prof.flash.manuf, + asd_ha->hw_prof.flash.dev_id, + asd_ha->hw_prof.flash.sec_prot); + return 0; + } + + return -ENOENT; +} + +static u16 asd_calc_flash_chksum(u16 *p, int size) +{ + u16 chksum = 0; + + while (size-- > 0) + chksum += *p++; + + return chksum; +} + + +static int asd_find_flash_de(struct asd_flash_dir *flash_dir, u32 entry_type, + u32 *offs, u32 *size) +{ + int i; + struct asd_flash_de *de; + + for (i = 0; i < FLASH_MAX_DIR_ENTRIES; i++) { + u32 type = le32_to_cpu(flash_dir->dir_entry[i].type); + + type &= FLASH_DE_TYPE_MASK; + if (type == entry_type) + break; + } + if (i >= FLASH_MAX_DIR_ENTRIES) + return -ENOENT; + de = &flash_dir->dir_entry[i]; + *offs = le32_to_cpu(de->offs); + *size = le32_to_cpu(de->pad_size); + return 0; +} + +static int asd_validate_ms(struct asd_manuf_sec *ms) +{ + if (ms->sig[0] != 'S' || ms->sig[1] != 'M') { + ASD_DPRINTK("manuf sec: no valid sig(%c%c)\n", + ms->sig[0], ms->sig[1]); + return -ENOENT; + } + if (ms->maj != 0) { + asd_printk("unsupported manuf. sector. major version:%x\n", + ms->maj); + return -ENOENT; + } + ms->offs_next = le16_to_cpu((__force __le16) ms->offs_next); + ms->chksum = le16_to_cpu((__force __le16) ms->chksum); + ms->size = le16_to_cpu((__force __le16) ms->size); + + if (asd_calc_flash_chksum((u16 *)ms, ms->size/2)) { + asd_printk("failed manuf sector checksum\n"); + } + + return 0; +} + +static int asd_ms_get_sas_addr(struct asd_ha_struct *asd_ha, + struct asd_manuf_sec *ms) +{ + memcpy(asd_ha->hw_prof.sas_addr, ms->sas_addr, SAS_ADDR_SIZE); + return 0; +} + +static int asd_ms_get_pcba_sn(struct asd_ha_struct *asd_ha, + struct asd_manuf_sec *ms) +{ + memcpy(asd_ha->hw_prof.pcba_sn, ms->pcba_sn, ASD_PCBA_SN_SIZE); + asd_ha->hw_prof.pcba_sn[ASD_PCBA_SN_SIZE] = '\0'; + return 0; +} + +/** + * asd_find_ll_by_id - find a linked list entry by its id + * @start: void pointer to the first element in the linked list + * @id0: the first byte of the id (offs 0) + * @id1: the second byte of the id (offs 1) + * + * @start has to be the _base_ element start, since the + * linked list entries's offset is from this pointer. + * Some linked list entries use only the first id, in which case + * you can pass 0xFF for the second. + */ +static void *asd_find_ll_by_id(void * const start, const u8 id0, const u8 id1) +{ + struct asd_ll_el *el = start; + + do { + switch (id1) { + default: + if (el->id1 == id1) + case 0xFF: + if (el->id0 == id0) + return el; + } + el = start + le16_to_cpu(el->next); + } while (el != start); + + return NULL; +} + +/** + * asd_ms_get_phy_params - get phy parameters from the manufacturing sector + * @asd_ha: pointer to the host adapter structure + * @manuf_sec: pointer to the manufacturing sector + * + * The manufacturing sector contans also the linked list of sub-segments, + * since when it was read, its size was taken from the flash directory, + * not from the structure size. + * + * HIDDEN phys do not count in the total count. REPORTED phys cannot + * be enabled but are reported and counted towards the total. + * ENEBLEABLE phys are enabled by default and count towards the total. + * The absolute total phy number is ASD_MAX_PHYS. hw_prof->num_phys + * merely specifies the number of phys the host adapter decided to + * report. E.g., it is possible for phys 0, 1 and 2 to be HIDDEN, + * phys 3, 4 and 5 to be REPORTED and phys 6 and 7 to be ENEBLEABLE. + * In this case ASD_MAX_PHYS is 8, hw_prof->num_phys is 5, and only 2 + * are actually enabled (enabled by default, max number of phys + * enableable in this case). + */ +static int asd_ms_get_phy_params(struct asd_ha_struct *asd_ha, + struct asd_manuf_sec *manuf_sec) +{ + int i; + int en_phys = 0; + int rep_phys = 0; + struct asd_manuf_phy_param *phy_param; + struct asd_manuf_phy_param dflt_phy_param; + + phy_param = asd_find_ll_by_id(manuf_sec, 'P', 'M'); + if (!phy_param) { + ASD_DPRINTK("ms: no phy parameters found\n"); + ASD_DPRINTK("ms: Creating default phy parameters\n"); + dflt_phy_param.sig[0] = 'P'; + dflt_phy_param.sig[1] = 'M'; + dflt_phy_param.maj = 0; + dflt_phy_param.min = 2; + dflt_phy_param.num_phy_desc = 8; + dflt_phy_param.phy_desc_size = sizeof(struct asd_manuf_phy_desc); + for (i =0; i < ASD_MAX_PHYS; i++) { + dflt_phy_param.phy_desc[i].state = 0; + dflt_phy_param.phy_desc[i].phy_id = i; + dflt_phy_param.phy_desc[i].phy_control_0 = 0xf6; + dflt_phy_param.phy_desc[i].phy_control_1 = 0x10; + dflt_phy_param.phy_desc[i].phy_control_2 = 0x43; + dflt_phy_param.phy_desc[i].phy_control_3 = 0xeb; + } + + phy_param = &dflt_phy_param; + + } + + if (phy_param->maj != 0) { + asd_printk("unsupported manuf. phy param major version:0x%x\n", + phy_param->maj); + return -ENOENT; + } + + ASD_DPRINTK("ms: num_phy_desc: %d\n", phy_param->num_phy_desc); + asd_ha->hw_prof.enabled_phys = 0; + for (i = 0; i < phy_param->num_phy_desc; i++) { + struct asd_manuf_phy_desc *pd = &phy_param->phy_desc[i]; + switch (pd->state & 0xF) { + case MS_PHY_STATE_HIDDEN: + ASD_DPRINTK("ms: phy%d: HIDDEN\n", i); + continue; + case MS_PHY_STATE_REPORTED: + ASD_DPRINTK("ms: phy%d: REPORTED\n", i); + asd_ha->hw_prof.enabled_phys &= ~(1 << i); + rep_phys++; + continue; + case MS_PHY_STATE_ENABLEABLE: + ASD_DPRINTK("ms: phy%d: ENEBLEABLE\n", i); + asd_ha->hw_prof.enabled_phys |= (1 << i); + en_phys++; + break; + } + asd_ha->hw_prof.phy_desc[i].phy_control_0 = pd->phy_control_0; + asd_ha->hw_prof.phy_desc[i].phy_control_1 = pd->phy_control_1; + asd_ha->hw_prof.phy_desc[i].phy_control_2 = pd->phy_control_2; + asd_ha->hw_prof.phy_desc[i].phy_control_3 = pd->phy_control_3; + } + asd_ha->hw_prof.max_phys = rep_phys + en_phys; + asd_ha->hw_prof.num_phys = en_phys; + ASD_DPRINTK("ms: max_phys:0x%x, num_phys:0x%x\n", + asd_ha->hw_prof.max_phys, asd_ha->hw_prof.num_phys); + ASD_DPRINTK("ms: enabled_phys:0x%x\n", asd_ha->hw_prof.enabled_phys); + return 0; +} + +static int asd_ms_get_connector_map(struct asd_ha_struct *asd_ha, + struct asd_manuf_sec *manuf_sec) +{ + struct asd_ms_conn_map *cm; + + cm = asd_find_ll_by_id(manuf_sec, 'M', 'C'); + if (!cm) { + ASD_DPRINTK("ms: no connector map found\n"); + return 0; + } + + if (cm->maj != 0) { + ASD_DPRINTK("ms: unsupported: connector map major version 0x%x" + "\n", cm->maj); + return -ENOENT; + } + + /* XXX */ + + return 0; +} + + +/** + * asd_process_ms - find and extract information from the manufacturing sector + * @asd_ha: pointer to the host adapter structure + * @flash_dir: pointer to the flash directory + */ +static int asd_process_ms(struct asd_ha_struct *asd_ha, + struct asd_flash_dir *flash_dir) +{ + int err; + struct asd_manuf_sec *manuf_sec; + u32 offs, size; + + err = asd_find_flash_de(flash_dir, FLASH_DE_MS, &offs, &size); + if (err) { + ASD_DPRINTK("Couldn't find the manuf. sector\n"); + goto out; + } + + if (size == 0) + goto out; + + err = -ENOMEM; + manuf_sec = kmalloc(size, GFP_KERNEL); + if (!manuf_sec) { + ASD_DPRINTK("no mem for manuf sector\n"); + goto out; + } + + err = asd_read_flash_seg(asd_ha, (void *)manuf_sec, offs, size); + if (err) { + ASD_DPRINTK("couldn't read manuf sector at 0x%x, size 0x%x\n", + offs, size); + goto out2; + } + + err = asd_validate_ms(manuf_sec); + if (err) { + ASD_DPRINTK("couldn't validate manuf sector\n"); + goto out2; + } + + err = asd_ms_get_sas_addr(asd_ha, manuf_sec); + if (err) { + ASD_DPRINTK("couldn't read the SAS_ADDR\n"); + goto out2; + } + ASD_DPRINTK("manuf sect SAS_ADDR %llx\n", + SAS_ADDR(asd_ha->hw_prof.sas_addr)); + + err = asd_ms_get_pcba_sn(asd_ha, manuf_sec); + if (err) { + ASD_DPRINTK("couldn't read the PCBA SN\n"); + goto out2; + } + ASD_DPRINTK("manuf sect PCBA SN %s\n", asd_ha->hw_prof.pcba_sn); + + err = asd_ms_get_phy_params(asd_ha, manuf_sec); + if (err) { + ASD_DPRINTK("ms: couldn't get phy parameters\n"); + goto out2; + } + + err = asd_ms_get_connector_map(asd_ha, manuf_sec); + if (err) { + ASD_DPRINTK("ms: couldn't get connector map\n"); + goto out2; + } + +out2: + kfree(manuf_sec); +out: + return err; +} + +static int asd_process_ctrla_phy_settings(struct asd_ha_struct *asd_ha, + struct asd_ctrla_phy_settings *ps) +{ + int i; + for (i = 0; i < ps->num_phys; i++) { + struct asd_ctrla_phy_entry *pe = &ps->phy_ent[i]; + + if (!PHY_ENABLED(asd_ha, i)) + continue; + if (*(u64 *)pe->sas_addr == 0) { + asd_ha->hw_prof.enabled_phys &= ~(1 << i); + continue; + } + /* This is the SAS address which should be sent in IDENTIFY. */ + memcpy(asd_ha->hw_prof.phy_desc[i].sas_addr, pe->sas_addr, + SAS_ADDR_SIZE); + asd_ha->hw_prof.phy_desc[i].max_sas_lrate = + (pe->sas_link_rates & 0xF0) >> 4; + asd_ha->hw_prof.phy_desc[i].min_sas_lrate = + (pe->sas_link_rates & 0x0F); + asd_ha->hw_prof.phy_desc[i].max_sata_lrate = + (pe->sata_link_rates & 0xF0) >> 4; + asd_ha->hw_prof.phy_desc[i].min_sata_lrate = + (pe->sata_link_rates & 0x0F); + asd_ha->hw_prof.phy_desc[i].flags = pe->flags; + ASD_DPRINTK("ctrla: phy%d: sas_addr: %llx, sas rate:0x%x-0x%x," + " sata rate:0x%x-0x%x, flags:0x%x\n", + i, + SAS_ADDR(asd_ha->hw_prof.phy_desc[i].sas_addr), + asd_ha->hw_prof.phy_desc[i].max_sas_lrate, + asd_ha->hw_prof.phy_desc[i].min_sas_lrate, + asd_ha->hw_prof.phy_desc[i].max_sata_lrate, + asd_ha->hw_prof.phy_desc[i].min_sata_lrate, + asd_ha->hw_prof.phy_desc[i].flags); + } + + return 0; +} + +/** + * asd_process_ctrl_a_user - process CTRL-A user settings + * @asd_ha: pointer to the host adapter structure + * @flash_dir: pointer to the flash directory + */ +static int asd_process_ctrl_a_user(struct asd_ha_struct *asd_ha, + struct asd_flash_dir *flash_dir) +{ + int err, i; + u32 offs, size; + struct asd_ll_el *el; + struct asd_ctrla_phy_settings *ps; + struct asd_ctrla_phy_settings dflt_ps; + + err = asd_find_flash_de(flash_dir, FLASH_DE_CTRL_A_USER, &offs, &size); + if (err) { + ASD_DPRINTK("couldn't find CTRL-A user settings section\n"); + ASD_DPRINTK("Creating default CTRL-A user settings section\n"); + + dflt_ps.id0 = 'h'; + dflt_ps.num_phys = 8; + for (i =0; i < ASD_MAX_PHYS; i++) { + memcpy(dflt_ps.phy_ent[i].sas_addr, + asd_ha->hw_prof.sas_addr, SAS_ADDR_SIZE); + dflt_ps.phy_ent[i].sas_link_rates = 0x98; + dflt_ps.phy_ent[i].flags = 0x0; + dflt_ps.phy_ent[i].sata_link_rates = 0x0; + } + + size = sizeof(struct asd_ctrla_phy_settings); + ps = &dflt_ps; + } + + if (size == 0) + goto out; + + err = -ENOMEM; + el = kmalloc(size, GFP_KERNEL); + if (!el) { + ASD_DPRINTK("no mem for ctrla user settings section\n"); + goto out; + } + + err = asd_read_flash_seg(asd_ha, (void *)el, offs, size); + if (err) { + ASD_DPRINTK("couldn't read ctrla phy settings section\n"); + goto out2; + } + + err = -ENOENT; + ps = asd_find_ll_by_id(el, 'h', 0xFF); + if (!ps) { + ASD_DPRINTK("couldn't find ctrla phy settings struct\n"); + goto out2; + } + + err = asd_process_ctrla_phy_settings(asd_ha, ps); + if (err) { + ASD_DPRINTK("couldn't process ctrla phy settings\n"); + goto out2; + } +out2: + kfree(el); +out: + return err; +} + +/** + * asd_read_flash - read flash memory + * @asd_ha: pointer to the host adapter structure + */ +int asd_read_flash(struct asd_ha_struct *asd_ha) +{ + int err; + struct asd_flash_dir *flash_dir; + + err = asd_flash_getid(asd_ha); + if (err) + return err; + + flash_dir = kmalloc(sizeof(*flash_dir), GFP_KERNEL); + if (!flash_dir) + return -ENOMEM; + + err = -ENOENT; + if (!asd_find_flash_dir(asd_ha, flash_dir)) { + ASD_DPRINTK("couldn't find flash directory\n"); + goto out; + } + + if (le32_to_cpu(flash_dir->rev) != 2) { + asd_printk("unsupported flash dir version:0x%x\n", + le32_to_cpu(flash_dir->rev)); + goto out; + } + + err = asd_process_ms(asd_ha, flash_dir); + if (err) { + ASD_DPRINTK("couldn't process manuf sector settings\n"); + goto out; + } + + err = asd_process_ctrl_a_user(asd_ha, flash_dir); + if (err) { + ASD_DPRINTK("couldn't process CTRL-A user settings\n"); + goto out; + } + +out: + kfree(flash_dir); + return err; +} diff --git a/drivers/scsi/aic94xx/aic94xx_seq.c b/drivers/scsi/aic94xx/aic94xx_seq.c new file mode 100644 index 000000000000..9050c6f3f6bd --- /dev/null +++ b/drivers/scsi/aic94xx/aic94xx_seq.c @@ -0,0 +1,1401 @@ +/* + * Aic94xx SAS/SATA driver sequencer interface. + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * Parts of this code adapted from David Chaw's adp94xx_seq.c. + * + * This file is licensed under GPLv2. + * + * This file is part of the aic94xx driver. + * + * The aic94xx driver is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2 of the + * License. + * + * The aic94xx driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with the aic94xx driver; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include "aic94xx_reg.h" +#include "aic94xx_hwi.h" + +#include "aic94xx_seq.h" +#include "aic94xx_dump.h" + +/* It takes no more than 0.05 us for an instruction + * to complete. So waiting for 1 us should be more than + * plenty. + */ +#define PAUSE_DELAY 1 +#define PAUSE_TRIES 1000 + +static const struct firmware *sequencer_fw; +static const char *sequencer_version; +static u16 cseq_vecs[CSEQ_NUM_VECS], lseq_vecs[LSEQ_NUM_VECS], mode2_task, + cseq_idle_loop, lseq_idle_loop; +static u8 *cseq_code, *lseq_code; +static u32 cseq_code_size, lseq_code_size; + +static u16 first_scb_site_no = 0xFFFF; +static u16 last_scb_site_no; + +/* ---------- Pause/Unpause CSEQ/LSEQ ---------- */ + +/** + * asd_pause_cseq - pause the central sequencer + * @asd_ha: pointer to host adapter structure + * + * Return 0 on success, negative on failure. + */ +int asd_pause_cseq(struct asd_ha_struct *asd_ha) +{ + int count = PAUSE_TRIES; + u32 arp2ctl; + + arp2ctl = asd_read_reg_dword(asd_ha, CARP2CTL); + if (arp2ctl & PAUSED) + return 0; + + asd_write_reg_dword(asd_ha, CARP2CTL, arp2ctl | EPAUSE); + do { + arp2ctl = asd_read_reg_dword(asd_ha, CARP2CTL); + if (arp2ctl & PAUSED) + return 0; + udelay(PAUSE_DELAY); + } while (--count > 0); + + ASD_DPRINTK("couldn't pause CSEQ\n"); + return -1; +} + +/** + * asd_unpause_cseq - unpause the central sequencer. + * @asd_ha: pointer to host adapter structure. + * + * Return 0 on success, negative on error. + */ +int asd_unpause_cseq(struct asd_ha_struct *asd_ha) +{ + u32 arp2ctl; + int count = PAUSE_TRIES; + + arp2ctl = asd_read_reg_dword(asd_ha, CARP2CTL); + if (!(arp2ctl & PAUSED)) + return 0; + + asd_write_reg_dword(asd_ha, CARP2CTL, arp2ctl & ~EPAUSE); + do { + arp2ctl = asd_read_reg_dword(asd_ha, CARP2CTL); + if (!(arp2ctl & PAUSED)) + return 0; + udelay(PAUSE_DELAY); + } while (--count > 0); + + ASD_DPRINTK("couldn't unpause the CSEQ\n"); + return -1; +} + +/** + * asd_seq_pause_lseq - pause a link sequencer + * @asd_ha: pointer to a host adapter structure + * @lseq: link sequencer of interest + * + * Return 0 on success, negative on error. + */ +static inline int asd_seq_pause_lseq(struct asd_ha_struct *asd_ha, int lseq) +{ + u32 arp2ctl; + int count = PAUSE_TRIES; + + arp2ctl = asd_read_reg_dword(asd_ha, LmARP2CTL(lseq)); + if (arp2ctl & PAUSED) + return 0; + + asd_write_reg_dword(asd_ha, LmARP2CTL(lseq), arp2ctl | EPAUSE); + do { + arp2ctl = asd_read_reg_dword(asd_ha, LmARP2CTL(lseq)); + if (arp2ctl & PAUSED) + return 0; + udelay(PAUSE_DELAY); + } while (--count > 0); + + ASD_DPRINTK("couldn't pause LSEQ %d\n", lseq); + return -1; +} + +/** + * asd_pause_lseq - pause the link sequencer(s) + * @asd_ha: pointer to host adapter structure + * @lseq_mask: mask of link sequencers of interest + * + * Return 0 on success, negative on failure. + */ +int asd_pause_lseq(struct asd_ha_struct *asd_ha, u8 lseq_mask) +{ + int lseq; + int err = 0; + + for_each_sequencer(lseq_mask, lseq_mask, lseq) { + err = asd_seq_pause_lseq(asd_ha, lseq); + if (err) + return err; + } + + return err; +} + +/** + * asd_seq_unpause_lseq - unpause a link sequencer + * @asd_ha: pointer to host adapter structure + * @lseq: link sequencer of interest + * + * Return 0 on success, negative on error. + */ +static inline int asd_seq_unpause_lseq(struct asd_ha_struct *asd_ha, int lseq) +{ + u32 arp2ctl; + int count = PAUSE_TRIES; + + arp2ctl = asd_read_reg_dword(asd_ha, LmARP2CTL(lseq)); + if (!(arp2ctl & PAUSED)) + return 0; + + asd_write_reg_dword(asd_ha, LmARP2CTL(lseq), arp2ctl & ~EPAUSE); + do { + arp2ctl = asd_read_reg_dword(asd_ha, LmARP2CTL(lseq)); + if (!(arp2ctl & PAUSED)) + return 0; + udelay(PAUSE_DELAY); + } while (--count > 0); + + ASD_DPRINTK("couldn't unpause LSEQ %d\n", lseq); + return 0; +} + + +/** + * asd_unpause_lseq - unpause the link sequencer(s) + * @asd_ha: pointer to host adapter structure + * @lseq_mask: mask of link sequencers of interest + * + * Return 0 on success, negative on failure. + */ +int asd_unpause_lseq(struct asd_ha_struct *asd_ha, u8 lseq_mask) +{ + int lseq; + int err = 0; + + for_each_sequencer(lseq_mask, lseq_mask, lseq) { + err = asd_seq_unpause_lseq(asd_ha, lseq); + if (err) + return err; + } + + return err; +} + +/* ---------- Downloading CSEQ/LSEQ microcode ---------- */ + +static int asd_verify_cseq(struct asd_ha_struct *asd_ha, const u8 *_prog, + u32 size) +{ + u32 addr = CSEQ_RAM_REG_BASE_ADR; + const u32 *prog = (u32 *) _prog; + u32 i; + + for (i = 0; i < size; i += 4, prog++, addr += 4) { + u32 val = asd_read_reg_dword(asd_ha, addr); + + if (le32_to_cpu(*prog) != val) { + asd_printk("%s: cseq verify failed at %u " + "read:0x%x, wanted:0x%x\n", + pci_name(asd_ha->pcidev), + i, val, le32_to_cpu(*prog)); + return -1; + } + } + ASD_DPRINTK("verified %d bytes, passed\n", size); + return 0; +} + +/** + * asd_verify_lseq - verify the microcode of a link sequencer + * @asd_ha: pointer to host adapter structure + * @_prog: pointer to the microcode + * @size: size of the microcode in bytes + * @lseq: link sequencer of interest + * + * The link sequencer code is accessed in 4 KB pages, which are selected + * by setting LmRAMPAGE (bits 8 and 9) of the LmBISTCTL1 register. + * The 10 KB LSEQm instruction code is mapped, page at a time, at + * LmSEQRAM address. + */ +static int asd_verify_lseq(struct asd_ha_struct *asd_ha, const u8 *_prog, + u32 size, int lseq) +{ +#define LSEQ_CODEPAGE_SIZE 4096 + int pages = (size + LSEQ_CODEPAGE_SIZE - 1) / LSEQ_CODEPAGE_SIZE; + u32 page; + const u32 *prog = (u32 *) _prog; + + for (page = 0; page < pages; page++) { + u32 i; + + asd_write_reg_dword(asd_ha, LmBISTCTL1(lseq), + page << LmRAMPAGE_LSHIFT); + for (i = 0; size > 0 && i < LSEQ_CODEPAGE_SIZE; + i += 4, prog++, size-=4) { + + u32 val = asd_read_reg_dword(asd_ha, LmSEQRAM(lseq)+i); + + if (le32_to_cpu(*prog) != val) { + asd_printk("%s: LSEQ%d verify failed " + "page:%d, offs:%d\n", + pci_name(asd_ha->pcidev), + lseq, page, i); + return -1; + } + } + } + ASD_DPRINTK("LSEQ%d verified %d bytes, passed\n", lseq, + (int)((u8 *)prog-_prog)); + return 0; +} + +/** + * asd_verify_seq -- verify CSEQ/LSEQ microcode + * @asd_ha: pointer to host adapter structure + * @prog: pointer to microcode + * @size: size of the microcode + * @lseq_mask: if 0, verify CSEQ microcode, else mask of LSEQs of interest + * + * Return 0 if microcode is correct, negative on mismatch. + */ +static int asd_verify_seq(struct asd_ha_struct *asd_ha, const u8 *prog, + u32 size, u8 lseq_mask) +{ + if (lseq_mask == 0) + return asd_verify_cseq(asd_ha, prog, size); + else { + int lseq, err; + + for_each_sequencer(lseq_mask, lseq_mask, lseq) { + err = asd_verify_lseq(asd_ha, prog, size, lseq); + if (err) + return err; + } + } + + return 0; +} +#define ASD_DMA_MODE_DOWNLOAD +#ifdef ASD_DMA_MODE_DOWNLOAD +/* This is the size of the CSEQ Mapped instruction page */ +#define MAX_DMA_OVLY_COUNT ((1U << 14)-1) +static int asd_download_seq(struct asd_ha_struct *asd_ha, + const u8 * const prog, u32 size, u8 lseq_mask) +{ + u32 comstaten; + u32 reg; + int page; + const int pages = (size + MAX_DMA_OVLY_COUNT - 1) / MAX_DMA_OVLY_COUNT; + struct asd_dma_tok *token; + int err = 0; + + if (size % 4) { + asd_printk("sequencer program not multiple of 4\n"); + return -1; + } + + asd_pause_cseq(asd_ha); + asd_pause_lseq(asd_ha, 0xFF); + + /* save, disable and clear interrupts */ + comstaten = asd_read_reg_dword(asd_ha, COMSTATEN); + asd_write_reg_dword(asd_ha, COMSTATEN, 0); + asd_write_reg_dword(asd_ha, COMSTAT, COMSTAT_MASK); + + asd_write_reg_dword(asd_ha, CHIMINTEN, RST_CHIMINTEN); + asd_write_reg_dword(asd_ha, CHIMINT, CHIMINT_MASK); + + token = asd_alloc_coherent(asd_ha, MAX_DMA_OVLY_COUNT, GFP_KERNEL); + if (!token) { + asd_printk("out of memory for dma SEQ download\n"); + err = -ENOMEM; + goto out; + } + ASD_DPRINTK("dma-ing %d bytes\n", size); + + for (page = 0; page < pages; page++) { + int i; + u32 left = min(size-page*MAX_DMA_OVLY_COUNT, + (u32)MAX_DMA_OVLY_COUNT); + + memcpy(token->vaddr, prog + page*MAX_DMA_OVLY_COUNT, left); + asd_write_reg_addr(asd_ha, OVLYDMAADR, token->dma_handle); + asd_write_reg_dword(asd_ha, OVLYDMACNT, left); + reg = !page ? RESETOVLYDMA : 0; + reg |= (STARTOVLYDMA | OVLYHALTERR); + reg |= (lseq_mask ? (((u32)lseq_mask) << 8) : OVLYCSEQ); + /* Start DMA. */ + asd_write_reg_dword(asd_ha, OVLYDMACTL, reg); + + for (i = PAUSE_TRIES*100; i > 0; i--) { + u32 dmadone = asd_read_reg_dword(asd_ha, OVLYDMACTL); + if (!(dmadone & OVLYDMAACT)) + break; + udelay(PAUSE_DELAY); + } + } + + reg = asd_read_reg_dword(asd_ha, COMSTAT); + if (!(reg & OVLYDMADONE) || (reg & OVLYERR) + || (asd_read_reg_dword(asd_ha, CHIMINT) & DEVEXCEPT_MASK)){ + asd_printk("%s: error DMA-ing sequencer code\n", + pci_name(asd_ha->pcidev)); + err = -ENODEV; + } + + asd_free_coherent(asd_ha, token); + out: + asd_write_reg_dword(asd_ha, COMSTATEN, comstaten); + + return err ? : asd_verify_seq(asd_ha, prog, size, lseq_mask); +} +#else /* ASD_DMA_MODE_DOWNLOAD */ +static int asd_download_seq(struct asd_ha_struct *asd_ha, const u8 *_prog, + u32 size, u8 lseq_mask) +{ + int i; + u32 reg = 0; + const u32 *prog = (u32 *) _prog; + + if (size % 4) { + asd_printk("sequencer program not multiple of 4\n"); + return -1; + } + + asd_pause_cseq(asd_ha); + asd_pause_lseq(asd_ha, 0xFF); + + reg |= (lseq_mask ? (((u32)lseq_mask) << 8) : OVLYCSEQ); + reg |= PIOCMODE; + + asd_write_reg_dword(asd_ha, OVLYDMACNT, size); + asd_write_reg_dword(asd_ha, OVLYDMACTL, reg); + + ASD_DPRINTK("downloading %s sequencer%s in PIO mode...\n", + lseq_mask ? "LSEQ" : "CSEQ", lseq_mask ? "s" : ""); + + for (i = 0; i < size; i += 4, prog++) + asd_write_reg_dword(asd_ha, SPIODATA, *prog); + + reg = (reg & ~PIOCMODE) | OVLYHALTERR; + asd_write_reg_dword(asd_ha, OVLYDMACTL, reg); + + return asd_verify_seq(asd_ha, _prog, size, lseq_mask); +} +#endif /* ASD_DMA_MODE_DOWNLOAD */ + +/** + * asd_seq_download_seqs - download the sequencer microcode + * @asd_ha: pointer to host adapter structure + * + * Download the central and link sequencer microcode. + */ +static int asd_seq_download_seqs(struct asd_ha_struct *asd_ha) +{ + int err; + + if (!asd_ha->hw_prof.enabled_phys) { + asd_printk("%s: no enabled phys!\n", pci_name(asd_ha->pcidev)); + return -ENODEV; + } + + /* Download the CSEQ */ + ASD_DPRINTK("downloading CSEQ...\n"); + err = asd_download_seq(asd_ha, cseq_code, cseq_code_size, 0); + if (err) { + asd_printk("CSEQ download failed:%d\n", err); + return err; + } + + /* Download the Link Sequencers code. All of the Link Sequencers + * microcode can be downloaded at the same time. + */ + ASD_DPRINTK("downloading LSEQs...\n"); + err = asd_download_seq(asd_ha, lseq_code, lseq_code_size, + asd_ha->hw_prof.enabled_phys); + if (err) { + /* Try it one at a time */ + u8 lseq; + u8 lseq_mask = asd_ha->hw_prof.enabled_phys; + + for_each_sequencer(lseq_mask, lseq_mask, lseq) { + err = asd_download_seq(asd_ha, lseq_code, + lseq_code_size, 1<> 8; + asd_write_reg_byte(asd_ha, CSEQ_FREE_SCB_MASK, (u8)cmdctx); + } + asd_write_reg_word(asd_ha, CSEQ_BUILTIN_FREE_SCB_HEAD, + first_scb_site_no); + asd_write_reg_word(asd_ha, CSEQ_BUILTIN_FREE_SCB_TAIL, + last_scb_site_no); + asd_write_reg_word(asd_ha, CSEQ_EXTENDED_FREE_SCB_HEAD, 0xFFFF); + asd_write_reg_word(asd_ha, CSEQ_EXTENDED_FREE_SCB_TAIL, 0xFFFF); + + /* CSEQ Mode independent, page 7 setup. */ + asd_write_reg_dword(asd_ha, CSEQ_EMPTY_REQ_QUEUE, 0); + asd_write_reg_dword(asd_ha, CSEQ_EMPTY_REQ_QUEUE+4, 0); + asd_write_reg_dword(asd_ha, CSEQ_EMPTY_REQ_COUNT, 0); + asd_write_reg_dword(asd_ha, CSEQ_EMPTY_REQ_COUNT+4, 0); + asd_write_reg_word(asd_ha, CSEQ_Q_EMPTY_HEAD, 0xFFFF); + asd_write_reg_word(asd_ha, CSEQ_Q_EMPTY_TAIL, 0xFFFF); + asd_write_reg_word(asd_ha, CSEQ_NEED_EMPTY_SCB, 0); + asd_write_reg_byte(asd_ha, CSEQ_EMPTY_REQ_HEAD, 0); + asd_write_reg_byte(asd_ha, CSEQ_EMPTY_REQ_TAIL, 0); + asd_write_reg_byte(asd_ha, CSEQ_EMPTY_SCB_OFFSET, 0); + asd_write_reg_word(asd_ha, CSEQ_PRIMITIVE_DATA, 0); + asd_write_reg_dword(asd_ha, CSEQ_TIMEOUT_CONST, 0); +} + +/** + * asd_init_cseq_mdp - initialize CSEQ Mode dependent pages + * @asd_ha: pointer to host adapter structure + */ +static void asd_init_cseq_mdp(struct asd_ha_struct *asd_ha) +{ + int i; + int moffs; + + moffs = CSEQ_PAGE_SIZE * 2; + + /* CSEQ Mode dependent, modes 0-7, page 0 setup. */ + for (i = 0; i < 8; i++) { + asd_write_reg_word(asd_ha, i*moffs+CSEQ_LRM_SAVE_SINDEX, 0); + asd_write_reg_word(asd_ha, i*moffs+CSEQ_LRM_SAVE_SCBPTR, 0); + asd_write_reg_word(asd_ha, i*moffs+CSEQ_Q_LINK_HEAD, 0xFFFF); + asd_write_reg_word(asd_ha, i*moffs+CSEQ_Q_LINK_TAIL, 0xFFFF); + asd_write_reg_byte(asd_ha, i*moffs+CSEQ_LRM_SAVE_SCRPAGE, 0); + } + + /* CSEQ Mode dependent, mode 0-7, page 1 and 2 shall be ignored. */ + + /* CSEQ Mode dependent, mode 8, page 0 setup. */ + asd_write_reg_word(asd_ha, CSEQ_RET_ADDR, 0xFFFF); + asd_write_reg_word(asd_ha, CSEQ_RET_SCBPTR, 0); + asd_write_reg_word(asd_ha, CSEQ_SAVE_SCBPTR, 0); + asd_write_reg_word(asd_ha, CSEQ_EMPTY_TRANS_CTX, 0); + asd_write_reg_word(asd_ha, CSEQ_RESP_LEN, 0); + asd_write_reg_word(asd_ha, CSEQ_TMF_SCBPTR, 0); + asd_write_reg_word(asd_ha, CSEQ_GLOBAL_PREV_SCB, 0); + asd_write_reg_word(asd_ha, CSEQ_GLOBAL_HEAD, 0); + asd_write_reg_word(asd_ha, CSEQ_CLEAR_LU_HEAD, 0); + asd_write_reg_byte(asd_ha, CSEQ_TMF_OPCODE, 0); + asd_write_reg_byte(asd_ha, CSEQ_SCRATCH_FLAGS, 0); + asd_write_reg_word(asd_ha, CSEQ_HSB_SITE, 0); + asd_write_reg_word(asd_ha, CSEQ_FIRST_INV_SCB_SITE, + (u16)last_scb_site_no+1); + asd_write_reg_word(asd_ha, CSEQ_FIRST_INV_DDB_SITE, + (u16)asd_ha->hw_prof.max_ddbs); + + /* CSEQ Mode dependent, mode 8, page 1 setup. */ + asd_write_reg_dword(asd_ha, CSEQ_LUN_TO_CLEAR, 0); + asd_write_reg_dword(asd_ha, CSEQ_LUN_TO_CLEAR + 4, 0); + asd_write_reg_dword(asd_ha, CSEQ_LUN_TO_CHECK, 0); + asd_write_reg_dword(asd_ha, CSEQ_LUN_TO_CHECK + 4, 0); + + /* CSEQ Mode dependent, mode 8, page 2 setup. */ + /* Tell the sequencer the bus address of the first SCB. */ + asd_write_reg_addr(asd_ha, CSEQ_HQ_NEW_POINTER, + asd_ha->seq.next_scb.dma_handle); + ASD_DPRINTK("First SCB dma_handle: 0x%llx\n", + (unsigned long long)asd_ha->seq.next_scb.dma_handle); + + /* Tell the sequencer the first Done List entry address. */ + asd_write_reg_addr(asd_ha, CSEQ_HQ_DONE_BASE, + asd_ha->seq.actual_dl->dma_handle); + + /* Initialize the Q_DONE_POINTER with the least significant + * 4 bytes of the first Done List address. */ + asd_write_reg_dword(asd_ha, CSEQ_HQ_DONE_POINTER, + ASD_BUSADDR_LO(asd_ha->seq.actual_dl->dma_handle)); + + asd_write_reg_byte(asd_ha, CSEQ_HQ_DONE_PASS, ASD_DEF_DL_TOGGLE); + + /* CSEQ Mode dependent, mode 8, page 3 shall be ignored. */ +} + +/** + * asd_init_cseq_scratch -- setup and init CSEQ + * @asd_ha: pointer to host adapter structure + * + * Setup and initialize Central sequencers. Initialiaze the mode + * independent and dependent scratch page to the default settings. + */ +static void asd_init_cseq_scratch(struct asd_ha_struct *asd_ha) +{ + asd_init_cseq_mip(asd_ha); + asd_init_cseq_mdp(asd_ha); +} + +/** + * asd_init_lseq_mip -- initialize LSEQ Mode independent pages 0-3 + * @asd_ha: pointer to host adapter structure + */ +static void asd_init_lseq_mip(struct asd_ha_struct *asd_ha, u8 lseq) +{ + int i; + + /* LSEQ Mode independent page 0 setup. */ + asd_write_reg_word(asd_ha, LmSEQ_Q_TGTXFR_HEAD(lseq), 0xFFFF); + asd_write_reg_word(asd_ha, LmSEQ_Q_TGTXFR_TAIL(lseq), 0xFFFF); + asd_write_reg_byte(asd_ha, LmSEQ_LINK_NUMBER(lseq), lseq); + asd_write_reg_byte(asd_ha, LmSEQ_SCRATCH_FLAGS(lseq), + ASD_NOTIFY_ENABLE_SPINUP); + asd_write_reg_dword(asd_ha, LmSEQ_CONNECTION_STATE(lseq),0x08000000); + asd_write_reg_word(asd_ha, LmSEQ_CONCTL(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_CONSTAT(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_CONNECTION_MODES(lseq), 0); + asd_write_reg_word(asd_ha, LmSEQ_REG1_ISR(lseq), 0); + asd_write_reg_word(asd_ha, LmSEQ_REG2_ISR(lseq), 0); + asd_write_reg_word(asd_ha, LmSEQ_REG3_ISR(lseq), 0); + asd_write_reg_dword(asd_ha, LmSEQ_REG0_ISR(lseq), 0); + asd_write_reg_dword(asd_ha, LmSEQ_REG0_ISR(lseq)+4, 0); + + /* LSEQ Mode independent page 1 setup. */ + asd_write_reg_word(asd_ha, LmSEQ_EST_NEXUS_SCBPTR0(lseq), 0xFFFF); + asd_write_reg_word(asd_ha, LmSEQ_EST_NEXUS_SCBPTR1(lseq), 0xFFFF); + asd_write_reg_word(asd_ha, LmSEQ_EST_NEXUS_SCBPTR2(lseq), 0xFFFF); + asd_write_reg_word(asd_ha, LmSEQ_EST_NEXUS_SCBPTR3(lseq), 0xFFFF); + asd_write_reg_byte(asd_ha, LmSEQ_EST_NEXUS_SCB_OPCODE0(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_EST_NEXUS_SCB_OPCODE1(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_EST_NEXUS_SCB_OPCODE2(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_EST_NEXUS_SCB_OPCODE3(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_EST_NEXUS_SCB_HEAD(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_EST_NEXUS_SCB_TAIL(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_EST_NEXUS_BUF_AVAIL(lseq), 0); + asd_write_reg_dword(asd_ha, LmSEQ_TIMEOUT_CONST(lseq), 0); + asd_write_reg_word(asd_ha, LmSEQ_ISR_SAVE_SINDEX(lseq), 0); + asd_write_reg_word(asd_ha, LmSEQ_ISR_SAVE_DINDEX(lseq), 0); + + /* LSEQ Mode Independent page 2 setup. */ + asd_write_reg_word(asd_ha, LmSEQ_EMPTY_SCB_PTR0(lseq), 0xFFFF); + asd_write_reg_word(asd_ha, LmSEQ_EMPTY_SCB_PTR1(lseq), 0xFFFF); + asd_write_reg_word(asd_ha, LmSEQ_EMPTY_SCB_PTR2(lseq), 0xFFFF); + asd_write_reg_word(asd_ha, LmSEQ_EMPTY_SCB_PTR3(lseq), 0xFFFF); + asd_write_reg_byte(asd_ha, LmSEQ_EMPTY_SCB_OPCD0(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_EMPTY_SCB_OPCD1(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_EMPTY_SCB_OPCD2(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_EMPTY_SCB_OPCD3(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_EMPTY_SCB_HEAD(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_EMPTY_SCB_TAIL(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_EMPTY_BUFS_AVAIL(lseq), 0); + for (i = 0; i < 12; i += 4) + asd_write_reg_dword(asd_ha, LmSEQ_ATA_SCR_REGS(lseq) + i, 0); + + /* LSEQ Mode Independent page 3 setup. */ + + /* Device present timer timeout */ + asd_write_reg_dword(asd_ha, LmSEQ_DEV_PRES_TMR_TOUT_CONST(lseq), + ASD_DEV_PRESENT_TIMEOUT); + + /* SATA interlock timer disabled */ + asd_write_reg_dword(asd_ha, LmSEQ_SATA_INTERLOCK_TIMEOUT(lseq), + ASD_SATA_INTERLOCK_TIMEOUT); + + /* STP shutdown timer timeout constant, IGNORED by the sequencer, + * always 0. */ + asd_write_reg_dword(asd_ha, LmSEQ_STP_SHUTDOWN_TIMEOUT(lseq), + ASD_STP_SHUTDOWN_TIMEOUT); + + asd_write_reg_dword(asd_ha, LmSEQ_SRST_ASSERT_TIMEOUT(lseq), + ASD_SRST_ASSERT_TIMEOUT); + + asd_write_reg_dword(asd_ha, LmSEQ_RCV_FIS_TIMEOUT(lseq), + ASD_RCV_FIS_TIMEOUT); + + asd_write_reg_dword(asd_ha, LmSEQ_ONE_MILLISEC_TIMEOUT(lseq), + ASD_ONE_MILLISEC_TIMEOUT); + + /* COM_INIT timer */ + asd_write_reg_dword(asd_ha, LmSEQ_TEN_MS_COMINIT_TIMEOUT(lseq), + ASD_TEN_MILLISEC_TIMEOUT); + + asd_write_reg_dword(asd_ha, LmSEQ_SMP_RCV_TIMEOUT(lseq), + ASD_SMP_RCV_TIMEOUT); +} + +/** + * asd_init_lseq_mdp -- initialize LSEQ mode dependent pages. + * @asd_ha: pointer to host adapter structure + */ +static void asd_init_lseq_mdp(struct asd_ha_struct *asd_ha, int lseq) +{ + int i; + u32 moffs; + u16 ret_addr[] = { + 0xFFFF, /* mode 0 */ + 0xFFFF, /* mode 1 */ + mode2_task, /* mode 2 */ + 0, + 0xFFFF, /* mode 4/5 */ + 0xFFFF, /* mode 4/5 */ + }; + + /* + * Mode 0,1,2 and 4/5 have common field on page 0 for the first + * 14 bytes. + */ + for (i = 0; i < 3; i++) { + moffs = i * LSEQ_MODE_SCRATCH_SIZE; + asd_write_reg_word(asd_ha, LmSEQ_RET_ADDR(lseq)+moffs, + ret_addr[i]); + asd_write_reg_word(asd_ha, LmSEQ_REG0_MODE(lseq)+moffs, 0); + asd_write_reg_word(asd_ha, LmSEQ_MODE_FLAGS(lseq)+moffs, 0); + asd_write_reg_word(asd_ha, LmSEQ_RET_ADDR2(lseq)+moffs,0xFFFF); + asd_write_reg_word(asd_ha, LmSEQ_RET_ADDR1(lseq)+moffs,0xFFFF); + asd_write_reg_byte(asd_ha, LmSEQ_OPCODE_TO_CSEQ(lseq)+moffs,0); + asd_write_reg_word(asd_ha, LmSEQ_DATA_TO_CSEQ(lseq)+moffs,0); + } + /* + * Mode 5 page 0 overlaps the same scratch page with Mode 0 page 3. + */ + asd_write_reg_word(asd_ha, + LmSEQ_RET_ADDR(lseq)+LSEQ_MODE5_PAGE0_OFFSET, + ret_addr[5]); + asd_write_reg_word(asd_ha, + LmSEQ_REG0_MODE(lseq)+LSEQ_MODE5_PAGE0_OFFSET,0); + asd_write_reg_word(asd_ha, + LmSEQ_MODE_FLAGS(lseq)+LSEQ_MODE5_PAGE0_OFFSET, 0); + asd_write_reg_word(asd_ha, + LmSEQ_RET_ADDR2(lseq)+LSEQ_MODE5_PAGE0_OFFSET,0xFFFF); + asd_write_reg_word(asd_ha, + LmSEQ_RET_ADDR1(lseq)+LSEQ_MODE5_PAGE0_OFFSET,0xFFFF); + asd_write_reg_byte(asd_ha, + LmSEQ_OPCODE_TO_CSEQ(lseq)+LSEQ_MODE5_PAGE0_OFFSET,0); + asd_write_reg_word(asd_ha, + LmSEQ_DATA_TO_CSEQ(lseq)+LSEQ_MODE5_PAGE0_OFFSET, 0); + + /* LSEQ Mode dependent 0, page 0 setup. */ + asd_write_reg_word(asd_ha, LmSEQ_FIRST_INV_DDB_SITE(lseq), + (u16)asd_ha->hw_prof.max_ddbs); + asd_write_reg_word(asd_ha, LmSEQ_EMPTY_TRANS_CTX(lseq), 0); + asd_write_reg_word(asd_ha, LmSEQ_RESP_LEN(lseq), 0); + asd_write_reg_word(asd_ha, LmSEQ_FIRST_INV_SCB_SITE(lseq), + (u16)last_scb_site_no+1); + asd_write_reg_word(asd_ha, LmSEQ_INTEN_SAVE(lseq), + (u16) LmM0INTEN_MASK & 0xFFFF0000 >> 16); + asd_write_reg_word(asd_ha, LmSEQ_INTEN_SAVE(lseq) + 2, + (u16) LmM0INTEN_MASK & 0xFFFF); + asd_write_reg_byte(asd_ha, LmSEQ_LINK_RST_FRM_LEN(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_LINK_RST_PROTOCOL(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_RESP_STATUS(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_LAST_LOADED_SGE(lseq), 0); + asd_write_reg_word(asd_ha, LmSEQ_SAVE_SCBPTR(lseq), 0); + + /* LSEQ mode dependent, mode 1, page 0 setup. */ + asd_write_reg_word(asd_ha, LmSEQ_Q_XMIT_HEAD(lseq), 0xFFFF); + asd_write_reg_word(asd_ha, LmSEQ_M1_EMPTY_TRANS_CTX(lseq), 0); + asd_write_reg_word(asd_ha, LmSEQ_INI_CONN_TAG(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_FAILED_OPEN_STATUS(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_XMIT_REQUEST_TYPE(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_M1_RESP_STATUS(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_M1_LAST_LOADED_SGE(lseq), 0); + asd_write_reg_word(asd_ha, LmSEQ_M1_SAVE_SCBPTR(lseq), 0); + + /* LSEQ Mode dependent mode 2, page 0 setup */ + asd_write_reg_word(asd_ha, LmSEQ_PORT_COUNTER(lseq), 0); + asd_write_reg_word(asd_ha, LmSEQ_PM_TABLE_PTR(lseq), 0); + asd_write_reg_word(asd_ha, LmSEQ_SATA_INTERLOCK_TMR_SAVE(lseq), 0); + asd_write_reg_word(asd_ha, LmSEQ_IP_BITL(lseq), 0); + asd_write_reg_word(asd_ha, LmSEQ_COPY_SMP_CONN_TAG(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_P0M2_OFFS1AH(lseq), 0); + + /* LSEQ Mode dependent, mode 4/5, page 0 setup. */ + asd_write_reg_byte(asd_ha, LmSEQ_SAVED_OOB_STATUS(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_SAVED_OOB_MODE(lseq), 0); + asd_write_reg_word(asd_ha, LmSEQ_Q_LINK_HEAD(lseq), 0xFFFF); + asd_write_reg_byte(asd_ha, LmSEQ_LINK_RST_ERR(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_SAVED_OOB_SIGNALS(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_SAS_RESET_MODE(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_LINK_RESET_RETRY_COUNT(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_NUM_LINK_RESET_RETRIES(lseq), 0); + asd_write_reg_word(asd_ha, LmSEQ_OOB_INT_ENABLES(lseq), 0); + /* + * Set the desired interval between transmissions of the NOTIFY + * (ENABLE SPINUP) primitive. Must be initilized to val - 1. + */ + asd_write_reg_word(asd_ha, LmSEQ_NOTIFY_TIMER_TIMEOUT(lseq), + ASD_NOTIFY_TIMEOUT - 1); + /* No delay for the first NOTIFY to be sent to the attached target. */ + asd_write_reg_word(asd_ha, LmSEQ_NOTIFY_TIMER_DOWN_COUNT(lseq), + ASD_NOTIFY_DOWN_COUNT); + + /* LSEQ Mode dependent, mode 0 and 1, page 1 setup. */ + for (i = 0; i < 2; i++) { + int j; + /* Start from Page 1 of Mode 0 and 1. */ + moffs = LSEQ_PAGE_SIZE + i*LSEQ_MODE_SCRATCH_SIZE; + /* All the fields of page 1 can be intialized to 0. */ + for (j = 0; j < LSEQ_PAGE_SIZE; j += 4) + asd_write_reg_dword(asd_ha, LmSCRATCH(lseq)+moffs+j,0); + } + + /* LSEQ Mode dependent, mode 2, page 1 setup. */ + asd_write_reg_dword(asd_ha, LmSEQ_INVALID_DWORD_COUNT(lseq), 0); + asd_write_reg_dword(asd_ha, LmSEQ_DISPARITY_ERROR_COUNT(lseq), 0); + asd_write_reg_dword(asd_ha, LmSEQ_LOSS_OF_SYNC_COUNT(lseq), 0); + + /* LSEQ Mode dependent, mode 4/5, page 1. */ + for (i = 0; i < LSEQ_PAGE_SIZE; i+=4) + asd_write_reg_dword(asd_ha, LmSEQ_FRAME_TYPE_MASK(lseq)+i, 0); + asd_write_reg_byte(asd_ha, LmSEQ_FRAME_TYPE_MASK(lseq), 0xFF); + asd_write_reg_byte(asd_ha, LmSEQ_HASHED_DEST_ADDR_MASK(lseq), 0xFF); + asd_write_reg_byte(asd_ha, LmSEQ_HASHED_DEST_ADDR_MASK(lseq)+1,0xFF); + asd_write_reg_byte(asd_ha, LmSEQ_HASHED_DEST_ADDR_MASK(lseq)+2,0xFF); + asd_write_reg_byte(asd_ha, LmSEQ_HASHED_SRC_ADDR_MASK(lseq), 0xFF); + asd_write_reg_byte(asd_ha, LmSEQ_HASHED_SRC_ADDR_MASK(lseq)+1, 0xFF); + asd_write_reg_byte(asd_ha, LmSEQ_HASHED_SRC_ADDR_MASK(lseq)+2, 0xFF); + asd_write_reg_dword(asd_ha, LmSEQ_DATA_OFFSET(lseq), 0xFFFFFFFF); + + /* LSEQ Mode dependent, mode 0, page 2 setup. */ + asd_write_reg_dword(asd_ha, LmSEQ_SMP_RCV_TIMER_TERM_TS(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_DEVICE_BITS(lseq), 0); + asd_write_reg_word(asd_ha, LmSEQ_SDB_DDB(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_SDB_NUM_TAGS(lseq), 0); + asd_write_reg_byte(asd_ha, LmSEQ_SDB_CURR_TAG(lseq), 0); + + /* LSEQ Mode Dependent 1, page 2 setup. */ + asd_write_reg_dword(asd_ha, LmSEQ_TX_ID_ADDR_FRAME(lseq), 0); + asd_write_reg_dword(asd_ha, LmSEQ_TX_ID_ADDR_FRAME(lseq)+4, 0); + asd_write_reg_dword(asd_ha, LmSEQ_OPEN_TIMER_TERM_TS(lseq), 0); + asd_write_reg_dword(asd_ha, LmSEQ_SRST_AS_TIMER_TERM_TS(lseq), 0); + asd_write_reg_dword(asd_ha, LmSEQ_LAST_LOADED_SG_EL(lseq), 0); + + /* LSEQ Mode Dependent 2, page 2 setup. */ + /* The LmSEQ_STP_SHUTDOWN_TIMER_TERM_TS is IGNORED by the sequencer, + * i.e. always 0. */ + asd_write_reg_dword(asd_ha, LmSEQ_STP_SHUTDOWN_TIMER_TERM_TS(lseq),0); + asd_write_reg_dword(asd_ha, LmSEQ_CLOSE_TIMER_TERM_TS(lseq), 0); + asd_write_reg_dword(asd_ha, LmSEQ_BREAK_TIMER_TERM_TS(lseq), 0); + asd_write_reg_dword(asd_ha, LmSEQ_DWS_RESET_TIMER_TERM_TS(lseq), 0); + asd_write_reg_dword(asd_ha,LmSEQ_SATA_INTERLOCK_TIMER_TERM_TS(lseq),0); + asd_write_reg_dword(asd_ha, LmSEQ_MCTL_TIMER_TERM_TS(lseq), 0); + + /* LSEQ Mode Dependent 4/5, page 2 setup. */ + asd_write_reg_dword(asd_ha, LmSEQ_COMINIT_TIMER_TERM_TS(lseq), 0); + asd_write_reg_dword(asd_ha, LmSEQ_RCV_ID_TIMER_TERM_TS(lseq), 0); + asd_write_reg_dword(asd_ha, LmSEQ_RCV_FIS_TIMER_TERM_TS(lseq), 0); + asd_write_reg_dword(asd_ha, LmSEQ_DEV_PRES_TIMER_TERM_TS(lseq), 0); +} + +/** + * asd_init_lseq_scratch -- setup and init link sequencers + * @asd_ha: pointer to host adapter struct + */ +static void asd_init_lseq_scratch(struct asd_ha_struct *asd_ha) +{ + u8 lseq; + u8 lseq_mask; + + lseq_mask = asd_ha->hw_prof.enabled_phys; + for_each_sequencer(lseq_mask, lseq_mask, lseq) { + asd_init_lseq_mip(asd_ha, lseq); + asd_init_lseq_mdp(asd_ha, lseq); + } +} + +/** + * asd_init_scb_sites -- initialize sequencer SCB sites (memory). + * @asd_ha: pointer to host adapter structure + * + * This should be done before initializing common CSEQ and LSEQ + * scratch since those areas depend on some computed values here, + * last_scb_site_no, etc. + */ +static void asd_init_scb_sites(struct asd_ha_struct *asd_ha) +{ + u16 site_no; + u16 max_scbs = 0; + + for (site_no = asd_ha->hw_prof.max_scbs-1; + site_no != (u16) -1; + site_no--) { + u16 i; + + /* Initialize all fields in the SCB site to 0. */ + for (i = 0; i < ASD_SCB_SIZE; i += 4) + asd_scbsite_write_dword(asd_ha, site_no, i, 0); + + /* Workaround needed by SEQ to fix a SATA issue is to exclude + * certain SCB sites from the free list. */ + if (!SCB_SITE_VALID(site_no)) + continue; + + if (last_scb_site_no == 0) + last_scb_site_no = site_no; + + /* For every SCB site, we need to initialize the + * following fields: Q_NEXT, SCB_OPCODE, SCB_FLAGS, + * and SG Element Flag. */ + + /* Q_NEXT field of the last SCB is invalidated. */ + asd_scbsite_write_word(asd_ha, site_no, 0, first_scb_site_no); + + /* Initialize SCB Site Opcode field to invalid. */ + asd_scbsite_write_byte(asd_ha, site_no, + offsetof(struct scb_header, opcode), + 0xFF); + + /* Initialize SCB Site Flags field to mean a response + * frame has been received. This means inadvertent + * frames received to be dropped. */ + asd_scbsite_write_byte(asd_ha, site_no, 0x49, 0x01); + + first_scb_site_no = site_no; + max_scbs++; + } + asd_ha->hw_prof.max_scbs = max_scbs; + ASD_DPRINTK("max_scbs:%d\n", asd_ha->hw_prof.max_scbs); + ASD_DPRINTK("first_scb_site_no:0x%x\n", first_scb_site_no); + ASD_DPRINTK("last_scb_site_no:0x%x\n", last_scb_site_no); +} + +/** + * asd_init_cseq_cio - initialize CSEQ CIO registers + * @asd_ha: pointer to host adapter structure + */ +static void asd_init_cseq_cio(struct asd_ha_struct *asd_ha) +{ + int i; + + asd_write_reg_byte(asd_ha, CSEQCOMINTEN, 0); + asd_write_reg_byte(asd_ha, CSEQDLCTL, ASD_DL_SIZE_BITS); + asd_write_reg_byte(asd_ha, CSEQDLOFFS, 0); + asd_write_reg_byte(asd_ha, CSEQDLOFFS+1, 0); + asd_ha->seq.scbpro = 0; + asd_write_reg_dword(asd_ha, SCBPRO, 0); + asd_write_reg_dword(asd_ha, CSEQCON, 0); + + /* Intialize CSEQ Mode 11 Interrupt Vectors. + * The addresses are 16 bit wide and in dword units. + * The values of their macros are in byte units. + * Thus we have to divide by 4. */ + asd_write_reg_word(asd_ha, CM11INTVEC0, cseq_vecs[0]); + asd_write_reg_word(asd_ha, CM11INTVEC1, cseq_vecs[1]); + asd_write_reg_word(asd_ha, CM11INTVEC2, cseq_vecs[2]); + + /* Enable ARP2HALTC (ARP2 Halted from Halt Code Write). */ + asd_write_reg_byte(asd_ha, CARP2INTEN, EN_ARP2HALTC); + + /* Initialize CSEQ Scratch Page to 0x04. */ + asd_write_reg_byte(asd_ha, CSCRATCHPAGE, 0x04); + + /* Initialize CSEQ Mode[0-8] Dependent registers. */ + /* Initialize Scratch Page to 0. */ + for (i = 0; i < 9; i++) + asd_write_reg_byte(asd_ha, CMnSCRATCHPAGE(i), 0); + + /* Reset the ARP2 Program Count. */ + asd_write_reg_word(asd_ha, CPRGMCNT, cseq_idle_loop); + + for (i = 0; i < 8; i++) { + /* Intialize Mode n Link m Interrupt Enable. */ + asd_write_reg_dword(asd_ha, CMnINTEN(i), EN_CMnRSPMBXF); + /* Initialize Mode n Request Mailbox. */ + asd_write_reg_dword(asd_ha, CMnREQMBX(i), 0); + } +} + +/** + * asd_init_lseq_cio -- initialize LmSEQ CIO registers + * @asd_ha: pointer to host adapter structure + */ +static void asd_init_lseq_cio(struct asd_ha_struct *asd_ha, int lseq) +{ + u8 *sas_addr; + int i; + + /* Enable ARP2HALTC (ARP2 Halted from Halt Code Write). */ + asd_write_reg_dword(asd_ha, LmARP2INTEN(lseq), EN_ARP2HALTC); + + asd_write_reg_byte(asd_ha, LmSCRATCHPAGE(lseq), 0); + + /* Initialize Mode 0,1, and 2 SCRATCHPAGE to 0. */ + for (i = 0; i < 3; i++) + asd_write_reg_byte(asd_ha, LmMnSCRATCHPAGE(lseq, i), 0); + + /* Initialize Mode 5 SCRATCHPAGE to 0. */ + asd_write_reg_byte(asd_ha, LmMnSCRATCHPAGE(lseq, 5), 0); + + asd_write_reg_dword(asd_ha, LmRSPMBX(lseq), 0); + /* Initialize Mode 0,1,2 and 5 Interrupt Enable and + * Interrupt registers. */ + asd_write_reg_dword(asd_ha, LmMnINTEN(lseq, 0), LmM0INTEN_MASK); + asd_write_reg_dword(asd_ha, LmMnINT(lseq, 0), 0xFFFFFFFF); + /* Mode 1 */ + asd_write_reg_dword(asd_ha, LmMnINTEN(lseq, 1), LmM1INTEN_MASK); + asd_write_reg_dword(asd_ha, LmMnINT(lseq, 1), 0xFFFFFFFF); + /* Mode 2 */ + asd_write_reg_dword(asd_ha, LmMnINTEN(lseq, 2), LmM2INTEN_MASK); + asd_write_reg_dword(asd_ha, LmMnINT(lseq, 2), 0xFFFFFFFF); + /* Mode 5 */ + asd_write_reg_dword(asd_ha, LmMnINTEN(lseq, 5), LmM5INTEN_MASK); + asd_write_reg_dword(asd_ha, LmMnINT(lseq, 5), 0xFFFFFFFF); + + /* Enable HW Timer status. */ + asd_write_reg_byte(asd_ha, LmHWTSTATEN(lseq), LmHWTSTATEN_MASK); + + /* Enable Primitive Status 0 and 1. */ + asd_write_reg_dword(asd_ha, LmPRIMSTAT0EN(lseq), LmPRIMSTAT0EN_MASK); + asd_write_reg_dword(asd_ha, LmPRIMSTAT1EN(lseq), LmPRIMSTAT1EN_MASK); + + /* Enable Frame Error. */ + asd_write_reg_dword(asd_ha, LmFRMERREN(lseq), LmFRMERREN_MASK); + asd_write_reg_byte(asd_ha, LmMnHOLDLVL(lseq, 0), 0x50); + + /* Initialize Mode 0 Transfer Level to 512. */ + asd_write_reg_byte(asd_ha, LmMnXFRLVL(lseq, 0), LmMnXFRLVL_512); + /* Initialize Mode 1 Transfer Level to 256. */ + asd_write_reg_byte(asd_ha, LmMnXFRLVL(lseq, 1), LmMnXFRLVL_256); + + /* Initialize Program Count. */ + asd_write_reg_word(asd_ha, LmPRGMCNT(lseq), lseq_idle_loop); + + /* Enable Blind SG Move. */ + asd_write_reg_dword(asd_ha, LmMODECTL(lseq), LmBLIND48); + asd_write_reg_word(asd_ha, LmM3SATATIMER(lseq), + ASD_SATA_INTERLOCK_TIMEOUT); + + (void) asd_read_reg_dword(asd_ha, LmREQMBX(lseq)); + + /* Clear Primitive Status 0 and 1. */ + asd_write_reg_dword(asd_ha, LmPRMSTAT0(lseq), 0xFFFFFFFF); + asd_write_reg_dword(asd_ha, LmPRMSTAT1(lseq), 0xFFFFFFFF); + + /* Clear HW Timer status. */ + asd_write_reg_byte(asd_ha, LmHWTSTAT(lseq), 0xFF); + + /* Clear DMA Errors for Mode 0 and 1. */ + asd_write_reg_byte(asd_ha, LmMnDMAERRS(lseq, 0), 0xFF); + asd_write_reg_byte(asd_ha, LmMnDMAERRS(lseq, 1), 0xFF); + + /* Clear SG DMA Errors for Mode 0 and 1. */ + asd_write_reg_byte(asd_ha, LmMnSGDMAERRS(lseq, 0), 0xFF); + asd_write_reg_byte(asd_ha, LmMnSGDMAERRS(lseq, 1), 0xFF); + + /* Clear Mode 0 Buffer Parity Error. */ + asd_write_reg_byte(asd_ha, LmMnBUFSTAT(lseq, 0), LmMnBUFPERR); + + /* Clear Mode 0 Frame Error register. */ + asd_write_reg_dword(asd_ha, LmMnFRMERR(lseq, 0), 0xFFFFFFFF); + + /* Reset LSEQ external interrupt arbiter. */ + asd_write_reg_byte(asd_ha, LmARP2INTCTL(lseq), RSTINTCTL); + + /* Set the Phy SAS for the LmSEQ WWN. */ + sas_addr = asd_ha->phys[lseq].phy_desc->sas_addr; + for (i = 0; i < SAS_ADDR_SIZE; i++) + asd_write_reg_byte(asd_ha, LmWWN(lseq) + i, sas_addr[i]); + + /* Set the Transmit Size to 1024 bytes, 0 = 256 Dwords. */ + asd_write_reg_byte(asd_ha, LmMnXMTSIZE(lseq, 1), 0); + + /* Set the Bus Inactivity Time Limit Timer. */ + asd_write_reg_word(asd_ha, LmBITL_TIMER(lseq), 9); + + /* Enable SATA Port Multiplier. */ + asd_write_reg_byte(asd_ha, LmMnSATAFS(lseq, 1), 0x80); + + /* Initialize Interrupt Vector[0-10] address in Mode 3. + * See the comment on CSEQ_INT_* */ + asd_write_reg_word(asd_ha, LmM3INTVEC0(lseq), lseq_vecs[0]); + asd_write_reg_word(asd_ha, LmM3INTVEC1(lseq), lseq_vecs[1]); + asd_write_reg_word(asd_ha, LmM3INTVEC2(lseq), lseq_vecs[2]); + asd_write_reg_word(asd_ha, LmM3INTVEC3(lseq), lseq_vecs[3]); + asd_write_reg_word(asd_ha, LmM3INTVEC4(lseq), lseq_vecs[4]); + asd_write_reg_word(asd_ha, LmM3INTVEC5(lseq), lseq_vecs[5]); + asd_write_reg_word(asd_ha, LmM3INTVEC6(lseq), lseq_vecs[6]); + asd_write_reg_word(asd_ha, LmM3INTVEC7(lseq), lseq_vecs[7]); + asd_write_reg_word(asd_ha, LmM3INTVEC8(lseq), lseq_vecs[8]); + asd_write_reg_word(asd_ha, LmM3INTVEC9(lseq), lseq_vecs[9]); + asd_write_reg_word(asd_ha, LmM3INTVEC10(lseq), lseq_vecs[10]); + /* + * Program the Link LED control, applicable only for + * Chip Rev. B or later. + */ + asd_write_reg_dword(asd_ha, LmCONTROL(lseq), + (LEDTIMER | LEDMODE_TXRX | LEDTIMERS_100ms)); + + /* Set the Align Rate for SAS and STP mode. */ + asd_write_reg_byte(asd_ha, LmM1SASALIGN(lseq), SAS_ALIGN_DEFAULT); + asd_write_reg_byte(asd_ha, LmM1STPALIGN(lseq), STP_ALIGN_DEFAULT); +} + + +/** + * asd_post_init_cseq -- clear CSEQ Mode n Int. status and Response mailbox + * @asd_ha: pointer to host adapter struct + */ +static void asd_post_init_cseq(struct asd_ha_struct *asd_ha) +{ + int i; + + for (i = 0; i < 8; i++) + asd_write_reg_dword(asd_ha, CMnINT(i), 0xFFFFFFFF); + for (i = 0; i < 8; i++) + asd_read_reg_dword(asd_ha, CMnRSPMBX(i)); + /* Reset the external interrupt arbiter. */ + asd_write_reg_byte(asd_ha, CARP2INTCTL, RSTINTCTL); +} + +/** + * asd_init_ddb_0 -- initialize DDB 0 + * @asd_ha: pointer to host adapter structure + * + * Initialize DDB site 0 which is used internally by the sequencer. + */ +static void asd_init_ddb_0(struct asd_ha_struct *asd_ha) +{ + int i; + + /* Zero out the DDB explicitly */ + for (i = 0; i < sizeof(struct asd_ddb_seq_shared); i+=4) + asd_ddbsite_write_dword(asd_ha, 0, i, 0); + + asd_ddbsite_write_word(asd_ha, 0, + offsetof(struct asd_ddb_seq_shared, q_free_ddb_head), 0); + asd_ddbsite_write_word(asd_ha, 0, + offsetof(struct asd_ddb_seq_shared, q_free_ddb_tail), + asd_ha->hw_prof.max_ddbs-1); + asd_ddbsite_write_word(asd_ha, 0, + offsetof(struct asd_ddb_seq_shared, q_free_ddb_cnt), 0); + asd_ddbsite_write_word(asd_ha, 0, + offsetof(struct asd_ddb_seq_shared, q_used_ddb_head), 0xFFFF); + asd_ddbsite_write_word(asd_ha, 0, + offsetof(struct asd_ddb_seq_shared, q_used_ddb_tail), 0xFFFF); + asd_ddbsite_write_word(asd_ha, 0, + offsetof(struct asd_ddb_seq_shared, shared_mem_lock), 0); + asd_ddbsite_write_word(asd_ha, 0, + offsetof(struct asd_ddb_seq_shared, smp_conn_tag), 0); + asd_ddbsite_write_word(asd_ha, 0, + offsetof(struct asd_ddb_seq_shared, est_nexus_buf_cnt), 0); + asd_ddbsite_write_word(asd_ha, 0, + offsetof(struct asd_ddb_seq_shared, est_nexus_buf_thresh), + asd_ha->hw_prof.num_phys * 2); + asd_ddbsite_write_byte(asd_ha, 0, + offsetof(struct asd_ddb_seq_shared, settable_max_contexts),0); + asd_ddbsite_write_byte(asd_ha, 0, + offsetof(struct asd_ddb_seq_shared, conn_not_active), 0xFF); + asd_ddbsite_write_byte(asd_ha, 0, + offsetof(struct asd_ddb_seq_shared, phy_is_up), 0x00); + /* DDB 0 is reserved */ + set_bit(0, asd_ha->hw_prof.ddb_bitmap); +} + +/** + * asd_seq_setup_seqs -- setup and initialize central and link sequencers + * @asd_ha: pointer to host adapter structure + */ +static void asd_seq_setup_seqs(struct asd_ha_struct *asd_ha) +{ + int lseq; + u8 lseq_mask; + + /* Initialize SCB sites. Done first to compute some values which + * the rest of the init code depends on. */ + asd_init_scb_sites(asd_ha); + + /* Initialize CSEQ Scratch RAM registers. */ + asd_init_cseq_scratch(asd_ha); + + /* Initialize LmSEQ Scratch RAM registers. */ + asd_init_lseq_scratch(asd_ha); + + /* Initialize CSEQ CIO registers. */ + asd_init_cseq_cio(asd_ha); + + asd_init_ddb_0(asd_ha); + + /* Initialize LmSEQ CIO registers. */ + lseq_mask = asd_ha->hw_prof.enabled_phys; + for_each_sequencer(lseq_mask, lseq_mask, lseq) + asd_init_lseq_cio(asd_ha, lseq); + asd_post_init_cseq(asd_ha); +} + + +/** + * asd_seq_start_cseq -- start the central sequencer, CSEQ + * @asd_ha: pointer to host adapter structure + */ +static int asd_seq_start_cseq(struct asd_ha_struct *asd_ha) +{ + /* Reset the ARP2 instruction to location zero. */ + asd_write_reg_word(asd_ha, CPRGMCNT, cseq_idle_loop); + + /* Unpause the CSEQ */ + return asd_unpause_cseq(asd_ha); +} + +/** + * asd_seq_start_lseq -- start a link sequencer + * @asd_ha: pointer to host adapter structure + * @lseq: the link sequencer of interest + */ +static int asd_seq_start_lseq(struct asd_ha_struct *asd_ha, int lseq) +{ + /* Reset the ARP2 instruction to location zero. */ + asd_write_reg_word(asd_ha, LmPRGMCNT(lseq), lseq_idle_loop); + + /* Unpause the LmSEQ */ + return asd_seq_unpause_lseq(asd_ha, lseq); +} + +static int asd_request_firmware(struct asd_ha_struct *asd_ha) +{ + int err, i; + struct sequencer_file_header header, *hdr_ptr; + u32 csum = 0; + u16 *ptr_cseq_vecs, *ptr_lseq_vecs; + + if (sequencer_fw) + /* already loaded */ + return 0; + + err = request_firmware(&sequencer_fw, + SAS_RAZOR_SEQUENCER_FW_FILE, + &asd_ha->pcidev->dev); + if (err) + return err; + + hdr_ptr = (struct sequencer_file_header *)sequencer_fw->data; + + header.csum = le32_to_cpu(hdr_ptr->csum); + header.major = le32_to_cpu(hdr_ptr->major); + header.minor = le32_to_cpu(hdr_ptr->minor); + sequencer_version = hdr_ptr->version; + header.cseq_table_offset = le32_to_cpu(hdr_ptr->cseq_table_offset); + header.cseq_table_size = le32_to_cpu(hdr_ptr->cseq_table_size); + header.lseq_table_offset = le32_to_cpu(hdr_ptr->lseq_table_offset); + header.lseq_table_size = le32_to_cpu(hdr_ptr->lseq_table_size); + header.cseq_code_offset = le32_to_cpu(hdr_ptr->cseq_code_offset); + header.cseq_code_size = le32_to_cpu(hdr_ptr->cseq_code_size); + header.lseq_code_offset = le32_to_cpu(hdr_ptr->lseq_code_offset); + header.lseq_code_size = le32_to_cpu(hdr_ptr->lseq_code_size); + header.mode2_task = le16_to_cpu(hdr_ptr->mode2_task); + header.cseq_idle_loop = le16_to_cpu(hdr_ptr->cseq_idle_loop); + header.lseq_idle_loop = le16_to_cpu(hdr_ptr->lseq_idle_loop); + + for (i = sizeof(header.csum); i < sequencer_fw->size; i++) + csum += sequencer_fw->data[i]; + + if (csum != header.csum) { + asd_printk("Firmware file checksum mismatch\n"); + return -EINVAL; + } + + if (header.cseq_table_size != CSEQ_NUM_VECS || + header.lseq_table_size != LSEQ_NUM_VECS) { + asd_printk("Firmware file table size mismatch\n"); + return -EINVAL; + } + + ptr_cseq_vecs = (u16 *)&sequencer_fw->data[header.cseq_table_offset]; + ptr_lseq_vecs = (u16 *)&sequencer_fw->data[header.lseq_table_offset]; + mode2_task = header.mode2_task; + cseq_idle_loop = header.cseq_idle_loop; + lseq_idle_loop = header.lseq_idle_loop; + + for (i = 0; i < CSEQ_NUM_VECS; i++) + cseq_vecs[i] = le16_to_cpu(ptr_cseq_vecs[i]); + + for (i = 0; i < LSEQ_NUM_VECS; i++) + lseq_vecs[i] = le16_to_cpu(ptr_lseq_vecs[i]); + + cseq_code = &sequencer_fw->data[header.cseq_code_offset]; + cseq_code_size = header.cseq_code_size; + lseq_code = &sequencer_fw->data[header.lseq_code_offset]; + lseq_code_size = header.lseq_code_size; + + return 0; +} + +int asd_init_seqs(struct asd_ha_struct *asd_ha) +{ + int err; + + err = asd_request_firmware(asd_ha); + + if (err) { + asd_printk("Failed to load sequencer firmware file %s, error %d\n", + SAS_RAZOR_SEQUENCER_FW_FILE, err); + return err; + } + + asd_printk("using sequencer %s\n", sequencer_version); + err = asd_seq_download_seqs(asd_ha); + if (err) { + asd_printk("couldn't download sequencers for %s\n", + pci_name(asd_ha->pcidev)); + return err; + } + + asd_seq_setup_seqs(asd_ha); + + return 0; +} + +int asd_start_seqs(struct asd_ha_struct *asd_ha) +{ + int err; + u8 lseq_mask; + int lseq; + + err = asd_seq_start_cseq(asd_ha); + if (err) { + asd_printk("couldn't start CSEQ for %s\n", + pci_name(asd_ha->pcidev)); + return err; + } + + lseq_mask = asd_ha->hw_prof.enabled_phys; + for_each_sequencer(lseq_mask, lseq_mask, lseq) { + err = asd_seq_start_lseq(asd_ha, lseq); + if (err) { + asd_printk("coudln't start LSEQ %d for %s\n", lseq, + pci_name(asd_ha->pcidev)); + return err; + } + } + + return 0; +} + +/** + * asd_update_port_links -- update port_map_by_links and phy_is_up + * @sas_phy: pointer to the phy which has been added to a port + * + * 1) When a link reset has completed and we got BYTES DMAED with a + * valid frame we call this function for that phy, to indicate that + * the phy is up, i.e. we update the phy_is_up in DDB 0. The + * sequencer checks phy_is_up when pending SCBs are to be sent, and + * when an open address frame has been received. + * + * 2) When we know of ports, we call this function to update the map + * of phys participaing in that port, i.e. we update the + * port_map_by_links in DDB 0. When a HARD_RESET primitive has been + * received, the sequencer disables all phys in that port. + * port_map_by_links is also used as the conn_mask byte in the + * initiator/target port DDB. + */ +void asd_update_port_links(struct asd_sas_phy *sas_phy) +{ + struct asd_ha_struct *asd_ha = sas_phy->ha->lldd_ha; + const u8 phy_mask = (u8) sas_phy->port->phy_mask; + u8 phy_is_up; + u8 mask; + int i, err; + + for_each_phy(phy_mask, mask, i) + asd_ddbsite_write_byte(asd_ha, 0, + offsetof(struct asd_ddb_seq_shared, + port_map_by_links)+i,phy_mask); + + for (i = 0; i < 12; i++) { + phy_is_up = asd_ddbsite_read_byte(asd_ha, 0, + offsetof(struct asd_ddb_seq_shared, phy_is_up)); + err = asd_ddbsite_update_byte(asd_ha, 0, + offsetof(struct asd_ddb_seq_shared, phy_is_up), + phy_is_up, + phy_is_up | phy_mask); + if (!err) + break; + else if (err == -EFAULT) { + asd_printk("phy_is_up: parity error in DDB 0\n"); + break; + } + } + + if (err) + asd_printk("couldn't update DDB 0:error:%d\n", err); +} diff --git a/drivers/scsi/aic94xx/aic94xx_seq.h b/drivers/scsi/aic94xx/aic94xx_seq.h new file mode 100644 index 000000000000..42281c36153b --- /dev/null +++ b/drivers/scsi/aic94xx/aic94xx_seq.h @@ -0,0 +1,70 @@ +/* + * Aic94xx SAS/SATA driver sequencer interface header file. + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This file is part of the aic94xx driver. + * + * The aic94xx driver is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2 of the + * License. + * + * The aic94xx driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with the aic94xx driver; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _AIC94XX_SEQ_H_ +#define _AIC94XX_SEQ_H_ + +#define CSEQ_NUM_VECS 3 +#define LSEQ_NUM_VECS 11 + +#define SAS_RAZOR_SEQUENCER_FW_FILE "aic94xx-seq.fw" + +/* Note: All quantites in the sequencer file are little endian */ +struct sequencer_file_header { + /* Checksum of the entire contents of the sequencer excluding + * these four bytes */ + u32 csum; + /* numeric major version */ + u32 major; + /* numeric minor version */ + u32 minor; + /* version string printed by driver */ + char version[16]; + u32 cseq_table_offset; + u32 cseq_table_size; + u32 lseq_table_offset; + u32 lseq_table_size; + u32 cseq_code_offset; + u32 cseq_code_size; + u32 lseq_code_offset; + u32 lseq_code_size; + u16 mode2_task; + u16 cseq_idle_loop; + u16 lseq_idle_loop; +} __attribute__((packed)); + +#ifdef __KERNEL__ +int asd_pause_cseq(struct asd_ha_struct *asd_ha); +int asd_unpause_cseq(struct asd_ha_struct *asd_ha); +int asd_pause_lseq(struct asd_ha_struct *asd_ha, u8 lseq_mask); +int asd_unpause_lseq(struct asd_ha_struct *asd_ha, u8 lseq_mask); +int asd_init_seqs(struct asd_ha_struct *asd_ha); +int asd_start_seqs(struct asd_ha_struct *asd_ha); + +void asd_update_port_links(struct asd_sas_phy *phy); +#endif + +#endif diff --git a/drivers/scsi/aic94xx/aic94xx_task.c b/drivers/scsi/aic94xx/aic94xx_task.c new file mode 100644 index 000000000000..285e70dae933 --- /dev/null +++ b/drivers/scsi/aic94xx/aic94xx_task.c @@ -0,0 +1,642 @@ +/* + * Aic94xx SAS/SATA Tasks + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This file is part of the aic94xx driver. + * + * The aic94xx driver is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2 of the + * License. + * + * The aic94xx driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with the aic94xx driver; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include "aic94xx.h" +#include "aic94xx_sas.h" +#include "aic94xx_hwi.h" + +static void asd_unbuild_ata_ascb(struct asd_ascb *a); +static void asd_unbuild_smp_ascb(struct asd_ascb *a); +static void asd_unbuild_ssp_ascb(struct asd_ascb *a); + +static inline void asd_can_dequeue(struct asd_ha_struct *asd_ha, int num) +{ + unsigned long flags; + + spin_lock_irqsave(&asd_ha->seq.pend_q_lock, flags); + asd_ha->seq.can_queue += num; + spin_unlock_irqrestore(&asd_ha->seq.pend_q_lock, flags); +} + +/* PCI_DMA_... to our direction translation. + */ +static const u8 data_dir_flags[] = { + [PCI_DMA_BIDIRECTIONAL] = DATA_DIR_BYRECIPIENT, /* UNSPECIFIED */ + [PCI_DMA_TODEVICE] = DATA_DIR_OUT, /* OUTBOUND */ + [PCI_DMA_FROMDEVICE] = DATA_DIR_IN, /* INBOUND */ + [PCI_DMA_NONE] = DATA_DIR_NONE, /* NO TRANSFER */ +}; + +static inline int asd_map_scatterlist(struct sas_task *task, + struct sg_el *sg_arr, + unsigned long gfp_flags) +{ + struct asd_ascb *ascb = task->lldd_task; + struct asd_ha_struct *asd_ha = ascb->ha; + struct scatterlist *sc; + int num_sg, res; + + if (task->data_dir == PCI_DMA_NONE) + return 0; + + if (task->num_scatter == 0) { + void *p = task->scatter; + dma_addr_t dma = pci_map_single(asd_ha->pcidev, p, + task->total_xfer_len, + task->data_dir); + sg_arr[0].bus_addr = cpu_to_le64((u64)dma); + sg_arr[0].size = cpu_to_le32(task->total_xfer_len); + sg_arr[0].flags |= ASD_SG_EL_LIST_EOL; + return 0; + } + + num_sg = pci_map_sg(asd_ha->pcidev, task->scatter, task->num_scatter, + task->data_dir); + if (num_sg == 0) + return -ENOMEM; + + if (num_sg > 3) { + int i; + + ascb->sg_arr = asd_alloc_coherent(asd_ha, + num_sg*sizeof(struct sg_el), + gfp_flags); + if (!ascb->sg_arr) { + res = -ENOMEM; + goto err_unmap; + } + for (sc = task->scatter, i = 0; i < num_sg; i++, sc++) { + struct sg_el *sg = + &((struct sg_el *)ascb->sg_arr->vaddr)[i]; + sg->bus_addr = cpu_to_le64((u64)sg_dma_address(sc)); + sg->size = cpu_to_le32((u32)sg_dma_len(sc)); + if (i == num_sg-1) + sg->flags |= ASD_SG_EL_LIST_EOL; + } + + for (sc = task->scatter, i = 0; i < 2; i++, sc++) { + sg_arr[i].bus_addr = + cpu_to_le64((u64)sg_dma_address(sc)); + sg_arr[i].size = cpu_to_le32((u32)sg_dma_len(sc)); + } + sg_arr[1].next_sg_offs = 2 * sizeof(*sg_arr); + sg_arr[1].flags |= ASD_SG_EL_LIST_EOS; + + memset(&sg_arr[2], 0, sizeof(*sg_arr)); + sg_arr[2].bus_addr=cpu_to_le64((u64)ascb->sg_arr->dma_handle); + } else { + int i; + for (sc = task->scatter, i = 0; i < num_sg; i++, sc++) { + sg_arr[i].bus_addr = + cpu_to_le64((u64)sg_dma_address(sc)); + sg_arr[i].size = cpu_to_le32((u32)sg_dma_len(sc)); + } + sg_arr[i-1].flags |= ASD_SG_EL_LIST_EOL; + } + + return 0; +err_unmap: + pci_unmap_sg(asd_ha->pcidev, task->scatter, task->num_scatter, + task->data_dir); + return res; +} + +static inline void asd_unmap_scatterlist(struct asd_ascb *ascb) +{ + struct asd_ha_struct *asd_ha = ascb->ha; + struct sas_task *task = ascb->uldd_task; + + if (task->data_dir == PCI_DMA_NONE) + return; + + if (task->num_scatter == 0) { + dma_addr_t dma = (dma_addr_t) + le64_to_cpu(ascb->scb->ssp_task.sg_element[0].bus_addr); + pci_unmap_single(ascb->ha->pcidev, dma, task->total_xfer_len, + task->data_dir); + return; + } + + asd_free_coherent(asd_ha, ascb->sg_arr); + pci_unmap_sg(asd_ha->pcidev, task->scatter, task->num_scatter, + task->data_dir); +} + +/* ---------- Task complete tasklet ---------- */ + +static void asd_get_response_tasklet(struct asd_ascb *ascb, + struct done_list_struct *dl) +{ + struct asd_ha_struct *asd_ha = ascb->ha; + struct sas_task *task = ascb->uldd_task; + struct task_status_struct *ts = &task->task_status; + unsigned long flags; + struct tc_resp_sb_struct { + __le16 index_escb; + u8 len_lsb; + u8 flags; + } __attribute__ ((packed)) *resp_sb = (void *) dl->status_block; + +/* int size = ((resp_sb->flags & 7) << 8) | resp_sb->len_lsb; */ + int edb_id = ((resp_sb->flags & 0x70) >> 4)-1; + struct asd_ascb *escb; + struct asd_dma_tok *edb; + void *r; + + spin_lock_irqsave(&asd_ha->seq.tc_index_lock, flags); + escb = asd_tc_index_find(&asd_ha->seq, + (int)le16_to_cpu(resp_sb->index_escb)); + spin_unlock_irqrestore(&asd_ha->seq.tc_index_lock, flags); + + if (!escb) { + ASD_DPRINTK("Uh-oh! No escb for this dl?!\n"); + return; + } + + ts->buf_valid_size = 0; + edb = asd_ha->seq.edb_arr[edb_id + escb->edb_index]; + r = edb->vaddr; + if (task->task_proto == SAS_PROTO_SSP) { + struct ssp_response_iu *iu = + r + 16 + sizeof(struct ssp_frame_hdr); + + ts->residual = le32_to_cpu(*(__le32 *)r); + ts->resp = SAS_TASK_COMPLETE; + if (iu->datapres == 0) + ts->stat = iu->status; + else if (iu->datapres == 1) + ts->stat = iu->resp_data[3]; + else if (iu->datapres == 2) { + ts->stat = SAM_CHECK_COND; + ts->buf_valid_size = min((u32) SAS_STATUS_BUF_SIZE, + be32_to_cpu(iu->sense_data_len)); + memcpy(ts->buf, iu->sense_data, ts->buf_valid_size); + if (iu->status != SAM_CHECK_COND) { + ASD_DPRINTK("device %llx sent sense data, but " + "stat(0x%x) is not CHECK_CONDITION" + "\n", + SAS_ADDR(task->dev->sas_addr), + ts->stat); + } + } + } else { + struct ata_task_resp *resp = (void *) &ts->buf[0]; + + ts->residual = le32_to_cpu(*(__le32 *)r); + + if (SAS_STATUS_BUF_SIZE >= sizeof(*resp)) { + resp->frame_len = le16_to_cpu(*(__le16 *)(r+6)); + memcpy(&resp->ending_fis[0], r+16, 24); + ts->buf_valid_size = sizeof(*resp); + } + } + + asd_invalidate_edb(escb, edb_id); +} + +static void asd_task_tasklet_complete(struct asd_ascb *ascb, + struct done_list_struct *dl) +{ + struct sas_task *task = ascb->uldd_task; + struct task_status_struct *ts = &task->task_status; + unsigned long flags; + u8 opcode = dl->opcode; + + asd_can_dequeue(ascb->ha, 1); + +Again: + switch (opcode) { + case TC_NO_ERROR: + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAM_GOOD; + break; + case TC_UNDERRUN: + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DATA_UNDERRUN; + ts->residual = le32_to_cpu(*(__le32 *)dl->status_block); + break; + case TC_OVERRUN: + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_DATA_OVERRUN; + ts->residual = 0; + break; + case TC_SSP_RESP: + case TC_ATA_RESP: + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_PROTO_RESPONSE; + asd_get_response_tasklet(ascb, dl); + break; + case TF_OPEN_REJECT: + ts->resp = SAS_TASK_UNDELIVERED; + ts->stat = SAS_OPEN_REJECT; + if (dl->status_block[1] & 2) + ts->open_rej_reason = 1 + dl->status_block[2]; + else if (dl->status_block[1] & 1) + ts->open_rej_reason = (dl->status_block[2] >> 4)+10; + else + ts->open_rej_reason = SAS_OREJ_UNKNOWN; + break; + case TF_OPEN_TO: + ts->resp = SAS_TASK_UNDELIVERED; + ts->stat = SAS_OPEN_TO; + break; + case TF_PHY_DOWN: + case TU_PHY_DOWN: + ts->resp = SAS_TASK_UNDELIVERED; + ts->stat = SAS_PHY_DOWN; + break; + case TI_PHY_DOWN: + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_PHY_DOWN; + break; + case TI_BREAK: + case TI_PROTO_ERR: + case TI_NAK: + case TI_ACK_NAK_TO: + case TF_SMP_XMIT_RCV_ERR: + case TC_ATA_R_ERR_RECV: + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_INTERRUPTED; + break; + case TF_BREAK: + case TU_BREAK: + case TU_ACK_NAK_TO: + case TF_SMPRSP_TO: + ts->resp = SAS_TASK_UNDELIVERED; + ts->stat = SAS_DEV_NO_RESPONSE; + break; + case TF_NAK_RECV: + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_NAK_R_ERR; + break; + case TA_I_T_NEXUS_LOSS: + opcode = dl->status_block[0]; + goto Again; + break; + case TF_INV_CONN_HANDLE: + ts->resp = SAS_TASK_UNDELIVERED; + ts->stat = SAS_DEVICE_UNKNOWN; + break; + case TF_REQUESTED_N_PENDING: + ts->resp = SAS_TASK_UNDELIVERED; + ts->stat = SAS_PENDING; + break; + case TC_TASK_CLEARED: + case TA_ON_REQ: + ts->resp = SAS_TASK_COMPLETE; + ts->stat = SAS_ABORTED_TASK; + break; + + case TF_NO_SMP_CONN: + case TF_TMF_NO_CTX: + case TF_TMF_NO_TAG: + case TF_TMF_TAG_FREE: + case TF_TMF_TASK_DONE: + case TF_TMF_NO_CONN_HANDLE: + case TF_IRTT_TO: + case TF_IU_SHORT: + case TF_DATA_OFFS_ERR: + ts->resp = SAS_TASK_UNDELIVERED; + ts->stat = SAS_DEV_NO_RESPONSE; + break; + + case TC_LINK_ADM_RESP: + case TC_CONTROL_PHY: + case TC_RESUME: + case TC_PARTIAL_SG_LIST: + default: + ASD_DPRINTK("%s: dl opcode: 0x%x?\n", __FUNCTION__, opcode); + break; + } + + switch (task->task_proto) { + case SATA_PROTO: + case SAS_PROTO_STP: + asd_unbuild_ata_ascb(ascb); + break; + case SAS_PROTO_SMP: + asd_unbuild_smp_ascb(ascb); + break; + case SAS_PROTO_SSP: + asd_unbuild_ssp_ascb(ascb); + default: + break; + } + + spin_lock_irqsave(&task->task_state_lock, flags); + task->task_state_flags &= ~SAS_TASK_STATE_PENDING; + task->task_state_flags |= SAS_TASK_STATE_DONE; + if (unlikely((task->task_state_flags & SAS_TASK_STATE_ABORTED))) { + spin_unlock_irqrestore(&task->task_state_lock, flags); + ASD_DPRINTK("task 0x%p done with opcode 0x%x resp 0x%x " + "stat 0x%x but aborted by upper layer!\n", + task, opcode, ts->resp, ts->stat); + complete(&ascb->completion); + } else { + spin_unlock_irqrestore(&task->task_state_lock, flags); + task->lldd_task = NULL; + asd_ascb_free(ascb); + mb(); + task->task_done(task); + } +} + +/* ---------- ATA ---------- */ + +static int asd_build_ata_ascb(struct asd_ascb *ascb, struct sas_task *task, + unsigned long gfp_flags) +{ + struct domain_device *dev = task->dev; + struct scb *scb; + u8 flags; + int res = 0; + + scb = ascb->scb; + + if (unlikely(task->ata_task.device_control_reg_update)) + scb->header.opcode = CONTROL_ATA_DEV; + else if (dev->sata_dev.command_set == ATA_COMMAND_SET) + scb->header.opcode = INITIATE_ATA_TASK; + else + scb->header.opcode = INITIATE_ATAPI_TASK; + + scb->ata_task.proto_conn_rate = (1 << 5); /* STP */ + if (dev->port->oob_mode == SAS_OOB_MODE) + scb->ata_task.proto_conn_rate |= dev->linkrate; + + scb->ata_task.total_xfer_len = cpu_to_le32(task->total_xfer_len); + scb->ata_task.fis = task->ata_task.fis; + scb->ata_task.fis.fis_type = 0x27; + if (likely(!task->ata_task.device_control_reg_update)) + scb->ata_task.fis.flags |= 0x80; /* C=1: update ATA cmd reg */ + scb->ata_task.fis.flags &= 0xF0; /* PM_PORT field shall be 0 */ + if (dev->sata_dev.command_set == ATAPI_COMMAND_SET) + memcpy(scb->ata_task.atapi_packet, task->ata_task.atapi_packet, + 16); + scb->ata_task.sister_scb = cpu_to_le16(0xFFFF); + scb->ata_task.conn_handle = cpu_to_le16( + (u16)(unsigned long)dev->lldd_dev); + + if (likely(!task->ata_task.device_control_reg_update)) { + flags = 0; + if (task->ata_task.dma_xfer) + flags |= DATA_XFER_MODE_DMA; + if (task->ata_task.use_ncq && + dev->sata_dev.command_set != ATAPI_COMMAND_SET) + flags |= ATA_Q_TYPE_NCQ; + flags |= data_dir_flags[task->data_dir]; + scb->ata_task.ata_flags = flags; + + scb->ata_task.retry_count = task->ata_task.retry_count; + + flags = 0; + if (task->ata_task.set_affil_pol) + flags |= SET_AFFIL_POLICY; + if (task->ata_task.stp_affil_pol) + flags |= STP_AFFIL_POLICY; + scb->ata_task.flags = flags; + } + ascb->tasklet_complete = asd_task_tasklet_complete; + + if (likely(!task->ata_task.device_control_reg_update)) + res = asd_map_scatterlist(task, scb->ata_task.sg_element, + gfp_flags); + + return res; +} + +static void asd_unbuild_ata_ascb(struct asd_ascb *a) +{ + asd_unmap_scatterlist(a); +} + +/* ---------- SMP ---------- */ + +static int asd_build_smp_ascb(struct asd_ascb *ascb, struct sas_task *task, + unsigned long gfp_flags) +{ + struct asd_ha_struct *asd_ha = ascb->ha; + struct domain_device *dev = task->dev; + struct scb *scb; + + pci_map_sg(asd_ha->pcidev, &task->smp_task.smp_req, 1, + PCI_DMA_FROMDEVICE); + pci_map_sg(asd_ha->pcidev, &task->smp_task.smp_resp, 1, + PCI_DMA_FROMDEVICE); + + scb = ascb->scb; + + scb->header.opcode = INITIATE_SMP_TASK; + + scb->smp_task.proto_conn_rate = dev->linkrate; + + scb->smp_task.smp_req.bus_addr = + cpu_to_le64((u64)sg_dma_address(&task->smp_task.smp_req)); + scb->smp_task.smp_req.size = + cpu_to_le32((u32)sg_dma_len(&task->smp_task.smp_req)-4); + + scb->smp_task.smp_resp.bus_addr = + cpu_to_le64((u64)sg_dma_address(&task->smp_task.smp_resp)); + scb->smp_task.smp_resp.size = + cpu_to_le32((u32)sg_dma_len(&task->smp_task.smp_resp)-4); + + scb->smp_task.sister_scb = cpu_to_le16(0xFFFF); + scb->smp_task.conn_handle = cpu_to_le16((u16) + (unsigned long)dev->lldd_dev); + + ascb->tasklet_complete = asd_task_tasklet_complete; + + return 0; +} + +static void asd_unbuild_smp_ascb(struct asd_ascb *a) +{ + struct sas_task *task = a->uldd_task; + + BUG_ON(!task); + pci_unmap_sg(a->ha->pcidev, &task->smp_task.smp_req, 1, + PCI_DMA_FROMDEVICE); + pci_unmap_sg(a->ha->pcidev, &task->smp_task.smp_resp, 1, + PCI_DMA_FROMDEVICE); +} + +/* ---------- SSP ---------- */ + +static int asd_build_ssp_ascb(struct asd_ascb *ascb, struct sas_task *task, + unsigned long gfp_flags) +{ + struct domain_device *dev = task->dev; + struct scb *scb; + int res = 0; + + scb = ascb->scb; + + scb->header.opcode = INITIATE_SSP_TASK; + + scb->ssp_task.proto_conn_rate = (1 << 4); /* SSP */ + scb->ssp_task.proto_conn_rate |= dev->linkrate; + scb->ssp_task.total_xfer_len = cpu_to_le32(task->total_xfer_len); + scb->ssp_task.ssp_frame.frame_type = SSP_DATA; + memcpy(scb->ssp_task.ssp_frame.hashed_dest_addr, dev->hashed_sas_addr, + HASHED_SAS_ADDR_SIZE); + memcpy(scb->ssp_task.ssp_frame.hashed_src_addr, + dev->port->ha->hashed_sas_addr, HASHED_SAS_ADDR_SIZE); + scb->ssp_task.ssp_frame.tptt = cpu_to_be16(0xFFFF); + + memcpy(scb->ssp_task.ssp_cmd.lun, task->ssp_task.LUN, 8); + if (task->ssp_task.enable_first_burst) + scb->ssp_task.ssp_cmd.efb_prio_attr |= EFB_MASK; + scb->ssp_task.ssp_cmd.efb_prio_attr |= (task->ssp_task.task_prio << 3); + scb->ssp_task.ssp_cmd.efb_prio_attr |= (task->ssp_task.task_attr & 7); + memcpy(scb->ssp_task.ssp_cmd.cdb, task->ssp_task.cdb, 16); + + scb->ssp_task.sister_scb = cpu_to_le16(0xFFFF); + scb->ssp_task.conn_handle = cpu_to_le16( + (u16)(unsigned long)dev->lldd_dev); + scb->ssp_task.data_dir = data_dir_flags[task->data_dir]; + scb->ssp_task.retry_count = scb->ssp_task.retry_count; + + ascb->tasklet_complete = asd_task_tasklet_complete; + + res = asd_map_scatterlist(task, scb->ssp_task.sg_element, gfp_flags); + + return res; +} + +static void asd_unbuild_ssp_ascb(struct asd_ascb *a) +{ + asd_unmap_scatterlist(a); +} + +/* ---------- Execute Task ---------- */ + +static inline int asd_can_queue(struct asd_ha_struct *asd_ha, int num) +{ + int res = 0; + unsigned long flags; + + spin_lock_irqsave(&asd_ha->seq.pend_q_lock, flags); + if ((asd_ha->seq.can_queue - num) < 0) + res = -SAS_QUEUE_FULL; + else + asd_ha->seq.can_queue -= num; + spin_unlock_irqrestore(&asd_ha->seq.pend_q_lock, flags); + + return res; +} + +int asd_execute_task(struct sas_task *task, const int num, + unsigned long gfp_flags) +{ + int res = 0; + LIST_HEAD(alist); + struct sas_task *t = task; + struct asd_ascb *ascb = NULL, *a; + struct asd_ha_struct *asd_ha = task->dev->port->ha->lldd_ha; + + res = asd_can_queue(asd_ha, num); + if (res) + return res; + + res = num; + ascb = asd_ascb_alloc_list(asd_ha, &res, gfp_flags); + if (res) { + res = -ENOMEM; + goto out_err; + } + + __list_add(&alist, ascb->list.prev, &ascb->list); + list_for_each_entry(a, &alist, list) { + a->uldd_task = t; + t->lldd_task = a; + t = list_entry(t->list.next, struct sas_task, list); + } + list_for_each_entry(a, &alist, list) { + t = a->uldd_task; + a->uldd_timer = 1; + if (t->task_proto & SAS_PROTO_STP) + t->task_proto = SAS_PROTO_STP; + switch (t->task_proto) { + case SATA_PROTO: + case SAS_PROTO_STP: + res = asd_build_ata_ascb(a, t, gfp_flags); + break; + case SAS_PROTO_SMP: + res = asd_build_smp_ascb(a, t, gfp_flags); + break; + case SAS_PROTO_SSP: + res = asd_build_ssp_ascb(a, t, gfp_flags); + break; + default: + asd_printk("unknown sas_task proto: 0x%x\n", + t->task_proto); + res = -ENOMEM; + break; + } + if (res) + goto out_err_unmap; + } + list_del_init(&alist); + + res = asd_post_ascb_list(asd_ha, ascb, num); + if (unlikely(res)) { + a = NULL; + __list_add(&alist, ascb->list.prev, &ascb->list); + goto out_err_unmap; + } + + return 0; +out_err_unmap: + { + struct asd_ascb *b = a; + list_for_each_entry(a, &alist, list) { + if (a == b) + break; + t = a->uldd_task; + switch (t->task_proto) { + case SATA_PROTO: + case SAS_PROTO_STP: + asd_unbuild_ata_ascb(a); + break; + case SAS_PROTO_SMP: + asd_unbuild_smp_ascb(a); + break; + case SAS_PROTO_SSP: + asd_unbuild_ssp_ascb(a); + default: + break; + } + t->lldd_task = NULL; + } + } + list_del_init(&alist); +out_err: + if (ascb) + asd_ascb_free_list(ascb); + asd_can_dequeue(asd_ha, num); + return res; +} diff --git a/drivers/scsi/aic94xx/aic94xx_tmf.c b/drivers/scsi/aic94xx/aic94xx_tmf.c new file mode 100644 index 000000000000..61234384503b --- /dev/null +++ b/drivers/scsi/aic94xx/aic94xx_tmf.c @@ -0,0 +1,636 @@ +/* + * Aic94xx Task Management Functions + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This file is part of the aic94xx driver. + * + * The aic94xx driver is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2 of the + * License. + * + * The aic94xx driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with the aic94xx driver; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include "aic94xx.h" +#include "aic94xx_sas.h" +#include "aic94xx_hwi.h" + +/* ---------- Internal enqueue ---------- */ + +static int asd_enqueue_internal(struct asd_ascb *ascb, + void (*tasklet_complete)(struct asd_ascb *, + struct done_list_struct *), + void (*timed_out)(unsigned long)) +{ + int res; + + ascb->tasklet_complete = tasklet_complete; + ascb->uldd_timer = 1; + + ascb->timer.data = (unsigned long) ascb; + ascb->timer.function = timed_out; + ascb->timer.expires = jiffies + AIC94XX_SCB_TIMEOUT; + + add_timer(&ascb->timer); + + res = asd_post_ascb_list(ascb->ha, ascb, 1); + if (unlikely(res)) + del_timer(&ascb->timer); + return res; +} + +static inline void asd_timedout_common(unsigned long data) +{ + struct asd_ascb *ascb = (void *) data; + struct asd_seq_data *seq = &ascb->ha->seq; + unsigned long flags; + + spin_lock_irqsave(&seq->pend_q_lock, flags); + seq->pending--; + list_del_init(&ascb->list); + spin_unlock_irqrestore(&seq->pend_q_lock, flags); +} + +/* ---------- CLEAR NEXUS ---------- */ + +static void asd_clear_nexus_tasklet_complete(struct asd_ascb *ascb, + struct done_list_struct *dl) +{ + ASD_DPRINTK("%s: here\n", __FUNCTION__); + if (!del_timer(&ascb->timer)) { + ASD_DPRINTK("%s: couldn't delete timer\n", __FUNCTION__); + return; + } + ASD_DPRINTK("%s: opcode: 0x%x\n", __FUNCTION__, dl->opcode); + ascb->uldd_task = (void *) (unsigned long) dl->opcode; + complete(&ascb->completion); +} + +static void asd_clear_nexus_timedout(unsigned long data) +{ + struct asd_ascb *ascb = (void *) data; + + ASD_DPRINTK("%s: here\n", __FUNCTION__); + asd_timedout_common(data); + ascb->uldd_task = (void *) TMF_RESP_FUNC_FAILED; + complete(&ascb->completion); +} + +#define CLEAR_NEXUS_PRE \ + ASD_DPRINTK("%s: PRE\n", __FUNCTION__); \ + res = 1; \ + ascb = asd_ascb_alloc_list(asd_ha, &res, GFP_KERNEL); \ + if (!ascb) \ + return -ENOMEM; \ + \ + scb = ascb->scb; \ + scb->header.opcode = CLEAR_NEXUS + +#define CLEAR_NEXUS_POST \ + ASD_DPRINTK("%s: POST\n", __FUNCTION__); \ + res = asd_enqueue_internal(ascb, asd_clear_nexus_tasklet_complete, \ + asd_clear_nexus_timedout); \ + if (res) \ + goto out_err; \ + ASD_DPRINTK("%s: clear nexus posted, waiting...\n", __FUNCTION__); \ + wait_for_completion(&ascb->completion); \ + res = (int) (unsigned long) ascb->uldd_task; \ + if (res == TC_NO_ERROR) \ + res = TMF_RESP_FUNC_COMPLETE; \ +out_err: \ + asd_ascb_free(ascb); \ + return res + +int asd_clear_nexus_ha(struct sas_ha_struct *sas_ha) +{ + struct asd_ha_struct *asd_ha = sas_ha->lldd_ha; + struct asd_ascb *ascb; + struct scb *scb; + int res; + + CLEAR_NEXUS_PRE; + scb->clear_nexus.nexus = NEXUS_ADAPTER; + CLEAR_NEXUS_POST; +} + +int asd_clear_nexus_port(struct asd_sas_port *port) +{ + struct asd_ha_struct *asd_ha = port->ha->lldd_ha; + struct asd_ascb *ascb; + struct scb *scb; + int res; + + CLEAR_NEXUS_PRE; + scb->clear_nexus.nexus = NEXUS_PORT; + scb->clear_nexus.conn_mask = port->phy_mask; + CLEAR_NEXUS_POST; +} + +#if 0 +static int asd_clear_nexus_I_T(struct domain_device *dev) +{ + struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha; + struct asd_ascb *ascb; + struct scb *scb; + int res; + + CLEAR_NEXUS_PRE; + scb->clear_nexus.nexus = NEXUS_I_T; + scb->clear_nexus.flags = SEND_Q | EXEC_Q | NOTINQ; + if (dev->tproto) + scb->clear_nexus.flags |= SUSPEND_TX; + scb->clear_nexus.conn_handle = cpu_to_le16((u16)(unsigned long) + dev->lldd_dev); + CLEAR_NEXUS_POST; +} +#endif + +static int asd_clear_nexus_I_T_L(struct domain_device *dev, u8 *lun) +{ + struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha; + struct asd_ascb *ascb; + struct scb *scb; + int res; + + CLEAR_NEXUS_PRE; + scb->clear_nexus.nexus = NEXUS_I_T_L; + scb->clear_nexus.flags = SEND_Q | EXEC_Q | NOTINQ; + if (dev->tproto) + scb->clear_nexus.flags |= SUSPEND_TX; + memcpy(scb->clear_nexus.ssp_task.lun, lun, 8); + scb->clear_nexus.conn_handle = cpu_to_le16((u16)(unsigned long) + dev->lldd_dev); + CLEAR_NEXUS_POST; +} + +static int asd_clear_nexus_tag(struct sas_task *task) +{ + struct asd_ha_struct *asd_ha = task->dev->port->ha->lldd_ha; + struct asd_ascb *tascb = task->lldd_task; + struct asd_ascb *ascb; + struct scb *scb; + int res; + + CLEAR_NEXUS_PRE; + scb->clear_nexus.nexus = NEXUS_TAG; + memcpy(scb->clear_nexus.ssp_task.lun, task->ssp_task.LUN, 8); + scb->clear_nexus.ssp_task.tag = tascb->tag; + if (task->dev->tproto) + scb->clear_nexus.conn_handle = cpu_to_le16((u16)(unsigned long) + task->dev->lldd_dev); + CLEAR_NEXUS_POST; +} + +static int asd_clear_nexus_index(struct sas_task *task) +{ + struct asd_ha_struct *asd_ha = task->dev->port->ha->lldd_ha; + struct asd_ascb *tascb = task->lldd_task; + struct asd_ascb *ascb; + struct scb *scb; + int res; + + CLEAR_NEXUS_PRE; + scb->clear_nexus.nexus = NEXUS_TRANS_CX; + if (task->dev->tproto) + scb->clear_nexus.conn_handle = cpu_to_le16((u16)(unsigned long) + task->dev->lldd_dev); + scb->clear_nexus.index = cpu_to_le16(tascb->tc_index); + CLEAR_NEXUS_POST; +} + +/* ---------- TMFs ---------- */ + +static void asd_tmf_timedout(unsigned long data) +{ + struct asd_ascb *ascb = (void *) data; + + ASD_DPRINTK("tmf timed out\n"); + asd_timedout_common(data); + ascb->uldd_task = (void *) TMF_RESP_FUNC_FAILED; + complete(&ascb->completion); +} + +static int asd_get_tmf_resp_tasklet(struct asd_ascb *ascb, + struct done_list_struct *dl) +{ + struct asd_ha_struct *asd_ha = ascb->ha; + unsigned long flags; + struct tc_resp_sb_struct { + __le16 index_escb; + u8 len_lsb; + u8 flags; + } __attribute__ ((packed)) *resp_sb = (void *) dl->status_block; + + int edb_id = ((resp_sb->flags & 0x70) >> 4)-1; + struct asd_ascb *escb; + struct asd_dma_tok *edb; + struct ssp_frame_hdr *fh; + struct ssp_response_iu *ru; + int res = TMF_RESP_FUNC_FAILED; + + ASD_DPRINTK("tmf resp tasklet\n"); + + spin_lock_irqsave(&asd_ha->seq.tc_index_lock, flags); + escb = asd_tc_index_find(&asd_ha->seq, + (int)le16_to_cpu(resp_sb->index_escb)); + spin_unlock_irqrestore(&asd_ha->seq.tc_index_lock, flags); + + if (!escb) { + ASD_DPRINTK("Uh-oh! No escb for this dl?!\n"); + return res; + } + + edb = asd_ha->seq.edb_arr[edb_id + escb->edb_index]; + ascb->tag = *(__be16 *)(edb->vaddr+4); + fh = edb->vaddr + 16; + ru = edb->vaddr + 16 + sizeof(*fh); + res = ru->status; + if (ru->datapres == 1) /* Response data present */ + res = ru->resp_data[3]; +#if 0 + ascb->tag = fh->tag; +#endif + ascb->tag_valid = 1; + + asd_invalidate_edb(escb, edb_id); + return res; +} + +static void asd_tmf_tasklet_complete(struct asd_ascb *ascb, + struct done_list_struct *dl) +{ + if (!del_timer(&ascb->timer)) + return; + + ASD_DPRINTK("tmf tasklet complete\n"); + + if (dl->opcode == TC_SSP_RESP) + ascb->uldd_task = (void *) (unsigned long) + asd_get_tmf_resp_tasklet(ascb, dl); + else + ascb->uldd_task = (void *) 0xFF00 + (unsigned long) dl->opcode; + + complete(&ascb->completion); +} + +static inline int asd_clear_nexus(struct sas_task *task) +{ + int res = TMF_RESP_FUNC_FAILED; + struct asd_ascb *tascb = task->lldd_task; + unsigned long flags; + + ASD_DPRINTK("task not done, clearing nexus\n"); + if (tascb->tag_valid) + res = asd_clear_nexus_tag(task); + else + res = asd_clear_nexus_index(task); + wait_for_completion_timeout(&tascb->completion, + AIC94XX_SCB_TIMEOUT); + ASD_DPRINTK("came back from clear nexus\n"); + spin_lock_irqsave(&task->task_state_lock, flags); + if (task->task_state_flags & SAS_TASK_STATE_DONE) + res = TMF_RESP_FUNC_COMPLETE; + spin_unlock_irqrestore(&task->task_state_lock, flags); + + return res; +} + +/** + * asd_abort_task -- ABORT TASK TMF + * @task: the task to be aborted + * + * Before calling ABORT TASK the task state flags should be ORed with + * SAS_TASK_STATE_ABORTED (unless SAS_TASK_STATE_DONE is set) under + * the task_state_lock IRQ spinlock, then ABORT TASK *must* be called. + * + * Implements the ABORT TASK TMF, I_T_L_Q nexus. + * Returns: SAS TMF responses (see sas_task.h), + * -ENOMEM, + * -SAS_QUEUE_FULL. + * + * When ABORT TASK returns, the caller of ABORT TASK checks first the + * task->task_state_flags, and then the return value of ABORT TASK. + * + * If the task has task state bit SAS_TASK_STATE_DONE set, then the + * task was completed successfully prior to it being aborted. The + * caller of ABORT TASK has responsibility to call task->task_done() + * xor free the task, depending on their framework. The return code + * is TMF_RESP_FUNC_FAILED in this case. + * + * Else the SAS_TASK_STATE_DONE bit is not set, + * If the return code is TMF_RESP_FUNC_COMPLETE, then + * the task was aborted successfully. The caller of + * ABORT TASK has responsibility to call task->task_done() + * to finish the task, xor free the task depending on their + * framework. + * else + * the ABORT TASK returned some kind of error. The task + * was _not_ cancelled. Nothing can be assumed. + * The caller of ABORT TASK may wish to retry. + */ +int asd_abort_task(struct sas_task *task) +{ + struct asd_ascb *tascb = task->lldd_task; + struct asd_ha_struct *asd_ha = tascb->ha; + int res = 1; + unsigned long flags; + struct asd_ascb *ascb = NULL; + struct scb *scb; + + spin_lock_irqsave(&task->task_state_lock, flags); + if (task->task_state_flags & SAS_TASK_STATE_DONE) { + spin_unlock_irqrestore(&task->task_state_lock, flags); + res = TMF_RESP_FUNC_COMPLETE; + ASD_DPRINTK("%s: task 0x%p done\n", __FUNCTION__, task); + goto out_done; + } + spin_unlock_irqrestore(&task->task_state_lock, flags); + + ascb = asd_ascb_alloc_list(asd_ha, &res, GFP_KERNEL); + if (!ascb) + return -ENOMEM; + scb = ascb->scb; + + scb->header.opcode = ABORT_TASK; + + switch (task->task_proto) { + case SATA_PROTO: + case SAS_PROTO_STP: + scb->abort_task.proto_conn_rate = (1 << 5); /* STP */ + break; + case SAS_PROTO_SSP: + scb->abort_task.proto_conn_rate = (1 << 4); /* SSP */ + scb->abort_task.proto_conn_rate |= task->dev->linkrate; + break; + case SAS_PROTO_SMP: + break; + default: + break; + } + + if (task->task_proto == SAS_PROTO_SSP) { + scb->abort_task.ssp_frame.frame_type = SSP_TASK; + memcpy(scb->abort_task.ssp_frame.hashed_dest_addr, + task->dev->hashed_sas_addr, HASHED_SAS_ADDR_SIZE); + memcpy(scb->abort_task.ssp_frame.hashed_src_addr, + task->dev->port->ha->hashed_sas_addr, + HASHED_SAS_ADDR_SIZE); + scb->abort_task.ssp_frame.tptt = cpu_to_be16(0xFFFF); + + memcpy(scb->abort_task.ssp_task.lun, task->ssp_task.LUN, 8); + scb->abort_task.ssp_task.tmf = TMF_ABORT_TASK; + scb->abort_task.ssp_task.tag = cpu_to_be16(0xFFFF); + } + + scb->abort_task.sister_scb = cpu_to_le16(0xFFFF); + scb->abort_task.conn_handle = cpu_to_le16( + (u16)(unsigned long)task->dev->lldd_dev); + scb->abort_task.retry_count = 1; + scb->abort_task.index = cpu_to_le16((u16)tascb->tc_index); + scb->abort_task.itnl_to = cpu_to_le16(ITNL_TIMEOUT_CONST); + + res = asd_enqueue_internal(ascb, asd_tmf_tasklet_complete, + asd_tmf_timedout); + if (res) + goto out; + wait_for_completion(&ascb->completion); + ASD_DPRINTK("tmf came back\n"); + + res = (int) (unsigned long) ascb->uldd_task; + tascb->tag = ascb->tag; + tascb->tag_valid = ascb->tag_valid; + + spin_lock_irqsave(&task->task_state_lock, flags); + if (task->task_state_flags & SAS_TASK_STATE_DONE) { + spin_unlock_irqrestore(&task->task_state_lock, flags); + res = TMF_RESP_FUNC_COMPLETE; + ASD_DPRINTK("%s: task 0x%p done\n", __FUNCTION__, task); + goto out_done; + } + spin_unlock_irqrestore(&task->task_state_lock, flags); + + switch (res) { + /* The task to be aborted has been sent to the device. + * We got a Response IU for the ABORT TASK TMF. */ + case TC_NO_ERROR + 0xFF00: + case TMF_RESP_FUNC_COMPLETE: + case TMF_RESP_FUNC_FAILED: + res = asd_clear_nexus(task); + break; + case TMF_RESP_INVALID_FRAME: + case TMF_RESP_OVERLAPPED_TAG: + case TMF_RESP_FUNC_ESUPP: + case TMF_RESP_NO_LUN: + goto out_done; break; + } + /* In the following we assume that the managing layer + * will _never_ make a mistake, when issuing ABORT TASK. + */ + switch (res) { + default: + res = asd_clear_nexus(task); + /* fallthrough */ + case TC_NO_ERROR + 0xFF00: + case TMF_RESP_FUNC_COMPLETE: + break; + /* The task hasn't been sent to the device xor we never got + * a (sane) Response IU for the ABORT TASK TMF. + */ + case TF_NAK_RECV + 0xFF00: + res = TMF_RESP_INVALID_FRAME; + break; + case TF_TMF_TASK_DONE + 0xFF00: /* done but not reported yet */ + res = TMF_RESP_FUNC_FAILED; + wait_for_completion_timeout(&tascb->completion, + AIC94XX_SCB_TIMEOUT); + spin_lock_irqsave(&task->task_state_lock, flags); + if (task->task_state_flags & SAS_TASK_STATE_DONE) + res = TMF_RESP_FUNC_COMPLETE; + spin_unlock_irqrestore(&task->task_state_lock, flags); + goto out_done; + case TF_TMF_NO_TAG + 0xFF00: + case TF_TMF_TAG_FREE + 0xFF00: /* the tag is in the free list */ + case TF_TMF_NO_CONN_HANDLE + 0xFF00: /* no such device */ + res = TMF_RESP_FUNC_COMPLETE; + goto out_done; + case TF_TMF_NO_CTX + 0xFF00: /* not in seq, or proto != SSP */ + res = TMF_RESP_FUNC_ESUPP; + goto out; + } +out_done: + if (res == TMF_RESP_FUNC_COMPLETE) { + task->lldd_task = NULL; + mb(); + asd_ascb_free(tascb); + } +out: + asd_ascb_free(ascb); + ASD_DPRINTK("task 0x%p aborted, res: 0x%x\n", task, res); + return res; +} + +/** + * asd_initiate_ssp_tmf -- send a TMF to an I_T_L or I_T_L_Q nexus + * @dev: pointer to struct domain_device of interest + * @lun: pointer to u8[8] which is the LUN + * @tmf: the TMF to be performed (see sas_task.h or the SAS spec) + * @index: the transaction context of the task to be queried if QT TMF + * + * This function is used to send ABORT TASK SET, CLEAR ACA, + * CLEAR TASK SET, LU RESET and QUERY TASK TMFs. + * + * No SCBs should be queued to the I_T_L nexus when this SCB is + * pending. + * + * Returns: TMF response code (see sas_task.h or the SAS spec) + */ +static int asd_initiate_ssp_tmf(struct domain_device *dev, u8 *lun, + int tmf, int index) +{ + struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha; + struct asd_ascb *ascb; + int res = 1; + struct scb *scb; + + if (!(dev->tproto & SAS_PROTO_SSP)) + return TMF_RESP_FUNC_ESUPP; + + ascb = asd_ascb_alloc_list(asd_ha, &res, GFP_KERNEL); + if (!ascb) + return -ENOMEM; + scb = ascb->scb; + + if (tmf == TMF_QUERY_TASK) + scb->header.opcode = QUERY_SSP_TASK; + else + scb->header.opcode = INITIATE_SSP_TMF; + + scb->ssp_tmf.proto_conn_rate = (1 << 4); /* SSP */ + scb->ssp_tmf.proto_conn_rate |= dev->linkrate; + /* SSP frame header */ + scb->ssp_tmf.ssp_frame.frame_type = SSP_TASK; + memcpy(scb->ssp_tmf.ssp_frame.hashed_dest_addr, + dev->hashed_sas_addr, HASHED_SAS_ADDR_SIZE); + memcpy(scb->ssp_tmf.ssp_frame.hashed_src_addr, + dev->port->ha->hashed_sas_addr, HASHED_SAS_ADDR_SIZE); + scb->ssp_tmf.ssp_frame.tptt = cpu_to_be16(0xFFFF); + /* SSP Task IU */ + memcpy(scb->ssp_tmf.ssp_task.lun, lun, 8); + scb->ssp_tmf.ssp_task.tmf = tmf; + + scb->ssp_tmf.sister_scb = cpu_to_le16(0xFFFF); + scb->ssp_tmf.conn_handle= cpu_to_le16((u16)(unsigned long) + dev->lldd_dev); + scb->ssp_tmf.retry_count = 1; + scb->ssp_tmf.itnl_to = cpu_to_le16(ITNL_TIMEOUT_CONST); + if (tmf == TMF_QUERY_TASK) + scb->ssp_tmf.index = cpu_to_le16(index); + + res = asd_enqueue_internal(ascb, asd_tmf_tasklet_complete, + asd_tmf_timedout); + if (res) + goto out_err; + wait_for_completion(&ascb->completion); + res = (int) (unsigned long) ascb->uldd_task; + + switch (res) { + case TC_NO_ERROR + 0xFF00: + res = TMF_RESP_FUNC_COMPLETE; + break; + case TF_NAK_RECV + 0xFF00: + res = TMF_RESP_INVALID_FRAME; + break; + case TF_TMF_TASK_DONE + 0xFF00: + res = TMF_RESP_FUNC_FAILED; + break; + case TF_TMF_NO_TAG + 0xFF00: + case TF_TMF_TAG_FREE + 0xFF00: /* the tag is in the free list */ + case TF_TMF_NO_CONN_HANDLE + 0xFF00: /* no such device */ + res = TMF_RESP_FUNC_COMPLETE; + break; + case TF_TMF_NO_CTX + 0xFF00: /* not in seq, or proto != SSP */ + res = TMF_RESP_FUNC_ESUPP; + break; + default: + ASD_DPRINTK("%s: converting result 0x%x to TMF_RESP_FUNC_FAILED\n", + __FUNCTION__, res); + res = TMF_RESP_FUNC_FAILED; + break; + } +out_err: + asd_ascb_free(ascb); + return res; +} + +int asd_abort_task_set(struct domain_device *dev, u8 *lun) +{ + int res = asd_initiate_ssp_tmf(dev, lun, TMF_ABORT_TASK_SET, 0); + + if (res == TMF_RESP_FUNC_COMPLETE) + asd_clear_nexus_I_T_L(dev, lun); + return res; +} + +int asd_clear_aca(struct domain_device *dev, u8 *lun) +{ + int res = asd_initiate_ssp_tmf(dev, lun, TMF_CLEAR_ACA, 0); + + if (res == TMF_RESP_FUNC_COMPLETE) + asd_clear_nexus_I_T_L(dev, lun); + return res; +} + +int asd_clear_task_set(struct domain_device *dev, u8 *lun) +{ + int res = asd_initiate_ssp_tmf(dev, lun, TMF_CLEAR_TASK_SET, 0); + + if (res == TMF_RESP_FUNC_COMPLETE) + asd_clear_nexus_I_T_L(dev, lun); + return res; +} + +int asd_lu_reset(struct domain_device *dev, u8 *lun) +{ + int res = asd_initiate_ssp_tmf(dev, lun, TMF_LU_RESET, 0); + + if (res == TMF_RESP_FUNC_COMPLETE) + asd_clear_nexus_I_T_L(dev, lun); + return res; +} + +/** + * asd_query_task -- send a QUERY TASK TMF to an I_T_L_Q nexus + * task: pointer to sas_task struct of interest + * + * Returns: TMF_RESP_FUNC_COMPLETE if the task is not in the task set, + * or TMF_RESP_FUNC_SUCC if the task is in the task set. + * + * Normally the management layer sets the task to aborted state, + * and then calls query task and then abort task. + */ +int asd_query_task(struct sas_task *task) +{ + struct asd_ascb *ascb = task->lldd_task; + int index; + + if (ascb) { + index = ascb->tc_index; + return asd_initiate_ssp_tmf(task->dev, task->ssp_task.LUN, + TMF_QUERY_TASK, index); + } + return TMF_RESP_FUNC_COMPLETE; +} diff --git a/drivers/scsi/libsas/Kconfig b/drivers/scsi/libsas/Kconfig new file mode 100644 index 000000000000..aafdc92f8312 --- /dev/null +++ b/drivers/scsi/libsas/Kconfig @@ -0,0 +1,39 @@ +# +# Kernel configuration file for the SAS Class +# +# Copyright (C) 2005 Adaptec, Inc. All rights reserved. +# Copyright (C) 2005 Luben Tuikov +# +# This file is licensed under GPLv2. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; version 2 of the +# License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +# USA +# + +config SCSI_SAS_LIBSAS + tristate "SAS Domain Transport Attributes" + depends on SCSI + select SCSI_SAS_ATTRS + help + This provides transport specific helpers for SAS drivers which + use the domain device construct (like the aic94xxx). + +config SCSI_SAS_LIBSAS_DEBUG + bool "Compile the SAS Domain Transport Attributes in debug mode" + default y + depends on SCSI_SAS_LIBSAS + help + Compiles the SAS Layer in debug mode. In debug mode, the + SAS Layer prints diagnostic and debug messages. diff --git a/drivers/scsi/libsas/Makefile b/drivers/scsi/libsas/Makefile new file mode 100644 index 000000000000..44d972a3b4bd --- /dev/null +++ b/drivers/scsi/libsas/Makefile @@ -0,0 +1,36 @@ +# +# Kernel Makefile for the libsas helpers +# +# Copyright (C) 2005 Adaptec, Inc. All rights reserved. +# Copyright (C) 2005 Luben Tuikov +# +# This file is licensed under GPLv2. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; version 2 of the +# License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +# USA + +ifeq ($(CONFIG_SCSI_SAS_LIBSAS_DEBUG),y) + EXTRA_CFLAGS += -DSAS_DEBUG +endif + +obj-$(CONFIG_SCSI_SAS_LIBSAS) += libsas.o +libsas-y += sas_init.o \ + sas_phy.o \ + sas_port.o \ + sas_event.o \ + sas_dump.o \ + sas_discover.o \ + sas_expander.o \ + sas_scsi_host.o diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c new file mode 100644 index 000000000000..d977bd492d8d --- /dev/null +++ b/drivers/scsi/libsas/sas_discover.c @@ -0,0 +1,749 @@ +/* + * Serial Attached SCSI (SAS) Discover process + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include "sas_internal.h" + +#include +#include +#include "../scsi_sas_internal.h" + +/* ---------- Basic task processing for discovery purposes ---------- */ + +void sas_init_dev(struct domain_device *dev) +{ + INIT_LIST_HEAD(&dev->siblings); + INIT_LIST_HEAD(&dev->dev_list_node); + switch (dev->dev_type) { + case SAS_END_DEV: + break; + case EDGE_DEV: + case FANOUT_DEV: + INIT_LIST_HEAD(&dev->ex_dev.children); + break; + case SATA_DEV: + case SATA_PM: + case SATA_PM_PORT: + INIT_LIST_HEAD(&dev->sata_dev.children); + break; + default: + break; + } +} + +static void sas_task_timedout(unsigned long _task) +{ + struct sas_task *task = (void *) _task; + unsigned long flags; + + spin_lock_irqsave(&task->task_state_lock, flags); + if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) + task->task_state_flags |= SAS_TASK_STATE_ABORTED; + spin_unlock_irqrestore(&task->task_state_lock, flags); + + complete(&task->completion); +} + +static void sas_disc_task_done(struct sas_task *task) +{ + if (!del_timer(&task->timer)) + return; + complete(&task->completion); +} + +#define SAS_DEV_TIMEOUT 10 + +/** + * sas_execute_task -- Basic task processing for discovery + * @task: the task to be executed + * @buffer: pointer to buffer to do I/O + * @size: size of @buffer + * @pci_dma_dir: PCI_DMA_... + */ +static int sas_execute_task(struct sas_task *task, void *buffer, int size, + int pci_dma_dir) +{ + int res = 0; + struct scatterlist *scatter = NULL; + struct task_status_struct *ts = &task->task_status; + int num_scatter = 0; + int retries = 0; + struct sas_internal *i = + to_sas_internal(task->dev->port->ha->core.shost->transportt); + + if (pci_dma_dir != PCI_DMA_NONE) { + scatter = kzalloc(sizeof(*scatter), GFP_KERNEL); + if (!scatter) + goto out; + + sg_init_one(scatter, buffer, size); + num_scatter = 1; + } + + task->task_proto = task->dev->tproto; + task->scatter = scatter; + task->num_scatter = num_scatter; + task->total_xfer_len = size; + task->data_dir = pci_dma_dir; + task->task_done = sas_disc_task_done; + + for (retries = 0; retries < 5; retries++) { + task->task_state_flags = SAS_TASK_STATE_PENDING; + init_completion(&task->completion); + + task->timer.data = (unsigned long) task; + task->timer.function = sas_task_timedout; + task->timer.expires = jiffies + SAS_DEV_TIMEOUT*HZ; + add_timer(&task->timer); + + res = i->dft->lldd_execute_task(task, 1, GFP_KERNEL); + if (res) { + del_timer(&task->timer); + SAS_DPRINTK("executing SAS discovery task failed:%d\n", + res); + goto ex_err; + } + wait_for_completion(&task->completion); + res = -ETASK; + if (task->task_state_flags & SAS_TASK_STATE_ABORTED) { + int res2; + SAS_DPRINTK("task aborted, flags:0x%x\n", + task->task_state_flags); + res2 = i->dft->lldd_abort_task(task); + SAS_DPRINTK("came back from abort task\n"); + if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) { + if (res2 == TMF_RESP_FUNC_COMPLETE) + continue; /* Retry the task */ + else + goto ex_err; + } + } + if (task->task_status.stat == SAM_BUSY || + task->task_status.stat == SAM_TASK_SET_FULL || + task->task_status.stat == SAS_QUEUE_FULL) { + SAS_DPRINTK("task: q busy, sleeping...\n"); + schedule_timeout_interruptible(HZ); + } else if (task->task_status.stat == SAM_CHECK_COND) { + struct scsi_sense_hdr shdr; + + if (!scsi_normalize_sense(ts->buf, ts->buf_valid_size, + &shdr)) { + SAS_DPRINTK("couldn't normalize sense\n"); + continue; + } + if ((shdr.sense_key == 6 && shdr.asc == 0x29) || + (shdr.sense_key == 2 && shdr.asc == 4 && + shdr.ascq == 1)) { + SAS_DPRINTK("device %016llx LUN: %016llx " + "powering up or not ready yet, " + "sleeping...\n", + SAS_ADDR(task->dev->sas_addr), + SAS_ADDR(task->ssp_task.LUN)); + + schedule_timeout_interruptible(5*HZ); + } else if (shdr.sense_key == 1) { + res = 0; + break; + } else if (shdr.sense_key == 5) { + break; + } else { + SAS_DPRINTK("dev %016llx LUN: %016llx " + "sense key:0x%x ASC:0x%x ASCQ:0x%x" + "\n", + SAS_ADDR(task->dev->sas_addr), + SAS_ADDR(task->ssp_task.LUN), + shdr.sense_key, + shdr.asc, shdr.ascq); + } + } else if (task->task_status.resp != SAS_TASK_COMPLETE || + task->task_status.stat != SAM_GOOD) { + SAS_DPRINTK("task finished with resp:0x%x, " + "stat:0x%x\n", + task->task_status.resp, + task->task_status.stat); + goto ex_err; + } else { + res = 0; + break; + } + } +ex_err: + if (pci_dma_dir != PCI_DMA_NONE) + kfree(scatter); +out: + return res; +} + +/* ---------- Domain device discovery ---------- */ + +/** + * sas_get_port_device -- Discover devices which caused port creation + * @port: pointer to struct sas_port of interest + * + * Devices directly attached to a HA port, have no parent. This is + * how we know they are (domain) "root" devices. All other devices + * do, and should have their "parent" pointer set appropriately as + * soon as a child device is discovered. + */ +static int sas_get_port_device(struct asd_sas_port *port) +{ + unsigned long flags; + struct asd_sas_phy *phy; + struct sas_rphy *rphy; + struct domain_device *dev; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + spin_lock_irqsave(&port->phy_list_lock, flags); + if (list_empty(&port->phy_list)) { + spin_unlock_irqrestore(&port->phy_list_lock, flags); + kfree(dev); + return -ENODEV; + } + phy = container_of(port->phy_list.next, struct asd_sas_phy, port_phy_el); + spin_lock(&phy->frame_rcvd_lock); + memcpy(dev->frame_rcvd, phy->frame_rcvd, min(sizeof(dev->frame_rcvd), + (size_t)phy->frame_rcvd_size)); + spin_unlock(&phy->frame_rcvd_lock); + spin_unlock_irqrestore(&port->phy_list_lock, flags); + + if (dev->frame_rcvd[0] == 0x34 && port->oob_mode == SATA_OOB_MODE) { + struct dev_to_host_fis *fis = + (struct dev_to_host_fis *) dev->frame_rcvd; + if (fis->interrupt_reason == 1 && fis->lbal == 1 && + fis->byte_count_low==0x69 && fis->byte_count_high == 0x96 + && (fis->device & ~0x10) == 0) + dev->dev_type = SATA_PM; + else + dev->dev_type = SATA_DEV; + dev->tproto = SATA_PROTO; + } else { + struct sas_identify_frame *id = + (struct sas_identify_frame *) dev->frame_rcvd; + dev->dev_type = id->dev_type; + dev->iproto = id->initiator_bits; + dev->tproto = id->target_bits; + } + + sas_init_dev(dev); + + switch (dev->dev_type) { + case SAS_END_DEV: + rphy = sas_end_device_alloc(port->port); + break; + case EDGE_DEV: + rphy = sas_expander_alloc(port->port, + SAS_EDGE_EXPANDER_DEVICE); + break; + case FANOUT_DEV: + rphy = sas_expander_alloc(port->port, + SAS_FANOUT_EXPANDER_DEVICE); + break; + case SATA_DEV: + default: + printk("ERROR: Unidentified device type %d\n", dev->dev_type); + rphy = NULL; + break; + } + + if (!rphy) { + kfree(dev); + return -ENODEV; + } + rphy->identify.phy_identifier = phy->phy->identify.phy_identifier; + memcpy(dev->sas_addr, port->attached_sas_addr, SAS_ADDR_SIZE); + sas_fill_in_rphy(dev, rphy); + sas_hash_addr(dev->hashed_sas_addr, dev->sas_addr); + port->port_dev = dev; + dev->port = port; + dev->linkrate = port->linkrate; + dev->min_linkrate = port->linkrate; + dev->max_linkrate = port->linkrate; + dev->pathways = port->num_phys; + memset(port->disc.fanout_sas_addr, 0, SAS_ADDR_SIZE); + memset(port->disc.eeds_a, 0, SAS_ADDR_SIZE); + memset(port->disc.eeds_b, 0, SAS_ADDR_SIZE); + port->disc.max_level = 0; + + dev->rphy = rphy; + spin_lock(&port->dev_list_lock); + list_add_tail(&dev->dev_list_node, &port->dev_list); + spin_unlock(&port->dev_list_lock); + + return 0; +} + +/* ---------- Discover and Revalidate ---------- */ + +/* ---------- SATA ---------- */ + +static void sas_get_ata_command_set(struct domain_device *dev) +{ + struct dev_to_host_fis *fis = + (struct dev_to_host_fis *) dev->frame_rcvd; + + if ((fis->sector_count == 1 && /* ATA */ + fis->lbal == 1 && + fis->lbam == 0 && + fis->lbah == 0 && + fis->device == 0) + || + (fis->sector_count == 0 && /* CE-ATA (mATA) */ + fis->lbal == 0 && + fis->lbam == 0xCE && + fis->lbah == 0xAA && + (fis->device & ~0x10) == 0)) + + dev->sata_dev.command_set = ATA_COMMAND_SET; + + else if ((fis->interrupt_reason == 1 && /* ATAPI */ + fis->lbal == 1 && + fis->byte_count_low == 0x14 && + fis->byte_count_high == 0xEB && + (fis->device & ~0x10) == 0)) + + dev->sata_dev.command_set = ATAPI_COMMAND_SET; + + else if ((fis->sector_count == 1 && /* SEMB */ + fis->lbal == 1 && + fis->lbam == 0x3C && + fis->lbah == 0xC3 && + fis->device == 0) + || + (fis->interrupt_reason == 1 && /* SATA PM */ + fis->lbal == 1 && + fis->byte_count_low == 0x69 && + fis->byte_count_high == 0x96 && + (fis->device & ~0x10) == 0)) + + /* Treat it as a superset? */ + dev->sata_dev.command_set = ATAPI_COMMAND_SET; +} + +/** + * sas_issue_ata_cmd -- Basic SATA command processing for discovery + * @dev: the device to send the command to + * @command: the command register + * @features: the features register + * @buffer: pointer to buffer to do I/O + * @size: size of @buffer + * @pci_dma_dir: PCI_DMA_... + */ +static int sas_issue_ata_cmd(struct domain_device *dev, u8 command, + u8 features, void *buffer, int size, + int pci_dma_dir) +{ + int res = 0; + struct sas_task *task; + struct dev_to_host_fis *d2h_fis = (struct dev_to_host_fis *) + &dev->frame_rcvd[0]; + + res = -ENOMEM; + task = sas_alloc_task(GFP_KERNEL); + if (!task) + goto out; + + task->dev = dev; + + task->ata_task.fis.command = command; + task->ata_task.fis.features = features; + task->ata_task.fis.device = d2h_fis->device; + task->ata_task.retry_count = 1; + + res = sas_execute_task(task, buffer, size, pci_dma_dir); + + sas_free_task(task); +out: + return res; +} + +static void sas_sata_propagate_sas_addr(struct domain_device *dev) +{ + unsigned long flags; + struct asd_sas_port *port = dev->port; + struct asd_sas_phy *phy; + + BUG_ON(dev->parent); + + memcpy(port->attached_sas_addr, dev->sas_addr, SAS_ADDR_SIZE); + spin_lock_irqsave(&port->phy_list_lock, flags); + list_for_each_entry(phy, &port->phy_list, port_phy_el) + memcpy(phy->attached_sas_addr, dev->sas_addr, SAS_ADDR_SIZE); + spin_unlock_irqrestore(&port->phy_list_lock, flags); +} + +#define ATA_IDENTIFY_DEV 0xEC +#define ATA_IDENTIFY_PACKET_DEV 0xA1 +#define ATA_SET_FEATURES 0xEF +#define ATA_FEATURE_PUP_STBY_SPIN_UP 0x07 + +/** + * sas_discover_sata_dev -- discover a STP/SATA device (SATA_DEV) + * @dev: STP/SATA device of interest (ATA/ATAPI) + * + * The LLDD has already been notified of this device, so that we can + * send FISes to it. Here we try to get IDENTIFY DEVICE or IDENTIFY + * PACKET DEVICE, if ATAPI device, so that the LLDD can fine-tune its + * performance for this device. + */ +static int sas_discover_sata_dev(struct domain_device *dev) +{ + int res; + __le16 *identify_x; + u8 command; + + identify_x = kzalloc(512, GFP_KERNEL); + if (!identify_x) + return -ENOMEM; + + if (dev->sata_dev.command_set == ATA_COMMAND_SET) { + dev->sata_dev.identify_device = identify_x; + command = ATA_IDENTIFY_DEV; + } else { + dev->sata_dev.identify_packet_device = identify_x; + command = ATA_IDENTIFY_PACKET_DEV; + } + + res = sas_issue_ata_cmd(dev, command, 0, identify_x, 512, + PCI_DMA_FROMDEVICE); + if (res) + goto out_err; + + /* lives on the media? */ + if (le16_to_cpu(identify_x[0]) & 4) { + /* incomplete response */ + SAS_DPRINTK("sending SET FEATURE/PUP_STBY_SPIN_UP to " + "dev %llx\n", SAS_ADDR(dev->sas_addr)); + if (!le16_to_cpu(identify_x[83] & (1<<6))) + goto cont1; + res = sas_issue_ata_cmd(dev, ATA_SET_FEATURES, + ATA_FEATURE_PUP_STBY_SPIN_UP, + NULL, 0, PCI_DMA_NONE); + if (res) + goto cont1; + + schedule_timeout_interruptible(5*HZ); /* More time? */ + res = sas_issue_ata_cmd(dev, command, 0, identify_x, 512, + PCI_DMA_FROMDEVICE); + if (res) + goto out_err; + } +cont1: + /* Get WWN */ + if (dev->port->oob_mode != SATA_OOB_MODE) { + memcpy(dev->sas_addr, dev->sata_dev.rps_resp.rps.stp_sas_addr, + SAS_ADDR_SIZE); + } else if (dev->sata_dev.command_set == ATA_COMMAND_SET && + (le16_to_cpu(dev->sata_dev.identify_device[108]) & 0xF000) + == 0x5000) { + int i; + + for (i = 0; i < 4; i++) { + dev->sas_addr[2*i] = + (le16_to_cpu(dev->sata_dev.identify_device[108+i]) & 0xFF00) >> 8; + dev->sas_addr[2*i+1] = + le16_to_cpu(dev->sata_dev.identify_device[108+i]) & 0x00FF; + } + } + sas_hash_addr(dev->hashed_sas_addr, dev->sas_addr); + if (!dev->parent) + sas_sata_propagate_sas_addr(dev); + + /* XXX Hint: register this SATA device with SATL. + When this returns, dev->sata_dev->lu is alive and + present. + sas_satl_register_dev(dev); + */ + return 0; +out_err: + dev->sata_dev.identify_packet_device = NULL; + dev->sata_dev.identify_device = NULL; + kfree(identify_x); + return res; +} + +static int sas_discover_sata_pm(struct domain_device *dev) +{ + return -ENODEV; +} + +int sas_notify_lldd_dev_found(struct domain_device *dev) +{ + int res = 0; + struct sas_ha_struct *sas_ha = dev->port->ha; + struct Scsi_Host *shost = sas_ha->core.shost; + struct sas_internal *i = to_sas_internal(shost->transportt); + + if (i->dft->lldd_dev_found) { + res = i->dft->lldd_dev_found(dev); + if (res) { + printk("sas: driver on pcidev %s cannot handle " + "device %llx, error:%d\n", + pci_name(sas_ha->pcidev), + SAS_ADDR(dev->sas_addr), res); + } + } + return res; +} + + +void sas_notify_lldd_dev_gone(struct domain_device *dev) +{ + struct sas_ha_struct *sas_ha = dev->port->ha; + struct Scsi_Host *shost = sas_ha->core.shost; + struct sas_internal *i = to_sas_internal(shost->transportt); + + if (i->dft->lldd_dev_gone) + i->dft->lldd_dev_gone(dev); +} + +/* ---------- Common/dispatchers ---------- */ + +/** + * sas_discover_sata -- discover an STP/SATA domain device + * @dev: pointer to struct domain_device of interest + * + * First we notify the LLDD of this device, so we can send frames to + * it. Then depending on the type of device we call the appropriate + * discover functions. Once device discover is done, we notify the + * LLDD so that it can fine-tune its parameters for the device, by + * removing it and then adding it. That is, the second time around, + * the driver would have certain fields, that it is looking at, set. + * Finally we initialize the kobj so that the device can be added to + * the system at registration time. Devices directly attached to a HA + * port, have no parents. All other devices do, and should have their + * "parent" pointer set appropriately before calling this function. + */ +int sas_discover_sata(struct domain_device *dev) +{ + int res; + + sas_get_ata_command_set(dev); + + res = sas_notify_lldd_dev_found(dev); + if (res) + return res; + + switch (dev->dev_type) { + case SATA_DEV: + res = sas_discover_sata_dev(dev); + break; + case SATA_PM: + res = sas_discover_sata_pm(dev); + break; + default: + break; + } + + sas_notify_lldd_dev_gone(dev); + if (!res) { + sas_notify_lldd_dev_found(dev); + } + return res; +} + +/** + * sas_discover_end_dev -- discover an end device (SSP, etc) + * @end: pointer to domain device of interest + * + * See comment in sas_discover_sata(). + */ +int sas_discover_end_dev(struct domain_device *dev) +{ + int res; + + res = sas_notify_lldd_dev_found(dev); + if (res) + return res; + + res = sas_rphy_add(dev->rphy); + if (res) + goto out_err; + + /* do this to get the end device port attributes which will have + * been scanned in sas_rphy_add */ + sas_notify_lldd_dev_gone(dev); + sas_notify_lldd_dev_found(dev); + + return 0; + +out_err: + sas_notify_lldd_dev_gone(dev); + return res; +} + +/* ---------- Device registration and unregistration ---------- */ + +static inline void sas_unregister_common_dev(struct domain_device *dev) +{ + sas_notify_lldd_dev_gone(dev); + if (!dev->parent) + dev->port->port_dev = NULL; + else + list_del_init(&dev->siblings); + list_del_init(&dev->dev_list_node); +} + +void sas_unregister_dev(struct domain_device *dev) +{ + if (dev->rphy) { + sas_remove_children(&dev->rphy->dev); + sas_rphy_delete(dev->rphy); + dev->rphy = NULL; + } + if (dev->dev_type == EDGE_DEV || dev->dev_type == FANOUT_DEV) { + /* remove the phys and ports, everything else should be gone */ + kfree(dev->ex_dev.ex_phy); + dev->ex_dev.ex_phy = NULL; + } + sas_unregister_common_dev(dev); +} + +void sas_unregister_domain_devices(struct asd_sas_port *port) +{ + struct domain_device *dev, *n; + + list_for_each_entry_safe_reverse(dev,n,&port->dev_list,dev_list_node) + sas_unregister_dev(dev); + + port->port->rphy = NULL; + +} + +/* ---------- Discovery and Revalidation ---------- */ + +/** + * sas_discover_domain -- discover the domain + * @port: port to the domain of interest + * + * NOTE: this process _must_ quit (return) as soon as any connection + * errors are encountered. Connection recovery is done elsewhere. + * Discover process only interrogates devices in order to discover the + * domain. + */ +static void sas_discover_domain(void *data) +{ + int error = 0; + struct asd_sas_port *port = data; + + sas_begin_event(DISCE_DISCOVER_DOMAIN, &port->disc.disc_event_lock, + &port->disc.pending); + + if (port->port_dev) + return ; + else { + error = sas_get_port_device(port); + if (error) + return; + } + + SAS_DPRINTK("DOING DISCOVERY on port %d, pid:%d\n", port->id, + current->pid); + + switch (port->port_dev->dev_type) { + case SAS_END_DEV: + error = sas_discover_end_dev(port->port_dev); + break; + case EDGE_DEV: + case FANOUT_DEV: + error = sas_discover_root_expander(port->port_dev); + break; + case SATA_DEV: + case SATA_PM: + error = sas_discover_sata(port->port_dev); + break; + default: + SAS_DPRINTK("unhandled device %d\n", port->port_dev->dev_type); + break; + } + + if (error) { + kfree(port->port_dev); /* not kobject_register-ed yet */ + port->port_dev = NULL; + } + + SAS_DPRINTK("DONE DISCOVERY on port %d, pid:%d, result:%d\n", port->id, + current->pid, error); +} + +static void sas_revalidate_domain(void *data) +{ + int res = 0; + struct asd_sas_port *port = data; + + sas_begin_event(DISCE_REVALIDATE_DOMAIN, &port->disc.disc_event_lock, + &port->disc.pending); + + SAS_DPRINTK("REVALIDATING DOMAIN on port %d, pid:%d\n", port->id, + current->pid); + if (port->port_dev) + res = sas_ex_revalidate_domain(port->port_dev); + + SAS_DPRINTK("done REVALIDATING DOMAIN on port %d, pid:%d, res 0x%x\n", + port->id, current->pid, res); +} + +/* ---------- Events ---------- */ + +int sas_discover_event(struct asd_sas_port *port, enum discover_event ev) +{ + struct sas_discovery *disc; + + if (!port) + return 0; + disc = &port->disc; + + BUG_ON(ev >= DISC_NUM_EVENTS); + + sas_queue_event(ev, &disc->disc_event_lock, &disc->pending, + &disc->disc_work[ev], port->ha->core.shost); + + return 0; +} + +/** + * sas_init_disc -- initialize the discovery struct in the port + * @port: pointer to struct port + * + * Called when the ports are being initialized. + */ +void sas_init_disc(struct sas_discovery *disc, struct asd_sas_port *port) +{ + int i; + + static void (*sas_event_fns[DISC_NUM_EVENTS])(void *) = { + [DISCE_DISCOVER_DOMAIN] = sas_discover_domain, + [DISCE_REVALIDATE_DOMAIN] = sas_revalidate_domain, + }; + + spin_lock_init(&disc->disc_event_lock); + disc->pending = 0; + for (i = 0; i < DISC_NUM_EVENTS; i++) + INIT_WORK(&disc->disc_work[i], sas_event_fns[i], port); +} diff --git a/drivers/scsi/libsas/sas_dump.c b/drivers/scsi/libsas/sas_dump.c new file mode 100644 index 000000000000..f1246d2c9bef --- /dev/null +++ b/drivers/scsi/libsas/sas_dump.c @@ -0,0 +1,76 @@ +/* + * Serial Attached SCSI (SAS) Dump/Debugging routines + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "sas_dump.h" + +#ifdef SAS_DEBUG + +static const char *sas_hae_str[] = { + [0] = "HAE_RESET", +}; + +static const char *sas_porte_str[] = { + [0] = "PORTE_BYTES_DMAED", + [1] = "PORTE_BROADCAST_RCVD", + [2] = "PORTE_LINK_RESET_ERR", + [3] = "PORTE_TIMER_EVENT", + [4] = "PORTE_HARD_RESET", +}; + +static const char *sas_phye_str[] = { + [0] = "PHYE_LOSS_OF_SIGNAL", + [1] = "PHYE_OOB_DONE", + [2] = "PHYE_OOB_ERROR", + [3] = "PHYE_SPINUP_HOLD", +}; + +void sas_dprint_porte(int phyid, enum port_event pe) +{ + SAS_DPRINTK("phy%d: port event: %s\n", phyid, sas_porte_str[pe]); +} +void sas_dprint_phye(int phyid, enum phy_event pe) +{ + SAS_DPRINTK("phy%d: phy event: %s\n", phyid, sas_phye_str[pe]); +} + +void sas_dprint_hae(struct sas_ha_struct *sas_ha, enum ha_event he) +{ + SAS_DPRINTK("ha %s: %s event\n", pci_name(sas_ha->pcidev), + sas_hae_str[he]); +} + +void sas_dump_port(struct asd_sas_port *port) +{ + SAS_DPRINTK("port%d: class:0x%x\n", port->id, port->class); + SAS_DPRINTK("port%d: sas_addr:%llx\n", port->id, + SAS_ADDR(port->sas_addr)); + SAS_DPRINTK("port%d: attached_sas_addr:%llx\n", port->id, + SAS_ADDR(port->attached_sas_addr)); + SAS_DPRINTK("port%d: iproto:0x%x\n", port->id, port->iproto); + SAS_DPRINTK("port%d: tproto:0x%x\n", port->id, port->tproto); + SAS_DPRINTK("port%d: oob_mode:0x%x\n", port->id, port->oob_mode); + SAS_DPRINTK("port%d: num_phys:%d\n", port->id, port->num_phys); +} + +#endif /* SAS_DEBUG */ diff --git a/drivers/scsi/libsas/sas_dump.h b/drivers/scsi/libsas/sas_dump.h new file mode 100644 index 000000000000..47b45d4f5258 --- /dev/null +++ b/drivers/scsi/libsas/sas_dump.h @@ -0,0 +1,42 @@ +/* + * Serial Attached SCSI (SAS) Dump/Debugging routines header file + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "sas_internal.h" + +#ifdef SAS_DEBUG + +void sas_dprint_porte(int phyid, enum port_event pe); +void sas_dprint_phye(int phyid, enum phy_event pe); +void sas_dprint_hae(struct sas_ha_struct *sas_ha, enum ha_event he); +void sas_dump_port(struct asd_sas_port *port); + +#else /* SAS_DEBUG */ + +static inline void sas_dprint_porte(int phyid, enum port_event pe) { } +static inline void sas_dprint_phye(int phyid, enum phy_event pe) { } +static inline void sas_dprint_hae(struct sas_ha_struct *sas_ha, + enum ha_event he) { } +static inline void sas_dump_port(struct asd_sas_port *port) { } + +#endif /* SAS_DEBUG */ diff --git a/drivers/scsi/libsas/sas_event.c b/drivers/scsi/libsas/sas_event.c new file mode 100644 index 000000000000..19110ed1c89c --- /dev/null +++ b/drivers/scsi/libsas/sas_event.c @@ -0,0 +1,75 @@ +/* + * Serial Attached SCSI (SAS) Event processing + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include "sas_internal.h" +#include "sas_dump.h" + +static void notify_ha_event(struct sas_ha_struct *sas_ha, enum ha_event event) +{ + BUG_ON(event >= HA_NUM_EVENTS); + + sas_queue_event(event, &sas_ha->event_lock, &sas_ha->pending, + &sas_ha->ha_events[event], sas_ha->core.shost); +} + +static void notify_port_event(struct asd_sas_phy *phy, enum port_event event) +{ + struct sas_ha_struct *ha = phy->ha; + + BUG_ON(event >= PORT_NUM_EVENTS); + + sas_queue_event(event, &ha->event_lock, &phy->port_events_pending, + &phy->port_events[event], ha->core.shost); +} + +static void notify_phy_event(struct asd_sas_phy *phy, enum phy_event event) +{ + struct sas_ha_struct *ha = phy->ha; + + BUG_ON(event >= PHY_NUM_EVENTS); + + sas_queue_event(event, &ha->event_lock, &phy->phy_events_pending, + &phy->phy_events[event], ha->core.shost); +} + +int sas_init_events(struct sas_ha_struct *sas_ha) +{ + static void (*sas_ha_event_fns[HA_NUM_EVENTS])(void *) = { + [HAE_RESET] = sas_hae_reset, + }; + + int i; + + spin_lock_init(&sas_ha->event_lock); + + for (i = 0; i < HA_NUM_EVENTS; i++) + INIT_WORK(&sas_ha->ha_events[i], sas_ha_event_fns[i], sas_ha); + + sas_ha->notify_ha_event = notify_ha_event; + sas_ha->notify_port_event = notify_port_event; + sas_ha->notify_phy_event = notify_phy_event; + + return 0; +} diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c new file mode 100644 index 000000000000..b653a263f76a --- /dev/null +++ b/drivers/scsi/libsas/sas_expander.c @@ -0,0 +1,1862 @@ +/* + * Serial Attached SCSI (SAS) Expander discovery and configuration + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include + +#include "sas_internal.h" + +#include +#include +#include "../scsi_sas_internal.h" + +static int sas_discover_expander(struct domain_device *dev); +static int sas_configure_routing(struct domain_device *dev, u8 *sas_addr); +static int sas_configure_phy(struct domain_device *dev, int phy_id, + u8 *sas_addr, int include); +static int sas_disable_routing(struct domain_device *dev, u8 *sas_addr); + +#if 0 +/* FIXME: smp needs to migrate into the sas class */ +static ssize_t smp_portal_read(struct kobject *, char *, loff_t, size_t); +static ssize_t smp_portal_write(struct kobject *, char *, loff_t, size_t); +#endif + +/* ---------- SMP task management ---------- */ + +static void smp_task_timedout(unsigned long _task) +{ + struct sas_task *task = (void *) _task; + unsigned long flags; + + spin_lock_irqsave(&task->task_state_lock, flags); + if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) + task->task_state_flags |= SAS_TASK_STATE_ABORTED; + spin_unlock_irqrestore(&task->task_state_lock, flags); + + complete(&task->completion); +} + +static void smp_task_done(struct sas_task *task) +{ + if (!del_timer(&task->timer)) + return; + complete(&task->completion); +} + +/* Give it some long enough timeout. In seconds. */ +#define SMP_TIMEOUT 10 + +static int smp_execute_task(struct domain_device *dev, void *req, int req_size, + void *resp, int resp_size) +{ + int res; + struct sas_task *task = sas_alloc_task(GFP_KERNEL); + struct sas_internal *i = + to_sas_internal(dev->port->ha->core.shost->transportt); + + if (!task) + return -ENOMEM; + + task->dev = dev; + task->task_proto = dev->tproto; + sg_init_one(&task->smp_task.smp_req, req, req_size); + sg_init_one(&task->smp_task.smp_resp, resp, resp_size); + + task->task_done = smp_task_done; + + task->timer.data = (unsigned long) task; + task->timer.function = smp_task_timedout; + task->timer.expires = jiffies + SMP_TIMEOUT*HZ; + add_timer(&task->timer); + + res = i->dft->lldd_execute_task(task, 1, GFP_KERNEL); + + if (res) { + del_timer(&task->timer); + SAS_DPRINTK("executing SMP task failed:%d\n", res); + goto ex_err; + } + + wait_for_completion(&task->completion); + res = -ETASK; + if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) { + SAS_DPRINTK("smp task timed out or aborted\n"); + i->dft->lldd_abort_task(task); + if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) { + SAS_DPRINTK("SMP task aborted and not done\n"); + goto ex_err; + } + } + if (task->task_status.resp == SAS_TASK_COMPLETE && + task->task_status.stat == SAM_GOOD) + res = 0; + else + SAS_DPRINTK("%s: task to dev %016llx response: 0x%x " + "status 0x%x\n", __FUNCTION__, + SAS_ADDR(dev->sas_addr), + task->task_status.resp, + task->task_status.stat); +ex_err: + sas_free_task(task); + return res; +} + +/* ---------- Allocations ---------- */ + +static inline void *alloc_smp_req(int size) +{ + u8 *p = kzalloc(size, GFP_KERNEL); + if (p) + p[0] = SMP_REQUEST; + return p; +} + +static inline void *alloc_smp_resp(int size) +{ + return kzalloc(size, GFP_KERNEL); +} + +/* ---------- Expander configuration ---------- */ + +static void sas_set_ex_phy(struct domain_device *dev, int phy_id, + void *disc_resp) +{ + struct expander_device *ex = &dev->ex_dev; + struct ex_phy *phy = &ex->ex_phy[phy_id]; + struct smp_resp *resp = disc_resp; + struct discover_resp *dr = &resp->disc; + struct sas_rphy *rphy = dev->rphy; + int rediscover = (phy->phy != NULL); + + if (!rediscover) { + phy->phy = sas_phy_alloc(&rphy->dev, phy_id); + + /* FIXME: error_handling */ + BUG_ON(!phy->phy); + } + + switch (resp->result) { + case SMP_RESP_PHY_VACANT: + phy->phy_state = PHY_VACANT; + return; + default: + phy->phy_state = PHY_NOT_PRESENT; + return; + case SMP_RESP_FUNC_ACC: + phy->phy_state = PHY_EMPTY; /* do not know yet */ + break; + } + + phy->phy_id = phy_id; + phy->attached_dev_type = dr->attached_dev_type; + phy->linkrate = dr->linkrate; + phy->attached_sata_host = dr->attached_sata_host; + phy->attached_sata_dev = dr->attached_sata_dev; + phy->attached_sata_ps = dr->attached_sata_ps; + phy->attached_iproto = dr->iproto << 1; + phy->attached_tproto = dr->tproto << 1; + memcpy(phy->attached_sas_addr, dr->attached_sas_addr, SAS_ADDR_SIZE); + phy->attached_phy_id = dr->attached_phy_id; + phy->phy_change_count = dr->change_count; + phy->routing_attr = dr->routing_attr; + phy->virtual = dr->virtual; + phy->last_da_index = -1; + + phy->phy->identify.initiator_port_protocols = phy->attached_iproto; + phy->phy->identify.target_port_protocols = phy->attached_tproto; + phy->phy->identify.phy_identifier = phy_id; + phy->phy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS; + phy->phy->maximum_linkrate_hw = SAS_LINK_RATE_3_0_GBPS; + phy->phy->minimum_linkrate = SAS_LINK_RATE_1_5_GBPS; + phy->phy->maximum_linkrate = SAS_LINK_RATE_3_0_GBPS; + switch (phy->linkrate) { + case PHY_LINKRATE_1_5: + phy->phy->negotiated_linkrate = SAS_LINK_RATE_1_5_GBPS; + break; + case PHY_LINKRATE_3: + phy->phy->negotiated_linkrate = SAS_LINK_RATE_3_0_GBPS; + break; + case PHY_LINKRATE_6: + phy->phy->negotiated_linkrate = SAS_LINK_RATE_6_0_GBPS; + break; + default: + phy->phy->negotiated_linkrate = SAS_LINK_RATE_UNKNOWN; + break; + } + + if (!rediscover) + sas_phy_add(phy->phy); + + SAS_DPRINTK("ex %016llx phy%02d:%c attached: %016llx\n", + SAS_ADDR(dev->sas_addr), phy->phy_id, + phy->routing_attr == TABLE_ROUTING ? 'T' : + phy->routing_attr == DIRECT_ROUTING ? 'D' : + phy->routing_attr == SUBTRACTIVE_ROUTING ? 'S' : '?', + SAS_ADDR(phy->attached_sas_addr)); + + return; +} + +#define DISCOVER_REQ_SIZE 16 +#define DISCOVER_RESP_SIZE 56 + +static int sas_ex_phy_discover(struct domain_device *dev, int single) +{ + struct expander_device *ex = &dev->ex_dev; + int res = 0; + u8 *disc_req; + u8 *disc_resp; + + disc_req = alloc_smp_req(DISCOVER_REQ_SIZE); + if (!disc_req) + return -ENOMEM; + + disc_resp = alloc_smp_req(DISCOVER_RESP_SIZE); + if (!disc_resp) { + kfree(disc_req); + return -ENOMEM; + } + + disc_req[1] = SMP_DISCOVER; + + if (0 <= single && single < ex->num_phys) { + disc_req[9] = single; + res = smp_execute_task(dev, disc_req, DISCOVER_REQ_SIZE, + disc_resp, DISCOVER_RESP_SIZE); + if (res) + goto out_err; + sas_set_ex_phy(dev, single, disc_resp); + } else { + int i; + + for (i = 0; i < ex->num_phys; i++) { + disc_req[9] = i; + res = smp_execute_task(dev, disc_req, + DISCOVER_REQ_SIZE, disc_resp, + DISCOVER_RESP_SIZE); + if (res) + goto out_err; + sas_set_ex_phy(dev, i, disc_resp); + } + } +out_err: + kfree(disc_resp); + kfree(disc_req); + return res; +} + +static int sas_expander_discover(struct domain_device *dev) +{ + struct expander_device *ex = &dev->ex_dev; + int res = -ENOMEM; + + ex->ex_phy = kzalloc(sizeof(*ex->ex_phy)*ex->num_phys, GFP_KERNEL); + if (!ex->ex_phy) + return -ENOMEM; + + res = sas_ex_phy_discover(dev, -1); + if (res) + goto out_err; + + return 0; + out_err: + kfree(ex->ex_phy); + ex->ex_phy = NULL; + return res; +} + +#define MAX_EXPANDER_PHYS 128 + +static void ex_assign_report_general(struct domain_device *dev, + struct smp_resp *resp) +{ + struct report_general_resp *rg = &resp->rg; + + dev->ex_dev.ex_change_count = be16_to_cpu(rg->change_count); + dev->ex_dev.max_route_indexes = be16_to_cpu(rg->route_indexes); + dev->ex_dev.num_phys = min(rg->num_phys, (u8)MAX_EXPANDER_PHYS); + dev->ex_dev.conf_route_table = rg->conf_route_table; + dev->ex_dev.configuring = rg->configuring; + memcpy(dev->ex_dev.enclosure_logical_id, rg->enclosure_logical_id, 8); +} + +#define RG_REQ_SIZE 8 +#define RG_RESP_SIZE 32 + +static int sas_ex_general(struct domain_device *dev) +{ + u8 *rg_req; + struct smp_resp *rg_resp; + int res; + int i; + + rg_req = alloc_smp_req(RG_REQ_SIZE); + if (!rg_req) + return -ENOMEM; + + rg_resp = alloc_smp_resp(RG_RESP_SIZE); + if (!rg_resp) { + kfree(rg_req); + return -ENOMEM; + } + + rg_req[1] = SMP_REPORT_GENERAL; + + for (i = 0; i < 5; i++) { + res = smp_execute_task(dev, rg_req, RG_REQ_SIZE, rg_resp, + RG_RESP_SIZE); + + if (res) { + SAS_DPRINTK("RG to ex %016llx failed:0x%x\n", + SAS_ADDR(dev->sas_addr), res); + goto out; + } else if (rg_resp->result != SMP_RESP_FUNC_ACC) { + SAS_DPRINTK("RG:ex %016llx returned SMP result:0x%x\n", + SAS_ADDR(dev->sas_addr), rg_resp->result); + res = rg_resp->result; + goto out; + } + + ex_assign_report_general(dev, rg_resp); + + if (dev->ex_dev.configuring) { + SAS_DPRINTK("RG: ex %llx self-configuring...\n", + SAS_ADDR(dev->sas_addr)); + schedule_timeout_interruptible(5*HZ); + } else + break; + } +out: + kfree(rg_req); + kfree(rg_resp); + return res; +} + +static void ex_assign_manuf_info(struct domain_device *dev, void + *_mi_resp) +{ + u8 *mi_resp = _mi_resp; + struct sas_rphy *rphy = dev->rphy; + struct sas_expander_device *edev = rphy_to_expander_device(rphy); + + memcpy(edev->vendor_id, mi_resp + 12, SAS_EXPANDER_VENDOR_ID_LEN); + memcpy(edev->product_id, mi_resp + 20, SAS_EXPANDER_PRODUCT_ID_LEN); + memcpy(edev->product_rev, mi_resp + 36, + SAS_EXPANDER_PRODUCT_REV_LEN); + + if (mi_resp[8] & 1) { + memcpy(edev->component_vendor_id, mi_resp + 40, + SAS_EXPANDER_COMPONENT_VENDOR_ID_LEN); + edev->component_id = mi_resp[48] << 8 | mi_resp[49]; + edev->component_revision_id = mi_resp[50]; + } +} + +#define MI_REQ_SIZE 8 +#define MI_RESP_SIZE 64 + +static int sas_ex_manuf_info(struct domain_device *dev) +{ + u8 *mi_req; + u8 *mi_resp; + int res; + + mi_req = alloc_smp_req(MI_REQ_SIZE); + if (!mi_req) + return -ENOMEM; + + mi_resp = alloc_smp_resp(MI_RESP_SIZE); + if (!mi_resp) { + kfree(mi_req); + return -ENOMEM; + } + + mi_req[1] = SMP_REPORT_MANUF_INFO; + + res = smp_execute_task(dev, mi_req, MI_REQ_SIZE, mi_resp,MI_RESP_SIZE); + if (res) { + SAS_DPRINTK("MI: ex %016llx failed:0x%x\n", + SAS_ADDR(dev->sas_addr), res); + goto out; + } else if (mi_resp[2] != SMP_RESP_FUNC_ACC) { + SAS_DPRINTK("MI ex %016llx returned SMP result:0x%x\n", + SAS_ADDR(dev->sas_addr), mi_resp[2]); + goto out; + } + + ex_assign_manuf_info(dev, mi_resp); +out: + kfree(mi_req); + kfree(mi_resp); + return res; +} + +#define PC_REQ_SIZE 44 +#define PC_RESP_SIZE 8 + +int sas_smp_phy_control(struct domain_device *dev, int phy_id, + enum phy_func phy_func) +{ + u8 *pc_req; + u8 *pc_resp; + int res; + + pc_req = alloc_smp_req(PC_REQ_SIZE); + if (!pc_req) + return -ENOMEM; + + pc_resp = alloc_smp_resp(PC_RESP_SIZE); + if (!pc_resp) { + kfree(pc_req); + return -ENOMEM; + } + + pc_req[1] = SMP_PHY_CONTROL; + pc_req[9] = phy_id; + pc_req[10]= phy_func; + + res = smp_execute_task(dev, pc_req, PC_REQ_SIZE, pc_resp,PC_RESP_SIZE); + + kfree(pc_resp); + kfree(pc_req); + return res; +} + +static void sas_ex_disable_phy(struct domain_device *dev, int phy_id) +{ + struct expander_device *ex = &dev->ex_dev; + struct ex_phy *phy = &ex->ex_phy[phy_id]; + + sas_smp_phy_control(dev, phy_id, PHY_FUNC_DISABLE); + phy->linkrate = PHY_DISABLED; +} + +static void sas_ex_disable_port(struct domain_device *dev, u8 *sas_addr) +{ + struct expander_device *ex = &dev->ex_dev; + int i; + + for (i = 0; i < ex->num_phys; i++) { + struct ex_phy *phy = &ex->ex_phy[i]; + + if (phy->phy_state == PHY_VACANT || + phy->phy_state == PHY_NOT_PRESENT) + continue; + + if (SAS_ADDR(phy->attached_sas_addr) == SAS_ADDR(sas_addr)) + sas_ex_disable_phy(dev, i); + } +} + +static int sas_dev_present_in_domain(struct asd_sas_port *port, + u8 *sas_addr) +{ + struct domain_device *dev; + + if (SAS_ADDR(port->sas_addr) == SAS_ADDR(sas_addr)) + return 1; + list_for_each_entry(dev, &port->dev_list, dev_list_node) { + if (SAS_ADDR(dev->sas_addr) == SAS_ADDR(sas_addr)) + return 1; + } + return 0; +} + +#define RPEL_REQ_SIZE 16 +#define RPEL_RESP_SIZE 32 +int sas_smp_get_phy_events(struct sas_phy *phy) +{ + int res; + struct sas_rphy *rphy = dev_to_rphy(phy->dev.parent); + struct domain_device *dev = sas_find_dev_by_rphy(rphy); + u8 *req = alloc_smp_req(RPEL_REQ_SIZE); + u8 *resp = kzalloc(RPEL_RESP_SIZE, GFP_KERNEL); + + if (!resp) + return -ENOMEM; + + req[1] = SMP_REPORT_PHY_ERR_LOG; + req[9] = phy->number; + + res = smp_execute_task(dev, req, RPEL_REQ_SIZE, + resp, RPEL_RESP_SIZE); + + if (!res) + goto out; + + phy->invalid_dword_count = scsi_to_u32(&resp[12]); + phy->running_disparity_error_count = scsi_to_u32(&resp[16]); + phy->loss_of_dword_sync_count = scsi_to_u32(&resp[20]); + phy->phy_reset_problem_count = scsi_to_u32(&resp[24]); + + out: + kfree(resp); + return res; + +} + +#define RPS_REQ_SIZE 16 +#define RPS_RESP_SIZE 60 + +static int sas_get_report_phy_sata(struct domain_device *dev, + int phy_id, + struct smp_resp *rps_resp) +{ + int res; + u8 *rps_req = alloc_smp_req(RPS_REQ_SIZE); + + if (!rps_req) + return -ENOMEM; + + rps_req[1] = SMP_REPORT_PHY_SATA; + rps_req[9] = phy_id; + + res = smp_execute_task(dev, rps_req, RPS_REQ_SIZE, + rps_resp, RPS_RESP_SIZE); + + kfree(rps_req); + return 0; +} + +static void sas_ex_get_linkrate(struct domain_device *parent, + struct domain_device *child, + struct ex_phy *parent_phy) +{ + struct expander_device *parent_ex = &parent->ex_dev; + struct sas_port *port; + int i; + + child->pathways = 0; + + port = parent_phy->port; + + for (i = 0; i < parent_ex->num_phys; i++) { + struct ex_phy *phy = &parent_ex->ex_phy[i]; + + if (phy->phy_state == PHY_VACANT || + phy->phy_state == PHY_NOT_PRESENT) + continue; + + if (SAS_ADDR(phy->attached_sas_addr) == + SAS_ADDR(child->sas_addr)) { + + child->min_linkrate = min(parent->min_linkrate, + phy->linkrate); + child->max_linkrate = max(parent->max_linkrate, + phy->linkrate); + child->pathways++; + sas_port_add_phy(port, phy->phy); + } + } + child->linkrate = min(parent_phy->linkrate, child->max_linkrate); + child->pathways = min(child->pathways, parent->pathways); +} + +static struct domain_device *sas_ex_discover_end_dev( + struct domain_device *parent, int phy_id) +{ + struct expander_device *parent_ex = &parent->ex_dev; + struct ex_phy *phy = &parent_ex->ex_phy[phy_id]; + struct domain_device *child = NULL; + struct sas_rphy *rphy; + int res; + + if (phy->attached_sata_host || phy->attached_sata_ps) + return NULL; + + child = kzalloc(sizeof(*child), GFP_KERNEL); + if (!child) + return NULL; + + child->parent = parent; + child->port = parent->port; + child->iproto = phy->attached_iproto; + memcpy(child->sas_addr, phy->attached_sas_addr, SAS_ADDR_SIZE); + sas_hash_addr(child->hashed_sas_addr, child->sas_addr); + phy->port = sas_port_alloc(&parent->rphy->dev, phy_id); + BUG_ON(!phy->port); + /* FIXME: better error handling*/ + BUG_ON(sas_port_add(phy->port) != 0); + sas_ex_get_linkrate(parent, child, phy); + + if ((phy->attached_tproto & SAS_PROTO_STP) || phy->attached_sata_dev) { + child->dev_type = SATA_DEV; + if (phy->attached_tproto & SAS_PROTO_STP) + child->tproto = phy->attached_tproto; + if (phy->attached_sata_dev) + child->tproto |= SATA_DEV; + res = sas_get_report_phy_sata(parent, phy_id, + &child->sata_dev.rps_resp); + if (res) { + SAS_DPRINTK("report phy sata to %016llx:0x%x returned " + "0x%x\n", SAS_ADDR(parent->sas_addr), + phy_id, res); + kfree(child); + return NULL; + } + memcpy(child->frame_rcvd, &child->sata_dev.rps_resp.rps.fis, + sizeof(struct dev_to_host_fis)); + sas_init_dev(child); + res = sas_discover_sata(child); + if (res) { + SAS_DPRINTK("sas_discover_sata() for device %16llx at " + "%016llx:0x%x returned 0x%x\n", + SAS_ADDR(child->sas_addr), + SAS_ADDR(parent->sas_addr), phy_id, res); + kfree(child); + return NULL; + } + } else if (phy->attached_tproto & SAS_PROTO_SSP) { + child->dev_type = SAS_END_DEV; + rphy = sas_end_device_alloc(phy->port); + /* FIXME: error handling */ + BUG_ON(!rphy); + child->tproto = phy->attached_tproto; + sas_init_dev(child); + + child->rphy = rphy; + sas_fill_in_rphy(child, rphy); + + spin_lock(&parent->port->dev_list_lock); + list_add_tail(&child->dev_list_node, &parent->port->dev_list); + spin_unlock(&parent->port->dev_list_lock); + + res = sas_discover_end_dev(child); + if (res) { + SAS_DPRINTK("sas_discover_end_dev() for device %16llx " + "at %016llx:0x%x returned 0x%x\n", + SAS_ADDR(child->sas_addr), + SAS_ADDR(parent->sas_addr), phy_id, res); + /* FIXME: this kfrees list elements without removing them */ + //kfree(child); + return NULL; + } + } else { + SAS_DPRINTK("target proto 0x%x at %016llx:0x%x not handled\n", + phy->attached_tproto, SAS_ADDR(parent->sas_addr), + phy_id); + } + + list_add_tail(&child->siblings, &parent_ex->children); + return child; +} + +static struct domain_device *sas_ex_discover_expander( + struct domain_device *parent, int phy_id) +{ + struct sas_expander_device *parent_ex = rphy_to_expander_device(parent->rphy); + struct ex_phy *phy = &parent->ex_dev.ex_phy[phy_id]; + struct domain_device *child = NULL; + struct sas_rphy *rphy; + struct sas_expander_device *edev; + struct asd_sas_port *port; + int res; + + if (phy->routing_attr == DIRECT_ROUTING) { + SAS_DPRINTK("ex %016llx:0x%x:D <--> ex %016llx:0x%x is not " + "allowed\n", + SAS_ADDR(parent->sas_addr), phy_id, + SAS_ADDR(phy->attached_sas_addr), + phy->attached_phy_id); + return NULL; + } + child = kzalloc(sizeof(*child), GFP_KERNEL); + if (!child) + return NULL; + + phy->port = sas_port_alloc(&parent->rphy->dev, phy_id); + /* FIXME: better error handling */ + BUG_ON(sas_port_add(phy->port) != 0); + + + switch (phy->attached_dev_type) { + case EDGE_DEV: + rphy = sas_expander_alloc(phy->port, + SAS_EDGE_EXPANDER_DEVICE); + break; + case FANOUT_DEV: + rphy = sas_expander_alloc(phy->port, + SAS_FANOUT_EXPANDER_DEVICE); + break; + default: + rphy = NULL; /* shut gcc up */ + BUG(); + } + port = parent->port; + child->rphy = rphy; + edev = rphy_to_expander_device(rphy); + child->dev_type = phy->attached_dev_type; + child->parent = parent; + child->port = port; + child->iproto = phy->attached_iproto; + child->tproto = phy->attached_tproto; + memcpy(child->sas_addr, phy->attached_sas_addr, SAS_ADDR_SIZE); + sas_hash_addr(child->hashed_sas_addr, child->sas_addr); + sas_ex_get_linkrate(parent, child, phy); + edev->level = parent_ex->level + 1; + parent->port->disc.max_level = max(parent->port->disc.max_level, + edev->level); + sas_init_dev(child); + sas_fill_in_rphy(child, rphy); + sas_rphy_add(rphy); + + spin_lock(&parent->port->dev_list_lock); + list_add_tail(&child->dev_list_node, &parent->port->dev_list); + spin_unlock(&parent->port->dev_list_lock); + + res = sas_discover_expander(child); + if (res) { + kfree(child); + return NULL; + } + list_add_tail(&child->siblings, &parent->ex_dev.children); + return child; +} + +static int sas_ex_discover_dev(struct domain_device *dev, int phy_id) +{ + struct expander_device *ex = &dev->ex_dev; + struct ex_phy *ex_phy = &ex->ex_phy[phy_id]; + struct domain_device *child = NULL; + int res = 0; + + /* Phy state */ + if (ex_phy->linkrate == PHY_SPINUP_HOLD) { + if (!sas_smp_phy_control(dev, phy_id, PHY_FUNC_LINK_RESET)) + res = sas_ex_phy_discover(dev, phy_id); + if (res) + return res; + } + + /* Parent and domain coherency */ + if (!dev->parent && (SAS_ADDR(ex_phy->attached_sas_addr) == + SAS_ADDR(dev->port->sas_addr))) { + sas_add_parent_port(dev, phy_id); + return 0; + } + if (dev->parent && (SAS_ADDR(ex_phy->attached_sas_addr) == + SAS_ADDR(dev->parent->sas_addr))) { + sas_add_parent_port(dev, phy_id); + if (ex_phy->routing_attr == TABLE_ROUTING) + sas_configure_phy(dev, phy_id, dev->port->sas_addr, 1); + return 0; + } + + if (sas_dev_present_in_domain(dev->port, ex_phy->attached_sas_addr)) + sas_ex_disable_port(dev, ex_phy->attached_sas_addr); + + if (ex_phy->attached_dev_type == NO_DEVICE) { + if (ex_phy->routing_attr == DIRECT_ROUTING) { + memset(ex_phy->attached_sas_addr, 0, SAS_ADDR_SIZE); + sas_configure_routing(dev, ex_phy->attached_sas_addr); + } + return 0; + } else if (ex_phy->linkrate == PHY_LINKRATE_UNKNOWN) + return 0; + + if (ex_phy->attached_dev_type != SAS_END_DEV && + ex_phy->attached_dev_type != FANOUT_DEV && + ex_phy->attached_dev_type != EDGE_DEV) { + SAS_DPRINTK("unknown device type(0x%x) attached to ex %016llx " + "phy 0x%x\n", ex_phy->attached_dev_type, + SAS_ADDR(dev->sas_addr), + phy_id); + return 0; + } + + res = sas_configure_routing(dev, ex_phy->attached_sas_addr); + if (res) { + SAS_DPRINTK("configure routing for dev %016llx " + "reported 0x%x. Forgotten\n", + SAS_ADDR(ex_phy->attached_sas_addr), res); + sas_disable_routing(dev, ex_phy->attached_sas_addr); + return res; + } + + switch (ex_phy->attached_dev_type) { + case SAS_END_DEV: + child = sas_ex_discover_end_dev(dev, phy_id); + break; + case FANOUT_DEV: + if (SAS_ADDR(dev->port->disc.fanout_sas_addr)) { + SAS_DPRINTK("second fanout expander %016llx phy 0x%x " + "attached to ex %016llx phy 0x%x\n", + SAS_ADDR(ex_phy->attached_sas_addr), + ex_phy->attached_phy_id, + SAS_ADDR(dev->sas_addr), + phy_id); + sas_ex_disable_phy(dev, phy_id); + break; + } else + memcpy(dev->port->disc.fanout_sas_addr, + ex_phy->attached_sas_addr, SAS_ADDR_SIZE); + /* fallthrough */ + case EDGE_DEV: + child = sas_ex_discover_expander(dev, phy_id); + break; + default: + break; + } + + if (child) { + int i; + + for (i = 0; i < ex->num_phys; i++) { + if (ex->ex_phy[i].phy_state == PHY_VACANT || + ex->ex_phy[i].phy_state == PHY_NOT_PRESENT) + continue; + + if (SAS_ADDR(ex->ex_phy[i].attached_sas_addr) == + SAS_ADDR(child->sas_addr)) + ex->ex_phy[i].phy_state= PHY_DEVICE_DISCOVERED; + } + } + + return res; +} + +static int sas_find_sub_addr(struct domain_device *dev, u8 *sub_addr) +{ + struct expander_device *ex = &dev->ex_dev; + int i; + + for (i = 0; i < ex->num_phys; i++) { + struct ex_phy *phy = &ex->ex_phy[i]; + + if (phy->phy_state == PHY_VACANT || + phy->phy_state == PHY_NOT_PRESENT) + continue; + + if ((phy->attached_dev_type == EDGE_DEV || + phy->attached_dev_type == FANOUT_DEV) && + phy->routing_attr == SUBTRACTIVE_ROUTING) { + + memcpy(sub_addr, phy->attached_sas_addr,SAS_ADDR_SIZE); + + return 1; + } + } + return 0; +} + +static int sas_check_level_subtractive_boundary(struct domain_device *dev) +{ + struct expander_device *ex = &dev->ex_dev; + struct domain_device *child; + u8 sub_addr[8] = {0, }; + + list_for_each_entry(child, &ex->children, siblings) { + if (child->dev_type != EDGE_DEV && + child->dev_type != FANOUT_DEV) + continue; + if (sub_addr[0] == 0) { + sas_find_sub_addr(child, sub_addr); + continue; + } else { + u8 s2[8]; + + if (sas_find_sub_addr(child, s2) && + (SAS_ADDR(sub_addr) != SAS_ADDR(s2))) { + + SAS_DPRINTK("ex %016llx->%016llx-?->%016llx " + "diverges from subtractive " + "boundary %016llx\n", + SAS_ADDR(dev->sas_addr), + SAS_ADDR(child->sas_addr), + SAS_ADDR(s2), + SAS_ADDR(sub_addr)); + + sas_ex_disable_port(child, s2); + } + } + } + return 0; +} +/** + * sas_ex_discover_devices -- discover devices attached to this expander + * dev: pointer to the expander domain device + * single: if you want to do a single phy, else set to -1; + * + * Configure this expander for use with its devices and register the + * devices of this expander. + */ +static int sas_ex_discover_devices(struct domain_device *dev, int single) +{ + struct expander_device *ex = &dev->ex_dev; + int i = 0, end = ex->num_phys; + int res = 0; + + if (0 <= single && single < end) { + i = single; + end = i+1; + } + + for ( ; i < end; i++) { + struct ex_phy *ex_phy = &ex->ex_phy[i]; + + if (ex_phy->phy_state == PHY_VACANT || + ex_phy->phy_state == PHY_NOT_PRESENT || + ex_phy->phy_state == PHY_DEVICE_DISCOVERED) + continue; + + switch (ex_phy->linkrate) { + case PHY_DISABLED: + case PHY_RESET_PROBLEM: + case PHY_PORT_SELECTOR: + continue; + default: + res = sas_ex_discover_dev(dev, i); + if (res) + break; + continue; + } + } + + if (!res) + sas_check_level_subtractive_boundary(dev); + + return res; +} + +static int sas_check_ex_subtractive_boundary(struct domain_device *dev) +{ + struct expander_device *ex = &dev->ex_dev; + int i; + u8 *sub_sas_addr = NULL; + + if (dev->dev_type != EDGE_DEV) + return 0; + + for (i = 0; i < ex->num_phys; i++) { + struct ex_phy *phy = &ex->ex_phy[i]; + + if (phy->phy_state == PHY_VACANT || + phy->phy_state == PHY_NOT_PRESENT) + continue; + + if ((phy->attached_dev_type == FANOUT_DEV || + phy->attached_dev_type == EDGE_DEV) && + phy->routing_attr == SUBTRACTIVE_ROUTING) { + + if (!sub_sas_addr) + sub_sas_addr = &phy->attached_sas_addr[0]; + else if (SAS_ADDR(sub_sas_addr) != + SAS_ADDR(phy->attached_sas_addr)) { + + SAS_DPRINTK("ex %016llx phy 0x%x " + "diverges(%016llx) on subtractive " + "boundary(%016llx). Disabled\n", + SAS_ADDR(dev->sas_addr), i, + SAS_ADDR(phy->attached_sas_addr), + SAS_ADDR(sub_sas_addr)); + sas_ex_disable_phy(dev, i); + } + } + } + return 0; +} + +static void sas_print_parent_topology_bug(struct domain_device *child, + struct ex_phy *parent_phy, + struct ex_phy *child_phy) +{ + static const char ra_char[] = { + [DIRECT_ROUTING] = 'D', + [SUBTRACTIVE_ROUTING] = 'S', + [TABLE_ROUTING] = 'T', + }; + static const char *ex_type[] = { + [EDGE_DEV] = "edge", + [FANOUT_DEV] = "fanout", + }; + struct domain_device *parent = child->parent; + + sas_printk("%s ex %016llx phy 0x%x <--> %s ex %016llx phy 0x%x " + "has %c:%c routing link!\n", + + ex_type[parent->dev_type], + SAS_ADDR(parent->sas_addr), + parent_phy->phy_id, + + ex_type[child->dev_type], + SAS_ADDR(child->sas_addr), + child_phy->phy_id, + + ra_char[parent_phy->routing_attr], + ra_char[child_phy->routing_attr]); +} + +static int sas_check_eeds(struct domain_device *child, + struct ex_phy *parent_phy, + struct ex_phy *child_phy) +{ + int res = 0; + struct domain_device *parent = child->parent; + + if (SAS_ADDR(parent->port->disc.fanout_sas_addr) != 0) { + res = -ENODEV; + SAS_DPRINTK("edge ex %016llx phy S:0x%x <--> edge ex %016llx " + "phy S:0x%x, while there is a fanout ex %016llx\n", + SAS_ADDR(parent->sas_addr), + parent_phy->phy_id, + SAS_ADDR(child->sas_addr), + child_phy->phy_id, + SAS_ADDR(parent->port->disc.fanout_sas_addr)); + } else if (SAS_ADDR(parent->port->disc.eeds_a) == 0) { + memcpy(parent->port->disc.eeds_a, parent->sas_addr, + SAS_ADDR_SIZE); + memcpy(parent->port->disc.eeds_b, child->sas_addr, + SAS_ADDR_SIZE); + } else if (((SAS_ADDR(parent->port->disc.eeds_a) == + SAS_ADDR(parent->sas_addr)) || + (SAS_ADDR(parent->port->disc.eeds_a) == + SAS_ADDR(child->sas_addr))) + && + ((SAS_ADDR(parent->port->disc.eeds_b) == + SAS_ADDR(parent->sas_addr)) || + (SAS_ADDR(parent->port->disc.eeds_b) == + SAS_ADDR(child->sas_addr)))) + ; + else { + res = -ENODEV; + SAS_DPRINTK("edge ex %016llx phy 0x%x <--> edge ex %016llx " + "phy 0x%x link forms a third EEDS!\n", + SAS_ADDR(parent->sas_addr), + parent_phy->phy_id, + SAS_ADDR(child->sas_addr), + child_phy->phy_id); + } + + return res; +} + +/* Here we spill over 80 columns. It is intentional. + */ +static int sas_check_parent_topology(struct domain_device *child) +{ + struct expander_device *child_ex = &child->ex_dev; + struct expander_device *parent_ex; + int i; + int res = 0; + + if (!child->parent) + return 0; + + if (child->parent->dev_type != EDGE_DEV && + child->parent->dev_type != FANOUT_DEV) + return 0; + + parent_ex = &child->parent->ex_dev; + + for (i = 0; i < parent_ex->num_phys; i++) { + struct ex_phy *parent_phy = &parent_ex->ex_phy[i]; + struct ex_phy *child_phy; + + if (parent_phy->phy_state == PHY_VACANT || + parent_phy->phy_state == PHY_NOT_PRESENT) + continue; + + if (SAS_ADDR(parent_phy->attached_sas_addr) != SAS_ADDR(child->sas_addr)) + continue; + + child_phy = &child_ex->ex_phy[parent_phy->attached_phy_id]; + + switch (child->parent->dev_type) { + case EDGE_DEV: + if (child->dev_type == FANOUT_DEV) { + if (parent_phy->routing_attr != SUBTRACTIVE_ROUTING || + child_phy->routing_attr != TABLE_ROUTING) { + sas_print_parent_topology_bug(child, parent_phy, child_phy); + res = -ENODEV; + } + } else if (parent_phy->routing_attr == SUBTRACTIVE_ROUTING) { + if (child_phy->routing_attr == SUBTRACTIVE_ROUTING) { + res = sas_check_eeds(child, parent_phy, child_phy); + } else if (child_phy->routing_attr != TABLE_ROUTING) { + sas_print_parent_topology_bug(child, parent_phy, child_phy); + res = -ENODEV; + } + } else if (parent_phy->routing_attr == TABLE_ROUTING && + child_phy->routing_attr != SUBTRACTIVE_ROUTING) { + sas_print_parent_topology_bug(child, parent_phy, child_phy); + res = -ENODEV; + } + break; + case FANOUT_DEV: + if (parent_phy->routing_attr != TABLE_ROUTING || + child_phy->routing_attr != SUBTRACTIVE_ROUTING) { + sas_print_parent_topology_bug(child, parent_phy, child_phy); + res = -ENODEV; + } + break; + default: + break; + } + } + + return res; +} + +#define RRI_REQ_SIZE 16 +#define RRI_RESP_SIZE 44 + +static int sas_configure_present(struct domain_device *dev, int phy_id, + u8 *sas_addr, int *index, int *present) +{ + int i, res = 0; + struct expander_device *ex = &dev->ex_dev; + struct ex_phy *phy = &ex->ex_phy[phy_id]; + u8 *rri_req; + u8 *rri_resp; + + *present = 0; + *index = 0; + + rri_req = alloc_smp_req(RRI_REQ_SIZE); + if (!rri_req) + return -ENOMEM; + + rri_resp = alloc_smp_resp(RRI_RESP_SIZE); + if (!rri_resp) { + kfree(rri_req); + return -ENOMEM; + } + + rri_req[1] = SMP_REPORT_ROUTE_INFO; + rri_req[9] = phy_id; + + for (i = 0; i < ex->max_route_indexes ; i++) { + *(__be16 *)(rri_req+6) = cpu_to_be16(i); + res = smp_execute_task(dev, rri_req, RRI_REQ_SIZE, rri_resp, + RRI_RESP_SIZE); + if (res) + goto out; + res = rri_resp[2]; + if (res == SMP_RESP_NO_INDEX) { + SAS_DPRINTK("overflow of indexes: dev %016llx " + "phy 0x%x index 0x%x\n", + SAS_ADDR(dev->sas_addr), phy_id, i); + goto out; + } else if (res != SMP_RESP_FUNC_ACC) { + SAS_DPRINTK("%s: dev %016llx phy 0x%x index 0x%x " + "result 0x%x\n", __FUNCTION__, + SAS_ADDR(dev->sas_addr), phy_id, i, res); + goto out; + } + if (SAS_ADDR(sas_addr) != 0) { + if (SAS_ADDR(rri_resp+16) == SAS_ADDR(sas_addr)) { + *index = i; + if ((rri_resp[12] & 0x80) == 0x80) + *present = 0; + else + *present = 1; + goto out; + } else if (SAS_ADDR(rri_resp+16) == 0) { + *index = i; + *present = 0; + goto out; + } + } else if (SAS_ADDR(rri_resp+16) == 0 && + phy->last_da_index < i) { + phy->last_da_index = i; + *index = i; + *present = 0; + goto out; + } + } + res = -1; +out: + kfree(rri_req); + kfree(rri_resp); + return res; +} + +#define CRI_REQ_SIZE 44 +#define CRI_RESP_SIZE 8 + +static int sas_configure_set(struct domain_device *dev, int phy_id, + u8 *sas_addr, int index, int include) +{ + int res; + u8 *cri_req; + u8 *cri_resp; + + cri_req = alloc_smp_req(CRI_REQ_SIZE); + if (!cri_req) + return -ENOMEM; + + cri_resp = alloc_smp_resp(CRI_RESP_SIZE); + if (!cri_resp) { + kfree(cri_req); + return -ENOMEM; + } + + cri_req[1] = SMP_CONF_ROUTE_INFO; + *(__be16 *)(cri_req+6) = cpu_to_be16(index); + cri_req[9] = phy_id; + if (SAS_ADDR(sas_addr) == 0 || !include) + cri_req[12] |= 0x80; + memcpy(cri_req+16, sas_addr, SAS_ADDR_SIZE); + + res = smp_execute_task(dev, cri_req, CRI_REQ_SIZE, cri_resp, + CRI_RESP_SIZE); + if (res) + goto out; + res = cri_resp[2]; + if (res == SMP_RESP_NO_INDEX) { + SAS_DPRINTK("overflow of indexes: dev %016llx phy 0x%x " + "index 0x%x\n", + SAS_ADDR(dev->sas_addr), phy_id, index); + } +out: + kfree(cri_req); + kfree(cri_resp); + return res; +} + +static int sas_configure_phy(struct domain_device *dev, int phy_id, + u8 *sas_addr, int include) +{ + int index; + int present; + int res; + + res = sas_configure_present(dev, phy_id, sas_addr, &index, &present); + if (res) + return res; + if (include ^ present) + return sas_configure_set(dev, phy_id, sas_addr, index,include); + + return res; +} + +/** + * sas_configure_parent -- configure routing table of parent + * parent: parent expander + * child: child expander + * sas_addr: SAS port identifier of device directly attached to child + */ +static int sas_configure_parent(struct domain_device *parent, + struct domain_device *child, + u8 *sas_addr, int include) +{ + struct expander_device *ex_parent = &parent->ex_dev; + int res = 0; + int i; + + if (parent->parent) { + res = sas_configure_parent(parent->parent, parent, sas_addr, + include); + if (res) + return res; + } + + if (ex_parent->conf_route_table == 0) { + SAS_DPRINTK("ex %016llx has self-configuring routing table\n", + SAS_ADDR(parent->sas_addr)); + return 0; + } + + for (i = 0; i < ex_parent->num_phys; i++) { + struct ex_phy *phy = &ex_parent->ex_phy[i]; + + if ((phy->routing_attr == TABLE_ROUTING) && + (SAS_ADDR(phy->attached_sas_addr) == + SAS_ADDR(child->sas_addr))) { + res = sas_configure_phy(parent, i, sas_addr, include); + if (res) + return res; + } + } + + return res; +} + +/** + * sas_configure_routing -- configure routing + * dev: expander device + * sas_addr: port identifier of device directly attached to the expander device + */ +static int sas_configure_routing(struct domain_device *dev, u8 *sas_addr) +{ + if (dev->parent) + return sas_configure_parent(dev->parent, dev, sas_addr, 1); + return 0; +} + +static int sas_disable_routing(struct domain_device *dev, u8 *sas_addr) +{ + if (dev->parent) + return sas_configure_parent(dev->parent, dev, sas_addr, 0); + return 0; +} + +#if 0 +#define SMP_BIN_ATTR_NAME "smp_portal" + +static void sas_ex_smp_hook(struct domain_device *dev) +{ + struct expander_device *ex_dev = &dev->ex_dev; + struct bin_attribute *bin_attr = &ex_dev->smp_bin_attr; + + memset(bin_attr, 0, sizeof(*bin_attr)); + + bin_attr->attr.name = SMP_BIN_ATTR_NAME; + bin_attr->attr.owner = THIS_MODULE; + bin_attr->attr.mode = 0600; + + bin_attr->size = 0; + bin_attr->private = NULL; + bin_attr->read = smp_portal_read; + bin_attr->write= smp_portal_write; + bin_attr->mmap = NULL; + + ex_dev->smp_portal_pid = -1; + init_MUTEX(&ex_dev->smp_sema); +} +#endif + +/** + * sas_discover_expander -- expander discovery + * @ex: pointer to expander domain device + * + * See comment in sas_discover_sata(). + */ +static int sas_discover_expander(struct domain_device *dev) +{ + int res; + + res = sas_notify_lldd_dev_found(dev); + if (res) + return res; + + res = sas_ex_general(dev); + if (res) + goto out_err; + res = sas_ex_manuf_info(dev); + if (res) + goto out_err; + + res = sas_expander_discover(dev); + if (res) { + SAS_DPRINTK("expander %016llx discovery failed(0x%x)\n", + SAS_ADDR(dev->sas_addr), res); + goto out_err; + } + + sas_check_ex_subtractive_boundary(dev); + res = sas_check_parent_topology(dev); + if (res) + goto out_err; + return 0; +out_err: + sas_notify_lldd_dev_gone(dev); + return res; +} + +static int sas_ex_level_discovery(struct asd_sas_port *port, const int level) +{ + int res = 0; + struct domain_device *dev; + + list_for_each_entry(dev, &port->dev_list, dev_list_node) { + if (dev->dev_type == EDGE_DEV || + dev->dev_type == FANOUT_DEV) { + struct sas_expander_device *ex = + rphy_to_expander_device(dev->rphy); + + if (level == ex->level) + res = sas_ex_discover_devices(dev, -1); + else if (level > 0) + res = sas_ex_discover_devices(port->port_dev, -1); + + } + } + + return res; +} + +static int sas_ex_bfs_disc(struct asd_sas_port *port) +{ + int res; + int level; + + do { + level = port->disc.max_level; + res = sas_ex_level_discovery(port, level); + mb(); + } while (level < port->disc.max_level); + + return res; +} + +int sas_discover_root_expander(struct domain_device *dev) +{ + int res; + struct sas_expander_device *ex = rphy_to_expander_device(dev->rphy); + + sas_rphy_add(dev->rphy); + + ex->level = dev->port->disc.max_level; /* 0 */ + res = sas_discover_expander(dev); + if (!res) + sas_ex_bfs_disc(dev->port); + + return res; +} + +/* ---------- Domain revalidation ---------- */ + +static int sas_get_phy_discover(struct domain_device *dev, + int phy_id, struct smp_resp *disc_resp) +{ + int res; + u8 *disc_req; + + disc_req = alloc_smp_req(DISCOVER_REQ_SIZE); + if (!disc_req) + return -ENOMEM; + + disc_req[1] = SMP_DISCOVER; + disc_req[9] = phy_id; + + res = smp_execute_task(dev, disc_req, DISCOVER_REQ_SIZE, + disc_resp, DISCOVER_RESP_SIZE); + if (res) + goto out; + else if (disc_resp->result != SMP_RESP_FUNC_ACC) { + res = disc_resp->result; + goto out; + } +out: + kfree(disc_req); + return res; +} + +static int sas_get_phy_change_count(struct domain_device *dev, + int phy_id, int *pcc) +{ + int res; + struct smp_resp *disc_resp; + + disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE); + if (!disc_resp) + return -ENOMEM; + + res = sas_get_phy_discover(dev, phy_id, disc_resp); + if (!res) + *pcc = disc_resp->disc.change_count; + + kfree(disc_resp); + return res; +} + +static int sas_get_phy_attached_sas_addr(struct domain_device *dev, + int phy_id, u8 *attached_sas_addr) +{ + int res; + struct smp_resp *disc_resp; + struct discover_resp *dr; + + disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE); + if (!disc_resp) + return -ENOMEM; + dr = &disc_resp->disc; + + res = sas_get_phy_discover(dev, phy_id, disc_resp); + if (!res) { + memcpy(attached_sas_addr,disc_resp->disc.attached_sas_addr,8); + if (dr->attached_dev_type == 0) + memset(attached_sas_addr, 0, 8); + } + kfree(disc_resp); + return res; +} + +static int sas_find_bcast_phy(struct domain_device *dev, int *phy_id, + int from_phy) +{ + struct expander_device *ex = &dev->ex_dev; + int res = 0; + int i; + + for (i = from_phy; i < ex->num_phys; i++) { + int phy_change_count = 0; + + res = sas_get_phy_change_count(dev, i, &phy_change_count); + if (res) + goto out; + else if (phy_change_count != ex->ex_phy[i].phy_change_count) { + ex->ex_phy[i].phy_change_count = phy_change_count; + *phy_id = i; + return 0; + } + } +out: + return res; +} + +static int sas_get_ex_change_count(struct domain_device *dev, int *ecc) +{ + int res; + u8 *rg_req; + struct smp_resp *rg_resp; + + rg_req = alloc_smp_req(RG_REQ_SIZE); + if (!rg_req) + return -ENOMEM; + + rg_resp = alloc_smp_resp(RG_RESP_SIZE); + if (!rg_resp) { + kfree(rg_req); + return -ENOMEM; + } + + rg_req[1] = SMP_REPORT_GENERAL; + + res = smp_execute_task(dev, rg_req, RG_REQ_SIZE, rg_resp, + RG_RESP_SIZE); + if (res) + goto out; + if (rg_resp->result != SMP_RESP_FUNC_ACC) { + res = rg_resp->result; + goto out; + } + + *ecc = be16_to_cpu(rg_resp->rg.change_count); +out: + kfree(rg_resp); + kfree(rg_req); + return res; +} + +static int sas_find_bcast_dev(struct domain_device *dev, + struct domain_device **src_dev) +{ + struct expander_device *ex = &dev->ex_dev; + int ex_change_count = -1; + int res; + + res = sas_get_ex_change_count(dev, &ex_change_count); + if (res) + goto out; + if (ex_change_count != -1 && + ex_change_count != ex->ex_change_count) { + *src_dev = dev; + ex->ex_change_count = ex_change_count; + } else { + struct domain_device *ch; + + list_for_each_entry(ch, &ex->children, siblings) { + if (ch->dev_type == EDGE_DEV || + ch->dev_type == FANOUT_DEV) { + res = sas_find_bcast_dev(ch, src_dev); + if (src_dev) + return res; + } + } + } +out: + return res; +} + +static void sas_unregister_ex_tree(struct domain_device *dev) +{ + struct expander_device *ex = &dev->ex_dev; + struct domain_device *child, *n; + + list_for_each_entry_safe(child, n, &ex->children, siblings) { + if (child->dev_type == EDGE_DEV || + child->dev_type == FANOUT_DEV) + sas_unregister_ex_tree(child); + else + sas_unregister_dev(child); + } + sas_unregister_dev(dev); +} + +static void sas_unregister_devs_sas_addr(struct domain_device *parent, + int phy_id) +{ + struct expander_device *ex_dev = &parent->ex_dev; + struct ex_phy *phy = &ex_dev->ex_phy[phy_id]; + struct domain_device *child, *n; + + list_for_each_entry_safe(child, n, &ex_dev->children, siblings) { + if (SAS_ADDR(child->sas_addr) == + SAS_ADDR(phy->attached_sas_addr)) { + if (child->dev_type == EDGE_DEV || + child->dev_type == FANOUT_DEV) + sas_unregister_ex_tree(child); + else + sas_unregister_dev(child); + break; + } + } + sas_disable_routing(parent, phy->attached_sas_addr); + memset(phy->attached_sas_addr, 0, SAS_ADDR_SIZE); + sas_port_delete_phy(phy->port, phy->phy); + if (phy->port->num_phys == 0) + sas_port_delete(phy->port); + phy->port = NULL; +} + +static int sas_discover_bfs_by_root_level(struct domain_device *root, + const int level) +{ + struct expander_device *ex_root = &root->ex_dev; + struct domain_device *child; + int res = 0; + + list_for_each_entry(child, &ex_root->children, siblings) { + if (child->dev_type == EDGE_DEV || + child->dev_type == FANOUT_DEV) { + struct sas_expander_device *ex = + rphy_to_expander_device(child->rphy); + + if (level > ex->level) + res = sas_discover_bfs_by_root_level(child, + level); + else if (level == ex->level) + res = sas_ex_discover_devices(child, -1); + } + } + return res; +} + +static int sas_discover_bfs_by_root(struct domain_device *dev) +{ + int res; + struct sas_expander_device *ex = rphy_to_expander_device(dev->rphy); + int level = ex->level+1; + + res = sas_ex_discover_devices(dev, -1); + if (res) + goto out; + do { + res = sas_discover_bfs_by_root_level(dev, level); + mb(); + level += 1; + } while (level <= dev->port->disc.max_level); +out: + return res; +} + +static int sas_discover_new(struct domain_device *dev, int phy_id) +{ + struct ex_phy *ex_phy = &dev->ex_dev.ex_phy[phy_id]; + struct domain_device *child; + int res; + + SAS_DPRINTK("ex %016llx phy%d new device attached\n", + SAS_ADDR(dev->sas_addr), phy_id); + res = sas_ex_phy_discover(dev, phy_id); + if (res) + goto out; + res = sas_ex_discover_devices(dev, phy_id); + if (res) + goto out; + list_for_each_entry(child, &dev->ex_dev.children, siblings) { + if (SAS_ADDR(child->sas_addr) == + SAS_ADDR(ex_phy->attached_sas_addr)) { + if (child->dev_type == EDGE_DEV || + child->dev_type == FANOUT_DEV) + res = sas_discover_bfs_by_root(child); + break; + } + } +out: + return res; +} + +static int sas_rediscover_dev(struct domain_device *dev, int phy_id) +{ + struct expander_device *ex = &dev->ex_dev; + struct ex_phy *phy = &ex->ex_phy[phy_id]; + u8 attached_sas_addr[8]; + int res; + + res = sas_get_phy_attached_sas_addr(dev, phy_id, attached_sas_addr); + switch (res) { + case SMP_RESP_NO_PHY: + phy->phy_state = PHY_NOT_PRESENT; + sas_unregister_devs_sas_addr(dev, phy_id); + goto out; break; + case SMP_RESP_PHY_VACANT: + phy->phy_state = PHY_VACANT; + sas_unregister_devs_sas_addr(dev, phy_id); + goto out; break; + case SMP_RESP_FUNC_ACC: + break; + } + + if (SAS_ADDR(attached_sas_addr) == 0) { + phy->phy_state = PHY_EMPTY; + sas_unregister_devs_sas_addr(dev, phy_id); + } else if (SAS_ADDR(attached_sas_addr) == + SAS_ADDR(phy->attached_sas_addr)) { + SAS_DPRINTK("ex %016llx phy 0x%x broadcast flutter\n", + SAS_ADDR(dev->sas_addr), phy_id); + } else + res = sas_discover_new(dev, phy_id); +out: + return res; +} + +static int sas_rediscover(struct domain_device *dev, const int phy_id) +{ + struct expander_device *ex = &dev->ex_dev; + struct ex_phy *changed_phy = &ex->ex_phy[phy_id]; + int res = 0; + int i; + + SAS_DPRINTK("ex %016llx phy%d originated BROADCAST(CHANGE)\n", + SAS_ADDR(dev->sas_addr), phy_id); + + if (SAS_ADDR(changed_phy->attached_sas_addr) != 0) { + for (i = 0; i < ex->num_phys; i++) { + struct ex_phy *phy = &ex->ex_phy[i]; + + if (i == phy_id) + continue; + if (SAS_ADDR(phy->attached_sas_addr) == + SAS_ADDR(changed_phy->attached_sas_addr)) { + SAS_DPRINTK("phy%d part of wide port with " + "phy%d\n", phy_id, i); + goto out; + } + } + res = sas_rediscover_dev(dev, phy_id); + } else + res = sas_discover_new(dev, phy_id); +out: + return res; +} + +/** + * sas_revalidate_domain -- revalidate the domain + * @port: port to the domain of interest + * + * NOTE: this process _must_ quit (return) as soon as any connection + * errors are encountered. Connection recovery is done elsewhere. + * Discover process only interrogates devices in order to discover the + * domain. + */ +int sas_ex_revalidate_domain(struct domain_device *port_dev) +{ + int res; + struct domain_device *dev = NULL; + + res = sas_find_bcast_dev(port_dev, &dev); + if (res) + goto out; + if (dev) { + struct expander_device *ex = &dev->ex_dev; + int i = 0, phy_id; + + do { + phy_id = -1; + res = sas_find_bcast_phy(dev, &phy_id, i); + if (phy_id == -1) + break; + res = sas_rediscover(dev, phy_id); + i = phy_id + 1; + } while (i < ex->num_phys); + } +out: + return res; +} + +#if 0 +/* ---------- SMP portal ---------- */ + +static ssize_t smp_portal_write(struct kobject *kobj, char *buf, loff_t offs, + size_t size) +{ + struct domain_device *dev = to_dom_device(kobj); + struct expander_device *ex = &dev->ex_dev; + + if (offs != 0) + return -EFBIG; + else if (size == 0) + return 0; + + down_interruptible(&ex->smp_sema); + if (ex->smp_req) + kfree(ex->smp_req); + ex->smp_req = kzalloc(size, GFP_USER); + if (!ex->smp_req) { + up(&ex->smp_sema); + return -ENOMEM; + } + memcpy(ex->smp_req, buf, size); + ex->smp_req_size = size; + ex->smp_portal_pid = current->pid; + up(&ex->smp_sema); + + return size; +} + +static ssize_t smp_portal_read(struct kobject *kobj, char *buf, loff_t offs, + size_t size) +{ + struct domain_device *dev = to_dom_device(kobj); + struct expander_device *ex = &dev->ex_dev; + u8 *smp_resp; + int res = -EINVAL; + + /* XXX: sysfs gives us an offset of 0x10 or 0x8 while in fact + * it should be 0. + */ + + down_interruptible(&ex->smp_sema); + if (!ex->smp_req || ex->smp_portal_pid != current->pid) + goto out; + + res = 0; + if (size == 0) + goto out; + + res = -ENOMEM; + smp_resp = alloc_smp_resp(size); + if (!smp_resp) + goto out; + res = smp_execute_task(dev, ex->smp_req, ex->smp_req_size, + smp_resp, size); + if (!res) { + memcpy(buf, smp_resp, size); + res = size; + } + + kfree(smp_resp); +out: + kfree(ex->smp_req); + ex->smp_req = NULL; + ex->smp_req_size = 0; + ex->smp_portal_pid = -1; + up(&ex->smp_sema); + return res; +} +#endif diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c new file mode 100644 index 000000000000..b961664b8106 --- /dev/null +++ b/drivers/scsi/libsas/sas_init.c @@ -0,0 +1,227 @@ +/* + * Serial Attached SCSI (SAS) Transport Layer initialization + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sas_internal.h" + +#include "../scsi_sas_internal.h" + +kmem_cache_t *sas_task_cache; + +/*------------ SAS addr hash -----------*/ +void sas_hash_addr(u8 *hashed, const u8 *sas_addr) +{ + const u32 poly = 0x00DB2777; + u32 r = 0; + int i; + + for (i = 0; i < 8; i++) { + int b; + for (b = 7; b >= 0; b--) { + r <<= 1; + if ((1 << b) & sas_addr[i]) { + if (!(r & 0x01000000)) + r ^= poly; + } else if (r & 0x01000000) + r ^= poly; + } + } + + hashed[0] = (r >> 16) & 0xFF; + hashed[1] = (r >> 8) & 0xFF ; + hashed[2] = r & 0xFF; +} + + +/* ---------- HA events ---------- */ + +void sas_hae_reset(void *data) +{ + struct sas_ha_struct *ha = data; + + sas_begin_event(HAE_RESET, &ha->event_lock, + &ha->pending); +} + +int sas_register_ha(struct sas_ha_struct *sas_ha) +{ + int error = 0; + + spin_lock_init(&sas_ha->phy_port_lock); + sas_hash_addr(sas_ha->hashed_sas_addr, sas_ha->sas_addr); + + if (sas_ha->lldd_queue_size == 0) + sas_ha->lldd_queue_size = 1; + else if (sas_ha->lldd_queue_size == -1) + sas_ha->lldd_queue_size = 128; /* Sanity */ + + error = sas_register_phys(sas_ha); + if (error) { + printk(KERN_NOTICE "couldn't register sas phys:%d\n", error); + return error; + } + + error = sas_register_ports(sas_ha); + if (error) { + printk(KERN_NOTICE "couldn't register sas ports:%d\n", error); + goto Undo_phys; + } + + error = sas_init_events(sas_ha); + if (error) { + printk(KERN_NOTICE "couldn't start event thread:%d\n", error); + goto Undo_ports; + } + + if (sas_ha->lldd_max_execute_num > 1) { + error = sas_init_queue(sas_ha); + if (error) { + printk(KERN_NOTICE "couldn't start queue thread:%d, " + "running in direct mode\n", error); + sas_ha->lldd_max_execute_num = 1; + } + } + + return 0; + +Undo_ports: + sas_unregister_ports(sas_ha); +Undo_phys: + + return error; +} + +int sas_unregister_ha(struct sas_ha_struct *sas_ha) +{ + if (sas_ha->lldd_max_execute_num > 1) { + sas_shutdown_queue(sas_ha); + } + + sas_unregister_ports(sas_ha); + + return 0; +} + +static int sas_get_linkerrors(struct sas_phy *phy) +{ + if (scsi_is_sas_phy_local(phy)) + /* FIXME: we have no local phy stats + * gathering at this time */ + return -EINVAL; + + return sas_smp_get_phy_events(phy); +} + +static int sas_phy_reset(struct sas_phy *phy, int hard_reset) +{ + int ret; + enum phy_func reset_type; + + if (hard_reset) + reset_type = PHY_FUNC_HARD_RESET; + else + reset_type = PHY_FUNC_LINK_RESET; + + if (scsi_is_sas_phy_local(phy)) { + struct Scsi_Host *shost = dev_to_shost(phy->dev.parent); + struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(shost); + struct asd_sas_phy *asd_phy = sas_ha->sas_phy[phy->number]; + struct sas_internal *i = + to_sas_internal(sas_ha->core.shost->transportt); + + ret = i->dft->lldd_control_phy(asd_phy, reset_type); + } else { + struct sas_rphy *rphy = dev_to_rphy(phy->dev.parent); + struct domain_device *ddev = sas_find_dev_by_rphy(rphy); + ret = sas_smp_phy_control(ddev, phy->number, reset_type); + } + return ret; +} + +static struct sas_function_template sft = { + .phy_reset = sas_phy_reset, + .get_linkerrors = sas_get_linkerrors, +}; + +struct scsi_transport_template * +sas_domain_attach_transport(struct sas_domain_function_template *dft) +{ + struct scsi_transport_template *stt = sas_attach_transport(&sft); + struct sas_internal *i; + + if (!stt) + return stt; + + i = to_sas_internal(stt); + i->dft = dft; + stt->create_work_queue = 1; + stt->eh_timed_out = sas_scsi_timed_out; + stt->eh_strategy_handler = sas_scsi_recover_host; + + return stt; +} +EXPORT_SYMBOL_GPL(sas_domain_attach_transport); + + +void sas_domain_release_transport(struct scsi_transport_template *stt) +{ + sas_release_transport(stt); +} +EXPORT_SYMBOL_GPL(sas_domain_release_transport); + +/* ---------- SAS Class register/unregister ---------- */ + +static int __init sas_class_init(void) +{ + sas_task_cache = kmem_cache_create("sas_task", sizeof(struct sas_task), + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!sas_task_cache) + return -ENOMEM; + + return 0; +} + +static void __exit sas_class_exit(void) +{ + kmem_cache_destroy(sas_task_cache); +} + +MODULE_AUTHOR("Luben Tuikov "); +MODULE_DESCRIPTION("SAS Transport Layer"); +MODULE_LICENSE("GPL v2"); + +module_init(sas_class_init); +module_exit(sas_class_exit); + +EXPORT_SYMBOL_GPL(sas_register_ha); +EXPORT_SYMBOL_GPL(sas_unregister_ha); diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h new file mode 100644 index 000000000000..89c397680846 --- /dev/null +++ b/drivers/scsi/libsas/sas_internal.h @@ -0,0 +1,146 @@ +/* + * Serial Attached SCSI (SAS) class internal header file + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#ifndef _SAS_INTERNAL_H_ +#define _SAS_INTERNAL_H_ + +#include +#include +#include +#include + +#define sas_printk(fmt, ...) printk(KERN_NOTICE "sas: " fmt, ## __VA_ARGS__) + +#ifdef SAS_DEBUG +#define SAS_DPRINTK(fmt, ...) printk(KERN_NOTICE "sas: " fmt, ## __VA_ARGS__) +#else +#define SAS_DPRINTK(fmt, ...) +#endif + +void sas_scsi_recover_host(struct Scsi_Host *shost); + +int sas_show_class(enum sas_class class, char *buf); +int sas_show_proto(enum sas_proto proto, char *buf); +int sas_show_linkrate(enum sas_phy_linkrate linkrate, char *buf); +int sas_show_oob_mode(enum sas_oob_mode oob_mode, char *buf); + +int sas_register_phys(struct sas_ha_struct *sas_ha); +void sas_unregister_phys(struct sas_ha_struct *sas_ha); + +int sas_register_ports(struct sas_ha_struct *sas_ha); +void sas_unregister_ports(struct sas_ha_struct *sas_ha); + +enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *); + +int sas_init_queue(struct sas_ha_struct *sas_ha); +int sas_init_events(struct sas_ha_struct *sas_ha); +void sas_shutdown_queue(struct sas_ha_struct *sas_ha); + +void sas_deform_port(struct asd_sas_phy *phy); + +void sas_porte_bytes_dmaed(void *); +void sas_porte_broadcast_rcvd(void *); +void sas_porte_link_reset_err(void *); +void sas_porte_timer_event(void *); +void sas_porte_hard_reset(void *); + +int sas_notify_lldd_dev_found(struct domain_device *); +void sas_notify_lldd_dev_gone(struct domain_device *); + +int sas_smp_phy_control(struct domain_device *dev, int phy_id, + enum phy_func phy_func); +int sas_smp_get_phy_events(struct sas_phy *phy); + +struct domain_device *sas_find_dev_by_rphy(struct sas_rphy *rphy); + +void sas_hae_reset(void *); + +static inline void sas_queue_event(int event, spinlock_t *lock, + unsigned long *pending, + struct work_struct *work, + struct Scsi_Host *shost) +{ + unsigned long flags; + + spin_lock_irqsave(lock, flags); + if (test_bit(event, pending)) { + spin_unlock_irqrestore(lock, flags); + return; + } + __set_bit(event, pending); + spin_unlock_irqrestore(lock, flags); + scsi_queue_work(shost, work); +} + +static inline void sas_begin_event(int event, spinlock_t *lock, + unsigned long *pending) +{ + unsigned long flags; + + spin_lock_irqsave(lock, flags); + __clear_bit(event, pending); + spin_unlock_irqrestore(lock, flags); +} + +static inline void sas_fill_in_rphy(struct domain_device *dev, + struct sas_rphy *rphy) +{ + rphy->identify.sas_address = SAS_ADDR(dev->sas_addr); + rphy->identify.initiator_port_protocols = dev->iproto; + rphy->identify.target_port_protocols = dev->tproto; + switch (dev->dev_type) { + case SATA_DEV: + /* FIXME: need sata device type */ + case SAS_END_DEV: + rphy->identify.device_type = SAS_END_DEVICE; + break; + case EDGE_DEV: + rphy->identify.device_type = SAS_EDGE_EXPANDER_DEVICE; + break; + case FANOUT_DEV: + rphy->identify.device_type = SAS_FANOUT_EXPANDER_DEVICE; + break; + default: + rphy->identify.device_type = SAS_PHY_UNUSED; + break; + } +} + +static inline void sas_add_parent_port(struct domain_device *dev, int phy_id) +{ + struct expander_device *ex = &dev->ex_dev; + struct ex_phy *ex_phy = &ex->ex_phy[phy_id]; + + if (!ex->parent_port) { + ex->parent_port = sas_port_alloc(&dev->rphy->dev, phy_id); + /* FIXME: error handling */ + BUG_ON(!ex->parent_port); + BUG_ON(sas_port_add(ex->parent_port)); + sas_port_mark_backlink(ex->parent_port); + } + sas_port_add_phy(ex->parent_port, ex_phy->phy); +} + +#endif /* _SAS_INTERNAL_H_ */ diff --git a/drivers/scsi/libsas/sas_phy.c b/drivers/scsi/libsas/sas_phy.c new file mode 100644 index 000000000000..024ab00e70d2 --- /dev/null +++ b/drivers/scsi/libsas/sas_phy.c @@ -0,0 +1,157 @@ +/* + * Serial Attached SCSI (SAS) Phy class + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "sas_internal.h" +#include +#include +#include +#include "../scsi_sas_internal.h" + +/* ---------- Phy events ---------- */ + +static void sas_phye_loss_of_signal(void *data) +{ + struct asd_sas_phy *phy = data; + + sas_begin_event(PHYE_LOSS_OF_SIGNAL, &phy->ha->event_lock, + &phy->phy_events_pending); + phy->error = 0; + sas_deform_port(phy); +} + +static void sas_phye_oob_done(void *data) +{ + struct asd_sas_phy *phy = data; + + sas_begin_event(PHYE_OOB_DONE, &phy->ha->event_lock, + &phy->phy_events_pending); + phy->error = 0; +} + +static void sas_phye_oob_error(void *data) +{ + struct asd_sas_phy *phy = data; + struct sas_ha_struct *sas_ha = phy->ha; + struct asd_sas_port *port = phy->port; + struct sas_internal *i = + to_sas_internal(sas_ha->core.shost->transportt); + + sas_begin_event(PHYE_OOB_ERROR, &phy->ha->event_lock, + &phy->phy_events_pending); + + sas_deform_port(phy); + + if (!port && phy->enabled && i->dft->lldd_control_phy) { + phy->error++; + switch (phy->error) { + case 1: + case 2: + i->dft->lldd_control_phy(phy, PHY_FUNC_HARD_RESET); + break; + case 3: + default: + phy->error = 0; + phy->enabled = 0; + i->dft->lldd_control_phy(phy, PHY_FUNC_DISABLE); + break; + } + } +} + +static void sas_phye_spinup_hold(void *data) +{ + struct asd_sas_phy *phy = data; + struct sas_ha_struct *sas_ha = phy->ha; + struct sas_internal *i = + to_sas_internal(sas_ha->core.shost->transportt); + + sas_begin_event(PHYE_SPINUP_HOLD, &phy->ha->event_lock, + &phy->phy_events_pending); + + phy->error = 0; + i->dft->lldd_control_phy(phy, PHY_FUNC_RELEASE_SPINUP_HOLD); +} + +/* ---------- Phy class registration ---------- */ + +int sas_register_phys(struct sas_ha_struct *sas_ha) +{ + int i; + + static void (*sas_phy_event_fns[PHY_NUM_EVENTS])(void *) = { + [PHYE_LOSS_OF_SIGNAL] = sas_phye_loss_of_signal, + [PHYE_OOB_DONE] = sas_phye_oob_done, + [PHYE_OOB_ERROR] = sas_phye_oob_error, + [PHYE_SPINUP_HOLD] = sas_phye_spinup_hold, + }; + + static void (*sas_port_event_fns[PORT_NUM_EVENTS])(void *) = { + [PORTE_BYTES_DMAED] = sas_porte_bytes_dmaed, + [PORTE_BROADCAST_RCVD] = sas_porte_broadcast_rcvd, + [PORTE_LINK_RESET_ERR] = sas_porte_link_reset_err, + [PORTE_TIMER_EVENT] = sas_porte_timer_event, + [PORTE_HARD_RESET] = sas_porte_hard_reset, + }; + + /* Now register the phys. */ + for (i = 0; i < sas_ha->num_phys; i++) { + int k; + struct asd_sas_phy *phy = sas_ha->sas_phy[i]; + + phy->error = 0; + INIT_LIST_HEAD(&phy->port_phy_el); + for (k = 0; k < PORT_NUM_EVENTS; k++) + INIT_WORK(&phy->port_events[k], sas_port_event_fns[k], + phy); + + for (k = 0; k < PHY_NUM_EVENTS; k++) + INIT_WORK(&phy->phy_events[k], sas_phy_event_fns[k], + phy); + phy->port = NULL; + phy->ha = sas_ha; + spin_lock_init(&phy->frame_rcvd_lock); + spin_lock_init(&phy->sas_prim_lock); + phy->frame_rcvd_size = 0; + + phy->phy = sas_phy_alloc(&sas_ha->core.shost->shost_gendev, + i); + if (!phy->phy) + return -ENOMEM; + + phy->phy->identify.initiator_port_protocols = + phy->iproto; + phy->phy->identify.target_port_protocols = phy->tproto; + phy->phy->identify.sas_address = SAS_ADDR(sas_ha->sas_addr); + phy->phy->identify.phy_identifier = i; + phy->phy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS; + phy->phy->maximum_linkrate_hw = SAS_LINK_RATE_3_0_GBPS; + phy->phy->minimum_linkrate = SAS_LINK_RATE_1_5_GBPS; + phy->phy->maximum_linkrate = SAS_LINK_RATE_3_0_GBPS; + phy->phy->negotiated_linkrate = SAS_LINK_RATE_UNKNOWN; + + sas_phy_add(phy->phy); + } + + return 0; +} diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c new file mode 100644 index 000000000000..253cdcf306a2 --- /dev/null +++ b/drivers/scsi/libsas/sas_port.c @@ -0,0 +1,279 @@ +/* + * Serial Attached SCSI (SAS) Port class + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "sas_internal.h" + +#include +#include +#include "../scsi_sas_internal.h" + +/** + * sas_form_port -- add this phy to a port + * @phy: the phy of interest + * + * This function adds this phy to an existing port, thus creating a wide + * port, or it creates a port and adds the phy to the port. + */ +static void sas_form_port(struct asd_sas_phy *phy) +{ + int i; + struct sas_ha_struct *sas_ha = phy->ha; + struct asd_sas_port *port = phy->port; + struct sas_internal *si = + to_sas_internal(sas_ha->core.shost->transportt); + + if (port) { + if (memcmp(port->attached_sas_addr, phy->attached_sas_addr, + SAS_ADDR_SIZE) == 0) + sas_deform_port(phy); + else { + SAS_DPRINTK("%s: phy%d belongs to port%d already(%d)!\n", + __FUNCTION__, phy->id, phy->port->id, + phy->port->num_phys); + return; + } + } + + /* find a port */ + spin_lock(&sas_ha->phy_port_lock); + for (i = 0; i < sas_ha->num_phys; i++) { + port = sas_ha->sas_port[i]; + spin_lock(&port->phy_list_lock); + if (*(u64 *) port->sas_addr && + memcmp(port->attached_sas_addr, + phy->attached_sas_addr, SAS_ADDR_SIZE) == 0 && + port->num_phys > 0) { + /* wide port */ + SAS_DPRINTK("phy%d matched wide port%d\n", phy->id, + port->id); + break; + } else if (*(u64 *) port->sas_addr == 0 && port->num_phys==0) { + memcpy(port->sas_addr, phy->sas_addr, SAS_ADDR_SIZE); + break; + } + spin_unlock(&port->phy_list_lock); + } + + if (i >= sas_ha->num_phys) { + printk(KERN_NOTICE "%s: couldn't find a free port, bug?\n", + __FUNCTION__); + spin_unlock(&sas_ha->phy_port_lock); + return; + } + + /* add the phy to the port */ + list_add_tail(&phy->port_phy_el, &port->phy_list); + phy->port = port; + port->num_phys++; + port->phy_mask |= (1U << phy->id); + + if (!port->phy) + port->phy = phy->phy; + + SAS_DPRINTK("phy%d added to port%d, phy_mask:0x%x\n", phy->id, + port->id, port->phy_mask); + + if (*(u64 *)port->attached_sas_addr == 0) { + port->class = phy->class; + memcpy(port->attached_sas_addr, phy->attached_sas_addr, + SAS_ADDR_SIZE); + port->iproto = phy->iproto; + port->tproto = phy->tproto; + port->oob_mode = phy->oob_mode; + port->linkrate = phy->linkrate; + } else + port->linkrate = max(port->linkrate, phy->linkrate); + spin_unlock(&port->phy_list_lock); + spin_unlock(&sas_ha->phy_port_lock); + + if (!port->port) { + port->port = sas_port_alloc(phy->phy->dev.parent, port->id); + BUG_ON(!port->port); + sas_port_add(port->port); + } + sas_port_add_phy(port->port, phy->phy); + + if (port->port_dev) + port->port_dev->pathways = port->num_phys; + + /* Tell the LLDD about this port formation. */ + if (si->dft->lldd_port_formed) + si->dft->lldd_port_formed(phy); + + sas_discover_event(phy->port, DISCE_DISCOVER_DOMAIN); +} + +/** + * sas_deform_port -- remove this phy from the port it belongs to + * @phy: the phy of interest + * + * This is called when the physical link to the other phy has been + * lost (on this phy), in Event thread context. We cannot delay here. + */ +void sas_deform_port(struct asd_sas_phy *phy) +{ + struct sas_ha_struct *sas_ha = phy->ha; + struct asd_sas_port *port = phy->port; + struct sas_internal *si = + to_sas_internal(sas_ha->core.shost->transportt); + + if (!port) + return; /* done by a phy event */ + + if (port->port_dev) + port->port_dev->pathways--; + + if (port->num_phys == 1) { + sas_unregister_domain_devices(port); + sas_port_delete(port->port); + port->port = NULL; + } else + sas_port_delete_phy(port->port, phy->phy); + + + if (si->dft->lldd_port_deformed) + si->dft->lldd_port_deformed(phy); + + spin_lock(&sas_ha->phy_port_lock); + spin_lock(&port->phy_list_lock); + + list_del_init(&phy->port_phy_el); + phy->port = NULL; + port->num_phys--; + port->phy_mask &= ~(1U << phy->id); + + if (port->num_phys == 0) { + INIT_LIST_HEAD(&port->phy_list); + memset(port->sas_addr, 0, SAS_ADDR_SIZE); + memset(port->attached_sas_addr, 0, SAS_ADDR_SIZE); + port->class = 0; + port->iproto = 0; + port->tproto = 0; + port->oob_mode = 0; + port->phy_mask = 0; + } + spin_unlock(&port->phy_list_lock); + spin_unlock(&sas_ha->phy_port_lock); + + return; +} + +/* ---------- SAS port events ---------- */ + +void sas_porte_bytes_dmaed(void *data) +{ + struct asd_sas_phy *phy = data; + + sas_begin_event(PORTE_BYTES_DMAED, &phy->ha->event_lock, + &phy->port_events_pending); + + sas_form_port(phy); +} + +void sas_porte_broadcast_rcvd(void *data) +{ + unsigned long flags; + u32 prim; + struct asd_sas_phy *phy = data; + + sas_begin_event(PORTE_BROADCAST_RCVD, &phy->ha->event_lock, + &phy->port_events_pending); + + spin_lock_irqsave(&phy->sas_prim_lock, flags); + prim = phy->sas_prim; + spin_unlock_irqrestore(&phy->sas_prim_lock, flags); + + SAS_DPRINTK("broadcast received: %d\n", prim); + sas_discover_event(phy->port, DISCE_REVALIDATE_DOMAIN); +} + +void sas_porte_link_reset_err(void *data) +{ + struct asd_sas_phy *phy = data; + + sas_begin_event(PORTE_LINK_RESET_ERR, &phy->ha->event_lock, + &phy->port_events_pending); + + sas_deform_port(phy); +} + +void sas_porte_timer_event(void *data) +{ + struct asd_sas_phy *phy = data; + + sas_begin_event(PORTE_TIMER_EVENT, &phy->ha->event_lock, + &phy->port_events_pending); + + sas_deform_port(phy); +} + +void sas_porte_hard_reset(void *data) +{ + struct asd_sas_phy *phy = data; + + sas_begin_event(PORTE_HARD_RESET, &phy->ha->event_lock, + &phy->port_events_pending); + + sas_deform_port(phy); +} + +/* ---------- SAS port registration ---------- */ + +static void sas_init_port(struct asd_sas_port *port, + struct sas_ha_struct *sas_ha, int i) +{ + port->id = i; + INIT_LIST_HEAD(&port->dev_list); + spin_lock_init(&port->phy_list_lock); + INIT_LIST_HEAD(&port->phy_list); + port->num_phys = 0; + port->phy_mask = 0; + port->ha = sas_ha; + + spin_lock_init(&port->dev_list_lock); +} + +int sas_register_ports(struct sas_ha_struct *sas_ha) +{ + int i; + + /* initialize the ports and discovery */ + for (i = 0; i < sas_ha->num_phys; i++) { + struct asd_sas_port *port = sas_ha->sas_port[i]; + + sas_init_port(port, sas_ha, i); + sas_init_disc(&port->disc, port); + } + return 0; +} + +void sas_unregister_ports(struct sas_ha_struct *sas_ha) +{ + int i; + + for (i = 0; i < sas_ha->num_phys; i++) + if (sas_ha->sas_phy[i]->port) + sas_deform_port(sas_ha->sas_phy[i]); + +} diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c new file mode 100644 index 000000000000..43e0e4e36934 --- /dev/null +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -0,0 +1,786 @@ +/* + * Serial Attached SCSI (SAS) class SCSI Host glue. + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#include "sas_internal.h" + +#include +#include +#include +#include +#include +#include +#include "../scsi_sas_internal.h" + +#include +#include +#include + +/* ---------- SCSI Host glue ---------- */ + +#define TO_SAS_TASK(_scsi_cmd) ((void *)(_scsi_cmd)->host_scribble) +#define ASSIGN_SAS_TASK(_sc, _t) do { (_sc)->host_scribble = (void *) _t; } while (0) + +static void sas_scsi_task_done(struct sas_task *task) +{ + struct task_status_struct *ts = &task->task_status; + struct scsi_cmnd *sc = task->uldd_task; + unsigned ts_flags = task->task_state_flags; + int hs = 0, stat = 0; + + if (unlikely(!sc)) { + SAS_DPRINTK("task_done called with non existing SCSI cmnd!\n"); + list_del_init(&task->list); + sas_free_task(task); + return; + } + + if (ts->resp == SAS_TASK_UNDELIVERED) { + /* transport error */ + hs = DID_NO_CONNECT; + } else { /* ts->resp == SAS_TASK_COMPLETE */ + /* task delivered, what happened afterwards? */ + switch (ts->stat) { + case SAS_DEV_NO_RESPONSE: + case SAS_INTERRUPTED: + case SAS_PHY_DOWN: + case SAS_NAK_R_ERR: + case SAS_OPEN_TO: + hs = DID_NO_CONNECT; + break; + case SAS_DATA_UNDERRUN: + sc->resid = ts->residual; + if (sc->request_bufflen - sc->resid < sc->underflow) + hs = DID_ERROR; + break; + case SAS_DATA_OVERRUN: + hs = DID_ERROR; + break; + case SAS_QUEUE_FULL: + hs = DID_SOFT_ERROR; /* retry */ + break; + case SAS_DEVICE_UNKNOWN: + hs = DID_BAD_TARGET; + break; + case SAS_SG_ERR: + hs = DID_PARITY; + break; + case SAS_OPEN_REJECT: + if (ts->open_rej_reason == SAS_OREJ_RSVD_RETRY) + hs = DID_SOFT_ERROR; /* retry */ + else + hs = DID_ERROR; + break; + case SAS_PROTO_RESPONSE: + SAS_DPRINTK("LLDD:%s sent SAS_PROTO_RESP for an SSP " + "task; please report this\n", + task->dev->port->ha->sas_ha_name); + break; + case SAS_ABORTED_TASK: + hs = DID_ABORT; + break; + case SAM_CHECK_COND: + memcpy(sc->sense_buffer, ts->buf, + max(SCSI_SENSE_BUFFERSIZE, ts->buf_valid_size)); + stat = SAM_CHECK_COND; + break; + default: + stat = ts->stat; + break; + } + } + ASSIGN_SAS_TASK(sc, NULL); + sc->result = (hs << 16) | stat; + list_del_init(&task->list); + sas_free_task(task); + /* This is very ugly but this is how SCSI Core works. */ + if (ts_flags & SAS_TASK_STATE_ABORTED) + scsi_finish_command(sc); + else + sc->scsi_done(sc); +} + +static enum task_attribute sas_scsi_get_task_attr(struct scsi_cmnd *cmd) +{ + enum task_attribute ta = TASK_ATTR_SIMPLE; + if (cmd->request && blk_rq_tagged(cmd->request)) { + if (cmd->device->ordered_tags && + (cmd->request->flags & REQ_HARDBARRIER)) + ta = TASK_ATTR_HOQ; + } + return ta; +} + +static struct sas_task *sas_create_task(struct scsi_cmnd *cmd, + struct domain_device *dev, + unsigned long gfp_flags) +{ + struct sas_task *task = sas_alloc_task(gfp_flags); + struct scsi_lun lun; + + if (!task) + return NULL; + + *(u32 *)cmd->sense_buffer = 0; + task->uldd_task = cmd; + ASSIGN_SAS_TASK(cmd, task); + + task->dev = dev; + task->task_proto = task->dev->tproto; /* BUG_ON(!SSP) */ + + task->ssp_task.retry_count = 1; + int_to_scsilun(cmd->device->lun, &lun); + memcpy(task->ssp_task.LUN, &lun.scsi_lun, 8); + task->ssp_task.task_attr = sas_scsi_get_task_attr(cmd); + memcpy(task->ssp_task.cdb, cmd->cmnd, 16); + + task->scatter = cmd->request_buffer; + task->num_scatter = cmd->use_sg; + task->total_xfer_len = cmd->request_bufflen; + task->data_dir = cmd->sc_data_direction; + + task->task_done = sas_scsi_task_done; + + return task; +} + +static int sas_queue_up(struct sas_task *task) +{ + struct sas_ha_struct *sas_ha = task->dev->port->ha; + struct scsi_core *core = &sas_ha->core; + unsigned long flags; + LIST_HEAD(list); + + spin_lock_irqsave(&core->task_queue_lock, flags); + if (sas_ha->lldd_queue_size < core->task_queue_size + 1) { + spin_unlock_irqrestore(&core->task_queue_lock, flags); + return -SAS_QUEUE_FULL; + } + list_add_tail(&task->list, &core->task_queue); + core->task_queue_size += 1; + spin_unlock_irqrestore(&core->task_queue_lock, flags); + up(&core->queue_thread_sema); + + return 0; +} + +/** + * sas_queuecommand -- Enqueue a command for processing + * @parameters: See SCSI Core documentation + * + * Note: XXX: Remove the host unlock/lock pair when SCSI Core can + * call us without holding an IRQ spinlock... + */ +int sas_queuecommand(struct scsi_cmnd *cmd, + void (*scsi_done)(struct scsi_cmnd *)) +{ + int res = 0; + struct domain_device *dev = cmd_to_domain_dev(cmd); + struct Scsi_Host *host = cmd->device->host; + struct sas_internal *i = to_sas_internal(host->transportt); + + spin_unlock_irq(host->host_lock); + + { + struct sas_ha_struct *sas_ha = dev->port->ha; + struct sas_task *task; + + res = -ENOMEM; + task = sas_create_task(cmd, dev, GFP_ATOMIC); + if (!task) + goto out; + + cmd->scsi_done = scsi_done; + /* Queue up, Direct Mode or Task Collector Mode. */ + if (sas_ha->lldd_max_execute_num < 2) + res = i->dft->lldd_execute_task(task, 1, GFP_ATOMIC); + else + res = sas_queue_up(task); + + /* Examine */ + if (res) { + SAS_DPRINTK("lldd_execute_task returned: %d\n", res); + ASSIGN_SAS_TASK(cmd, NULL); + sas_free_task(task); + if (res == -SAS_QUEUE_FULL) { + cmd->result = DID_SOFT_ERROR << 16; /* retry */ + res = 0; + scsi_done(cmd); + } + goto out; + } + } +out: + spin_lock_irq(host->host_lock); + return res; +} + +static void sas_scsi_clear_queue_lu(struct list_head *error_q, struct scsi_cmnd *my_cmd) +{ + struct scsi_cmnd *cmd, *n; + + list_for_each_entry_safe(cmd, n, error_q, eh_entry) { + if (cmd == my_cmd) + list_del_init(&cmd->eh_entry); + } +} + +static void sas_scsi_clear_queue_I_T(struct list_head *error_q, + struct domain_device *dev) +{ + struct scsi_cmnd *cmd, *n; + + list_for_each_entry_safe(cmd, n, error_q, eh_entry) { + struct domain_device *x = cmd_to_domain_dev(cmd); + + if (x == dev) + list_del_init(&cmd->eh_entry); + } +} + +static void sas_scsi_clear_queue_port(struct list_head *error_q, + struct asd_sas_port *port) +{ + struct scsi_cmnd *cmd, *n; + + list_for_each_entry_safe(cmd, n, error_q, eh_entry) { + struct domain_device *dev = cmd_to_domain_dev(cmd); + struct asd_sas_port *x = dev->port; + + if (x == port) + list_del_init(&cmd->eh_entry); + } +} + +enum task_disposition { + TASK_IS_DONE, + TASK_IS_ABORTED, + TASK_IS_AT_LU, + TASK_IS_NOT_AT_LU, +}; + +static enum task_disposition sas_scsi_find_task(struct sas_task *task) +{ + struct sas_ha_struct *ha = task->dev->port->ha; + unsigned long flags; + int i, res; + struct sas_internal *si = + to_sas_internal(task->dev->port->ha->core.shost->transportt); + + if (ha->lldd_max_execute_num > 1) { + struct scsi_core *core = &ha->core; + struct sas_task *t, *n; + + spin_lock_irqsave(&core->task_queue_lock, flags); + list_for_each_entry_safe(t, n, &core->task_queue, list) { + if (task == t) { + list_del_init(&t->list); + spin_unlock_irqrestore(&core->task_queue_lock, + flags); + SAS_DPRINTK("%s: task 0x%p aborted from " + "task_queue\n", + __FUNCTION__, task); + return TASK_IS_ABORTED; + } + } + spin_unlock_irqrestore(&core->task_queue_lock, flags); + } + + for (i = 0; i < 5; i++) { + SAS_DPRINTK("%s: aborting task 0x%p\n", __FUNCTION__, task); + res = si->dft->lldd_abort_task(task); + + spin_lock_irqsave(&task->task_state_lock, flags); + if (task->task_state_flags & SAS_TASK_STATE_DONE) { + spin_unlock_irqrestore(&task->task_state_lock, flags); + SAS_DPRINTK("%s: task 0x%p is done\n", __FUNCTION__, + task); + return TASK_IS_DONE; + } + spin_unlock_irqrestore(&task->task_state_lock, flags); + + if (res == TMF_RESP_FUNC_COMPLETE) { + SAS_DPRINTK("%s: task 0x%p is aborted\n", + __FUNCTION__, task); + return TASK_IS_ABORTED; + } else if (si->dft->lldd_query_task) { + SAS_DPRINTK("%s: querying task 0x%p\n", + __FUNCTION__, task); + res = si->dft->lldd_query_task(task); + if (res == TMF_RESP_FUNC_SUCC) { + SAS_DPRINTK("%s: task 0x%p at LU\n", + __FUNCTION__, task); + return TASK_IS_AT_LU; + } else if (res == TMF_RESP_FUNC_COMPLETE) { + SAS_DPRINTK("%s: task 0x%p not at LU\n", + __FUNCTION__, task); + return TASK_IS_NOT_AT_LU; + } + } + } + return res; +} + +static int sas_recover_lu(struct domain_device *dev, struct scsi_cmnd *cmd) +{ + int res = TMF_RESP_FUNC_FAILED; + struct scsi_lun lun; + struct sas_internal *i = + to_sas_internal(dev->port->ha->core.shost->transportt); + + int_to_scsilun(cmd->device->lun, &lun); + + SAS_DPRINTK("eh: device %llx LUN %x has the task\n", + SAS_ADDR(dev->sas_addr), + cmd->device->lun); + + if (i->dft->lldd_abort_task_set) + res = i->dft->lldd_abort_task_set(dev, lun.scsi_lun); + + if (res == TMF_RESP_FUNC_FAILED) { + if (i->dft->lldd_clear_task_set) + res = i->dft->lldd_clear_task_set(dev, lun.scsi_lun); + } + + if (res == TMF_RESP_FUNC_FAILED) { + if (i->dft->lldd_lu_reset) + res = i->dft->lldd_lu_reset(dev, lun.scsi_lun); + } + + return res; +} + +static int sas_recover_I_T(struct domain_device *dev) +{ + int res = TMF_RESP_FUNC_FAILED; + struct sas_internal *i = + to_sas_internal(dev->port->ha->core.shost->transportt); + + SAS_DPRINTK("I_T nexus reset for dev %016llx\n", + SAS_ADDR(dev->sas_addr)); + + if (i->dft->lldd_I_T_nexus_reset) + res = i->dft->lldd_I_T_nexus_reset(dev); + + return res; +} + +void sas_scsi_recover_host(struct Scsi_Host *shost) +{ + struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost); + unsigned long flags; + LIST_HEAD(error_q); + struct scsi_cmnd *cmd, *n; + enum task_disposition res = TASK_IS_DONE; + int tmf_resp; + struct sas_internal *i = to_sas_internal(shost->transportt); + + spin_lock_irqsave(shost->host_lock, flags); + list_splice_init(&shost->eh_cmd_q, &error_q); + spin_unlock_irqrestore(shost->host_lock, flags); + + SAS_DPRINTK("Enter %s\n", __FUNCTION__); + + /* All tasks on this list were marked SAS_TASK_STATE_ABORTED + * by sas_scsi_timed_out() callback. + */ +Again: + SAS_DPRINTK("going over list...\n"); + list_for_each_entry_safe(cmd, n, &error_q, eh_entry) { + struct sas_task *task = TO_SAS_TASK(cmd); + + SAS_DPRINTK("trying to find task 0x%p\n", task); + list_del_init(&cmd->eh_entry); + res = sas_scsi_find_task(task); + + cmd->eh_eflags = 0; + shost->host_failed--; + + switch (res) { + case TASK_IS_DONE: + SAS_DPRINTK("%s: task 0x%p is done\n", __FUNCTION__, + task); + task->task_done(task); + continue; + case TASK_IS_ABORTED: + SAS_DPRINTK("%s: task 0x%p is aborted\n", + __FUNCTION__, task); + task->task_done(task); + continue; + case TASK_IS_AT_LU: + SAS_DPRINTK("task 0x%p is at LU: lu recover\n", task); + tmf_resp = sas_recover_lu(task->dev, cmd); + if (tmf_resp == TMF_RESP_FUNC_COMPLETE) { + SAS_DPRINTK("dev %016llx LU %x is " + "recovered\n", + SAS_ADDR(task->dev), + cmd->device->lun); + task->task_done(task); + sas_scsi_clear_queue_lu(&error_q, cmd); + goto Again; + } + /* fallthrough */ + case TASK_IS_NOT_AT_LU: + SAS_DPRINTK("task 0x%p is not at LU: I_T recover\n", + task); + tmf_resp = sas_recover_I_T(task->dev); + if (tmf_resp == TMF_RESP_FUNC_COMPLETE) { + SAS_DPRINTK("I_T %016llx recovered\n", + SAS_ADDR(task->dev->sas_addr)); + task->task_done(task); + sas_scsi_clear_queue_I_T(&error_q, task->dev); + goto Again; + } + /* Hammer time :-) */ + if (i->dft->lldd_clear_nexus_port) { + struct asd_sas_port *port = task->dev->port; + SAS_DPRINTK("clearing nexus for port:%d\n", + port->id); + res = i->dft->lldd_clear_nexus_port(port); + if (res == TMF_RESP_FUNC_COMPLETE) { + SAS_DPRINTK("clear nexus port:%d " + "succeeded\n", port->id); + task->task_done(task); + sas_scsi_clear_queue_port(&error_q, + port); + goto Again; + } + } + if (i->dft->lldd_clear_nexus_ha) { + SAS_DPRINTK("clear nexus ha\n"); + res = i->dft->lldd_clear_nexus_ha(ha); + if (res == TMF_RESP_FUNC_COMPLETE) { + SAS_DPRINTK("clear nexus ha " + "succeeded\n"); + task->task_done(task); + goto out; + } + } + /* If we are here -- this means that no amount + * of effort could recover from errors. Quite + * possibly the HA just disappeared. + */ + SAS_DPRINTK("error from device %llx, LUN %x " + "couldn't be recovered in any way\n", + SAS_ADDR(task->dev->sas_addr), + cmd->device->lun); + + task->task_done(task); + goto clear_q; + } + } +out: + SAS_DPRINTK("--- Exit %s\n", __FUNCTION__); + return; +clear_q: + SAS_DPRINTK("--- Exit %s -- clear_q\n", __FUNCTION__); + list_for_each_entry_safe(cmd, n, &error_q, eh_entry) { + struct sas_task *task = TO_SAS_TASK(cmd); + list_del_init(&cmd->eh_entry); + task->task_done(task); + } +} + +enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd) +{ + struct sas_task *task = TO_SAS_TASK(cmd); + unsigned long flags; + + if (!task) { + SAS_DPRINTK("command 0x%p, task 0x%p, timed out: EH_HANDLED\n", + cmd, task); + return EH_HANDLED; + } + + spin_lock_irqsave(&task->task_state_lock, flags); + if (task->task_state_flags & SAS_TASK_STATE_DONE) { + spin_unlock_irqrestore(&task->task_state_lock, flags); + SAS_DPRINTK("command 0x%p, task 0x%p, timed out: EH_HANDLED\n", + cmd, task); + return EH_HANDLED; + } + task->task_state_flags |= SAS_TASK_STATE_ABORTED; + spin_unlock_irqrestore(&task->task_state_lock, flags); + + SAS_DPRINTK("command 0x%p, task 0x%p, timed out: EH_NOT_HANDLED\n", + cmd, task); + + return EH_NOT_HANDLED; +} + +struct domain_device *sas_find_dev_by_rphy(struct sas_rphy *rphy) +{ + struct Scsi_Host *shost = dev_to_shost(rphy->dev.parent); + struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost); + struct domain_device *found_dev = NULL; + int i; + + spin_lock(&ha->phy_port_lock); + for (i = 0; i < ha->num_phys; i++) { + struct asd_sas_port *port = ha->sas_port[i]; + struct domain_device *dev; + + spin_lock(&port->dev_list_lock); + list_for_each_entry(dev, &port->dev_list, dev_list_node) { + if (rphy == dev->rphy) { + found_dev = dev; + spin_unlock(&port->dev_list_lock); + goto found; + } + } + spin_unlock(&port->dev_list_lock); + } + found: + spin_unlock(&ha->phy_port_lock); + + return found_dev; +} + +static inline struct domain_device *sas_find_target(struct scsi_target *starget) +{ + struct sas_rphy *rphy = dev_to_rphy(starget->dev.parent); + + return sas_find_dev_by_rphy(rphy); +} + +int sas_target_alloc(struct scsi_target *starget) +{ + struct domain_device *found_dev = sas_find_target(starget); + + if (!found_dev) + return -ENODEV; + + starget->hostdata = found_dev; + return 0; +} + +#define SAS_DEF_QD 32 +#define SAS_MAX_QD 64 + +int sas_slave_configure(struct scsi_device *scsi_dev) +{ + struct domain_device *dev = sdev_to_domain_dev(scsi_dev); + struct sas_ha_struct *sas_ha; + + BUG_ON(dev->rphy->identify.device_type != SAS_END_DEVICE); + + sas_ha = dev->port->ha; + + sas_read_port_mode_page(scsi_dev); + + if (scsi_dev->tagged_supported) { + scsi_set_tag_type(scsi_dev, MSG_SIMPLE_TAG); + scsi_activate_tcq(scsi_dev, SAS_DEF_QD); + } else { + SAS_DPRINTK("device %llx, LUN %x doesn't support " + "TCQ\n", SAS_ADDR(dev->sas_addr), + scsi_dev->lun); + scsi_dev->tagged_supported = 0; + scsi_set_tag_type(scsi_dev, 0); + scsi_deactivate_tcq(scsi_dev, 1); + } + + return 0; +} + +void sas_slave_destroy(struct scsi_device *scsi_dev) +{ +} + +int sas_change_queue_depth(struct scsi_device *scsi_dev, int new_depth) +{ + int res = min(new_depth, SAS_MAX_QD); + + if (scsi_dev->tagged_supported) + scsi_adjust_queue_depth(scsi_dev, scsi_get_tag_type(scsi_dev), + res); + else { + struct domain_device *dev = sdev_to_domain_dev(scsi_dev); + sas_printk("device %llx LUN %x queue depth changed to 1\n", + SAS_ADDR(dev->sas_addr), + scsi_dev->lun); + scsi_adjust_queue_depth(scsi_dev, 0, 1); + res = 1; + } + + return res; +} + +int sas_change_queue_type(struct scsi_device *scsi_dev, int qt) +{ + if (!scsi_dev->tagged_supported) + return 0; + + scsi_deactivate_tcq(scsi_dev, 1); + + scsi_set_tag_type(scsi_dev, qt); + scsi_activate_tcq(scsi_dev, scsi_dev->queue_depth); + + return qt; +} + +int sas_bios_param(struct scsi_device *scsi_dev, + struct block_device *bdev, + sector_t capacity, int *hsc) +{ + hsc[0] = 255; + hsc[1] = 63; + sector_div(capacity, 255*63); + hsc[2] = capacity; + + return 0; +} + +/* ---------- Task Collector Thread implementation ---------- */ + +static void sas_queue(struct sas_ha_struct *sas_ha) +{ + struct scsi_core *core = &sas_ha->core; + unsigned long flags; + LIST_HEAD(q); + int can_queue; + int res; + struct sas_internal *i = to_sas_internal(core->shost->transportt); + + spin_lock_irqsave(&core->task_queue_lock, flags); + while (!core->queue_thread_kill && + !list_empty(&core->task_queue)) { + + can_queue = sas_ha->lldd_queue_size - core->task_queue_size; + if (can_queue >= 0) { + can_queue = core->task_queue_size; + list_splice_init(&core->task_queue, &q); + } else { + struct list_head *a, *n; + + can_queue = sas_ha->lldd_queue_size; + list_for_each_safe(a, n, &core->task_queue) { + list_move_tail(a, &q); + if (--can_queue == 0) + break; + } + can_queue = sas_ha->lldd_queue_size; + } + core->task_queue_size -= can_queue; + spin_unlock_irqrestore(&core->task_queue_lock, flags); + { + struct sas_task *task = list_entry(q.next, + struct sas_task, + list); + list_del_init(&q); + res = i->dft->lldd_execute_task(task, can_queue, + GFP_KERNEL); + if (unlikely(res)) + __list_add(&q, task->list.prev, &task->list); + } + spin_lock_irqsave(&core->task_queue_lock, flags); + if (res) { + list_splice_init(&q, &core->task_queue); /*at head*/ + core->task_queue_size += can_queue; + } + } + spin_unlock_irqrestore(&core->task_queue_lock, flags); +} + +static DECLARE_COMPLETION(queue_th_comp); + +/** + * sas_queue_thread -- The Task Collector thread + * @_sas_ha: pointer to struct sas_ha + */ +static int sas_queue_thread(void *_sas_ha) +{ + struct sas_ha_struct *sas_ha = _sas_ha; + struct scsi_core *core = &sas_ha->core; + + daemonize("sas_queue_%d", core->shost->host_no); + current->flags |= PF_NOFREEZE; + + complete(&queue_th_comp); + + while (1) { + down_interruptible(&core->queue_thread_sema); + sas_queue(sas_ha); + if (core->queue_thread_kill) + break; + } + + complete(&queue_th_comp); + + return 0; +} + +int sas_init_queue(struct sas_ha_struct *sas_ha) +{ + int res; + struct scsi_core *core = &sas_ha->core; + + spin_lock_init(&core->task_queue_lock); + core->task_queue_size = 0; + INIT_LIST_HEAD(&core->task_queue); + init_MUTEX_LOCKED(&core->queue_thread_sema); + + res = kernel_thread(sas_queue_thread, sas_ha, 0); + if (res >= 0) + wait_for_completion(&queue_th_comp); + + return res < 0 ? res : 0; +} + +void sas_shutdown_queue(struct sas_ha_struct *sas_ha) +{ + unsigned long flags; + struct scsi_core *core = &sas_ha->core; + struct sas_task *task, *n; + + init_completion(&queue_th_comp); + core->queue_thread_kill = 1; + up(&core->queue_thread_sema); + wait_for_completion(&queue_th_comp); + + if (!list_empty(&core->task_queue)) + SAS_DPRINTK("HA: %llx: scsi core task queue is NOT empty!?\n", + SAS_ADDR(sas_ha->sas_addr)); + + spin_lock_irqsave(&core->task_queue_lock, flags); + list_for_each_entry_safe(task, n, &core->task_queue, list) { + struct scsi_cmnd *cmd = task->uldd_task; + + list_del_init(&task->list); + + ASSIGN_SAS_TASK(cmd, NULL); + sas_free_task(task); + cmd->result = DID_ABORT << 16; + cmd->scsi_done(cmd); + } + spin_unlock_irqrestore(&core->task_queue_lock, flags); +} + +EXPORT_SYMBOL_GPL(sas_queuecommand); +EXPORT_SYMBOL_GPL(sas_target_alloc); +EXPORT_SYMBOL_GPL(sas_slave_configure); +EXPORT_SYMBOL_GPL(sas_slave_destroy); +EXPORT_SYMBOL_GPL(sas_change_queue_depth); +EXPORT_SYMBOL_GPL(sas_change_queue_type); +EXPORT_SYMBOL_GPL(sas_bios_param); diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h new file mode 100644 index 000000000000..72acdabe7f80 --- /dev/null +++ b/include/scsi/libsas.h @@ -0,0 +1,627 @@ +/* + * SAS host prototypes and structures header file + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#ifndef _LIBSAS_H_ +#define _LIBSAS_H_ + + +#include +#include +#include +#include +#include +#include +#include +#include + +struct block_device; + +enum sas_class { + SAS, + EXPANDER +}; + +enum sas_phy_role { + PHY_ROLE_NONE = 0, + PHY_ROLE_TARGET = 0x40, + PHY_ROLE_INITIATOR = 0x80, +}; + +enum sas_phy_type { + PHY_TYPE_PHYSICAL, + PHY_TYPE_VIRTUAL +}; + +/* The events are mnemonically described in sas_dump.c + * so when updating/adding events here, please also + * update the other file too. + */ +enum ha_event { + HAE_RESET = 0U, + HA_NUM_EVENTS = 1, +}; + +enum port_event { + PORTE_BYTES_DMAED = 0U, + PORTE_BROADCAST_RCVD = 1, + PORTE_LINK_RESET_ERR = 2, + PORTE_TIMER_EVENT = 3, + PORTE_HARD_RESET = 4, + PORT_NUM_EVENTS = 5, +}; + +enum phy_event { + PHYE_LOSS_OF_SIGNAL = 0U, + PHYE_OOB_DONE = 1, + PHYE_OOB_ERROR = 2, + PHYE_SPINUP_HOLD = 3, /* hot plug SATA, no COMWAKE sent */ + PHY_NUM_EVENTS = 4, +}; + +enum discover_event { + DISCE_DISCOVER_DOMAIN = 0U, + DISCE_REVALIDATE_DOMAIN = 1, + DISCE_PORT_GONE = 2, + DISC_NUM_EVENTS = 3, +}; + +/* ---------- Expander Devices ---------- */ + +#define ETASK 0xFA + +#define to_dom_device(_obj) container_of(_obj, struct domain_device, dev_obj) +#define to_dev_attr(_attr) container_of(_attr, struct domain_dev_attribute,\ + attr) + +enum routing_attribute { + DIRECT_ROUTING, + SUBTRACTIVE_ROUTING, + TABLE_ROUTING, +}; + +enum ex_phy_state { + PHY_EMPTY, + PHY_VACANT, + PHY_NOT_PRESENT, + PHY_DEVICE_DISCOVERED +}; + +struct ex_phy { + int phy_id; + + enum ex_phy_state phy_state; + + enum sas_dev_type attached_dev_type; + enum sas_phy_linkrate linkrate; + + u8 attached_sata_host:1; + u8 attached_sata_dev:1; + u8 attached_sata_ps:1; + + enum sas_proto attached_tproto; + enum sas_proto attached_iproto; + + u8 attached_sas_addr[SAS_ADDR_SIZE]; + u8 attached_phy_id; + + u8 phy_change_count; + enum routing_attribute routing_attr; + u8 virtual:1; + + int last_da_index; + + struct sas_phy *phy; + struct sas_port *port; +}; + +struct expander_device { + struct list_head children; + + u16 ex_change_count; + u16 max_route_indexes; + u8 num_phys; + u8 configuring:1; + u8 conf_route_table:1; + u8 enclosure_logical_id[8]; + + struct ex_phy *ex_phy; + struct sas_port *parent_port; +}; + +/* ---------- SATA device ---------- */ +enum ata_command_set { + ATA_COMMAND_SET = 0, + ATAPI_COMMAND_SET = 1, +}; + +struct sata_device { + enum ata_command_set command_set; + struct smp_resp rps_resp; /* report_phy_sata_resp */ + __le16 *identify_device; + __le16 *identify_packet_device; + + u8 port_no; /* port number, if this is a PM (Port) */ + struct list_head children; /* PM Ports if this is a PM */ +}; + +/* ---------- Domain device ---------- */ +struct domain_device { + enum sas_dev_type dev_type; + + enum sas_phy_linkrate linkrate; + enum sas_phy_linkrate min_linkrate; + enum sas_phy_linkrate max_linkrate; + + int pathways; + + struct domain_device *parent; + struct list_head siblings; /* devices on the same level */ + struct asd_sas_port *port; /* shortcut to root of the tree */ + + struct list_head dev_list_node; + + enum sas_proto iproto; + enum sas_proto tproto; + + struct sas_rphy *rphy; + + u8 sas_addr[SAS_ADDR_SIZE]; + u8 hashed_sas_addr[HASHED_SAS_ADDR_SIZE]; + + u8 frame_rcvd[32]; + + union { + struct expander_device ex_dev; + struct sata_device sata_dev; /* STP & directly attached */ + }; + + void *lldd_dev; +}; + +struct sas_discovery { + spinlock_t disc_event_lock; + struct work_struct disc_work[DISC_NUM_EVENTS]; + unsigned long pending; + u8 fanout_sas_addr[8]; + u8 eeds_a[8]; + u8 eeds_b[8]; + int max_level; +}; + + +/* The port struct is Class:RW, driver:RO */ +struct asd_sas_port { +/* private: */ + struct completion port_gone_completion; + + struct sas_discovery disc; + struct domain_device *port_dev; + spinlock_t dev_list_lock; + struct list_head dev_list; + enum sas_phy_linkrate linkrate; + + struct sas_phy *phy; + struct work_struct work; + +/* public: */ + int id; + + enum sas_class class; + u8 sas_addr[SAS_ADDR_SIZE]; + u8 attached_sas_addr[SAS_ADDR_SIZE]; + enum sas_proto iproto; + enum sas_proto tproto; + + enum sas_oob_mode oob_mode; + + spinlock_t phy_list_lock; + struct list_head phy_list; + int num_phys; + u32 phy_mask; + + struct sas_ha_struct *ha; + + struct sas_port *port; + + void *lldd_port; /* not touched by the sas class code */ +}; + +/* The phy pretty much is controlled by the LLDD. + * The class only reads those fields. + */ +struct asd_sas_phy { +/* private: */ + /* protected by ha->event_lock */ + struct work_struct port_events[PORT_NUM_EVENTS]; + struct work_struct phy_events[PHY_NUM_EVENTS]; + + unsigned long port_events_pending; + unsigned long phy_events_pending; + + int error; + + struct sas_phy *phy; + +/* public: */ + /* The following are class:RO, driver:R/W */ + int enabled; /* must be set */ + + int id; /* must be set */ + enum sas_class class; + enum sas_proto iproto; + enum sas_proto tproto; + + enum sas_phy_type type; + enum sas_phy_role role; + enum sas_oob_mode oob_mode; + enum sas_phy_linkrate linkrate; + + u8 *sas_addr; /* must be set */ + u8 attached_sas_addr[SAS_ADDR_SIZE]; /* class:RO, driver: R/W */ + + spinlock_t frame_rcvd_lock; + u8 *frame_rcvd; /* must be set */ + int frame_rcvd_size; + + spinlock_t sas_prim_lock; + u32 sas_prim; + + struct list_head port_phy_el; /* driver:RO */ + struct asd_sas_port *port; /* Class:RW, driver: RO */ + + struct sas_ha_struct *ha; /* may be set; the class sets it anyway */ + + void *lldd_phy; /* not touched by the sas_class_code */ +}; + +struct scsi_core { + struct Scsi_Host *shost; + + spinlock_t task_queue_lock; + struct list_head task_queue; + int task_queue_size; + + struct semaphore queue_thread_sema; + int queue_thread_kill; +}; + +struct sas_ha_struct { +/* private: */ + spinlock_t event_lock; + struct work_struct ha_events[HA_NUM_EVENTS]; + unsigned long pending; + + struct scsi_core core; + +/* public: */ + char *sas_ha_name; + struct pci_dev *pcidev; /* should be set */ + struct module *lldd_module; /* should be set */ + + u8 *sas_addr; /* must be set */ + u8 hashed_sas_addr[HASHED_SAS_ADDR_SIZE]; + + spinlock_t phy_port_lock; + struct asd_sas_phy **sas_phy; /* array of valid pointers, must be set */ + struct asd_sas_port **sas_port; /* array of valid pointers, must be set */ + int num_phys; /* must be set, gt 0, static */ + + /* The class calls this to send a task for execution. */ + int lldd_max_execute_num; + int lldd_queue_size; + + /* LLDD calls these to notify the class of an event. */ + void (*notify_ha_event)(struct sas_ha_struct *, enum ha_event); + void (*notify_port_event)(struct asd_sas_phy *, enum port_event); + void (*notify_phy_event)(struct asd_sas_phy *, enum phy_event); + + void *lldd_ha; /* not touched by sas class code */ +}; + +#define SHOST_TO_SAS_HA(_shost) (*(struct sas_ha_struct **)(_shost)->hostdata) + +static inline struct domain_device * +starget_to_domain_dev(struct scsi_target *starget) { + return starget->hostdata; +} + +static inline struct domain_device * +sdev_to_domain_dev(struct scsi_device *sdev) { + return starget_to_domain_dev(sdev->sdev_target); +} + +static inline struct domain_device * +cmd_to_domain_dev(struct scsi_cmnd *cmd) +{ + return sdev_to_domain_dev(cmd->device); +} + +void sas_hash_addr(u8 *hashed, const u8 *sas_addr); + +/* Before calling a notify event, LLDD should use this function + * when the link is severed (possibly from its tasklet). + * The idea is that the Class only reads those, while the LLDD, + * can R/W these (thus avoiding a race). + */ +static inline void sas_phy_disconnected(struct asd_sas_phy *phy) +{ + phy->oob_mode = OOB_NOT_CONNECTED; + phy->linkrate = PHY_LINKRATE_NONE; +} + +/* ---------- Tasks ---------- */ +/* + service_response | SAS_TASK_COMPLETE | SAS_TASK_UNDELIVERED | + exec_status | | | + ---------------------+---------------------+-----------------------+ + SAM_... | X | | + DEV_NO_RESPONSE | X | X | + INTERRUPTED | X | | + QUEUE_FULL | | X | + DEVICE_UNKNOWN | | X | + SG_ERR | | X | + ---------------------+---------------------+-----------------------+ + */ + +enum service_response { + SAS_TASK_COMPLETE, + SAS_TASK_UNDELIVERED = -1, +}; + +enum exec_status { + SAM_GOOD = 0, + SAM_CHECK_COND = 2, + SAM_COND_MET = 4, + SAM_BUSY = 8, + SAM_INTERMEDIATE = 0x10, + SAM_IM_COND_MET = 0x12, + SAM_RESV_CONFLICT= 0x14, + SAM_TASK_SET_FULL= 0x28, + SAM_ACA_ACTIVE = 0x30, + SAM_TASK_ABORTED = 0x40, + + SAS_DEV_NO_RESPONSE = 0x80, + SAS_DATA_UNDERRUN, + SAS_DATA_OVERRUN, + SAS_INTERRUPTED, + SAS_QUEUE_FULL, + SAS_DEVICE_UNKNOWN, + SAS_SG_ERR, + SAS_OPEN_REJECT, + SAS_OPEN_TO, + SAS_PROTO_RESPONSE, + SAS_PHY_DOWN, + SAS_NAK_R_ERR, + SAS_PENDING, + SAS_ABORTED_TASK, +}; + +/* When a task finishes with a response, the LLDD examines the + * response: + * - For an ATA task task_status_struct::stat is set to + * SAS_PROTO_RESPONSE, and the task_status_struct::buf is set to the + * contents of struct ata_task_resp. + * - For SSP tasks, if no data is present or status/TMF response + * is valid, task_status_struct::stat is set. If data is present + * (SENSE data), the LLDD copies up to SAS_STATUS_BUF_SIZE, sets + * task_status_struct::buf_valid_size, and task_status_struct::stat is + * set to SAM_CHECK_COND. + * + * "buf" has format SCSI Sense for SSP task, or struct ata_task_resp + * for ATA task. + * + * "frame_len" is the total frame length, which could be more or less + * than actually copied. + * + * Tasks ending with response, always set the residual field. + */ +struct ata_task_resp { + u16 frame_len; + u8 ending_fis[24]; /* dev to host or data-in */ + u32 sstatus; + u32 serror; + u32 scontrol; + u32 sactive; +}; + +#define SAS_STATUS_BUF_SIZE 96 + +struct task_status_struct { + enum service_response resp; + enum exec_status stat; + int buf_valid_size; + + u8 buf[SAS_STATUS_BUF_SIZE]; + + u32 residual; + enum sas_open_rej_reason open_rej_reason; +}; + +/* ATA and ATAPI task queuable to a SAS LLDD. + */ +struct sas_ata_task { + struct host_to_dev_fis fis; + u8 atapi_packet[16]; /* 0 if not ATAPI task */ + + u8 retry_count; /* hardware retry, should be > 0 */ + + u8 dma_xfer:1; /* PIO:0 or DMA:1 */ + u8 use_ncq:1; + u8 set_affil_pol:1; + u8 stp_affil_pol:1; + + u8 device_control_reg_update:1; +}; + +struct sas_smp_task { + struct scatterlist smp_req; + struct scatterlist smp_resp; +}; + +enum task_attribute { + TASK_ATTR_SIMPLE = 0, + TASK_ATTR_HOQ = 1, + TASK_ATTR_ORDERED= 2, + TASK_ATTR_ACA = 4, +}; + +struct sas_ssp_task { + u8 retry_count; /* hardware retry, should be > 0 */ + + u8 LUN[8]; + u8 enable_first_burst:1; + enum task_attribute task_attr; + u8 task_prio; + u8 cdb[16]; +}; + +struct sas_task { + struct domain_device *dev; + struct list_head list; + + spinlock_t task_state_lock; + unsigned task_state_flags; + + enum sas_proto task_proto; + + /* Used by the discovery code. */ + struct timer_list timer; + struct completion completion; + + union { + struct sas_ata_task ata_task; + struct sas_smp_task smp_task; + struct sas_ssp_task ssp_task; + }; + + struct scatterlist *scatter; + int num_scatter; + u32 total_xfer_len; + u8 data_dir:2; /* Use PCI_DMA_... */ + + struct task_status_struct task_status; + void (*task_done)(struct sas_task *); + + void *lldd_task; /* for use by LLDDs */ + void *uldd_task; +}; + + + +#define SAS_TASK_STATE_PENDING 1 +#define SAS_TASK_STATE_DONE 2 +#define SAS_TASK_STATE_ABORTED 4 + +static inline struct sas_task *sas_alloc_task(unsigned long flags) +{ + extern kmem_cache_t *sas_task_cache; + struct sas_task *task = kmem_cache_alloc(sas_task_cache, flags); + + if (task) { + memset(task, 0, sizeof(*task)); + INIT_LIST_HEAD(&task->list); + spin_lock_init(&task->task_state_lock); + task->task_state_flags = SAS_TASK_STATE_PENDING; + init_timer(&task->timer); + init_completion(&task->completion); + } + + return task; +} + +static inline void sas_free_task(struct sas_task *task) +{ + if (task) { + extern kmem_cache_t *sas_task_cache; + BUG_ON(!list_empty(&task->list)); + kmem_cache_free(sas_task_cache, task); + } +} + +struct sas_domain_function_template { + /* The class calls these to notify the LLDD of an event. */ + void (*lldd_port_formed)(struct asd_sas_phy *); + void (*lldd_port_deformed)(struct asd_sas_phy *); + + /* The class calls these when a device is found or gone. */ + int (*lldd_dev_found)(struct domain_device *); + void (*lldd_dev_gone)(struct domain_device *); + + int (*lldd_execute_task)(struct sas_task *, int num, + unsigned long gfp_flags); + + /* Task Management Functions. Must be called from process context. */ + int (*lldd_abort_task)(struct sas_task *); + int (*lldd_abort_task_set)(struct domain_device *, u8 *lun); + int (*lldd_clear_aca)(struct domain_device *, u8 *lun); + int (*lldd_clear_task_set)(struct domain_device *, u8 *lun); + int (*lldd_I_T_nexus_reset)(struct domain_device *); + int (*lldd_lu_reset)(struct domain_device *, u8 *lun); + int (*lldd_query_task)(struct sas_task *); + + /* Port and Adapter management */ + int (*lldd_clear_nexus_port)(struct asd_sas_port *); + int (*lldd_clear_nexus_ha)(struct sas_ha_struct *); + + /* Phy management */ + int (*lldd_control_phy)(struct asd_sas_phy *, enum phy_func); +}; + +extern int sas_register_ha(struct sas_ha_struct *); +extern int sas_unregister_ha(struct sas_ha_struct *); + +extern int sas_queuecommand(struct scsi_cmnd *, + void (*scsi_done)(struct scsi_cmnd *)); +extern int sas_target_alloc(struct scsi_target *); +extern int sas_slave_alloc(struct scsi_device *); +extern int sas_slave_configure(struct scsi_device *); +extern void sas_slave_destroy(struct scsi_device *); +extern int sas_change_queue_depth(struct scsi_device *, int new_depth); +extern int sas_change_queue_type(struct scsi_device *, int qt); +extern int sas_bios_param(struct scsi_device *, + struct block_device *, + sector_t capacity, int *hsc); +extern struct scsi_transport_template * +sas_domain_attach_transport(struct sas_domain_function_template *); +extern void sas_domain_release_transport(struct scsi_transport_template *); + +int sas_discover_root_expander(struct domain_device *); + +void sas_init_ex_attr(void); + +int sas_ex_revalidate_domain(struct domain_device *); + +void sas_unregister_domain_devices(struct asd_sas_port *port); +void sas_init_disc(struct sas_discovery *disc, struct asd_sas_port *); +int sas_discover_event(struct asd_sas_port *, enum discover_event ev); + +int sas_discover_sata(struct domain_device *); +int sas_discover_end_dev(struct domain_device *); + +void sas_unregister_dev(struct domain_device *); + +void sas_init_dev(struct domain_device *); + +#endif /* _SASLIB_H_ */ diff --git a/include/scsi/sas.h b/include/scsi/sas.h new file mode 100644 index 000000000000..752853a113dc --- /dev/null +++ b/include/scsi/sas.h @@ -0,0 +1,644 @@ +/* + * SAS structures and definitions header file + * + * Copyright (C) 2005 Adaptec, Inc. All rights reserved. + * Copyright (C) 2005 Luben Tuikov + * + * This file is licensed under GPLv2. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#ifndef _SAS_H_ +#define _SAS_H_ + +#include +#include + +#define SAS_ADDR_SIZE 8 +#define HASHED_SAS_ADDR_SIZE 3 +#define SAS_ADDR(_sa) ((unsigned long long) be64_to_cpu(*(__be64 *)(_sa))) + +#define SMP_REQUEST 0x40 +#define SMP_RESPONSE 0x41 + +#define SSP_DATA 0x01 +#define SSP_XFER_RDY 0x05 +#define SSP_COMMAND 0x06 +#define SSP_RESPONSE 0x07 +#define SSP_TASK 0x16 + +#define SMP_REPORT_GENERAL 0x00 +#define SMP_REPORT_MANUF_INFO 0x01 +#define SMP_READ_GPIO_REG 0x02 +#define SMP_DISCOVER 0x10 +#define SMP_REPORT_PHY_ERR_LOG 0x11 +#define SMP_REPORT_PHY_SATA 0x12 +#define SMP_REPORT_ROUTE_INFO 0x13 +#define SMP_WRITE_GPIO_REG 0x82 +#define SMP_CONF_ROUTE_INFO 0x90 +#define SMP_PHY_CONTROL 0x91 +#define SMP_PHY_TEST_FUNCTION 0x92 + +#define SMP_RESP_FUNC_ACC 0x00 +#define SMP_RESP_FUNC_UNK 0x01 +#define SMP_RESP_FUNC_FAILED 0x02 +#define SMP_RESP_INV_FRM_LEN 0x03 +#define SMP_RESP_NO_PHY 0x10 +#define SMP_RESP_NO_INDEX 0x11 +#define SMP_RESP_PHY_NO_SATA 0x12 +#define SMP_RESP_PHY_UNK_OP 0x13 +#define SMP_RESP_PHY_UNK_TESTF 0x14 +#define SMP_RESP_PHY_TEST_INPROG 0x15 +#define SMP_RESP_PHY_VACANT 0x16 + +/* SAM TMFs */ +#define TMF_ABORT_TASK 0x01 +#define TMF_ABORT_TASK_SET 0x02 +#define TMF_CLEAR_TASK_SET 0x04 +#define TMF_LU_RESET 0x08 +#define TMF_CLEAR_ACA 0x40 +#define TMF_QUERY_TASK 0x80 + +/* SAS TMF responses */ +#define TMF_RESP_FUNC_COMPLETE 0x00 +#define TMF_RESP_INVALID_FRAME 0x02 +#define TMF_RESP_FUNC_ESUPP 0x04 +#define TMF_RESP_FUNC_FAILED 0x05 +#define TMF_RESP_FUNC_SUCC 0x08 +#define TMF_RESP_NO_LUN 0x09 +#define TMF_RESP_OVERLAPPED_TAG 0x0A + +enum sas_oob_mode { + OOB_NOT_CONNECTED, + SATA_OOB_MODE, + SAS_OOB_MODE +}; + +/* See sas_discover.c if you plan on changing these. + */ +enum sas_dev_type { + NO_DEVICE = 0, /* protocol */ + SAS_END_DEV = 1, /* protocol */ + EDGE_DEV = 2, /* protocol */ + FANOUT_DEV = 3, /* protocol */ + SAS_HA = 4, + SATA_DEV = 5, + SATA_PM = 7, + SATA_PM_PORT= 8, +}; + +enum sas_phy_linkrate { + PHY_LINKRATE_NONE = 0, + PHY_LINKRATE_UNKNOWN = 0, + PHY_DISABLED, + PHY_RESET_PROBLEM, + PHY_SPINUP_HOLD, + PHY_PORT_SELECTOR, + PHY_LINKRATE_1_5 = 0x08, + PHY_LINKRATE_G1 = PHY_LINKRATE_1_5, + PHY_LINKRATE_3 = 0x09, + PHY_LINKRATE_G2 = PHY_LINKRATE_3, + PHY_LINKRATE_6 = 0x0A, +}; + +/* Partly from IDENTIFY address frame. */ +enum sas_proto { + SATA_PROTO = 1, + SAS_PROTO_SMP = 2, /* protocol */ + SAS_PROTO_STP = 4, /* protocol */ + SAS_PROTO_SSP = 8, /* protocol */ + SAS_PROTO_ALL = 0xE, +}; + +/* From the spec; local phys only */ +enum phy_func { + PHY_FUNC_NOP, + PHY_FUNC_LINK_RESET, /* Enables the phy */ + PHY_FUNC_HARD_RESET, + PHY_FUNC_DISABLE, + PHY_FUNC_CLEAR_ERROR_LOG = 5, + PHY_FUNC_CLEAR_AFFIL, + PHY_FUNC_TX_SATA_PS_SIGNAL, + PHY_FUNC_RELEASE_SPINUP_HOLD = 0x10, /* LOCAL PORT ONLY! */ +}; + +/* SAS LLDD would need to report only _very_few_ of those, like BROADCAST. + * Most of those are here for completeness. + */ +enum sas_prim { + SAS_PRIM_AIP_NORMAL = 1, + SAS_PRIM_AIP_R0 = 2, + SAS_PRIM_AIP_R1 = 3, + SAS_PRIM_AIP_R2 = 4, + SAS_PRIM_AIP_WC = 5, + SAS_PRIM_AIP_WD = 6, + SAS_PRIM_AIP_WP = 7, + SAS_PRIM_AIP_RWP = 8, + + SAS_PRIM_BC_CH = 9, + SAS_PRIM_BC_RCH0 = 10, + SAS_PRIM_BC_RCH1 = 11, + SAS_PRIM_BC_R0 = 12, + SAS_PRIM_BC_R1 = 13, + SAS_PRIM_BC_R2 = 14, + SAS_PRIM_BC_R3 = 15, + SAS_PRIM_BC_R4 = 16, + + SAS_PRIM_NOTIFY_ENSP= 17, + SAS_PRIM_NOTIFY_R0 = 18, + SAS_PRIM_NOTIFY_R1 = 19, + SAS_PRIM_NOTIFY_R2 = 20, + + SAS_PRIM_CLOSE_CLAF = 21, + SAS_PRIM_CLOSE_NORM = 22, + SAS_PRIM_CLOSE_R0 = 23, + SAS_PRIM_CLOSE_R1 = 24, + + SAS_PRIM_OPEN_RTRY = 25, + SAS_PRIM_OPEN_RJCT = 26, + SAS_PRIM_OPEN_ACPT = 27, + + SAS_PRIM_DONE = 28, + SAS_PRIM_BREAK = 29, + + SATA_PRIM_DMAT = 33, + SATA_PRIM_PMNAK = 34, + SATA_PRIM_PMACK = 35, + SATA_PRIM_PMREQ_S = 36, + SATA_PRIM_PMREQ_P = 37, + SATA_SATA_R_ERR = 38, +}; + +enum sas_open_rej_reason { + /* Abandon open */ + SAS_OREJ_UNKNOWN = 0, + SAS_OREJ_BAD_DEST = 1, + SAS_OREJ_CONN_RATE = 2, + SAS_OREJ_EPROTO = 3, + SAS_OREJ_RESV_AB0 = 4, + SAS_OREJ_RESV_AB1 = 5, + SAS_OREJ_RESV_AB2 = 6, + SAS_OREJ_RESV_AB3 = 7, + SAS_OREJ_WRONG_DEST= 8, + SAS_OREJ_STP_NORES = 9, + + /* Retry open */ + SAS_OREJ_NO_DEST = 10, + SAS_OREJ_PATH_BLOCKED = 11, + SAS_OREJ_RSVD_CONT0 = 12, + SAS_OREJ_RSVD_CONT1 = 13, + SAS_OREJ_RSVD_INIT0 = 14, + SAS_OREJ_RSVD_INIT1 = 15, + SAS_OREJ_RSVD_STOP0 = 16, + SAS_OREJ_RSVD_STOP1 = 17, + SAS_OREJ_RSVD_RETRY = 18, +}; + +struct dev_to_host_fis { + u8 fis_type; /* 0x34 */ + u8 flags; + u8 status; + u8 error; + + u8 lbal; + union { u8 lbam; u8 byte_count_low; }; + union { u8 lbah; u8 byte_count_high; }; + u8 device; + + u8 lbal_exp; + u8 lbam_exp; + u8 lbah_exp; + u8 _r_a; + + union { u8 sector_count; u8 interrupt_reason; }; + u8 sector_count_exp; + u8 _r_b; + u8 _r_c; + + u32 _r_d; +} __attribute__ ((packed)); + +struct host_to_dev_fis { + u8 fis_type; /* 0x27 */ + u8 flags; + u8 command; + u8 features; + + u8 lbal; + union { u8 lbam; u8 byte_count_low; }; + union { u8 lbah; u8 byte_count_high; }; + u8 device; + + u8 lbal_exp; + u8 lbam_exp; + u8 lbah_exp; + u8 features_exp; + + union { u8 sector_count; u8 interrupt_reason; }; + u8 sector_count_exp; + u8 _r_a; + u8 control; + + u32 _r_b; +} __attribute__ ((packed)); + +/* Prefer to have code clarity over header file clarity. + */ +#ifdef __LITTLE_ENDIAN_BITFIELD +struct sas_identify_frame { + /* Byte 0 */ + u8 frame_type:4; + u8 dev_type:3; + u8 _un0:1; + + /* Byte 1 */ + u8 _un1; + + /* Byte 2 */ + union { + struct { + u8 _un20:1; + u8 smp_iport:1; + u8 stp_iport:1; + u8 ssp_iport:1; + u8 _un247:4; + }; + u8 initiator_bits; + }; + + /* Byte 3 */ + union { + struct { + u8 _un30:1; + u8 smp_tport:1; + u8 stp_tport:1; + u8 ssp_tport:1; + u8 _un347:4; + }; + u8 target_bits; + }; + + /* Byte 4 - 11 */ + u8 _un4_11[8]; + + /* Byte 12 - 19 */ + u8 sas_addr[SAS_ADDR_SIZE]; + + /* Byte 20 */ + u8 phy_id; + + u8 _un21_27[7]; + + __be32 crc; +} __attribute__ ((packed)); + +struct ssp_frame_hdr { + u8 frame_type; + u8 hashed_dest_addr[HASHED_SAS_ADDR_SIZE]; + u8 _r_a; + u8 hashed_src_addr[HASHED_SAS_ADDR_SIZE]; + __be16 _r_b; + + u8 changing_data_ptr:1; + u8 retransmit:1; + u8 retry_data_frames:1; + u8 _r_c:5; + + u8 num_fill_bytes:2; + u8 _r_d:6; + + u32 _r_e; + __be16 tag; + __be16 tptt; + __be32 data_offs; +} __attribute__ ((packed)); + +struct ssp_response_iu { + u8 _r_a[10]; + + u8 datapres:2; + u8 _r_b:6; + + u8 status; + + u32 _r_c; + + __be32 sense_data_len; + __be32 response_data_len; + + u8 resp_data[0]; + u8 sense_data[0]; +} __attribute__ ((packed)); + +/* ---------- SMP ---------- */ + +struct report_general_resp { + __be16 change_count; + __be16 route_indexes; + u8 _r_a; + u8 num_phys; + + u8 conf_route_table:1; + u8 configuring:1; + u8 _r_b:6; + + u8 _r_c; + + u8 enclosure_logical_id[8]; + + u8 _r_d[12]; +} __attribute__ ((packed)); + +struct discover_resp { + u8 _r_a[5]; + + u8 phy_id; + __be16 _r_b; + + u8 _r_c:4; + u8 attached_dev_type:3; + u8 _r_d:1; + + u8 linkrate:4; + u8 _r_e:4; + + u8 attached_sata_host:1; + u8 iproto:3; + u8 _r_f:4; + + u8 attached_sata_dev:1; + u8 tproto:3; + u8 _r_g:3; + u8 attached_sata_ps:1; + + u8 sas_addr[8]; + u8 attached_sas_addr[8]; + u8 attached_phy_id; + + u8 _r_h[7]; + + u8 hmin_linkrate:4; + u8 pmin_linkrate:4; + u8 hmax_linkrate:4; + u8 pmax_linkrate:4; + + u8 change_count; + + u8 pptv:4; + u8 _r_i:3; + u8 virtual:1; + + u8 routing_attr:4; + u8 _r_j:4; + + u8 conn_type; + u8 conn_el_index; + u8 conn_phy_link; + + u8 _r_k[8]; +} __attribute__ ((packed)); + +struct report_phy_sata_resp { + u8 _r_a[5]; + + u8 phy_id; + u8 _r_b; + + u8 affil_valid:1; + u8 affil_supp:1; + u8 _r_c:6; + + u32 _r_d; + + u8 stp_sas_addr[8]; + + struct dev_to_host_fis fis; + + u32 _r_e; + + u8 affil_stp_ini_addr[8]; + + __be32 crc; +} __attribute__ ((packed)); + +struct smp_resp { + u8 frame_type; + u8 function; + u8 result; + u8 reserved; + union { + struct report_general_resp rg; + struct discover_resp disc; + struct report_phy_sata_resp rps; + }; +} __attribute__ ((packed)); + +#elif defined(__BIG_ENDIAN_BITFIELD) +struct sas_identify_frame { + /* Byte 0 */ + u8 _un0:1; + u8 dev_type:3; + u8 frame_type:4; + + /* Byte 1 */ + u8 _un1; + + /* Byte 2 */ + union { + struct { + u8 _un247:4; + u8 ssp_iport:1; + u8 stp_iport:1; + u8 smp_iport:1; + u8 _un20:1; + }; + u8 initiator_bits; + }; + + /* Byte 3 */ + union { + struct { + u8 _un347:4; + u8 ssp_tport:1; + u8 stp_tport:1; + u8 smp_tport:1; + u8 _un30:1; + }; + u8 target_bits; + }; + + /* Byte 4 - 11 */ + u8 _un4_11[8]; + + /* Byte 12 - 19 */ + u8 sas_addr[SAS_ADDR_SIZE]; + + /* Byte 20 */ + u8 phy_id; + + u8 _un21_27[7]; + + __be32 crc; +} __attribute__ ((packed)); + +struct ssp_frame_hdr { + u8 frame_type; + u8 hashed_dest_addr[HASHED_SAS_ADDR_SIZE]; + u8 _r_a; + u8 hashed_src_addr[HASHED_SAS_ADDR_SIZE]; + __be16 _r_b; + + u8 _r_c:5; + u8 retry_data_frames:1; + u8 retransmit:1; + u8 changing_data_ptr:1; + + u8 _r_d:6; + u8 num_fill_bytes:2; + + u32 _r_e; + __be16 tag; + __be16 tptt; + __be32 data_offs; +} __attribute__ ((packed)); + +struct ssp_response_iu { + u8 _r_a[10]; + + u8 _r_b:6; + u8 datapres:2; + + u8 status; + + u32 _r_c; + + __be32 sense_data_len; + __be32 response_data_len; + + u8 resp_data[0]; + u8 sense_data[0]; +} __attribute__ ((packed)); + +/* ---------- SMP ---------- */ + +struct report_general_resp { + __be16 change_count; + __be16 route_indexes; + u8 _r_a; + u8 num_phys; + + u8 _r_b:6; + u8 configuring:1; + u8 conf_route_table:1; + + u8 _r_c; + + u8 enclosure_logical_id[8]; + + u8 _r_d[12]; +} __attribute__ ((packed)); + +struct discover_resp { + u8 _r_a[5]; + + u8 phy_id; + __be16 _r_b; + + u8 _r_d:1; + u8 attached_dev_type:3; + u8 _r_c:4; + + u8 _r_e:4; + u8 linkrate:4; + + u8 _r_f:4; + u8 iproto:3; + u8 attached_sata_host:1; + + u8 attached_sata_ps:1; + u8 _r_g:3; + u8 tproto:3; + u8 attached_sata_dev:1; + + u8 sas_addr[8]; + u8 attached_sas_addr[8]; + u8 attached_phy_id; + + u8 _r_h[7]; + + u8 pmin_linkrate:4; + u8 hmin_linkrate:4; + u8 pmax_linkrate:4; + u8 hmax_linkrate:4; + + u8 change_count; + + u8 virtual:1; + u8 _r_i:3; + u8 pptv:4; + + u8 _r_j:4; + u8 routing_attr:4; + + u8 conn_type; + u8 conn_el_index; + u8 conn_phy_link; + + u8 _r_k[8]; +} __attribute__ ((packed)); + +struct report_phy_sata_resp { + u8 _r_a[5]; + + u8 phy_id; + u8 _r_b; + + u8 _r_c:6; + u8 affil_supp:1; + u8 affil_valid:1; + + u32 _r_d; + + u8 stp_sas_addr[8]; + + struct dev_to_host_fis fis; + + u32 _r_e; + + u8 affil_stp_ini_addr[8]; + + __be32 crc; +} __attribute__ ((packed)); + +struct smp_resp { + u8 frame_type; + u8 function; + u8 result; + u8 reserved; + union { + struct report_general_resp rg; + struct discover_resp disc; + struct report_phy_sata_resp rps; + }; +} __attribute__ ((packed)); + +#else +#error "Bitfield order not defined!" +#endif + +#endif /* _SAS_H_ */ diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index 1bc675201413..84a6d5fe0920 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -429,4 +429,10 @@ struct scsi_lun { /* Used to obtain the PCI location of a device */ #define SCSI_IOCTL_GET_PCI 0x5387 +/* Pull a u32 out of a SCSI message (using BE SCSI conventions) */ +static inline u32 scsi_to_u32(u8 *ptr) +{ + return (ptr[0]<<24) + (ptr[1]<<16) + (ptr[2]<<8) + ptr[3]; +} + #endif /* _SCSI_SCSI_H */ From 187afbed1814ea0851bf30bacbf807217dd7864b Mon Sep 17 00:00:00 2001 From: Jon Masters Date: Mon, 28 Aug 2006 17:08:21 -0500 Subject: [PATCH 0129/1063] [SCSI] MODULE_FIRMWARE for binary firmware(s) Right now, various kernel modules are being migrated over to use request_firmware in order to pull in binary firmware blobs from userland when the module is loaded. This makes sense. However, there is right now little mechanism in place to automatically determine which binary firmware blobs must be included with a kernel in order to satisfy the prerequisites of these drivers. This affects vendors, but also regular users to a certain extent too. The attached patch introduces MODULE_FIRMWARE as a mechanism for advertising that a particular firmware file is to be loaded - it will then show up via modinfo and could be used e.g. when packaging a kernel. Signed-off-by: Jon Masters Comments added in line with all the other MODULE_ tag Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- include/linux/module.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/module.h b/include/linux/module.h index 0dfb794c52d3..d4486cc2e7fe 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -156,6 +156,11 @@ extern struct module __this_module; */ #define MODULE_VERSION(_version) MODULE_INFO(version, _version) +/* Optional firmware file (or files) needed by the module + * format is simply firmware file name. Multiple firmware + * files require multiple MODULE_FIRMWARE() specifiers */ +#define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware) + /* Given an address, look for it in the exception tables */ const struct exception_table_entry *search_exception_tables(unsigned long add); From bc229b3663dcd7d8f266cb13b0839efdee6d95b5 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Mon, 28 Aug 2006 17:08:21 -0500 Subject: [PATCH 0130/1063] [SCSI] aic94xx: add MODULE_FIRMWARE tag Add a tag which shows what the firmware file we're requesting is. Signed-off-by: James Bottomley --- drivers/scsi/aic94xx/aic94xx_seq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/aic94xx/aic94xx_seq.c b/drivers/scsi/aic94xx/aic94xx_seq.c index 9050c6f3f6bd..d9b6da5fd06c 100644 --- a/drivers/scsi/aic94xx/aic94xx_seq.c +++ b/drivers/scsi/aic94xx/aic94xx_seq.c @@ -28,6 +28,7 @@ #include #include +#include #include #include "aic94xx_reg.h" #include "aic94xx_hwi.h" @@ -1399,3 +1400,5 @@ void asd_update_port_links(struct asd_sas_phy *sas_phy) if (err) asd_printk("couldn't update DDB 0:error:%d\n", err); } + +MODULE_FIRMWARE(SAS_RAZOR_SEQUENCER_FW_FILE); From f19eaa7f53736449a6eac89c3863eca2c64d5913 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 30 Aug 2006 14:18:33 -0700 Subject: [PATCH 0131/1063] [SCSI] aic94xx: Increase can_queue for better performance This patch sets can_queue in the aic94xx driver's scsi_host to better performing values than what's there currently. It seems that asd_ha->seq.can_queue reflects the number of requests that can be queued per controller; so long as there's one scsi_host per controller, it seems logical that the scsi_host ought to have the same can_queue value. To the best of my (still limited) knowledge, this method provides the correct value. The effect of leaving this value set to 1 is terrible performance in the case of either (a) certain Maxtor SAS drives flying solo or (b) flooding several disks with I/O simultaneously (md-raid). There may be more scenarios where we see similar problems that I haven't uncovered. Signed-off-by: Darrick J. Wong Signed-off-by: James Bottomley --- drivers/scsi/aic94xx/aic94xx_init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c index 3ec2e46f80c6..69aa70887530 100644 --- a/drivers/scsi/aic94xx/aic94xx_init.c +++ b/drivers/scsi/aic94xx/aic94xx_init.c @@ -620,6 +620,8 @@ static int __devinit asd_pci_probe(struct pci_dev *dev, asd_ha->hw_prof.bios.present ? "build " : "not present", asd_ha->hw_prof.bios.bld); + shost->can_queue = asd_ha->seq.can_queue; + if (use_msi) pci_enable_msi(asd_ha->pcidev); From 492dfb489658dfe4a755fa29dd0e34e9c8bd8fb8 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 30 Aug 2006 15:48:45 -0400 Subject: [PATCH 0132/1063] [SCSI] block: add support for shared tag maps The current block queue implementation already contains most of the machinery for shared tag maps. The only remaining pieces are a way to allocate and destroy a tag map independently of the queues (so that the maps can be managed on the life cycle of the overseeing entity) Acked-by: Jens Axboe Signed-off-by: James Bottomley --- block/ll_rw_blk.c | 109 +++++++++++++++++++++++++++++++++-------- include/linux/blkdev.h | 2 + 2 files changed, 90 insertions(+), 21 deletions(-) diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index ddd9253f9d55..556a3d354eab 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -847,6 +847,35 @@ struct request *blk_queue_find_tag(request_queue_t *q, int tag) EXPORT_SYMBOL(blk_queue_find_tag); +/** + * __blk_free_tags - release a given set of tag maintenance info + * @bqt: the tag map to free + * + * Tries to free the specified @bqt@. Returns true if it was + * actually freed and false if there are still references using it + */ +static int __blk_free_tags(struct blk_queue_tag *bqt) +{ + int retval; + + retval = atomic_dec_and_test(&bqt->refcnt); + if (retval) { + BUG_ON(bqt->busy); + BUG_ON(!list_empty(&bqt->busy_list)); + + kfree(bqt->tag_index); + bqt->tag_index = NULL; + + kfree(bqt->tag_map); + bqt->tag_map = NULL; + + kfree(bqt); + + } + + return retval; +} + /** * __blk_queue_free_tags - release tag maintenance info * @q: the request queue for the device @@ -862,23 +891,28 @@ static void __blk_queue_free_tags(request_queue_t *q) if (!bqt) return; - if (atomic_dec_and_test(&bqt->refcnt)) { - BUG_ON(bqt->busy); - BUG_ON(!list_empty(&bqt->busy_list)); - - kfree(bqt->tag_index); - bqt->tag_index = NULL; - - kfree(bqt->tag_map); - bqt->tag_map = NULL; - - kfree(bqt); - } + __blk_free_tags(bqt); q->queue_tags = NULL; q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED); } + +/** + * blk_free_tags - release a given set of tag maintenance info + * @bqt: the tag map to free + * + * For externally managed @bqt@ frees the map. Callers of this + * function must guarantee to have released all the queues that + * might have been using this tag map. + */ +void blk_free_tags(struct blk_queue_tag *bqt) +{ + if (unlikely(!__blk_free_tags(bqt))) + BUG(); +} +EXPORT_SYMBOL(blk_free_tags); + /** * blk_queue_free_tags - release tag maintenance info * @q: the request queue for the device @@ -901,7 +935,7 @@ init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth) unsigned long *tag_map; int nr_ulongs; - if (depth > q->nr_requests * 2) { + if (q && depth > q->nr_requests * 2) { depth = q->nr_requests * 2; printk(KERN_ERR "%s: adjusted depth to %d\n", __FUNCTION__, depth); @@ -927,6 +961,38 @@ fail: return -ENOMEM; } +static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q, + int depth) +{ + struct blk_queue_tag *tags; + + tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC); + if (!tags) + goto fail; + + if (init_tag_map(q, tags, depth)) + goto fail; + + INIT_LIST_HEAD(&tags->busy_list); + tags->busy = 0; + atomic_set(&tags->refcnt, 1); + return tags; +fail: + kfree(tags); + return NULL; +} + +/** + * blk_init_tags - initialize the tag info for an external tag map + * @depth: the maximum queue depth supported + * @tags: the tag to use + **/ +struct blk_queue_tag *blk_init_tags(int depth) +{ + return __blk_queue_init_tags(NULL, depth); +} +EXPORT_SYMBOL(blk_init_tags); + /** * blk_queue_init_tags - initialize the queue tag info * @q: the request queue for the device @@ -941,16 +1007,10 @@ int blk_queue_init_tags(request_queue_t *q, int depth, BUG_ON(tags && q->queue_tags && tags != q->queue_tags); if (!tags && !q->queue_tags) { - tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC); + tags = __blk_queue_init_tags(q, depth); + if (!tags) goto fail; - - if (init_tag_map(q, tags, depth)) - goto fail; - - INIT_LIST_HEAD(&tags->busy_list); - tags->busy = 0; - atomic_set(&tags->refcnt, 1); } else if (q->queue_tags) { if ((rc = blk_queue_resize_tags(q, depth))) return rc; @@ -1001,6 +1061,13 @@ int blk_queue_resize_tags(request_queue_t *q, int new_depth) return 0; } + /* + * Currently cannot replace a shared tag map with a new + * one, so error out if this is the case + */ + if (atomic_read(&bqt->refcnt) != 1) + return -EBUSY; + /* * save the old state info, so we can copy it back */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index aafe82788b4e..427b0d61be6c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -746,6 +746,8 @@ extern void blk_queue_free_tags(request_queue_t *); extern int blk_queue_resize_tags(request_queue_t *, int); extern void blk_queue_invalidate_tags(request_queue_t *); extern long blk_congestion_wait(int rw, long timeout); +extern struct blk_queue_tag *blk_init_tags(int); +extern void blk_free_tags(struct blk_queue_tag *); extern void blk_rq_bio_prep(request_queue_t *, struct request *, struct bio *); extern int blkdev_issue_flush(struct block_device *, sector_t *); From 86e33a296c2c9ed6eece0bfff4ac776f42040504 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 30 Aug 2006 09:45:51 -0400 Subject: [PATCH 0133/1063] [SCSI] add shared tag map helpers This patch adds support for sharing tag maps at the host level (i.e. either every queue [LUN] has its own tag map or there's a single one for the entire host). This formulation is primarily intended to help single issue queue hardware, like the aic7xxx Signed-off-by: James Bottomley --- drivers/scsi/hosts.c | 3 +++ include/scsi/scsi_host.h | 7 +++++++ include/scsi/scsi_tcq.h | 14 +++++++++++++- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index f244d4f6597a..68ef1636678d 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -265,6 +265,9 @@ static void scsi_host_dev_release(struct device *dev) destroy_workqueue(shost->work_q); scsi_destroy_command_freelist(shost); + if (shost->bqt) + blk_free_tags(shost->bqt); + kfree(shost->shost_data); if (parent) diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index b3dd90f3e858..39c6f8cc20c3 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -16,6 +16,7 @@ struct scsi_target; struct Scsi_Host; struct scsi_host_cmd_pool; struct scsi_transport_template; +struct blk_queue_tags; /* @@ -465,6 +466,12 @@ struct Scsi_Host { struct scsi_host_template *hostt; struct scsi_transport_template *transportt; + /* + * area to keep a shared tag map (if needed, will be + * NULL if not) + */ + struct blk_queue_tag *bqt; + /* * The following two fields are protected with host_lock; * however, eh routines can safely access during eh processing diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h index e47e36a4ef49..4eea254b1ce9 100644 --- a/include/scsi/scsi_tcq.h +++ b/include/scsi/scsi_tcq.h @@ -4,6 +4,7 @@ #include #include #include +#include #define MSG_SIMPLE_TAG 0x20 @@ -66,7 +67,8 @@ static inline void scsi_activate_tcq(struct scsi_device *sdev, int depth) return; if (!blk_queue_tagged(sdev->request_queue)) - blk_queue_init_tags(sdev->request_queue, depth, NULL); + blk_queue_init_tags(sdev->request_queue, depth, + sdev->host->bqt); scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), depth); } @@ -131,4 +133,14 @@ static inline struct scsi_cmnd *scsi_find_tag(struct scsi_device *sdev, int tag) return sdev->current_cmnd; } +/** + * scsi_init_shared_tag_map - create a shared tag map + * @shost: the host to share the tag map among all devices + * @depth: the total depth of the map + */ +static inline void scsi_init_shared_tag_map(struct Scsi_Host *shost, int depth) +{ + shost->bqt = blk_init_tags(depth); +} + #endif /* _SCSI_SCSI_TCQ_H */ From 85b6c720b0931101c8bcc3a5abdc2b8514b0fb4b Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 31 Aug 2006 18:15:22 -0400 Subject: [PATCH 0134/1063] [SCSI] sd: fix cache flushing on module removal (and individual device removal) The fix isn't actually in sd: it's in scsi_device_get(). I modified it to allow devices to be returned in SDEV_CANCEL, but not SDEV_DEL. This means that the device_remove_driver, which occurs in device_del() in scsi_remove_device() after the device has gone into SDEV_CANCEL is now effective at flushing the cache. Signed-off-by: James Bottomley --- drivers/scsi/scsi.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 94df671d776a..37843927e47f 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -851,14 +851,14 @@ EXPORT_SYMBOL(scsi_track_queue_full); */ int scsi_device_get(struct scsi_device *sdev) { - if (sdev->sdev_state == SDEV_DEL || sdev->sdev_state == SDEV_CANCEL) + if (sdev->sdev_state == SDEV_DEL) return -ENXIO; if (!get_device(&sdev->sdev_gendev)) return -ENXIO; - if (!try_module_get(sdev->host->hostt->module)) { - put_device(&sdev->sdev_gendev); - return -ENXIO; - } + /* We can fail this if we're doing SCSI operations + * from module exit (like cache flush) */ + try_module_get(sdev->host->hostt->module); + return 0; } EXPORT_SYMBOL(scsi_device_get); @@ -873,7 +873,10 @@ EXPORT_SYMBOL(scsi_device_get); */ void scsi_device_put(struct scsi_device *sdev) { - module_put(sdev->host->hostt->module); + /* The module refcount will be zero if scsi_device_get() + * was called from a module removal routine */ + if (likely(module_refcount(sdev->host->hostt->module) != 0)) + module_put(sdev->host->hostt->module); put_device(&sdev->sdev_gendev); } EXPORT_SYMBOL(scsi_device_put); From e5b3cd42960a10c1bc3701d4f00767463c88ec9d Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 21 Aug 2006 15:53:25 -0400 Subject: [PATCH 0135/1063] [SCSI] SCSI: sanitize INQUIRY strings Sanitize the Vendor, Product, and Revision strings contained in an INQUIRY result by setting all non-graphic or non-ASCII characters to ' '. Since the standard disallows such characters, this will affect only non-compliant devices. To help maintain backward compatibility, NUL characters are treated specially. They are taken as string terminators; they and all the following characters are set to ' '. If some valid characters get erased as a result... well, we weren't seeing them before so we haven't lost anything. The primary purpose of this change is to allow blacklist entries to match devices with illegal Vendor or Product strings. In addition, the patch updates a couple of function prototypes, giving inq_result its correct type (unsigned char *). Signed-off-by: Alan Stern Signed-off-by: James Bottomley --- drivers/scsi/scsi_scan.c | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index a24d3461fc78..31d05ab0b2fc 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -396,6 +396,32 @@ void scsi_target_reap(struct scsi_target *starget) return; } +/** + * sanitize_inquiry_string - remove non-graphical chars from an INQUIRY result string + * @s: INQUIRY result string to sanitize + * @len: length of the string + * + * Description: + * The SCSI spec says that INQUIRY vendor, product, and revision + * strings must consist entirely of graphic ASCII characters, + * padded on the right with spaces. Since not all devices obey + * this rule, we will replace non-graphic or non-ASCII characters + * with spaces. Exception: a NUL character is interpreted as a + * string terminator, so all the following characters are set to + * spaces. + **/ +static void sanitize_inquiry_string(unsigned char *s, int len) +{ + int terminated = 0; + + for (; len > 0; (--len, ++s)) { + if (*s == 0) + terminated = 1; + if (terminated || *s < 0x20 || *s > 0x7e) + *s = ' '; + } +} + /** * scsi_probe_lun - probe a single LUN using a SCSI INQUIRY * @sdev: scsi_device to probe @@ -410,7 +436,7 @@ void scsi_target_reap(struct scsi_target *starget) * INQUIRY data is in @inq_result; the scsi_level and INQUIRY length * are copied to the scsi_device any flags value is stored in *@bflags. **/ -static int scsi_probe_lun(struct scsi_device *sdev, char *inq_result, +static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result, int result_len, int *bflags) { unsigned char scsi_cmd[MAX_COMMAND_SIZE]; @@ -469,7 +495,11 @@ static int scsi_probe_lun(struct scsi_device *sdev, char *inq_result, } if (result == 0) { - response_len = (unsigned char) inq_result[4] + 5; + sanitize_inquiry_string(&inq_result[8], 8); + sanitize_inquiry_string(&inq_result[16], 16); + sanitize_inquiry_string(&inq_result[32], 4); + + response_len = inq_result[4] + 5; if (response_len > 255) response_len = first_inquiry_len; /* sanity */ @@ -575,7 +605,8 @@ static int scsi_probe_lun(struct scsi_device *sdev, char *inq_result, * SCSI_SCAN_NO_RESPONSE: could not allocate or setup a scsi_device * SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized **/ -static int scsi_add_lun(struct scsi_device *sdev, char *inq_result, int *bflags) +static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, + int *bflags) { /* * XXX do not save the inquiry, since it can change underneath us, From ffd0436ed2e5a741c8d30062b489b989acf0a526 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 31 Aug 2006 18:09:24 -0400 Subject: [PATCH 0136/1063] [SCSI] libiscsi, iscsi_tcp, iscsi_iser: check that burst lengths are valid. iSCSI RFC states that the first burst length must be smaller than the max burst length. We currently assume targets will be good, but that may not be the case, so this patch adds a check. This patch also moves the unsol data out offset to the lib so the LLDs do not have to track it. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/infiniband/ulp/iser/iscsi_iser.c | 18 ++++---------- drivers/infiniband/ulp/iser/iscsi_iser.h | 1 - drivers/scsi/iscsi_tcp.c | 30 ++++++------------------ drivers/scsi/iscsi_tcp.h | 1 - drivers/scsi/libiscsi.c | 25 +++++++++++++------- include/scsi/libiscsi.h | 5 ++-- 6 files changed, 31 insertions(+), 49 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 1437d7ee3b19..101e407eaa43 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -141,18 +141,11 @@ iscsi_iser_cmd_init(struct iscsi_cmd_task *ctask) if (sc->sc_data_direction == DMA_TO_DEVICE) { BUG_ON(ctask->total_length == 0); - /* bytes to be sent via RDMA operations */ - iser_ctask->rdma_data_count = ctask->total_length - - ctask->imm_count - - ctask->unsol_count; - debug_scsi("cmd [itt %x total %d imm %d unsol_data %d " - "rdma_data %d]\n", + debug_scsi("cmd [itt %x total %d imm %d unsol_data %d\n", ctask->itt, ctask->total_length, ctask->imm_count, - ctask->unsol_count, iser_ctask->rdma_data_count); - } else - /* bytes to be sent via RDMA operations */ - iser_ctask->rdma_data_count = ctask->total_length; + ctask->unsol_count); + } iser_ctask_rdma_init(iser_ctask); } @@ -196,13 +189,10 @@ iscsi_iser_ctask_xmit_unsol_data(struct iscsi_conn *conn, { struct iscsi_data hdr; int error = 0; - struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; /* Send data-out PDUs while there's still unsolicited data to send */ while (ctask->unsol_count > 0) { - iscsi_prep_unsolicit_data_pdu(ctask, &hdr, - iser_ctask->rdma_data_count); - + iscsi_prep_unsolicit_data_pdu(ctask, &hdr); debug_scsi("Sending data-out: itt 0x%x, data count %d\n", hdr.itt, ctask->data_count); diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 3350ba690cfe..7c3d0c96d889 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -257,7 +257,6 @@ struct iscsi_iser_conn { struct iscsi_iser_cmd_task { struct iser_desc desc; struct iscsi_iser_conn *iser_conn; - int rdma_data_count;/* RDMA bytes */ enum iser_task_status status; int command_sent; /* set if command sent */ int dir[ISER_DIRS_NUM]; /* set if dir use*/ diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 058f094f945a..a97a3a4e99eb 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -1264,19 +1264,6 @@ iscsi_solicit_data_cont(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask, r2t->data_count); } -static void -iscsi_unsolicit_data_init(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) -{ - struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data; - struct iscsi_data_task *dtask; - - dtask = tcp_ctask->dtask = &tcp_ctask->unsol_dtask; - iscsi_prep_unsolicit_data_pdu(ctask, &dtask->hdr, - tcp_ctask->r2t_data_count); - iscsi_buf_init_iov(&tcp_ctask->headbuf, (char*)&dtask->hdr, - sizeof(struct iscsi_hdr)); -} - /** * iscsi_tcp_cmd_init - Initialize iSCSI SCSI_READ or SCSI_WRITE commands * @conn: iscsi connection @@ -1326,14 +1313,11 @@ iscsi_tcp_cmd_init(struct iscsi_cmd_task *ctask) if (ctask->unsol_count) tcp_ctask->xmstate |= XMSTATE_UNS_HDR | XMSTATE_UNS_INIT; - tcp_ctask->r2t_data_count = ctask->total_length - - ctask->imm_count - - ctask->unsol_count; - debug_scsi("cmd [itt 0x%x total %d imm %d imm_data %d " - "r2t_data %d]\n", + debug_scsi("cmd [itt 0x%x total %d imm_data %d " + "unsol count %d, unsol offset %d]\n", ctask->itt, ctask->total_length, ctask->imm_count, - ctask->unsol_count, tcp_ctask->r2t_data_count); + ctask->unsol_count, ctask->unsol_offset); } else tcp_ctask->xmstate = XMSTATE_R_HDR; @@ -1531,8 +1515,10 @@ handle_xmstate_uns_hdr(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) tcp_ctask->xmstate |= XMSTATE_UNS_DATA; if (tcp_ctask->xmstate & XMSTATE_UNS_INIT) { - iscsi_unsolicit_data_init(conn, ctask); - dtask = tcp_ctask->dtask; + dtask = tcp_ctask->dtask = &tcp_ctask->unsol_dtask; + iscsi_prep_unsolicit_data_pdu(ctask, &dtask->hdr); + iscsi_buf_init_iov(&tcp_ctask->headbuf, (char*)&dtask->hdr, + sizeof(struct iscsi_hdr)); if (conn->hdrdgst_en) iscsi_hdr_digest(conn, &tcp_ctask->headbuf, (u8*)dtask->hdrext); @@ -1720,7 +1706,6 @@ data_out_done: * Done with this R2T. Check if there are more * outstanding R2Ts ready to be processed. */ - BUG_ON(tcp_ctask->r2t_data_count - r2t->data_length < 0); if (conn->datadgst_en) { rc = iscsi_digest_final_send(conn, ctask, &dtask->digestbuf, &dtask->digest, 1); @@ -1732,7 +1717,6 @@ data_out_done: debug_tcp("r2t done dout digest 0x%x\n", dtask->digest); } - tcp_ctask->r2t_data_count -= r2t->data_length; tcp_ctask->r2t = NULL; spin_lock_bh(&session->lock); __kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t, sizeof(void*)); diff --git a/drivers/scsi/iscsi_tcp.h b/drivers/scsi/iscsi_tcp.h index 6a4ee704e46e..aace8f70dfd7 100644 --- a/drivers/scsi/iscsi_tcp.h +++ b/drivers/scsi/iscsi_tcp.h @@ -157,7 +157,6 @@ struct iscsi_tcp_cmd_task { struct scatterlist *bad_sg; /* assert statement */ int sg_count; /* SG's to process */ uint32_t exp_r2tsn; - int r2t_data_count; /* R2T Data-Out bytes */ int data_offset; struct iscsi_r2t_info *r2t; /* in progress R2T */ struct iscsi_queue r2tpool; diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 5884cd26d53a..a7c6e70f4ef8 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -68,8 +68,7 @@ iscsi_check_assign_cmdsn(struct iscsi_session *session, struct iscsi_nopin *hdr) EXPORT_SYMBOL_GPL(iscsi_check_assign_cmdsn); void iscsi_prep_unsolicit_data_pdu(struct iscsi_cmd_task *ctask, - struct iscsi_data *hdr, - int transport_data_cnt) + struct iscsi_data *hdr) { struct iscsi_conn *conn = ctask->conn; @@ -82,14 +81,12 @@ void iscsi_prep_unsolicit_data_pdu(struct iscsi_cmd_task *ctask, hdr->itt = ctask->hdr->itt; hdr->exp_statsn = cpu_to_be32(conn->exp_statsn); - - hdr->offset = cpu_to_be32(ctask->total_length - - transport_data_cnt - - ctask->unsol_count); + hdr->offset = cpu_to_be32(ctask->unsol_offset); if (ctask->unsol_count > conn->max_xmit_dlength) { hton24(hdr->dlength, conn->max_xmit_dlength); ctask->data_count = conn->max_xmit_dlength; + ctask->unsol_offset += ctask->data_count; hdr->flags = 0; } else { hton24(hdr->dlength, ctask->unsol_count); @@ -125,6 +122,7 @@ static void iscsi_prep_scsi_cmd_pdu(struct iscsi_cmd_task *ctask) memcpy(hdr->cdb, sc->cmnd, sc->cmd_len); memset(&hdr->cdb[sc->cmd_len], 0, MAX_COMMAND_SIZE - sc->cmd_len); + ctask->data_count = 0; if (sc->sc_data_direction == DMA_TO_DEVICE) { hdr->flags |= ISCSI_FLAG_CMD_WRITE; /* @@ -143,6 +141,7 @@ static void iscsi_prep_scsi_cmd_pdu(struct iscsi_cmd_task *ctask) */ ctask->imm_count = 0; ctask->unsol_count = 0; + ctask->unsol_offset = 0; ctask->unsol_datasn = 0; if (session->imm_data_en) { @@ -156,9 +155,12 @@ static void iscsi_prep_scsi_cmd_pdu(struct iscsi_cmd_task *ctask) } else zero_data(ctask->hdr->dlength); - if (!session->initial_r2t_en) + if (!session->initial_r2t_en) { ctask->unsol_count = min(session->first_burst, ctask->total_length) - ctask->imm_count; + ctask->unsol_offset = ctask->imm_count; + } + if (!ctask->unsol_count) /* No unsolicit Data-Out's */ ctask->hdr->flags |= ISCSI_FLAG_CMD_FINAL; @@ -1520,11 +1522,18 @@ int iscsi_conn_start(struct iscsi_cls_conn *cls_conn) struct iscsi_conn *conn = cls_conn->dd_data; struct iscsi_session *session = conn->session; - if (session == NULL) { + if (!session) { printk(KERN_ERR "iscsi: can't start unbound connection\n"); return -EPERM; } + if (session->first_burst > session->max_burst) { + printk("iscsi: invalid burst lengths: " + "first_burst %d max_burst %d\n", + session->first_burst, session->max_burst); + return -EINVAL; + } + spin_lock_bh(&session->lock); conn->c_stage = ISCSI_CONN_STARTED; session->state = ISCSI_STATE_LOGGED_IN; diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 41904f611d12..4900650bd081 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -102,6 +102,8 @@ struct iscsi_cmd_task { uint32_t unsol_datasn; int imm_count; /* imm-data (bytes) */ int unsol_count; /* unsolicited (bytes)*/ + /* offset in unsolicited stream (bytes); */ + int unsol_offset; int data_count; /* remaining Data-Out */ struct scsi_cmnd *sc; /* associated SCSI cmd*/ int total_length; @@ -290,8 +292,7 @@ extern int iscsi_conn_get_param(struct iscsi_cls_conn *cls_conn, extern int iscsi_check_assign_cmdsn(struct iscsi_session *, struct iscsi_nopin *); extern void iscsi_prep_unsolicit_data_pdu(struct iscsi_cmd_task *, - struct iscsi_data *hdr, - int transport_data_cnt); + struct iscsi_data *hdr); extern int iscsi_conn_send_pdu(struct iscsi_cls_conn *, struct iscsi_hdr *, char *, uint32_t); extern int iscsi_complete_pdu(struct iscsi_conn *, struct iscsi_hdr *, From 60ecebf5a10e42f5e2d6e07eb9e24bdee8500b81 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 31 Aug 2006 18:09:25 -0400 Subject: [PATCH 0137/1063] [SCSI] add refcouting around ctask usage in main IO patch It is possible that a ctask could be completing and getting cleaned up at the same time, we are finishing up the last data transfer. This could then result in the data transfer code using stale or invalid values. This patch adds a refcount to the ctask. When the count goes to zero then we know the transmit thread and recv thread or softirq are not touching it and we can safely release it. The eh should not need to grab a reference because it only cleans up a task if it has both the xmit mutex and recv lock (or recv side suspended). Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/libiscsi.c | 59 ++++++++++++++++++++++++++++++++++------- include/scsi/libiscsi.h | 1 + 2 files changed, 51 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index a7c6e70f4ef8..9584cbc082fe 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -179,16 +179,15 @@ EXPORT_SYMBOL_GPL(iscsi_prep_scsi_cmd_pdu); /** * iscsi_complete_command - return command back to scsi-ml - * @session: iscsi session * @ctask: iscsi cmd task * * Must be called with session lock. * This function returns the scsi command to scsi-ml and returns * the cmd task to the pool of available cmd tasks. */ -static void iscsi_complete_command(struct iscsi_session *session, - struct iscsi_cmd_task *ctask) +static void iscsi_complete_command(struct iscsi_cmd_task *ctask) { + struct iscsi_session *session = ctask->conn->session; struct scsi_cmnd *sc = ctask->sc; ctask->state = ISCSI_TASK_COMPLETED; @@ -198,6 +197,35 @@ static void iscsi_complete_command(struct iscsi_session *session, sc->scsi_done(sc); } +static void __iscsi_get_ctask(struct iscsi_cmd_task *ctask) +{ + atomic_inc(&ctask->refcount); +} + +static void iscsi_get_ctask(struct iscsi_cmd_task *ctask) +{ + spin_lock_bh(&ctask->conn->session->lock); + __iscsi_get_ctask(ctask); + spin_unlock_bh(&ctask->conn->session->lock); +} + +static void __iscsi_put_ctask(struct iscsi_cmd_task *ctask) +{ + struct iscsi_conn *conn = ctask->conn; + + if (atomic_dec_and_test(&ctask->refcount)) { + conn->session->tt->cleanup_cmd_task(conn, ctask); + iscsi_complete_command(ctask); + } +} + +static void iscsi_put_ctask(struct iscsi_cmd_task *ctask) +{ + spin_lock_bh(&ctask->conn->session->lock); + __iscsi_put_ctask(ctask); + spin_unlock_bh(&ctask->conn->session->lock); +} + /** * iscsi_cmd_rsp - SCSI Command Response processing * @conn: iscsi connection @@ -274,7 +302,7 @@ out: (long)sc, sc->result, ctask->itt); conn->scsirsp_pdus_cnt++; - iscsi_complete_command(conn->session, ctask); + __iscsi_put_ctask(ctask); return rc; } @@ -338,7 +366,7 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, BUG_ON((void*)ctask != ctask->sc->SCp.ptr); if (hdr->flags & ISCSI_FLAG_DATA_STATUS) { conn->scsirsp_pdus_cnt++; - iscsi_complete_command(session, ctask); + __iscsi_put_ctask(ctask); } break; case ISCSI_OP_R2T: @@ -563,7 +591,9 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) BUG_ON(conn->ctask && conn->mtask); if (conn->ctask) { + iscsi_get_ctask(conn->ctask); rc = tt->xmit_cmd_task(conn, conn->ctask); + iscsi_put_ctask(conn->ctask); if (rc) goto again; /* done with this in-progress ctask */ @@ -604,12 +634,19 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) struct iscsi_cmd_task, running); conn->ctask->state = ISCSI_TASK_RUNNING; list_move_tail(conn->xmitqueue.next, &conn->run_list); + __iscsi_get_ctask(conn->ctask); spin_unlock_bh(&conn->session->lock); rc = tt->xmit_cmd_task(conn, conn->ctask); if (rc) goto again; + spin_lock_bh(&conn->session->lock); + __iscsi_put_ctask(conn->ctask); + if (rc) { + spin_unlock_bh(&conn->session->lock); + goto again; + } } spin_unlock_bh(&conn->session->lock); /* done with this ctask */ @@ -659,6 +696,7 @@ enum { FAILURE_SESSION_FAILED, FAILURE_SESSION_FREED, FAILURE_WINDOW_CLOSED, + FAILURE_OOM, FAILURE_SESSION_TERMINATE, FAILURE_SESSION_IN_RECOVERY, FAILURE_SESSION_RECOVERY_TIMEOUT, @@ -717,10 +755,15 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) conn = session->leadconn; - __kfifo_get(session->cmdpool.queue, (void*)&ctask, sizeof(void*)); + if (!__kfifo_get(session->cmdpool.queue, (void*)&ctask, + sizeof(void*))) { + reason = FAILURE_OOM; + goto reject; + } sc->SCp.phase = session->age; sc->SCp.ptr = (char *)ctask; + atomic_set(&ctask->refcount, 1); ctask->state = ISCSI_TASK_PENDING; ctask->mtask = NULL; ctask->conn = conn; @@ -1057,13 +1100,11 @@ static void fail_command(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask, sc = ctask->sc; if (!sc) return; - - conn->session->tt->cleanup_cmd_task(conn, ctask); iscsi_ctask_mtask_cleanup(ctask); sc->result = err; sc->resid = sc->request_bufflen; - iscsi_complete_command(conn->session, ctask); + __iscsi_put_ctask(ctask); } int iscsi_eh_abort(struct scsi_cmnd *sc) diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 4900650bd081..401192e56e50 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -112,6 +112,7 @@ struct iscsi_cmd_task { /* state set/tested under session->lock */ int state; + atomic_t refcount; struct list_head running; /* running cmd list */ void *dd_data; /* driver/transport data */ }; From 98a9416af08385f8497e9c1595113a81aefa5d49 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 31 Aug 2006 18:09:26 -0400 Subject: [PATCH 0138/1063] [SCSI] attempt to complete r2t with data len greater than max burst A couple targets like string bean and MDS, send r2ts with a data len greater than the max burst we agreed to. We were being strict in our enforcing of the iscsi rfc in that code path, but there is no driver limitation that prevents us from fullfilling the request. To allow those targets to work we will ignore the max_burst length and send as much data as the target asks for assuming it has consciously decided to override its max burst length. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/iscsi_tcp.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index a97a3a4e99eb..d6927f1a6b65 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -358,8 +358,11 @@ iscsi_r2t_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) int r2tsn = be32_to_cpu(rhdr->r2tsn); int rc; - if (tcp_conn->in.datalen) + if (tcp_conn->in.datalen) { + printk(KERN_ERR "iscsi_tcp: invalid R2t with datalen %d\n", + tcp_conn->in.datalen); return ISCSI_ERR_DATALEN; + } if (tcp_ctask->exp_r2tsn && tcp_ctask->exp_r2tsn != r2tsn) return ISCSI_ERR_R2TSN; @@ -385,15 +388,23 @@ iscsi_r2t_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) r2t->exp_statsn = rhdr->statsn; r2t->data_length = be32_to_cpu(rhdr->data_length); - if (r2t->data_length == 0 || - r2t->data_length > session->max_burst) { + if (r2t->data_length == 0) { + printk(KERN_ERR "iscsi_tcp: invalid R2T with zero data len\n"); spin_unlock(&session->lock); return ISCSI_ERR_DATALEN; } + if (r2t->data_length > session->max_burst) + debug_scsi("invalid R2T with data len %u and max burst %u." + "Attempting to execute request.\n", + r2t->data_length, session->max_burst); + r2t->data_offset = be32_to_cpu(rhdr->data_offset); if (r2t->data_offset + r2t->data_length > ctask->total_length) { spin_unlock(&session->lock); + printk(KERN_ERR "iscsi_tcp: invalid R2T with data len %u at " + "offset %u and total length %d\n", r2t->data_length, + r2t->data_offset, ctask->total_length); return ISCSI_ERR_DATALEN; } From 62f383003c22cd34920d0412465eddcb1223da0d Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 31 Aug 2006 18:09:27 -0400 Subject: [PATCH 0139/1063] [SCSI] iscsi_tcp: fix padding, data digests, and IO at weird offsets iscsi_tcp calculates padding by using the expected transfer length. This has the problem where if we have immediate data = no and initial R2T = yes, and the transfer length ended up needing padding then we send: 1. header 2. padding which should have gone after data 3. data Besides this bug, we also assume the target will always ask for nice transfer lengths and the first burst length will always be a nice value. As far as I can tell form the RFC this is not a requirement. It would be silly to do this, but if someone did it we will end doing bad things. Finally the last bug in that bit of code is in our handling of the recalculation of data digests when we do not send a whole iscsi_buf in one try. The bug here is that we call crypto_digest_final on a iscsi_sendpage error, then when we send the rest of the iscsi_buf, we doiscsi_data_digest_init and this causes the previous data digest to be lost. And to make matters worse, some of these bugs are replicated over and over and over again for immediate data, solicited data and unsolicited data. So the attached patch made over the iscsi git tree (see kernel.org/git for details) which I updated today to include the patches I said I merged, consolidates the sending of data, padding and digests and calculation of data digests and fixes the above bugs. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/iscsi_tcp.c | 676 +++++++++++++++------------------------ drivers/scsi/iscsi_tcp.h | 31 +- drivers/scsi/libiscsi.c | 37 ++- 3 files changed, 305 insertions(+), 439 deletions(-) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index d6927f1a6b65..290c1d76cd40 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -281,7 +281,6 @@ iscsi_solicit_data_init(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask, { struct iscsi_data *hdr; struct scsi_cmnd *sc = ctask->sc; - struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data; hdr = &r2t->dtask.hdr; memset(hdr, 0, sizeof(struct iscsi_data)); @@ -336,10 +335,12 @@ iscsi_solicit_data_init(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask, sg_count += sg->length; } BUG_ON(r2t->sg == NULL); - } else - iscsi_buf_init_iov(&tcp_ctask->sendbuf, + } else { + iscsi_buf_init_iov(&r2t->sendbuf, (char*)sc->request_buffer + r2t->data_offset, r2t->data_count); + r2t->sg = NULL; + } } /** @@ -503,7 +504,6 @@ iscsi_tcp_hdr_recv(struct iscsi_conn *conn) goto copy_hdr; spin_lock(&session->lock); - iscsi_tcp_cleanup_ctask(conn, tcp_conn->in.ctask); rc = __iscsi_complete_pdu(conn, hdr, NULL, 0); spin_unlock(&session->lock); break; @@ -676,15 +676,15 @@ iscsi_tcp_copy(struct iscsi_conn *conn) } static inline void -partial_sg_digest_update(struct iscsi_tcp_conn *tcp_conn, - struct scatterlist *sg, int offset, int length) +partial_sg_digest_update(struct crypto_tfm *tfm, struct scatterlist *sg, + int offset, int length) { struct scatterlist temp; memcpy(&temp, sg, sizeof(struct scatterlist)); temp.offset = offset; temp.length = length; - crypto_digest_update(tcp_conn->data_rx_tfm, &temp, 1); + crypto_digest_update(tfm, &temp, 1); } static void @@ -751,7 +751,8 @@ static int iscsi_scsi_data_in(struct iscsi_conn *conn) tcp_conn->data_rx_tfm, &sg[i], 1); else - partial_sg_digest_update(tcp_conn, + partial_sg_digest_update( + tcp_conn->data_rx_tfm, &sg[i], sg[i].offset + offset, sg[i].length - offset); @@ -765,7 +766,8 @@ static int iscsi_scsi_data_in(struct iscsi_conn *conn) /* * data-in is complete, but buffer not... */ - partial_sg_digest_update(tcp_conn, &sg[i], + partial_sg_digest_update(tcp_conn->data_rx_tfm, + &sg[i], sg[i].offset, sg[i].length-rc); rc = 0; break; @@ -783,7 +785,6 @@ done: (long)sc, sc->result, ctask->itt, tcp_conn->in.hdr->flags); spin_lock(&conn->session->lock); - iscsi_tcp_cleanup_ctask(conn, ctask); __iscsi_complete_pdu(conn, tcp_conn->in.hdr, NULL, 0); spin_unlock(&conn->session->lock); } @@ -803,9 +804,6 @@ iscsi_data_recv(struct iscsi_conn *conn) rc = iscsi_scsi_data_in(conn); break; case ISCSI_OP_SCSI_CMD_RSP: - spin_lock(&conn->session->lock); - iscsi_tcp_cleanup_ctask(conn, tcp_conn->in.ctask); - spin_unlock(&conn->session->lock); case ISCSI_OP_TEXT_RSP: case ISCSI_OP_LOGIN_RSP: case ISCSI_OP_ASYNC_EVENT: @@ -1188,37 +1186,12 @@ iscsi_sendpage(struct iscsi_conn *conn, struct iscsi_buf *buf, static inline void iscsi_data_digest_init(struct iscsi_tcp_conn *tcp_conn, - struct iscsi_cmd_task *ctask) + struct iscsi_tcp_cmd_task *tcp_ctask) { - struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data; - - BUG_ON(!tcp_conn->data_tx_tfm); crypto_digest_init(tcp_conn->data_tx_tfm); tcp_ctask->digest_count = 4; } -static int -iscsi_digest_final_send(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask, - struct iscsi_buf *buf, uint32_t *digest, int final) -{ - struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data; - struct iscsi_tcp_conn *tcp_conn = conn->dd_data; - int rc = 0; - int sent = 0; - - if (final) - crypto_digest_final(tcp_conn->data_tx_tfm, (u8*)digest); - - iscsi_buf_init_iov(buf, (char*)digest, 4); - rc = iscsi_sendpage(conn, buf, &tcp_ctask->digest_count, &sent); - if (rc) { - tcp_ctask->datadigest = *digest; - tcp_ctask->xmstate |= XMSTATE_DATA_DIGEST; - } else - tcp_ctask->digest_count = 4; - return rc; -} - /** * iscsi_solicit_data_cont - initialize next Data-Out * @conn: iscsi connection @@ -1236,7 +1209,6 @@ static void iscsi_solicit_data_cont(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask, struct iscsi_r2t_info *r2t, int left) { - struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data; struct iscsi_data *hdr; struct scsi_cmnd *sc = ctask->sc; int new_offset; @@ -1265,14 +1237,30 @@ iscsi_solicit_data_cont(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask, iscsi_buf_init_iov(&r2t->headbuf, (char*)hdr, sizeof(struct iscsi_hdr)); - if (sc->use_sg && !iscsi_buf_left(&r2t->sendbuf)) { - BUG_ON(tcp_ctask->bad_sg == r2t->sg); + if (iscsi_buf_left(&r2t->sendbuf)) + return; + + if (sc->use_sg) { iscsi_buf_init_sg(&r2t->sendbuf, r2t->sg); r2t->sg += 1; - } else - iscsi_buf_init_iov(&tcp_ctask->sendbuf, + } else { + iscsi_buf_init_iov(&r2t->sendbuf, (char*)sc->request_buffer + new_offset, r2t->data_count); + r2t->sg = NULL; + } +} + +static void iscsi_set_padding(struct iscsi_tcp_cmd_task *tcp_ctask, + unsigned long len) +{ + tcp_ctask->pad_count = len & (ISCSI_PAD_LEN - 1); + if (!tcp_ctask->pad_count) + return; + + tcp_ctask->pad_count = ISCSI_PAD_LEN - tcp_ctask->pad_count; + debug_scsi("write padding %d bytes\n", tcp_ctask->pad_count); + tcp_ctask->xmstate |= XMSTATE_W_PAD; } /** @@ -1300,31 +1288,16 @@ iscsi_tcp_cmd_init(struct iscsi_cmd_task *ctask) if (sc->use_sg) { struct scatterlist *sg = sc->request_buffer; - iscsi_buf_init_sg(&tcp_ctask->sendbuf, - &sg[tcp_ctask->sg_count++]); - tcp_ctask->sg = sg; + iscsi_buf_init_sg(&tcp_ctask->sendbuf, sg); + tcp_ctask->sg = sg + 1; tcp_ctask->bad_sg = sg + sc->use_sg; - } else + } else { iscsi_buf_init_iov(&tcp_ctask->sendbuf, sc->request_buffer, sc->request_bufflen); - - if (ctask->imm_count) - tcp_ctask->xmstate |= XMSTATE_IMM_DATA; - - tcp_ctask->pad_count = ctask->total_length & (ISCSI_PAD_LEN-1); - if (tcp_ctask->pad_count) { - tcp_ctask->pad_count = ISCSI_PAD_LEN - - tcp_ctask->pad_count; - debug_scsi("write padding %d bytes\n", - tcp_ctask->pad_count); - tcp_ctask->xmstate |= XMSTATE_W_PAD; + tcp_ctask->sg = NULL; + tcp_ctask->bad_sg = NULL; } - - if (ctask->unsol_count) - tcp_ctask->xmstate |= XMSTATE_UNS_HDR | - XMSTATE_UNS_INIT; - debug_scsi("cmd [itt 0x%x total %d imm_data %d " "unsol count %d, unsol offset %d]\n", ctask->itt, ctask->total_length, ctask->imm_count, @@ -1410,8 +1383,8 @@ iscsi_tcp_mtask_xmit(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask) } static inline int -handle_xmstate_r_hdr(struct iscsi_conn *conn, - struct iscsi_tcp_cmd_task *tcp_ctask) +iscsi_send_read_hdr(struct iscsi_conn *conn, + struct iscsi_tcp_cmd_task *tcp_ctask) { int rc; @@ -1429,7 +1402,7 @@ handle_xmstate_r_hdr(struct iscsi_conn *conn, } static inline int -handle_xmstate_w_hdr(struct iscsi_conn *conn, +iscsi_send_write_hdr(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) { struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data; @@ -1440,85 +1413,125 @@ handle_xmstate_w_hdr(struct iscsi_conn *conn, iscsi_hdr_digest(conn, &tcp_ctask->headbuf, (u8*)tcp_ctask->hdrext); rc = iscsi_sendhdr(conn, &tcp_ctask->headbuf, ctask->imm_count); - if (rc) - tcp_ctask->xmstate |= XMSTATE_W_HDR; - return rc; -} - -static inline int -handle_xmstate_data_digest(struct iscsi_conn *conn, - struct iscsi_cmd_task *ctask) -{ - struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data; - int rc; - - tcp_ctask->xmstate &= ~XMSTATE_DATA_DIGEST; - debug_tcp("resent data digest 0x%x\n", tcp_ctask->datadigest); - rc = iscsi_digest_final_send(conn, ctask, &tcp_ctask->immbuf, - &tcp_ctask->datadigest, 0); if (rc) { - tcp_ctask->xmstate |= XMSTATE_DATA_DIGEST; - debug_tcp("resent data digest 0x%x fail!\n", - tcp_ctask->datadigest); + tcp_ctask->xmstate |= XMSTATE_W_HDR; + return rc; } - return rc; -} + if (ctask->imm_count) { + tcp_ctask->xmstate |= XMSTATE_IMM_DATA; + iscsi_set_padding(tcp_ctask, ctask->imm_count); -static inline int -handle_xmstate_imm_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) -{ - struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data; - struct iscsi_tcp_conn *tcp_conn = conn->dd_data; - int rc; - - BUG_ON(!ctask->imm_count); - tcp_ctask->xmstate &= ~XMSTATE_IMM_DATA; - - if (conn->datadgst_en) { - iscsi_data_digest_init(tcp_conn, ctask); - tcp_ctask->immdigest = 0; - } - - for (;;) { - rc = iscsi_sendpage(conn, &tcp_ctask->sendbuf, - &ctask->imm_count, &tcp_ctask->sent); - if (rc) { - tcp_ctask->xmstate |= XMSTATE_IMM_DATA; - if (conn->datadgst_en) { - crypto_digest_final(tcp_conn->data_tx_tfm, - (u8*)&tcp_ctask->immdigest); - debug_tcp("tx imm sendpage fail 0x%x\n", - tcp_ctask->datadigest); - } - return rc; + if (ctask->conn->datadgst_en) { + iscsi_data_digest_init(ctask->conn->dd_data, tcp_ctask); + tcp_ctask->immdigest = 0; } - if (conn->datadgst_en) - crypto_digest_update(tcp_conn->data_tx_tfm, - &tcp_ctask->sendbuf.sg, 1); - - if (!ctask->imm_count) - break; - iscsi_buf_init_sg(&tcp_ctask->sendbuf, - &tcp_ctask->sg[tcp_ctask->sg_count++]); - } - - if (conn->datadgst_en && !(tcp_ctask->xmstate & XMSTATE_W_PAD)) { - rc = iscsi_digest_final_send(conn, ctask, &tcp_ctask->immbuf, - &tcp_ctask->immdigest, 1); - if (rc) { - debug_tcp("sending imm digest 0x%x fail!\n", - tcp_ctask->immdigest); - return rc; - } - debug_tcp("sending imm digest 0x%x\n", tcp_ctask->immdigest); } + if (ctask->unsol_count) + tcp_ctask->xmstate |= XMSTATE_UNS_HDR | XMSTATE_UNS_INIT; return 0; } -static inline int -handle_xmstate_uns_hdr(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) +static int +iscsi_send_padding(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) +{ + struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data; + struct iscsi_tcp_conn *tcp_conn = conn->dd_data; + int sent = 0, rc; + + if (tcp_ctask->xmstate & XMSTATE_W_PAD) { + iscsi_buf_init_iov(&tcp_ctask->sendbuf, (char*)&tcp_ctask->pad, + tcp_ctask->pad_count); + if (conn->datadgst_en) + crypto_digest_update(tcp_conn->data_tx_tfm, + &tcp_ctask->sendbuf.sg, 1); + } else if (!(tcp_ctask->xmstate & XMSTATE_W_RESEND_PAD)) + return 0; + + tcp_ctask->xmstate &= ~XMSTATE_W_PAD; + tcp_ctask->xmstate &= ~XMSTATE_W_RESEND_PAD; + debug_scsi("sending %d pad bytes for itt 0x%x\n", + tcp_ctask->pad_count, ctask->itt); + rc = iscsi_sendpage(conn, &tcp_ctask->sendbuf, &tcp_ctask->pad_count, + &sent); + if (rc) { + debug_scsi("padding send failed %d\n", rc); + tcp_ctask->xmstate |= XMSTATE_W_RESEND_PAD; + } + return rc; +} + +static int +iscsi_send_digest(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask, + struct iscsi_buf *buf, uint32_t *digest) +{ + struct iscsi_tcp_cmd_task *tcp_ctask; + struct iscsi_tcp_conn *tcp_conn; + int rc, sent = 0; + + if (!conn->datadgst_en) + return 0; + + tcp_ctask = ctask->dd_data; + tcp_conn = conn->dd_data; + + if (!(tcp_ctask->xmstate & XMSTATE_W_RESEND_DATA_DIGEST)) { + crypto_digest_final(tcp_conn->data_tx_tfm, (u8*)digest); + iscsi_buf_init_iov(buf, (char*)digest, 4); + } + tcp_ctask->xmstate &= ~XMSTATE_W_RESEND_DATA_DIGEST; + + rc = iscsi_sendpage(conn, buf, &tcp_ctask->digest_count, &sent); + if (!rc) + debug_scsi("sent digest 0x%x for itt 0x%x\n", *digest, + ctask->itt); + else { + debug_scsi("sending digest 0x%x failed for itt 0x%x!\n", + *digest, ctask->itt); + tcp_ctask->xmstate |= XMSTATE_W_RESEND_DATA_DIGEST; + } + return rc; +} + +static int +iscsi_send_data(struct iscsi_cmd_task *ctask, struct iscsi_buf *sendbuf, + struct scatterlist **sg, int *sent, int *count, + struct iscsi_buf *digestbuf, uint32_t *digest) +{ + struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data; + struct iscsi_conn *conn = ctask->conn; + struct iscsi_tcp_conn *tcp_conn = conn->dd_data; + int rc, buf_sent, offset; + + while (*count) { + buf_sent = 0; + offset = sendbuf->sent; + + rc = iscsi_sendpage(conn, sendbuf, count, &buf_sent); + *sent = *sent + buf_sent; + if (buf_sent && conn->datadgst_en) + partial_sg_digest_update(tcp_conn->data_tx_tfm, + &sendbuf->sg, sendbuf->sg.offset + offset, + buf_sent); + if (!iscsi_buf_left(sendbuf) && *sg != tcp_ctask->bad_sg) { + iscsi_buf_init_sg(sendbuf, *sg); + *sg = *sg + 1; + } + + if (rc) + return rc; + } + + rc = iscsi_send_padding(conn, ctask); + if (rc) + return rc; + + return iscsi_send_digest(conn, ctask, digestbuf, digest); +} + +static int +iscsi_send_unsol_hdr(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) { struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data; struct iscsi_data_task *dtask; @@ -1526,14 +1539,21 @@ handle_xmstate_uns_hdr(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) tcp_ctask->xmstate |= XMSTATE_UNS_DATA; if (tcp_ctask->xmstate & XMSTATE_UNS_INIT) { - dtask = tcp_ctask->dtask = &tcp_ctask->unsol_dtask; + dtask = &tcp_ctask->unsol_dtask; + iscsi_prep_unsolicit_data_pdu(ctask, &dtask->hdr); iscsi_buf_init_iov(&tcp_ctask->headbuf, (char*)&dtask->hdr, sizeof(struct iscsi_hdr)); if (conn->hdrdgst_en) iscsi_hdr_digest(conn, &tcp_ctask->headbuf, (u8*)dtask->hdrext); + if (conn->datadgst_en) { + iscsi_data_digest_init(ctask->conn->dd_data, tcp_ctask); + dtask->digest = 0; + } + tcp_ctask->xmstate &= ~XMSTATE_UNS_INIT; + iscsi_set_padding(tcp_ctask, ctask->data_count); } rc = iscsi_sendhdr(conn, &tcp_ctask->headbuf, ctask->data_count); @@ -1548,247 +1568,128 @@ handle_xmstate_uns_hdr(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) return 0; } -static inline int -handle_xmstate_uns_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) +static int +iscsi_send_unsol_pdu(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) { struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data; - struct iscsi_data_task *dtask = tcp_ctask->dtask; - struct iscsi_tcp_conn *tcp_conn = conn->dd_data; int rc; - BUG_ON(!ctask->data_count); - tcp_ctask->xmstate &= ~XMSTATE_UNS_DATA; - - if (conn->datadgst_en) { - iscsi_data_digest_init(tcp_conn, ctask); - dtask->digest = 0; + if (tcp_ctask->xmstate & XMSTATE_UNS_HDR) { + BUG_ON(!ctask->unsol_count); + tcp_ctask->xmstate &= ~XMSTATE_UNS_HDR; +send_hdr: + rc = iscsi_send_unsol_hdr(conn, ctask); + if (rc) + return rc; } - for (;;) { + if (tcp_ctask->xmstate & XMSTATE_UNS_DATA) { + struct iscsi_data_task *dtask = &tcp_ctask->unsol_dtask; int start = tcp_ctask->sent; - rc = iscsi_sendpage(conn, &tcp_ctask->sendbuf, - &ctask->data_count, &tcp_ctask->sent); - if (rc) { - ctask->unsol_count -= tcp_ctask->sent - start; - tcp_ctask->xmstate |= XMSTATE_UNS_DATA; - /* will continue with this ctask later.. */ - if (conn->datadgst_en) { - crypto_digest_final(tcp_conn->data_tx_tfm, - (u8 *)&dtask->digest); - debug_tcp("tx uns data fail 0x%x\n", - dtask->digest); - } - return rc; - } - - BUG_ON(tcp_ctask->sent > ctask->total_length); + rc = iscsi_send_data(ctask, &tcp_ctask->sendbuf, &tcp_ctask->sg, + &tcp_ctask->sent, &ctask->data_count, + &dtask->digestbuf, &dtask->digest); ctask->unsol_count -= tcp_ctask->sent - start; - - /* - * XXX:we may run here with un-initial sendbuf. - * so pass it - */ - if (conn->datadgst_en && tcp_ctask->sent - start > 0) - crypto_digest_update(tcp_conn->data_tx_tfm, - &tcp_ctask->sendbuf.sg, 1); - - if (!ctask->data_count) - break; - iscsi_buf_init_sg(&tcp_ctask->sendbuf, - &tcp_ctask->sg[tcp_ctask->sg_count++]); - } - BUG_ON(ctask->unsol_count < 0); - - /* - * Done with the Data-Out. Next, check if we need - * to send another unsolicited Data-Out. - */ - if (ctask->unsol_count) { - if (conn->datadgst_en) { - rc = iscsi_digest_final_send(conn, ctask, - &dtask->digestbuf, - &dtask->digest, 1); - if (rc) { - debug_tcp("send uns digest 0x%x fail\n", - dtask->digest); - return rc; - } - debug_tcp("sending uns digest 0x%x, more uns\n", - dtask->digest); - } - tcp_ctask->xmstate |= XMSTATE_UNS_INIT; - return 1; - } - - if (conn->datadgst_en && !(tcp_ctask->xmstate & XMSTATE_W_PAD)) { - rc = iscsi_digest_final_send(conn, ctask, - &dtask->digestbuf, - &dtask->digest, 1); - if (rc) { - debug_tcp("send last uns digest 0x%x fail\n", - dtask->digest); + if (rc) return rc; + tcp_ctask->xmstate &= ~XMSTATE_UNS_DATA; + /* + * Done with the Data-Out. Next, check if we need + * to send another unsolicited Data-Out. + */ + if (ctask->unsol_count) { + debug_scsi("sending more uns\n"); + tcp_ctask->xmstate |= XMSTATE_UNS_INIT; + goto send_hdr; } - debug_tcp("sending uns digest 0x%x\n",dtask->digest); } - return 0; } -static inline int -handle_xmstate_sol_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) +static int iscsi_send_sol_pdu(struct iscsi_conn *conn, + struct iscsi_cmd_task *ctask) { - struct iscsi_session *session = conn->session; - struct iscsi_tcp_conn *tcp_conn = conn->dd_data; struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data; - struct iscsi_r2t_info *r2t = tcp_ctask->r2t; - struct iscsi_data_task *dtask = &r2t->dtask; + struct iscsi_session *session = conn->session; + struct iscsi_r2t_info *r2t; + struct iscsi_data_task *dtask; int left, rc; - tcp_ctask->xmstate &= ~XMSTATE_SOL_DATA; - tcp_ctask->dtask = dtask; - - if (conn->datadgst_en) { - iscsi_data_digest_init(tcp_conn, ctask); - dtask->digest = 0; - } -solicit_again: - /* - * send Data-Out within this R2T sequence. - */ - if (!r2t->data_count) - goto data_out_done; - - rc = iscsi_sendpage(conn, &r2t->sendbuf, &r2t->data_count, &r2t->sent); - if (rc) { - tcp_ctask->xmstate |= XMSTATE_SOL_DATA; - /* will continue with this ctask later.. */ - if (conn->datadgst_en) { - crypto_digest_final(tcp_conn->data_tx_tfm, - (u8 *)&dtask->digest); - debug_tcp("r2t data send fail 0x%x\n", dtask->digest); - } - return rc; - } - - BUG_ON(r2t->data_count < 0); - if (conn->datadgst_en) - crypto_digest_update(tcp_conn->data_tx_tfm, &r2t->sendbuf.sg, - 1); - - if (r2t->data_count) { - BUG_ON(ctask->sc->use_sg == 0); - if (!iscsi_buf_left(&r2t->sendbuf)) { - BUG_ON(tcp_ctask->bad_sg == r2t->sg); - iscsi_buf_init_sg(&r2t->sendbuf, r2t->sg); - r2t->sg += 1; - } - goto solicit_again; - } - -data_out_done: - /* - * Done with this Data-Out. Next, check if we have - * to send another Data-Out for this R2T. - */ - BUG_ON(r2t->data_length - r2t->sent < 0); - left = r2t->data_length - r2t->sent; - if (left) { - if (conn->datadgst_en) { - rc = iscsi_digest_final_send(conn, ctask, - &dtask->digestbuf, - &dtask->digest, 1); - if (rc) { - debug_tcp("send r2t data digest 0x%x" - "fail\n", dtask->digest); - return rc; - } - debug_tcp("r2t data send digest 0x%x\n", - dtask->digest); - } - iscsi_solicit_data_cont(conn, ctask, r2t, left); - tcp_ctask->xmstate |= XMSTATE_SOL_DATA; + if (tcp_ctask->xmstate & XMSTATE_SOL_HDR) { tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR; - return 1; - } + tcp_ctask->xmstate |= XMSTATE_SOL_DATA; + if (!tcp_ctask->r2t) + __kfifo_get(tcp_ctask->r2tqueue, (void*)&tcp_ctask->r2t, + sizeof(void*)); +send_hdr: + r2t = tcp_ctask->r2t; + dtask = &r2t->dtask; - /* - * Done with this R2T. Check if there are more - * outstanding R2Ts ready to be processed. - */ - if (conn->datadgst_en) { - rc = iscsi_digest_final_send(conn, ctask, &dtask->digestbuf, - &dtask->digest, 1); + if (conn->hdrdgst_en) + iscsi_hdr_digest(conn, &r2t->headbuf, + (u8*)dtask->hdrext); + + if (conn->datadgst_en) { + iscsi_data_digest_init(conn->dd_data, tcp_ctask); + dtask->digest = 0; + } + + rc = iscsi_sendhdr(conn, &r2t->headbuf, r2t->data_count); if (rc) { - debug_tcp("send last r2t data digest 0x%x" - "fail\n", dtask->digest); + tcp_ctask->xmstate &= ~XMSTATE_SOL_DATA; + tcp_ctask->xmstate |= XMSTATE_SOL_HDR; return rc; } - debug_tcp("r2t done dout digest 0x%x\n", dtask->digest); + + iscsi_set_padding(tcp_ctask, r2t->data_count); + debug_scsi("sol dout [dsn %d itt 0x%x dlen %d sent %d]\n", + r2t->solicit_datasn - 1, ctask->itt, r2t->data_count, + r2t->sent); } - tcp_ctask->r2t = NULL; - spin_lock_bh(&session->lock); - __kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t, sizeof(void*)); - spin_unlock_bh(&session->lock); - if (__kfifo_get(tcp_ctask->r2tqueue, (void*)&r2t, sizeof(void*))) { - tcp_ctask->r2t = r2t; - tcp_ctask->xmstate |= XMSTATE_SOL_DATA; - tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR; - return 1; - } + if (tcp_ctask->xmstate & XMSTATE_SOL_DATA) { + r2t = tcp_ctask->r2t; + dtask = &r2t->dtask; - return 0; -} + rc = iscsi_send_data(ctask, &r2t->sendbuf, &r2t->sg, + &r2t->sent, &r2t->data_count, + &dtask->digestbuf, &dtask->digest); + if (rc) + return rc; + tcp_ctask->xmstate &= ~XMSTATE_SOL_DATA; -static inline int -handle_xmstate_w_pad(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) -{ - struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data; - struct iscsi_tcp_conn *tcp_conn = conn->dd_data; - struct iscsi_data_task *dtask = tcp_ctask->dtask; - int sent = 0, rc; - - tcp_ctask->xmstate &= ~XMSTATE_W_PAD; - iscsi_buf_init_iov(&tcp_ctask->sendbuf, (char*)&tcp_ctask->pad, - tcp_ctask->pad_count); - rc = iscsi_sendpage(conn, &tcp_ctask->sendbuf, &tcp_ctask->pad_count, - &sent); - if (rc) { - tcp_ctask->xmstate |= XMSTATE_W_PAD; - return rc; - } - - if (conn->datadgst_en) { - crypto_digest_update(tcp_conn->data_tx_tfm, - &tcp_ctask->sendbuf.sg, 1); - /* imm data? */ - if (!dtask) { - rc = iscsi_digest_final_send(conn, ctask, - &tcp_ctask->immbuf, - &tcp_ctask->immdigest, 1); - if (rc) { - debug_tcp("send padding digest 0x%x" - "fail!\n", tcp_ctask->immdigest); - return rc; - } - debug_tcp("done with padding, digest 0x%x\n", - tcp_ctask->datadigest); - } else { - rc = iscsi_digest_final_send(conn, ctask, - &dtask->digestbuf, - &dtask->digest, 1); - if (rc) { - debug_tcp("send padding digest 0x%x" - "fail\n", dtask->digest); - return rc; - } - debug_tcp("done with padding, digest 0x%x\n", - dtask->digest); + /* + * Done with this Data-Out. Next, check if we have + * to send another Data-Out for this R2T. + */ + BUG_ON(r2t->data_length - r2t->sent < 0); + left = r2t->data_length - r2t->sent; + if (left) { + iscsi_solicit_data_cont(conn, ctask, r2t, left); + tcp_ctask->xmstate |= XMSTATE_SOL_DATA; + tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR; + goto send_hdr; } - } + /* + * Done with this R2T. Check if there are more + * outstanding R2Ts ready to be processed. + */ + spin_lock_bh(&session->lock); + tcp_ctask->r2t = NULL; + __kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t, + sizeof(void*)); + if (__kfifo_get(tcp_ctask->r2tqueue, (void*)&r2t, + sizeof(void*))) { + tcp_ctask->r2t = r2t; + tcp_ctask->xmstate |= XMSTATE_SOL_DATA; + tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR; + spin_unlock_bh(&session->lock); + goto send_hdr; + } + spin_unlock_bh(&session->lock); + } return 0; } @@ -1808,85 +1709,30 @@ iscsi_tcp_ctask_xmit(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) return rc; if (tcp_ctask->xmstate & XMSTATE_R_HDR) - return handle_xmstate_r_hdr(conn, tcp_ctask); + return iscsi_send_read_hdr(conn, tcp_ctask); if (tcp_ctask->xmstate & XMSTATE_W_HDR) { - rc = handle_xmstate_w_hdr(conn, ctask); - if (rc) - return rc; - } - - /* XXX: for data digest xmit recover */ - if (tcp_ctask->xmstate & XMSTATE_DATA_DIGEST) { - rc = handle_xmstate_data_digest(conn, ctask); + rc = iscsi_send_write_hdr(conn, ctask); if (rc) return rc; } if (tcp_ctask->xmstate & XMSTATE_IMM_DATA) { - rc = handle_xmstate_imm_data(conn, ctask); + rc = iscsi_send_data(ctask, &tcp_ctask->sendbuf, &tcp_ctask->sg, + &tcp_ctask->sent, &ctask->imm_count, + &tcp_ctask->immbuf, &tcp_ctask->immdigest); if (rc) return rc; + tcp_ctask->xmstate &= ~XMSTATE_IMM_DATA; } - if (tcp_ctask->xmstate & XMSTATE_UNS_HDR) { - BUG_ON(!ctask->unsol_count); - tcp_ctask->xmstate &= ~XMSTATE_UNS_HDR; -unsolicit_head_again: - rc = handle_xmstate_uns_hdr(conn, ctask); - if (rc) - return rc; - } + rc = iscsi_send_unsol_pdu(conn, ctask); + if (rc) + return rc; - if (tcp_ctask->xmstate & XMSTATE_UNS_DATA) { - rc = handle_xmstate_uns_data(conn, ctask); - if (rc == 1) - goto unsolicit_head_again; - else if (rc) - return rc; - goto done; - } - - if (tcp_ctask->xmstate & XMSTATE_SOL_HDR) { - struct iscsi_r2t_info *r2t; - - tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR; - tcp_ctask->xmstate |= XMSTATE_SOL_DATA; - if (!tcp_ctask->r2t) - __kfifo_get(tcp_ctask->r2tqueue, (void*)&tcp_ctask->r2t, - sizeof(void*)); -solicit_head_again: - r2t = tcp_ctask->r2t; - if (conn->hdrdgst_en) - iscsi_hdr_digest(conn, &r2t->headbuf, - (u8*)r2t->dtask.hdrext); - rc = iscsi_sendhdr(conn, &r2t->headbuf, r2t->data_count); - if (rc) { - tcp_ctask->xmstate &= ~XMSTATE_SOL_DATA; - tcp_ctask->xmstate |= XMSTATE_SOL_HDR; - return rc; - } - - debug_scsi("sol dout [dsn %d itt 0x%x dlen %d sent %d]\n", - r2t->solicit_datasn - 1, ctask->itt, r2t->data_count, - r2t->sent); - } - - if (tcp_ctask->xmstate & XMSTATE_SOL_DATA) { - rc = handle_xmstate_sol_data(conn, ctask); - if (rc == 1) - goto solicit_head_again; - if (rc) - return rc; - } - -done: - /* - * Last thing to check is whether we need to send write - * padding. Note that we check for xmstate equality, not just the bit. - */ - if (tcp_ctask->xmstate == XMSTATE_W_PAD) - rc = handle_xmstate_w_pad(conn, ctask); + rc = iscsi_send_sol_pdu(conn, ctask); + if (rc) + return rc; return rc; } diff --git a/drivers/scsi/iscsi_tcp.h b/drivers/scsi/iscsi_tcp.h index aace8f70dfd7..7e40e94d9fdc 100644 --- a/drivers/scsi/iscsi_tcp.h +++ b/drivers/scsi/iscsi_tcp.h @@ -31,23 +31,21 @@ #define IN_PROGRESS_DDIGEST_RECV 0x3 /* xmit state machine */ -#define XMSTATE_IDLE 0x0 -#define XMSTATE_R_HDR 0x1 -#define XMSTATE_W_HDR 0x2 -#define XMSTATE_IMM_HDR 0x4 -#define XMSTATE_IMM_DATA 0x8 -#define XMSTATE_UNS_INIT 0x10 -#define XMSTATE_UNS_HDR 0x20 -#define XMSTATE_UNS_DATA 0x40 -#define XMSTATE_SOL_HDR 0x80 -#define XMSTATE_SOL_DATA 0x100 -#define XMSTATE_W_PAD 0x200 -#define XMSTATE_DATA_DIGEST 0x400 +#define XMSTATE_IDLE 0x0 +#define XMSTATE_R_HDR 0x1 +#define XMSTATE_W_HDR 0x2 +#define XMSTATE_IMM_HDR 0x4 +#define XMSTATE_IMM_DATA 0x8 +#define XMSTATE_UNS_INIT 0x10 +#define XMSTATE_UNS_HDR 0x20 +#define XMSTATE_UNS_DATA 0x40 +#define XMSTATE_SOL_HDR 0x80 +#define XMSTATE_SOL_DATA 0x100 +#define XMSTATE_W_PAD 0x200 +#define XMSTATE_W_RESEND_PAD 0x400 +#define XMSTATE_W_RESEND_DATA_DIGEST 0x800 -#define ISCSI_CONN_RCVBUF_MIN 262144 -#define ISCSI_CONN_SNDBUF_MIN 262144 #define ISCSI_PAD_LEN 4 -#define ISCSI_R2T_MAX 16 #define ISCSI_SG_TABLESIZE SG_ALL #define ISCSI_TCP_MAX_CMD_LEN 16 @@ -162,13 +160,10 @@ struct iscsi_tcp_cmd_task { struct iscsi_queue r2tpool; struct kfifo *r2tqueue; struct iscsi_r2t_info **r2ts; - uint32_t datadigest; /* for recover digest */ int digest_count; uint32_t immdigest; /* for imm data */ struct iscsi_buf immbuf; /* for imm data digest */ - struct iscsi_data_task *dtask; /* data task in progress*/ struct iscsi_data_task unsol_dtask; /* unsol data task */ - int digest_offset; /* for partial buff digest */ }; #endif /* ISCSI_H */ diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 9584cbc082fe..fb65311c81dd 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -325,6 +325,30 @@ static void iscsi_tmf_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr) wake_up(&conn->ehwait); } +static int iscsi_handle_reject(struct iscsi_conn *conn, struct iscsi_hdr *hdr, + char *data, int datalen) +{ + struct iscsi_reject *reject = (struct iscsi_reject *)hdr; + struct iscsi_hdr rejected_pdu; + uint32_t itt; + + conn->exp_statsn = be32_to_cpu(reject->statsn) + 1; + + if (reject->reason == ISCSI_REASON_DATA_DIGEST_ERROR) { + if (ntoh24(reject->dlength) > datalen) + return ISCSI_ERR_PROTO; + + if (ntoh24(reject->dlength) >= sizeof(struct iscsi_hdr)) { + memcpy(&rejected_pdu, data, sizeof(struct iscsi_hdr)); + itt = rejected_pdu.itt & ISCSI_ITT_MASK; + printk(KERN_ERR "itt 0x%x had pdu (op 0x%x) rejected " + "due to DataDigest error.\n", itt, + rejected_pdu.opcode); + } + } + return 0; +} + /** * __iscsi_complete_pdu - complete pdu * @conn: iscsi conn @@ -436,6 +460,11 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, break; } } else if (itt == ISCSI_RESERVED_TAG) { + rc = iscsi_check_assign_cmdsn(session, + (struct iscsi_nopin*)hdr); + if (rc) + goto done; + switch(opcode) { case ISCSI_OP_NOOP_IN: if (datalen) { @@ -443,11 +472,6 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, break; } - rc = iscsi_check_assign_cmdsn(session, - (struct iscsi_nopin*)hdr); - if (rc) - break; - if (hdr->ttt == ISCSI_RESERVED_TAG) break; @@ -455,7 +479,8 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, rc = ISCSI_ERR_CONN_FAILED; break; case ISCSI_OP_REJECT: - /* we need sth like iscsi_reject_rsp()*/ + rc = iscsi_handle_reject(conn, hdr, data, datalen); + break; case ISCSI_OP_ASYNC_EVENT: conn->exp_statsn = be32_to_cpu(hdr->statsn) + 1; /* we need sth like iscsi_async_event_rsp() */ From dd8c0d958621e3137f3e3302f7b8952041a4a1d7 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 31 Aug 2006 18:09:28 -0400 Subject: [PATCH 0140/1063] [SCSI] scsi_tcp: rm data rx and tx tfms We currently allocated seperate tfms for data and header digests. There is no reason for this since we can never calculate a rx header and digest at the same time. Same for sends. So this patch removes the data tfms and has the send and recv sides use the rx_tfm or tx_tfm. I also made the connection creation code preallocate the tfms because I thought I hit a bug where I changed the digests settings during a relogin but could not allocate the tfm and then we just failed. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/iscsi_tcp.c | 102 ++++++++++++++------------------------- drivers/scsi/iscsi_tcp.h | 8 +-- 2 files changed, 38 insertions(+), 72 deletions(-) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 290c1d76cd40..82399f71028d 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -693,7 +693,7 @@ iscsi_recv_digest_update(struct iscsi_tcp_conn *tcp_conn, char* buf, int len) struct scatterlist tmp; sg_init_one(&tmp, buf, len); - crypto_digest_update(tcp_conn->data_rx_tfm, &tmp, 1); + crypto_digest_update(tcp_conn->rx_tfm, &tmp, 1); } static int iscsi_scsi_data_in(struct iscsi_conn *conn) @@ -748,11 +748,11 @@ static int iscsi_scsi_data_in(struct iscsi_conn *conn) if (conn->datadgst_en) { if (!offset) crypto_digest_update( - tcp_conn->data_rx_tfm, + tcp_conn->rx_tfm, &sg[i], 1); else partial_sg_digest_update( - tcp_conn->data_rx_tfm, + tcp_conn->rx_tfm, &sg[i], sg[i].offset + offset, sg[i].length - offset); @@ -766,7 +766,7 @@ static int iscsi_scsi_data_in(struct iscsi_conn *conn) /* * data-in is complete, but buffer not... */ - partial_sg_digest_update(tcp_conn->data_rx_tfm, + partial_sg_digest_update(tcp_conn->rx_tfm, &sg[i], sg[i].offset, sg[i].length-rc); rc = 0; @@ -885,10 +885,8 @@ more: */ rc = iscsi_tcp_hdr_recv(conn); if (!rc && tcp_conn->in.datalen) { - if (conn->datadgst_en) { - BUG_ON(!tcp_conn->data_rx_tfm); - crypto_digest_init(tcp_conn->data_rx_tfm); - } + if (conn->datadgst_en) + crypto_digest_init(tcp_conn->rx_tfm); tcp_conn->in_progress = IN_PROGRESS_DATA_RECV; } else if (rc) { iscsi_conn_failure(conn, rc); @@ -940,10 +938,10 @@ more: tcp_conn->in.padding); memset(pad, 0, tcp_conn->in.padding); sg_init_one(&sg, pad, tcp_conn->in.padding); - crypto_digest_update(tcp_conn->data_rx_tfm, + crypto_digest_update(tcp_conn->rx_tfm, &sg, 1); } - crypto_digest_final(tcp_conn->data_rx_tfm, + crypto_digest_final(tcp_conn->rx_tfm, (u8 *) & tcp_conn->in.datadgst); debug_tcp("rx digest 0x%x\n", tcp_conn->in.datadgst); tcp_conn->in_progress = IN_PROGRESS_DDIGEST_RECV; @@ -1188,7 +1186,7 @@ static inline void iscsi_data_digest_init(struct iscsi_tcp_conn *tcp_conn, struct iscsi_tcp_cmd_task *tcp_ctask) { - crypto_digest_init(tcp_conn->data_tx_tfm); + crypto_digest_init(tcp_conn->tx_tfm); tcp_ctask->digest_count = 4; } @@ -1444,7 +1442,7 @@ iscsi_send_padding(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) iscsi_buf_init_iov(&tcp_ctask->sendbuf, (char*)&tcp_ctask->pad, tcp_ctask->pad_count); if (conn->datadgst_en) - crypto_digest_update(tcp_conn->data_tx_tfm, + crypto_digest_update(tcp_conn->tx_tfm, &tcp_ctask->sendbuf.sg, 1); } else if (!(tcp_ctask->xmstate & XMSTATE_W_RESEND_PAD)) return 0; @@ -1477,7 +1475,7 @@ iscsi_send_digest(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask, tcp_conn = conn->dd_data; if (!(tcp_ctask->xmstate & XMSTATE_W_RESEND_DATA_DIGEST)) { - crypto_digest_final(tcp_conn->data_tx_tfm, (u8*)digest); + crypto_digest_final(tcp_conn->tx_tfm, (u8*)digest); iscsi_buf_init_iov(buf, (char*)digest, 4); } tcp_ctask->xmstate &= ~XMSTATE_W_RESEND_DATA_DIGEST; @@ -1511,7 +1509,7 @@ iscsi_send_data(struct iscsi_cmd_task *ctask, struct iscsi_buf *sendbuf, rc = iscsi_sendpage(conn, sendbuf, count, &buf_sent); *sent = *sent + buf_sent; if (buf_sent && conn->datadgst_en) - partial_sg_digest_update(tcp_conn->data_tx_tfm, + partial_sg_digest_update(tcp_conn->tx_tfm, &sendbuf->sg, sendbuf->sg.offset + offset, buf_sent); if (!iscsi_buf_left(sendbuf) && *sg != tcp_ctask->bad_sg) { @@ -1547,10 +1545,6 @@ iscsi_send_unsol_hdr(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) if (conn->hdrdgst_en) iscsi_hdr_digest(conn, &tcp_ctask->headbuf, (u8*)dtask->hdrext); - if (conn->datadgst_en) { - iscsi_data_digest_init(ctask->conn->dd_data, tcp_ctask); - dtask->digest = 0; - } tcp_ctask->xmstate &= ~XMSTATE_UNS_INIT; iscsi_set_padding(tcp_ctask, ctask->data_count); @@ -1563,6 +1557,12 @@ iscsi_send_unsol_hdr(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) return rc; } + if (conn->datadgst_en) { + dtask = &tcp_ctask->unsol_dtask; + iscsi_data_digest_init(ctask->conn->dd_data, tcp_ctask); + dtask->digest = 0; + } + debug_scsi("uns dout [itt 0x%x dlen %d sent %d]\n", ctask->itt, ctask->unsol_count, tcp_ctask->sent); return 0; @@ -1629,12 +1629,6 @@ send_hdr: if (conn->hdrdgst_en) iscsi_hdr_digest(conn, &r2t->headbuf, (u8*)dtask->hdrext); - - if (conn->datadgst_en) { - iscsi_data_digest_init(conn->dd_data, tcp_ctask); - dtask->digest = 0; - } - rc = iscsi_sendhdr(conn, &r2t->headbuf, r2t->data_count); if (rc) { tcp_ctask->xmstate &= ~XMSTATE_SOL_DATA; @@ -1642,6 +1636,11 @@ send_hdr: return rc; } + if (conn->datadgst_en) { + iscsi_data_digest_init(conn->dd_data, tcp_ctask); + dtask->digest = 0; + } + iscsi_set_padding(tcp_ctask, r2t->data_count); debug_scsi("sol dout [dsn %d itt 0x%x dlen %d sent %d]\n", r2t->solicit_datasn - 1, ctask->itt, r2t->data_count, @@ -1764,8 +1763,20 @@ iscsi_tcp_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) /* initial operational parameters */ tcp_conn->hdr_size = sizeof(struct iscsi_hdr); + tcp_conn->tx_tfm = crypto_alloc_tfm("crc32c", 0); + if (!tcp_conn->tx_tfm) + goto free_tcp_conn; + + tcp_conn->rx_tfm = crypto_alloc_tfm("crc32c", 0); + if (!tcp_conn->rx_tfm) + goto free_tx_tfm; + return cls_conn; +free_tx_tfm: + crypto_free_tfm(tcp_conn->tx_tfm); +free_tcp_conn: + kfree(tcp_conn); tcp_conn_alloc_fail: iscsi_conn_teardown(cls_conn); return NULL; @@ -1807,10 +1818,6 @@ iscsi_tcp_conn_destroy(struct iscsi_cls_conn *cls_conn) crypto_free_tfm(tcp_conn->tx_tfm); if (tcp_conn->rx_tfm) crypto_free_tfm(tcp_conn->rx_tfm); - if (tcp_conn->data_tx_tfm) - crypto_free_tfm(tcp_conn->data_tx_tfm); - if (tcp_conn->data_rx_tfm) - crypto_free_tfm(tcp_conn->data_rx_tfm); } kfree(tcp_conn); @@ -1968,48 +1975,11 @@ iscsi_conn_set_param(struct iscsi_cls_conn *cls_conn, enum iscsi_param param, case ISCSI_PARAM_HDRDGST_EN: iscsi_set_param(cls_conn, param, buf, buflen); tcp_conn->hdr_size = sizeof(struct iscsi_hdr); - if (conn->hdrdgst_en) { + if (conn->hdrdgst_en) tcp_conn->hdr_size += sizeof(__u32); - if (!tcp_conn->tx_tfm) - tcp_conn->tx_tfm = crypto_alloc_tfm("crc32c", - 0); - if (!tcp_conn->tx_tfm) - return -ENOMEM; - if (!tcp_conn->rx_tfm) - tcp_conn->rx_tfm = crypto_alloc_tfm("crc32c", - 0); - if (!tcp_conn->rx_tfm) { - crypto_free_tfm(tcp_conn->tx_tfm); - return -ENOMEM; - } - } else { - if (tcp_conn->tx_tfm) - crypto_free_tfm(tcp_conn->tx_tfm); - if (tcp_conn->rx_tfm) - crypto_free_tfm(tcp_conn->rx_tfm); - } break; case ISCSI_PARAM_DATADGST_EN: iscsi_set_param(cls_conn, param, buf, buflen); - if (conn->datadgst_en) { - if (!tcp_conn->data_tx_tfm) - tcp_conn->data_tx_tfm = - crypto_alloc_tfm("crc32c", 0); - if (!tcp_conn->data_tx_tfm) - return -ENOMEM; - if (!tcp_conn->data_rx_tfm) - tcp_conn->data_rx_tfm = - crypto_alloc_tfm("crc32c", 0); - if (!tcp_conn->data_rx_tfm) { - crypto_free_tfm(tcp_conn->data_tx_tfm); - return -ENOMEM; - } - } else { - if (tcp_conn->data_tx_tfm) - crypto_free_tfm(tcp_conn->data_tx_tfm); - if (tcp_conn->data_rx_tfm) - crypto_free_tfm(tcp_conn->data_rx_tfm); - } tcp_conn->sendpage = conn->datadgst_en ? sock_no_sendpage : tcp_conn->sock->ops->sendpage; break; diff --git a/drivers/scsi/iscsi_tcp.h b/drivers/scsi/iscsi_tcp.h index 7e40e94d9fdc..609f4778d125 100644 --- a/drivers/scsi/iscsi_tcp.h +++ b/drivers/scsi/iscsi_tcp.h @@ -81,10 +81,6 @@ struct iscsi_tcp_conn { * stop to terminate */ /* iSCSI connection-wide sequencing */ int hdr_size; /* PDU header size */ - - struct crypto_tfm *rx_tfm; /* CRC32C (Rx) */ - struct crypto_tfm *data_rx_tfm; /* CRC32C (Rx) for data */ - /* control data */ struct iscsi_tcp_recv in; /* TCP receive context */ int in_progress; /* connection state machine */ @@ -94,9 +90,9 @@ struct iscsi_tcp_conn { void (*old_state_change)(struct sock *); void (*old_write_space)(struct sock *); - /* xmit */ + /* data and header digests */ struct crypto_tfm *tx_tfm; /* CRC32C (Tx) */ - struct crypto_tfm *data_tx_tfm; /* CRC32C (Tx) for data */ + struct crypto_tfm *rx_tfm; /* CRC32C (Rx) */ /* MIB custom statistics */ uint32_t sendpage_failures_cnt; From 753e7d3866748799e4a8769cd27ea7202654211b Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 31 Aug 2006 18:09:29 -0400 Subject: [PATCH 0141/1063] [SCSI] iscsi_tcp: fix header resend This patch built over the last ones fixes a bug in the partial header resend code, where we add on another 4 bytes to the send length on the resend. We want just the header plus digest. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/iscsi_tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 82399f71028d..541912a5b886 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -109,7 +109,7 @@ iscsi_hdr_digest(struct iscsi_conn *conn, struct iscsi_buf *buf, struct iscsi_tcp_conn *tcp_conn = conn->dd_data; crypto_digest_digest(tcp_conn->tx_tfm, &buf->sg, 1, crc); - buf->sg.length += sizeof(uint32_t); + buf->sg.length = tcp_conn->hdr_size; } static inline int From d5390f5f788f01788e9dfd41ad516a2908901610 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 31 Aug 2006 18:09:30 -0400 Subject: [PATCH 0142/1063] [SCSI] iscsi_tcp: update header size during relogin When we relogin to a target, we have not yet negotiated digests so we must reset the hdr_size var. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/iscsi_tcp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 541912a5b886..5d292d0b65ec 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -1827,9 +1827,11 @@ static void iscsi_tcp_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) { struct iscsi_conn *conn = cls_conn->dd_data; + struct iscsi_tcp_conn *tcp_conn = conn->dd_data; iscsi_conn_stop(cls_conn, flag); iscsi_tcp_release_conn(conn); + tcp_conn->hdr_size = sizeof(struct iscsi_hdr); } static int From db98ccde0881b8247acb52dece6d94ed770a7aa5 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 31 Aug 2006 18:09:31 -0400 Subject: [PATCH 0143/1063] [SCSI] libiscsi: only check burst lengths when sending unsol data The first burst length is only relevant if immedate data = Yes or if Initial R2T is No Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/libiscsi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index fb65311c81dd..864c6284e83c 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1593,7 +1593,8 @@ int iscsi_conn_start(struct iscsi_cls_conn *cls_conn) return -EPERM; } - if (session->first_burst > session->max_burst) { + if ((session->imm_data_en || !session->initial_r2t_en) && + session->first_burst > session->max_burst) { printk("iscsi: invalid burst lengths: " "first_burst %d max_burst %d\n", session->first_burst, session->max_burst); From ca5186842a6d85e982e3d572ecd407453d0c5116 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 31 Aug 2006 18:09:32 -0400 Subject: [PATCH 0144/1063] [SCSI] iscsi_tcp: fix partial digest recv When a digest is spread across two network buffers, we currently ignore this and try to check the digest with the partial buffer. Or course this fails. This patch has use iscsi_tcp_copy to copy the whole digest before testing it. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/iscsi_tcp.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 5d292d0b65ec..d91e8949c71e 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -648,10 +648,9 @@ iscsi_ctask_copy(struct iscsi_tcp_conn *tcp_conn, struct iscsi_cmd_task *ctask, * byte counters. **/ static inline int -iscsi_tcp_copy(struct iscsi_conn *conn) +iscsi_tcp_copy(struct iscsi_conn *conn, int buf_size) { struct iscsi_tcp_conn *tcp_conn = conn->dd_data; - int buf_size = tcp_conn->in.datalen; int buf_left = buf_size - tcp_conn->data_copied; int size = min(tcp_conn->in.copy, buf_left); int rc; @@ -812,7 +811,7 @@ iscsi_data_recv(struct iscsi_conn *conn) * Collect data segment to the connection's data * placeholder */ - if (iscsi_tcp_copy(conn)) { + if (iscsi_tcp_copy(conn, tcp_conn->in.datalen)) { rc = -EAGAIN; goto exit; } @@ -899,10 +898,15 @@ more: debug_tcp("extra data_recv offset %d copy %d\n", tcp_conn->in.offset, tcp_conn->in.copy); - skb_copy_bits(tcp_conn->in.skb, tcp_conn->in.offset, - &recv_digest, 4); - tcp_conn->in.offset += 4; - tcp_conn->in.copy -= 4; + rc = iscsi_tcp_copy(conn, sizeof(uint32_t)); + if (rc) { + if (rc == -EAGAIN) + goto again; + iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); + return 0; + } + + memcpy(&recv_digest, conn->data, sizeof(uint32_t)); if (recv_digest != tcp_conn->in.datadgst) { debug_tcp("iscsi_tcp: data digest error!" "0x%x != 0x%x\n", recv_digest, @@ -942,9 +946,10 @@ more: &sg, 1); } crypto_digest_final(tcp_conn->rx_tfm, - (u8 *) & tcp_conn->in.datadgst); + (u8 *) &tcp_conn->in.datadgst); debug_tcp("rx digest 0x%x\n", tcp_conn->in.datadgst); tcp_conn->in_progress = IN_PROGRESS_DDIGEST_RECV; + tcp_conn->data_copied = 0; } else tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER; } From f47f2cf5d4acf929a3aaa6957c3fc4622c358703 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 31 Aug 2006 18:09:33 -0400 Subject: [PATCH 0145/1063] [SCSI] libiscsi: check that command ptr is set before accessing it If the scsi eh sends a TUR and the session is down we could return SCSI_ML_HOST_BUSY. scsi eh will ignore this and send ask us to abort the command and we blindly accesst the command ptr. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/libiscsi.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 864c6284e83c..12b5c1800740 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -192,6 +192,8 @@ static void iscsi_complete_command(struct iscsi_cmd_task *ctask) ctask->state = ISCSI_TASK_COMPLETED; ctask->sc = NULL; + /* SCSI eh reuses commands to verify us */ + sc->SCp.ptr = NULL; list_del_init(&ctask->running); __kfifo_put(session->cmdpool.queue, (void*)&ctask, sizeof(void*)); sc->scsi_done(sc); @@ -737,6 +739,7 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) sc->scsi_done = done; sc->result = 0; + sc->SCp.ptr = NULL; host = sc->device->host; session = iscsi_hostdata(host->hostdata); @@ -801,9 +804,10 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) list_add_tail(&ctask->running, &conn->xmitqueue); debug_scsi( - "ctask enq [%s cid %d sc %lx itt 0x%x len %d cmdsn %d win %d]\n", + "ctask enq [%s cid %d sc %p cdb 0x%x itt 0x%x len %d cmdsn %d " + "win %d]\n", sc->sc_data_direction == DMA_TO_DEVICE ? "write" : "read", - conn->id, (long)sc, ctask->itt, sc->request_bufflen, + conn->id, sc, sc->cmnd[0], ctask->itt, sc->request_bufflen, session->cmdsn, session->max_cmdsn - session->exp_cmdsn + 1); spin_unlock(&session->lock); @@ -1134,11 +1138,24 @@ static void fail_command(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask, int iscsi_eh_abort(struct scsi_cmnd *sc) { - struct iscsi_cmd_task *ctask = (struct iscsi_cmd_task *)sc->SCp.ptr; - struct iscsi_conn *conn = ctask->conn; - struct iscsi_session *session = conn->session; + struct iscsi_cmd_task *ctask; + struct iscsi_conn *conn; + struct iscsi_session *session; int rc; + /* + * if session was ISCSI_STATE_IN_RECOVERY then we may not have + * got the command. + */ + if (!sc->SCp.ptr) { + debug_scsi("sc never reached iscsi layer or it completed.\n"); + return SUCCESS; + } + + ctask = (struct iscsi_cmd_task *)sc->SCp.ptr; + conn = ctask->conn; + session = conn->session; + conn->eh_abort_cnt++; debug_scsi("aborting [sc %p itt 0x%x]\n", sc, ctask->itt); From e648f63c6520d6e572573149c16a64d2c5ad7ec5 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 31 Aug 2006 18:09:34 -0400 Subject: [PATCH 0146/1063] [SCSI] libiscsi: don't call into lld to cleanup task In the normal IO path we should not be calling back into the LLD since the LLD will have cleaned up the task before or after calling complete pdu. For the fail_command path we still need to do this to force the cleanup. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/libiscsi.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 12b5c1800740..c542d0e95e68 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -213,12 +213,8 @@ static void iscsi_get_ctask(struct iscsi_cmd_task *ctask) static void __iscsi_put_ctask(struct iscsi_cmd_task *ctask) { - struct iscsi_conn *conn = ctask->conn; - - if (atomic_dec_and_test(&ctask->refcount)) { - conn->session->tt->cleanup_cmd_task(conn, ctask); + if (atomic_dec_and_test(&ctask->refcount)) iscsi_complete_command(ctask); - } } static void iscsi_put_ctask(struct iscsi_cmd_task *ctask) @@ -1129,10 +1125,13 @@ static void fail_command(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask, sc = ctask->sc; if (!sc) return; + + conn->session->tt->cleanup_cmd_task(conn, ctask); iscsi_ctask_mtask_cleanup(ctask); sc->result = err; sc->resid = sc->request_bufflen; + /* release ref from queuecommand */ __iscsi_put_ctask(ctask); } From 01dfc7fc56f4b7ec0e5344ab44fcf673ebfbf7fa Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 31 Aug 2006 18:09:35 -0400 Subject: [PATCH 0147/1063] [SCSI] iscsi class: update version Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/scsi_transport_iscsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 2ecd14188574..7b0019cccce3 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -34,7 +34,7 @@ #define ISCSI_SESSION_ATTRS 11 #define ISCSI_CONN_ATTRS 11 #define ISCSI_HOST_ATTRS 0 -#define ISCSI_TRANSPORT_VERSION "1.1-646" +#define ISCSI_TRANSPORT_VERSION "2.0-685" struct iscsi_internal { int daemon_pid; From 69bdd88ca2670c321fef774e77059516f836c6f2 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 1 Sep 2006 15:50:23 +0200 Subject: [PATCH 0148/1063] [SCSI] Wrong size information for devices with disabled read access When accessing a device with disabled read access the capacity is set randomly to 1GB. This makes it impossible to userspace tools to detect invalid device capacities. Signed-off-by: Mike Anderson Acked-by: Chris Mason Signed-off-by: Hannes Reinecke Signed-off-by: James Bottomley --- drivers/scsi/sd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 98bd3aab9739..638cff41d436 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1215,7 +1215,7 @@ repeat: /* Either no media are present but the drive didn't tell us, or they are present but the read capacity command fails */ /* sdkp->media_present = 0; -- not always correct */ - sdkp->capacity = 0x200000; /* 1 GB - random */ + sdkp->capacity = 0; /* unknown mapped to zero - as usual */ return; } else if (the_result && longrc) { From 5a25ba1677ab8d63890016a8c1bca68a3e0fbc7d Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Fri, 1 Sep 2006 03:12:19 -0400 Subject: [PATCH 0149/1063] [SCSI] Add Promise SuperTrak driver Add Promise SuperTrak 'stex' driver, supporting SuperTrak EX8350/8300/16350/16300 controllers. The controller's firmware accepts SCSI commands, handing them to the underlying RAID or JBOD disks. The driver consisted of the following cleanups and fixes, beyond its initial submission: Ed Lin: stex: cleanup and minor fixes stex: add new device ids stex: update internal copy code path stex: add hard reset function stex: adjust command timeout in slave_config routine stex: use more efficient method for unload/shutdown flush Jeff Garzik: [SCSI] Add Promise SuperTrak 'shasta' driver. Rename drivers/scsi/shasta.c to stex.c ("SuperTrak EX"). [SCSI] stex: update with community comments from 'Promise SuperTrak' thread [SCSI] stex: Fix warning, trim trailing whitespace. [SCSI] stex: remove last remnants of "shasta" project code name [SCSI] stex: removed 6-byte command emulation [SCSI] stex: minor cleanups [SCSI] stex: minor fixes: irq flag, error return value [SCSI] stex: use dma_alloc_coherent() Signed-off-by: Jeff Garzik Signed-off-by: James Bottomley --- drivers/scsi/Kconfig | 7 + drivers/scsi/Makefile | 1 + drivers/scsi/stex.c | 1316 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1324 insertions(+) create mode 100644 drivers/scsi/stex.c diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 7de5fdfdab67..c8c606589ea6 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1070,6 +1070,13 @@ config 53C700_LE_ON_BE depends on SCSI_LASI700 default y +config SCSI_STEX + tristate "Promise SuperTrak EX Series support" + depends on PCI && SCSI + ---help--- + This driver supports Promise SuperTrak EX8350/8300/16350/16300 + Storage controllers. + config SCSI_SYM53C8XX_2 tristate "SYM53C8XX Version 2 SCSI support" depends on PCI && SCSI diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index 83da70decdd1..fd9aeb1ba07f 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -141,6 +141,7 @@ obj-$(CONFIG_SCSI_SATA_ULI) += libata.o sata_uli.o obj-$(CONFIG_SCSI_SATA_MV) += libata.o sata_mv.o obj-$(CONFIG_SCSI_PDC_ADMA) += libata.o pdc_adma.o obj-$(CONFIG_SCSI_HPTIOP) += hptiop.o +obj-$(CONFIG_SCSI_STEX) += stex.o obj-$(CONFIG_ARM) += arm/ diff --git a/drivers/scsi/stex.c b/drivers/scsi/stex.c new file mode 100644 index 000000000000..fd093302bf1a --- /dev/null +++ b/drivers/scsi/stex.c @@ -0,0 +1,1316 @@ +/* + * SuperTrak EX Series Storage Controller driver for Linux + * + * Copyright (C) 2005, 2006 Promise Technology Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Written By: + * Ed Lin + * + * Version: 2.9.0.13 + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "stex" +#define ST_DRIVER_VERSION "2.9.0.13" +#define ST_VER_MAJOR 2 +#define ST_VER_MINOR 9 +#define ST_OEM 0 +#define ST_BUILD_VER 13 + +enum { + /* MU register offset */ + IMR0 = 0x10, /* MU_INBOUND_MESSAGE_REG0 */ + IMR1 = 0x14, /* MU_INBOUND_MESSAGE_REG1 */ + OMR0 = 0x18, /* MU_OUTBOUND_MESSAGE_REG0 */ + OMR1 = 0x1c, /* MU_OUTBOUND_MESSAGE_REG1 */ + IDBL = 0x20, /* MU_INBOUND_DOORBELL */ + IIS = 0x24, /* MU_INBOUND_INTERRUPT_STATUS */ + IIM = 0x28, /* MU_INBOUND_INTERRUPT_MASK */ + ODBL = 0x2c, /* MU_OUTBOUND_DOORBELL */ + OIS = 0x30, /* MU_OUTBOUND_INTERRUPT_STATUS */ + OIM = 0x3c, /* MU_OUTBOUND_INTERRUPT_MASK */ + + /* MU register value */ + MU_INBOUND_DOORBELL_HANDSHAKE = 1, + MU_INBOUND_DOORBELL_REQHEADCHANGED = 2, + MU_INBOUND_DOORBELL_STATUSTAILCHANGED = 4, + MU_INBOUND_DOORBELL_HMUSTOPPED = 8, + MU_INBOUND_DOORBELL_RESET = 16, + + MU_OUTBOUND_DOORBELL_HANDSHAKE = 1, + MU_OUTBOUND_DOORBELL_REQUESTTAILCHANGED = 2, + MU_OUTBOUND_DOORBELL_STATUSHEADCHANGED = 4, + MU_OUTBOUND_DOORBELL_BUSCHANGE = 8, + MU_OUTBOUND_DOORBELL_HASEVENT = 16, + + /* MU status code */ + MU_STATE_STARTING = 1, + MU_STATE_FMU_READY_FOR_HANDSHAKE = 2, + MU_STATE_SEND_HANDSHAKE_FRAME = 3, + MU_STATE_STARTED = 4, + MU_STATE_RESETTING = 5, + + MU_MAX_DELAY_TIME = 240000, + MU_HANDSHAKE_SIGNATURE = 0x55aaaa55, + HMU_PARTNER_TYPE = 2, + + /* firmware returned values */ + SRB_STATUS_SUCCESS = 0x01, + SRB_STATUS_ERROR = 0x04, + SRB_STATUS_BUSY = 0x05, + SRB_STATUS_INVALID_REQUEST = 0x06, + SRB_STATUS_SELECTION_TIMEOUT = 0x0A, + SRB_SEE_SENSE = 0x80, + + /* task attribute */ + TASK_ATTRIBUTE_SIMPLE = 0x0, + TASK_ATTRIBUTE_HEADOFQUEUE = 0x1, + TASK_ATTRIBUTE_ORDERED = 0x2, + TASK_ATTRIBUTE_ACA = 0x4, + + /* request count, etc. */ + MU_MAX_REQUEST = 32, + TAG_BITMAP_LENGTH = MU_MAX_REQUEST, + + /* one message wasted, use MU_MAX_REQUEST+1 + to handle MU_MAX_REQUEST messages */ + MU_REQ_COUNT = (MU_MAX_REQUEST + 1), + MU_STATUS_COUNT = (MU_MAX_REQUEST + 1), + + STEX_CDB_LENGTH = MAX_COMMAND_SIZE, + REQ_VARIABLE_LEN = 1024, + STATUS_VAR_LEN = 128, + ST_CAN_QUEUE = MU_MAX_REQUEST, + ST_CMD_PER_LUN = MU_MAX_REQUEST, + ST_MAX_SG = 32, + + /* sg flags */ + SG_CF_EOT = 0x80, /* end of table */ + SG_CF_64B = 0x40, /* 64 bit item */ + SG_CF_HOST = 0x20, /* sg in host memory */ + + ST_MAX_ARRAY_SUPPORTED = 16, + ST_MAX_TARGET_NUM = (ST_MAX_ARRAY_SUPPORTED+1), + ST_MAX_LUN_PER_TARGET = 16, + + st_shasta = 0, + st_vsc = 1, + + PASSTHRU_REQ_TYPE = 0x00000001, + PASSTHRU_REQ_NO_WAKEUP = 0x00000100, + ST_INTERNAL_TIMEOUT = 30, + + /* vendor specific commands of Promise */ + ARRAY_CMD = 0xe0, + CONTROLLER_CMD = 0xe1, + DEBUGGING_CMD = 0xe2, + PASSTHRU_CMD = 0xe3, + + PASSTHRU_GET_ADAPTER = 0x05, + PASSTHRU_GET_DRVVER = 0x10, + CTLR_POWER_STATE_CHANGE = 0x0e, + CTLR_POWER_SAVING = 0x01, + + PASSTHRU_SIGNATURE = 0x4e415041, + + INQUIRY_EVPD = 0x01, +}; + +struct st_sgitem { + u8 ctrl; /* SG_CF_xxx */ + u8 reserved[3]; + __le32 count; + __le32 addr; + __le32 addr_hi; +}; + +struct st_sgtable { + __le16 sg_count; + __le16 max_sg_count; + __le32 sz_in_byte; + struct st_sgitem table[ST_MAX_SG]; +}; + +struct handshake_frame { + __le32 rb_phy; /* request payload queue physical address */ + __le32 rb_phy_hi; + __le16 req_sz; /* size of each request payload */ + __le16 req_cnt; /* count of reqs the buffer can hold */ + __le16 status_sz; /* size of each status payload */ + __le16 status_cnt; /* count of status the buffer can hold */ + __le32 hosttime; /* seconds from Jan 1, 1970 (GMT) */ + __le32 hosttime_hi; + u8 partner_type; /* who sends this frame */ + u8 reserved0[7]; + __le32 partner_ver_major; + __le32 partner_ver_minor; + __le32 partner_ver_oem; + __le32 partner_ver_build; + u32 reserved1[4]; +}; + +struct req_msg { + __le16 tag; + u8 lun; + u8 target; + u8 task_attr; + u8 task_manage; + u8 prd_entry; + u8 payload_sz; /* payload size in 4-byte */ + u8 cdb[STEX_CDB_LENGTH]; + u8 variable[REQ_VARIABLE_LEN]; +}; + +struct status_msg { + __le16 tag; + u8 lun; + u8 target; + u8 srb_status; + u8 scsi_status; + u8 reserved; + u8 payload_sz; /* payload size in 4-byte */ + u8 variable[STATUS_VAR_LEN]; +}; + +struct ver_info { + u32 major; + u32 minor; + u32 oem; + u32 build; + u32 reserved[2]; +}; + +struct st_frame { + u32 base[6]; + u32 rom_addr; + + struct ver_info drv_ver; + struct ver_info bios_ver; + + u32 bus; + u32 slot; + u32 irq_level; + u32 irq_vec; + u32 id; + u32 subid; + + u32 dimm_size; + u8 dimm_type; + u8 reserved[3]; + + u32 channel; + u32 reserved1; +}; + +struct st_drvver { + u32 major; + u32 minor; + u32 oem; + u32 build; + u32 signature[2]; + u8 console_id; + u8 host_no; + u8 reserved0[2]; + u32 reserved[3]; +}; + +#define MU_REQ_BUFFER_SIZE (MU_REQ_COUNT * sizeof(struct req_msg)) +#define MU_STATUS_BUFFER_SIZE (MU_STATUS_COUNT * sizeof(struct status_msg)) +#define MU_BUFFER_SIZE (MU_REQ_BUFFER_SIZE + MU_STATUS_BUFFER_SIZE) +#define STEX_BUFFER_SIZE (MU_BUFFER_SIZE + sizeof(struct st_frame)) + +struct st_ccb { + struct req_msg *req; + struct scsi_cmnd *cmd; + + void *sense_buffer; + unsigned int sense_bufflen; + int sg_count; + + u32 req_type; + u8 srb_status; + u8 scsi_status; +}; + +struct st_hba { + void __iomem *mmio_base; /* iomapped PCI memory space */ + void *dma_mem; + dma_addr_t dma_handle; + + struct Scsi_Host *host; + struct pci_dev *pdev; + + u32 tag; + u32 req_head; + u32 req_tail; + u32 status_head; + u32 status_tail; + + struct status_msg *status_buffer; + void *copy_buffer; /* temp buffer for driver-handled commands */ + struct st_ccb ccb[MU_MAX_REQUEST]; + struct st_ccb *wait_ccb; + wait_queue_head_t waitq; + + unsigned int mu_status; + int out_req_cnt; + + unsigned int cardtype; +}; + +static const char console_inq_page[] = +{ + 0x03,0x00,0x03,0x03,0xFA,0x00,0x00,0x30, + 0x50,0x72,0x6F,0x6D,0x69,0x73,0x65,0x20, /* "Promise " */ + 0x52,0x41,0x49,0x44,0x20,0x43,0x6F,0x6E, /* "RAID Con" */ + 0x73,0x6F,0x6C,0x65,0x20,0x20,0x20,0x20, /* "sole " */ + 0x31,0x2E,0x30,0x30,0x20,0x20,0x20,0x20, /* "1.00 " */ + 0x53,0x58,0x2F,0x52,0x53,0x41,0x46,0x2D, /* "SX/RSAF-" */ + 0x54,0x45,0x31,0x2E,0x30,0x30,0x20,0x20, /* "TE1.00 " */ + 0x0C,0x20,0x20,0x20,0x20,0x20,0x20,0x20 +}; + +MODULE_AUTHOR("Ed Lin"); +MODULE_DESCRIPTION("Promise Technology SuperTrak EX Controllers"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(ST_DRIVER_VERSION); + +static void stex_gettime(__le32 *time) +{ + struct timeval tv; + do_gettimeofday(&tv); + + *time = cpu_to_le32(tv.tv_sec & 0xffffffff); + *(time + 1) = cpu_to_le32((tv.tv_sec >> 16) >> 16); +} + +static u16 __stex_alloc_tag(unsigned long *bitmap) +{ + int i; + i = find_first_zero_bit(bitmap, TAG_BITMAP_LENGTH); + if (i < TAG_BITMAP_LENGTH) + __set_bit(i, bitmap); + return (u16)i; +} + +static u16 stex_alloc_tag(struct st_hba *hba, unsigned long *bitmap) +{ + unsigned long flags; + u16 tag; + + spin_lock_irqsave(hba->host->host_lock, flags); + tag = __stex_alloc_tag(bitmap); + spin_unlock_irqrestore(hba->host->host_lock, flags); + return tag; +} + +static void __stex_free_tag(unsigned long *bitmap, u16 tag) +{ + __clear_bit((int)tag, bitmap); +} + +static void stex_free_tag(struct st_hba *hba, unsigned long *bitmap, u16 tag) +{ + unsigned long flags; + + spin_lock_irqsave(hba->host->host_lock, flags); + __stex_free_tag(bitmap, tag); + spin_unlock_irqrestore(hba->host->host_lock, flags); +} + +static struct status_msg *stex_get_status(struct st_hba *hba) +{ + struct status_msg *status = + hba->status_buffer + hba->status_tail; + + ++hba->status_tail; + hba->status_tail %= MU_STATUS_COUNT; + + return status; +} + +static void stex_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq) +{ + cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION; + + cmd->sense_buffer[0] = 0x70; /* fixed format, current */ + cmd->sense_buffer[2] = sk; + cmd->sense_buffer[7] = 18 - 8; /* additional sense length */ + cmd->sense_buffer[12] = asc; + cmd->sense_buffer[13] = ascq; +} + +static void stex_invalid_field(struct scsi_cmnd *cmd, + void (*done)(struct scsi_cmnd *)) +{ + /* "Invalid field in cbd" */ + stex_set_sense(cmd, ILLEGAL_REQUEST, 0x24, 0x0); + done(cmd); +} + +static struct req_msg *stex_alloc_req(struct st_hba *hba) +{ + struct req_msg *req = ((struct req_msg *)hba->dma_mem) + + hba->req_head; + + ++hba->req_head; + hba->req_head %= MU_REQ_COUNT; + + return req; +} + +static int stex_map_sg(struct st_hba *hba, + struct req_msg *req, struct st_ccb *ccb) +{ + struct pci_dev *pdev = hba->pdev; + struct scsi_cmnd *cmd; + dma_addr_t dma_handle; + struct scatterlist *src; + struct st_sgtable *dst; + int i; + + cmd = ccb->cmd; + dst = (struct st_sgtable *)req->variable; + dst->max_sg_count = cpu_to_le16(ST_MAX_SG); + dst->sz_in_byte = cpu_to_le32(cmd->request_bufflen); + + if (cmd->use_sg) { + int n_elem; + + src = (struct scatterlist *) cmd->request_buffer; + n_elem = pci_map_sg(pdev, src, + cmd->use_sg, cmd->sc_data_direction); + if (n_elem <= 0) + return -EIO; + + ccb->sg_count = n_elem; + dst->sg_count = cpu_to_le16((u16)n_elem); + + for (i = 0; i < n_elem; i++, src++) { + dst->table[i].count = cpu_to_le32((u32)sg_dma_len(src)); + dst->table[i].addr = + cpu_to_le32(sg_dma_address(src) & 0xffffffff); + dst->table[i].addr_hi = + cpu_to_le32((sg_dma_address(src) >> 16) >> 16); + dst->table[i].ctrl = SG_CF_64B | SG_CF_HOST; + } + dst->table[--i].ctrl |= SG_CF_EOT; + return 0; + } + + dma_handle = pci_map_single(pdev, cmd->request_buffer, + cmd->request_bufflen, cmd->sc_data_direction); + cmd->SCp.dma_handle = dma_handle; + + ccb->sg_count = 1; + dst->sg_count = cpu_to_le16(1); + dst->table[0].addr = cpu_to_le32(dma_handle & 0xffffffff); + dst->table[0].addr_hi = cpu_to_le32((dma_handle >> 16) >> 16); + dst->table[0].count = cpu_to_le32((u32)cmd->request_bufflen); + dst->table[0].ctrl = SG_CF_EOT | SG_CF_64B | SG_CF_HOST; + + return 0; +} + +static void stex_internal_copy(struct scsi_cmnd *cmd, + const void *src, size_t *count, int sg_count) +{ + size_t lcount; + size_t len; + void *s, *d, *base = NULL; + if (*count > cmd->request_bufflen) + *count = cmd->request_bufflen; + lcount = *count; + while (lcount) { + len = lcount; + s = (void *)src; + if (cmd->use_sg) { + size_t offset = *count - lcount; + s += offset; + base = scsi_kmap_atomic_sg(cmd->request_buffer, + sg_count, &offset, &len); + if (base == NULL) { + *count -= lcount; + return; + } + d = base + offset; + } else + d = cmd->request_buffer; + + memcpy(d, s, len); + + lcount -= len; + if (cmd->use_sg) + scsi_kunmap_atomic_sg(base); + } +} + +static int stex_direct_copy(struct scsi_cmnd *cmd, + const void *src, size_t count) +{ + struct st_hba *hba = (struct st_hba *) &cmd->device->host->hostdata[0]; + size_t cp_len = count; + int n_elem = 0; + + if (cmd->use_sg) { + n_elem = pci_map_sg(hba->pdev, cmd->request_buffer, + cmd->use_sg, cmd->sc_data_direction); + if (n_elem <= 0) + return 0; + } + + stex_internal_copy(cmd, src, &cp_len, n_elem); + + if (cmd->use_sg) + pci_unmap_sg(hba->pdev, cmd->request_buffer, + cmd->use_sg, cmd->sc_data_direction); + return cp_len == count; +} + +static void stex_controller_info(struct st_hba *hba, struct st_ccb *ccb) +{ + struct st_frame *p; + size_t count = sizeof(struct st_frame); + + p = hba->copy_buffer; + memset(p->base, 0, sizeof(u32)*6); + *(unsigned long *)(p->base) = pci_resource_start(hba->pdev, 0); + p->rom_addr = 0; + + p->drv_ver.major = ST_VER_MAJOR; + p->drv_ver.minor = ST_VER_MINOR; + p->drv_ver.oem = ST_OEM; + p->drv_ver.build = ST_BUILD_VER; + + p->bus = hba->pdev->bus->number; + p->slot = hba->pdev->devfn; + p->irq_level = 0; + p->irq_vec = hba->pdev->irq; + p->id = hba->pdev->vendor << 16 | hba->pdev->device; + p->subid = + hba->pdev->subsystem_vendor << 16 | hba->pdev->subsystem_device; + + stex_internal_copy(ccb->cmd, p, &count, ccb->sg_count); +} + +static void +stex_send_cmd(struct st_hba *hba, struct req_msg *req, u16 tag) +{ + req->tag = cpu_to_le16(tag); + req->task_attr = TASK_ATTRIBUTE_SIMPLE; + req->task_manage = 0; /* not supported yet */ + req->payload_sz = (u8)(sizeof(struct req_msg)/sizeof(u32)); + + hba->ccb[tag].req = req; + hba->out_req_cnt++; + + writel(hba->req_head, hba->mmio_base + IMR0); + writel(MU_INBOUND_DOORBELL_REQHEADCHANGED, hba->mmio_base + IDBL); + readl(hba->mmio_base + IDBL); /* flush */ +} + +static int +stex_slave_config(struct scsi_device *sdev) +{ + sdev->use_10_for_rw = 1; + sdev->use_10_for_ms = 1; + sdev->timeout = 60 * HZ; + return 0; +} + +static void +stex_slave_destroy(struct scsi_device *sdev) +{ + struct st_hba *hba = (struct st_hba *) sdev->host->hostdata; + struct req_msg *req; + unsigned long flags; + unsigned long before; + u16 tag; + + if (sdev->type != TYPE_DISK) + return; + + before = jiffies; + while ((tag = stex_alloc_tag(hba, (unsigned long *)&hba->tag)) + == TAG_BITMAP_LENGTH) { + if (time_after(jiffies, before + ST_INTERNAL_TIMEOUT * HZ)) + return; + msleep(10); + } + + spin_lock_irqsave(hba->host->host_lock, flags); + req = stex_alloc_req(hba); + memset(req->cdb, 0, STEX_CDB_LENGTH); + + req->target = sdev->id; + req->lun = sdev->channel; /* firmware lun issue work around */ + req->cdb[0] = SYNCHRONIZE_CACHE; + + hba->ccb[tag].cmd = NULL; + hba->ccb[tag].sg_count = 0; + hba->ccb[tag].sense_bufflen = 0; + hba->ccb[tag].sense_buffer = NULL; + hba->ccb[tag].req_type |= PASSTHRU_REQ_TYPE; + + stex_send_cmd(hba, req, tag); + spin_unlock_irqrestore(hba->host->host_lock, flags); + + wait_event_timeout(hba->waitq, + !(hba->ccb[tag].req_type), ST_INTERNAL_TIMEOUT * HZ); + if (hba->ccb[tag].req_type & PASSTHRU_REQ_TYPE) + return; + + stex_free_tag(hba, (unsigned long *)&hba->tag, tag); +} + +static int +stex_queuecommand(struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd *)) +{ + struct st_hba *hba; + struct Scsi_Host *host; + unsigned int id,lun; + struct req_msg *req; + u16 tag; + host = cmd->device->host; + id = cmd->device->id; + lun = cmd->device->channel; /* firmware lun issue work around */ + hba = (struct st_hba *) &host->hostdata[0]; + + switch (cmd->cmnd[0]) { + case MODE_SENSE_10: + { + static char ms10_caching_page[12] = + { 0, 0x12, 0, 0, 0, 0, 0, 0, 0x8, 0xa, 0x4, 0 }; + unsigned char page; + page = cmd->cmnd[2] & 0x3f; + if (page == 0x8 || page == 0x3f) { + stex_direct_copy(cmd, ms10_caching_page, + sizeof(ms10_caching_page)); + cmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8; + done(cmd); + } else + stex_invalid_field(cmd, done); + return 0; + } + case INQUIRY: + if (id != ST_MAX_ARRAY_SUPPORTED) + break; + if (lun == 0 && (cmd->cmnd[1] & INQUIRY_EVPD) == 0) { + stex_direct_copy(cmd, console_inq_page, + sizeof(console_inq_page)); + cmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8; + done(cmd); + } else + stex_invalid_field(cmd, done); + return 0; + case PASSTHRU_CMD: + if (cmd->cmnd[1] == PASSTHRU_GET_DRVVER) { + struct st_drvver ver; + ver.major = ST_VER_MAJOR; + ver.minor = ST_VER_MINOR; + ver.oem = ST_OEM; + ver.build = ST_BUILD_VER; + ver.signature[0] = PASSTHRU_SIGNATURE; + ver.console_id = ST_MAX_ARRAY_SUPPORTED; + ver.host_no = hba->host->host_no; + cmd->result = stex_direct_copy(cmd, &ver, sizeof(ver)) ? + DID_OK << 16 | COMMAND_COMPLETE << 8 : + DID_ERROR << 16 | COMMAND_COMPLETE << 8; + done(cmd); + return 0; + } + default: + break; + } + + cmd->scsi_done = done; + + if (unlikely((tag = __stex_alloc_tag((unsigned long *)&hba->tag)) + == TAG_BITMAP_LENGTH)) + return SCSI_MLQUEUE_HOST_BUSY; + + req = stex_alloc_req(hba); + req->lun = lun; + req->target = id; + + /* cdb */ + memcpy(req->cdb, cmd->cmnd, STEX_CDB_LENGTH); + + hba->ccb[tag].cmd = cmd; + hba->ccb[tag].sense_bufflen = SCSI_SENSE_BUFFERSIZE; + hba->ccb[tag].sense_buffer = cmd->sense_buffer; + hba->ccb[tag].req_type = 0; + + if (cmd->sc_data_direction != DMA_NONE) + stex_map_sg(hba, req, &hba->ccb[tag]); + + stex_send_cmd(hba, req, tag); + return 0; +} + +static void stex_unmap_sg(struct st_hba *hba, struct scsi_cmnd *cmd) +{ + if (cmd->sc_data_direction != DMA_NONE) { + if (cmd->use_sg) + pci_unmap_sg(hba->pdev, cmd->request_buffer, + cmd->use_sg, cmd->sc_data_direction); + else + pci_unmap_single(hba->pdev, cmd->SCp.dma_handle, + cmd->request_bufflen, cmd->sc_data_direction); + } +} + +static void stex_scsi_done(struct st_ccb *ccb) +{ + struct scsi_cmnd *cmd = ccb->cmd; + int result; + + if (ccb->srb_status == SRB_STATUS_SUCCESS || ccb->srb_status == 0) { + result = ccb->scsi_status; + switch (ccb->scsi_status) { + case SAM_STAT_GOOD: + result |= DID_OK << 16 | COMMAND_COMPLETE << 8; + break; + case SAM_STAT_CHECK_CONDITION: + result |= DRIVER_SENSE << 24; + break; + case SAM_STAT_BUSY: + result |= DID_BUS_BUSY << 16 | COMMAND_COMPLETE << 8; + break; + default: + result |= DID_ERROR << 16 | COMMAND_COMPLETE << 8; + break; + } + } + else if (ccb->srb_status & SRB_SEE_SENSE) + result = DRIVER_SENSE << 24 | SAM_STAT_CHECK_CONDITION; + else switch (ccb->srb_status) { + case SRB_STATUS_SELECTION_TIMEOUT: + result = DID_NO_CONNECT << 16 | COMMAND_COMPLETE << 8; + break; + case SRB_STATUS_BUSY: + result = DID_BUS_BUSY << 16 | COMMAND_COMPLETE << 8; + break; + case SRB_STATUS_INVALID_REQUEST: + case SRB_STATUS_ERROR: + default: + result = DID_ERROR << 16 | COMMAND_COMPLETE << 8; + break; + } + + cmd->result = result; + cmd->scsi_done(cmd); +} + +static void stex_copy_data(struct st_ccb *ccb, + struct status_msg *resp, unsigned int variable) +{ + size_t count = variable; + if (resp->scsi_status != SAM_STAT_GOOD) { + if (ccb->sense_buffer != NULL) + memcpy(ccb->sense_buffer, resp->variable, + min(variable, ccb->sense_bufflen)); + return; + } + + if (ccb->cmd == NULL) + return; + stex_internal_copy(ccb->cmd, resp->variable, &count, ccb->sg_count); +} + +static void stex_mu_intr(struct st_hba *hba, u32 doorbell) +{ + void __iomem *base = hba->mmio_base; + struct status_msg *resp; + struct st_ccb *ccb; + unsigned int size; + u16 tag; + + if (!(doorbell & MU_OUTBOUND_DOORBELL_STATUSHEADCHANGED)) + return; + + /* status payloads */ + hba->status_head = readl(base + OMR1); + if (unlikely(hba->status_head >= MU_STATUS_COUNT)) { + printk(KERN_WARNING DRV_NAME "(%s): invalid status head\n", + pci_name(hba->pdev)); + return; + } + + if (unlikely(hba->mu_status != MU_STATE_STARTED || + hba->out_req_cnt <= 0)) { + hba->status_tail = hba->status_head; + goto update_status; + } + + while (hba->status_tail != hba->status_head) { + resp = stex_get_status(hba); + tag = le16_to_cpu(resp->tag); + if (unlikely(tag >= TAG_BITMAP_LENGTH)) { + printk(KERN_WARNING DRV_NAME + "(%s): invalid tag\n", pci_name(hba->pdev)); + continue; + } + if (unlikely((hba->tag & (1 << tag)) == 0)) { + printk(KERN_WARNING DRV_NAME + "(%s): null tag\n", pci_name(hba->pdev)); + continue; + } + + hba->out_req_cnt--; + ccb = &hba->ccb[tag]; + if (hba->wait_ccb == ccb) + hba->wait_ccb = NULL; + if (unlikely(ccb->req == NULL)) { + printk(KERN_WARNING DRV_NAME + "(%s): lagging req\n", pci_name(hba->pdev)); + __stex_free_tag((unsigned long *)&hba->tag, tag); + stex_unmap_sg(hba, ccb->cmd); /* ??? */ + continue; + } + + size = resp->payload_sz * sizeof(u32); /* payload size */ + if (unlikely(size < sizeof(*resp) - STATUS_VAR_LEN || + size > sizeof(*resp))) { + printk(KERN_WARNING DRV_NAME "(%s): bad status size\n", + pci_name(hba->pdev)); + } else { + size -= sizeof(*resp) - STATUS_VAR_LEN; /* copy size */ + if (size) + stex_copy_data(ccb, resp, size); + } + + ccb->srb_status = resp->srb_status; + ccb->scsi_status = resp->scsi_status; + + if (ccb->req_type & PASSTHRU_REQ_TYPE) { + if (ccb->req_type & PASSTHRU_REQ_NO_WAKEUP) { + ccb->req_type = 0; + continue; + } + ccb->req_type = 0; + if (waitqueue_active(&hba->waitq)) + wake_up(&hba->waitq); + continue; + } + if (ccb->cmd->cmnd[0] == PASSTHRU_CMD && + ccb->cmd->cmnd[1] == PASSTHRU_GET_ADAPTER) + stex_controller_info(hba, ccb); + __stex_free_tag((unsigned long *)&hba->tag, tag); + stex_unmap_sg(hba, ccb->cmd); + stex_scsi_done(ccb); + } + +update_status: + writel(hba->status_head, base + IMR1); + readl(base + IMR1); /* flush */ +} + +static irqreturn_t stex_intr(int irq, void *__hba, struct pt_regs *regs) +{ + struct st_hba *hba = __hba; + void __iomem *base = hba->mmio_base; + u32 data; + unsigned long flags; + int handled = 0; + + spin_lock_irqsave(hba->host->host_lock, flags); + + data = readl(base + ODBL); + + if (data && data != 0xffffffff) { + /* clear the interrupt */ + writel(data, base + ODBL); + readl(base + ODBL); /* flush */ + stex_mu_intr(hba, data); + handled = 1; + } + + spin_unlock_irqrestore(hba->host->host_lock, flags); + + return IRQ_RETVAL(handled); +} + +static int stex_handshake(struct st_hba *hba) +{ + void __iomem *base = hba->mmio_base; + struct handshake_frame *h; + dma_addr_t status_phys; + int i; + + if (readl(base + OMR0) != MU_HANDSHAKE_SIGNATURE) { + writel(MU_INBOUND_DOORBELL_HANDSHAKE, base + IDBL); + readl(base + IDBL); + for (i = 0; readl(base + OMR0) != MU_HANDSHAKE_SIGNATURE + && i < MU_MAX_DELAY_TIME; i++) { + rmb(); + msleep(1); + } + + if (i == MU_MAX_DELAY_TIME) { + printk(KERN_ERR DRV_NAME + "(%s): no handshake signature\n", + pci_name(hba->pdev)); + return -1; + } + } + + udelay(10); + + h = (struct handshake_frame *)(hba->dma_mem + MU_REQ_BUFFER_SIZE); + h->rb_phy = cpu_to_le32(hba->dma_handle); + h->rb_phy_hi = cpu_to_le32((hba->dma_handle >> 16) >> 16); + h->req_sz = cpu_to_le16(sizeof(struct req_msg)); + h->req_cnt = cpu_to_le16(MU_REQ_COUNT); + h->status_sz = cpu_to_le16(sizeof(struct status_msg)); + h->status_cnt = cpu_to_le16(MU_STATUS_COUNT); + stex_gettime(&h->hosttime); + h->partner_type = HMU_PARTNER_TYPE; + + status_phys = hba->dma_handle + MU_REQ_BUFFER_SIZE; + writel(status_phys, base + IMR0); + readl(base + IMR0); + writel((status_phys >> 16) >> 16, base + IMR1); + readl(base + IMR1); + + writel((status_phys >> 16) >> 16, base + OMR0); /* old fw compatible */ + readl(base + OMR0); + writel(MU_INBOUND_DOORBELL_HANDSHAKE, base + IDBL); + readl(base + IDBL); /* flush */ + + udelay(10); + for (i = 0; readl(base + OMR0) != MU_HANDSHAKE_SIGNATURE + && i < MU_MAX_DELAY_TIME; i++) { + rmb(); + msleep(1); + } + + if (i == MU_MAX_DELAY_TIME) { + printk(KERN_ERR DRV_NAME + "(%s): no signature after handshake frame\n", + pci_name(hba->pdev)); + return -1; + } + + writel(0, base + IMR0); + readl(base + IMR0); + writel(0, base + OMR0); + readl(base + OMR0); + writel(0, base + IMR1); + readl(base + IMR1); + writel(0, base + OMR1); + readl(base + OMR1); /* flush */ + hba->mu_status = MU_STATE_STARTED; + return 0; +} + +static int stex_abort(struct scsi_cmnd *cmd) +{ + struct Scsi_Host *host = cmd->device->host; + struct st_hba *hba = (struct st_hba *)host->hostdata; + u16 tag; + void __iomem *base; + u32 data; + int result = SUCCESS; + unsigned long flags; + base = hba->mmio_base; + spin_lock_irqsave(host->host_lock, flags); + + for (tag = 0; tag < MU_MAX_REQUEST; tag++) + if (hba->ccb[tag].cmd == cmd && (hba->tag & (1 << tag))) { + hba->wait_ccb = &(hba->ccb[tag]); + break; + } + if (tag >= MU_MAX_REQUEST) + goto out; + + data = readl(base + ODBL); + if (data == 0 || data == 0xffffffff) + goto fail_out; + + writel(data, base + ODBL); + readl(base + ODBL); /* flush */ + + stex_mu_intr(hba, data); + + if (hba->wait_ccb == NULL) { + printk(KERN_WARNING DRV_NAME + "(%s): lost interrupt\n", pci_name(hba->pdev)); + goto out; + } + +fail_out: + hba->wait_ccb->req = NULL; /* nullify the req's future return */ + hba->wait_ccb = NULL; + result = FAILED; +out: + spin_unlock_irqrestore(host->host_lock, flags); + return result; +} + +static void stex_hard_reset(struct st_hba *hba) +{ + struct pci_bus *bus; + int i; + u16 pci_cmd; + u8 pci_bctl; + + for (i = 0; i < 16; i++) + pci_read_config_dword(hba->pdev, i * 4, + &hba->pdev->saved_config_space[i]); + + /* Reset secondary bus. Our controller(MU/ATU) is the only device on + secondary bus. Consult Intel 80331/3 developer's manual for detail */ + bus = hba->pdev->bus; + pci_read_config_byte(bus->self, PCI_BRIDGE_CONTROL, &pci_bctl); + pci_bctl |= PCI_BRIDGE_CTL_BUS_RESET; + pci_write_config_byte(bus->self, PCI_BRIDGE_CONTROL, pci_bctl); + msleep(1); + pci_bctl &= ~PCI_BRIDGE_CTL_BUS_RESET; + pci_write_config_byte(bus->self, PCI_BRIDGE_CONTROL, pci_bctl); + + for (i = 0; i < MU_MAX_DELAY_TIME; i++) { + pci_read_config_word(hba->pdev, PCI_COMMAND, &pci_cmd); + if (pci_cmd & PCI_COMMAND_MASTER) + break; + msleep(1); + } + + ssleep(5); + for (i = 0; i < 16; i++) + pci_write_config_dword(hba->pdev, i * 4, + hba->pdev->saved_config_space[i]); +} + +static int stex_reset(struct scsi_cmnd *cmd) +{ + struct st_hba *hba; + unsigned long flags; + hba = (struct st_hba *) &cmd->device->host->hostdata[0]; + + hba->mu_status = MU_STATE_RESETTING; + + if (hba->cardtype == st_shasta) + stex_hard_reset(hba); + + if (stex_handshake(hba)) { + printk(KERN_WARNING DRV_NAME + "(%s): resetting: handshake failed\n", + pci_name(hba->pdev)); + return FAILED; + } + spin_lock_irqsave(hba->host->host_lock, flags); + hba->tag = 0; + hba->req_head = 0; + hba->req_tail = 0; + hba->status_head = 0; + hba->status_tail = 0; + hba->out_req_cnt = 0; + spin_unlock_irqrestore(hba->host->host_lock, flags); + + return SUCCESS; +} + +static int stex_biosparam(struct scsi_device *sdev, + struct block_device *bdev, sector_t capacity, int geom[]) +{ + int heads = 255, sectors = 63, cylinders; + + if (capacity < 0x200000) { + heads = 64; + sectors = 32; + } + + cylinders = sector_div(capacity, heads * sectors); + + geom[0] = heads; + geom[1] = sectors; + geom[2] = cylinders; + + return 0; +} + +static struct scsi_host_template driver_template = { + .module = THIS_MODULE, + .name = DRV_NAME, + .proc_name = DRV_NAME, + .bios_param = stex_biosparam, + .queuecommand = stex_queuecommand, + .slave_configure = stex_slave_config, + .slave_destroy = stex_slave_destroy, + .eh_abort_handler = stex_abort, + .eh_host_reset_handler = stex_reset, + .can_queue = ST_CAN_QUEUE, + .this_id = -1, + .sg_tablesize = ST_MAX_SG, + .cmd_per_lun = ST_CMD_PER_LUN, +}; + +static int stex_set_dma_mask(struct pci_dev * pdev) +{ + int ret; + if (!pci_set_dma_mask(pdev, DMA_64BIT_MASK) + && !pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK)) + return 0; + ret = pci_set_dma_mask(pdev, DMA_32BIT_MASK); + if (!ret) + ret = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); + return ret; +} + +static int __devinit +stex_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct st_hba *hba; + struct Scsi_Host *host; + int err; + + err = pci_enable_device(pdev); + if (err) + return err; + + pci_set_master(pdev); + + host = scsi_host_alloc(&driver_template, sizeof(struct st_hba)); + + if (!host) { + printk(KERN_ERR DRV_NAME "(%s): scsi_host_alloc failed\n", + pci_name(pdev)); + err = -ENOMEM; + goto out_disable; + } + + hba = (struct st_hba *)host->hostdata; + memset(hba, 0, sizeof(struct st_hba)); + + err = pci_request_regions(pdev, DRV_NAME); + if (err < 0) { + printk(KERN_ERR DRV_NAME "(%s): request regions failed\n", + pci_name(pdev)); + goto out_scsi_host_put; + } + + hba->mmio_base = ioremap(pci_resource_start(pdev, 0), + pci_resource_len(pdev, 0)); + if ( !hba->mmio_base) { + printk(KERN_ERR DRV_NAME "(%s): memory map failed\n", + pci_name(pdev)); + err = -ENOMEM; + goto out_release_regions; + } + + err = stex_set_dma_mask(pdev); + if (err) { + printk(KERN_ERR DRV_NAME "(%s): set dma mask failed\n", + pci_name(pdev)); + goto out_iounmap; + } + + hba->dma_mem = dma_alloc_coherent(&pdev->dev, + STEX_BUFFER_SIZE, &hba->dma_handle, GFP_KERNEL); + if (!hba->dma_mem) { + err = -ENOMEM; + printk(KERN_ERR DRV_NAME "(%s): dma mem alloc failed\n", + pci_name(pdev)); + goto out_iounmap; + } + + hba->status_buffer = + (struct status_msg *)(hba->dma_mem + MU_REQ_BUFFER_SIZE); + hba->copy_buffer = hba->dma_mem + MU_BUFFER_SIZE; + hba->mu_status = MU_STATE_STARTING; + + hba->cardtype = (unsigned int) id->driver_data; + + /* firmware uses id/lun pair for a logical drive, but lun would be + always 0 if CONFIG_SCSI_MULTI_LUN not configured, so we use + channel to map lun here */ + host->max_channel = ST_MAX_LUN_PER_TARGET - 1; + host->max_id = ST_MAX_TARGET_NUM; + host->max_lun = 1; + host->unique_id = host->host_no; + host->max_cmd_len = STEX_CDB_LENGTH; + + hba->host = host; + hba->pdev = pdev; + init_waitqueue_head(&hba->waitq); + + err = request_irq(pdev->irq, stex_intr, IRQF_SHARED, DRV_NAME, hba); + if (err) { + printk(KERN_ERR DRV_NAME "(%s): request irq failed\n", + pci_name(pdev)); + goto out_pci_free; + } + + err = stex_handshake(hba); + if (err) + goto out_free_irq; + + pci_set_drvdata(pdev, hba); + + err = scsi_add_host(host, &pdev->dev); + if (err) { + printk(KERN_ERR DRV_NAME "(%s): scsi_add_host failed\n", + pci_name(pdev)); + goto out_free_irq; + } + + scsi_scan_host(host); + + return 0; + +out_free_irq: + free_irq(pdev->irq, hba); +out_pci_free: + dma_free_coherent(&pdev->dev, STEX_BUFFER_SIZE, + hba->dma_mem, hba->dma_handle); +out_iounmap: + iounmap(hba->mmio_base); +out_release_regions: + pci_release_regions(pdev); +out_scsi_host_put: + scsi_host_put(host); +out_disable: + pci_disable_device(pdev); + + return err; +} + +static void stex_hba_stop(struct st_hba *hba) +{ + struct req_msg *req; + unsigned long flags; + unsigned long before; + u16 tag; + + before = jiffies; + while ((tag = stex_alloc_tag(hba, (unsigned long *)&hba->tag)) + == TAG_BITMAP_LENGTH) { + if (time_after(jiffies, before + ST_INTERNAL_TIMEOUT * HZ)) + return; + msleep(10); + } + + spin_lock_irqsave(hba->host->host_lock, flags); + req = stex_alloc_req(hba); + memset(req->cdb, 0, STEX_CDB_LENGTH); + + req->cdb[0] = CONTROLLER_CMD; + req->cdb[1] = CTLR_POWER_STATE_CHANGE; + req->cdb[2] = CTLR_POWER_SAVING; + + hba->ccb[tag].cmd = NULL; + hba->ccb[tag].sg_count = 0; + hba->ccb[tag].sense_bufflen = 0; + hba->ccb[tag].sense_buffer = NULL; + hba->ccb[tag].req_type |= PASSTHRU_REQ_TYPE; + + stex_send_cmd(hba, req, tag); + spin_unlock_irqrestore(hba->host->host_lock, flags); + + wait_event_timeout(hba->waitq, + !(hba->ccb[tag].req_type), ST_INTERNAL_TIMEOUT * HZ); + if (hba->ccb[tag].req_type & PASSTHRU_REQ_TYPE) + return; + + stex_free_tag(hba, (unsigned long *)&hba->tag, tag); +} + +static void stex_hba_free(struct st_hba *hba) +{ + free_irq(hba->pdev->irq, hba); + + iounmap(hba->mmio_base); + + pci_release_regions(hba->pdev); + + dma_free_coherent(&hba->pdev->dev, STEX_BUFFER_SIZE, + hba->dma_mem, hba->dma_handle); +} + +static void stex_remove(struct pci_dev *pdev) +{ + struct st_hba *hba = pci_get_drvdata(pdev); + + scsi_remove_host(hba->host); + + pci_set_drvdata(pdev, NULL); + + stex_hba_stop(hba); + + stex_hba_free(hba); + + scsi_host_put(hba->host); + + pci_disable_device(pdev); +} + +static void stex_shutdown(struct pci_dev *pdev) +{ + struct st_hba *hba = pci_get_drvdata(pdev); + + stex_hba_stop(hba); +} + +static struct pci_device_id stex_pci_tbl[] = { + { 0x105a, 0x8350, PCI_ANY_ID, PCI_ANY_ID, 0, 0, st_shasta }, + { 0x105a, 0xc350, PCI_ANY_ID, PCI_ANY_ID, 0, 0, st_shasta }, + { 0x105a, 0xf350, PCI_ANY_ID, PCI_ANY_ID, 0, 0, st_shasta }, + { 0x105a, 0x4301, PCI_ANY_ID, PCI_ANY_ID, 0, 0, st_shasta }, + { 0x105a, 0x4302, PCI_ANY_ID, PCI_ANY_ID, 0, 0, st_shasta }, + { 0x105a, 0x8301, PCI_ANY_ID, PCI_ANY_ID, 0, 0, st_shasta }, + { 0x105a, 0x8302, PCI_ANY_ID, PCI_ANY_ID, 0, 0, st_shasta }, + { 0x1725, 0x7250, PCI_ANY_ID, PCI_ANY_ID, 0, 0, st_vsc }, + { } /* terminate list */ +}; +MODULE_DEVICE_TABLE(pci, stex_pci_tbl); + +static struct pci_driver stex_pci_driver = { + .name = DRV_NAME, + .id_table = stex_pci_tbl, + .probe = stex_probe, + .remove = __devexit_p(stex_remove), + .shutdown = stex_shutdown, +}; + +static int __init stex_init(void) +{ + printk(KERN_INFO DRV_NAME + ": Promise SuperTrak EX Driver version: %s\n", + ST_DRIVER_VERSION); + + return pci_register_driver(&stex_pci_driver); +} + +static void __exit stex_exit(void) +{ + pci_unregister_driver(&stex_pci_driver); +} + +module_init(stex_init); +module_exit(stex_exit); From cf355883f506051a8ce3ac4539752829320b6c8c Mon Sep 17 00:00:00 2001 From: Ed Lin Date: Fri, 1 Sep 2006 14:31:51 +0800 Subject: [PATCH 0150/1063] [SCSI] stex: add shared tags from block Use block shared tags entirely within the driver. In the case of shutdown, assume that there are no other outstanding commands, so tag 0 is fine. Signed-off-by: Ed Lin Signed-off-by: James Bottomley --- drivers/scsi/stex.c | 177 ++++++++++++++------------------------------ 1 file changed, 57 insertions(+), 120 deletions(-) diff --git a/drivers/scsi/stex.c b/drivers/scsi/stex.c index fd093302bf1a..15fb99f224ee 100644 --- a/drivers/scsi/stex.c +++ b/drivers/scsi/stex.c @@ -34,6 +34,7 @@ #include #include #include +#include #define DRV_NAME "stex" #define ST_DRIVER_VERSION "2.9.0.13" @@ -95,7 +96,6 @@ enum { /* request count, etc. */ MU_MAX_REQUEST = 32, - TAG_BITMAP_LENGTH = MU_MAX_REQUEST, /* one message wasted, use MU_MAX_REQUEST+1 to handle MU_MAX_REQUEST messages */ @@ -265,7 +265,6 @@ struct st_hba { struct Scsi_Host *host; struct pci_dev *pdev; - u32 tag; u32 req_head; u32 req_tail; u32 status_head; @@ -309,40 +308,6 @@ static void stex_gettime(__le32 *time) *(time + 1) = cpu_to_le32((tv.tv_sec >> 16) >> 16); } -static u16 __stex_alloc_tag(unsigned long *bitmap) -{ - int i; - i = find_first_zero_bit(bitmap, TAG_BITMAP_LENGTH); - if (i < TAG_BITMAP_LENGTH) - __set_bit(i, bitmap); - return (u16)i; -} - -static u16 stex_alloc_tag(struct st_hba *hba, unsigned long *bitmap) -{ - unsigned long flags; - u16 tag; - - spin_lock_irqsave(hba->host->host_lock, flags); - tag = __stex_alloc_tag(bitmap); - spin_unlock_irqrestore(hba->host->host_lock, flags); - return tag; -} - -static void __stex_free_tag(unsigned long *bitmap, u16 tag) -{ - __clear_bit((int)tag, bitmap); -} - -static void stex_free_tag(struct st_hba *hba, unsigned long *bitmap, u16 tag) -{ - unsigned long flags; - - spin_lock_irqsave(hba->host->host_lock, flags); - __stex_free_tag(bitmap, tag); - spin_unlock_irqrestore(hba->host->host_lock, flags); -} - static struct status_msg *stex_get_status(struct st_hba *hba) { struct status_msg *status = @@ -534,58 +499,32 @@ stex_send_cmd(struct st_hba *hba, struct req_msg *req, u16 tag) readl(hba->mmio_base + IDBL); /* flush */ } +static int +stex_slave_alloc(struct scsi_device *sdev) +{ + /* Cheat: usually extracted from Inquiry data */ + sdev->tagged_supported = 1; + + scsi_activate_tcq(sdev, sdev->host->can_queue); + + return 0; +} + static int stex_slave_config(struct scsi_device *sdev) { sdev->use_10_for_rw = 1; sdev->use_10_for_ms = 1; sdev->timeout = 60 * HZ; + sdev->tagged_supported = 1; + return 0; } static void stex_slave_destroy(struct scsi_device *sdev) { - struct st_hba *hba = (struct st_hba *) sdev->host->hostdata; - struct req_msg *req; - unsigned long flags; - unsigned long before; - u16 tag; - - if (sdev->type != TYPE_DISK) - return; - - before = jiffies; - while ((tag = stex_alloc_tag(hba, (unsigned long *)&hba->tag)) - == TAG_BITMAP_LENGTH) { - if (time_after(jiffies, before + ST_INTERNAL_TIMEOUT * HZ)) - return; - msleep(10); - } - - spin_lock_irqsave(hba->host->host_lock, flags); - req = stex_alloc_req(hba); - memset(req->cdb, 0, STEX_CDB_LENGTH); - - req->target = sdev->id; - req->lun = sdev->channel; /* firmware lun issue work around */ - req->cdb[0] = SYNCHRONIZE_CACHE; - - hba->ccb[tag].cmd = NULL; - hba->ccb[tag].sg_count = 0; - hba->ccb[tag].sense_bufflen = 0; - hba->ccb[tag].sense_buffer = NULL; - hba->ccb[tag].req_type |= PASSTHRU_REQ_TYPE; - - stex_send_cmd(hba, req, tag); - spin_unlock_irqrestore(hba->host->host_lock, flags); - - wait_event_timeout(hba->waitq, - !(hba->ccb[tag].req_type), ST_INTERNAL_TIMEOUT * HZ); - if (hba->ccb[tag].req_type & PASSTHRU_REQ_TYPE) - return; - - stex_free_tag(hba, (unsigned long *)&hba->tag, tag); + scsi_deactivate_tcq(sdev, 1); } static int @@ -650,8 +589,9 @@ stex_queuecommand(struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd *)) cmd->scsi_done = done; - if (unlikely((tag = __stex_alloc_tag((unsigned long *)&hba->tag)) - == TAG_BITMAP_LENGTH)) + tag = cmd->request->tag; + + if (unlikely(tag >= host->can_queue)) return SCSI_MLQUEUE_HOST_BUSY; req = stex_alloc_req(hba); @@ -771,26 +711,18 @@ static void stex_mu_intr(struct st_hba *hba, u32 doorbell) while (hba->status_tail != hba->status_head) { resp = stex_get_status(hba); tag = le16_to_cpu(resp->tag); - if (unlikely(tag >= TAG_BITMAP_LENGTH)) { + if (unlikely(tag >= hba->host->can_queue)) { printk(KERN_WARNING DRV_NAME "(%s): invalid tag\n", pci_name(hba->pdev)); continue; } - if (unlikely((hba->tag & (1 << tag)) == 0)) { - printk(KERN_WARNING DRV_NAME - "(%s): null tag\n", pci_name(hba->pdev)); - continue; - } - hba->out_req_cnt--; ccb = &hba->ccb[tag]; if (hba->wait_ccb == ccb) hba->wait_ccb = NULL; if (unlikely(ccb->req == NULL)) { printk(KERN_WARNING DRV_NAME "(%s): lagging req\n", pci_name(hba->pdev)); - __stex_free_tag((unsigned long *)&hba->tag, tag); - stex_unmap_sg(hba, ccb->cmd); /* ??? */ continue; } @@ -808,7 +740,15 @@ static void stex_mu_intr(struct st_hba *hba, u32 doorbell) ccb->srb_status = resp->srb_status; ccb->scsi_status = resp->scsi_status; - if (ccb->req_type & PASSTHRU_REQ_TYPE) { + if (likely(ccb->cmd != NULL)) { + if (unlikely(ccb->cmd->cmnd[0] == PASSTHRU_CMD && + ccb->cmd->cmnd[1] == PASSTHRU_GET_ADAPTER)) + stex_controller_info(hba, ccb); + stex_unmap_sg(hba, ccb->cmd); + stex_scsi_done(ccb); + hba->out_req_cnt--; + } else if (ccb->req_type & PASSTHRU_REQ_TYPE) { + hba->out_req_cnt--; if (ccb->req_type & PASSTHRU_REQ_NO_WAKEUP) { ccb->req_type = 0; continue; @@ -816,14 +756,7 @@ static void stex_mu_intr(struct st_hba *hba, u32 doorbell) ccb->req_type = 0; if (waitqueue_active(&hba->waitq)) wake_up(&hba->waitq); - continue; } - if (ccb->cmd->cmnd[0] == PASSTHRU_CMD && - ccb->cmd->cmnd[1] == PASSTHRU_GET_ADAPTER) - stex_controller_info(hba, ccb); - __stex_free_tag((unsigned long *)&hba->tag, tag); - stex_unmap_sg(hba, ccb->cmd); - stex_scsi_done(ccb); } update_status: @@ -933,21 +866,24 @@ static int stex_abort(struct scsi_cmnd *cmd) { struct Scsi_Host *host = cmd->device->host; struct st_hba *hba = (struct st_hba *)host->hostdata; - u16 tag; + u16 tag = cmd->request->tag; void __iomem *base; u32 data; int result = SUCCESS; unsigned long flags; base = hba->mmio_base; spin_lock_irqsave(host->host_lock, flags); - - for (tag = 0; tag < MU_MAX_REQUEST; tag++) - if (hba->ccb[tag].cmd == cmd && (hba->tag & (1 << tag))) { - hba->wait_ccb = &(hba->ccb[tag]); - break; - } - if (tag >= MU_MAX_REQUEST) - goto out; + if (tag < host->can_queue && hba->ccb[tag].cmd == cmd) + hba->wait_ccb = &hba->ccb[tag]; + else { + for (tag = 0; tag < host->can_queue; tag++) + if (hba->ccb[tag].cmd == cmd) { + hba->wait_ccb = &hba->ccb[tag]; + break; + } + if (tag >= host->can_queue) + goto out; + } data = readl(base + ODBL); if (data == 0 || data == 0xffffffff) @@ -965,6 +901,7 @@ static int stex_abort(struct scsi_cmnd *cmd) } fail_out: + stex_unmap_sg(hba, cmd); hba->wait_ccb->req = NULL; /* nullify the req's future return */ hba->wait_ccb = NULL; result = FAILED; @@ -1025,7 +962,6 @@ static int stex_reset(struct scsi_cmnd *cmd) return FAILED; } spin_lock_irqsave(hba->host->host_lock, flags); - hba->tag = 0; hba->req_head = 0; hba->req_tail = 0; hba->status_head = 0; @@ -1061,6 +997,7 @@ static struct scsi_host_template driver_template = { .proc_name = DRV_NAME, .bios_param = stex_biosparam, .queuecommand = stex_queuecommand, + .slave_alloc = stex_slave_alloc, .slave_configure = stex_slave_config, .slave_destroy = stex_slave_destroy, .eh_abort_handler = stex_abort, @@ -1171,6 +1108,14 @@ stex_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (err) goto out_free_irq; + scsi_init_shared_tag_map(host, ST_CAN_QUEUE); + if (host->bqt == NULL) { + err = -ENOMEM; + printk(KERN_ERR DRV_NAME "(%s): init shared queue failed\n", + pci_name(pdev)); + goto out_free_irq; + } + pci_set_drvdata(pdev, hba); err = scsi_add_host(host, &pdev->dev); @@ -1206,15 +1151,7 @@ static void stex_hba_stop(struct st_hba *hba) struct req_msg *req; unsigned long flags; unsigned long before; - u16 tag; - - before = jiffies; - while ((tag = stex_alloc_tag(hba, (unsigned long *)&hba->tag)) - == TAG_BITMAP_LENGTH) { - if (time_after(jiffies, before + ST_INTERNAL_TIMEOUT * HZ)) - return; - msleep(10); - } + u16 tag = 0; spin_lock_irqsave(hba->host->host_lock, flags); req = stex_alloc_req(hba); @@ -1233,12 +1170,12 @@ static void stex_hba_stop(struct st_hba *hba) stex_send_cmd(hba, req, tag); spin_unlock_irqrestore(hba->host->host_lock, flags); - wait_event_timeout(hba->waitq, - !(hba->ccb[tag].req_type), ST_INTERNAL_TIMEOUT * HZ); - if (hba->ccb[tag].req_type & PASSTHRU_REQ_TYPE) - return; - - stex_free_tag(hba, (unsigned long *)&hba->tag, tag); + before = jiffies; + while (hba->ccb[tag].req_type & PASSTHRU_REQ_TYPE) { + if (time_after(jiffies, before + ST_INTERNAL_TIMEOUT * HZ)) + return; + msleep(10); + } } static void stex_hba_free(struct st_hba *hba) From deb81d80ba27da8dfabc29ccb5977db8f4942a0a Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Fri, 1 Sep 2006 09:28:48 -0400 Subject: [PATCH 0151/1063] [SCSI] add failure return to scsi_init_shared_tag_map() And use it in the stex driver. Signed-off-by: James Bottomley --- drivers/scsi/stex.c | 5 ++--- include/scsi/scsi_tcq.h | 3 ++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/stex.c b/drivers/scsi/stex.c index 15fb99f224ee..3cf3106a29b8 100644 --- a/drivers/scsi/stex.c +++ b/drivers/scsi/stex.c @@ -1108,9 +1108,8 @@ stex_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (err) goto out_free_irq; - scsi_init_shared_tag_map(host, ST_CAN_QUEUE); - if (host->bqt == NULL) { - err = -ENOMEM; + err = scsi_init_shared_tag_map(host, ST_CAN_QUEUE); + if (err) { printk(KERN_ERR DRV_NAME "(%s): init shared queue failed\n", pci_name(pdev)); goto out_free_irq; diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h index 4eea254b1ce9..d04d05adfa9b 100644 --- a/include/scsi/scsi_tcq.h +++ b/include/scsi/scsi_tcq.h @@ -138,9 +138,10 @@ static inline struct scsi_cmnd *scsi_find_tag(struct scsi_device *sdev, int tag) * @shost: the host to share the tag map among all devices * @depth: the total depth of the map */ -static inline void scsi_init_shared_tag_map(struct Scsi_Host *shost, int depth) +static inline int scsi_init_shared_tag_map(struct Scsi_Host *shost, int depth) { shost->bqt = blk_init_tags(depth); + return shost->bqt ? 0 : -ENOMEM; } #endif /* _SCSI_SCSI_TCQ_H */ From 84314fd4740ad73550c76dee4a9578979d84af48 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 18 Aug 2006 17:30:09 -0400 Subject: [PATCH 0152/1063] [SCSI] SCSI and FC Transport: add netlink support for posting of transport events This patch formally adds support for the posting of FC events via netlink. It is a followup to the original RFC at: http://marc.theaimsgroup.com/?l=linux-scsi&m=114530667923464&w=2 and the initial posting at: http://marc.theaimsgroup.com/?l=linux-scsi&m=115507374832500&w=2 The patch has been updated to optimize the send path, per the discussions in the initial posting. Per discussions at the Storage Summit and at OLS, we are to use netlink for async events from transports. Also per discussions, to avoid a netlink protocol per transport, I've create a single NETLINK_SCSITRANSPORT protocol, which can then be used by all transports. This patch: - Creates new files scsi_netlink.c and scsi_netlink.h, which contains the single and shared definitions for the SCSI Transport. It is tied into the base SCSI subsystem intialization. Contains a single interface routine, scsi_send_transport_event(), for a transport to send an event (via multicast to a protocol specific group). - Creates a new scsi_netlink_fc.h file, which contains the FC netlink event messages - Adds 3 new routines to the fc transport: fc_get_event_number() - to get a FC event # fc_host_post_event() - to send a simple FC event (32 bits of data) fc_host_post_vendor_event() - to send a Vendor unique event, with arbitrary amounts of data. Note: the separation of event number allows for a LLD to send a standard event, followed by vendor-specific data for the event. Note: This patch assumes 2 prior fc transport patches have been installed: http://marc.theaimsgroup.com/?l=linux-scsi&m=115555807316329&w=2 http://marc.theaimsgroup.com/?l=linux-scsi&m=115581614930261&w=2 Sorry - next time I'll do something like making these individual patches of the same posting when I know they'll be posted closely together. Signed-off-by: James Smart Tidy up configuration not to make SCSI always select NET Signed-off-by: James Bottomley --- drivers/scsi/Kconfig | 6 + drivers/scsi/Makefile | 1 + drivers/scsi/scsi.c | 3 + drivers/scsi/scsi_netlink.c | 199 ++++++++++++++++++++++++++++++ drivers/scsi/scsi_priv.h | 11 ++ drivers/scsi/scsi_transport_fc.c | 200 ++++++++++++++++++++++++++++++- include/linux/netlink.h | 2 + include/scsi/scsi_netlink.h | 86 +++++++++++++ include/scsi/scsi_netlink_fc.h | 71 +++++++++++ include/scsi/scsi_transport_fc.h | 34 ++++++ 10 files changed, 612 insertions(+), 1 deletion(-) create mode 100644 drivers/scsi/scsi_netlink.c create mode 100644 include/scsi/scsi_netlink.h create mode 100644 include/scsi/scsi_netlink_fc.h diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index c8c606589ea6..4d1998d23f0f 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -27,6 +27,11 @@ config SCSI However, do not compile this as a module if your root file system (the one containing the directory /) is located on a SCSI device. +config SCSI_NETLINK + tristate + default n + select NET + config SCSI_PROC_FS bool "legacy /proc/scsi/ support" depends on SCSI && PROC_FS @@ -222,6 +227,7 @@ config SCSI_SPI_ATTRS config SCSI_FC_ATTRS tristate "FiberChannel Transport Attributes" depends on SCSI + select SCSI_NETLINK help If you wish to export transport-specific information about each attached FiberChannel device to sysfs, say Y. diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index fd9aeb1ba07f..8fc2c594b537 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -159,6 +159,7 @@ scsi_mod-y += scsi.o hosts.o scsi_ioctl.o constants.o \ scsicam.o scsi_error.o scsi_lib.o \ scsi_scan.o scsi_sysfs.o \ scsi_devinfo.o +scsi_mod-$(CONFIG_SCSI_NETLINK) += scsi_netlink.o scsi_mod-$(CONFIG_SYSCTL) += scsi_sysctl.o scsi_mod-$(CONFIG_SCSI_PROC_FS) += scsi_proc.o diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 37843927e47f..eedfd059b82b 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -1118,6 +1118,8 @@ static int __init init_scsi(void) for_each_possible_cpu(i) INIT_LIST_HEAD(&per_cpu(scsi_done_q, i)); + scsi_netlink_init(); + printk(KERN_NOTICE "SCSI subsystem initialized\n"); return 0; @@ -1138,6 +1140,7 @@ cleanup_queue: static void __exit exit_scsi(void) { + scsi_netlink_exit(); scsi_sysfs_unregister(); scsi_exit_sysctl(); scsi_exit_hosts(); diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c new file mode 100644 index 000000000000..1b59b27e887f --- /dev/null +++ b/drivers/scsi/scsi_netlink.c @@ -0,0 +1,199 @@ +/* + * scsi_netlink.c - SCSI Transport Netlink Interface + * + * Copyright (C) 2006 James Smart, Emulex Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#include +#include +#include +#include +#include + +#include +#include "scsi_priv.h" + +struct sock *scsi_nl_sock = NULL; +EXPORT_SYMBOL_GPL(scsi_nl_sock); + + +/** + * scsi_nl_rcv_msg - + * Receive message handler. Extracts message from a receive buffer. + * Validates message header and calls appropriate transport message handler + * + * @skb: socket receive buffer + * + **/ +static void +scsi_nl_rcv_msg(struct sk_buff *skb) +{ + struct nlmsghdr *nlh; + struct scsi_nl_hdr *hdr; + uint32_t rlen; + int err; + + while (skb->len >= NLMSG_SPACE(0)) { + err = 0; + + nlh = (struct nlmsghdr *) skb->data; + if ((nlh->nlmsg_len < (sizeof(*nlh) + sizeof(*hdr))) || + (skb->len < nlh->nlmsg_len)) { + printk(KERN_WARNING "%s: discarding partial skb\n", + __FUNCTION__); + return; + } + + rlen = NLMSG_ALIGN(nlh->nlmsg_len); + if (rlen > skb->len) + rlen = skb->len; + + if (nlh->nlmsg_type != SCSI_TRANSPORT_MSG) { + err = -EBADMSG; + goto next_msg; + } + + hdr = NLMSG_DATA(nlh); + if ((hdr->version != SCSI_NL_VERSION) || + (hdr->magic != SCSI_NL_MAGIC)) { + err = -EPROTOTYPE; + goto next_msg; + } + + if (security_netlink_recv(skb, CAP_SYS_ADMIN)) { + err = -EPERM; + goto next_msg; + } + + if (nlh->nlmsg_len < (sizeof(*nlh) + hdr->msglen)) { + printk(KERN_WARNING "%s: discarding partial message\n", + __FUNCTION__); + return; + } + + /* + * We currently don't support anyone sending us a message + */ + +next_msg: + if ((err) || (nlh->nlmsg_flags & NLM_F_ACK)) + netlink_ack(skb, nlh, err); + + skb_pull(skb, rlen); + } +} + + +/** + * scsi_nl_rcv_msg - + * Receive handler for a socket. Extracts a received message buffer from + * the socket, and starts message processing. + * + * @sk: socket + * @len: unused + * + **/ +static void +scsi_nl_rcv(struct sock *sk, int len) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(&sk->sk_receive_queue))) { + scsi_nl_rcv_msg(skb); + kfree_skb(skb); + } +} + + +/** + * scsi_nl_rcv_event - + * Event handler for a netlink socket. + * + * @this: event notifier block + * @event: event type + * @ptr: event payload + * + **/ +static int +scsi_nl_rcv_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct netlink_notify *n = ptr; + + if (n->protocol != NETLINK_SCSITRANSPORT) + return NOTIFY_DONE; + + /* + * Currently, we are not tracking PID's, etc. There is nothing + * to handle. + */ + + return NOTIFY_DONE; +} + +static struct notifier_block scsi_netlink_notifier = { + .notifier_call = scsi_nl_rcv_event, +}; + + +/** + * scsi_netlink_init - + * Called by SCSI subsystem to intialize the SCSI transport netlink + * interface + * + **/ +void +scsi_netlink_init(void) +{ + int error; + + error = netlink_register_notifier(&scsi_netlink_notifier); + if (error) { + printk(KERN_ERR "%s: register of event handler failed - %d\n", + __FUNCTION__, error); + return; + } + + scsi_nl_sock = netlink_kernel_create(NETLINK_SCSITRANSPORT, + SCSI_NL_GRP_CNT, scsi_nl_rcv, THIS_MODULE); + if (!scsi_nl_sock) { + printk(KERN_ERR "%s: register of recieve handler failed\n", + __FUNCTION__); + netlink_unregister_notifier(&scsi_netlink_notifier); + } + + return; +} + + +/** + * scsi_netlink_exit - + * Called by SCSI subsystem to disable the SCSI transport netlink + * interface + * + **/ +void +scsi_netlink_exit(void) +{ + if (scsi_nl_sock) { + sock_release(scsi_nl_sock->sk_socket); + netlink_unregister_notifier(&scsi_netlink_notifier); + } + + return; +} + + diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index ae24c85aaeea..5d023d44e5e7 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -8,6 +8,7 @@ struct scsi_cmnd; struct scsi_device; struct scsi_host_template; struct Scsi_Host; +struct scsi_nl_hdr; /* @@ -110,6 +111,16 @@ extern void __scsi_remove_device(struct scsi_device *); extern struct bus_type scsi_bus_type; +/* scsi_netlink.c */ +#ifdef CONFIG_SCSI_NETLINK +extern void scsi_netlink_init(void); +extern void scsi_netlink_exit(void); +extern struct sock *scsi_nl_sock; +#else +static inline void scsi_netlink_init(void) {} +static inline void scsi_netlink_exit(void) {} +#endif + /* * internal scsi timeout functions: for use by mid-layer and transport * classes. diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 79d31ca2b741..05989f130554 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -32,6 +32,9 @@ #include #include #include +#include +#include +#include #include "scsi_priv.h" static int fc_queue_work(struct Scsi_Host *, struct work_struct *); @@ -93,6 +96,29 @@ fc_enum_name_search(port_type, fc_port_type, fc_port_type_names) #define FC_PORTTYPE_MAX_NAMELEN 50 +/* Convert fc_host_event_code values to ascii string name */ +static const struct { + enum fc_host_event_code value; + char *name; +} fc_host_event_code_names[] = { + { FCH_EVT_LIP, "lip" }, + { FCH_EVT_LINKUP, "link_up" }, + { FCH_EVT_LINKDOWN, "link_down" }, + { FCH_EVT_LIPRESET, "lip_reset" }, + { FCH_EVT_RSCN, "rscn" }, + { FCH_EVT_ADAPTER_CHANGE, "adapter_chg" }, + { FCH_EVT_PORT_UNKNOWN, "port_unknown" }, + { FCH_EVT_PORT_ONLINE, "port_online" }, + { FCH_EVT_PORT_OFFLINE, "port_offline" }, + { FCH_EVT_PORT_FABRIC, "port_fabric" }, + { FCH_EVT_LINK_UNKNOWN, "link_unknown" }, + { FCH_EVT_VENDOR_UNIQUE, "vendor_unique" }, +}; +fc_enum_name_search(host_event_code, fc_host_event_code, + fc_host_event_code_names) +#define FC_HOST_EVENT_CODE_MAX_NAMELEN 30 + + /* Convert fc_port_state values to ascii string name */ static struct { enum fc_port_state value; @@ -377,10 +403,182 @@ MODULE_PARM_DESC(dev_loss_tmo, " exceeded, the scsi target is removed. Value should be" " between 1 and SCSI_DEVICE_BLOCK_MAX_TIMEOUT."); +/** + * Netlink Infrastructure + **/ + +static atomic_t fc_event_seq; + +/** + * fc_get_event_number - Obtain the next sequential FC event number + * + * Notes: + * We could have inline'd this, but it would have required fc_event_seq to + * be exposed. For now, live with the subroutine call. + * Atomic used to avoid lock/unlock... + **/ +u32 +fc_get_event_number(void) +{ + return atomic_add_return(1, &fc_event_seq); +} +EXPORT_SYMBOL(fc_get_event_number); + + +/** + * fc_host_post_event - called to post an even on an fc_host. + * + * @shost: host the event occurred on + * @event_number: fc event number obtained from get_fc_event_number() + * @event_code: fc_host event being posted + * @event_data: 32bits of data for the event being posted + * + * Notes: + * This routine assumes no locks are held on entry. + **/ +void +fc_host_post_event(struct Scsi_Host *shost, u32 event_number, + enum fc_host_event_code event_code, u32 event_data) +{ + struct sk_buff *skb; + struct nlmsghdr *nlh; + struct fc_nl_event *event; + const char *name; + u32 len, skblen; + int err; + + if (!scsi_nl_sock) { + err = -ENOENT; + goto send_fail; + } + + len = FC_NL_MSGALIGN(sizeof(*event)); + skblen = NLMSG_SPACE(len); + + skb = alloc_skb(skblen, GFP_KERNEL); + if (!skb) { + err = -ENOBUFS; + goto send_fail; + } + + nlh = nlmsg_put(skb, 0, 0, SCSI_TRANSPORT_MSG, + skblen - sizeof(*nlh), 0); + if (!nlh) { + err = -ENOBUFS; + goto send_fail_skb; + } + event = NLMSG_DATA(nlh); + + INIT_SCSI_NL_HDR(&event->snlh, SCSI_NL_TRANSPORT_FC, + FC_NL_ASYNC_EVENT, len); + event->seconds = get_seconds(); + event->vendor_id = 0; + event->host_no = shost->host_no; + event->event_datalen = sizeof(u32); /* bytes */ + event->event_num = event_number; + event->event_code = event_code; + event->event_data = event_data; + + err = nlmsg_multicast(scsi_nl_sock, skb, 0, SCSI_NL_GRP_FC_EVENTS); + if (err && (err != -ESRCH)) /* filter no recipient errors */ + /* nlmsg_multicast already kfree_skb'd */ + goto send_fail; + + return; + +send_fail_skb: + kfree_skb(skb); +send_fail: + name = get_fc_host_event_code_name(event_code); + printk(KERN_WARNING + "%s: Dropped Event : host %d %s data 0x%08x - err %d\n", + __FUNCTION__, shost->host_no, + (name) ? name : "", event_data, err); + return; +} +EXPORT_SYMBOL(fc_host_post_event); + + +/** + * fc_host_post_vendor_event - called to post a vendor unique event on + * a fc_host + * + * @shost: host the event occurred on + * @event_number: fc event number obtained from get_fc_event_number() + * @data_len: amount, in bytes, of vendor unique data + * @data_buf: pointer to vendor unique data + * + * Notes: + * This routine assumes no locks are held on entry. + **/ +void +fc_host_post_vendor_event(struct Scsi_Host *shost, u32 event_number, + u32 data_len, char * data_buf, u32 vendor_id) +{ + struct sk_buff *skb; + struct nlmsghdr *nlh; + struct fc_nl_event *event; + u32 len, skblen; + int err; + + if (!scsi_nl_sock) { + err = -ENOENT; + goto send_vendor_fail; + } + + len = FC_NL_MSGALIGN(sizeof(*event) + data_len); + skblen = NLMSG_SPACE(len); + + skb = alloc_skb(skblen, GFP_KERNEL); + if (!skb) { + err = -ENOBUFS; + goto send_vendor_fail; + } + + nlh = nlmsg_put(skb, 0, 0, SCSI_TRANSPORT_MSG, + skblen - sizeof(*nlh), 0); + if (!nlh) { + err = -ENOBUFS; + goto send_vendor_fail_skb; + } + event = NLMSG_DATA(nlh); + + INIT_SCSI_NL_HDR(&event->snlh, SCSI_NL_TRANSPORT_FC, + FC_NL_ASYNC_EVENT, len); + event->seconds = get_seconds(); + event->vendor_id = vendor_id; + event->host_no = shost->host_no; + event->event_datalen = data_len; /* bytes */ + event->event_num = event_number; + event->event_code = FCH_EVT_VENDOR_UNIQUE; + memcpy(&event->event_data, data_buf, data_len); + + err = nlmsg_multicast(scsi_nl_sock, skb, 0, SCSI_NL_GRP_FC_EVENTS); + if (err && (err != -ESRCH)) /* filter no recipient errors */ + /* nlmsg_multicast already kfree_skb'd */ + goto send_vendor_fail; + + return; + +send_vendor_fail_skb: + kfree_skb(skb); +send_vendor_fail: + printk(KERN_WARNING + "%s: Dropped Event : host %d vendor_unique - err %d\n", + __FUNCTION__, shost->host_no, err); + return; +} +EXPORT_SYMBOL(fc_host_post_vendor_event); + + static __init int fc_transport_init(void) { - int error = transport_class_register(&fc_host_class); + int error; + + atomic_set(&fc_event_seq, 0); + + error = transport_class_register(&fc_host_class); if (error) return error; error = transport_class_register(&fc_rport_class); diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 855b44668caa..66411622e06e 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -21,6 +21,8 @@ #define NETLINK_DNRTMSG 14 /* DECnet routing messages */ #define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */ #define NETLINK_GENERIC 16 +/* leave room for NETLINK_DM (DM Events) */ +#define NETLINK_SCSITRANSPORT 18 /* SCSI Transports */ #define MAX_LINKS 32 diff --git a/include/scsi/scsi_netlink.h b/include/scsi/scsi_netlink.h new file mode 100644 index 000000000000..7a3a20e640c0 --- /dev/null +++ b/include/scsi/scsi_netlink.h @@ -0,0 +1,86 @@ +/* + * SCSI Transport Netlink Interface + * Used for the posting of outbound SCSI transport events + * + * Copyright (C) 2006 James Smart, Emulex Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef SCSI_NETLINK_H +#define SCSI_NETLINK_H + +/* + * This file intended to be included by both kernel and user space + */ + +/* Single Netlink Message type to send all SCSI Transport messages */ +#define SCSI_TRANSPORT_MSG NLMSG_MIN_TYPE + 1 + +/* SCSI Transport Broadcast Groups */ + /* leaving groups 0 and 1 unassigned */ +#define SCSI_NL_GRP_FC_EVENTS (1<<2) /* Group 2 */ +#define SCSI_NL_GRP_CNT 3 + + +/* SCSI_TRANSPORT_MSG event message header */ +struct scsi_nl_hdr { + uint8_t version; + uint8_t transport; + uint16_t magic; + uint16_t msgtype; + uint16_t msglen; +} __attribute__((aligned(sizeof(uint64_t)))); + +/* scsi_nl_hdr->version value */ +#define SCSI_NL_VERSION 1 + +/* scsi_nl_hdr->magic value */ +#define SCSI_NL_MAGIC 0xA1B2 + +/* scsi_nl_hdr->transport value */ +#define SCSI_NL_TRANSPORT 0 +#define SCSI_NL_TRANSPORT_FC 1 +#define SCSI_NL_MAX_TRANSPORTS 2 + +/* scsi_nl_hdr->msgtype values are defined in each transport */ + + +/* + * Vendor ID: + * If transports post vendor-unique events, they must pass a well-known + * 32-bit vendor identifier. This identifier consists of 8 bits indicating + * the "type" of identifier contained, and 24 bits of id data. + * + * Identifiers for each type: + * PCI : ID data is the 16 bit PCI Registered Vendor ID + */ +#define SCSI_NL_VID_ID_MASK 0x00FFFFFF +#define SCSI_NL_VID_TYPE_MASK 0xFF000000 +#define SCSI_NL_VID_TYPE_PCI 0x01000000 + + +#define INIT_SCSI_NL_HDR(hdr, t, mtype, mlen) \ + { \ + (hdr)->version = SCSI_NL_VERSION; \ + (hdr)->transport = t; \ + (hdr)->magic = SCSI_NL_MAGIC; \ + (hdr)->msgtype = mtype; \ + (hdr)->msglen = mlen; \ + } + + +#endif /* SCSI_NETLINK_H */ + diff --git a/include/scsi/scsi_netlink_fc.h b/include/scsi/scsi_netlink_fc.h new file mode 100644 index 000000000000..b213d2909fed --- /dev/null +++ b/include/scsi/scsi_netlink_fc.h @@ -0,0 +1,71 @@ +/* + * FC Transport Netlink Interface + * + * Copyright (C) 2006 James Smart, Emulex Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef SCSI_NETLINK_FC_H +#define SCSI_NETLINK_FC_H + +#include + +/* + * This file intended to be included by both kernel and user space + */ + +/* + * FC Transport Message Types + */ + /* kernel -> user */ +#define FC_NL_ASYNC_EVENT 0x0100 + /* user -> kernel */ +/* none */ + + +/* + * Message Structures : + */ + +/* macro to round up message lengths to 8byte boundary */ +#define FC_NL_MSGALIGN(len) (((len) + 7) & ~7) + + +/* + * FC Transport Broadcast Event Message : + * FC_NL_ASYNC_EVENT + * + * Note: if Vendor Unique message, &event_data will be start of + * vendor unique payload, and the length of the payload is + * per event_datalen + * + * Note: When specifying vendor_id, be sure to read the Vendor Type and ID + * formatting requirements specified in scsi_netlink.h + */ +struct fc_nl_event { + struct scsi_nl_hdr snlh; /* must be 1st element ! */ + uint64_t seconds; + uint32_t vendor_id; + uint16_t host_no; + uint16_t event_datalen; + uint32_t event_num; + uint32_t event_code; + uint32_t event_data; +} __attribute__((aligned(sizeof(uint64_t)))); + + +#endif /* SCSI_NETLINK_FC_H */ + diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index c74be5dabfeb..f91c5358af3a 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -29,6 +29,7 @@ #include #include +#include struct scsi_transport_template; @@ -283,6 +284,30 @@ struct fc_host_statistics { }; +/* + * FC Event Codes - Polled and Async, following FC HBAAPI v2.0 guidelines + */ + +/* + * fc_host_event_code: If you alter this, you also need to alter + * scsi_transport_fc.c (for the ascii descriptions). + */ +enum fc_host_event_code { + FCH_EVT_LIP = 0x1, + FCH_EVT_LINKUP = 0x2, + FCH_EVT_LINKDOWN = 0x3, + FCH_EVT_LIPRESET = 0x4, + FCH_EVT_RSCN = 0x5, + FCH_EVT_ADAPTER_CHANGE = 0x103, + FCH_EVT_PORT_UNKNOWN = 0x200, + FCH_EVT_PORT_OFFLINE = 0x201, + FCH_EVT_PORT_ONLINE = 0x202, + FCH_EVT_PORT_FABRIC = 0x204, + FCH_EVT_LINK_UNKNOWN = 0x500, + FCH_EVT_VENDOR_UNIQUE = 0xffff, +}; + + /* * FC Local Port (Host) Attributes * @@ -526,5 +551,14 @@ struct fc_rport *fc_remote_port_add(struct Scsi_Host *shost, void fc_remote_port_delete(struct fc_rport *rport); void fc_remote_port_rolechg(struct fc_rport *rport, u32 roles); int scsi_is_fc_rport(const struct device *); +u32 fc_get_event_number(void); +void fc_host_post_event(struct Scsi_Host *shost, u32 event_number, + enum fc_host_event_code event_code, u32 event_data); +void fc_host_post_vendor_event(struct Scsi_Host *shost, u32 event_number, + u32 data_len, char * data_buf, u32 vendor_id); + /* Note: when specifying vendor_id to fc_host_post_vendor_event() + * be sure to read the Vendor Type and ID formatting requirements + * specified in scsi_netlink.h + */ #endif /* SCSI_TRANSPORT_FC_H */ From f14e2e29cdd07f80de6dec168dc2bb39de37eec3 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 22 Aug 2006 09:55:23 -0400 Subject: [PATCH 0153/1063] [SCSI] SCSI & FC transport: extend event vendor id's to 64bits During discussions with Mike Christie, I became convinced that we needed a larger vendor id. This patch extends the id from 32 to 64 bits. This applies on top of the prior patches that add SCSI transport events via netlink. Signed-off-by: James Smart Signed-off-by: James Bottomley --- drivers/scsi/scsi_transport_fc.c | 2 +- include/scsi/scsi_netlink.h | 7 ++++--- include/scsi/scsi_netlink_fc.h | 2 +- include/scsi/scsi_transport_fc.h | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 05989f130554..293188cbff8c 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -513,7 +513,7 @@ EXPORT_SYMBOL(fc_host_post_event); **/ void fc_host_post_vendor_event(struct Scsi_Host *shost, u32 event_number, - u32 data_len, char * data_buf, u32 vendor_id) + u32 data_len, char * data_buf, u64 vendor_id) { struct sk_buff *skb; struct nlmsghdr *nlh; diff --git a/include/scsi/scsi_netlink.h b/include/scsi/scsi_netlink.h index 7a3a20e640c0..8c1470cc8209 100644 --- a/include/scsi/scsi_netlink.h +++ b/include/scsi/scsi_netlink.h @@ -67,9 +67,10 @@ struct scsi_nl_hdr { * Identifiers for each type: * PCI : ID data is the 16 bit PCI Registered Vendor ID */ -#define SCSI_NL_VID_ID_MASK 0x00FFFFFF -#define SCSI_NL_VID_TYPE_MASK 0xFF000000 -#define SCSI_NL_VID_TYPE_PCI 0x01000000 +#define SCSI_NL_VID_TYPE_SHIFT 56 +#define SCSI_NL_VID_TYPE_MASK ((u64)0xFF << SCSI_NL_VID_TYPE_SHIFT) +#define SCSI_NL_VID_TYPE_PCI ((u64)0x01 << SCSI_NL_VID_TYPE_SHIFT) +#define SCSI_NL_VID_ID_MASK (~ SCSI_NL_VID_TYPE_MASK) #define INIT_SCSI_NL_HDR(hdr, t, mtype, mlen) \ diff --git a/include/scsi/scsi_netlink_fc.h b/include/scsi/scsi_netlink_fc.h index b213d2909fed..cbf76e479761 100644 --- a/include/scsi/scsi_netlink_fc.h +++ b/include/scsi/scsi_netlink_fc.h @@ -58,7 +58,7 @@ struct fc_nl_event { struct scsi_nl_hdr snlh; /* must be 1st element ! */ uint64_t seconds; - uint32_t vendor_id; + uint64_t vendor_id; uint16_t host_no; uint16_t event_datalen; uint32_t event_num; diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index f91c5358af3a..0b11eff989e0 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -555,7 +555,7 @@ u32 fc_get_event_number(void); void fc_host_post_event(struct Scsi_Host *shost, u32 event_number, enum fc_host_event_code event_code, u32 event_data); void fc_host_post_vendor_event(struct Scsi_Host *shost, u32 event_number, - u32 data_len, char * data_buf, u32 vendor_id); + u32 data_len, char * data_buf, u64 vendor_id); /* Note: when specifying vendor_id to fc_host_post_vendor_event() * be sure to read the Vendor Type and ID formatting requirements * specified in scsi_netlink.h From d2873e4c1ef293ee6d66456fb84448e258a487fa Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 18 Aug 2006 17:46:43 -0400 Subject: [PATCH 0154/1063] [SCSI] lpfc 8.1.10 : Add support to post events via new FC event interfaces Add support to post events via new FC event interfaces Signed-off-by: James Smart Signed-off-by: James Bottomley --- drivers/scsi/lpfc/lpfc.h | 2 ++ drivers/scsi/lpfc/lpfc_els.c | 5 +++++ drivers/scsi/lpfc/lpfc_hbadisc.c | 6 ++++++ drivers/scsi/lpfc/lpfc_init.c | 6 ++++++ 4 files changed, 19 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index d44f9aac6b8f..4a4048d7ba91 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -391,3 +391,5 @@ struct rnidrsp { struct list_head list; uint32_t data; }; + +#define FC_REG_DUMP_EVENT 0x10 /* Register for Dump events */ diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 3567de613162..71864cdc6c71 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -2506,6 +2506,7 @@ lpfc_els_rcv_rscn(struct lpfc_hba * phba, uint32_t *lp; IOCB_t *icmd; uint32_t payload_len, cmd; + int i; icmd = &cmdiocb->iocb; pcmd = (struct lpfc_dmabuf *) cmdiocb->context2; @@ -2524,6 +2525,10 @@ lpfc_els_rcv_rscn(struct lpfc_hba * phba, phba->brd_no, phba->fc_flag, payload_len, *lp, phba->fc_rscn_id_cnt); + for (i = 0; i < payload_len/sizeof(uint32_t); i++) + fc_host_post_event(phba->host, fc_get_event_number(), + FCH_EVT_RSCN, lp[i]); + /* If we are about to begin discovery, just ACC the RSCN. * Discovery processing will satisfy it. */ diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index b2f1552f1848..53821e5778b3 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -340,6 +340,9 @@ lpfc_linkdown(struct lpfc_hba * phba) spin_unlock_irq(phba->host->host_lock); } + fc_host_post_event(phba->host, fc_get_event_number(), + FCH_EVT_LINKDOWN, 0); + /* Clean up any firmware default rpi's */ if ((mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))) { lpfc_unreg_did(phba, 0xffffffff, mb); @@ -427,6 +430,9 @@ lpfc_linkup(struct lpfc_hba * phba) struct list_head *listp, *node_list[7]; int i; + fc_host_post_event(phba->host, fc_get_event_number(), + FCH_EVT_LINKUP, 0); + spin_lock_irq(phba->host->host_lock); phba->hba_state = LPFC_LINK_UP; phba->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI | FC_ABORT_DISCOVERY | diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index f6948ffe689a..84e7fc595f5e 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -511,6 +511,7 @@ lpfc_handle_eratt(struct lpfc_hba * phba) { struct lpfc_sli *psli = &phba->sli; struct lpfc_sli_ring *pring; + uint32_t event_data; if (phba->work_hs & HS_FFER6) { /* Re-establishing Link */ @@ -555,6 +556,11 @@ lpfc_handle_eratt(struct lpfc_hba * phba) phba->brd_no, phba->work_hs, phba->work_status[0], phba->work_status[1]); + event_data = FC_REG_DUMP_EVENT; + fc_host_post_vendor_event(phba->host, fc_get_event_number(), + sizeof(event_data), (char *) &event_data, + SCSI_NL_VID_TYPE_PCI | PCI_VENDOR_ID_EMULEX); + psli->sli_flag &= ~LPFC_SLI2_ACTIVE; lpfc_offline(phba); phba->hba_state = LPFC_HBA_ERROR; From ae36764a230ff6a278ed93735acf5fcda08f2786 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 18 Aug 2006 17:46:53 -0400 Subject: [PATCH 0155/1063] [SCSI] lpfc 8.1.10 : Add support to return adapter symbolic name Add support to return adapter symbolic name (now that attribute is dynamic) Signed-off-by: James Smart Signed-off-by: James Bottomley --- drivers/scsi/lpfc/lpfc_attr.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index d384c16f4a87..c6d683d86cff 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -1204,6 +1204,15 @@ lpfc_get_host_fabric_name (struct Scsi_Host *shost) fc_host_fabric_name(shost) = node_name; } +static void +lpfc_get_host_symbolic_name (struct Scsi_Host *shost) +{ + struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata; + + spin_lock_irq(shost->host_lock); + lpfc_get_hba_sym_node_name(phba, fc_host_symbolic_name(shost)); + spin_unlock_irq(shost->host_lock); +} static struct fc_host_statistics * lpfc_get_stats(struct Scsi_Host *shost) @@ -1486,7 +1495,6 @@ struct fc_function_template lpfc_transport_functions = { .show_host_port_name = 1, .show_host_supported_classes = 1, .show_host_supported_fc4s = 1, - .show_host_symbolic_name = 1, .show_host_supported_speeds = 1, .show_host_maxframe_size = 1, @@ -1509,6 +1517,9 @@ struct fc_function_template lpfc_transport_functions = { .get_host_fabric_name = lpfc_get_host_fabric_name, .show_host_fabric_name = 1, + .get_host_symbolic_name = lpfc_get_host_symbolic_name, + .show_host_symbolic_name = 1, + /* * The LPFC driver treats linkdown handling as target loss events * so there are no sysfs handlers for link_down_tmo. From 0f29b966d60e9a4f5ecff9f3832257b38aea4f13 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 18 Aug 2006 17:33:29 -0400 Subject: [PATCH 0156/1063] [SCSI] FC transport: Add dev_loss_tmo callbacks, and new fast_io_fail_tmo w/ callback This patch adds the following functionality to the FC transport: - dev_loss_tmo LLDD callback : Called to essentially confirm the deletion of an rport. Thus, it is called whenever the dev_loss_tmo fires, or when the rport is deleted due to other circumstances (module unload, etc). It is expected that the callback will initiate the termination of any outstanding i/o on the rport. - fast_io_fail_tmo and LLD callback: There are some cases where it may take a long while to truly determine device loss, but the system is in a multipathing configuration that if the i/o was failed quickly (faster than dev_loss_tmo), it could be redirected to a different path and completed sooner. Many thanks to Mike Reed who cleaned up the initial RFC in support of this post. The original RFC is at: http://marc.theaimsgroup.com/?l=linux-scsi&m=115505981027246&w=2 Signed-off-by: James Smart Signed-off-by: James Bottomley --- drivers/scsi/scsi_transport_fc.c | 134 ++++++++++++++++++++++++++++--- include/scsi/scsi_transport_fc.h | 5 ++ 2 files changed, 128 insertions(+), 11 deletions(-) diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 293188cbff8c..4ab176ed480d 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -242,6 +242,7 @@ fc_bitfield_name_search(remote_port_roles, fc_remote_port_role_names) static void fc_timeout_deleted_rport(void *data); +static void fc_timeout_fail_rport_io(void *data); static void fc_scsi_scan_rport(void *data); /* @@ -249,7 +250,7 @@ static void fc_scsi_scan_rport(void *data); * Increase these values if you add attributes */ #define FC_STARGET_NUM_ATTRS 3 -#define FC_RPORT_NUM_ATTRS 9 +#define FC_RPORT_NUM_ATTRS 10 #define FC_HOST_NUM_ATTRS 17 struct fc_internal { @@ -622,11 +623,14 @@ store_fc_rport_##field(struct class_device *cdev, const char *buf, \ struct fc_rport *rport = transport_class_to_rport(cdev); \ struct Scsi_Host *shost = rport_to_shost(rport); \ struct fc_internal *i = to_fc_internal(shost->transportt); \ + char *cp; \ if ((rport->port_state == FC_PORTSTATE_BLOCKED) || \ (rport->port_state == FC_PORTSTATE_DELETED) || \ (rport->port_state == FC_PORTSTATE_NOTPRESENT)) \ return -EBUSY; \ - val = simple_strtoul(buf, NULL, 0); \ + val = simple_strtoul(buf, &cp, 0); \ + if (*cp && (*cp != '\n')) \ + return -EINVAL; \ i->f->set_rport_##field(rport, val); \ return count; \ } @@ -708,6 +712,13 @@ static FC_CLASS_DEVICE_ATTR(rport, title, S_IRUGO, \ if (i->f->show_rport_##field) \ count++ +#define SETUP_PRIVATE_RPORT_ATTRIBUTE_RW(field) \ +{ \ + i->private_rport_attrs[count] = class_device_attr_rport_##field; \ + i->rport_attrs[count] = &i->private_rport_attrs[count]; \ + count++; \ +} + /* The FC Transport Remote Port Attributes: */ @@ -740,12 +751,14 @@ store_fc_rport_dev_loss_tmo(struct class_device *cdev, const char *buf, struct fc_rport *rport = transport_class_to_rport(cdev); struct Scsi_Host *shost = rport_to_shost(rport); struct fc_internal *i = to_fc_internal(shost->transportt); + char *cp; if ((rport->port_state == FC_PORTSTATE_BLOCKED) || (rport->port_state == FC_PORTSTATE_DELETED) || (rport->port_state == FC_PORTSTATE_NOTPRESENT)) return -EBUSY; - val = simple_strtoul(buf, NULL, 0); - if ((val < 0) || (val > SCSI_DEVICE_BLOCK_MAX_TIMEOUT)) + val = simple_strtoul(buf, &cp, 0); + if ((*cp && (*cp != '\n')) || + (val < 0) || (val > SCSI_DEVICE_BLOCK_MAX_TIMEOUT)) return -EINVAL; i->f->set_rport_dev_loss_tmo(rport, val); return count; @@ -795,6 +808,44 @@ static FC_CLASS_DEVICE_ATTR(rport, roles, S_IRUGO, fc_private_rport_rd_enum_attr(port_state, FC_PORTSTATE_MAX_NAMELEN); fc_private_rport_rd_attr(scsi_target_id, "%d\n", 20); +/* + * fast_io_fail_tmo attribute + */ +static ssize_t +show_fc_rport_fast_io_fail_tmo (struct class_device *cdev, char *buf) +{ + struct fc_rport *rport = transport_class_to_rport(cdev); + + if (rport->fast_io_fail_tmo == -1) + return snprintf(buf, 5, "off\n"); + return snprintf(buf, 20, "%d\n", rport->fast_io_fail_tmo); +} + +static ssize_t +store_fc_rport_fast_io_fail_tmo(struct class_device *cdev, const char *buf, + size_t count) +{ + int val; + char *cp; + struct fc_rport *rport = transport_class_to_rport(cdev); + + if ((rport->port_state == FC_PORTSTATE_BLOCKED) || + (rport->port_state == FC_PORTSTATE_DELETED) || + (rport->port_state == FC_PORTSTATE_NOTPRESENT)) + return -EBUSY; + if (strncmp(buf, "off", 3) == 0) + rport->fast_io_fail_tmo = -1; + else { + val = simple_strtoul(buf, &cp, 0); + if ((*cp && (*cp != '\n')) || + (val < 0) || (val >= rport->dev_loss_tmo)) + return -EINVAL; + rport->fast_io_fail_tmo = val; + } + return count; +} +static FC_CLASS_DEVICE_ATTR(rport, fast_io_fail_tmo, S_IRUGO | S_IWUSR, + show_fc_rport_fast_io_fail_tmo, store_fc_rport_fast_io_fail_tmo); /* @@ -880,8 +931,11 @@ store_fc_host_##field(struct class_device *cdev, const char *buf, \ int val; \ struct Scsi_Host *shost = transport_class_to_shost(cdev); \ struct fc_internal *i = to_fc_internal(shost->transportt); \ + char *cp; \ \ - val = simple_strtoul(buf, NULL, 0); \ + val = simple_strtoul(buf, &cp, 0); \ + if (*cp && (*cp != '\n')) \ + return -EINVAL; \ i->f->set_host_##field(shost, val); \ return count; \ } @@ -1481,6 +1535,8 @@ fc_attach_transport(struct fc_function_template *ft) SETUP_PRIVATE_RPORT_ATTRIBUTE_RD(roles); SETUP_PRIVATE_RPORT_ATTRIBUTE_RD(port_state); SETUP_PRIVATE_RPORT_ATTRIBUTE_RD(scsi_target_id); + if (ft->terminate_rport_io) + SETUP_PRIVATE_RPORT_ATTRIBUTE_RW(fast_io_fail_tmo); BUG_ON(count > FC_RPORT_NUM_ATTRS); @@ -1552,7 +1608,7 @@ fc_flush_work(struct Scsi_Host *shost) * @delay: jiffies to delay the work queuing * * Return value: - * 0 on success / != 0 for error + * 1 on success / 0 already queued / < 0 for error **/ static int fc_queue_devloss_work(struct Scsi_Host *shost, struct work_struct *work, @@ -1567,6 +1623,9 @@ fc_queue_devloss_work(struct Scsi_Host *shost, struct work_struct *work, return -EINVAL; } + if (delay == 0) + return queue_work(fc_host_devloss_work_q(shost), work); + return queue_delayed_work(fc_host_devloss_work_q(shost), work, delay); } @@ -1659,10 +1718,23 @@ fc_starget_delete(void *data) struct fc_rport *rport = (struct fc_rport *)data; struct Scsi_Host *shost = rport_to_shost(rport); unsigned long flags; + struct fc_internal *i = to_fc_internal(shost->transportt); + + /* + * Involve the LLDD if possible. All io on the rport is to + * be terminated, either as part of the dev_loss_tmo callback + * processing, or via the terminate_rport_io function. + */ + if (i->f->dev_loss_tmo_callbk) + i->f->dev_loss_tmo_callbk(rport); + else if (i->f->terminate_rport_io) + i->f->terminate_rport_io(rport); spin_lock_irqsave(shost->host_lock, flags); if (rport->flags & FC_RPORT_DEVLOSS_PENDING) { spin_unlock_irqrestore(shost->host_lock, flags); + if (!cancel_delayed_work(&rport->fail_io_work)) + fc_flush_devloss(shost); if (!cancel_delayed_work(&rport->dev_loss_work)) fc_flush_devloss(shost); spin_lock_irqsave(shost->host_lock, flags); @@ -1685,10 +1757,7 @@ fc_rport_final_delete(void *data) struct fc_rport *rport = (struct fc_rport *)data; struct device *dev = &rport->dev; struct Scsi_Host *shost = rport_to_shost(rport); - - /* Delete SCSI target and sdevs */ - if (rport->scsi_target_id != -1) - fc_starget_delete(data); + struct fc_internal *i = to_fc_internal(shost->transportt); /* * if a scan is pending, flush the SCSI Host work_q so that @@ -1697,6 +1766,14 @@ fc_rport_final_delete(void *data) if (rport->flags & FC_RPORT_SCAN_PENDING) scsi_flush_work(shost); + /* Delete SCSI target and sdevs */ + if (rport->scsi_target_id != -1) + fc_starget_delete(data); + else if (i->f->dev_loss_tmo_callbk) + i->f->dev_loss_tmo_callbk(rport); + else if (i->f->terminate_rport_io) + i->f->terminate_rport_io(rport); + transport_remove_device(dev); device_del(dev); transport_destroy_device(dev); @@ -1748,8 +1825,10 @@ fc_rport_create(struct Scsi_Host *shost, int channel, if (fci->f->dd_fcrport_size) rport->dd_data = &rport[1]; rport->channel = channel; + rport->fast_io_fail_tmo = -1; INIT_WORK(&rport->dev_loss_work, fc_timeout_deleted_rport, rport); + INIT_WORK(&rport->fail_io_work, fc_timeout_fail_rport_io, rport); INIT_WORK(&rport->scan_work, fc_scsi_scan_rport, rport); INIT_WORK(&rport->stgt_delete_work, fc_starget_delete, rport); INIT_WORK(&rport->rport_delete_work, fc_rport_final_delete, rport); @@ -1913,11 +1992,13 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel, /* restart the target */ /* - * Stop the target timer first. Take no action + * Stop the target timers first. Take no action * on the del_timer failure as the state * machine state change will validate the * transaction. */ + if (!cancel_delayed_work(&rport->fail_io_work)) + fc_flush_devloss(shost); if (!cancel_delayed_work(work)) fc_flush_devloss(shost); @@ -2061,6 +2142,7 @@ void fc_remote_port_delete(struct fc_rport *rport) { struct Scsi_Host *shost = rport_to_shost(rport); + struct fc_internal *i = to_fc_internal(shost->transportt); int timeout = rport->dev_loss_tmo; unsigned long flags; @@ -2091,6 +2173,12 @@ fc_remote_port_delete(struct fc_rport *rport) scsi_target_block(&rport->dev); + /* see if we need to kill io faster than waiting for device loss */ + if ((rport->fast_io_fail_tmo != -1) && + (rport->fast_io_fail_tmo < timeout) && (i->f->terminate_rport_io)) + fc_queue_devloss_work(shost, &rport->fail_io_work, + rport->fast_io_fail_tmo * HZ); + /* cap the length the devices can be blocked until they are deleted */ fc_queue_devloss_work(shost, &rport->dev_loss_work, timeout * HZ); } @@ -2150,6 +2238,8 @@ fc_remote_port_rolechg(struct fc_rport *rport, u32 roles) * machine state change will validate the * transaction. */ + if (!cancel_delayed_work(&rport->fail_io_work)) + fc_flush_devloss(shost); if (!cancel_delayed_work(&rport->dev_loss_work)) fc_flush_devloss(shost); @@ -2270,6 +2360,28 @@ fc_timeout_deleted_rport(void *data) fc_queue_work(shost, &rport->stgt_delete_work); } +/** + * fc_timeout_fail_rport_io - Timeout handler for a fast io failing on a + * disconnected SCSI target. + * + * @data: rport to terminate io on. + * + * Notes: Only requests the failure of the io, not that all are flushed + * prior to returning. + **/ +static void +fc_timeout_fail_rport_io(void *data) +{ + struct fc_rport *rport = (struct fc_rport *)data; + struct Scsi_Host *shost = rport_to_shost(rport); + struct fc_internal *i = to_fc_internal(shost->transportt); + + if (rport->port_state != FC_PORTSTATE_BLOCKED) + return; + + i->f->terminate_rport_io(rport); +} + /** * fc_scsi_scan_rport - called to perform a scsi scan on a remote port. * diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index 0b11eff989e0..fd352323378b 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -195,6 +195,7 @@ struct fc_rport { /* aka fc_starget_attrs */ u32 roles; enum fc_port_state port_state; /* Will only be ONLINE or UNKNOWN */ u32 scsi_target_id; + u32 fast_io_fail_tmo; /* exported data */ void *dd_data; /* Used for driver-specific storage */ @@ -207,6 +208,7 @@ struct fc_rport { /* aka fc_starget_attrs */ struct device dev; struct work_struct dev_loss_work; struct work_struct scan_work; + struct work_struct fail_io_work; struct work_struct stgt_delete_work; struct work_struct rport_delete_work; } __attribute__((aligned(sizeof(unsigned long)))); @@ -445,6 +447,9 @@ struct fc_function_template { int (*issue_fc_host_lip)(struct Scsi_Host *); + void (*dev_loss_tmo_callbk)(struct fc_rport *); + void (*terminate_rport_io)(struct fc_rport *); + /* allocation lengths for host-specific data */ u32 dd_fcrport_size; From c01f32087960edd60a302ad62ad6b8b525e4aeec Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 18 Aug 2006 17:47:08 -0400 Subject: [PATCH 0157/1063] [SCSI] lpfc 8.1.10 : Add support for dev_loss_tmo_callbk and fast_io_fail_tmo_callbk Add support for new dev_loss_tmo callback Goodness is that it removes code for a parallel nodev timer that existed in the driver Add support for the new fast_io_fail callback Signed-off-by: James Smart Signed-off-by: James Bottomley --- drivers/scsi/lpfc/lpfc.h | 3 + drivers/scsi/lpfc/lpfc_attr.c | 155 ++++++++++++++++++++----- drivers/scsi/lpfc/lpfc_crtn.h | 3 + drivers/scsi/lpfc/lpfc_ct.c | 25 ---- drivers/scsi/lpfc/lpfc_disc.h | 6 +- drivers/scsi/lpfc/lpfc_hbadisc.c | 180 +++++++++++------------------ drivers/scsi/lpfc/lpfc_nportdisc.c | 2 +- drivers/scsi/lpfc/lpfc_scsi.c | 10 +- 8 files changed, 210 insertions(+), 174 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 4a4048d7ba91..efec44d267c7 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -285,6 +285,7 @@ struct lpfc_hba { uint32_t cfg_log_verbose; uint32_t cfg_lun_queue_depth; uint32_t cfg_nodev_tmo; + uint32_t cfg_devloss_tmo; uint32_t cfg_hba_queue_depth; uint32_t cfg_fcp_class; uint32_t cfg_use_adisc; @@ -303,6 +304,8 @@ struct lpfc_hba { uint32_t cfg_sg_seg_cnt; uint32_t cfg_sg_dma_buf_size; + uint32_t dev_loss_tmo_changed; + lpfc_vpd_t vpd; /* vital product data */ struct Scsi_Host *host; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index c6d683d86cff..0de69324212e 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -39,6 +39,9 @@ #include "lpfc_compat.h" #include "lpfc_crtn.h" +#define LPFC_DEF_DEVLOSS_TMO 30 +#define LPFC_MIN_DEVLOSS_TMO 1 +#define LPFC_MAX_DEVLOSS_TMO 255 static void lpfc_jedec_to_ascii(int incr, char hdw[]) @@ -558,6 +561,123 @@ MODULE_PARM_DESC(lpfc_poll, "FCP ring polling mode control:" static CLASS_DEVICE_ATTR(lpfc_poll, S_IRUGO | S_IWUSR, lpfc_poll_show, lpfc_poll_store); +/* +# lpfc_nodev_tmo: If set, it will hold all I/O errors on devices that disappear +# until the timer expires. Value range is [0,255]. Default value is 30. +*/ +static int lpfc_nodev_tmo = LPFC_DEF_DEVLOSS_TMO; +static int lpfc_devloss_tmo = LPFC_DEF_DEVLOSS_TMO; +module_param(lpfc_nodev_tmo, int, 0); +MODULE_PARM_DESC(lpfc_nodev_tmo, + "Seconds driver will hold I/O waiting " + "for a device to come back"); +static ssize_t +lpfc_nodev_tmo_show(struct class_device *cdev, char *buf) +{ + struct Scsi_Host *host = class_to_shost(cdev); + struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + int val = 0; + val = phba->cfg_devloss_tmo; + return snprintf(buf, PAGE_SIZE, "%d\n", + phba->cfg_devloss_tmo); +} + +static int +lpfc_nodev_tmo_init(struct lpfc_hba *phba, int val) +{ + static int warned; + if (phba->cfg_devloss_tmo != LPFC_DEF_DEVLOSS_TMO) { + phba->cfg_nodev_tmo = phba->cfg_devloss_tmo; + if (!warned && val != LPFC_DEF_DEVLOSS_TMO) { + warned = 1; + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "%d:0402 Ignoring nodev_tmo module " + "parameter because devloss_tmo is" + " set.\n", + phba->brd_no); + } + return 0; + } + + if (val >= LPFC_MIN_DEVLOSS_TMO && val <= LPFC_MAX_DEVLOSS_TMO) { + phba->cfg_nodev_tmo = val; + phba->cfg_devloss_tmo = val; + return 0; + } + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "%d:0400 lpfc_nodev_tmo attribute cannot be set to %d, " + "allowed range is [%d, %d]\n", + phba->brd_no, val, + LPFC_MIN_DEVLOSS_TMO, LPFC_MAX_DEVLOSS_TMO); + phba->cfg_nodev_tmo = LPFC_DEF_DEVLOSS_TMO; + return -EINVAL; +} + +static int +lpfc_nodev_tmo_set(struct lpfc_hba *phba, int val) +{ + if (phba->dev_loss_tmo_changed || + (lpfc_devloss_tmo != LPFC_DEF_DEVLOSS_TMO)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "%d:0401 Ignoring change to nodev_tmo " + "because devloss_tmo is set.\n", + phba->brd_no); + return 0; + } + + if (val >= LPFC_MIN_DEVLOSS_TMO && val <= LPFC_MAX_DEVLOSS_TMO) { + phba->cfg_nodev_tmo = val; + phba->cfg_devloss_tmo = val; + return 0; + } + + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "%d:0403 lpfc_nodev_tmo attribute cannot be set to %d, " + "allowed range is [%d, %d]\n", + phba->brd_no, val, LPFC_MIN_DEVLOSS_TMO, + LPFC_MAX_DEVLOSS_TMO); + return -EINVAL; +} + +lpfc_param_store(nodev_tmo) + +static CLASS_DEVICE_ATTR(lpfc_nodev_tmo, S_IRUGO | S_IWUSR, + lpfc_nodev_tmo_show, lpfc_nodev_tmo_store); + +/* +# lpfc_devloss_tmo: If set, it will hold all I/O errors on devices that +# disappear until the timer expires. Value range is [0,255]. Default +# value is 30. +*/ +module_param(lpfc_devloss_tmo, int, 0); +MODULE_PARM_DESC(lpfc_devloss_tmo, + "Seconds driver will hold I/O waiting " + "for a device to come back"); +lpfc_param_init(devloss_tmo, LPFC_DEF_DEVLOSS_TMO, + LPFC_MIN_DEVLOSS_TMO, LPFC_MAX_DEVLOSS_TMO) +lpfc_param_show(devloss_tmo) +static int +lpfc_devloss_tmo_set(struct lpfc_hba *phba, int val) +{ + if (val >= LPFC_MIN_DEVLOSS_TMO && val <= LPFC_MAX_DEVLOSS_TMO) { + phba->cfg_nodev_tmo = val; + phba->cfg_devloss_tmo = val; + phba->dev_loss_tmo_changed = 1; + return 0; + } + + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "%d:0404 lpfc_devloss_tmo attribute cannot be set to" + " %d, allowed range is [%d, %d]\n", + phba->brd_no, val, LPFC_MIN_DEVLOSS_TMO, + LPFC_MAX_DEVLOSS_TMO); + return -EINVAL; +} + +lpfc_param_store(devloss_tmo) +static CLASS_DEVICE_ATTR(lpfc_devloss_tmo, S_IRUGO | S_IWUSR, + lpfc_devloss_tmo_show, lpfc_devloss_tmo_store); + /* # lpfc_log_verbose: Only turn this flag on if you are willing to risk being # deluged with LOTS of information. @@ -616,14 +736,6 @@ LPFC_ATTR_R(hba_queue_depth, 8192, 32, 8192, LPFC_ATTR_R(scan_down, 1, 0, 1, "Start scanning for devices from highest ALPA to lowest"); -/* -# lpfc_nodev_tmo: If set, it will hold all I/O errors on devices that disappear -# until the timer expires. Value range is [0,255]. Default value is 30. -# NOTE: this MUST be less then the SCSI Layer command timeout - 1. -*/ -LPFC_ATTR_RW(nodev_tmo, 30, 0, 255, - "Seconds driver will hold I/O waiting for a device to come back"); - /* # lpfc_topology: link topology for init link # 0x0 = attempt loop mode then point-to-point @@ -737,6 +849,7 @@ struct class_device_attribute *lpfc_host_attrs[] = { &class_device_attr_lpfc_lun_queue_depth, &class_device_attr_lpfc_hba_queue_depth, &class_device_attr_lpfc_nodev_tmo, + &class_device_attr_lpfc_devloss_tmo, &class_device_attr_lpfc_fcp_class, &class_device_attr_lpfc_use_adisc, &class_device_attr_lpfc_ack0, @@ -1449,28 +1562,13 @@ lpfc_get_starget_port_name(struct scsi_target *starget) fc_starget_port_name(starget) = port_name; } -static void -lpfc_get_rport_loss_tmo(struct fc_rport *rport) -{ - /* - * Return the driver's global value for device loss timeout plus - * five seconds to allow the driver's nodev timer to run. - */ - rport->dev_loss_tmo = lpfc_nodev_tmo + 5; -} - static void lpfc_set_rport_loss_tmo(struct fc_rport *rport, uint32_t timeout) { - /* - * The driver doesn't have a per-target timeout setting. Set - * this value globally. lpfc_nodev_tmo should be greater then 0. - */ if (timeout) - lpfc_nodev_tmo = timeout; + rport->dev_loss_tmo = timeout; else - lpfc_nodev_tmo = 1; - rport->dev_loss_tmo = lpfc_nodev_tmo + 5; + rport->dev_loss_tmo = 1; } @@ -1532,7 +1630,6 @@ struct fc_function_template lpfc_transport_functions = { .show_rport_maxframe_size = 1, .show_rport_supported_classes = 1, - .get_rport_dev_loss_tmo = lpfc_get_rport_loss_tmo, .set_rport_dev_loss_tmo = lpfc_set_rport_loss_tmo, .show_rport_dev_loss_tmo = 1, @@ -1546,6 +1643,8 @@ struct fc_function_template lpfc_transport_functions = { .show_starget_port_name = 1, .issue_fc_host_lip = lpfc_issue_lip, + .dev_loss_tmo_callbk = lpfc_dev_loss_tmo_callbk, + .terminate_rport_io = lpfc_terminate_rport_io, }; void @@ -1561,13 +1660,13 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) lpfc_ack0_init(phba, lpfc_ack0); lpfc_topology_init(phba, lpfc_topology); lpfc_scan_down_init(phba, lpfc_scan_down); - lpfc_nodev_tmo_init(phba, lpfc_nodev_tmo); lpfc_link_speed_init(phba, lpfc_link_speed); lpfc_fdmi_on_init(phba, lpfc_fdmi_on); lpfc_discovery_threads_init(phba, lpfc_discovery_threads); lpfc_max_luns_init(phba, lpfc_max_luns); lpfc_poll_tmo_init(phba, lpfc_poll_tmo); - + lpfc_devloss_tmo_init(phba, lpfc_devloss_tmo); + lpfc_nodev_tmo_init(phba, lpfc_nodev_tmo); phba->cfg_poll = lpfc_poll; /* diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 2a176467f71b..3d684496acde 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -18,6 +18,7 @@ * included with this package. * *******************************************************************/ +struct fc_rport; void lpfc_dump_mem(struct lpfc_hba *, LPFC_MBOXQ_t *, uint16_t); void lpfc_read_nv(struct lpfc_hba *, LPFC_MBOXQ_t *); int lpfc_read_la(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb, @@ -200,6 +201,8 @@ extern struct scsi_host_template lpfc_template; extern struct fc_function_template lpfc_transport_functions; void lpfc_get_hba_sym_node_name(struct lpfc_hba * phba, uint8_t * symbp); +void lpfc_terminate_rport_io(struct fc_rport *); +void lpfc_dev_loss_tmo_callbk(struct fc_rport *rport); #define ScsiResult(host_code, scsi_code) (((host_code) << 16) | scsi_code) #define HBA_EVENT_RSCN 5 diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index bbb7310210b0..ae4106458991 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -324,7 +324,6 @@ lpfc_ns_rsp(struct lpfc_hba * phba, struct lpfc_dmabuf * mp, uint32_t Size) struct lpfc_sli_ct_request *Response = (struct lpfc_sli_ct_request *) mp->virt; struct lpfc_nodelist *ndlp = NULL; - struct lpfc_nodelist *next_ndlp; struct lpfc_dmabuf *mlast, *next_mp; uint32_t *ctptr = (uint32_t *) & Response->un.gid.PortType; uint32_t Did; @@ -399,30 +398,6 @@ nsout1: * current driver state. */ if (phba->hba_state == LPFC_HBA_READY) { - - /* - * Switch ports that connect a loop of multiple targets need - * special consideration. The driver wants to unregister the - * rpi only on the target that was pulled from the loop. On - * RSCN, the driver wants to rediscover an NPort only if the - * driver flagged it as NLP_NPR_2B_DISC. Provided adisc is - * not enabled and the NPort is not capable of retransmissions - * (FC Tape) prevent timing races with the scsi error handler by - * unregistering the Nport's RPI. This action causes all - * outstanding IO to flush back to the midlayer. - */ - list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_npr_list, - nlp_listp) { - if (!(ndlp->nlp_flag & NLP_NPR_2B_DISC) && - (lpfc_rscn_payload_check(phba, ndlp->nlp_DID))) { - if ((phba->cfg_use_adisc == 0) && - !(ndlp->nlp_fcp_info & - NLP_FCP_2_DEVICE)) { - lpfc_unreg_rpi(phba, ndlp); - ndlp->nlp_flag &= ~NLP_NPR_ADISC; - } - } - } lpfc_els_flush_rscn(phba); spin_lock_irq(phba->host->host_lock); phba->fc_flag |= FC_RSCN_MODE; /* we are still in RSCN mode */ diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h index 41cf5d3ea6ce..9766f909c9c6 100644 --- a/drivers/scsi/lpfc/lpfc_disc.h +++ b/drivers/scsi/lpfc/lpfc_disc.h @@ -30,7 +30,6 @@ /* worker thread events */ enum lpfc_work_type { - LPFC_EVT_NODEV_TMO, LPFC_EVT_ONLINE, LPFC_EVT_OFFLINE, LPFC_EVT_WARM_START, @@ -74,11 +73,9 @@ struct lpfc_nodelist { #define NLP_FCP_2_DEVICE 0x10 /* FCP-2 device */ struct timer_list nlp_delayfunc; /* Used for delayed ELS cmds */ - struct timer_list nlp_tmofunc; /* Used for nodev tmo */ struct fc_rport *rport; /* Corresponding FC transport port structure */ struct lpfc_hba *nlp_phba; - struct lpfc_work_evt nodev_timeout_evt; struct lpfc_work_evt els_retry_evt; unsigned long last_ramp_up_time; /* jiffy of last ramp up */ unsigned long last_q_full_time; /* jiffy of last queue full */ @@ -102,7 +99,6 @@ struct lpfc_nodelist { #define NLP_LOGO_SND 0x100 /* sent LOGO request for this entry */ #define NLP_RNID_SND 0x400 /* sent RNID request for this entry */ #define NLP_ELS_SND_MASK 0x7e0 /* sent ELS request for this entry */ -#define NLP_NODEV_TMO 0x10000 /* nodev timeout is running for node */ #define NLP_DELAY_TMO 0x20000 /* delay timeout is running for node */ #define NLP_NPR_2B_DISC 0x40000 /* node is included in num_disc_nodes */ #define NLP_RCV_PLOGI 0x80000 /* Rcv'ed PLOGI from remote system */ @@ -169,7 +165,7 @@ struct lpfc_nodelist { */ /* * For a Link Down, all nodes on the ADISC, PLOGI, unmapped or mapped - * lists will receive a DEVICE_RECOVERY event. If the linkdown or nodev timers + * lists will receive a DEVICE_RECOVERY event. If the linkdown or devloss timers * expire, all effected nodes will receive a DEVICE_RM event. */ /* diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 53821e5778b3..97973af980a0 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -56,28 +56,63 @@ static uint8_t lpfcAlpaArray[] = { static void lpfc_disc_timeout_handler(struct lpfc_hba *); -static void -lpfc_process_nodev_timeout(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) +void +lpfc_terminate_rport_io(struct fc_rport *rport) { - uint8_t *name = (uint8_t *)&ndlp->nlp_portname; - int warn_on = 0; + struct lpfc_rport_data *rdata; + struct lpfc_nodelist * ndlp; + struct lpfc_hba *phba; - spin_lock_irq(phba->host->host_lock); - if (!(ndlp->nlp_flag & NLP_NODEV_TMO)) { - spin_unlock_irq(phba->host->host_lock); + rdata = rport->dd_data; + ndlp = rdata->pnode; + + if (!ndlp) { + if (rport->roles & FC_RPORT_ROLE_FCP_TARGET) + printk(KERN_ERR "Cannot find remote node" + " to terminate I/O Data x%x\n", + rport->port_id); return; } - /* - * If a discovery event readded nodev_timer after timer - * firing and before processing the timer, cancel the - * nlp_tmofunc. - */ - spin_unlock_irq(phba->host->host_lock); - del_timer_sync(&ndlp->nlp_tmofunc); - spin_lock_irq(phba->host->host_lock); + phba = ndlp->nlp_phba; - ndlp->nlp_flag &= ~NLP_NODEV_TMO; + spin_lock_irq(phba->host->host_lock); + if (ndlp->nlp_sid != NLP_NO_SID) { + lpfc_sli_abort_iocb(phba, &phba->sli.ring[phba->sli.fcp_ring], + ndlp->nlp_sid, 0, 0, LPFC_CTX_TGT); + } + spin_unlock_irq(phba->host->host_lock); + + return; +} + +/* + * This function will be called when dev_loss_tmo fire. + */ +void +lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) +{ + struct lpfc_rport_data *rdata; + struct lpfc_nodelist * ndlp; + uint8_t *name; + int warn_on = 0; + struct lpfc_hba *phba; + + rdata = rport->dd_data; + ndlp = rdata->pnode; + + if (!ndlp) { + if (rport->roles & FC_RPORT_ROLE_FCP_TARGET) + printk(KERN_ERR "Cannot find remote node" + " for rport in dev_loss_tmo_callbk x%x\n", + rport->port_id); + return; + } + + name = (uint8_t *)&ndlp->nlp_portname; + phba = ndlp->nlp_phba; + + spin_lock_irq(phba->host->host_lock); if (ndlp->nlp_sid != NLP_NO_SID) { warn_on = 1; @@ -85,11 +120,14 @@ lpfc_process_nodev_timeout(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) lpfc_sli_abort_iocb(phba, &phba->sli.ring[phba->sli.fcp_ring], ndlp->nlp_sid, 0, 0, LPFC_CTX_TGT); } + if (phba->fc_flag & FC_UNLOADING) + warn_on = 0; + spin_unlock_irq(phba->host->host_lock); if (warn_on) { lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, - "%d:0203 Nodev timeout on " + "%d:0203 Devloss timeout on " "WWPN %x:%x:%x:%x:%x:%x:%x:%x " "NPort x%x Data: x%x x%x x%x\n", phba->brd_no, @@ -99,7 +137,7 @@ lpfc_process_nodev_timeout(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) ndlp->nlp_state, ndlp->nlp_rpi); } else { lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, - "%d:0204 Nodev timeout on " + "%d:0204 Devloss timeout on " "WWPN %x:%x:%x:%x:%x:%x:%x:%x " "NPort x%x Data: x%x x%x x%x\n", phba->brd_no, @@ -109,7 +147,12 @@ lpfc_process_nodev_timeout(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) ndlp->nlp_state, ndlp->nlp_rpi); } - lpfc_disc_state_machine(phba, ndlp, NULL, NLP_EVT_DEVICE_RM); + ndlp->rport = NULL; + rdata->pnode = NULL; + + if (!(phba->fc_flag & FC_UNLOADING)) + lpfc_disc_state_machine(phba, ndlp, NULL, NLP_EVT_DEVICE_RM); + return; } @@ -127,11 +170,6 @@ lpfc_work_list_done(struct lpfc_hba * phba) spin_unlock_irq(phba->host->host_lock); free_evt = 1; switch (evtp->evt) { - case LPFC_EVT_NODEV_TMO: - ndlp = (struct lpfc_nodelist *)(evtp->evt_arg1); - lpfc_process_nodev_timeout(phba, ndlp); - free_evt = 0; - break; case LPFC_EVT_ELS_RETRY: ndlp = (struct lpfc_nodelist *)(evtp->evt_arg1); lpfc_els_retry_delay_handler(ndlp); @@ -377,16 +415,6 @@ lpfc_linkdown(struct lpfc_hba * phba) rc = lpfc_disc_state_machine(phba, ndlp, NULL, NLP_EVT_DEVICE_RECOVERY); - /* Check config parameter use-adisc or FCP-2 */ - if ((rc != NLP_STE_FREED_NODE) && - (phba->cfg_use_adisc == 0) && - !(ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE)) { - /* We know we will have to relogin, so - * unreglogin the rpi right now to fail - * any outstanding I/Os quickly. - */ - lpfc_unreg_rpi(phba, ndlp); - } } } @@ -1104,8 +1132,11 @@ lpfc_unregister_remote_port(struct lpfc_hba * phba, struct fc_rport *rport = ndlp->rport; struct lpfc_rport_data *rdata = rport->dd_data; - ndlp->rport = NULL; - rdata->pnode = NULL; + if (rport->scsi_target_id == -1) { + ndlp->rport = NULL; + rdata->pnode = NULL; + } + fc_remote_port_delete(rport); return; @@ -1233,17 +1264,6 @@ lpfc_nlp_list(struct lpfc_hba * phba, struct lpfc_nodelist * nlp, int list) list_add_tail(&nlp->nlp_listp, &phba->fc_nlpunmap_list); phba->fc_unmap_cnt++; phba->nport_event_cnt++; - /* stop nodev tmo if running */ - if (nlp->nlp_flag & NLP_NODEV_TMO) { - nlp->nlp_flag &= ~NLP_NODEV_TMO; - spin_unlock_irq(phba->host->host_lock); - del_timer_sync(&nlp->nlp_tmofunc); - spin_lock_irq(phba->host->host_lock); - if (!list_empty(&nlp->nodev_timeout_evt.evt_listp)) - list_del_init(&nlp->nodev_timeout_evt. - evt_listp); - - } nlp->nlp_flag &= ~NLP_NODEV_REMOVE; nlp->nlp_type |= NLP_FC_NODE; break; @@ -1254,17 +1274,6 @@ lpfc_nlp_list(struct lpfc_hba * phba, struct lpfc_nodelist * nlp, int list) list_add_tail(&nlp->nlp_listp, &phba->fc_nlpmap_list); phba->fc_map_cnt++; phba->nport_event_cnt++; - /* stop nodev tmo if running */ - if (nlp->nlp_flag & NLP_NODEV_TMO) { - nlp->nlp_flag &= ~NLP_NODEV_TMO; - spin_unlock_irq(phba->host->host_lock); - del_timer_sync(&nlp->nlp_tmofunc); - spin_lock_irq(phba->host->host_lock); - if (!list_empty(&nlp->nodev_timeout_evt.evt_listp)) - list_del_init(&nlp->nodev_timeout_evt. - evt_listp); - - } nlp->nlp_flag &= ~NLP_NODEV_REMOVE; break; case NLP_NPR_LIST: @@ -1273,11 +1282,6 @@ lpfc_nlp_list(struct lpfc_hba * phba, struct lpfc_nodelist * nlp, int list) list_add_tail(&nlp->nlp_listp, &phba->fc_npr_list); phba->fc_npr_cnt++; - if (!(nlp->nlp_flag & NLP_NODEV_TMO)) - mod_timer(&nlp->nlp_tmofunc, - jiffies + HZ * phba->cfg_nodev_tmo); - - nlp->nlp_flag |= NLP_NODEV_TMO; nlp->nlp_flag &= ~NLP_RCV_PLOGI; break; case NLP_JUST_DQ: @@ -1307,7 +1311,8 @@ lpfc_nlp_list(struct lpfc_hba * phba, struct lpfc_nodelist * nlp, int list) * already. If we have, and it's a scsi entity, be * sure to unblock any attached scsi devices */ - if (!nlp->rport) + if ((!nlp->rport) || (nlp->rport->port_state == + FC_PORTSTATE_BLOCKED)) lpfc_register_remote_port(phba, nlp); /* @@ -1581,15 +1586,12 @@ lpfc_freenode(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp) lpfc_els_abort(phba,ndlp,0); spin_lock_irq(phba->host->host_lock); - ndlp->nlp_flag &= ~(NLP_NODEV_TMO|NLP_DELAY_TMO); + ndlp->nlp_flag &= ~NLP_DELAY_TMO; spin_unlock_irq(phba->host->host_lock); - del_timer_sync(&ndlp->nlp_tmofunc); ndlp->nlp_last_elscmd = 0; del_timer_sync(&ndlp->nlp_delayfunc); - if (!list_empty(&ndlp->nodev_timeout_evt.evt_listp)) - list_del_init(&ndlp->nodev_timeout_evt.evt_listp); if (!list_empty(&ndlp->els_retry_evt.evt_listp)) list_del_init(&ndlp->els_retry_evt.evt_listp); @@ -1606,16 +1608,6 @@ lpfc_freenode(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp) int lpfc_nlp_remove(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp) { - if (ndlp->nlp_flag & NLP_NODEV_TMO) { - spin_lock_irq(phba->host->host_lock); - ndlp->nlp_flag &= ~NLP_NODEV_TMO; - spin_unlock_irq(phba->host->host_lock); - del_timer_sync(&ndlp->nlp_tmofunc); - if (!list_empty(&ndlp->nodev_timeout_evt.evt_listp)) - list_del_init(&ndlp->nodev_timeout_evt.evt_listp); - - } - if (ndlp->nlp_flag & NLP_DELAY_TMO) { lpfc_cancel_retry_delay_tmo(phba, ndlp); @@ -2430,34 +2422,6 @@ lpfc_disc_timeout_handler(struct lpfc_hba *phba) return; } -static void -lpfc_nodev_timeout(unsigned long ptr) -{ - struct lpfc_hba *phba; - struct lpfc_nodelist *ndlp; - unsigned long iflag; - struct lpfc_work_evt *evtp; - - ndlp = (struct lpfc_nodelist *)ptr; - phba = ndlp->nlp_phba; - evtp = &ndlp->nodev_timeout_evt; - spin_lock_irqsave(phba->host->host_lock, iflag); - - if (!list_empty(&evtp->evt_listp)) { - spin_unlock_irqrestore(phba->host->host_lock, iflag); - return; - } - evtp->evt_arg1 = ndlp; - evtp->evt = LPFC_EVT_NODEV_TMO; - list_add_tail(&evtp->evt_listp, &phba->work_list); - if (phba->work_wait) - wake_up(phba->work_wait); - - spin_unlock_irqrestore(phba->host->host_lock, iflag); - return; -} - - /* * This routine handles processing a NameServer REG_LOGIN mailbox * command upon completion. It is setup in the LPFC_MBOXQ @@ -2581,11 +2545,7 @@ lpfc_nlp_init(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, uint32_t did) { memset(ndlp, 0, sizeof (struct lpfc_nodelist)); - INIT_LIST_HEAD(&ndlp->nodev_timeout_evt.evt_listp); INIT_LIST_HEAD(&ndlp->els_retry_evt.evt_listp); - init_timer(&ndlp->nlp_tmofunc); - ndlp->nlp_tmofunc.function = lpfc_nodev_timeout; - ndlp->nlp_tmofunc.data = (unsigned long)ndlp; init_timer(&ndlp->nlp_delayfunc); ndlp->nlp_delayfunc.function = lpfc_els_retry_delay; ndlp->nlp_delayfunc.data = (unsigned long)ndlp; diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 20449a8dd53d..d5f415007db2 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -1813,7 +1813,7 @@ lpfc_device_recov_npr_node(struct lpfc_hba * phba, */ /* * For a Link Down, all nodes on the ADISC, PLOGI, unmapped or mapped - * lists will receive a DEVICE_RECOVERY event. If the linkdown or nodev timers + * lists will receive a DEVICE_RECOVERY event. If the linkdown or devloss timers * expire, all effected nodes will receive a DEVICE_RM event. */ /* diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index a8816a8738f8..97ae98dc95d0 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -935,7 +935,7 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) schedule_timeout_uninterruptible(LPFC_ABORT_WAIT*HZ); spin_lock_irq(phba->host->host_lock); if (++loop_count - > (2 * phba->cfg_nodev_tmo)/LPFC_ABORT_WAIT) + > (2 * phba->cfg_devloss_tmo)/LPFC_ABORT_WAIT) break; } @@ -978,7 +978,7 @@ lpfc_reset_lun_handler(struct scsi_cmnd *cmnd) spin_lock_irq(shost->host_lock); /* * If target is not in a MAPPED state, delay the reset until - * target is rediscovered or nodev timeout expires. + * target is rediscovered or devloss timeout expires. */ while ( 1 ) { if (!pnode) @@ -1050,7 +1050,7 @@ lpfc_reset_lun_handler(struct scsi_cmnd *cmnd) spin_lock_irq(phba->host->host_lock); if (++loopcnt - > (2 * phba->cfg_nodev_tmo)/LPFC_RESET_WAIT) + > (2 * phba->cfg_devloss_tmo)/LPFC_RESET_WAIT) break; cnt = lpfc_sli_sum_iocb(phba, @@ -1151,7 +1151,7 @@ lpfc_reset_bus_handler(struct scsi_cmnd *cmnd) spin_lock_irq(phba->host->host_lock); if (++loopcnt - > (2 * phba->cfg_nodev_tmo)/LPFC_RESET_WAIT) + > (2 * phba->cfg_devloss_tmo)/LPFC_RESET_WAIT) break; cnt = lpfc_sli_sum_iocb(phba, @@ -1249,7 +1249,7 @@ lpfc_slave_configure(struct scsi_device *sdev) * target pointer is stored in the starget_data for the * driver's sysfs entry point functions. */ - rport->dev_loss_tmo = phba->cfg_nodev_tmo + 5; + rport->dev_loss_tmo = phba->cfg_devloss_tmo; if (phba->cfg_poll & ENABLE_FCP_RING_POLLING) { lpfc_sli_poll_fcp_ring(phba); From c3f28afa61343e3e010e3014aa0d6eba271c1558 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 18 Aug 2006 17:47:18 -0400 Subject: [PATCH 0158/1063] [SCSI] lpfc 8.1.10 : Add support for new lpfc soft_wwpn attribute Add support for a new lpfc soft_wwpn sysfs attribute Signed-off-by: James Smart Signed-off-by: James Bottomley --- drivers/scsi/lpfc/lpfc.h | 3 + drivers/scsi/lpfc/lpfc_attr.c | 117 +++++++++++++++++++++++++++++++ drivers/scsi/lpfc/lpfc_hbadisc.c | 2 + drivers/scsi/lpfc/lpfc_init.c | 2 + 4 files changed, 124 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index efec44d267c7..3f7f5f8abd75 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -303,6 +303,7 @@ struct lpfc_hba { uint32_t cfg_poll_tmo; uint32_t cfg_sg_seg_cnt; uint32_t cfg_sg_dma_buf_size; + uint64_t cfg_soft_wwpn; uint32_t dev_loss_tmo_changed; @@ -354,6 +355,8 @@ struct lpfc_hba { #define VPD_PORT 0x8 /* valid vpd port data */ #define VPD_MASK 0xf /* mask for any vpd data */ + uint8_t soft_wwpn_enable; + struct timer_list fcp_poll_timer; struct timer_list els_tmofunc; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 0de69324212e..9496e87c135e 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -551,6 +551,119 @@ static CLASS_DEVICE_ATTR(board_mode, S_IRUGO | S_IWUSR, lpfc_board_mode_show, lpfc_board_mode_store); static CLASS_DEVICE_ATTR(issue_reset, S_IWUSR, NULL, lpfc_issue_reset); + +static char *lpfc_soft_wwpn_key = "C99G71SL8032A"; + +static ssize_t +lpfc_soft_wwpn_enable_store(struct class_device *cdev, const char *buf, + size_t count) +{ + struct Scsi_Host *host = class_to_shost(cdev); + struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + unsigned int cnt = count; + + /* + * We're doing a simple sanity check for soft_wwpn setting. + * We require that the user write a specific key to enable + * the soft_wwpn attribute to be settable. Once the attribute + * is written, the enable key resets. If further updates are + * desired, the key must be written again to re-enable the + * attribute. + * + * The "key" is not secret - it is a hardcoded string shown + * here. The intent is to protect against the random user or + * application that is just writing attributes. + */ + + /* count may include a LF at end of string */ + if (buf[cnt-1] == '\n') + cnt--; + + if ((cnt != strlen(lpfc_soft_wwpn_key)) || + (strncmp(buf, lpfc_soft_wwpn_key, strlen(lpfc_soft_wwpn_key)) != 0)) + return -EINVAL; + + phba->soft_wwpn_enable = 1; + return count; +} +static CLASS_DEVICE_ATTR(lpfc_soft_wwpn_enable, S_IWUSR, NULL, + lpfc_soft_wwpn_enable_store); + +static ssize_t +lpfc_soft_wwpn_show(struct class_device *cdev, char *buf) +{ + struct Scsi_Host *host = class_to_shost(cdev); + struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + return snprintf(buf, PAGE_SIZE, "0x%llx\n", phba->cfg_soft_wwpn); +} + + +static ssize_t +lpfc_soft_wwpn_store(struct class_device *cdev, const char *buf, size_t count) +{ + struct Scsi_Host *host = class_to_shost(cdev); + struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; + struct completion online_compl; + int stat1=0, stat2=0; + unsigned int i, j, cnt=count; + u8 wwpn[8]; + + /* count may include a LF at end of string */ + if (buf[cnt-1] == '\n') + cnt--; + + if (!phba->soft_wwpn_enable || (cnt < 16) || (cnt > 18) || + ((cnt == 17) && (*buf++ != 'x')) || + ((cnt == 18) && ((*buf++ != '0') || (*buf++ != 'x')))) + return -EINVAL; + + phba->soft_wwpn_enable = 0; + + memset(wwpn, 0, sizeof(wwpn)); + + /* Validate and store the new name */ + for (i=0, j=0; i < 16; i++) { + if ((*buf >= 'a') && (*buf <= 'f')) + j = ((j << 4) | ((*buf++ -'a') + 10)); + else if ((*buf >= 'A') && (*buf <= 'F')) + j = ((j << 4) | ((*buf++ -'A') + 10)); + else if ((*buf >= '0') && (*buf <= '9')) + j = ((j << 4) | (*buf++ -'0')); + else + return -EINVAL; + if (i % 2) { + wwpn[i/2] = j & 0xff; + j = 0; + } + } + phba->cfg_soft_wwpn = wwn_to_u64(wwpn); + fc_host_port_name(host) = phba->cfg_soft_wwpn; + + dev_printk(KERN_NOTICE, &phba->pcidev->dev, + "lpfc%d: Reinitializing to use soft_wwpn\n", phba->brd_no); + + init_completion(&online_compl); + lpfc_workq_post_event(phba, &stat1, &online_compl, LPFC_EVT_OFFLINE); + wait_for_completion(&online_compl); + if (stat1) + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "%d:0463 lpfc_soft_wwpn attribute set failed to reinit " + "adapter - %d\n", phba->brd_no, stat1); + + init_completion(&online_compl); + lpfc_workq_post_event(phba, &stat2, &online_compl, LPFC_EVT_ONLINE); + wait_for_completion(&online_compl); + if (stat2) + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "%d:0464 lpfc_soft_wwpn attribute set failed to reinit " + "adapter - %d\n", phba->brd_no, stat2); + + return (stat1 || stat2) ? -EIO : count; +} +static CLASS_DEVICE_ATTR(lpfc_soft_wwpn, S_IRUGO | S_IWUSR,\ + lpfc_soft_wwpn_show, lpfc_soft_wwpn_store); + + static int lpfc_poll = 0; module_param(lpfc_poll, int, 0); MODULE_PARM_DESC(lpfc_poll, "FCP ring polling mode control:" @@ -832,6 +945,7 @@ LPFC_ATTR_R(max_luns, 255, 0, 65535, LPFC_ATTR_RW(poll_tmo, 10, 1, 255, "Milliseconds driver will wait between polling FCP ring"); + struct class_device_attribute *lpfc_host_attrs[] = { &class_device_attr_info, &class_device_attr_serialnum, @@ -867,6 +981,8 @@ struct class_device_attribute *lpfc_host_attrs[] = { &class_device_attr_issue_reset, &class_device_attr_lpfc_poll, &class_device_attr_lpfc_poll_tmo, + &class_device_attr_lpfc_soft_wwpn, + &class_device_attr_lpfc_soft_wwpn_enable, NULL, }; @@ -1668,6 +1784,7 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) lpfc_devloss_tmo_init(phba, lpfc_devloss_tmo); lpfc_nodev_tmo_init(phba, lpfc_nodev_tmo); phba->cfg_poll = lpfc_poll; + phba->cfg_soft_wwpn = 0L; /* * The total number of segments is the configuration value plus 2 diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 97973af980a0..d586c3d3b0d0 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -672,6 +672,8 @@ lpfc_mbx_cmpl_read_sparam(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) memcpy((uint8_t *) & phba->fc_sparam, (uint8_t *) mp->virt, sizeof (struct serv_parm)); + if (phba->cfg_soft_wwpn) + u64_to_wwn(phba->cfg_soft_wwpn, phba->fc_sparam.portName.u.wwn); memcpy((uint8_t *) & phba->fc_nodename, (uint8_t *) & phba->fc_sparam.nodeName, sizeof (struct lpfc_name)); diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 84e7fc595f5e..4cdf3464267f 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -268,6 +268,8 @@ lpfc_config_port_post(struct lpfc_hba * phba) kfree(mp); pmb->context1 = NULL; + if (phba->cfg_soft_wwpn) + u64_to_wwn(phba->cfg_soft_wwpn, phba->fc_sparam.portName.u.wwn); memcpy(&phba->fc_nodename, &phba->fc_sparam.nodeName, sizeof (struct lpfc_name)); memcpy(&phba->fc_portname, &phba->fc_sparam.portName, From 26dacd0c9b2dc1dc987c376aeee4e80691a7dd0b Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 18 Aug 2006 17:47:24 -0400 Subject: [PATCH 0159/1063] [SCSI] lpfc 8.1.10 : Change version number to 8.1.10 Change version number to 8.1.10 Signed-off-by: James Smart Signed-off-by: James Bottomley --- drivers/scsi/lpfc/lpfc_version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index c7091ea29f3f..ac417908b407 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -18,7 +18,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "8.1.9" +#define LPFC_DRIVER_VERSION "8.1.10" #define LPFC_DRIVER_NAME "lpfc" From fa6c220a7f01257b1c2c4203d48aaaedb0c4416f Mon Sep 17 00:00:00 2001 From: Aubrey Lee Date: Tue, 5 Sep 2006 05:55:07 -0500 Subject: [PATCH 0160/1063] [PATCH] [MTD] DEVICES: Fill more device IDs in the structure of m25p80 The flash_info structure has a bunch of missing fields which causes problems when actually tryin to use some ST parts as it gets detected incorrectly. Signed-off-by: Aubrey L1 Signed-off-by: Josh Boyer --- drivers/mtd/devices/m25p80.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c index a8466141e914..ef4a731ca5c2 100644 --- a/drivers/mtd/devices/m25p80.c +++ b/drivers/mtd/devices/m25p80.c @@ -406,13 +406,13 @@ struct flash_info { static struct flash_info __devinitdata m25p_data [] = { /* REVISIT: fill in JEDEC ids, for parts that have them */ - { "m25p05", 0x05, 0x0000, 32 * 1024, 2 }, - { "m25p10", 0x10, 0x0000, 32 * 1024, 4 }, - { "m25p20", 0x11, 0x0000, 64 * 1024, 4 }, - { "m25p40", 0x12, 0x0000, 64 * 1024, 8 }, + { "m25p05", 0x05, 0x2010, 32 * 1024, 2 }, + { "m25p10", 0x10, 0x2011, 32 * 1024, 4 }, + { "m25p20", 0x11, 0x2012, 64 * 1024, 4 }, + { "m25p40", 0x12, 0x2013, 64 * 1024, 8 }, { "m25p80", 0x13, 0x0000, 64 * 1024, 16 }, - { "m25p16", 0x14, 0x0000, 64 * 1024, 32 }, - { "m25p32", 0x15, 0x0000, 64 * 1024, 64 }, + { "m25p16", 0x14, 0x2015, 64 * 1024, 32 }, + { "m25p32", 0x15, 0x2016, 64 * 1024, 64 }, { "m25p64", 0x16, 0x2017, 64 * 1024, 128 }, }; From 3906f4edeef976c081c4e7bd92164d2f59c325ae Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 5 Sep 2006 17:15:47 -0400 Subject: [PATCH 0161/1063] [CPUFREQ] Fix sparse warning in ondemand drivers/cpufreq/cpufreq_ondemand.c:323:2: warning: Using plain integer as NULL pointer Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_ondemand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 5ca2fd5d1ed1..bf8aa45d4f01 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -449,7 +449,7 @@ static inline void dbs_timer_init(unsigned int cpu) delay -= jiffies % delay; ondemand_powersave_bias_init(); - INIT_WORK(&dbs_info->work, do_dbs_timer, 0); + INIT_WORK(&dbs_info->work, do_dbs_timer, NULL); queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay); } From 8eb7925f93af75e66a240d148efdec212f95bcb7 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 20 Aug 2006 18:48:13 +0400 Subject: [PATCH 0162/1063] [AGPGART] agp.h: constify struct agp_bridge_data::version drivers/char/agp/backend.c: In function `agp_backend_initialize': drivers/char/agp/backend.c:141: warning: assignment discards qualifiers from pointer target type Signed-off-by: Alexey Dobriyan Signed-off-by: Dave Jones --- drivers/char/agp/agp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h index 3c623b67ea1c..8b3317fd46c9 100644 --- a/drivers/char/agp/agp.h +++ b/drivers/char/agp/agp.h @@ -117,7 +117,7 @@ struct agp_bridge_driver { }; struct agp_bridge_data { - struct agp_version *version; + const struct agp_version *version; struct agp_bridge_driver *driver; struct vm_operations_struct *vm_ops; void *previous_size; From db44aaf3a2f599163c53ce96658aca688b3466f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=B3=20Bilski?= Date: Wed, 16 Aug 2006 01:07:33 +0200 Subject: [PATCH 0163/1063] [CPUFREQ] Longhaul - Add voltage scaling to driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename option "dont_scale_voltage" to "scale_voltage" because don't will be default. Use "pos" for calculating voltage. In this way driver don't need to know mV value or low level value. Simply min U is one pos and max U is second pos. All pos between these two are used. Assume that min U is for min f and max U for max f. For frequency between min and max calculate pos based on difference between current frequency and min f. Values in mobile VRM table changed to values from C3-M datasheet. Signed-off-by: Rafa³ Bilski Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/longhaul.c | 97 ++++++++++++++----------- arch/i386/kernel/cpu/cpufreq/longhaul.h | 48 +++++++++--- 2 files changed, 94 insertions(+), 51 deletions(-) diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index 43bbf948d45d..f5cc9f5c9bab 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -53,19 +53,26 @@ #define CPU_NEHEMIAH 5 static int cpu_model; -static unsigned int numscales=16, numvscales; +static unsigned int numscales=16; static unsigned int fsb; -static int minvid, maxvid; + +static struct mV_pos *vrm_mV_table; +static unsigned char *mV_vrm_table; +struct f_msr { + unsigned char vrm; +}; +static struct f_msr f_msr_table[32]; + +static unsigned int highest_speed, lowest_speed; /* kHz */ static unsigned int minmult, maxmult; static int can_scale_voltage; -static int vrmrev; static struct acpi_processor *pr = NULL; static struct acpi_processor_cx *cx = NULL; -static int port22_en = 0; +static int port22_en; /* Module parameters */ -static int dont_scale_voltage; -static int ignore_latency = 0; +static int scale_voltage; +static int ignore_latency; #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) @@ -73,7 +80,6 @@ static int ignore_latency = 0; /* Clock ratios multiplied by 10 */ static int clock_ratio[32]; static int eblcr_table[32]; -static int voltage_table[32]; static unsigned int highest_speed, lowest_speed; /* kHz */ static int longhaul_version; static struct cpufreq_frequency_table *longhaul_table; @@ -163,6 +169,11 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index) longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; longhaul.bits.EnableSoftBusRatio = 1; + if (can_scale_voltage) { + longhaul.bits.SoftVID = f_msr_table[clock_ratio_index].vrm; + longhaul.bits.EnableSoftVID = 1; + } + /* Sync to timer tick */ safe_halt(); /* Change frequency on next halt or sleep */ @@ -454,53 +465,57 @@ static int __init longhaul_get_ranges(void) static void __init longhaul_setup_voltagescaling(void) { union msr_longhaul longhaul; + struct mV_pos minvid, maxvid; + unsigned int j, speed, pos, kHz_step, numvscales; - rdmsrl (MSR_VIA_LONGHAUL, longhaul.val); - - if (!(longhaul.bits.RevisionID & 1)) + rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); + if (!(longhaul.bits.RevisionID & 1)) { + printk(KERN_INFO PFX "Voltage scaling not supported by CPU.\n"); return; + } - minvid = longhaul.bits.MinimumVID; - maxvid = longhaul.bits.MaximumVID; - vrmrev = longhaul.bits.VRMRev; + if (!longhaul.bits.VRMRev) { + printk (KERN_INFO PFX "VRM 8.5\n"); + vrm_mV_table = &vrm85_mV[0]; + mV_vrm_table = &mV_vrm85[0]; + } else { + printk (KERN_INFO PFX "Mobile VRM\n"); + vrm_mV_table = &mobilevrm_mV[0]; + mV_vrm_table = &mV_mobilevrm[0]; + } - if (minvid == 0 || maxvid == 0) { + minvid = vrm_mV_table[longhaul.bits.MinimumVID]; + maxvid = vrm_mV_table[longhaul.bits.MaximumVID]; + numvscales = maxvid.pos - minvid.pos + 1; + kHz_step = (highest_speed - lowest_speed) / numvscales; + + if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) { printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. " "Voltage scaling disabled.\n", - minvid/1000, minvid%1000, maxvid/1000, maxvid%1000); + minvid.mV/1000, minvid.mV%1000, maxvid.mV/1000, maxvid.mV%1000); return; } - if (minvid == maxvid) { + if (minvid.mV == maxvid.mV) { printk (KERN_INFO PFX "Claims to support voltage scaling but min & max are " "both %d.%03d. Voltage scaling disabled\n", - maxvid/1000, maxvid%1000); + maxvid.mV/1000, maxvid.mV%1000); return; } - if (vrmrev==0) { - dprintk ("VRM 8.5\n"); - memcpy (voltage_table, vrm85scales, sizeof(voltage_table)); - numvscales = (voltage_table[maxvid]-voltage_table[minvid])/25; - } else { - dprintk ("Mobile VRM\n"); - memcpy (voltage_table, mobilevrmscales, sizeof(voltage_table)); - numvscales = (voltage_table[maxvid]-voltage_table[minvid])/5; + printk(KERN_INFO PFX "Max VID=%d.%03d Min VID=%d.%03d, %d possible voltage scales\n", + maxvid.mV/1000, maxvid.mV%1000, + minvid.mV/1000, minvid.mV%1000, + numvscales); + + j = 0; + while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) { + speed = longhaul_table[j].frequency; + pos = (speed - lowest_speed) / kHz_step + minvid.pos; + f_msr_table[longhaul_table[j].index].vrm = mV_vrm_table[pos]; + j++; } - /* Current voltage isn't readable at first, so we need to - set it to a known value. The spec says to use maxvid */ - longhaul.bits.RevisionKey = longhaul.bits.RevisionID; /* FIXME: This is bad. */ - longhaul.bits.EnableSoftVID = 1; - longhaul.bits.SoftVID = maxvid; - wrmsrl (MSR_VIA_LONGHAUL, longhaul.val); - - minvid = voltage_table[minvid]; - maxvid = voltage_table[maxvid]; - - dprintk ("Min VID=%d.%03d Max VID=%d.%03d, %d possible voltage scales\n", - maxvid/1000, maxvid%1000, minvid/1000, minvid%1000, numvscales); - can_scale_voltage = 1; } @@ -685,7 +700,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) return ret; if ((longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) && - (dont_scale_voltage==0)) + (scale_voltage != 0)) longhaul_setup_voltagescaling(); policy->governor = CPUFREQ_DEFAULT_GOVERNOR; @@ -773,8 +788,8 @@ static void __exit longhaul_exit(void) kfree(longhaul_table); } -module_param (dont_scale_voltage, int, 0644); -MODULE_PARM_DESC(dont_scale_voltage, "Don't scale voltage of processor"); +module_param (scale_voltage, int, 0644); +MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); module_param(ignore_latency, int, 0644); MODULE_PARM_DESC(ignore_latency, "Skip ACPI C3 latency test"); diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.h b/arch/i386/kernel/cpu/cpufreq/longhaul.h index d3a95d77ee85..bc4682aad69b 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.h +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.h @@ -450,17 +450,45 @@ static int __initdata nehemiah_c_eblcr[32] = { * Voltage scales. Div/Mod by 1000 to get actual voltage. * Which scale to use depends on the VRM type in use. */ -static int __initdata vrm85scales[32] = { - 1250, 1200, 1150, 1100, 1050, 1800, 1750, 1700, - 1650, 1600, 1550, 1500, 1450, 1400, 1350, 1300, - 1275, 1225, 1175, 1125, 1075, 1825, 1775, 1725, - 1675, 1625, 1575, 1525, 1475, 1425, 1375, 1325, + +struct mV_pos { + unsigned short mV; + unsigned short pos; }; -static int __initdata mobilevrmscales[32] = { - 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650, - 1600, 1550, 1500, 1450, 1500, 1350, 1300, -1, - 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100, - 1075, 1050, 1025, 1000, 975, 950, 925, -1, +static struct mV_pos __initdata vrm85_mV[32] = { + {1250, 8}, {1200, 6}, {1150, 4}, {1100, 2}, + {1050, 0}, {1800, 30}, {1750, 28}, {1700, 26}, + {1650, 24}, {1600, 22}, {1550, 20}, {1500, 18}, + {1450, 16}, {1400, 14}, {1350, 12}, {1300, 10}, + {1275, 9}, {1225, 7}, {1175, 5}, {1125, 3}, + {1075, 1}, {1825, 31}, {1775, 29}, {1725, 27}, + {1675, 25}, {1625, 23}, {1575, 21}, {1525, 19}, + {1475, 17}, {1425, 15}, {1375, 13}, {1325, 11} +}; + +static unsigned char __initdata mV_vrm85[32] = { + 0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11, + 0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d, + 0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19, + 0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15 +}; + +static struct mV_pos __initdata mobilevrm_mV[32] = { + {1750, 31}, {1700, 30}, {1650, 29}, {1600, 28}, + {1550, 27}, {1500, 26}, {1450, 25}, {1400, 24}, + {1350, 23}, {1300, 22}, {1250, 21}, {1200, 20}, + {1150, 19}, {1100, 18}, {1050, 17}, {1000, 16}, + {975, 15}, {950, 14}, {925, 13}, {900, 12}, + {875, 11}, {850, 10}, {825, 9}, {800, 8}, + {775, 7}, {750, 6}, {725, 5}, {700, 4}, + {675, 3}, {650, 2}, {625, 1}, {600, 0} +}; + +static unsigned char __initdata mV_mobilevrm[32] = { + 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, + 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, + 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, + 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 }; From 8adcc0c674004c0f9467031a93dc639c2b01411f Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 1 Sep 2006 14:02:24 -0700 Subject: [PATCH 0164/1063] [CPUFREQ] Workaround for BIOS bug in software coordination of frequency Some buggy BIOSes do a "software any" kind of coordination without telling about it to OS. So, when OS sets frequency on one CPU on these platforms, it will also impact all the other logical CPUs that are in the same power domain. Attached patch is a workaround for those buggy BIOSes. Patch should be a noop on the normal non-buggy platforms. Applies over previously sent acpi-cpufreq and software coordination bug fix patch Signed-off-by: Denis Sadykov Signed-off-by: Venkatesh Pallipadi Signed-off-by: Alexey Starikovskiy Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c | 39 ++++++++++++++++- .../kernel/cpu/cpufreq/speedstep-centrino.c | 42 ++++++++++++++++++- 2 files changed, 79 insertions(+), 2 deletions(-) diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c index e6ea00edcb54..ea19d091fd41 100644 --- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -32,6 +32,7 @@ #include #include #include /* current */ +#include #include #include #include @@ -387,6 +388,33 @@ static int acpi_cpufreq_early_init_acpi(void) return acpi_processor_preregister_performance(acpi_perf_data); } +/* + * Some BIOSes do SW_ANY coordination internally, either set it up in hw + * or do it in BIOS firmware and won't inform about it to OS. If not + * detected, this has a side effect of making CPU run at a different speed + * than OS intended it to run at. Detect it and handle it cleanly. + */ +static int bios_with_sw_any_bug; + +static int __init sw_any_bug_found(struct dmi_system_id *d) +{ + bios_with_sw_any_bug = 1; + return 0; +} + +static struct dmi_system_id __initdata sw_any_bug_dmi_table[] = { + { + .callback = sw_any_bug_found, + .ident = "Supermicro Server X6DLP", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"), + DMI_MATCH(DMI_BIOS_VERSION, "080010"), + DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"), + }, + }, + { } +}; + static int acpi_cpufreq_cpu_init ( struct cpufreq_policy *policy) @@ -422,8 +450,17 @@ acpi_cpufreq_cpu_init ( * coordination is required. */ if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || - policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) + policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { policy->cpus = perf->shared_cpu_map; + } + +#ifdef CONFIG_SMP + dmi_check_system(sw_any_bug_dmi_table); + if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) { + policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; + policy->cpus = cpu_core_map[cpu]; + } +#endif if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index b77f1358bd79..dba6bb28d298 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -23,6 +23,7 @@ #ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI #include +#include #include #endif @@ -377,6 +378,35 @@ static int centrino_cpu_early_init_acpi(void) return 0; } + +/* + * Some BIOSes do SW_ANY coordination internally, either set it up in hw + * or do it in BIOS firmware and won't inform about it to OS. If not + * detected, this has a side effect of making CPU run at a different speed + * than OS intended it to run at. Detect it and handle it cleanly. + */ +static int bios_with_sw_any_bug; +static int __init sw_any_bug_found(struct dmi_system_id *d) +{ + bios_with_sw_any_bug = 1; + return 0; +} + + +static struct dmi_system_id __initdata sw_any_bug_dmi_table[] = { + { + .callback = sw_any_bug_found, + .ident = "Supermicro Server X6DLP", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"), + DMI_MATCH(DMI_BIOS_VERSION, "080010"), + DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"), + }, + }, + { } +}; + + /* * centrino_cpu_init_acpi - register with ACPI P-States library * @@ -398,14 +428,24 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) dprintk(PFX "obtaining ACPI data failed\n"); return -EIO; } + policy->shared_type = p->shared_type; /* * Will let policy->cpus know about dependency only when software * coordination is required. */ if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || - policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) + policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { policy->cpus = p->shared_cpu_map; + } + +#ifdef CONFIG_SMP + dmi_check_system(sw_any_bug_dmi_table); + if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) { + policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; + policy->cpus = cpu_core_map[cpu]; + } +#endif /* verify the acpi_data */ if (p->state_count <= 1) { From 65c25aadfa4e917060e99fe459f33a6a07db53cc Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 6 Sep 2006 11:57:18 -0400 Subject: [PATCH 0165/1063] [AGPGART] Intel 965 Express support. From: Alan Hourihane From: Eric Anholt Signed-off-by: Dave Jones --- drivers/char/agp/intel-agp.c | 163 ++++++++++++++++++++++++++++++++--- 1 file changed, 152 insertions(+), 11 deletions(-) diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index 42a1cb871992..a425f27af9ea 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -2,14 +2,6 @@ * Intel AGPGART routines. */ -/* - * Intel(R) 855GM/852GM and 865G support added by David Dawes - * . - * - * Intel(R) 915G/915GM support added by Alan Hourihane - * . - */ - #include #include #include @@ -17,6 +9,21 @@ #include #include "agp.h" +#define PCI_DEVICE_ID_INTEL_82946GZ_HB 0x2970 +#define PCI_DEVICE_ID_INTEL_82946GZ_IG 0x2972 +#define PCI_DEVICE_ID_INTEL_82965G_1_HB 0x2980 +#define PCI_DEVICE_ID_INTEL_82965G_1_IG 0x2982 +#define PCI_DEVICE_ID_INTEL_82965Q_HB 0x2990 +#define PCI_DEVICE_ID_INTEL_82965Q_IG 0x2992 +#define PCI_DEVICE_ID_INTEL_82965G_HB 0x29A0 +#define PCI_DEVICE_ID_INTEL_82965G_IG 0x29A2 + +#define IS_I965 (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82946GZ_HB || \ + agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965G_1_HB || \ + agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965Q_HB || \ + agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965G_HB) + + /* Intel 815 register */ #define INTEL_815_APCONT 0x51 #define INTEL_815_ATTBASE_MASK ~0x1FFFFFFF @@ -40,6 +47,8 @@ #define I915_GMCH_GMS_STOLEN_48M (0x6 << 4) #define I915_GMCH_GMS_STOLEN_64M (0x7 << 4) +/* Intel 965G registers */ +#define I965_MSAC 0x62 /* Intel 7505 registers */ #define INTEL_I7505_APSIZE 0x74 @@ -354,6 +363,7 @@ static struct aper_size_info_fixed intel_i830_sizes[] = /* The 64M mode still requires a 128k gatt */ {64, 16384, 5}, {256, 65536, 6}, + {512, 131072, 7}, }; static struct _intel_i830_private { @@ -377,7 +387,11 @@ static void intel_i830_init_gtt_entries(void) /* We obtain the size of the GTT, which is also stored (for some * reason) at the top of stolen memory. Then we add 4KB to that * for the video BIOS popup, which is also stored in there. */ - size = agp_bridge->driver->fetch_size() + 4; + + if (IS_I965) + size = 512 + 4; + else + size = agp_bridge->driver->fetch_size() + 4; if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82830_HB || agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82845G_HB) { @@ -423,7 +437,7 @@ static void intel_i830_init_gtt_entries(void) if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915G_HB || agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915GM_HB || agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945G_HB || - agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB) + agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB || IS_I965 ) gtt_entries = MB(48) - KB(size); else gtt_entries = 0; @@ -433,7 +447,7 @@ static void intel_i830_init_gtt_entries(void) if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915G_HB || agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915GM_HB || agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945G_HB || - agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB) + agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB || IS_I965) gtt_entries = MB(64) - KB(size); else gtt_entries = 0; @@ -791,6 +805,77 @@ static int intel_i915_create_gatt_table(struct agp_bridge_data *bridge) return 0; } +static int intel_i965_fetch_size(void) +{ + struct aper_size_info_fixed *values; + u32 offset = 0; + u8 temp; + +#define I965_512MB_ADDRESS_MASK (3<<1) + + values = A_SIZE_FIX(agp_bridge->driver->aperture_sizes); + + pci_read_config_byte(intel_i830_private.i830_dev, I965_MSAC, &temp); + temp &= I965_512MB_ADDRESS_MASK; + switch (temp) { + case 0x00: + offset = 0; /* 128MB */ + break; + case 0x06: + offset = 3; /* 512MB */ + break; + default: + case 0x02: + offset = 2; /* 256MB */ + break; + } + + agp_bridge->previous_size = agp_bridge->current_size = (void *)(values + offset); + + return values[offset].size; +} + +/* The intel i965 automatically initializes the agp aperture during POST. ++ * Use the memory already set aside for in the GTT. ++ */ +static int intel_i965_create_gatt_table(struct agp_bridge_data *bridge) +{ + int page_order; + struct aper_size_info_fixed *size; + int num_entries; + u32 temp; + + size = agp_bridge->current_size; + page_order = size->page_order; + num_entries = size->num_entries; + agp_bridge->gatt_table_real = NULL; + + pci_read_config_dword(intel_i830_private.i830_dev, I915_MMADDR, &temp); + + temp &= 0xfff00000; + intel_i830_private.gtt = ioremap((temp + (512 * 1024)) , 512 * 1024); + + if (!intel_i830_private.gtt) + return -ENOMEM; + + + intel_i830_private.registers = ioremap(temp,128 * 4096); + if (!intel_i830_private.registers) + return -ENOMEM; + + temp = readl(intel_i830_private.registers+I810_PGETBL_CTL) & 0xfffff000; + global_cache_flush(); /* FIXME: ? */ + + /* we have to call this as early as possible after the MMIO base address is known */ + intel_i830_init_gtt_entries(); + + agp_bridge->gatt_table = NULL; + + agp_bridge->gatt_bus_addr = temp; + + return 0; +} + static int intel_fetch_size(void) { @@ -1489,6 +1574,29 @@ static struct agp_bridge_driver intel_915_driver = { .agp_destroy_page = agp_generic_destroy_page, }; +static struct agp_bridge_driver intel_i965_driver = { + .owner = THIS_MODULE, + .aperture_sizes = intel_i830_sizes, + .size_type = FIXED_APER_SIZE, + .num_aperture_sizes = 4, + .needs_scratch_page = TRUE, + .configure = intel_i915_configure, + .fetch_size = intel_i965_fetch_size, + .cleanup = intel_i915_cleanup, + .tlb_flush = intel_i810_tlbflush, + .mask_memory = intel_i810_mask_memory, + .masks = intel_i810_masks, + .agp_enable = intel_i810_agp_enable, + .cache_flush = global_cache_flush, + .create_gatt_table = intel_i965_create_gatt_table, + .free_gatt_table = intel_i830_free_gatt_table, + .insert_memory = intel_i915_insert_entries, + .remove_memory = intel_i915_remove_entries, + .alloc_by_type = intel_i830_alloc_by_type, + .free_by_type = intel_i810_free_by_type, + .agp_alloc_page = agp_generic_alloc_page, + .agp_destroy_page = agp_generic_destroy_page, +}; static struct agp_bridge_driver intel_7505_driver = { .owner = THIS_MODULE, @@ -1684,6 +1792,35 @@ static int __devinit agp_intel_probe(struct pci_dev *pdev, bridge->driver = &intel_845_driver; name = "945GM"; break; + case PCI_DEVICE_ID_INTEL_82946GZ_HB: + if (find_i830(PCI_DEVICE_ID_INTEL_82946GZ_IG)) + bridge->driver = &intel_i965_driver; + else + bridge->driver = &intel_845_driver; + name = "946GZ"; + break; + case PCI_DEVICE_ID_INTEL_82965G_1_HB: + if (find_i830(PCI_DEVICE_ID_INTEL_82965G_1_IG)) + bridge->driver = &intel_i965_driver; + else + bridge->driver = &intel_845_driver; + name = "965G"; + break; + case PCI_DEVICE_ID_INTEL_82965Q_HB: + if (find_i830(PCI_DEVICE_ID_INTEL_82965Q_IG)) + bridge->driver = &intel_i965_driver; + else + bridge->driver = &intel_845_driver; + name = "965Q"; + break; + case PCI_DEVICE_ID_INTEL_82965G_HB: + if (find_i830(PCI_DEVICE_ID_INTEL_82965G_IG)) + bridge->driver = &intel_i965_driver; + else + bridge->driver = &intel_845_driver; + name = "965G"; + break; + case PCI_DEVICE_ID_INTEL_7505_0: bridge->driver = &intel_7505_driver; name = "E7505"; @@ -1827,6 +1964,10 @@ static struct pci_device_id agp_intel_pci_table[] = { ID(PCI_DEVICE_ID_INTEL_82915GM_HB), ID(PCI_DEVICE_ID_INTEL_82945G_HB), ID(PCI_DEVICE_ID_INTEL_82945GM_HB), + ID(PCI_DEVICE_ID_INTEL_82946GZ_HB), + ID(PCI_DEVICE_ID_INTEL_82965G_1_HB), + ID(PCI_DEVICE_ID_INTEL_82965Q_HB), + ID(PCI_DEVICE_ID_INTEL_82965G_HB), { } }; From c14635eb4e591c61e419c065df1fdacf9ff90c00 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 6 Sep 2006 11:59:35 -0400 Subject: [PATCH 0166/1063] [AGPGART] Fix number of aperture sizes in 830 gart structs. Spotted by Eric Anholt. Signed-off-by: Dave Jones --- drivers/char/agp/intel-agp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index a425f27af9ea..42c7d8dec635 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -1392,7 +1392,7 @@ static struct agp_bridge_driver intel_830_driver = { .owner = THIS_MODULE, .aperture_sizes = intel_i830_sizes, .size_type = FIXED_APER_SIZE, - .num_aperture_sizes = 3, + .num_aperture_sizes = 4, .needs_scratch_page = TRUE, .configure = intel_i830_configure, .fetch_size = intel_i830_fetch_size, @@ -1554,7 +1554,7 @@ static struct agp_bridge_driver intel_915_driver = { .owner = THIS_MODULE, .aperture_sizes = intel_i830_sizes, .size_type = FIXED_APER_SIZE, - .num_aperture_sizes = 3, + .num_aperture_sizes = 4, .needs_scratch_page = TRUE, .configure = intel_i915_configure, .fetch_size = intel_i915_fetch_size, From 884d25cc4fda20908fd4ef93dbb41d817984b68b Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 5 Sep 2006 16:26:41 -0500 Subject: [PATCH 0167/1063] [SCSI] Fix refcount breakage with 'echo "1" > scan' when target already present Spotted by: Dan Aloni The problem is there's inconsistent locking semantic usage of scsi_alloc_target(). Two callers assume the target comes back with reference unincremented and the third assumes its incremented. Fix by always making the reference incremented on return. Also fix path in target alloc that could consistently increment the parent lock. Finally document scsi_alloc_target() so its callers know what the expectations are. Signed-off-by: James Bottomley --- drivers/scsi/scsi_scan.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 31d05ab0b2fc..fd9e281c3bfe 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -266,6 +266,18 @@ static struct scsi_target *__scsi_find_target(struct device *parent, return found_starget; } +/** + * scsi_alloc_target - allocate a new or find an existing target + * @parent: parent of the target (need not be a scsi host) + * @channel: target channel number (zero if no channels) + * @id: target id number + * + * Return an existing target if one exists, provided it hasn't already + * gone into STARGET_DEL state, otherwise allocate a new target. + * + * The target is returned with an incremented reference, so the caller + * is responsible for both reaping and doing a last put + */ static struct scsi_target *scsi_alloc_target(struct device *parent, int channel, uint id) { @@ -331,14 +343,15 @@ static struct scsi_target *scsi_alloc_target(struct device *parent, return NULL; } } + get_device(dev); return starget; found: found_target->reap_ref++; spin_unlock_irqrestore(shost->host_lock, flags); - put_device(parent); if (found_target->state != STARGET_DEL) { + put_device(parent); kfree(starget); return found_target; } @@ -1341,7 +1354,6 @@ struct scsi_device *__scsi_add_device(struct Scsi_Host *shost, uint channel, if (!starget) return ERR_PTR(-ENOMEM); - get_device(&starget->dev); mutex_lock(&shost->scan_mutex); if (scsi_host_scan_allowed(shost)) scsi_probe_and_add_lun(starget, lun, NULL, &sdev, 1, hostdata); @@ -1400,7 +1412,6 @@ static void __scsi_scan_target(struct device *parent, unsigned int channel, if (!starget) return; - get_device(&starget->dev); if (lun != SCAN_WILD_CARD) { /* * Scan for a specific host/chan/id/lun. @@ -1582,7 +1593,8 @@ struct scsi_device *scsi_get_host_dev(struct Scsi_Host *shost) if (sdev) { sdev->sdev_gendev.parent = get_device(&starget->dev); sdev->borken = 0; - } + } else + scsi_target_reap(starget); put_device(&starget->dev); out: mutex_unlock(&shost->scan_mutex); From f479ab87936563a286b8aa0e39003c40fa31c6da Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 6 Sep 2006 09:00:29 -0500 Subject: [PATCH 0168/1063] [SCSI] fix up non-modular SCSI The recent change to the way scsi_device_get()/put() work broke the non modular build (we do a module_refcount on a NULL). Fix this by checking for non-null before checking module_refcount(). Signed-off-by: James Bottomley --- drivers/scsi/scsi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index eedfd059b82b..c35f5fc0d668 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -873,10 +873,12 @@ EXPORT_SYMBOL(scsi_device_get); */ void scsi_device_put(struct scsi_device *sdev) { + struct module *module = sdev->host->hostt->module; + /* The module refcount will be zero if scsi_device_get() * was called from a module removal routine */ - if (likely(module_refcount(sdev->host->hostt->module) != 0)) - module_put(sdev->host->hostt->module); + if (module && module_refcount(module) != 0) + module_put(module); put_device(&sdev->sdev_gendev); } EXPORT_SYMBOL(scsi_device_put); From b4620233d6a3510564c561a5a2a365a1d8a34b68 Mon Sep 17 00:00:00 2001 From: Henrik Kretzschmar Date: Wed, 6 Sep 2006 10:49:48 +0200 Subject: [PATCH 0169/1063] [SCSI] scsi-driver ultrastore replace Scsi_Cmnd with struct scsi_cmnd Signed-off-by: Henrik Kretzschmar Signed-off-by: James Bottomley --- drivers/scsi/ultrastor.c | 23 ++++++++++++----------- drivers/scsi/ultrastor.h | 12 +++++++----- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/drivers/scsi/ultrastor.c b/drivers/scsi/ultrastor.c index e681681ab7a2..0372aa9fa190 100644 --- a/drivers/scsi/ultrastor.c +++ b/drivers/scsi/ultrastor.c @@ -196,8 +196,8 @@ struct mscp { u32 sense_data PACKED; /* The following fields are for software only. They are included in the MSCP structure because they are associated with SCSI requests. */ - void (*done)(Scsi_Cmnd *); - Scsi_Cmnd *SCint; + void (*done) (struct scsi_cmnd *); + struct scsi_cmnd *SCint; ultrastor_sg_list sglist[ULTRASTOR_24F_MAX_SG]; /* use larger size for 24F */ }; @@ -289,7 +289,7 @@ static const unsigned short ultrastor_ports_14f[] = { static void ultrastor_interrupt(int, void *, struct pt_regs *); static irqreturn_t do_ultrastor_interrupt(int, void *, struct pt_regs *); -static inline void build_sg_list(struct mscp *, Scsi_Cmnd *SCpnt); +static inline void build_sg_list(struct mscp *, struct scsi_cmnd *SCpnt); /* Always called with host lock held */ @@ -673,7 +673,7 @@ static const char *ultrastor_info(struct Scsi_Host * shpnt) return buf; } -static inline void build_sg_list(struct mscp *mscp, Scsi_Cmnd *SCpnt) +static inline void build_sg_list(struct mscp *mscp, struct scsi_cmnd *SCpnt) { struct scatterlist *sl; long transfer_length = 0; @@ -694,7 +694,8 @@ static inline void build_sg_list(struct mscp *mscp, Scsi_Cmnd *SCpnt) mscp->transfer_data_length = transfer_length; } -static int ultrastor_queuecommand(Scsi_Cmnd *SCpnt, void (*done)(Scsi_Cmnd *)) +static int ultrastor_queuecommand(struct scsi_cmnd *SCpnt, + void (*done) (struct scsi_cmnd *)) { struct mscp *my_mscp; #if ULTRASTOR_MAX_CMDS > 1 @@ -833,7 +834,7 @@ retry: */ -static int ultrastor_abort(Scsi_Cmnd *SCpnt) +static int ultrastor_abort(struct scsi_cmnd *SCpnt) { #if ULTRASTOR_DEBUG & UD_ABORT char out[108]; @@ -843,7 +844,7 @@ static int ultrastor_abort(Scsi_Cmnd *SCpnt) unsigned int mscp_index; unsigned char old_aborted; unsigned long flags; - void (*done)(Scsi_Cmnd *); + void (*done)(struct scsi_cmnd *); struct Scsi_Host *host = SCpnt->device->host; if(config.slot) @@ -960,7 +961,7 @@ static int ultrastor_abort(Scsi_Cmnd *SCpnt) return SUCCESS; } -static int ultrastor_host_reset(Scsi_Cmnd * SCpnt) +static int ultrastor_host_reset(struct scsi_cmnd * SCpnt) { unsigned long flags; int i; @@ -1045,8 +1046,8 @@ static void ultrastor_interrupt(int irq, void *dev_id, struct pt_regs *regs) unsigned int mscp_index; #endif struct mscp *mscp; - void (*done)(Scsi_Cmnd *); - Scsi_Cmnd *SCtmp; + void (*done) (struct scsi_cmnd *); + struct scsi_cmnd *SCtmp; #if ULTRASTOR_MAX_CMDS == 1 mscp = &config.mscp[0]; @@ -1079,7 +1080,7 @@ static void ultrastor_interrupt(int irq, void *dev_id, struct pt_regs *regs) return; } if (icm_status == 3) { - void (*done)(Scsi_Cmnd *) = mscp->done; + void (*done)(struct scsi_cmnd *) = mscp->done; if (done) { mscp->done = NULL; mscp->SCint->result = DID_ABORT << 16; diff --git a/drivers/scsi/ultrastor.h b/drivers/scsi/ultrastor.h index da759a11deff..a692905f95f7 100644 --- a/drivers/scsi/ultrastor.h +++ b/drivers/scsi/ultrastor.h @@ -14,11 +14,13 @@ #define _ULTRASTOR_H static int ultrastor_detect(struct scsi_host_template *); -static const char *ultrastor_info(struct Scsi_Host * shpnt); -static int ultrastor_queuecommand(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *)); -static int ultrastor_abort(Scsi_Cmnd *); -static int ultrastor_host_reset(Scsi_Cmnd *); -static int ultrastor_biosparam(struct scsi_device *, struct block_device *, sector_t, int *); +static const char *ultrastor_info(struct Scsi_Host *shpnt); +static int ultrastor_queuecommand(struct scsi_cmnd *, + void (*done)(struct scsi_cmnd *)); +static int ultrastor_abort(struct scsi_cmnd *); +static int ultrastor_host_reset(struct scsi_cmnd *); +static int ultrastor_biosparam(struct scsi_device *, struct block_device *, + sector_t, int *); #define ULTRASTOR_14F_MAX_SG 16 From 88edf74610bd894b93438f389688bc8b4a2d3414 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 6 Sep 2006 17:36:13 -0500 Subject: [PATCH 0170/1063] [SCSI] SAS: consolidate linkspeed definitions At the moment we have two separate linkspeed enumerations covering roughly the same values. This patch consolidates on a single one enum sas_linkspeed in scsi_transport_sas.h and uses it everywhere in the aic94xx driver. Eventually I'll get around to removing the duplicated fields in asd_sas_phy and sas_phy ... Signed-off-by: James Bottomley --- drivers/scsi/aic94xx/aic94xx_hwi.c | 2 +- drivers/scsi/aic94xx/aic94xx_init.c | 12 ++++++++---- drivers/scsi/aic94xx/aic94xx_scb.c | 26 +++++++++++++------------- drivers/scsi/libsas/sas_expander.c | 27 +++++++-------------------- drivers/scsi/libsas/sas_internal.h | 2 +- include/scsi/libsas.h | 14 +++++++------- include/scsi/sas.h | 14 -------------- include/scsi/scsi_transport_sas.h | 26 +++++++++++++++++--------- 8 files changed, 54 insertions(+), 69 deletions(-) diff --git a/drivers/scsi/aic94xx/aic94xx_hwi.c b/drivers/scsi/aic94xx/aic94xx_hwi.c index 075cea85b56b..a24201351108 100644 --- a/drivers/scsi/aic94xx/aic94xx_hwi.c +++ b/drivers/scsi/aic94xx/aic94xx_hwi.c @@ -96,7 +96,7 @@ static int asd_init_phy(struct asd_phy *phy) sas_phy->type = PHY_TYPE_PHYSICAL; sas_phy->role = PHY_ROLE_INITIATOR; sas_phy->oob_mode = OOB_NOT_CONNECTED; - sas_phy->linkrate = PHY_LINKRATE_NONE; + sas_phy->linkrate = SAS_LINK_RATE_UNKNOWN; phy->id_frm_tok = asd_alloc_coherent(asd_ha, sizeof(*phy->identify_frame), diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c index 69aa70887530..302b54fddf3c 100644 --- a/drivers/scsi/aic94xx/aic94xx_init.c +++ b/drivers/scsi/aic94xx/aic94xx_init.c @@ -240,10 +240,14 @@ static int __devinit asd_common_setup(struct asd_ha_struct *asd_ha) /* All phys are enabled, by default. */ asd_ha->hw_prof.enabled_phys = 0xFF; for (i = 0; i < ASD_MAX_PHYS; i++) { - asd_ha->hw_prof.phy_desc[i].max_sas_lrate = PHY_LINKRATE_3; - asd_ha->hw_prof.phy_desc[i].min_sas_lrate = PHY_LINKRATE_1_5; - asd_ha->hw_prof.phy_desc[i].max_sata_lrate= PHY_LINKRATE_1_5; - asd_ha->hw_prof.phy_desc[i].min_sata_lrate= PHY_LINKRATE_1_5; + asd_ha->hw_prof.phy_desc[i].max_sas_lrate = + SAS_LINK_RATE_3_0_GBPS; + asd_ha->hw_prof.phy_desc[i].min_sas_lrate = + SAS_LINK_RATE_1_5_GBPS; + asd_ha->hw_prof.phy_desc[i].max_sata_lrate = + SAS_LINK_RATE_1_5_GBPS; + asd_ha->hw_prof.phy_desc[i].min_sata_lrate = + SAS_LINK_RATE_1_5_GBPS; } return 0; diff --git a/drivers/scsi/aic94xx/aic94xx_scb.c b/drivers/scsi/aic94xx/aic94xx_scb.c index fc1b7438a913..ef8ca08b545f 100644 --- a/drivers/scsi/aic94xx/aic94xx_scb.c +++ b/drivers/scsi/aic94xx/aic94xx_scb.c @@ -55,15 +55,15 @@ static inline void get_lrate_mode(struct asd_phy *phy, u8 oob_mode) switch (oob_mode & 7) { case PHY_SPEED_60: /* FIXME: sas transport class doesn't have this */ - phy->sas_phy.linkrate = PHY_LINKRATE_6; + phy->sas_phy.linkrate = SAS_LINK_RATE_6_0_GBPS; phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_6_0_GBPS; break; case PHY_SPEED_30: - phy->sas_phy.linkrate = PHY_LINKRATE_3; + phy->sas_phy.linkrate = SAS_LINK_RATE_3_0_GBPS; phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_3_0_GBPS; break; case PHY_SPEED_15: - phy->sas_phy.linkrate = PHY_LINKRATE_1_5; + phy->sas_phy.linkrate = SAS_LINK_RATE_1_5_GBPS; phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_1_5_GBPS; break; } @@ -540,39 +540,39 @@ static inline void set_speed_mask(u8 *speed_mask, struct asd_phy_desc *pd) | SATA_SPEED_30_DIS | SATA_SPEED_15_DIS; switch (pd->max_sas_lrate) { - case PHY_LINKRATE_6: + case SAS_LINK_RATE_6_0_GBPS: *speed_mask &= ~SAS_SPEED_60_DIS; default: - case PHY_LINKRATE_3: + case SAS_LINK_RATE_3_0_GBPS: *speed_mask &= ~SAS_SPEED_30_DIS; - case PHY_LINKRATE_1_5: + case SAS_LINK_RATE_1_5_GBPS: *speed_mask &= ~SAS_SPEED_15_DIS; } switch (pd->min_sas_lrate) { - case PHY_LINKRATE_6: + case SAS_LINK_RATE_6_0_GBPS: *speed_mask |= SAS_SPEED_30_DIS; - case PHY_LINKRATE_3: + case SAS_LINK_RATE_3_0_GBPS: *speed_mask |= SAS_SPEED_15_DIS; default: - case PHY_LINKRATE_1_5: + case SAS_LINK_RATE_1_5_GBPS: /* nothing to do */ ; } switch (pd->max_sata_lrate) { - case PHY_LINKRATE_3: + case SAS_LINK_RATE_3_0_GBPS: *speed_mask &= ~SATA_SPEED_30_DIS; default: - case PHY_LINKRATE_1_5: + case SAS_LINK_RATE_1_5_GBPS: *speed_mask &= ~SATA_SPEED_15_DIS; } switch (pd->min_sata_lrate) { - case PHY_LINKRATE_3: + case SAS_LINK_RATE_3_0_GBPS: *speed_mask |= SATA_SPEED_15_DIS; default: - case PHY_LINKRATE_1_5: + case SAS_LINK_RATE_1_5_GBPS: /* nothing to do */ ; } diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index b653a263f76a..02e796ee027e 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -191,20 +191,7 @@ static void sas_set_ex_phy(struct domain_device *dev, int phy_id, phy->phy->maximum_linkrate_hw = SAS_LINK_RATE_3_0_GBPS; phy->phy->minimum_linkrate = SAS_LINK_RATE_1_5_GBPS; phy->phy->maximum_linkrate = SAS_LINK_RATE_3_0_GBPS; - switch (phy->linkrate) { - case PHY_LINKRATE_1_5: - phy->phy->negotiated_linkrate = SAS_LINK_RATE_1_5_GBPS; - break; - case PHY_LINKRATE_3: - phy->phy->negotiated_linkrate = SAS_LINK_RATE_3_0_GBPS; - break; - case PHY_LINKRATE_6: - phy->phy->negotiated_linkrate = SAS_LINK_RATE_6_0_GBPS; - break; - default: - phy->phy->negotiated_linkrate = SAS_LINK_RATE_UNKNOWN; - break; - } + phy->phy->negotiated_linkrate = phy->linkrate; if (!rediscover) sas_phy_add(phy->phy); @@ -450,7 +437,7 @@ static void sas_ex_disable_phy(struct domain_device *dev, int phy_id) struct ex_phy *phy = &ex->ex_phy[phy_id]; sas_smp_phy_control(dev, phy_id, PHY_FUNC_DISABLE); - phy->linkrate = PHY_DISABLED; + phy->linkrate = SAS_PHY_DISABLED; } static void sas_ex_disable_port(struct domain_device *dev, u8 *sas_addr) @@ -743,7 +730,7 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id) int res = 0; /* Phy state */ - if (ex_phy->linkrate == PHY_SPINUP_HOLD) { + if (ex_phy->linkrate == SAS_SATA_SPINUP_HOLD) { if (!sas_smp_phy_control(dev, phy_id, PHY_FUNC_LINK_RESET)) res = sas_ex_phy_discover(dev, phy_id); if (res) @@ -773,7 +760,7 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id) sas_configure_routing(dev, ex_phy->attached_sas_addr); } return 0; - } else if (ex_phy->linkrate == PHY_LINKRATE_UNKNOWN) + } else if (ex_phy->linkrate == SAS_LINK_RATE_UNKNOWN) return 0; if (ex_phy->attached_dev_type != SAS_END_DEV && @@ -922,9 +909,9 @@ static int sas_ex_discover_devices(struct domain_device *dev, int single) continue; switch (ex_phy->linkrate) { - case PHY_DISABLED: - case PHY_RESET_PROBLEM: - case PHY_PORT_SELECTOR: + case SAS_PHY_DISABLED: + case SAS_PHY_RESET_PROBLEM: + case SAS_SATA_PORT_SELECTOR: continue; default: res = sas_ex_discover_dev(dev, i); diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h index 89c397680846..0d69ede4b944 100644 --- a/drivers/scsi/libsas/sas_internal.h +++ b/drivers/scsi/libsas/sas_internal.h @@ -43,7 +43,7 @@ void sas_scsi_recover_host(struct Scsi_Host *shost); int sas_show_class(enum sas_class class, char *buf); int sas_show_proto(enum sas_proto proto, char *buf); -int sas_show_linkrate(enum sas_phy_linkrate linkrate, char *buf); +int sas_show_linkrate(enum sas_linkrate linkrate, char *buf); int sas_show_oob_mode(enum sas_oob_mode oob_mode, char *buf); int sas_register_phys(struct sas_ha_struct *sas_ha); diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index 72acdabe7f80..8d91313dd888 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -114,7 +114,7 @@ struct ex_phy { enum ex_phy_state phy_state; enum sas_dev_type attached_dev_type; - enum sas_phy_linkrate linkrate; + enum sas_linkrate linkrate; u8 attached_sata_host:1; u8 attached_sata_dev:1; @@ -170,9 +170,9 @@ struct sata_device { struct domain_device { enum sas_dev_type dev_type; - enum sas_phy_linkrate linkrate; - enum sas_phy_linkrate min_linkrate; - enum sas_phy_linkrate max_linkrate; + enum sas_linkrate linkrate; + enum sas_linkrate min_linkrate; + enum sas_linkrate max_linkrate; int pathways; @@ -220,7 +220,7 @@ struct asd_sas_port { struct domain_device *port_dev; spinlock_t dev_list_lock; struct list_head dev_list; - enum sas_phy_linkrate linkrate; + enum sas_linkrate linkrate; struct sas_phy *phy; struct work_struct work; @@ -276,7 +276,7 @@ struct asd_sas_phy { enum sas_phy_type type; enum sas_phy_role role; enum sas_oob_mode oob_mode; - enum sas_phy_linkrate linkrate; + enum sas_linkrate linkrate; u8 *sas_addr; /* must be set */ u8 attached_sas_addr[SAS_ADDR_SIZE]; /* class:RO, driver: R/W */ @@ -368,7 +368,7 @@ void sas_hash_addr(u8 *hashed, const u8 *sas_addr); static inline void sas_phy_disconnected(struct asd_sas_phy *phy) { phy->oob_mode = OOB_NOT_CONNECTED; - phy->linkrate = PHY_LINKRATE_NONE; + phy->linkrate = SAS_LINK_RATE_UNKNOWN; } /* ---------- Tasks ---------- */ diff --git a/include/scsi/sas.h b/include/scsi/sas.h index 752853a113dc..9c8a5b91ae64 100644 --- a/include/scsi/sas.h +++ b/include/scsi/sas.h @@ -102,20 +102,6 @@ enum sas_dev_type { SATA_PM_PORT= 8, }; -enum sas_phy_linkrate { - PHY_LINKRATE_NONE = 0, - PHY_LINKRATE_UNKNOWN = 0, - PHY_DISABLED, - PHY_RESET_PROBLEM, - PHY_SPINUP_HOLD, - PHY_PORT_SELECTOR, - PHY_LINKRATE_1_5 = 0x08, - PHY_LINKRATE_G1 = PHY_LINKRATE_1_5, - PHY_LINKRATE_3 = 0x09, - PHY_LINKRATE_G2 = PHY_LINKRATE_3, - PHY_LINKRATE_6 = 0x0A, -}; - /* Partly from IDENTIFY address frame. */ enum sas_proto { SATA_PROTO = 1, diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h index eeb2200de855..87de518960c1 100644 --- a/include/scsi/scsi_transport_sas.h +++ b/include/scsi/scsi_transport_sas.h @@ -24,15 +24,23 @@ enum sas_protocol { }; enum sas_linkrate { - SAS_LINK_RATE_UNKNOWN, - SAS_PHY_DISABLED, - SAS_LINK_RATE_FAILED, - SAS_SATA_SPINUP_HOLD, - SAS_SATA_PORT_SELECTOR, - SAS_LINK_RATE_1_5_GBPS, - SAS_LINK_RATE_3_0_GBPS, - SAS_LINK_RATE_6_0_GBPS, - SAS_LINK_VIRTUAL, + /* These Values are defined in the SAS standard */ + SAS_LINK_RATE_UNKNOWN = 0, + SAS_PHY_DISABLED = 1, + SAS_PHY_RESET_PROBLEM = 2, + SAS_SATA_SPINUP_HOLD = 3, + SAS_SATA_PORT_SELECTOR = 4, + SAS_PHY_RESET_IN_PROGRESS = 5, + SAS_LINK_RATE_1_5_GBPS = 8, + SAS_LINK_RATE_G1 = SAS_LINK_RATE_1_5_GBPS, + SAS_LINK_RATE_3_0_GBPS = 9, + SAS_LINK_RATE_G2 = SAS_LINK_RATE_3_0_GBPS, + SAS_LINK_RATE_6_0_GBPS = 10, + /* These are virtual to the transport class and may never + * be signalled normally since the standard defined field + * is only 4 bits */ + SAS_LINK_RATE_FAILED = 0x10, + SAS_PHY_VIRTUAL = 0x11, }; struct sas_identify { From d24e1eeb3a16e4944288c2f3bf082e1513f4b425 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 6 Sep 2006 19:25:22 -0500 Subject: [PATCH 0171/1063] [SCSI] scsi_transport_sas: make minimum and maximum linkrate settable quantities According to SPEC, the minimum_linkrate and maximum_linkrate should be settable by the user. This patch introduces a callback that allows the sas class to pass these settings on to the driver. Signed-off-by: James Bottomley --- drivers/scsi/scsi_transport_sas.c | 73 ++++++++++++++++++++++++++++--- include/scsi/scsi_transport_sas.h | 6 +++ 2 files changed, 73 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index d518c1207fb4..b5b0c2cba96b 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -77,6 +77,24 @@ get_sas_##title##_names(u32 table_key, char *buf) \ return len; \ } +#define sas_bitfield_name_set(title, table) \ +static ssize_t \ +set_sas_##title##_names(u32 *table_key, const char *buf) \ +{ \ + ssize_t len = 0; \ + int i; \ + \ + for (i = 0; i < ARRAY_SIZE(table); i++) { \ + len = strlen(table[i].name); \ + if (strncmp(buf, table[i].name, len) == 0 && \ + (buf[len] == '\n' || buf[len] == '\0')) { \ + *table_key = table[i].value; \ + return 0; \ + } \ + } \ + return -EINVAL; \ +} + #define sas_bitfield_name_search(title, table) \ static ssize_t \ get_sas_##title##_names(u32 table_key, char *buf) \ @@ -131,7 +149,7 @@ static struct { { SAS_LINK_RATE_6_0_GBPS, "6.0 Gbit" }, }; sas_bitfield_name_search(linkspeed, sas_linkspeed_names) - +sas_bitfield_name_set(linkspeed, sas_linkspeed_names) /* * SAS host attributes @@ -253,10 +271,39 @@ show_sas_phy_##field(struct class_device *cdev, char *buf) \ return get_sas_linkspeed_names(phy->field, buf); \ } +/* Fudge to tell if we're minimum or maximum */ +#define sas_phy_store_linkspeed(field) \ +static ssize_t \ +store_sas_phy_##field(struct class_device *cdev, const char *buf, \ + size_t count) \ +{ \ + struct sas_phy *phy = transport_class_to_phy(cdev); \ + struct Scsi_Host *shost = dev_to_shost(phy->dev.parent); \ + struct sas_internal *i = to_sas_internal(shost->transportt); \ + u32 value; \ + struct sas_phy_linkrates rates = {0}; \ + int error; \ + \ + error = set_sas_linkspeed_names(&value, buf); \ + if (error) \ + return error; \ + rates.field = value; \ + error = i->f->set_phy_speed(phy, &rates); \ + \ + return error ? error : count; \ +} + +#define sas_phy_linkspeed_rw_attr(field) \ + sas_phy_show_linkspeed(field) \ + sas_phy_store_linkspeed(field) \ +static CLASS_DEVICE_ATTR(field, S_IRUGO, show_sas_phy_##field, \ + store_sas_phy_##field) + #define sas_phy_linkspeed_attr(field) \ sas_phy_show_linkspeed(field) \ static CLASS_DEVICE_ATTR(field, S_IRUGO, show_sas_phy_##field, NULL) + #define sas_phy_show_linkerror(field) \ static ssize_t \ show_sas_phy_##field(struct class_device *cdev, char *buf) \ @@ -326,9 +373,9 @@ sas_phy_simple_attr(identify.phy_identifier, phy_identifier, "%d\n", u8); //sas_phy_simple_attr(port_identifier, port_identifier, "%d\n", int); sas_phy_linkspeed_attr(negotiated_linkrate); sas_phy_linkspeed_attr(minimum_linkrate_hw); -sas_phy_linkspeed_attr(minimum_linkrate); +sas_phy_linkspeed_rw_attr(minimum_linkrate); sas_phy_linkspeed_attr(maximum_linkrate_hw); -sas_phy_linkspeed_attr(maximum_linkrate); +sas_phy_linkspeed_rw_attr(maximum_linkrate); sas_phy_linkerror_attr(invalid_dword_count); sas_phy_linkerror_attr(running_disparity_error_count); sas_phy_linkerror_attr(loss_of_dword_sync_count); @@ -1310,13 +1357,23 @@ static int sas_user_scan(struct Scsi_Host *shost, uint channel, * Setup / Teardown code */ -#define SETUP_TEMPLATE(attrb, field, perm, test) \ +#define SETUP_TEMPLATE(attrb, field, perm, test) \ i->private_##attrb[count] = class_device_attr_##field; \ i->private_##attrb[count].attr.mode = perm; \ i->attrb[count] = &i->private_##attrb[count]; \ if (test) \ count++ +#define SETUP_TEMPLATE_RW(attrb, field, perm, test, ro_test, ro_perm) \ + i->private_##attrb[count] = class_device_attr_##field; \ + i->private_##attrb[count].attr.mode = perm; \ + if (ro_test) { \ + i->private_##attrb[count].attr.mode = ro_perm; \ + i->private_##attrb[count].store = NULL; \ + } \ + i->attrb[count] = &i->private_##attrb[count]; \ + if (test) \ + count++ #define SETUP_RPORT_ATTRIBUTE(field) \ SETUP_TEMPLATE(rphy_attrs, field, S_IRUGO, 1) @@ -1327,6 +1384,10 @@ static int sas_user_scan(struct Scsi_Host *shost, uint channel, #define SETUP_PHY_ATTRIBUTE(field) \ SETUP_TEMPLATE(phy_attrs, field, S_IRUGO, 1) +#define SETUP_PHY_ATTRIBUTE_RW(field) \ + SETUP_TEMPLATE_RW(phy_attrs, field, S_IRUGO | S_IWUSR, 1, \ + !i->f->set_phy_speed, S_IRUGO) + #define SETUP_PORT_ATTRIBUTE(field) \ SETUP_TEMPLATE(port_attrs, field, S_IRUGO, 1) @@ -1407,9 +1468,9 @@ sas_attach_transport(struct sas_function_template *ft) //SETUP_PHY_ATTRIBUTE(port_identifier); SETUP_PHY_ATTRIBUTE(negotiated_linkrate); SETUP_PHY_ATTRIBUTE(minimum_linkrate_hw); - SETUP_PHY_ATTRIBUTE(minimum_linkrate); + SETUP_PHY_ATTRIBUTE_RW(minimum_linkrate); SETUP_PHY_ATTRIBUTE(maximum_linkrate_hw); - SETUP_PHY_ATTRIBUTE(maximum_linkrate); + SETUP_PHY_ATTRIBUTE_RW(maximum_linkrate); SETUP_PHY_ATTRIBUTE(invalid_dword_count); SETUP_PHY_ATTRIBUTE(running_disparity_error_count); diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h index 87de518960c1..53024377f3b8 100644 --- a/include/scsi/scsi_transport_sas.h +++ b/include/scsi/scsi_transport_sas.h @@ -150,12 +150,18 @@ struct sas_port { #define transport_class_to_sas_port(cdev) \ dev_to_sas_port((cdev)->dev) +struct sas_phy_linkrates { + enum sas_linkrate maximum_linkrate; + enum sas_linkrate minimum_linkrate; +}; + /* The functions by which the transport class and the driver communicate */ struct sas_function_template { int (*get_linkerrors)(struct sas_phy *); int (*get_enclosure_identifier)(struct sas_rphy *, u64 *); int (*get_bay_identifier)(struct sas_rphy *); int (*phy_reset)(struct sas_phy *, int); + int (*set_phy_speed)(struct sas_phy *, struct sas_phy_linkrates *); }; From a01e70e570a72b8a8c9a58062e4f5bdcd3986222 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 6 Sep 2006 19:28:07 -0500 Subject: [PATCH 0172/1063] [SCSI] aci94xx: implement link rate setting This patch implements the ability to set the minimum and maximum linkrates for both libsas (for expanders) and aic94xx (for the host phys). It also tidies up the setting of the hardware min and max to make sure they're updated when the expander emits a change broadcast. Signed-off-by: James Bottomley --- drivers/scsi/aic94xx/aic94xx.h | 2 +- drivers/scsi/aic94xx/aic94xx_scb.c | 30 ++++++++++++++++++-- drivers/scsi/libsas/sas_expander.c | 20 +++++++++----- drivers/scsi/libsas/sas_init.c | 44 ++++++++++++++++++++++++++++-- drivers/scsi/libsas/sas_internal.h | 2 +- drivers/scsi/libsas/sas_phy.c | 15 +++++----- include/scsi/libsas.h | 2 +- include/scsi/sas.h | 1 + 8 files changed, 95 insertions(+), 21 deletions(-) diff --git a/drivers/scsi/aic94xx/aic94xx.h b/drivers/scsi/aic94xx/aic94xx.h index cb7caf1c9ce1..1bd5b4ecf3d5 100644 --- a/drivers/scsi/aic94xx/aic94xx.h +++ b/drivers/scsi/aic94xx/aic94xx.h @@ -109,6 +109,6 @@ int asd_clear_nexus_port(struct asd_sas_port *port); int asd_clear_nexus_ha(struct sas_ha_struct *sas_ha); /* ---------- Phy Management ---------- */ -int asd_control_phy(struct asd_sas_phy *phy, enum phy_func func); +int asd_control_phy(struct asd_sas_phy *phy, enum phy_func func, void *arg); #endif diff --git a/drivers/scsi/aic94xx/aic94xx_scb.c b/drivers/scsi/aic94xx/aic94xx_scb.c index ef8ca08b545f..7ee49b51b724 100644 --- a/drivers/scsi/aic94xx/aic94xx_scb.c +++ b/drivers/scsi/aic94xx/aic94xx_scb.c @@ -52,6 +52,8 @@ static inline void get_lrate_mode(struct asd_phy *phy, u8 oob_mode) { + struct sas_phy *sas_phy = phy->sas_phy.phy; + switch (oob_mode & 7) { case PHY_SPEED_60: /* FIXME: sas transport class doesn't have this */ @@ -67,6 +69,12 @@ static inline void get_lrate_mode(struct asd_phy *phy, u8 oob_mode) phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_1_5_GBPS; break; } + sas_phy->negotiated_linkrate = phy->sas_phy.linkrate; + sas_phy->maximum_linkrate_hw = SAS_LINK_RATE_3_0_GBPS; + sas_phy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS; + sas_phy->maximum_linkrate = phy->phy_desc->max_sas_lrate; + sas_phy->minimum_linkrate = phy->phy_desc->min_sas_lrate; + if (oob_mode & SAS_MODE) phy->sas_phy.oob_mode = SAS_OOB_MODE; else if (oob_mode & SATA_MODE) @@ -710,14 +718,32 @@ static const int phy_func_table[] = { [PHY_FUNC_RELEASE_SPINUP_HOLD] = RELEASE_SPINUP_HOLD, }; -int asd_control_phy(struct asd_sas_phy *phy, enum phy_func func) +int asd_control_phy(struct asd_sas_phy *phy, enum phy_func func, void *arg) { struct asd_ha_struct *asd_ha = phy->ha->lldd_ha; + struct asd_phy_desc *pd = asd_ha->phys[phy->id].phy_desc; struct asd_ascb *ascb; + struct sas_phy_linkrates *rates; int res = 1; - if (func == PHY_FUNC_CLEAR_ERROR_LOG) + switch (func) { + case PHY_FUNC_CLEAR_ERROR_LOG: return -ENOSYS; + case PHY_FUNC_SET_LINK_RATE: + rates = arg; + if (rates->minimum_linkrate) { + pd->min_sas_lrate = rates->minimum_linkrate; + pd->min_sata_lrate = rates->minimum_linkrate; + } + if (rates->maximum_linkrate) { + pd->max_sas_lrate = rates->maximum_linkrate; + pd->max_sata_lrate = rates->maximum_linkrate; + } + func = PHY_FUNC_LINK_RESET; + break; + default: + break; + } ascb = asd_ascb_alloc_list(asd_ha, &res, GFP_KERNEL); if (!ascb) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 02e796ee027e..30b8014bcc7a 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -187,10 +187,10 @@ static void sas_set_ex_phy(struct domain_device *dev, int phy_id, phy->phy->identify.initiator_port_protocols = phy->attached_iproto; phy->phy->identify.target_port_protocols = phy->attached_tproto; phy->phy->identify.phy_identifier = phy_id; - phy->phy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS; - phy->phy->maximum_linkrate_hw = SAS_LINK_RATE_3_0_GBPS; - phy->phy->minimum_linkrate = SAS_LINK_RATE_1_5_GBPS; - phy->phy->maximum_linkrate = SAS_LINK_RATE_3_0_GBPS; + phy->phy->minimum_linkrate_hw = dr->hmin_linkrate; + phy->phy->maximum_linkrate_hw = dr->hmax_linkrate; + phy->phy->minimum_linkrate = dr->pmin_linkrate; + phy->phy->maximum_linkrate = dr->pmax_linkrate; phy->phy->negotiated_linkrate = phy->linkrate; if (!rediscover) @@ -404,7 +404,8 @@ out: #define PC_RESP_SIZE 8 int sas_smp_phy_control(struct domain_device *dev, int phy_id, - enum phy_func phy_func) + enum phy_func phy_func, + struct sas_phy_linkrates *rates) { u8 *pc_req; u8 *pc_resp; @@ -423,6 +424,10 @@ int sas_smp_phy_control(struct domain_device *dev, int phy_id, pc_req[1] = SMP_PHY_CONTROL; pc_req[9] = phy_id; pc_req[10]= phy_func; + if (rates) { + pc_req[32] = rates->minimum_linkrate << 4; + pc_req[33] = rates->maximum_linkrate << 4; + } res = smp_execute_task(dev, pc_req, PC_REQ_SIZE, pc_resp,PC_RESP_SIZE); @@ -436,7 +441,7 @@ static void sas_ex_disable_phy(struct domain_device *dev, int phy_id) struct expander_device *ex = &dev->ex_dev; struct ex_phy *phy = &ex->ex_phy[phy_id]; - sas_smp_phy_control(dev, phy_id, PHY_FUNC_DISABLE); + sas_smp_phy_control(dev, phy_id, PHY_FUNC_DISABLE, NULL); phy->linkrate = SAS_PHY_DISABLED; } @@ -731,7 +736,7 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id) /* Phy state */ if (ex_phy->linkrate == SAS_SATA_SPINUP_HOLD) { - if (!sas_smp_phy_control(dev, phy_id, PHY_FUNC_LINK_RESET)) + if (!sas_smp_phy_control(dev, phy_id, PHY_FUNC_LINK_RESET, NULL)) res = sas_ex_phy_discover(dev, phy_id); if (res) return res; @@ -1706,6 +1711,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id) SAS_ADDR(phy->attached_sas_addr)) { SAS_DPRINTK("ex %016llx phy 0x%x broadcast flutter\n", SAS_ADDR(dev->sas_addr), phy_id); + sas_ex_phy_discover(dev, phy_id); } else res = sas_discover_new(dev, phy_id); out: diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c index b961664b8106..c836a237fb79 100644 --- a/drivers/scsi/libsas/sas_init.c +++ b/drivers/scsi/libsas/sas_init.c @@ -159,17 +159,57 @@ static int sas_phy_reset(struct sas_phy *phy, int hard_reset) struct sas_internal *i = to_sas_internal(sas_ha->core.shost->transportt); - ret = i->dft->lldd_control_phy(asd_phy, reset_type); + ret = i->dft->lldd_control_phy(asd_phy, reset_type, NULL); } else { struct sas_rphy *rphy = dev_to_rphy(phy->dev.parent); struct domain_device *ddev = sas_find_dev_by_rphy(rphy); - ret = sas_smp_phy_control(ddev, phy->number, reset_type); + ret = sas_smp_phy_control(ddev, phy->number, reset_type, NULL); } return ret; } +static int sas_set_phy_speed(struct sas_phy *phy, + struct sas_phy_linkrates *rates) +{ + int ret; + + if ((rates->minimum_linkrate && + rates->minimum_linkrate > phy->maximum_linkrate) || + (rates->maximum_linkrate && + rates->maximum_linkrate < phy->minimum_linkrate)) + return -EINVAL; + + if (rates->minimum_linkrate && + rates->minimum_linkrate < phy->minimum_linkrate_hw) + rates->minimum_linkrate = phy->minimum_linkrate_hw; + + if (rates->maximum_linkrate && + rates->maximum_linkrate > phy->maximum_linkrate_hw) + rates->maximum_linkrate = phy->maximum_linkrate_hw; + + if (scsi_is_sas_phy_local(phy)) { + struct Scsi_Host *shost = dev_to_shost(phy->dev.parent); + struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(shost); + struct asd_sas_phy *asd_phy = sas_ha->sas_phy[phy->number]; + struct sas_internal *i = + to_sas_internal(sas_ha->core.shost->transportt); + + ret = i->dft->lldd_control_phy(asd_phy, PHY_FUNC_SET_LINK_RATE, + rates); + } else { + struct sas_rphy *rphy = dev_to_rphy(phy->dev.parent); + struct domain_device *ddev = sas_find_dev_by_rphy(rphy); + ret = sas_smp_phy_control(ddev, phy->number, + PHY_FUNC_LINK_RESET, rates); + + } + + return ret; +} + static struct sas_function_template sft = { .phy_reset = sas_phy_reset, + .set_phy_speed = sas_set_phy_speed, .get_linkerrors = sas_get_linkerrors, }; diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h index 0d69ede4b944..bffcee474921 100644 --- a/drivers/scsi/libsas/sas_internal.h +++ b/drivers/scsi/libsas/sas_internal.h @@ -70,7 +70,7 @@ int sas_notify_lldd_dev_found(struct domain_device *); void sas_notify_lldd_dev_gone(struct domain_device *); int sas_smp_phy_control(struct domain_device *dev, int phy_id, - enum phy_func phy_func); + enum phy_func phy_func, struct sas_phy_linkrates *); int sas_smp_get_phy_events(struct sas_phy *phy); struct domain_device *sas_find_dev_by_rphy(struct sas_rphy *rphy); diff --git a/drivers/scsi/libsas/sas_phy.c b/drivers/scsi/libsas/sas_phy.c index 024ab00e70d2..9340cdbae4a3 100644 --- a/drivers/scsi/libsas/sas_phy.c +++ b/drivers/scsi/libsas/sas_phy.c @@ -67,13 +67,14 @@ static void sas_phye_oob_error(void *data) switch (phy->error) { case 1: case 2: - i->dft->lldd_control_phy(phy, PHY_FUNC_HARD_RESET); + i->dft->lldd_control_phy(phy, PHY_FUNC_HARD_RESET, + NULL); break; case 3: default: phy->error = 0; phy->enabled = 0; - i->dft->lldd_control_phy(phy, PHY_FUNC_DISABLE); + i->dft->lldd_control_phy(phy, PHY_FUNC_DISABLE, NULL); break; } } @@ -90,7 +91,7 @@ static void sas_phye_spinup_hold(void *data) &phy->phy_events_pending); phy->error = 0; - i->dft->lldd_control_phy(phy, PHY_FUNC_RELEASE_SPINUP_HOLD); + i->dft->lldd_control_phy(phy, PHY_FUNC_RELEASE_SPINUP_HOLD, NULL); } /* ---------- Phy class registration ---------- */ @@ -144,10 +145,10 @@ int sas_register_phys(struct sas_ha_struct *sas_ha) phy->phy->identify.target_port_protocols = phy->tproto; phy->phy->identify.sas_address = SAS_ADDR(sas_ha->sas_addr); phy->phy->identify.phy_identifier = i; - phy->phy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS; - phy->phy->maximum_linkrate_hw = SAS_LINK_RATE_3_0_GBPS; - phy->phy->minimum_linkrate = SAS_LINK_RATE_1_5_GBPS; - phy->phy->maximum_linkrate = SAS_LINK_RATE_3_0_GBPS; + phy->phy->minimum_linkrate_hw = SAS_LINK_RATE_UNKNOWN; + phy->phy->maximum_linkrate_hw = SAS_LINK_RATE_UNKNOWN; + phy->phy->minimum_linkrate = SAS_LINK_RATE_UNKNOWN; + phy->phy->maximum_linkrate = SAS_LINK_RATE_UNKNOWN; phy->phy->negotiated_linkrate = SAS_LINK_RATE_UNKNOWN; sas_phy_add(phy->phy); diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index 8d91313dd888..8e39982fc3db 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -586,7 +586,7 @@ struct sas_domain_function_template { int (*lldd_clear_nexus_ha)(struct sas_ha_struct *); /* Phy management */ - int (*lldd_control_phy)(struct asd_sas_phy *, enum phy_func); + int (*lldd_control_phy)(struct asd_sas_phy *, enum phy_func, void *); }; extern int sas_register_ha(struct sas_ha_struct *); diff --git a/include/scsi/sas.h b/include/scsi/sas.h index 9c8a5b91ae64..2f4b6afa34fc 100644 --- a/include/scsi/sas.h +++ b/include/scsi/sas.h @@ -121,6 +121,7 @@ enum phy_func { PHY_FUNC_CLEAR_AFFIL, PHY_FUNC_TX_SATA_PS_SIGNAL, PHY_FUNC_RELEASE_SPINUP_HOLD = 0x10, /* LOCAL PORT ONLY! */ + PHY_FUNC_SET_LINK_RATE, }; /* SAS LLDD would need to report only _very_few_ of those, like BROADCAST. From 2b7cbe20174695bca1afe2a8f755e1eb299f4768 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 7 Sep 2006 15:14:46 -0500 Subject: [PATCH 0173/1063] [SCSI] fix up SCSI netlink build CONFIG_SCSI_NETLINK can become a bool since the item its selecting (CONFIG_NET) cannot be a module. Signed-off-by: James Bottomley --- drivers/scsi/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 4d1998d23f0f..a6f920d218a0 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -28,7 +28,7 @@ config SCSI (the one containing the directory /) is located on a SCSI device. config SCSI_NETLINK - tristate + bool default n select NET From 08da3f413f6aa3eb48cfc5331c68e57393167fe5 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sun, 10 Sep 2006 21:09:26 -0400 Subject: [PATCH 0174/1063] [AGPGART] Add suspend callback for i965 Signed-off-by: Dave Jones --- drivers/char/agp/intel-agp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index 42c7d8dec635..d1ede7db5a12 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -1924,6 +1924,8 @@ static int agp_intel_resume(struct pci_dev *pdev) intel_i830_configure(); else if (bridge->driver == &intel_810_driver) intel_i810_configure(); + else if (bridge->driver == &intel_i965_driver) + intel_i915_configure(); return 0; } From edf03fb0575cbee2595a63374b17dc0921f2094a Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sun, 10 Sep 2006 21:12:20 -0400 Subject: [PATCH 0175/1063] [AGPGART] Rework AGPv3 modesetting fallback. Sometimes the logic to handle AGPx8->AGPx4 fallback failed, as can be seen in https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=197346 The failures occured if the bridge was in AGPx8 mode, but the user hadn't specified a mode in their X config. We weren't setting the mode to the highest mode capable by the video card+bridge (as we do in the AGPv2 case), which was leading to all kinds of mayhem including us believing that after falling back from AGPx8, that we couldn't do x4 mode (which is disastrous in AGPv3, as those are the only two modes possible). Signed-off-by: Dave Jones --- drivers/char/agp/generic.c | 39 +++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c index cc5ea347a8a7..0dcdb363923f 100644 --- a/drivers/char/agp/generic.c +++ b/drivers/char/agp/generic.c @@ -568,25 +568,34 @@ static void agp_v3_parse_one(u32 *requested_mode, u32 *bridge_agpstat, u32 *vga_ *bridge_agpstat &= ~(AGPSTAT3_4X | AGPSTAT3_RSVD); goto done; + } else if (*requested_mode & AGPSTAT3_4X) { + *bridge_agpstat &= ~(AGPSTAT3_8X | AGPSTAT3_RSVD); + *bridge_agpstat |= AGPSTAT3_4X; + goto done; + } else { /* - * If we didn't specify AGPx8, we can only do x4. - * If the hardware can't do x4, we're up shit creek, and never - * should have got this far. + * If we didn't specify an AGP mode, we see if both + * the graphics card, and the bridge can do x8, and use if so. + * If not, we fall back to x4 mode. */ - *bridge_agpstat &= ~(AGPSTAT3_8X | AGPSTAT3_RSVD); - if ((*bridge_agpstat & AGPSTAT3_4X) && (*vga_agpstat & AGPSTAT3_4X)) - *bridge_agpstat |= AGPSTAT3_4X; - else { - printk(KERN_INFO PFX "Badness. Don't know which AGP mode to set. " - "[bridge_agpstat:%x vga_agpstat:%x fell back to:- bridge_agpstat:%x vga_agpstat:%x]\n", - origbridge, origvga, *bridge_agpstat, *vga_agpstat); - if (!(*bridge_agpstat & AGPSTAT3_4X)) - printk(KERN_INFO PFX "Bridge couldn't do AGP x4.\n"); - if (!(*vga_agpstat & AGPSTAT3_4X)) - printk(KERN_INFO PFX "Graphic card couldn't do AGP x4.\n"); - return; + if ((*bridge_agpstat & AGPSTAT3_8X) && (*vga_agpstat & AGPSTAT3_8X)) { + printk(KERN_INFO PFX "No AGP mode specified. Setting to highest mode supported by bridge & card (x8).\n"); + *bridge_agpstat &= ~(AGPSTAT3_4X | AGPSTAT3_RSVD); + *vga_agpstat &= ~(AGPSTAT3_4X | AGPSTAT3_RSVD); + } else { + printk(KERN_INFO PFX "Fell back to AGPx4 mode because"); + if (!(*bridge_agpstat & AGPSTAT3_8X)) { + printk("bridge couldn't do x8. bridge_agpstat:%x (orig=%x)\n", *bridge_agpstat, origbridge); + *bridge_agpstat &= ~(AGPSTAT3_8X | AGPSTAT3_RSVD); + *bridge_agpstat |= AGPSTAT3_4X; + } + if (!(*vga_agpstat & AGPSTAT3_8X)) { + printk("graphics card couldn't do x8. vga_agpstat:%x (orig=%x)\n", *vga_agpstat, origvga); + *vga_agpstat &= ~(AGPSTAT3_8X | AGPSTAT3_RSVD); + *vga_agpstat |= AGPSTAT3_4X; + } } } From a506b44bb5000b2652490a906c3e58beb2a8f6bb Mon Sep 17 00:00:00 2001 From: Daniel Walker Date: Sat, 9 Sep 2006 09:31:03 -0700 Subject: [PATCH 0176/1063] [SCSI] fix compile error on module_refcount LD .tmp_vmlinux1 drivers/built-in.o(.text+0x8e1f9): In function `scsi_device_put': drivers/scsi/scsi.c:887: undefined reference to `module_refcount' make: *** [.tmp_vmlinux1] Error 1 There are only two users of module_refcount() outside of kernel/module.c and the other one uses ifdef's similar to this. Signed-Off-By: Daniel Walker Signed-off-by: James Bottomley --- drivers/scsi/scsi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index c35f5fc0d668..c51b5769eac8 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -875,10 +875,12 @@ void scsi_device_put(struct scsi_device *sdev) { struct module *module = sdev->host->hostt->module; +#ifdef CONFIG_MODULE_UNLOAD /* The module refcount will be zero if scsi_device_get() * was called from a module removal routine */ if (module && module_refcount(module) != 0) module_put(module); +#endif put_device(&sdev->sdev_gendev); } EXPORT_SYMBOL(scsi_device_put); From 65396410af63db90d6428c678ff84aa652c3c1ec Mon Sep 17 00:00:00 2001 From: Henrik Kretzschmar Date: Tue, 12 Sep 2006 23:49:33 +0200 Subject: [PATCH 0177/1063] [SCSI] wd33c93: Scsi_Cmnd convertion Changes obsolete typedef'd Scsi_Cmnd to struct scsi_cmnd. Signed-off-by: Henrik Kretzschmar Signed-off-by: James Bottomley --- drivers/scsi/a2091.c | 6 +++--- drivers/scsi/a2091.h | 4 ---- drivers/scsi/a3000.c | 8 ++++---- drivers/scsi/a3000.h | 4 ---- drivers/scsi/gvp11.c | 8 ++++---- drivers/scsi/gvp11.h | 4 ---- drivers/scsi/mvme147.c | 6 +++--- drivers/scsi/mvme147.h | 4 ---- drivers/scsi/sgiwd93.c | 8 ++++---- 9 files changed, 18 insertions(+), 34 deletions(-) diff --git a/drivers/scsi/a2091.c b/drivers/scsi/a2091.c index fddfa2ebcd70..085406928605 100644 --- a/drivers/scsi/a2091.c +++ b/drivers/scsi/a2091.c @@ -40,7 +40,7 @@ static irqreturn_t a2091_intr (int irq, void *_instance, struct pt_regs *fp) return IRQ_HANDLED; } -static int dma_setup (Scsi_Cmnd *cmd, int dir_in) +static int dma_setup(struct scsi_cmnd *cmd, int dir_in) { unsigned short cntr = CNTR_PDMD | CNTR_INTEN; unsigned long addr = virt_to_bus(cmd->SCp.ptr); @@ -115,7 +115,7 @@ static int dma_setup (Scsi_Cmnd *cmd, int dir_in) return 0; } -static void dma_stop (struct Scsi_Host *instance, Scsi_Cmnd *SCpnt, +static void dma_stop(struct Scsi_Host *instance, struct scsi_cmnd *SCpnt, int status) { /* disable SCSI interrupts */ @@ -217,7 +217,7 @@ int __init a2091_detect(struct scsi_host_template *tpnt) return num_a2091; } -static int a2091_bus_reset(Scsi_Cmnd *cmd) +static int a2091_bus_reset(struct scsi_cmnd *cmd) { /* FIXME perform bus-specific reset */ diff --git a/drivers/scsi/a2091.h b/drivers/scsi/a2091.h index 22d6a13dd8be..fe809bc88d73 100644 --- a/drivers/scsi/a2091.h +++ b/drivers/scsi/a2091.h @@ -13,10 +13,6 @@ int a2091_detect(struct scsi_host_template *); int a2091_release(struct Scsi_Host *); -const char *wd33c93_info(void); -int wd33c93_queuecommand(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *)); -int wd33c93_abort(Scsi_Cmnd *); -int wd33c93_reset(Scsi_Cmnd *, unsigned int); #ifndef CMD_PER_LUN #define CMD_PER_LUN 2 diff --git a/drivers/scsi/a3000.c b/drivers/scsi/a3000.c index ae9ab4b136ac..7bf46d40b561 100644 --- a/drivers/scsi/a3000.c +++ b/drivers/scsi/a3000.c @@ -44,7 +44,7 @@ static irqreturn_t a3000_intr (int irq, void *dummy, struct pt_regs *fp) return IRQ_NONE; } -static int dma_setup (Scsi_Cmnd *cmd, int dir_in) +static int dma_setup(struct scsi_cmnd *cmd, int dir_in) { unsigned short cntr = CNTR_PDMD | CNTR_INTEN; unsigned long addr = virt_to_bus(cmd->SCp.ptr); @@ -110,8 +110,8 @@ static int dma_setup (Scsi_Cmnd *cmd, int dir_in) return 0; } -static void dma_stop (struct Scsi_Host *instance, Scsi_Cmnd *SCpnt, - int status) +static void dma_stop(struct Scsi_Host *instance, struct scsi_cmnd *SCpnt, + int status) { /* disable SCSI interrupts */ unsigned short cntr = CNTR_PDMD; @@ -205,7 +205,7 @@ fail_register: return 0; } -static int a3000_bus_reset(Scsi_Cmnd *cmd) +static int a3000_bus_reset(struct scsi_cmnd *cmd) { /* FIXME perform bus-specific reset */ diff --git a/drivers/scsi/a3000.h b/drivers/scsi/a3000.h index 5535a65150a4..44a4ec7b4650 100644 --- a/drivers/scsi/a3000.h +++ b/drivers/scsi/a3000.h @@ -13,10 +13,6 @@ int a3000_detect(struct scsi_host_template *); int a3000_release(struct Scsi_Host *); -const char *wd33c93_info(void); -int wd33c93_queuecommand(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *)); -int wd33c93_abort(Scsi_Cmnd *); -int wd33c93_reset(Scsi_Cmnd *, unsigned int); #ifndef CMD_PER_LUN #define CMD_PER_LUN 2 diff --git a/drivers/scsi/gvp11.c b/drivers/scsi/gvp11.c index a0d831b1bada..18dbe5c27dac 100644 --- a/drivers/scsi/gvp11.c +++ b/drivers/scsi/gvp11.c @@ -47,7 +47,7 @@ void gvp11_setup (char *str, int *ints) gvp11_xfer_mask = ints[1]; } -static int dma_setup (Scsi_Cmnd *cmd, int dir_in) +static int dma_setup(struct scsi_cmnd *cmd, int dir_in) { unsigned short cntr = GVP11_DMAC_INT_ENABLE; unsigned long addr = virt_to_bus(cmd->SCp.ptr); @@ -142,8 +142,8 @@ static int dma_setup (Scsi_Cmnd *cmd, int dir_in) return 0; } -static void dma_stop (struct Scsi_Host *instance, Scsi_Cmnd *SCpnt, - int status) +static void dma_stop(struct Scsi_Host *instance, struct scsi_cmnd *SCpnt, + int status) { /* stop DMA */ DMA(instance)->SP_DMA = 1; @@ -341,7 +341,7 @@ release: return num_gvp11; } -static int gvp11_bus_reset(Scsi_Cmnd *cmd) +static int gvp11_bus_reset(struct scsi_cmnd *cmd) { /* FIXME perform bus-specific reset */ diff --git a/drivers/scsi/gvp11.h b/drivers/scsi/gvp11.h index 575d219d14ba..bf22859a5035 100644 --- a/drivers/scsi/gvp11.h +++ b/drivers/scsi/gvp11.h @@ -13,10 +13,6 @@ int gvp11_detect(struct scsi_host_template *); int gvp11_release(struct Scsi_Host *); -const char *wd33c93_info(void); -int wd33c93_queuecommand(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *)); -int wd33c93_abort(Scsi_Cmnd *); -int wd33c93_reset(Scsi_Cmnd *, unsigned int); #ifndef CMD_PER_LUN #define CMD_PER_LUN 2 diff --git a/drivers/scsi/mvme147.c b/drivers/scsi/mvme147.c index cb367c2c5c78..9b991b746d1e 100644 --- a/drivers/scsi/mvme147.c +++ b/drivers/scsi/mvme147.c @@ -29,7 +29,7 @@ static irqreturn_t mvme147_intr (int irq, void *dummy, struct pt_regs *fp) return IRQ_HANDLED; } -static int dma_setup (Scsi_Cmnd *cmd, int dir_in) +static int dma_setup(struct scsi_cmnd *cmd, int dir_in) { unsigned char flags = 0x01; unsigned long addr = virt_to_bus(cmd->SCp.ptr); @@ -57,7 +57,7 @@ static int dma_setup (Scsi_Cmnd *cmd, int dir_in) return 0; } -static void dma_stop (struct Scsi_Host *instance, Scsi_Cmnd *SCpnt, +static void dma_stop(struct Scsi_Host *instance, struct scsi_cmnd *SCpnt, int status) { m147_pcc->dma_cntrl = 0; @@ -112,7 +112,7 @@ int mvme147_detect(struct scsi_host_template *tpnt) return 0; } -static int mvme147_bus_reset(Scsi_Cmnd *cmd) +static int mvme147_bus_reset(struct scsi_cmnd *cmd) { /* FIXME perform bus-specific reset */ diff --git a/drivers/scsi/mvme147.h b/drivers/scsi/mvme147.h index 2f56d69bd180..32aee85434d8 100644 --- a/drivers/scsi/mvme147.h +++ b/drivers/scsi/mvme147.h @@ -12,10 +12,6 @@ int mvme147_detect(struct scsi_host_template *); int mvme147_release(struct Scsi_Host *); -const char *wd33c93_info(void); -int wd33c93_queuecommand(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *)); -int wd33c93_abort(Scsi_Cmnd *); -int wd33c93_reset(Scsi_Cmnd *, unsigned int); #ifndef CMD_PER_LUN #define CMD_PER_LUN 2 diff --git a/drivers/scsi/sgiwd93.c b/drivers/scsi/sgiwd93.c index 7cd366fcc571..4f1db6f2aae8 100644 --- a/drivers/scsi/sgiwd93.c +++ b/drivers/scsi/sgiwd93.c @@ -97,7 +97,7 @@ static irqreturn_t sgiwd93_intr(int irq, void *dev_id, struct pt_regs *regs) } static inline -void fill_hpc_entries(struct hpc_chunk *hcp, Scsi_Cmnd *cmd, int datainp) +void fill_hpc_entries(struct hpc_chunk *hcp, struct scsi_cmnd *cmd, int datainp) { unsigned long len = cmd->SCp.this_residual; void *addr = cmd->SCp.ptr; @@ -129,7 +129,7 @@ void fill_hpc_entries(struct hpc_chunk *hcp, Scsi_Cmnd *cmd, int datainp) hcp->desc.cntinfo = HPCDMA_EOX; } -static int dma_setup(Scsi_Cmnd *cmd, int datainp) +static int dma_setup(struct scsi_cmnd *cmd, int datainp) { struct ip22_hostdata *hdata = HDATA(cmd->device->host); struct hpc3_scsiregs *hregs = @@ -163,7 +163,7 @@ static int dma_setup(Scsi_Cmnd *cmd, int datainp) return 0; } -static void dma_stop(struct Scsi_Host *instance, Scsi_Cmnd *SCpnt, +static void dma_stop(struct Scsi_Host *instance, struct scsi_cmnd *SCpnt, int status) { struct ip22_hostdata *hdata = HDATA(instance); @@ -305,7 +305,7 @@ static int sgiwd93_release(struct Scsi_Host *instance) return 1; } -static int sgiwd93_bus_reset(Scsi_Cmnd *cmd) +static int sgiwd93_bus_reset(struct scsi_cmnd *cmd) { /* FIXME perform bus-specific reset */ From f50d4cfc98d70f919afb2924b1b53c36b2f4e62f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 24 Aug 2006 16:54:08 +1000 Subject: [PATCH 0178/1063] [POWERPC] Split out vpa unregister logic from pseries_kexec_cpu_down_xics() As part of the new irq code pseries_kexec_cpu_down() was split into a xics and mpic version. The vpa unregister logic is now only done in the xics routine, and although that's ok in practice (we don't have SPLPAR machines with mpic), I'd rather have the two concepts stay separate. So move the vpa unregister into pseries_kexec_cpu_down(), which gets called by both the xics and mpic routines. This also gives us an obvious place to put any new kexec-down logic needed in future. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/pseries/setup.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 1587efc51057..a6398fbe530d 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -223,12 +223,7 @@ static void pseries_lpar_enable_pmcs(void) } #ifdef CONFIG_KEXEC -static void pseries_kexec_cpu_down_mpic(int crash_shutdown, int secondary) -{ - mpic_teardown_this_cpu(secondary); -} - -static void pseries_kexec_cpu_down_xics(int crash_shutdown, int secondary) +static void pseries_kexec_cpu_down(int crash_shutdown, int secondary) { /* Don't risk a hypervisor call if we're crashing */ if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) { @@ -248,6 +243,17 @@ static void pseries_kexec_cpu_down_xics(int crash_shutdown, int secondary) hard_smp_processor_id()); } } +} + +static void pseries_kexec_cpu_down_mpic(int crash_shutdown, int secondary) +{ + pseries_kexec_cpu_down(crash_shutdown, secondary); + mpic_teardown_this_cpu(secondary); +} + +static void pseries_kexec_cpu_down_xics(int crash_shutdown, int secondary) +{ + pseries_kexec_cpu_down(crash_shutdown, secondary); xics_teardown_cpu(secondary); } #endif /* CONFIG_KEXEC */ From c3412dcb75ff4d64b44bedc72761d5707d19edf7 Mon Sep 17 00:00:00 2001 From: Will Schmidt Date: Wed, 30 Aug 2006 13:11:38 -0500 Subject: [PATCH 0179/1063] [POWERPC] Emulate power5 popcntb instruction In an attempt to make it easier for a power5 optimized app to run on a power4 or a 970 or random earlier machine, this provides emulation of the popcntb instruction. Signed-off-by: Will Schmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/traps.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 9b352bd0a460..d9f10f2fc372 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -598,6 +598,9 @@ static void parse_fpe(struct pt_regs *regs) #define INST_STSWI 0x7c0005aa #define INST_STSWX 0x7c00052a +#define INST_POPCNTB 0x7c0000f4 +#define INST_POPCNTB_MASK 0xfc0007fe + static int emulate_string_inst(struct pt_regs *regs, u32 instword) { u8 rT = (instword >> 21) & 0x1f; @@ -666,6 +669,23 @@ static int emulate_string_inst(struct pt_regs *regs, u32 instword) return 0; } +static int emulate_popcntb_inst(struct pt_regs *regs, u32 instword) +{ + u32 ra,rs; + unsigned long tmp; + + ra = (instword >> 16) & 0x1f; + rs = (instword >> 21) & 0x1f; + + tmp = regs->gpr[rs]; + tmp = tmp - ((tmp >> 1) & 0x5555555555555555ULL); + tmp = (tmp & 0x3333333333333333ULL) + ((tmp >> 2) & 0x3333333333333333ULL); + tmp = (tmp + (tmp >> 4)) & 0x0f0f0f0f0f0f0f0fULL; + regs->gpr[ra] = tmp; + + return 0; +} + static int emulate_instruction(struct pt_regs *regs) { u32 instword; @@ -703,6 +723,11 @@ static int emulate_instruction(struct pt_regs *regs) if ((instword & INST_STRING_GEN_MASK) == INST_STRING) return emulate_string_inst(regs, instword); + /* Emulate the popcntb (Population Count Bytes) instruction. */ + if ((instword & INST_POPCNTB_MASK) == INST_POPCNTB) { + return emulate_popcntb_inst(regs, instword); + } + return -EINVAL; } From 477bcae4c289a60f2303fbd4a3a875dcca647cf8 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 6 Sep 2006 09:02:53 -0500 Subject: [PATCH 0180/1063] [POWERPC] Make function of pm_power_off consistent with x86 Allow the pm_power_off function variable in PPC to work as an override. This makes the function consistent with the other architectures and it allows generic poweroff operations (like those provided in IPMI systems) to work properly on PPC. Signed-off-by: Corey Minyard Cc: Joseph Barnett Signed-off-by: Paul Mackerras --- arch/ppc/kernel/setup.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/ppc/kernel/setup.c b/arch/ppc/kernel/setup.c index a74f46d9826f..5458ac5da7c3 100644 --- a/arch/ppc/kernel/setup.c +++ b/arch/ppc/kernel/setup.c @@ -127,11 +127,8 @@ void machine_restart(char *cmd) ppc_md.restart(cmd); } -void machine_power_off(void) +static void ppc_generic_power_off(void) { -#ifdef CONFIG_NVRAM - nvram_sync(); -#endif ppc_md.power_off(); } @@ -143,7 +140,17 @@ void machine_halt(void) ppc_md.halt(); } -void (*pm_power_off)(void) = machine_power_off; +void (*pm_power_off)(void) = ppc_generic_power_off; + +void machine_power_off(void) +{ +#ifdef CONFIG_NVRAM + nvram_sync(); +#endif + if (pm_power_off) + pm_power_off(); + ppc_generic_power_off(); +} #ifdef CONFIG_TAU extern u32 cpu_temp(unsigned long cpu); From b7e89214aadf82fa5eaff28f50f2078fa6ae773c Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Thu, 7 Sep 2006 13:27:58 -0500 Subject: [PATCH 0181/1063] [POWERPC] PPC 4xx: Enable XMON on PPC 4xx boards The following patch allows XMON to run on the 4xx platform. Tested on Walnut, Ebony, and Nova (440GX based) eval boards. 440EP, 440SP, and 440SPE boards should work as well. Patch is against 2.6.18-rc6. Signed-off-by: Josh Boyer Signed-off-by: Paul Mackerras --- arch/ppc/xmon/start.c | 28 ++++++++++++++++++---------- arch/ppc/xmon/xmon.c | 26 ++++++++++++++++++++------ 2 files changed, 38 insertions(+), 16 deletions(-) diff --git a/arch/ppc/xmon/start.c b/arch/ppc/xmon/start.c index f7e92986952a..d74a883e5bde 100644 --- a/arch/ppc/xmon/start.c +++ b/arch/ppc/xmon/start.c @@ -15,6 +15,7 @@ #include #include #include +#include static volatile unsigned char *sccc, *sccd; unsigned int TXRDY, RXRDY, DLAB; @@ -57,23 +58,30 @@ static struct sysrq_key_op sysrq_xmon_op = void xmon_map_scc(void) { -#ifdef CONFIG_PPC_PREP - volatile unsigned char *base; - -#elif defined(CONFIG_GEMINI) +#if defined(CONFIG_GEMINI) /* should already be mapped by the kernel boot */ - sccc = (volatile unsigned char *) 0xffeffb0d; sccd = (volatile unsigned char *) 0xffeffb08; - TXRDY = 0x20; - RXRDY = 1; - DLAB = 0x80; #elif defined(CONFIG_405GP) - sccc = (volatile unsigned char *)0xef600305; sccd = (volatile unsigned char *)0xef600300; +#elif defined(CONFIG_440EP) + sccd = (volatile unsigned char *) ioremap(PPC440EP_UART0_ADDR, 8); +#elif defined(CONFIG_440SP) + sccd = (volatile unsigned char *) ioremap64(PPC440SP_UART0_ADDR, 8); +#elif defined(CONFIG_440SPE) + sccd = (volatile unsigned char *) ioremap64(PPC440SPE_UART0_ADDR, 8); +#elif defined(CONFIG_44x) + /* This is the default for 44x platforms. Any boards that have a + different UART address need to be put in cases before this or the + port will be mapped incorrectly */ + sccd = (volatile unsigned char *) ioremap64(PPC440GP_UART0_ADDR, 8); +#endif /* platform */ + +#ifndef CONFIG_PPC_PREP + sccc = sccd + 5; TXRDY = 0x20; RXRDY = 1; DLAB = 0x80; -#endif /* platform */ +#endif register_sysrq_key('x', &sysrq_xmon_op); } diff --git a/arch/ppc/xmon/xmon.c b/arch/ppc/xmon/xmon.c index 37d234f93394..25d032b2aec7 100644 --- a/arch/ppc/xmon/xmon.c +++ b/arch/ppc/xmon/xmon.c @@ -153,6 +153,12 @@ static int xmon_trace[NR_CPUS]; #define SSTEP 1 /* stepping because of 's' command */ #define BRSTEP 2 /* stepping over breakpoint */ +#ifdef CONFIG_4xx +#define MSR_SSTEP_ENABLE 0x200 +#else +#define MSR_SSTEP_ENABLE 0x400 +#endif + static struct pt_regs *xmon_regs[NR_CPUS]; extern inline void sync(void) @@ -211,6 +217,14 @@ static void get_tb(unsigned *p) p[1] = lo; } +static inline void xmon_enable_sstep(struct pt_regs *regs) +{ + regs->msr |= MSR_SSTEP_ENABLE; +#ifdef CONFIG_4xx + mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); +#endif +} + int xmon(struct pt_regs *excp) { struct pt_regs regs; @@ -254,10 +268,10 @@ int xmon(struct pt_regs *excp) cmd = cmds(excp); if (cmd == 's') { xmon_trace[smp_processor_id()] = SSTEP; - excp->msr |= 0x400; + xmon_enable_sstep(excp); } else if (at_breakpoint(excp->nip)) { xmon_trace[smp_processor_id()] = BRSTEP; - excp->msr |= 0x400; + xmon_enable_sstep(excp); } else { xmon_trace[smp_processor_id()] = 0; insert_bpts(); @@ -298,7 +312,7 @@ xmon_bpt(struct pt_regs *regs) remove_bpts(); excprint(regs); xmon_trace[smp_processor_id()] = BRSTEP; - regs->msr |= 0x400; + xmon_enable_sstep(regs); } else { xmon(regs); } @@ -385,7 +399,7 @@ insert_bpts(void) } store_inst((void *) bp->address); } -#if !defined(CONFIG_8xx) +#if ! (defined(CONFIG_8xx) || defined(CONFIG_4xx)) if (dabr.enabled) set_dabr(dabr.address); if (iabr.enabled) @@ -400,7 +414,7 @@ remove_bpts(void) struct bpt *bp; unsigned instr; -#if !defined(CONFIG_8xx) +#if ! (defined(CONFIG_8xx) || defined(CONFIG_4xx)) set_dabr(0); set_iabr(0); #endif @@ -677,7 +691,7 @@ bpt_cmds(void) cmd = inchar(); switch (cmd) { -#if !defined(CONFIG_8xx) +#if ! (defined(CONFIG_8xx) || defined(CONFIG_4xx)) case 'd': mode = 7; cmd = inchar(); From 5a2fe38d2844ba2f2dd8f4946d795e09d8f7e095 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 6 Sep 2006 14:34:41 -0500 Subject: [PATCH 0182/1063] [POWERPC] powerpc: Reduce default cacheline size to 64 bytes Reduce default cacheline size on 64-bit powerpc from 128 bytes to 64. This is the architected minimum. In most cases we'll still end up using cache line information from the device tree, but defaults are used during early boot and doing a few dcbst/icbi's too many there won't do any harm. Signed-off-by: Olof Johansson Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/head_64.S | 2 +- arch/powerpc/kernel/setup_64.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index e9963d9f335a..3065b472b95d 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -1748,7 +1748,7 @@ _STATIC(__after_prom_start) _GLOBAL(copy_and_flush) addi r5,r5,-8 addi r6,r6,-8 -4: li r0,16 /* Use the least common */ +4: li r0,8 /* Use the smallest common */ /* denominator cache line */ /* size. This results in */ /* extra cache line flushes */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 77efe19ccd2c..00d6b8addd78 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -78,10 +78,10 @@ u64 ppc64_pft_size; * before we've read this from the device tree. */ struct ppc64_caches ppc64_caches = { - .dline_size = 0x80, - .log_dline_size = 7, - .iline_size = 0x80, - .log_iline_size = 7 + .dline_size = 0x40, + .log_dline_size = 6, + .iline_size = 0x40, + .log_iline_size = 6 }; EXPORT_SYMBOL_GPL(ppc64_caches); From 0024300000769eadcb4a4fcdff531d45ee7735d4 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 6 Sep 2006 14:35:19 -0500 Subject: [PATCH 0183/1063] [POWERPC] powerpc: Divorce CPU_FTR_CTRL from CPU_FTR_PPCAS_ARCH_V2_BASE The performance monitor implementation (including CTRL register behaviour) is just included in PPC v2 as an example, it's not truly part of the base. It's actually a somewhat misleading feature, but I'll leave that be for now: The presence of the register is not what the feature bit is used for, but instead it's used to determine if it contains the runlatch bit for idle reporting of the performance monitor. For alternative implementations, the register might still exist but the bit might have different meaning (or no meaning at all). For now, split it off and don't include it in CPU_FTR_PPCAS_ARCH_V2_BASE. Signed-off-by: Olof Johansson Signed-off-by: Paul Mackerras --- include/asm-powerpc/cputable.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h index 748bc1805da9..3608259c49cf 100644 --- a/include/asm-powerpc/cputable.h +++ b/include/asm-powerpc/cputable.h @@ -148,7 +148,7 @@ extern void do_cpu_ftr_fixups(unsigned long offset); #define CPU_FTR_PPCAS_ARCH_V2_BASE (CPU_FTR_SLB | \ CPU_FTR_TLBIEL | CPU_FTR_NOEXECUTE | \ - CPU_FTR_NODSISRALIGN | CPU_FTR_CTRL) + CPU_FTR_NODSISRALIGN) /* iSeries doesn't support large pages */ #ifdef CONFIG_PPC_ISERIES @@ -313,24 +313,25 @@ extern void do_cpu_ftr_fixups(unsigned long offset); CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | \ CPU_FTR_MMCRA | CPU_FTR_CTRL) #define CPU_FTRS_POWER4 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ - CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA) + CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ + CPU_FTR_MMCRA) #define CPU_FTRS_PPC970 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ - CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | \ + CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA) #define CPU_FTRS_POWER5 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ - CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | \ + CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ CPU_FTR_PURR) #define CPU_FTRS_POWER6 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ - CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | \ + CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ CPU_FTR_PURR | CPU_FTR_CI_LARGE_PAGE | CPU_FTR_REAL_LE) #define CPU_FTRS_CELL (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ - CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | \ + CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ - CPU_FTR_CTRL | CPU_FTR_PAUSE_ZERO | CPU_FTR_CI_LARGE_PAGE) + CPU_FTR_PAUSE_ZERO | CPU_FTR_CI_LARGE_PAGE) #define CPU_FTRS_COMPATIBLE (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2) #endif From b3ebd1d862d6c23caa58e40d341eefc426f835e1 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 6 Sep 2006 14:35:57 -0500 Subject: [PATCH 0184/1063] [POWERPC] powerpc: PA6T cputable entry, PVR value Introduce PWRficient PA6T cputable entries and feature bits. Signed-off-by: Olof Johansson Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/cputable.c | 14 ++++++++++++++ include/asm-powerpc/cputable.h | 9 +++++++-- include/asm-powerpc/reg.h | 1 + 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 306da4cd37a0..db65c9f6559a 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -58,6 +58,9 @@ extern void __restore_cpu_ppc970(void); #define COMMON_USER_POWER6 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_05 |\ PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \ PPC_FEATURE_TRUE_LE) +#define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\ + PPC_FEATURE_TRUE_LE | \ + PPC_FEATURE_HAS_ALTIVEC_COMP) #define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ PPC_FEATURE_BOOKE) @@ -286,6 +289,17 @@ struct cpu_spec cpu_specs[] = { .dcache_bsize = 128, .platform = "ppc-cell-be", }, + { /* PA Semi PA6T */ + .pvr_mask = 0x7fff0000, + .pvr_value = 0x00900000, + .cpu_name = "PA6T", + .cpu_features = CPU_FTRS_PA6T, + .cpu_user_features = COMMON_USER_PA6T, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 6, + .platform = "pa6t", + }, { /* default match */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h index 3608259c49cf..12707ab9dc98 100644 --- a/include/asm-powerpc/cputable.h +++ b/include/asm-powerpc/cputable.h @@ -23,6 +23,7 @@ #define PPC_FEATURE_SMT 0x00004000 #define PPC_FEATURE_ICACHE_SNOOP 0x00002000 #define PPC_FEATURE_ARCH_2_05 0x00001000 +#define PPC_FEATURE_PA6T 0x00000800 #define PPC_FEATURE_TRUE_LE 0x00000002 #define PPC_FEATURE_PPC_LE 0x00000001 @@ -332,6 +333,10 @@ extern void do_cpu_ftr_fixups(unsigned long offset); CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_PAUSE_ZERO | CPU_FTR_CI_LARGE_PAGE) +#define CPU_FTRS_PA6T (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ + CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | \ + CPU_FTR_ALTIVEC_COMP | CPU_FTR_CI_LARGE_PAGE | \ + CPU_FTR_PURR | CPU_FTR_REAL_LE) #define CPU_FTRS_COMPATIBLE (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2) #endif @@ -340,7 +345,7 @@ extern void do_cpu_ftr_fixups(unsigned long offset); #define CPU_FTRS_POSSIBLE \ (CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 | \ CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_POWER6 | \ - CPU_FTRS_CELL | CPU_FTR_CI_LARGE_PAGE) + CPU_FTRS_CELL | CPU_FTRS_PA6T) #else enum { CPU_FTRS_POSSIBLE = @@ -379,7 +384,7 @@ enum { #define CPU_FTRS_ALWAYS \ (CPU_FTRS_POWER3 & CPU_FTRS_RS64 & CPU_FTRS_POWER4 & \ CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & CPU_FTRS_POWER6 & \ - CPU_FTRS_CELL & CPU_FTRS_POSSIBLE) + CPU_FTRS_CELL & CPU_FTRS_PA6T & CPU_FTRS_POSSIBLE) #else enum { CPU_FTRS_ALWAYS = diff --git a/include/asm-powerpc/reg.h b/include/asm-powerpc/reg.h index cf73475a0c69..3a9fcc15811b 100644 --- a/include/asm-powerpc/reg.h +++ b/include/asm-powerpc/reg.h @@ -592,6 +592,7 @@ #define PV_630p 0x0041 #define PV_970MP 0x0044 #define PV_BE 0x0070 +#define PV_PA6T 0x0090 /* * Number of entries in the SLB. If this ever changes we should handle From 1e76875e51266a5c43f601ecf08a92be5769228c Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 6 Sep 2006 14:42:08 -0500 Subject: [PATCH 0185/1063] [POWERPC] powerpc: PA Semi PWRficient platform support Base patch for PA6T and PA6T-1682M. This introduces the arch/powerpc/platform/pasemi directory, together with basic implementations for various setup. Much of this was based on other platform code, i.e. Maple, etc. Signed-off-by: Olof Johansson Signed-off-by: Paul Mackerras --- arch/powerpc/Kconfig | 11 ++ arch/powerpc/platforms/Makefile | 1 + arch/powerpc/platforms/pasemi/Makefile | 1 + arch/powerpc/platforms/pasemi/pasemi.h | 8 + arch/powerpc/platforms/pasemi/pci.c | 198 +++++++++++++++++++++++++ arch/powerpc/platforms/pasemi/setup.c | 188 +++++++++++++++++++++++ arch/powerpc/platforms/pasemi/time.c | 29 ++++ 7 files changed, 436 insertions(+) create mode 100644 arch/powerpc/platforms/pasemi/Makefile create mode 100644 arch/powerpc/platforms/pasemi/pasemi.h create mode 100644 arch/powerpc/platforms/pasemi/pci.c create mode 100644 arch/powerpc/platforms/pasemi/setup.c create mode 100644 arch/powerpc/platforms/pasemi/time.c diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 904798fd4e74..c9dcec7f3c61 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -413,6 +413,17 @@ config PPC_MAPLE This option enables support for the Maple 970FX Evaluation Board. For more informations, refer to +config PPC_PASEMI + depends on PPC_MULTIPLATFORM && PPC64 + bool "PA Semi SoC-based platforms" + default n + select MPIC + select PPC_UDBG_16550 + select GENERIC_TBSYNC + help + This option enables support for PA Semi's PWRficient line + of SoC processors, including PA6T-1682M + config PPC_CELL bool default n diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile index 5cf46dc57895..e58fa953a50b 100644 --- a/arch/powerpc/platforms/Makefile +++ b/arch/powerpc/platforms/Makefile @@ -13,5 +13,6 @@ obj-$(CONFIG_PPC_86xx) += 86xx/ obj-$(CONFIG_PPC_PSERIES) += pseries/ obj-$(CONFIG_PPC_ISERIES) += iseries/ obj-$(CONFIG_PPC_MAPLE) += maple/ +obj-$(CONFIG_PPC_PASEMI) += pasemi/ obj-$(CONFIG_PPC_CELL) += cell/ obj-$(CONFIG_EMBEDDED6xx) += embedded6xx/ diff --git a/arch/powerpc/platforms/pasemi/Makefile b/arch/powerpc/platforms/pasemi/Makefile new file mode 100644 index 000000000000..1be1a993c5f5 --- /dev/null +++ b/arch/powerpc/platforms/pasemi/Makefile @@ -0,0 +1 @@ +obj-y += setup.o pci.o time.o diff --git a/arch/powerpc/platforms/pasemi/pasemi.h b/arch/powerpc/platforms/pasemi/pasemi.h new file mode 100644 index 000000000000..fd71d72736b2 --- /dev/null +++ b/arch/powerpc/platforms/pasemi/pasemi.h @@ -0,0 +1,8 @@ +#ifndef _PASEMI_PASEMI_H +#define _PASEMI_PASEMI_H + +extern unsigned long pas_get_boot_time(void); +extern void pas_pci_init(void); +extern void pas_pcibios_fixup(void); + +#endif /* _PASEMI_PASEMI_H */ diff --git a/arch/powerpc/platforms/pasemi/pci.c b/arch/powerpc/platforms/pasemi/pci.c new file mode 100644 index 000000000000..4679c5230413 --- /dev/null +++ b/arch/powerpc/platforms/pasemi/pci.c @@ -0,0 +1,198 @@ +/* + * Copyright (C) 2006 PA Semi, Inc + * + * Authors: Kip Walker, PA Semi + * Olof Johansson, PA Semi + * + * Maintained by: Olof Johansson + * + * Based on arch/powerpc/platforms/maple/pci.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#include +#include + +#include +#include + +#include + +#define PA_PXP_CFA(bus, devfn, off) (((bus) << 20) | ((devfn) << 12) | (off)) + +#define CONFIG_OFFSET_VALID(off) ((off) < 4096) + +static unsigned long pa_pxp_cfg_addr(struct pci_controller *hose, + u8 bus, u8 devfn, int offset) +{ + return ((unsigned long)hose->cfg_data) + PA_PXP_CFA(bus, devfn, offset); +} + +static int pa_pxp_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose; + unsigned long addr; + + hose = pci_bus_to_host(bus); + if (!hose) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (!CONFIG_OFFSET_VALID(offset)) + return PCIBIOS_BAD_REGISTER_NUMBER; + + addr = pa_pxp_cfg_addr(hose, bus->number, devfn, offset); + + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + *val = in_8((u8 *)addr); + break; + case 2: + *val = in_le16((u16 *)addr); + break; + default: + *val = in_le32((u32 *)addr); + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +static int pa_pxp_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose; + unsigned long addr; + + hose = pci_bus_to_host(bus); + if (!hose) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (!CONFIG_OFFSET_VALID(offset)) + return PCIBIOS_BAD_REGISTER_NUMBER; + + addr = pa_pxp_cfg_addr(hose, bus->number, devfn, offset); + + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + out_8((u8 *)addr, val); + (void) in_8((u8 *)addr); + break; + case 2: + out_le16((u16 *)addr, val); + (void) in_le16((u16 *)addr); + break; + default: + out_le32((u32 *)addr, val); + (void) in_le32((u32 *)addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops pa_pxp_ops = { + pa_pxp_read_config, + pa_pxp_write_config, +}; + +static void __init setup_pa_pxp(struct pci_controller *hose) +{ + hose->ops = &pa_pxp_ops; + hose->cfg_data = ioremap(0xe0000000, 0x10000000); +} + +static int __init add_bridge(struct device_node *dev) +{ + struct pci_controller *hose; + + pr_debug("Adding PCI host bridge %s\n", dev->full_name); + + hose = pcibios_alloc_controller(dev); + if (!hose) + return -ENOMEM; + + hose->first_busno = 0; + hose->last_busno = 0xff; + + setup_pa_pxp(hose); + + printk(KERN_INFO "Found PA-PXP PCI host bridge.\n"); + + /* Interpret the "ranges" property */ + /* This also maps the I/O region and sets isa_io/mem_base */ + pci_process_bridge_OF_ranges(hose, dev, 1); + pci_setup_phb_io(hose, 1); + + return 0; +} + + +void __init pas_pcibios_fixup(void) +{ + struct pci_dev *dev = NULL; + + for_each_pci_dev(dev) + pci_read_irq_line(dev); +} + +static void __init pas_fixup_phb_resources(void) +{ + struct pci_controller *hose, *tmp; + + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base; + hose->io_resource.start += offset; + hose->io_resource.end += offset; + printk(KERN_INFO "PCI Host %d, io start: %lx; io end: %lx\n", + hose->global_number, + hose->io_resource.start, hose->io_resource.end); + } +} + + +void __init pas_pci_init(void) +{ + struct device_node *np, *root; + + root = of_find_node_by_path("/"); + if (!root) { + printk(KERN_CRIT "pas_pci_init: can't find root " + "of device tree\n"); + return; + } + + for (np = NULL; (np = of_get_next_child(root, np)) != NULL;) + if (np->name && !strcmp(np->name, "pxp") && !add_bridge(np)) + of_node_get(np); + + of_node_put(root); + + pas_fixup_phb_resources(); + + /* Setup the linkage between OF nodes and PHBs */ + pci_devs_phb_init(); + + /* Use the common resource allocation mechanism */ + pci_probe_only = 1; +} diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c new file mode 100644 index 000000000000..628482671c15 --- /dev/null +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -0,0 +1,188 @@ +/* + * Copyright (C) 2006 PA Semi, Inc + * + * Authors: Kip Walker, PA Semi + * Olof Johansson, PA Semi + * + * Maintained by: Olof Johansson + * + * Based on arch/powerpc/platforms/maple/setup.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "pasemi.h" + +static void pas_restart(char *cmd) +{ + printk("restart unimplemented, looping...\n"); + for (;;) ; +} + +static void pas_power_off(void) +{ + printk("power off unimplemented, looping...\n"); + for (;;) ; +} + +static void pas_halt(void) +{ + pas_power_off(); +} + +#ifdef CONFIG_SMP +struct smp_ops_t pas_smp_ops = { + .probe = smp_mpic_probe, + .message_pass = smp_mpic_message_pass, + .kick_cpu = smp_generic_kick_cpu, + .setup_cpu = smp_mpic_setup_cpu, + .give_timebase = smp_generic_give_timebase, + .take_timebase = smp_generic_take_timebase, +}; +#endif /* CONFIG_SMP */ + +void __init pas_setup_arch(void) +{ +#ifdef CONFIG_SMP + /* Setup SMP callback */ + smp_ops = &pas_smp_ops; +#endif + /* Lookup PCI hosts */ + pas_pci_init(); + +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif + + printk(KERN_DEBUG "Using default idle loop\n"); +} + +static void iommu_dev_setup_null(struct pci_dev *dev) { } +static void iommu_bus_setup_null(struct pci_bus *bus) { } + +static void __init pas_init_early(void) +{ + /* No iommu code yet */ + ppc_md.iommu_dev_setup = iommu_dev_setup_null; + ppc_md.iommu_bus_setup = iommu_bus_setup_null; + pci_direct_iommu_init(); +} + +/* No legacy IO on our parts */ +static int pas_check_legacy_ioport(unsigned int baseport) +{ + return -ENODEV; +} + +static __init void pas_init_IRQ(void) +{ + struct device_node *np; + struct device_node *root, *mpic_node; + unsigned long openpic_addr; + const unsigned int *opprop; + int naddr, opplen; + struct mpic *mpic; + + mpic_node = NULL; + + for_each_node_by_type(np, "interrupt-controller") + if (device_is_compatible(np, "open-pic")) { + mpic_node = np; + break; + } + if (!mpic_node) + for_each_node_by_type(np, "open-pic") { + mpic_node = np; + break; + } + if (!mpic_node) { + printk(KERN_ERR + "Failed to locate the MPIC interrupt controller\n"); + return; + } + + /* Find address list in /platform-open-pic */ + root = of_find_node_by_path("/"); + naddr = prom_n_addr_cells(root); + opprop = get_property(root, "platform-open-pic", &opplen); + if (!opprop) { + printk(KERN_ERR "No platform-open-pic property.\n"); + of_node_put(root); + return; + } + openpic_addr = of_read_number(opprop, naddr); + printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr); + of_node_put(root); + + mpic = mpic_alloc(mpic_node, openpic_addr, MPIC_PRIMARY, 0, 0, + " PAS-OPIC "); + BUG_ON(!mpic); + + mpic_assign_isu(mpic, 0, openpic_addr + 0x10000); + mpic_init(mpic); + of_node_put(mpic_node); + of_node_put(root); +} + +static void __init pas_progress(char *s, unsigned short hex) +{ + printk("[%04x] : %s\n", hex, s ? s : ""); +} + + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init pas_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (!of_flat_dt_is_compatible(root, "PA6T-1682M")) + return 0; + + hpte_init_native(); + + return 1; +} + +define_machine(pas) { + .name = "PA Semi PA6T-1682M", + .probe = pas_probe, + .setup_arch = pas_setup_arch, + .init_early = pas_init_early, + .init_IRQ = pas_init_IRQ, + .get_irq = mpic_get_irq, + .pcibios_fixup = pas_pcibios_fixup, + .restart = pas_restart, + .power_off = pas_power_off, + .halt = pas_halt, + .get_boot_time = pas_get_boot_time, + .calibrate_decr = generic_calibrate_decr, + .check_legacy_ioport = pas_check_legacy_ioport, + .progress = pas_progress, +}; diff --git a/arch/powerpc/platforms/pasemi/time.c b/arch/powerpc/platforms/pasemi/time.c new file mode 100644 index 000000000000..9bd410b8fec6 --- /dev/null +++ b/arch/powerpc/platforms/pasemi/time.c @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2006 PA Semi, Inc + * + * Maintained by: Olof Johansson + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include + +#include + +unsigned long __init pas_get_boot_time(void) +{ + /* Let's just return a fake date right now */ + return mktime(2006, 1, 1, 12, 0, 0); +} From ab06ff3af34a6288b314862abfebd86ad918c5d9 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 6 Sep 2006 14:44:54 -0500 Subject: [PATCH 0186/1063] [POWERPC] powerpc: PA Semi PWRficient MAINTAINER entry Maintainer entry for PWRficient Signed-off-by: Olof Johansson Signed-off-by: Paul Mackerras --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 25cd7073a20b..7e86286ddf1e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1783,6 +1783,13 @@ W: http://www.penguinppc.org/ L: linuxppc-embedded@ozlabs.org S: Maintained +LINUX FOR POWERPC PA SEMI PWRFICIENT +P: Olof Johansson +M: olof@lixom.net +W: http://www.pasemi.com/ +L: linuxppc-dev@ozlabs.org +S: Supported + LLC (802.2) P: Arnaldo Carvalho de Melo M: acme@conectiva.com.br From 57852a853b0d6761f270be0961d5d8387e98c8bb Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 6 Sep 2006 16:23:12 -0700 Subject: [PATCH 0187/1063] [POWERPC] powerpc: Instrument Hypervisor Calls Add instrumentation for hypervisor calls on pseries. Call statistics include number of calls, wall time and cpu cycles (if available) and are made available via debugfs. Instrumentation code is behind the HCALL_STATS config option and has no impact if not enabled. Signed-off-by: Mike Kravetz Signed-off-by: Paul Mackerras --- arch/powerpc/Kconfig.debug | 14 ++ arch/powerpc/kernel/asm-offsets.c | 7 + arch/powerpc/platforms/pseries/Makefile | 1 + arch/powerpc/platforms/pseries/hvCall.S | 72 +++++++++++ arch/powerpc/platforms/pseries/hvCall_inst.c | 129 +++++++++++++++++++ include/asm-powerpc/hvcall.h | 12 +- 6 files changed, 234 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/platforms/pseries/hvCall_inst.c diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index e29ef77d3b00..d7b2aedd89aa 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -18,6 +18,20 @@ config DEBUG_STACK_USAGE This option will slow down process creation somewhat. +config HCALL_STATS + bool "Hypervisor call instrumentation" + depends on PPC_PSERIES && DEBUG_FS + help + Adds code to keep track of the number of hypervisor calls made and + the amount of time spent in hypervisor callsr. Wall time spent in + each call is always calculated, and if available CPU cycles spent + are also calculated. A directory named hcall_inst is added at the + root of the debugfs filesystem. Within the hcall_inst directory + are files that contain CPU specific call statistics. + + This option will add a small amount of overhead to all hypervisor + calls. + config DEBUGGER bool "Enable debugger hooks" depends on DEBUG_KERNEL diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c53acd2a6dfc..c578e7ab8173 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -137,6 +137,7 @@ int main(void) DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); DEFINE(PACA_SLBSHADOWPTR, offsetof(struct paca_struct, slb_shadow_ptr)); + DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset)); DEFINE(SLBSHADOW_STACKVSID, offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid)); @@ -165,6 +166,12 @@ int main(void) /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */ DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); + + /* hcall statistics */ + DEFINE(HCALL_STAT_SIZE, sizeof(struct hcall_stats)); + DEFINE(HCALL_STAT_CALLS, offsetof(struct hcall_stats, num_calls)); + DEFINE(HCALL_STAT_TB, offsetof(struct hcall_stats, tb_total)); + DEFINE(HCALL_STAT_PURR, offsetof(struct hcall_stats, purr_total)); #endif /* CONFIG_PPC64 */ DEFINE(GPR0, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[0])); DEFINE(GPR1, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[1])); diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index e5e0ff466904..997243a91be8 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -12,3 +12,4 @@ obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_event.o obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o obj-$(CONFIG_HVCS) += hvcserver.o +obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index 9a99b056bd27..c00cfed7af2c 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -10,9 +10,69 @@ #include #include #include +#include #define STK_PARM(i) (48 + ((i)-3)*8) +#ifdef CONFIG_HCALL_STATS +/* + * precall must preserve all registers. use unused STK_PARM() + * areas to save snapshots and opcode. + */ +#define HCALL_INST_PRECALL \ + std r3,STK_PARM(r3)(r1); /* save opcode */ \ + mftb r0; /* get timebase and */ \ + std r0,STK_PARM(r5)(r1); /* save for later */ \ +BEGIN_FTR_SECTION; \ + mfspr r0,SPRN_PURR; /* get PURR and */ \ + std r0,STK_PARM(r6)(r1); /* save for later */ \ +END_FTR_SECTION_IFCLR(CPU_FTR_PURR); + +/* + * postcall is performed immediately before function return which + * allows liberal use of volatile registers. + */ +#define HCALL_INST_POSTCALL \ + ld r4,STK_PARM(r3)(r1); /* validate opcode */ \ + cmpldi cr7,r4,MAX_HCALL_OPCODE; \ + bgt- cr7,1f; \ + \ + /* get time and PURR snapshots after hcall */ \ + mftb r7; /* timebase after */ \ +BEGIN_FTR_SECTION; \ + mfspr r8,SPRN_PURR; /* PURR after */ \ + ld r6,STK_PARM(r6)(r1); /* PURR before */ \ + subf r6,r6,r8; /* delta */ \ +END_FTR_SECTION_IFCLR(CPU_FTR_PURR); \ + ld r5,STK_PARM(r5)(r1); /* timebase before */ \ + subf r5,r5,r7; /* time delta */ \ + \ + /* calculate address of stat structure r4 = opcode */ \ + srdi r4,r4,2; /* index into array */ \ + mulli r4,r4,HCALL_STAT_SIZE; \ + LOAD_REG_ADDR(r7, per_cpu__hcall_stats); \ + add r4,r4,r7; \ + ld r7,PACA_DATA_OFFSET(r13); /* per cpu offset */ \ + add r4,r4,r7; \ + \ + /* update stats */ \ + ld r7,HCALL_STAT_CALLS(r4); /* count */ \ + addi r7,r7,1; \ + std r7,HCALL_STAT_CALLS(r4); \ + ld r7,HCALL_STAT_TB(r4); /* timebase */ \ + add r7,r7,r5; \ + std r7,HCALL_STAT_TB(r4); \ +BEGIN_FTR_SECTION; \ + ld r7,HCALL_STAT_PURR(r4); /* PURR */ \ + add r7,r7,r6; \ + std r7,HCALL_STAT_PURR(r4); \ +END_FTR_SECTION_IFCLR(CPU_FTR_PURR); \ +1: +#else +#define HCALL_INST_PRECALL +#define HCALL_INST_POSTCALL +#endif + .text _GLOBAL(plpar_hcall_norets) @@ -21,8 +81,12 @@ _GLOBAL(plpar_hcall_norets) mfcr r0 stw r0,8(r1) + HCALL_INST_PRECALL + HVSC /* invoke the hypervisor */ + HCALL_INST_POSTCALL + lwz r0,8(r1) mtcrf 0xff,r0 blr /* return r3 = status */ @@ -33,6 +97,8 @@ _GLOBAL(plpar_hcall) mfcr r0 stw r0,8(r1) + HCALL_INST_PRECALL + std r4,STK_PARM(r4)(r1) /* Save ret buffer */ mr r4,r5 @@ -50,6 +116,8 @@ _GLOBAL(plpar_hcall) std r6, 16(r12) std r7, 24(r12) + HCALL_INST_POSTCALL + lwz r0,8(r1) mtcrf 0xff,r0 @@ -61,6 +129,8 @@ _GLOBAL(plpar_hcall9) mfcr r0 stw r0,8(r1) + HCALL_INST_PRECALL + std r4,STK_PARM(r4)(r1) /* Save ret buffer */ mr r4,r5 @@ -86,6 +156,8 @@ _GLOBAL(plpar_hcall9) std r11,56(r12) std r12,64(r12) + HCALL_INST_POSTCALL + lwz r0,8(r1) mtcrf 0xff,r0 diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c new file mode 100644 index 000000000000..641e6511cf06 --- /dev/null +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -0,0 +1,129 @@ +/* + * Copyright (C) 2006 Mike Kravetz IBM Corporation + * + * Hypervisor Call Instrumentation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); + +/* + * Routines for displaying the statistics in debugfs + */ +static void *hc_start(struct seq_file *m, loff_t *pos) +{ + if ((int)*pos < HCALL_STAT_ARRAY_SIZE) + return (void *)(unsigned long)(*pos + 1); + + return NULL; +} + +static void *hc_next(struct seq_file *m, void *p, loff_t * pos) +{ + ++*pos; + + return hc_start(m, pos); +} + +static void hc_stop(struct seq_file *m, void *p) +{ +} + +static int hc_show(struct seq_file *m, void *p) +{ + unsigned long h_num = (unsigned long)p; + struct hcall_stats *hs = (struct hcall_stats *)m->private; + + if (hs[h_num].num_calls) { + if (!cpu_has_feature(CPU_FTR_PURR)) + seq_printf(m, "%lu %lu %lu %lu\n", h_num<<2, + hs[h_num].num_calls, + hs[h_num].tb_total, + hs[h_num].purr_total); + else + seq_printf(m, "%lu %lu %lu\n", h_num<<2, + hs[h_num].num_calls, + hs[h_num].tb_total); + } + + return 0; +} + +static struct seq_operations hcall_inst_seq_ops = { + .start = hc_start, + .next = hc_next, + .stop = hc_stop, + .show = hc_show +}; + +static int hcall_inst_seq_open(struct inode *inode, struct file *file) +{ + int rc; + struct seq_file *seq; + + rc = seq_open(file, &hcall_inst_seq_ops); + seq = file->private_data; + seq->private = file->f_dentry->d_inode->u.generic_ip; + + return rc; +} + +static struct file_operations hcall_inst_seq_fops = { + .open = hcall_inst_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +#define HCALL_ROOT_DIR "hcall_inst" +#define CPU_NAME_BUF_SIZE 32 + +static int __init hcall_inst_init(void) +{ + struct dentry *hcall_root; + struct dentry *hcall_file; + char cpu_name_buf[CPU_NAME_BUF_SIZE]; + int cpu; + + if (!firmware_has_feature(FW_FEATURE_LPAR)) + return 0; + + hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL); + if (!hcall_root) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + snprintf(cpu_name_buf, CPU_NAME_BUF_SIZE, "cpu%d", cpu); + hcall_file = debugfs_create_file(cpu_name_buf, S_IRUGO, + hcall_root, + per_cpu(hcall_stats, cpu), + &hcall_inst_seq_fops); + if (!hcall_file) + return -ENOMEM; + } + + return 0; +} +__initcall(hcall_inst_init); diff --git a/include/asm-powerpc/hvcall.h b/include/asm-powerpc/hvcall.h index 63ce1ac8c1f4..257d1cecb8c9 100644 --- a/include/asm-powerpc/hvcall.h +++ b/include/asm-powerpc/hvcall.h @@ -208,7 +208,7 @@ #define H_JOIN 0x298 #define H_VASI_STATE 0x2A4 #define H_ENABLE_CRQ 0x2B0 -#define MAX_HCALL_OPCODES (H_ENABLE_CRQ >> 2) +#define MAX_HCALL_OPCODE H_ENABLE_CRQ #ifndef __ASSEMBLY__ @@ -246,6 +246,16 @@ long plpar_hcall(unsigned long opcode, unsigned long *retbuf, ...); #define PLPAR_HCALL9_BUFSIZE 9 long plpar_hcall9(unsigned long opcode, unsigned long *retbuf, ...); +/* For hcall instrumentation. One structure per-hcall, per-CPU */ +struct hcall_stats { + unsigned long num_calls; /* number of calls (on this CPU) */ + unsigned long tb_total; /* total wall time (mftb) of calls. */ + unsigned long purr_total; /* total cpu time (PURR) of calls. */ +}; +void update_hcall_stats(unsigned long opcode, unsigned long tb_delta, + unsigned long purr_delta); +#define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1) + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HVCALL_H */ From 06e6d290ac7a9fb6fcec3a2207988163709f06aa Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Thu, 7 Sep 2006 08:25:40 -0500 Subject: [PATCH 0188/1063] [POWERPC] PPC: Fix Kconfig whitespace warnings Fix the following whitespace warnings when compiling with ARCH=ppc arch/ppc/Kconfig:1207:warning: leading whitespace ignored arch/ppc/Kconfig:1226:warning: leading whitespace ignored arch/ppc/Kconfig:1231:warning: leading whitespace ignored Also fix a typo ("Supprt"). Signed-off-by: Josh Boyer Signed-off-by: Paul Mackerras --- arch/ppc/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/ppc/Kconfig b/arch/ppc/Kconfig index a04cdf01596b..8fa10cf661a8 100644 --- a/arch/ppc/Kconfig +++ b/arch/ppc/Kconfig @@ -1204,7 +1204,7 @@ config PCI_DOMAINS default PCI config MPC83xx_PCI2 - bool " Supprt for 2nd PCI host controller" + bool "Support for 2nd PCI host controller" depends on PCI && MPC834x default y if MPC834x_SYS @@ -1223,12 +1223,12 @@ config PCI_8260 default y config 8260_PCI9 - bool " Enable workaround for MPC826x erratum PCI 9" + bool "Enable workaround for MPC826x erratum PCI 9" depends on PCI_8260 && !ADS8272 default y choice - prompt " IDMA channel for PCI 9 workaround" + prompt "IDMA channel for PCI 9 workaround" depends on 8260_PCI9 config 8260_PCI9_IDMA1 From 87fd7724d4022913ae8dbee3ed55cd04f2c316a6 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Thu, 7 Sep 2006 15:18:08 -0500 Subject: [PATCH 0189/1063] [POWERPC] Quiet hvc_console console output on failed opens No other tty driver will print on the console when the open of it fails. On systems that happen to be configured for both ttyS0 and hvc0 console, this will keep flooding the console output. This is most likely to happen with systems booted between with and without hypervisor from the same filesystem. Let's just remove it. When it's really needed (i.e. when the open fails and someone is trying to debug it), noone will see the output anyway. And init will report the opens failing in due time through the syslog. Signed-off-by: Olof Johansson Acked-by: Ryan S. Arnold Signed-off-by: Paul Mackerras --- drivers/char/hvc_console.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index dbee8bed0530..1d1bd34b7f12 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -320,10 +320,8 @@ static int hvc_open(struct tty_struct *tty, struct file * filp) struct kobject *kobjp; /* Auto increments kobject reference if found. */ - if (!(hp = hvc_get_by_index(tty->index))) { - printk(KERN_WARNING "hvc_console: tty open failed, no vty associated with tty.\n"); + if (!(hp = hvc_get_by_index(tty->index))) return -ENODEV; - } spin_lock_irqsave(&hp->lock, flags); /* Check and then increment for fast path open. */ From 26c8af5f01dfb91f709cc2ba07fb650949aae13e Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Fri, 8 Sep 2006 16:29:21 +0200 Subject: [PATCH 0190/1063] [POWERPC] print backtrace when entering xmon xmon does not print a backtrace per default. This is bad on systems with USB keyboard, the most needed info about the crash is lost. print a backtrace during the very first xmon entry. Booting with xmon=nobt disables the autobacktrace functionality. Signed-off-by: Olaf Hering Signed-off-by: Paul Mackerras --- arch/powerpc/Kconfig.debug | 2 ++ arch/powerpc/kernel/setup-common.c | 4 ++++ arch/powerpc/xmon/xmon.c | 10 ++++++++++ 3 files changed, 16 insertions(+) diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index d7b2aedd89aa..5ad149b47e34 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -88,6 +88,8 @@ config XMON very early during boot. 'xmon=on' will just enable the xmon debugger hooks. 'xmon=off' will disable the debugger hooks if CONFIG_XMON_DEFAULT is set. + xmon will print a backtrace on the very first invocation. + 'xmon=nobt' will disable this autobacktrace. config XMON_DEFAULT bool "Enable xmon by default" diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index d57930d86faa..465e7435efbc 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -441,6 +441,8 @@ void __init smp_setup_cpu_maps(void) int __initdata do_early_xmon; #ifdef CONFIG_XMON +extern int xmon_no_auto_backtrace; + static int __init early_xmon(char *p) { /* ensure xmon is enabled */ @@ -449,6 +451,8 @@ static int __init early_xmon(char *p) xmon_init(1); if (strncmp(p, "off", 3) == 0) xmon_init(0); + if (strncmp(p, "nobt", 4) == 0) + xmon_no_auto_backtrace = 1; if (strncmp(p, "early", 5) != 0) return 0; } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 179b10ced8c7..8adad1444a51 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -137,10 +137,14 @@ static void bootcmds(void); static void proccall(void); void dump_segments(void); static void symbol_lookup(void); +static void xmon_show_stack(unsigned long sp, unsigned long lr, + unsigned long pc); static void xmon_print_symbol(unsigned long address, const char *mid, const char *after); static const char *getvecname(unsigned long vec); +int xmon_no_auto_backtrace; + extern int print_insn_powerpc(unsigned long, unsigned long, int); extern void xmon_enter(void); @@ -736,6 +740,12 @@ cmds(struct pt_regs *excp) last_cmd = NULL; xmon_regs = excp; + + if (!xmon_no_auto_backtrace) { + xmon_no_auto_backtrace = 1; + xmon_show_stack(excp->gpr[1], excp->link, excp->nip); + } + for(;;) { #ifdef CONFIG_SMP printf("%x:", smp_processor_id()); From 3dd836a56de0d4f049438412959b905e1db4666e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 Sep 2006 16:04:25 +0100 Subject: [PATCH 0191/1063] [POWERPC] Export copy_4K_page() Export copy_4K_page() for use by modules via copy_page() (such as CacheFiles). Signed-Off-By: David Howells Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/ppc_ksyms.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 39d3bfcabcd2..b2edac8ddf0a 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -91,6 +91,9 @@ EXPORT_SYMBOL(__copy_tofrom_user); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__strncpy_from_user); EXPORT_SYMBOL(__strnlen_user); +#ifdef CONFIG_PPC64 +EXPORT_SYMBOL(copy_4K_page); +#endif #ifndef __powerpc64__ EXPORT_SYMBOL(__ide_mm_insl); From f04da0bc36566ad17cf21e4ac8dbae377ca1dc75 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 13 Sep 2006 13:32:39 -0500 Subject: [PATCH 0192/1063] [POWERPC] Fix non-smp build This fixes a compile error that only surfaces on CONFIG_SMP=n builds; seems to get pulled in through another header file for SMP builds. This problem was introduced by the hvcall stats patch. Signed-off-by: Olof Johansson Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/asm-offsets.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c578e7ab8173..d06f378597bb 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -43,6 +43,7 @@ #include #include #include +#include #endif #define DEFINE(sym, val) \ From 7dcd86e14319f4ceab883787ab2e00a5f860d14d Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Wed, 13 Sep 2006 17:41:55 -0500 Subject: [PATCH 0193/1063] [POWERPC] Fix MPC8349EMDS dts PCI interrupt-map values for IDSEL 0x18 Fix MPC8349EMDS dts PCI interrupt-map values for IDSEL 0x18 per Tanya's catch. Signed-off-by: Kim Phillips Signed-off-by: Tanya Jiang Signed-off-by: Paul Mackerras --- arch/powerpc/boot/dts/mpc8349emds.dts | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/boot/dts/mpc8349emds.dts b/arch/powerpc/boot/dts/mpc8349emds.dts index 12f5dbf3055f..efceb3432653 100644 --- a/arch/powerpc/boot/dts/mpc8349emds.dts +++ b/arch/powerpc/boot/dts/mpc8349emds.dts @@ -214,10 +214,10 @@ b800 0 0 4 700 15 8 /* IDSEL 0x18 */ - b000 0 0 1 700 15 8 - b000 0 0 2 700 16 8 - b000 0 0 3 700 17 8 - b000 0 0 4 700 14 8>; + c000 0 0 1 700 15 8 + c000 0 0 2 700 16 8 + c000 0 0 3 700 17 8 + c000 0 0 4 700 14 8>; interrupt-parent = <700>; interrupts = <42 8>; bus-range = <0 0>; @@ -274,10 +274,10 @@ b800 0 0 4 700 15 8 /* IDSEL 0x18 */ - b000 0 0 1 700 15 8 - b000 0 0 2 700 16 8 - b000 0 0 3 700 17 8 - b000 0 0 4 700 14 8>; + c000 0 0 1 700 15 8 + c000 0 0 2 700 16 8 + c000 0 0 3 700 17 8 + c000 0 0 4 700 14 8>; interrupt-parent = <700>; interrupts = <42 8>; bus-range = <0 0>; From d882687c51b52424a56992578ce7636b3f3c8d41 Mon Sep 17 00:00:00 2001 From: Havasi Ferenc Date: Tue, 5 Sep 2006 16:08:58 +0200 Subject: [PATCH 0194/1063] [JFFS2][SUMMARY] Fix a summary collecting bug. In some special case (padding because of sync or umount) it can be possible that summary information is not fit to the end of the erase block. In these cases the collecting of summary is disabled for this erase block. The problem was that this was not respected by jffs2_sum_add_kvec(). This patch fix this bug. From: Zoltan Sogor Signed-off-by: Ferenc Havasi Signed-off-by: David Woodhouse --- fs/jffs2/summary.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c index c19bd476e8ec..e52cef526d90 100644 --- a/fs/jffs2/summary.c +++ b/fs/jffs2/summary.c @@ -252,6 +252,11 @@ int jffs2_sum_add_kvec(struct jffs2_sb_info *c, const struct kvec *invecs, union jffs2_node_union *node; struct jffs2_eraseblock *jeb; + if (c->summary->sum_size == JFFS2_SUMMARY_NOSUM_SIZE) { + dbg_summary("Summary is disabled for this jeb! Skipping summary info!\n"); + return 0; + } + node = invecs[0].iov_base; jeb = &c->blocks[ofs / c->sector_size]; ofs -= jeb->offset; From de591dacf3034977b3fb94b61d08240c8b35c39d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5vard=20Skinnemoen?= Date: Fri, 15 Sep 2006 17:19:31 +0200 Subject: [PATCH 0195/1063] MTD: Fix bug in fixup_convert_atmel_pri MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memset() in fixup_convert_atmel_pri is supposed to zero out everything except the first 5 bytes in *extp, but it ends up zeroing out something way outside the struct instead. Fix this potentially dangerous code by casting the pointer to char * before doing arithmetic. Signed-off-by: HÃ¥vard Skinnemoen Signed-off-by: David Woodhouse --- drivers/mtd/chips/cfi_cmdset_0002.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index ddc5bd783354..a482e8922de1 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -175,7 +175,7 @@ static void fixup_convert_atmel_pri(struct mtd_info *mtd, void *param) struct cfi_pri_atmel atmel_pri; memcpy(&atmel_pri, extp, sizeof(atmel_pri)); - memset(extp + 5, 0, sizeof(*extp) - 5); + memset((char *)extp + 5, 0, sizeof(*extp) - 5); if (atmel_pri.Features & 0x02) extp->EraseSuspend = 2; From ea59830db01b6b3d6bda9f84e3d272a346115e8e Mon Sep 17 00:00:00 2001 From: Josef 'Jeff' Sipek Date: Sat, 16 Sep 2006 21:09:29 -0400 Subject: [PATCH 0196/1063] [MTD] Use SEEK_{SET,CUR,END} instead of hardcoded values in mtdchar lseek() Signed-off-by: Josef 'Jeff' Sipek Signed-off-by: David Woodhouse --- drivers/mtd/mtdchar.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index fb8b4f7e48d3..5b6acfcb2b88 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -62,15 +62,12 @@ static loff_t mtd_lseek (struct file *file, loff_t offset, int orig) struct mtd_info *mtd = mfi->mtd; switch (orig) { - case 0: - /* SEEK_SET */ + case SEEK_SET: break; - case 1: - /* SEEK_CUR */ + case SEEK_CUR: offset += file->f_pos; break; - case 2: - /* SEEK_END */ + case SEEK_END: offset += mtd->size; break; default: From fadcfa33b6319a5faf8af2287f08bf93a7f926b6 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 19 Sep 2006 12:43:58 +0100 Subject: [PATCH 0197/1063] [HEADERS] One line per header in Kbuild files to reduce conflicts Signed-off-by: David Woodhouse --- include/Kbuild | 11 +- include/asm-alpha/Kbuild | 10 +- include/asm-generic/Kbuild | 15 +- include/asm-generic/Kbuild.asm | 38 ++- include/asm-i386/Kbuild | 9 +- include/asm-ia64/Kbuild | 18 +- include/asm-powerpc/Kbuild | 45 ++- include/asm-s390/Kbuild | 11 +- include/asm-sparc/Kbuild | 24 +- include/asm-sparc64/Kbuild | 27 +- include/asm-x86_64/Kbuild | 18 +- include/linux/Kbuild | 400 ++++++++++++++++++++++---- include/linux/byteorder/Kbuild | 9 +- include/linux/dvb/Kbuild | 11 +- include/linux/netfilter/Kbuild | 47 ++- include/linux/netfilter_arp/Kbuild | 5 +- include/linux/netfilter_bridge/Kbuild | 21 +- include/linux/netfilter_ipv4/Kbuild | 82 ++++-- include/linux/netfilter_ipv6/Kbuild | 27 +- include/linux/nfsd/Kbuild | 9 +- include/linux/raid/Kbuild | 3 +- include/linux/sunrpc/Kbuild | 2 +- include/linux/tc_act/Kbuild | 5 +- include/linux/tc_ematch/Kbuild | 5 +- include/mtd/Kbuild | 8 +- include/rdma/Kbuild | 2 +- include/scsi/Kbuild | 4 +- include/sound/Kbuild | 12 +- include/video/Kbuild | 2 +- 29 files changed, 721 insertions(+), 159 deletions(-) diff --git a/include/Kbuild b/include/Kbuild index cb2534800b19..2d03f995865f 100644 --- a/include/Kbuild +++ b/include/Kbuild @@ -1,2 +1,9 @@ -header-y += asm-generic/ linux/ scsi/ sound/ mtd/ rdma/ video/ -header-y += asm-$(ARCH)/ +header-y += asm-generic/ +header-y += linux/ +header-y += scsi/ +header-y += sound/ +header-y += mtd/ +header-y += rdma/ +header-y += video/ + +header-y += asm-$(ARCH)/ diff --git a/include/asm-alpha/Kbuild b/include/asm-alpha/Kbuild index 2b06b3bad5ff..b7c8f188b313 100644 --- a/include/asm-alpha/Kbuild +++ b/include/asm-alpha/Kbuild @@ -1,5 +1,11 @@ include include/asm-generic/Kbuild.asm -unifdef-y += console.h fpu.h sysinfo.h compiler.h +header-y += gentrap.h +header-y += regdef.h +header-y += pal.h +header-y += reg.h -header-y += gentrap.h regdef.h pal.h reg.h +unifdef-y += console.h +unifdef-y += fpu.h +unifdef-y += sysinfo.h +unifdef-y += compiler.h diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index 70594b275a6e..3c06be381701 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -1,3 +1,12 @@ -header-y += atomic.h errno-base.h errno.h fcntl.h ioctl.h ipc.h mman.h \ - signal.h statfs.h -unifdef-y := resource.h siginfo.h +header-y += atomic.h +header-y += errno-base.h +header-y += errno.h +header-y += fcntl.h +header-y += ioctl.h +header-y += ipc.h +header-y += mman.h +header-y += signal.h +header-y += statfs.h + +unifdef-y += resource.h +unifdef-y += siginfo.h diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm index c00de6028fa8..a84c3d88a189 100644 --- a/include/asm-generic/Kbuild.asm +++ b/include/asm-generic/Kbuild.asm @@ -1,8 +1,34 @@ -unifdef-y += a.out.h auxvec.h byteorder.h errno.h fcntl.h ioctl.h \ - ioctls.h ipcbuf.h mman.h msgbuf.h param.h poll.h \ - posix_types.h ptrace.h resource.h sembuf.h shmbuf.h shmparam.h \ - sigcontext.h siginfo.h signal.h socket.h sockios.h stat.h \ - statfs.h termbits.h termios.h types.h unistd.h user.h +unifdef-y += a.out.h +unifdef-y += auxvec.h +unifdef-y += byteorder.h +unifdef-y += errno.h +unifdef-y += fcntl.h +unifdef-y += ioctl.h +unifdef-y += ioctls.h +unifdef-y += ipcbuf.h +unifdef-y += mman.h +unifdef-y += msgbuf.h +unifdef-y += param.h +unifdef-y += poll.h +unifdef-y += posix_types.h +unifdef-y += ptrace.h +unifdef-y += resource.h +unifdef-y += sembuf.h +unifdef-y += shmbuf.h +unifdef-y += sigcontext.h +unifdef-y += siginfo.h +unifdef-y += signal.h +unifdef-y += socket.h +unifdef-y += sockios.h +unifdef-y += stat.h +unifdef-y += statfs.h +unifdef-y += termbits.h +unifdef-y += termios.h +unifdef-y += types.h +unifdef-y += unistd.h +unifdef-y += user.h # These probably shouldn't be exported -unifdef-y += elf.h page.h +unifdef-y += shmparam.h +unifdef-y += elf.h +unifdef-y += page.h diff --git a/include/asm-i386/Kbuild b/include/asm-i386/Kbuild index 2308190321da..b75a348d0c1c 100644 --- a/include/asm-i386/Kbuild +++ b/include/asm-i386/Kbuild @@ -1,5 +1,10 @@ include include/asm-generic/Kbuild.asm -header-y += boot.h debugreg.h ldt.h ucontext.h +header-y += boot.h +header-y += debugreg.h +header-y += ldt.h +header-y += ucontext.h -unifdef-y += mtrr.h setup.h vm86.h +unifdef-y += mtrr.h +unifdef-y += setup.h +unifdef-y += vm86.h diff --git a/include/asm-ia64/Kbuild b/include/asm-ia64/Kbuild index f1cb00f39c22..15818a18bc52 100644 --- a/include/asm-ia64/Kbuild +++ b/include/asm-ia64/Kbuild @@ -1,7 +1,17 @@ include include/asm-generic/Kbuild.asm -header-y += break.h fpu.h fpswa.h gcc_intrin.h ia64regs.h \ - intel_intrin.h intrinsics.h perfmon_default_smpl.h \ - ptrace_offsets.h rse.h setup.h ucontext.h +header-y += break.h +header-y += fpu.h +header-y += fpswa.h +header-y += gcc_intrin.h +header-y += ia64regs.h +header-y += intel_intrin.h +header-y += intrinsics.h +header-y += perfmon_default_smpl.h +header-y += ptrace_offsets.h +header-y += rse.h +header-y += setup.h +header-y += ucontext.h -unifdef-y += perfmon.h ustack.h +unifdef-y += perfmon.h +unifdef-y += ustack.h diff --git a/include/asm-powerpc/Kbuild b/include/asm-powerpc/Kbuild index ac61d7eb6021..9827849953a3 100644 --- a/include/asm-powerpc/Kbuild +++ b/include/asm-powerpc/Kbuild @@ -1,10 +1,41 @@ include include/asm-generic/Kbuild.asm -unifdef-y += a.out.h asm-compat.h bootx.h byteorder.h cputable.h elf.h \ - nvram.h param.h posix_types.h ptrace.h seccomp.h signal.h \ - termios.h types.h unistd.h +header-y += auxvec.h +header-y += ioctls.h +header-y += mman.h +header-y += sembuf.h +header-y += siginfo.h +header-y += stat.h +header-y += errno.h +header-y += ipcbuf.h +header-y += msgbuf.h +header-y += shmbuf.h +header-y += socket.h +header-y += termbits.h +header-y += fcntl.h +header-y += ipc.h +header-y += poll.h +header-y += shmparam.h +header-y += sockios.h +header-y += ucontext.h +header-y += ioctl.h +header-y += linkage.h +header-y += resource.h +header-y += sigcontext.h +header-y += statfs.h -header-y += auxvec.h ioctls.h mman.h sembuf.h siginfo.h stat.h errno.h \ - ipcbuf.h msgbuf.h shmbuf.h socket.h termbits.h fcntl.h ipc.h \ - poll.h shmparam.h sockios.h ucontext.h ioctl.h linkage.h \ - resource.h sigcontext.h statfs.h +unifdef-y += a.out.h +unifdef-y += asm-compat.h +unifdef-y += bootx.h +unifdef-y += byteorder.h +unifdef-y += cputable.h +unifdef-y += elf.h +unifdef-y += nvram.h +unifdef-y += param.h +unifdef-y += posix_types.h +unifdef-y += ptrace.h +unifdef-y += seccomp.h +unifdef-y += signal.h +unifdef-y += termios.h +unifdef-y += types.h +unifdef-y += unistd.h diff --git a/include/asm-s390/Kbuild b/include/asm-s390/Kbuild index ed8955f49e47..14158a4a9c87 100644 --- a/include/asm-s390/Kbuild +++ b/include/asm-s390/Kbuild @@ -1,4 +1,11 @@ include include/asm-generic/Kbuild.asm -unifdef-y += cmb.h debug.h -header-y += dasd.h qeth.h tape390.h ucontext.h vtoc.h z90crypt.h +header-y += dasd.h +header-y += qeth.h +header-y += tape390.h +header-y += ucontext.h +header-y += vtoc.h +header-y += z90crypt.h + +unifdef-y += cmb.h +unifdef-y += debug.h diff --git a/include/asm-sparc/Kbuild b/include/asm-sparc/Kbuild index e2a57fd7abfa..b22b67a64ecc 100644 --- a/include/asm-sparc/Kbuild +++ b/include/asm-sparc/Kbuild @@ -1,6 +1,22 @@ include include/asm-generic/Kbuild.asm -unifdef-y += fbio.h perfctr.h psr.h -header-y += apc.h asi.h auxio.h bpp.h head.h ipc.h jsflash.h \ - openpromio.h pbm.h pconf.h pgtsun4.h reg.h traps.h \ - turbosparc.h vfc_ioctls.h winmacro.h +header-y += apc.h +header-y += asi.h +header-y += auxio.h +header-y += bpp.h +header-y += head.h +header-y += ipc.h +header-y += jsflash.h +header-y += openpromio.h +header-y += pbm.h +header-y += pconf.h +header-y += pgtsun4.h +header-y += reg.h +header-y += traps.h +header-y += turbosparc.h +header-y += vfc_ioctls.h +header-y += winmacro.h + +unifdef-y += fbio.h +unifdef-y += perfctr.h +unifdef-y += psr.h diff --git a/include/asm-sparc64/Kbuild b/include/asm-sparc64/Kbuild index 9284c3cb27ec..4b59ce46cc2d 100644 --- a/include/asm-sparc64/Kbuild +++ b/include/asm-sparc64/Kbuild @@ -4,7 +4,26 @@ ALTARCH := sparc ARCHDEF := defined __sparc__ && defined __arch64__ ALTARCHDEF := defined __sparc__ && !defined __arch64__ -unifdef-y += fbio.h perfctr.h -header-y += apb.h asi.h bbc.h bpp.h display7seg.h envctrl.h floppy.h \ - ipc.h kdebug.h mostek.h openprom.h openpromio.h parport.h \ - pconf.h psrcompat.h pstate.h reg.h uctx.h utrap.h watchdog.h +header-y += apb.h +header-y += asi.h +header-y += bbc.h +header-y += bpp.h +header-y += display7seg.h +header-y += envctrl.h +header-y += floppy.h +header-y += ipc.h +header-y += kdebug.h +header-y += mostek.h +header-y += openprom.h +header-y += openpromio.h +header-y += parport.h +header-y += pconf.h +header-y += psrcompat.h +header-y += pstate.h +header-y += reg.h +header-y += uctx.h +header-y += utrap.h +header-y += watchdog.h + +unifdef-y += fbio.h +unifdef-y += perfctr.h diff --git a/include/asm-x86_64/Kbuild b/include/asm-x86_64/Kbuild index dc4d101e8a16..40f2f13fe174 100644 --- a/include/asm-x86_64/Kbuild +++ b/include/asm-x86_64/Kbuild @@ -4,8 +4,18 @@ ALTARCH := i386 ARCHDEF := defined __x86_64__ ALTARCHDEF := defined __i386__ -header-y += boot.h bootsetup.h cpufeature.h debugreg.h ldt.h \ - msr.h prctl.h setup.h sigcontext32.h ucontext.h \ - vsyscall32.h +header-y += boot.h +header-y += bootsetup.h +header-y += cpufeature.h +header-y += debugreg.h +header-y += ldt.h +header-y += msr.h +header-y += prctl.h +header-y += setup.h +header-y += sigcontext32.h +header-y += ucontext.h +header-y += vsyscall32.h -unifdef-y += mce.h mtrr.h vsyscall.h +unifdef-y += mce.h +unifdef-y += mtrr.h +unifdef-y += vsyscall.h diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 2b8a7d68fae3..7d076d97b2f7 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -1,63 +1,343 @@ -header-y := byteorder/ dvb/ hdlc/ isdn/ nfsd/ raid/ sunrpc/ tc_act/ \ - netfilter/ netfilter_arp/ netfilter_bridge/ netfilter_ipv4/ \ - netfilter_ipv6/ +header-y += byteorder/ +header-y += dvb/ +header-y += hdlc/ +header-y += isdn/ +header-y += nfsd/ +header-y += raid/ +header-y += sunrpc/ +header-y += tc_act/ +header-y += netfilter/ +header-y += netfilter_arp/ +header-y += netfilter_bridge/ +header-y += netfilter_ipv4/ +header-y += netfilter_ipv6/ -header-y += affs_fs.h affs_hardblocks.h aio_abi.h a.out.h arcfb.h \ - atmapi.h atmbr2684.h atmclip.h atm_eni.h atm_he.h \ - atm_idt77105.h atmioc.h atmlec.h atmmpc.h atm_nicstar.h \ - atmppp.h atmsap.h atmsvc.h atm_zatm.h auto_fs4.h auxvec.h \ - awe_voice.h ax25.h b1lli.h baycom.h bfs_fs.h blkpg.h \ - bpqether.h cdk.h chio.h coda_psdev.h coff.h comstats.h \ - consolemap.h cycx_cfm.h dm-ioctl.h dn.h dqblk_v1.h \ - dqblk_v2.h dqblk_xfs.h efs_fs_sb.h elf-fdpic.h elf.h elf-em.h \ - fadvise.h fd.h fdreg.h ftape-header-segment.h ftape-vendors.h \ - fuse.h futex.h genetlink.h gen_stats.h gigaset_dev.h hdsmart.h \ - hpfs_fs.h hysdn_if.h i2c-dev.h i8k.h icmp.h \ - if_arcnet.h if_arp.h if_bonding.h if_cablemodem.h if_fc.h \ - if_fddi.h if.h if_hippi.h if_infiniband.h if_packet.h \ - if_plip.h if_ppp.h if_slip.h if_strip.h if_tunnel.h in6.h \ - in_route.h ioctl.h ip.h ipmi_msgdefs.h ip_mp_alg.h ipsec.h \ - ipx.h irda.h isdn_divertif.h iso_fs.h ite_gpio.h ixjuser.h \ - jffs2.h keyctl.h limits.h major.h matroxfb.h meye.h minix_fs.h \ - mmtimer.h mqueue.h mtio.h ncp_no.h netfilter_arp.h netrom.h \ - nfs2.h nfs4_mount.h nfs_mount.h openprom_fs.h param.h \ - pci_ids.h pci_regs.h personality.h pfkeyv2.h pg.h pkt_cls.h \ - pkt_sched.h posix_types.h ppdev.h prctl.h ps2esdi.h qic117.h \ - qnxtypes.h quotaio_v1.h quotaio_v2.h radeonfb.h raw.h \ - resource.h rose.h sctp.h smbno.h snmp.h sockios.h som.h \ - sound.h stddef.h synclink.h telephony.h termios.h ticable.h \ - times.h tiocl.h tipc.h toshiba.h ultrasound.h un.h utime.h \ - utsname.h video_decoder.h video_encoder.h videotext.h vt.h \ - wavefront.h wireless.h xattr.h x25.h zorro_ids.h +header-y += affs_fs.h +header-y += affs_hardblocks.h +header-y += aio_abi.h +header-y += a.out.h +header-y += arcfb.h +header-y += atmapi.h +header-y += atmbr2684.h +header-y += atmclip.h +header-y += atm_eni.h +header-y += atm_he.h +header-y += atm_idt77105.h +header-y += atmioc.h +header-y += atmlec.h +header-y += atmmpc.h +header-y += atm_nicstar.h +header-y += atmppp.h +header-y += atmsap.h +header-y += atmsvc.h +header-y += atm_zatm.h +header-y += auto_fs4.h +header-y += auxvec.h +header-y += awe_voice.h +header-y += ax25.h +header-y += b1lli.h +header-y += baycom.h +header-y += bfs_fs.h +header-y += blkpg.h +header-y += bpqether.h +header-y += cdk.h +header-y += chio.h +header-y += coda_psdev.h +header-y += coff.h +header-y += comstats.h +header-y += consolemap.h +header-y += cycx_cfm.h +header-y += dm-ioctl.h +header-y += dn.h +header-y += dqblk_v1.h +header-y += dqblk_v2.h +header-y += dqblk_xfs.h +header-y += efs_fs_sb.h +header-y += elf-fdpic.h +header-y += elf.h +header-y += elf-em.h +header-y += fadvise.h +header-y += fd.h +header-y += fdreg.h +header-y += ftape-header-segment.h +header-y += ftape-vendors.h +header-y += fuse.h +header-y += futex.h +header-y += genetlink.h +header-y += gen_stats.h +header-y += gigaset_dev.h +header-y += hdsmart.h +header-y += hpfs_fs.h +header-y += hysdn_if.h +header-y += i2c-dev.h +header-y += i8k.h +header-y += icmp.h +header-y += if_arcnet.h +header-y += if_arp.h +header-y += if_bonding.h +header-y += if_cablemodem.h +header-y += if_fc.h +header-y += if_fddi.h +header-y += if.h +header-y += if_hippi.h +header-y += if_infiniband.h +header-y += if_packet.h +header-y += if_plip.h +header-y += if_ppp.h +header-y += if_slip.h +header-y += if_strip.h +header-y += if_tunnel.h +header-y += in6.h +header-y += in_route.h +header-y += ioctl.h +header-y += ip.h +header-y += ipmi_msgdefs.h +header-y += ip_mp_alg.h +header-y += ipsec.h +header-y += ipx.h +header-y += irda.h +header-y += isdn_divertif.h +header-y += iso_fs.h +header-y += ite_gpio.h +header-y += ixjuser.h +header-y += jffs2.h +header-y += keyctl.h +header-y += limits.h +header-y += major.h +header-y += matroxfb.h +header-y += meye.h +header-y += minix_fs.h +header-y += mmtimer.h +header-y += mqueue.h +header-y += mtio.h +header-y += ncp_no.h +header-y += netfilter_arp.h +header-y += netrom.h +header-y += nfs2.h +header-y += nfs4_mount.h +header-y += nfs_mount.h +header-y += openprom_fs.h +header-y += param.h +header-y += pci_ids.h +header-y += pci_regs.h +header-y += personality.h +header-y += pfkeyv2.h +header-y += pg.h +header-y += pkt_cls.h +header-y += pkt_sched.h +header-y += posix_types.h +header-y += ppdev.h +header-y += prctl.h +header-y += ps2esdi.h +header-y += qic117.h +header-y += qnxtypes.h +header-y += quotaio_v1.h +header-y += quotaio_v2.h +header-y += radeonfb.h +header-y += raw.h +header-y += resource.h +header-y += rose.h +header-y += sctp.h +header-y += smbno.h +header-y += snmp.h +header-y += sockios.h +header-y += som.h +header-y += sound.h +header-y += stddef.h +header-y += synclink.h +header-y += telephony.h +header-y += termios.h +header-y += ticable.h +header-y += times.h +header-y += tiocl.h +header-y += tipc.h +header-y += toshiba.h +header-y += ultrasound.h +header-y += un.h +header-y += utime.h +header-y += utsname.h +header-y += video_decoder.h +header-y += video_encoder.h +header-y += videotext.h +header-y += vt.h +header-y += wavefront.h +header-y += wireless.h +header-y += xattr.h +header-y += x25.h +header-y += zorro_ids.h -unifdef-y += acct.h adb.h adfs_fs.h agpgart.h apm_bios.h atalk.h \ - atmarp.h atmdev.h atm.h atm_tcp.h audit.h auto_fs.h binfmts.h \ - capability.h capi.h cciss_ioctl.h cdrom.h cm4000_cs.h \ - cn_proc.h coda.h connector.h cramfs_fs.h cuda.h cyclades.h \ - dccp.h dirent.h divert.h elfcore.h errno.h errqueue.h \ - ethtool.h eventpoll.h ext2_fs.h ext3_fs.h fb.h fcntl.h \ - filter.h flat.h fs.h ftape.h gameport.h generic_serial.h \ - genhd.h hayesesp.h hdlcdrv.h hdlc.h hdreg.h hiddev.h hpet.h \ - i2c.h i2o-dev.h icmpv6.h if_bridge.h if_ec.h \ - if_eql.h if_ether.h if_frad.h if_ltalk.h if_pppox.h \ - if_shaper.h if_tr.h if_tun.h if_vlan.h if_wanpipe.h igmp.h \ - inet_diag.h in.h inotify.h input.h ipc.h ipmi.h ipv6.h \ - ipv6_route.h isdn.h isdnif.h isdn_ppp.h isicom.h jbd.h \ - joystick.h kdev_t.h kd.h kernelcapi.h kernel.h keyboard.h \ - llc.h loop.h lp.h mempolicy.h mii.h mman.h mroute.h msdos_fs.h \ - msg.h nbd.h ncp_fs.h ncp.h ncp_mount.h netdevice.h \ - netfilter_bridge.h netfilter_decnet.h netfilter.h \ - netfilter_ipv4.h netfilter_ipv6.h netfilter_logging.h net.h \ - netlink.h nfs3.h nfs4.h nfsacl.h nfs_fs.h nfs.h nfs_idmap.h \ - n_r3964.h nubus.h nvram.h parport.h patchkey.h pci.h pktcdvd.h \ - pmu.h poll.h ppp_defs.h ppp-comp.h ptrace.h qnx4_fs.h quota.h \ - random.h reboot.h reiserfs_fs.h reiserfs_xattr.h romfs_fs.h \ - route.h rtc.h rtnetlink.h scc.h sched.h sdla.h \ - selinux_netlink.h sem.h serial_core.h serial.h serio.h shm.h \ - signal.h smb_fs.h smb.h smb_mount.h socket.h sonet.h sonypi.h \ - soundcard.h stat.h sysctl.h tcp.h time.h timex.h tty.h types.h \ - udf_fs_i.h udp.h uinput.h uio.h unistd.h usb_ch9.h \ - usbdevice_fs.h user.h videodev2.h videodev.h wait.h \ - wanrouter.h watchdog.h xfrm.h zftape.h +unifdef-y += acct.h +unifdef-y += adb.h +unifdef-y += adfs_fs.h +unifdef-y += agpgart.h +unifdef-y += apm_bios.h +unifdef-y += atalk.h +unifdef-y += atmarp.h +unifdef-y += atmdev.h +unifdef-y += atm.h +unifdef-y += atm_tcp.h +unifdef-y += audit.h +unifdef-y += auto_fs.h +unifdef-y += binfmts.h +unifdef-y += capability.h +unifdef-y += capi.h +unifdef-y += cciss_ioctl.h +unifdef-y += cdrom.h +unifdef-y += cm4000_cs.h +unifdef-y += cn_proc.h +unifdef-y += coda.h +unifdef-y += connector.h +unifdef-y += cramfs_fs.h +unifdef-y += cuda.h +unifdef-y += cyclades.h +unifdef-y += dccp.h +unifdef-y += dirent.h +unifdef-y += divert.h +unifdef-y += elfcore.h +unifdef-y += errno.h +unifdef-y += errqueue.h +unifdef-y += ethtool.h +unifdef-y += eventpoll.h +unifdef-y += ext2_fs.h +unifdef-y += ext3_fs.h +unifdef-y += fb.h +unifdef-y += fcntl.h +unifdef-y += filter.h +unifdef-y += flat.h +unifdef-y += fs.h +unifdef-y += ftape.h +unifdef-y += gameport.h +unifdef-y += generic_serial.h +unifdef-y += genhd.h +unifdef-y += hayesesp.h +unifdef-y += hdlcdrv.h +unifdef-y += hdlc.h +unifdef-y += hdreg.h +unifdef-y += hiddev.h +unifdef-y += hpet.h +unifdef-y += i2c.h +unifdef-y += i2o-dev.h +unifdef-y += icmpv6.h +unifdef-y += if_bridge.h +unifdef-y += if_ec.h +unifdef-y += if_eql.h +unifdef-y += if_ether.h +unifdef-y += if_frad.h +unifdef-y += if_ltalk.h +unifdef-y += if_pppox.h +unifdef-y += if_shaper.h +unifdef-y += if_tr.h +unifdef-y += if_tun.h +unifdef-y += if_vlan.h +unifdef-y += if_wanpipe.h +unifdef-y += igmp.h +unifdef-y += inet_diag.h +unifdef-y += in.h +unifdef-y += inotify.h +unifdef-y += input.h +unifdef-y += ipc.h +unifdef-y += ipmi.h +unifdef-y += ipv6.h +unifdef-y += ipv6_route.h +unifdef-y += isdn.h +unifdef-y += isdnif.h +unifdef-y += isdn_ppp.h +unifdef-y += isicom.h +unifdef-y += jbd.h +unifdef-y += joystick.h +unifdef-y += kdev_t.h +unifdef-y += kd.h +unifdef-y += kernelcapi.h +unifdef-y += kernel.h +unifdef-y += keyboard.h +unifdef-y += llc.h +unifdef-y += loop.h +unifdef-y += lp.h +unifdef-y += mempolicy.h +unifdef-y += mii.h +unifdef-y += mman.h +unifdef-y += mroute.h +unifdef-y += msdos_fs.h +unifdef-y += msg.h +unifdef-y += nbd.h +unifdef-y += ncp_fs.h +unifdef-y += ncp.h +unifdef-y += ncp_mount.h +unifdef-y += netdevice.h +unifdef-y += netfilter_bridge.h +unifdef-y += netfilter_decnet.h +unifdef-y += netfilter.h +unifdef-y += netfilter_ipv4.h +unifdef-y += netfilter_ipv6.h +unifdef-y += netfilter_logging.h +unifdef-y += net.h +unifdef-y += netlink.h +unifdef-y += nfs3.h +unifdef-y += nfs4.h +unifdef-y += nfsacl.h +unifdef-y += nfs_fs.h +unifdef-y += nfs.h +unifdef-y += nfs_idmap.h +unifdef-y += n_r3964.h +unifdef-y += nubus.h +unifdef-y += nvram.h +unifdef-y += parport.h +unifdef-y += patchkey.h +unifdef-y += pci.h +unifdef-y += pktcdvd.h +unifdef-y += pmu.h +unifdef-y += poll.h +unifdef-y += ppp_defs.h +unifdef-y += ppp-comp.h +unifdef-y += ptrace.h +unifdef-y += qnx4_fs.h +unifdef-y += quota.h +unifdef-y += random.h +unifdef-y += reboot.h +unifdef-y += reiserfs_fs.h +unifdef-y += reiserfs_xattr.h +unifdef-y += romfs_fs.h +unifdef-y += route.h +unifdef-y += rtc.h +unifdef-y += rtnetlink.h +unifdef-y += scc.h +unifdef-y += sched.h +unifdef-y += sdla.h +unifdef-y += selinux_netlink.h +unifdef-y += sem.h +unifdef-y += serial_core.h +unifdef-y += serial.h +unifdef-y += serio.h +unifdef-y += shm.h +unifdef-y += signal.h +unifdef-y += smb_fs.h +unifdef-y += smb.h +unifdef-y += smb_mount.h +unifdef-y += socket.h +unifdef-y += sonet.h +unifdef-y += sonypi.h +unifdef-y += soundcard.h +unifdef-y += stat.h +unifdef-y += sysctl.h +unifdef-y += tcp.h +unifdef-y += time.h +unifdef-y += timex.h +unifdef-y += tty.h +unifdef-y += types.h +unifdef-y += udf_fs_i.h +unifdef-y += udp.h +unifdef-y += uinput.h +unifdef-y += uio.h +unifdef-y += unistd.h +unifdef-y += usb_ch9.h +unifdef-y += usbdevice_fs.h +unifdef-y += user.h +unifdef-y += videodev2.h +unifdef-y += videodev.h +unifdef-y += wait.h +unifdef-y += wanrouter.h +unifdef-y += watchdog.h +unifdef-y += xfrm.h +unifdef-y += zftape.h -objhdr-y := version.h +objhdr-y += version.h diff --git a/include/linux/byteorder/Kbuild b/include/linux/byteorder/Kbuild index 84a57d4fb212..56499ab9e32e 100644 --- a/include/linux/byteorder/Kbuild +++ b/include/linux/byteorder/Kbuild @@ -1,2 +1,7 @@ -unifdef-y += generic.h swabb.h swab.h -header-y += big_endian.h little_endian.h pdp_endian.h +header-y += big_endian.h +header-y += little_endian.h +header-y += pdp_endian.h + +unifdef-y += generic.h +unifdef-y += swabb.h +unifdef-y += swab.h diff --git a/include/linux/dvb/Kbuild b/include/linux/dvb/Kbuild index 63973af72fd5..d97b3a51e227 100644 --- a/include/linux/dvb/Kbuild +++ b/include/linux/dvb/Kbuild @@ -1,2 +1,9 @@ -header-y += ca.h frontend.h net.h osd.h version.h -unifdef-y := audio.h dmx.h video.h +header-y += ca.h +header-y += frontend.h +header-y += net.h +header-y += osd.h +header-y += version.h + +unifdef-y += audio.h +unifdef-y += dmx.h +unifdef-y += video.h diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 1d3a14e2da6e..9a285cecf249 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -1,11 +1,38 @@ -header-y := nf_conntrack_sctp.h nf_conntrack_tuple_common.h \ - nfnetlink_conntrack.h nfnetlink_log.h nfnetlink_queue.h \ - xt_CLASSIFY.h xt_comment.h xt_connbytes.h xt_connmark.h \ - xt_CONNMARK.h xt_conntrack.h xt_dccp.h xt_esp.h \ - xt_helper.h xt_length.h xt_limit.h xt_mac.h xt_mark.h \ - xt_MARK.h xt_multiport.h xt_NFQUEUE.h xt_pkttype.h \ - xt_policy.h xt_realm.h xt_sctp.h xt_state.h xt_string.h \ - xt_tcpmss.h xt_tcpudp.h xt_SECMARK.h xt_CONNSECMARK.h +header-y += nf_conntrack_sctp.h +header-y += nf_conntrack_tuple_common.h +header-y += nfnetlink_conntrack.h +header-y += nfnetlink_log.h +header-y += nfnetlink_queue.h +header-y += xt_CLASSIFY.h +header-y += xt_comment.h +header-y += xt_connbytes.h +header-y += xt_connmark.h +header-y += xt_CONNMARK.h +header-y += xt_conntrack.h +header-y += xt_dccp.h +header-y += xt_esp.h +header-y += xt_helper.h +header-y += xt_length.h +header-y += xt_limit.h +header-y += xt_mac.h +header-y += xt_mark.h +header-y += xt_MARK.h +header-y += xt_multiport.h +header-y += xt_NFQUEUE.h +header-y += xt_pkttype.h +header-y += xt_policy.h +header-y += xt_realm.h +header-y += xt_sctp.h +header-y += xt_state.h +header-y += xt_string.h +header-y += xt_tcpmss.h +header-y += xt_tcpudp.h +header-y += xt_SECMARK.h +header-y += xt_CONNSECMARK.h -unifdef-y := nf_conntrack_common.h nf_conntrack_ftp.h \ - nf_conntrack_tcp.h nfnetlink.h x_tables.h xt_physdev.h +unifdef-y += nf_conntrack_common.h +unifdef-y += nf_conntrack_ftp.h +unifdef-y += nf_conntrack_tcp.h +unifdef-y += nfnetlink.h +unifdef-y += x_tables.h +unifdef-y += xt_physdev.h diff --git a/include/linux/netfilter_arp/Kbuild b/include/linux/netfilter_arp/Kbuild index 198ec5e7b17d..4f13dfcb92ea 100644 --- a/include/linux/netfilter_arp/Kbuild +++ b/include/linux/netfilter_arp/Kbuild @@ -1,2 +1,3 @@ -header-y := arpt_mangle.h -unifdef-y := arp_tables.h +header-y += arpt_mangle.h + +unifdef-y += arp_tables.h diff --git a/include/linux/netfilter_bridge/Kbuild b/include/linux/netfilter_bridge/Kbuild index 5b1aba6abbad..76ff4c47d8c4 100644 --- a/include/linux/netfilter_bridge/Kbuild +++ b/include/linux/netfilter_bridge/Kbuild @@ -1,4 +1,17 @@ -header-y += ebt_among.h ebt_arp.h ebt_arpreply.h ebt_ip.h ebt_limit.h \ - ebt_log.h ebt_mark_m.h ebt_mark_t.h ebt_nat.h ebt_pkttype.h \ - ebt_redirect.h ebt_stp.h ebt_ulog.h ebt_vlan.h -unifdef-y := ebtables.h ebt_802_3.h +header-y += ebt_among.h +header-y += ebt_arp.h +header-y += ebt_arpreply.h +header-y += ebt_ip.h +header-y += ebt_limit.h +header-y += ebt_log.h +header-y += ebt_mark_m.h +header-y += ebt_mark_t.h +header-y += ebt_nat.h +header-y += ebt_pkttype.h +header-y += ebt_redirect.h +header-y += ebt_stp.h +header-y += ebt_ulog.h +header-y += ebt_vlan.h + +unifdef-y += ebtables.h +unifdef-y += ebt_802_3.h diff --git a/include/linux/netfilter_ipv4/Kbuild b/include/linux/netfilter_ipv4/Kbuild index 04e4d2721689..591c1a809c00 100644 --- a/include/linux/netfilter_ipv4/Kbuild +++ b/include/linux/netfilter_ipv4/Kbuild @@ -1,21 +1,63 @@ +header-y += ip_conntrack_helper.h +header-y += ip_conntrack_helper_h323_asn1.h +header-y += ip_conntrack_helper_h323_types.h +header-y += ip_conntrack_protocol.h +header-y += ip_conntrack_sctp.h +header-y += ip_conntrack_tcp.h +header-y += ip_conntrack_tftp.h +header-y += ip_nat_pptp.h +header-y += ipt_addrtype.h +header-y += ipt_ah.h +header-y += ipt_CLASSIFY.h +header-y += ipt_CLUSTERIP.h +header-y += ipt_comment.h +header-y += ipt_connbytes.h +header-y += ipt_connmark.h +header-y += ipt_CONNMARK.h +header-y += ipt_conntrack.h +header-y += ipt_dccp.h +header-y += ipt_dscp.h +header-y += ipt_DSCP.h +header-y += ipt_ecn.h +header-y += ipt_ECN.h +header-y += ipt_esp.h +header-y += ipt_hashlimit.h +header-y += ipt_helper.h +header-y += ipt_iprange.h +header-y += ipt_length.h +header-y += ipt_limit.h +header-y += ipt_LOG.h +header-y += ipt_mac.h +header-y += ipt_mark.h +header-y += ipt_MARK.h +header-y += ipt_multiport.h +header-y += ipt_NFQUEUE.h +header-y += ipt_owner.h +header-y += ipt_physdev.h +header-y += ipt_pkttype.h +header-y += ipt_policy.h +header-y += ipt_realm.h +header-y += ipt_recent.h +header-y += ipt_REJECT.h +header-y += ipt_SAME.h +header-y += ipt_sctp.h +header-y += ipt_state.h +header-y += ipt_string.h +header-y += ipt_tcpmss.h +header-y += ipt_TCPMSS.h +header-y += ipt_tos.h +header-y += ipt_TOS.h +header-y += ipt_ttl.h +header-y += ipt_TTL.h +header-y += ipt_ULOG.h -header-y := ip_conntrack_helper.h ip_conntrack_helper_h323_asn1.h \ - ip_conntrack_helper_h323_types.h ip_conntrack_protocol.h \ - ip_conntrack_sctp.h ip_conntrack_tcp.h ip_conntrack_tftp.h \ - ip_nat_pptp.h ipt_addrtype.h ipt_ah.h \ - ipt_CLASSIFY.h ipt_CLUSTERIP.h ipt_comment.h \ - ipt_connbytes.h ipt_connmark.h ipt_CONNMARK.h \ - ipt_conntrack.h ipt_dccp.h ipt_dscp.h ipt_DSCP.h ipt_ecn.h \ - ipt_ECN.h ipt_esp.h ipt_hashlimit.h ipt_helper.h \ - ipt_iprange.h ipt_length.h ipt_limit.h ipt_LOG.h ipt_mac.h \ - ipt_mark.h ipt_MARK.h ipt_multiport.h ipt_NFQUEUE.h \ - ipt_owner.h ipt_physdev.h ipt_pkttype.h ipt_policy.h \ - ipt_realm.h ipt_recent.h ipt_REJECT.h ipt_SAME.h \ - ipt_sctp.h ipt_state.h ipt_string.h ipt_tcpmss.h \ - ipt_TCPMSS.h ipt_tos.h ipt_TOS.h ipt_ttl.h ipt_TTL.h \ - ipt_ULOG.h - -unifdef-y := ip_conntrack.h ip_conntrack_h323.h ip_conntrack_irc.h \ - ip_conntrack_pptp.h ip_conntrack_proto_gre.h \ - ip_conntrack_tuple.h ip_nat.h ip_nat_rule.h ip_queue.h \ - ip_tables.h +unifdef-y += ip_conntrack.h +unifdef-y += ip_conntrack_h323.h +unifdef-y += ip_conntrack_irc.h +unifdef-y += ip_conntrack_pptp.h +unifdef-y += ip_conntrack_proto_gre.h +unifdef-y += ip_conntrack_tuple.h +unifdef-y += ip_nat.h +unifdef-y += ip_nat_rule.h +unifdef-y += ip_queue.h +unifdef-y += ip_tables.h diff --git a/include/linux/netfilter_ipv6/Kbuild b/include/linux/netfilter_ipv6/Kbuild index 913ddbf55b4b..9dd978d149ff 100644 --- a/include/linux/netfilter_ipv6/Kbuild +++ b/include/linux/netfilter_ipv6/Kbuild @@ -1,6 +1,21 @@ -header-y += ip6t_HL.h ip6t_LOG.h ip6t_MARK.h ip6t_REJECT.h ip6t_ah.h \ - ip6t_esp.h ip6t_frag.h ip6t_hl.h ip6t_ipv6header.h \ - ip6t_length.h ip6t_limit.h ip6t_mac.h ip6t_mark.h \ - ip6t_multiport.h ip6t_opts.h ip6t_owner.h ip6t_policy.h \ - ip6t_physdev.h ip6t_rt.h -unifdef-y := ip6_tables.h +header-y += ip6t_HL.h +header-y += ip6t_LOG.h +header-y += ip6t_MARK.h +header-y += ip6t_REJECT.h +header-y += ip6t_ah.h +header-y += ip6t_esp.h +header-y += ip6t_frag.h +header-y += ip6t_hl.h +header-y += ip6t_ipv6header.h +header-y += ip6t_length.h +header-y += ip6t_limit.h +header-y += ip6t_mac.h +header-y += ip6t_mark.h +header-y += ip6t_multiport.h +header-y += ip6t_opts.h +header-y += ip6t_owner.h +header-y += ip6t_policy.h +header-y += ip6t_physdev.h +header-y += ip6t_rt.h + +unifdef-y += ip6_tables.h diff --git a/include/linux/nfsd/Kbuild b/include/linux/nfsd/Kbuild index c8c545665885..d9c5455808e5 100644 --- a/include/linux/nfsd/Kbuild +++ b/include/linux/nfsd/Kbuild @@ -1,2 +1,7 @@ -unifdef-y := const.h export.h stats.h syscall.h nfsfh.h debug.h auth.h - +unifdef-y += const.h +unifdef-y += export.h +unifdef-y += stats.h +unifdef-y += syscall.h +unifdef-y += nfsfh.h +unifdef-y += debug.h +unifdef-y += auth.h diff --git a/include/linux/raid/Kbuild b/include/linux/raid/Kbuild index 73fa27a8d552..2415a64c5e51 100644 --- a/include/linux/raid/Kbuild +++ b/include/linux/raid/Kbuild @@ -1 +1,2 @@ -header-y += md_p.h md_u.h +header-y += md_p.h +header-y += md_u.h diff --git a/include/linux/sunrpc/Kbuild b/include/linux/sunrpc/Kbuild index 0d1d768a27bf..fb438f158eee 100644 --- a/include/linux/sunrpc/Kbuild +++ b/include/linux/sunrpc/Kbuild @@ -1 +1 @@ -unifdef-y := debug.h +unifdef-y += debug.h diff --git a/include/linux/tc_act/Kbuild b/include/linux/tc_act/Kbuild index 5251a505b2f1..78dfbac36375 100644 --- a/include/linux/tc_act/Kbuild +++ b/include/linux/tc_act/Kbuild @@ -1 +1,4 @@ -header-y += tc_gact.h tc_ipt.h tc_mirred.h tc_pedit.h +header-y += tc_gact.h +header-y += tc_ipt.h +header-y += tc_mirred.h +header-y += tc_pedit.h diff --git a/include/linux/tc_ematch/Kbuild b/include/linux/tc_ematch/Kbuild index 381e93018df6..4a58a1c32a00 100644 --- a/include/linux/tc_ematch/Kbuild +++ b/include/linux/tc_ematch/Kbuild @@ -1 +1,4 @@ -headers-y := tc_em_cmp.h tc_em_meta.h tc_em_nbyte.h tc_em_text.h +header-y += tc_em_cmp.h +header-y += tc_em_meta.h +header-y += tc_em_nbyte.h +header-y += tc_em_text.h diff --git a/include/mtd/Kbuild b/include/mtd/Kbuild index e1da2a5b2a57..13e7a3c6d794 100644 --- a/include/mtd/Kbuild +++ b/include/mtd/Kbuild @@ -1,2 +1,6 @@ -unifdef-y := mtd-abi.h -header-y := inftl-user.h jffs2-user.h mtd-user.h nftl-user.h +header-y += inftl-user.h +header-y += jffs2-user.h +header-y += mtd-user.h +header-y += nftl-user.h + +unifdef-y += mtd-abi.h diff --git a/include/rdma/Kbuild b/include/rdma/Kbuild index eb710ba9b1a0..e7c043216558 100644 --- a/include/rdma/Kbuild +++ b/include/rdma/Kbuild @@ -1 +1 @@ -header-y := ib_user_mad.h +header-y += ib_user_mad.h diff --git a/include/scsi/Kbuild b/include/scsi/Kbuild index 14a033d73314..744f85011f1e 100644 --- a/include/scsi/Kbuild +++ b/include/scsi/Kbuild @@ -1,2 +1,4 @@ header-y += scsi.h -unifdef-y := scsi_ioctl.h sg.h + +unifdef-y += scsi_ioctl.h +unifdef-y += sg.h diff --git a/include/sound/Kbuild b/include/sound/Kbuild index 3a5a3df61496..fd054a344324 100644 --- a/include/sound/Kbuild +++ b/include/sound/Kbuild @@ -1,2 +1,10 @@ -header-y := asound_fm.h hdsp.h hdspm.h sfnt_info.h sscape_ioctl.h -unifdef-y := asequencer.h asound.h emu10k1.h sb16_csp.h +header-y += asound_fm.h +header-y += hdsp.h +header-y += hdspm.h +header-y += sfnt_info.h +header-y += sscape_ioctl.h + +unifdef-y += asequencer.h +unifdef-y += asound.h +unifdef-y += emu10k1.h +unifdef-y += sb16_csp.h diff --git a/include/video/Kbuild b/include/video/Kbuild index 76a60737cc15..a14f9c045b8c 100644 --- a/include/video/Kbuild +++ b/include/video/Kbuild @@ -1 +1 @@ -unifdef-y := sisfb.h +unifdef-y += sisfb.h From 86998aa6534e839ec003ed2ef7067d6fe8696ccc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 19 Sep 2006 11:14:34 +0200 Subject: [PATCH 0198/1063] [PATCH] genirq core: fix handle_level_irq() while porting the -rt tree to 2.6.18-rc7 i noticed the following screaming-IRQ scenario on an SMP system: 2274 0Dn.:1 0.001ms: do_IRQ+0xc/0x103 <= (ret_from_intr+0x0/0xf) 2274 0Dn.:1 0.010ms: do_IRQ+0xc/0x103 <= (ret_from_intr+0x0/0xf) 2274 0Dn.:1 0.020ms: do_IRQ+0xc/0x103 <= (ret_from_intr+0x0/0xf) 2274 0Dn.:1 0.029ms: do_IRQ+0xc/0x103 <= (ret_from_intr+0x0/0xf) 2274 0Dn.:1 0.039ms: do_IRQ+0xc/0x103 <= (ret_from_intr+0x0/0xf) 2274 0Dn.:1 0.048ms: do_IRQ+0xc/0x103 <= (ret_from_intr+0x0/0xf) 2274 0Dn.:1 0.058ms: do_IRQ+0xc/0x103 <= (ret_from_intr+0x0/0xf) 2274 0Dn.:1 0.068ms: do_IRQ+0xc/0x103 <= (ret_from_intr+0x0/0xf) 2274 0Dn.:1 0.077ms: do_IRQ+0xc/0x103 <= (ret_from_intr+0x0/0xf) 2274 0Dn.:1 0.087ms: do_IRQ+0xc/0x103 <= (ret_from_intr+0x0/0xf) 2274 0Dn.:1 0.097ms: do_IRQ+0xc/0x103 <= (ret_from_intr+0x0/0xf) as it turns out, the bug is caused by handle_level_irq(), which if it races with another CPU already handling this IRQ, it _unmasks_ the IRQ line on the way out. This is not how 2.6.17 works, and we introduced this bug in one of the early genirq cleanups right before it went into -mm. (the bug was not in the genirq patchset for a long time, and we didnt notice the bug due to the lack of -rt rebase to the new genirq code. -rt, and hardirq-preemption in particular opens up such races much wider than anything else.) Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Acked-by: Benjamin Herrenschmidt Signed-off-by: Linus Torvalds --- kernel/irq/chip.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 9336f2e89e40..ac1f850d4937 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -252,7 +252,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs) mask_ack_irq(desc, irq); if (unlikely(desc->status & IRQ_INPROGRESS)) - goto out; + goto out_unlock; desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); kstat_cpu(cpu).irqs[irq]++; @@ -263,7 +263,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs) action = desc->action; if (unlikely(!action || (desc->status & IRQ_DISABLED))) { desc->status |= IRQ_PENDING; - goto out; + goto out_unlock; } desc->status |= IRQ_INPROGRESS; @@ -276,9 +276,9 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs) spin_lock(&desc->lock); desc->status &= ~IRQ_INPROGRESS; -out: if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask) desc->chip->unmask(irq); +out_unlock: spin_unlock(&desc->lock); } From 271fc18eead0aa330758a3b9d8a57ffe315a1978 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 18 Sep 2006 20:12:19 -0700 Subject: [PATCH 0199/1063] [PATCH] Add headers_check' target to output of 'make help' Signed-off-by: David Woodhouse Cc: Sam Ravnborg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index a086e3212af3..47e21c04014e 100644 --- a/Makefile +++ b/Makefile @@ -1082,6 +1082,7 @@ help: @echo 'Static analysers' @echo ' checkstack - Generate a list of stack hogs' @echo ' namespacecheck - Name space analysis on compiled kernel' + @echo ' headers_check - Sanity check on exported headers' @echo '' @echo 'Kernel packaging:' @$(MAKE) $(build)=$(package-dir) help From 020d8c063b4d850b53dfbaee843c8335b7a90d26 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 18 Sep 2006 20:12:23 -0700 Subject: [PATCH 0200/1063] [PATCH] Fix 'make headers_check' on ia64 Fix 'make headers_check' on m68k Signed-off-by: Geert Uytterhoeven Cc: Roman Zippel Cc: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-m68k/page.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/asm-m68k/page.h b/include/asm-m68k/page.h index db017f838c29..fcc165ddd09e 100644 --- a/include/asm-m68k/page.h +++ b/include/asm-m68k/page.h @@ -2,6 +2,8 @@ #define _M68K_PAGE_H +#ifdef __KERNEL__ + /* PAGE_SHIFT determines the page size */ #ifndef CONFIG_SUN3 #define PAGE_SHIFT (12) @@ -15,8 +17,6 @@ #endif #define PAGE_MASK (~(PAGE_SIZE-1)) -#ifdef __KERNEL__ - #include #if PAGE_SHIFT < 13 @@ -175,8 +175,8 @@ static inline void *__va(unsigned long x) #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -#endif /* __KERNEL__ */ - #include +#endif /* __KERNEL__ */ + #endif /* _M68K_PAGE_H */ From ac7fb273ca1d0b4fb354575bb6e101ffd09e0b54 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 18 Sep 2006 20:12:27 -0700 Subject: [PATCH 0201/1063] [PATCH] headers_check: Clean up asm-parisc/page.h for user headers Remove definitions of PAGE_* from the user view Delete unnecessary comments referring to the size of pages Only include if we're in __KERNEL__ Signed-off-by: David Woodhouse Signed-off-by: Matthew Wilcox Signed-off-by: Kyle McMartin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-parisc/page.h | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/include/asm-parisc/page.h b/include/asm-parisc/page.h index 0695bc958d56..57d6d82756dd 100644 --- a/include/asm-parisc/page.h +++ b/include/asm-parisc/page.h @@ -1,22 +1,14 @@ #ifndef _PARISC_PAGE_H #define _PARISC_PAGE_H -#if !defined(__KERNEL__) -/* this is for userspace applications (4k page size) */ -# define PAGE_SHIFT 12 /* 4k */ -# define PAGE_SIZE (1UL << PAGE_SHIFT) -# define PAGE_MASK (~(PAGE_SIZE-1)) -#endif - - #ifdef __KERNEL__ #if defined(CONFIG_PARISC_PAGE_SIZE_4KB) -# define PAGE_SHIFT 12 /* 4k */ +# define PAGE_SHIFT 12 #elif defined(CONFIG_PARISC_PAGE_SIZE_16KB) -# define PAGE_SHIFT 14 /* 16k */ +# define PAGE_SHIFT 14 #elif defined(CONFIG_PARISC_PAGE_SIZE_64KB) -# define PAGE_SHIFT 16 /* 64k */ +# define PAGE_SHIFT 16 #else # error "unknown default kernel page size" #endif @@ -188,9 +180,9 @@ extern int npmem_ranges; #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -#endif /* __KERNEL__ */ - #include #include +#endif /* __KERNEL__ */ + #endif /* _PARISC_PAGE_H */ From 833f73299fdf4497af1552e219e95661f4d2cdca Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Mon, 18 Sep 2006 20:12:33 -0700 Subject: [PATCH 0202/1063] [PATCH] EXT2: Remove superblock lock contention in ext2_statfs Fix a performance degradation introduced in 2.6.17. (30% degradation running dbench with 16 threads) Commit 21730eed11de42f22afcbd43f450a1872a0b5ea1, which claims to make EXT2_DEBUG work again, moves the taking of the kernel lock out of debug-only code in ext2_count_free_inodes and ext2_count_free_blocks and into ext2_statfs. The same problem was fixed in ext3 by removing the lock completely (commit 5b11687924e40790deb0d5f959247ade82196665) Signed-off-by: Dave Kleikamp Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext2/balloc.c | 1 - fs/ext2/ialloc.c | 1 - fs/ext2/super.c | 2 -- 3 files changed, 4 deletions(-) diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index d4870432ecfc..b1981d0e95ad 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -539,7 +539,6 @@ unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars) #endif /* EXT2FS_DEBUG */ -/* Superblock must be locked */ unsigned long ext2_count_free_blocks (struct super_block * sb) { struct ext2_group_desc * desc; diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index de85c61c58c5..695f69ccf908 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -637,7 +637,6 @@ fail: return ERR_PTR(err); } -/* Superblock must be locked */ unsigned long ext2_count_free_inodes (struct super_block * sb) { struct ext2_group_desc *desc; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index ca5bfb6914d2..4286ff6330b6 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1083,7 +1083,6 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) unsigned long overhead; int i; - lock_super(sb); if (test_opt (sb, MINIX_DF)) overhead = 0; else { @@ -1124,7 +1123,6 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) buf->f_files = le32_to_cpu(sbi->s_es->s_inodes_count); buf->f_ffree = ext2_count_free_inodes (sb); buf->f_namelen = EXT2_NAME_LEN; - unlock_super(sb); return 0; } From 79e453d49bd49ba1b576f89310cc565c9e4ca379 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 19 Sep 2006 08:15:22 -0700 Subject: [PATCH 0203/1063] Revert mmiocfg heuristics and blacklist changes This reverts commits 11012d419cfc0e0f78ca356aca03674217910124 and 40dd2d20f220eda1cd0da8ea3f0f9db8971ba237, which allowed us to use the MMIO accesses for PCI config cycles even without the area being marked reserved in the e820 memory tables. Those changes were needed for EFI-environment Intel macs, but broke some newer Intel 965 boards, so for now it's better to revert to our old 2.6.17 behaviour and at least avoid introducing any new breakage. Andi Kleen has a set of patches that work with both EFI and the broken Intel 965 boards, which will be applied once they get wider testing. Cc: Arjan van de Ven Cc: Edgar Hucek Cc: Andi Kleen Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 2 -- arch/i386/kernel/setup.c | 32 +++++++++++++++++++++++++++ arch/i386/pci/common.c | 5 ----- arch/i386/pci/mmconfig.c | 34 +++++++++-------------------- arch/i386/pci/pci.h | 3 +-- arch/x86_64/kernel/e820.c | 29 ++++++++++++++++++++++++ arch/x86_64/pci/mmconfig.c | 34 +++++++++-------------------- 7 files changed, 82 insertions(+), 57 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 87a17337c7f6..71d05f481727 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1189,8 +1189,6 @@ running once the system is up. Mechanism 2. nommconf [IA-32,X86_64] Disable use of MMCONFIG for PCI Configuration - mmconf [IA-32,X86_64] Force MMCONFIG. This is useful - to override the builtin blacklist. nomsi [MSI] If the PCI_MSI kernel config parameter is enabled, this kernel boot option can be used to disable the use of MSI interrupts system-wide. diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 345ffb7d904d..f1682206d304 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -956,6 +956,38 @@ efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) return 0; } + /* + * This function checks if the entire range is mapped with type. + * + * Note: this function only works correct if the e820 table is sorted and + * not-overlapping, which is the case + */ +int __init +e820_all_mapped(unsigned long s, unsigned long e, unsigned type) +{ + u64 start = s; + u64 end = e; + int i; + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + if (type && ei->type != type) + continue; + /* is the region (part) in overlap with the current region ?*/ + if (ei->addr >= end || ei->addr + ei->size <= start) + continue; + /* if the region is at the beginning of we move + * start to the end of the region since it's ok until there + */ + if (ei->addr <= start) + start = ei->addr + ei->size; + /* if start is now at or beyond end, we're done, full + * coverage */ + if (start >= end) + return 1; /* we're done */ + } + return 0; +} + /* * Find the highest page frame number we have available */ diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c index 1220dd828ce3..0a362e3aeac5 100644 --- a/arch/i386/pci/common.c +++ b/arch/i386/pci/common.c @@ -237,11 +237,6 @@ char * __devinit pcibios_setup(char *str) pci_probe &= ~PCI_PROBE_MMCONF; return NULL; } - /* override DMI blacklist */ - else if (!strcmp(str, "mmconf")) { - pci_probe |= PCI_PROBE_MMCONF_FORCE; - return NULL; - } #endif else if (!strcmp(str, "noacpi")) { acpi_noirq_set(); diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c index ef5a2faa7d82..972180f738d9 100644 --- a/arch/i386/pci/mmconfig.c +++ b/arch/i386/pci/mmconfig.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include "pci.h" @@ -188,31 +187,9 @@ static __init void unreachable_devices(void) } } -static int disable_mcfg(struct dmi_system_id *d) -{ - printk("PCI: %s detected. Disabling MCFG.\n", d->ident); - pci_probe &= ~PCI_PROBE_MMCONF; - return 0; -} - -static struct dmi_system_id __initdata dmi_bad_mcfg[] = { - /* Has broken MCFG table that makes the system hang when used */ - { - .callback = disable_mcfg, - .ident = "Intel D3C5105 SDV", - .matches = { - DMI_MATCH(DMI_BIOS_VENDOR, "Intel"), - DMI_MATCH(DMI_BOARD_NAME, "D26928"), - }, - }, - {} -}; - void __init pci_mmcfg_init(void) { - dmi_check_system(dmi_bad_mcfg); - - if ((pci_probe & (PCI_PROBE_MMCONF_FORCE|PCI_PROBE_MMCONF)) == 0) + if ((pci_probe & PCI_PROBE_MMCONF) == 0) return; acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg); @@ -221,6 +198,15 @@ void __init pci_mmcfg_init(void) (pci_mmcfg_config[0].base_address == 0)) return; + if (!e820_all_mapped(pci_mmcfg_config[0].base_address, + pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN, + E820_RESERVED)) { + printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n", + pci_mmcfg_config[0].base_address); + printk(KERN_ERR "PCI: Not using MMCONFIG.\n"); + return; + } + printk(KERN_INFO "PCI: Using MMCONFIG\n"); raw_pci_ops = &pci_mmcfg; pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; diff --git a/arch/i386/pci/pci.h b/arch/i386/pci/pci.h index 49a849b3a241..bf4e79335388 100644 --- a/arch/i386/pci/pci.h +++ b/arch/i386/pci/pci.h @@ -16,8 +16,7 @@ #define PCI_PROBE_CONF1 0x0002 #define PCI_PROBE_CONF2 0x0004 #define PCI_PROBE_MMCONF 0x0008 -#define PCI_PROBE_MMCONF_FORCE 0x0010 -#define PCI_PROBE_MASK 0x00ff +#define PCI_PROBE_MASK 0x000f #define PCI_NO_SORT 0x0100 #define PCI_BIOS_SORT 0x0200 diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index 764bf23c7103..d6d7f731f6f0 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c @@ -108,6 +108,35 @@ e820_any_mapped(unsigned long start, unsigned long end, unsigned type) return 0; } +/* + * This function checks if the entire range is mapped with type. + * + * Note: this function only works correct if the e820 table is sorted and + * not-overlapping, which is the case + */ +int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type) +{ + int i; + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + if (type && ei->type != type) + continue; + /* is the region (part) in overlap with the current region ?*/ + if (ei->addr >= end || ei->addr + ei->size <= start) + continue; + + /* if the region is at the beginning of we move + * start to the end of the region since it's ok until there + */ + if (ei->addr <= start) + start = ei->addr + ei->size; + /* if start is now at or beyond end, we're done, full coverage */ + if (start >= end) + return 1; /* we're done */ + } + return 0; +} + /* * Find a free area in a specific range. */ diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c index 2d48a7941d48..3c55c76c6fd5 100644 --- a/arch/x86_64/pci/mmconfig.c +++ b/arch/x86_64/pci/mmconfig.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include "pci.h" @@ -165,33 +164,11 @@ static __init void unreachable_devices(void) } } -static int disable_mcfg(struct dmi_system_id *d) -{ - printk("PCI: %s detected. Disabling MCFG.\n", d->ident); - pci_probe &= ~PCI_PROBE_MMCONF; - return 0; -} - -static struct dmi_system_id __initdata dmi_bad_mcfg[] = { - /* Has broken MCFG table that makes the system hang when used */ - { - .callback = disable_mcfg, - .ident = "Intel D3C5105 SDV", - .matches = { - DMI_MATCH(DMI_BIOS_VENDOR, "Intel"), - DMI_MATCH(DMI_BOARD_NAME, "D26928"), - }, - }, - {} -}; - void __init pci_mmcfg_init(void) { int i; - dmi_check_system(dmi_bad_mcfg); - - if ((pci_probe & (PCI_PROBE_MMCONF|PCI_PROBE_MMCONF_FORCE)) == 0) + if ((pci_probe & PCI_PROBE_MMCONF) == 0) return; acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg); @@ -200,6 +177,15 @@ void __init pci_mmcfg_init(void) (pci_mmcfg_config[0].base_address == 0)) return; + if (!e820_all_mapped(pci_mmcfg_config[0].base_address, + pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN, + E820_RESERVED)) { + printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n", + pci_mmcfg_config[0].base_address); + printk(KERN_ERR "PCI: Not using MMCONFIG.\n"); + return; + } + /* RED-PEN i386 doesn't do _nocache right now */ pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL); if (pci_mmcfg_virt == NULL) { From 7a52411107e1ac8f5be6967936ec237f40a1c7e4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 15 Sep 2006 16:03:45 -0400 Subject: [PATCH 0204/1063] NFS: Fix Oopsable condition in nfs_readpage_sync() Signed-off-by: Trond Myklebust --- fs/nfs/read.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 7a9ee00e0c61..f0aff824a291 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -204,9 +204,11 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; spin_unlock(&inode->i_lock); - nfs_readpage_truncate_uninitialised_page(rdata); - if (rdata->res.eof || rdata->res.count == rdata->args.count) + if (rdata->res.eof || rdata->res.count == rdata->args.count) { SetPageUptodate(page); + if (rdata->res.eof && count != 0) + memclear_highpage_flush(page, rdata->args.pgbase, count); + } result = 0; io_error: From 76723de0cf5b186afe2f329eeef304c321d52bf8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 15 Sep 2006 08:11:51 -0400 Subject: [PATCH 0205/1063] NFSv4: Fix incorrect semaphore release in _nfs4_do_open() Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 153898e1331f..b14145b7b87f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -970,7 +970,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st status = -ENOMEM; opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr); if (opendata == NULL) - goto err_put_state_owner; + goto err_release_rwsem; status = _nfs4_proc_open(opendata); if (status != 0) @@ -989,11 +989,11 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st return 0; err_opendata_free: nfs4_opendata_free(opendata); +err_release_rwsem: + up_read(&clp->cl_sem); err_put_state_owner: nfs4_put_state_owner(sp); out_err: - /* Note: clp->cl_sem must be released before nfs4_put_open_state()! */ - up_read(&clp->cl_sem); *res = NULL; return status; } From 5c2d97cb31fb77981797fec46230ca005b865799 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 18 Sep 2006 23:20:35 -0400 Subject: [PATCH 0206/1063] NFS: Fix nfs_page use after free issues in fs/nfs/write.c Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 8ab3cf10d792..7084ac9a6455 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -590,8 +590,8 @@ static void nfs_cancel_commit_list(struct list_head *head) req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_inode_remove_request(req); - nfs_clear_page_writeback(req); dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); + nfs_clear_page_writeback(req); } } @@ -1386,8 +1386,8 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_mark_request_commit(req); - nfs_clear_page_writeback(req); dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); + nfs_clear_page_writeback(req); } return -ENOMEM; } From 7f81dc0097095f19d25e14c043edfdebb9e01295 Mon Sep 17 00:00:00 2001 From: Chas Williams Date: Tue, 19 Sep 2006 12:59:11 -0700 Subject: [PATCH 0207/1063] [ATM]: [he] don't hold the device lock when upcalling This can create a deadlock/lock ordering problem with other layers that want to use the transmit (or other) path of the card at that time. Signed-off-by: Chas Williams Signed-off-by: David S. Miller --- drivers/atm/he.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/atm/he.c b/drivers/atm/he.c index dd96123a2b7f..ffcb9fd31c38 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -1928,7 +1928,9 @@ he_service_rbrq(struct he_dev *he_dev, int group) #ifdef notdef ATM_SKB(skb)->vcc = vcc; #endif + spin_unlock(&he_dev->global_lock); vcc->push(vcc, skb); + spin_lock(&he_dev->global_lock); atomic_inc(&vcc->stats->rx); From b22b9004f22ba8ca33d15059e8b710a4b71ec3cc Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 19 Sep 2006 13:00:57 -0700 Subject: [PATCH 0208/1063] [NETFILTER]: xt_quota: add missing module aliases Add missing aliases for ipt_quota and ip6t_quota to make autoload work. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_quota.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index 4cdba7469dc4..be8d3c26b568 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -11,6 +11,8 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Sam Johnston "); +MODULE_ALIAS("ipt_quota"); +MODULE_ALIAS("ip6t_quota"); static DEFINE_SPINLOCK(quota_lock); From 888454c57a45511808d3fa52597b3d765df034a6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 19 Sep 2006 13:42:46 -0700 Subject: [PATCH 0209/1063] [IPV4] fib_trie: missing ntohl() when calling fib_semantic_match() fib_trie.c::check_leaf() passes host-endian where fib_semantic_match() expects (and stores into) net-endian. Signed-off-by: Al Viro Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 23fb9d9768e3..01801c0f885d 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1281,18 +1281,18 @@ static inline int check_leaf(struct trie *t, struct leaf *l, struct fib_result *res) { int err, i; - t_key mask; + __be32 mask; struct leaf_info *li; struct hlist_head *hhead = &l->list; struct hlist_node *node; hlist_for_each_entry_rcu(li, node, hhead, hlist) { i = li->plen; - mask = ntohl(inet_make_mask(i)); - if (l->key != (key & mask)) + mask = inet_make_mask(i); + if (l->key != (key & ntohl(mask))) continue; - if ((err = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) <= 0) { + if ((err = fib_semantic_match(&li->falh, flp, res, htonl(l->key), mask, i)) <= 0) { *plen = i; #ifdef CONFIG_IP_FIB_TRIE_STATS t->stats.semantic_match_passed++; From e478bec0ba0a83a48a0f6982934b6de079e7e6b3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 19 Sep 2006 20:42:06 -0700 Subject: [PATCH 0210/1063] Linux v2.6.18. Arrr! Ahoy, all land-lubbers, test me out right smartly! Signed-off-by: Linus Torvalds --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 47e21c04014e..edfc2fdf76c9 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 18 -EXTRAVERSION = -rc7 -NAME=Crazed Snow-Weasel +EXTRAVERSION = +NAME=Avast! A bilge rat! # *DOCUMENTATION* # To see a list of typical targets execute "make help" From 4f896e53eea70013fa48d0d8662680cf8aae8a43 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 24 Aug 2006 13:29:33 +1000 Subject: [PATCH 0211/1063] [POWERPC] make spinlocks work in a combined kernel If we build a pSeries/iSeries combined kernel, we will need this. Signed-off-by: Stephen Rothwell --- arch/powerpc/lib/locks.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index 077bed7dc52b..80b482ca30df 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -23,6 +23,7 @@ #include #include #include +#include void __spin_yield(raw_spinlock_t *lock) { @@ -39,13 +40,12 @@ void __spin_yield(raw_spinlock_t *lock) rmb(); if (lock->slock != lock_value) return; /* something has changed */ -#ifdef CONFIG_PPC_ISERIES - HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc, - ((u64)holder_cpu << 32) | yield_count); -#else - plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(holder_cpu), - yield_count); -#endif + if (firmware_has_feature(FW_FEATURE_ISERIES)) + HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc, + ((u64)holder_cpu << 32) | yield_count); + else + plpar_hcall_norets(H_CONFER, + get_hard_smp_processor_id(holder_cpu), yield_count); } /* @@ -69,13 +69,12 @@ void __rw_yield(raw_rwlock_t *rw) rmb(); if (rw->lock != lock_value) return; /* something has changed */ -#ifdef CONFIG_PPC_ISERIES - HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc, - ((u64)holder_cpu << 32) | yield_count); -#else - plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(holder_cpu), - yield_count); -#endif + if (firmware_has_feature(FW_FEATURE_ISERIES)) + HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc, + ((u64)holder_cpu << 32) | yield_count); + else + plpar_hcall_norets(H_CONFER, + get_hard_smp_processor_id(holder_cpu), yield_count); } #endif From 9ca91e0fb5295e8317030feb889085e452cedab1 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 14 Sep 2006 16:59:31 +1000 Subject: [PATCH 0212/1063] [POWERPC] silence a warning Left over from the constifying of get_property. Signed-off-by: Stephen Rothwell --- arch/powerpc/platforms/powermac/cpufreq_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powermac/cpufreq_64.c b/arch/powerpc/platforms/powermac/cpufreq_64.c index 167cd3ce8a13..d30466d74194 100644 --- a/arch/powerpc/platforms/powermac/cpufreq_64.c +++ b/arch/powerpc/platforms/powermac/cpufreq_64.c @@ -89,7 +89,7 @@ static DEFINE_MUTEX(g5_switch_mutex); #ifdef CONFIG_PMAC_SMU -static u32 *g5_pmode_data; +static const u32 *g5_pmode_data; static int g5_pmode_max; static struct smu_sdbp_fvt *g5_fvt_table; /* table of op. points */ From fa053d2f008cb73fa768b8e171486d8c0b33312b Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 19 Sep 2006 14:51:40 +1000 Subject: [PATCH 0213/1063] [POWERPC] remove unused io accessors The io accessors insw_ns, outsw_ns, insl_ns and outsl_ns are unused (except for one unnecessary use in drivers/net/3c509.c that is addressed in a previous patch) and are only defined in powerpc/ppc, so remove them. Signed-off-by: Stephen Rothwell --- arch/ppc/syslib/m8260_pci_erratum9.c | 16 ---------------- include/asm-powerpc/io.h | 13 +------------ include/asm-ppc/io.h | 10 ---------- include/asm-ppc/mpc8260_pci9.h | 4 ---- 4 files changed, 1 insertion(+), 42 deletions(-) diff --git a/arch/ppc/syslib/m8260_pci_erratum9.c b/arch/ppc/syslib/m8260_pci_erratum9.c index 974581ea4849..5475709ce07b 100644 --- a/arch/ppc/syslib/m8260_pci_erratum9.c +++ b/arch/ppc/syslib/m8260_pci_erratum9.c @@ -339,20 +339,6 @@ void insl(unsigned port, void *buf, int nl) idma_pci9_read((u8 *)buf, (u8 *)addr, nl*sizeof(u32), sizeof(u32), 0); } -void insw_ns(unsigned port, void *buf, int ns) -{ - u8 *addr = (u8 *)(port + _IO_BASE); - - idma_pci9_read((u8 *)buf, (u8 *)addr, ns*sizeof(u16), sizeof(u16), 0); -} - -void insl_ns(unsigned port, void *buf, int nl) -{ - u8 *addr = (u8 *)(port + _IO_BASE); - - idma_pci9_read((u8 *)buf, (u8 *)addr, nl*sizeof(u32), sizeof(u32), 0); -} - void *memcpy_fromio(void *dest, unsigned long src, size_t count) { unsigned long pa = iopa((unsigned long) src); @@ -373,8 +359,6 @@ EXPORT_SYMBOL(inl); EXPORT_SYMBOL(insb); EXPORT_SYMBOL(insw); EXPORT_SYMBOL(insl); -EXPORT_SYMBOL(insw_ns); -EXPORT_SYMBOL(insl_ns); EXPORT_SYMBOL(memcpy_fromio); #endif /* ifdef CONFIG_8260_PCI9 */ diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h index 212428db0d8b..9aaced542624 100644 --- a/include/asm-powerpc/io.h +++ b/include/asm-powerpc/io.h @@ -76,8 +76,7 @@ extern unsigned long pci_io_base; #define insb(port, buf, ns) _insb((u8 __iomem *)((port)+pci_io_base), (buf), (ns)) #define insw(port, buf, ns) _insw_ns((u8 __iomem *)((port)+pci_io_base), (buf), (ns)) #define insl(port, buf, nl) _insl_ns((u8 __iomem *)((port)+pci_io_base), (buf), (nl)) -#define insw_ns(port, buf, ns) _insw_ns((u16 __iomem *)((port)+pci_io_base), (buf), (ns)) -#define insl_ns(port, buf, nl) _insl_ns((u32 __iomem *)((port)+pci_io_base), (buf), (nl)) + #else static inline unsigned char __raw_readb(const volatile void __iomem *addr) @@ -138,8 +137,6 @@ static inline void __raw_writeq(unsigned long v, volatile void __iomem *addr) #define insb(port, buf, ns) eeh_insb((port), (buf), (ns)) #define insw(port, buf, ns) eeh_insw_ns((port), (buf), (ns)) #define insl(port, buf, nl) eeh_insl_ns((port), (buf), (nl)) -#define insw_ns(port, buf, ns) eeh_insw_ns((port), (buf), (ns)) -#define insl_ns(port, buf, nl) eeh_insl_ns((port), (buf), (nl)) #endif @@ -180,14 +177,6 @@ static inline void mmiowb(void) #define inl_p(port) inl(port) #define outl_p(val, port) (udelay(1), outl((val), (port))) -/* - * The *_ns versions below don't do byte-swapping. - * Neither do the standard versions now, these are just here - * for older code. - */ -#define outsw_ns(port, buf, ns) _outsw_ns((u16 __iomem *)((port)+pci_io_base), (buf), (ns)) -#define outsl_ns(port, buf, nl) _outsl_ns((u32 __iomem *)((port)+pci_io_base), (buf), (nl)) - #define IO_SPACE_LIMIT ~(0UL) diff --git a/include/asm-ppc/io.h b/include/asm-ppc/io.h index 680555be22ec..fb0a8fcc51cf 100644 --- a/include/asm-ppc/io.h +++ b/include/asm-ppc/io.h @@ -338,16 +338,6 @@ extern void _outsw_ns(volatile u16 __iomem *port, const void *buf, int ns); extern void _insl_ns(volatile u32 __iomem *port, void *buf, int nl); extern void _outsl_ns(volatile u32 __iomem *port, const void *buf, int nl); -/* - * The *_ns versions below don't do byte-swapping. - * Neither do the standard versions now, these are just here - * for older code. - */ -#define insw_ns(port, buf, ns) _insw_ns((port)+___IO_BASE, (buf), (ns)) -#define outsw_ns(port, buf, ns) _outsw_ns((port)+___IO_BASE, (buf), (ns)) -#define insl_ns(port, buf, nl) _insl_ns((port)+___IO_BASE, (buf), (nl)) -#define outsl_ns(port, buf, nl) _outsl_ns((port)+___IO_BASE, (buf), (nl)) - #define IO_SPACE_LIMIT ~0 diff --git a/include/asm-ppc/mpc8260_pci9.h b/include/asm-ppc/mpc8260_pci9.h index 26b3f6e787bc..9f7176881c56 100644 --- a/include/asm-ppc/mpc8260_pci9.h +++ b/include/asm-ppc/mpc8260_pci9.h @@ -30,8 +30,6 @@ #undef inb #undef inw #undef inl -#undef insw_ns -#undef insl_ns #undef memcpy_fromio extern int readb(volatile unsigned char *addr); @@ -43,8 +41,6 @@ extern void insl(unsigned port, void *buf, int nl); extern int inb(unsigned port); extern int inw(unsigned port); extern unsigned inl(unsigned port); -extern void insw_ns(unsigned port, void *buf, int ns); -extern void insl_ns(unsigned port, void *buf, int nl); extern void *memcpy_fromio(void *dest, unsigned long src, size_t count); #endif /* !__CONFIG_8260_PCI9_DEFS */ From 661f1cdb8b3e3c2c44e97df122c1d5643c054ce8 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 19 Sep 2006 16:52:55 +1000 Subject: [PATCH 0214/1063] [POWERPC] remove unused asm routines _insw, _outsw, _insl amd _outsl are all unused, so remove them. Signed-off-by: Stephen Rothwell --- arch/powerpc/kernel/misc.S | 52 -------------------- arch/powerpc/kernel/ppc_ksyms.c | 4 -- arch/ppc/kernel/misc.S | 84 --------------------------------- arch/ppc/kernel/ppc_ksyms.c | 4 -- include/asm-powerpc/io.h | 4 -- include/asm-ppc/io.h | 4 -- 6 files changed, 152 deletions(-) diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index f770805f1215..dd5f8e429196 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -86,58 +86,6 @@ _GLOBAL(_outsb) sync blr -_GLOBAL(_insw) - sync - cmpwi 0,r5,0 - mtctr r5 - subi r4,r4,2 - blelr- -00: lhbrx r5,0,r3 - eieio - sthu r5,2(r4) - bdnz 00b - twi 0,r5,0 - isync - blr - -_GLOBAL(_outsw) - cmpwi 0,r5,0 - mtctr r5 - subi r4,r4,2 - blelr- - sync -00: lhzu r5,2(r4) - sthbrx r5,0,r3 - bdnz 00b - sync - blr - -_GLOBAL(_insl) - sync - cmpwi 0,r5,0 - mtctr r5 - subi r4,r4,4 - blelr- -00: lwbrx r5,0,r3 - eieio - stwu r5,4(r4) - bdnz 00b - twi 0,r5,0 - isync - blr - -_GLOBAL(_outsl) - cmpwi 0,r5,0 - mtctr r5 - subi r4,r4,4 - blelr- - sync -00: lwzu r5,4(r4) - stwbrx r5,0,r3 - bdnz 00b - sync - blr - #ifdef CONFIG_PPC32 _GLOBAL(__ide_mm_insw) #endif diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index b2edac8ddf0a..314d6114e6ec 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -104,10 +104,6 @@ EXPORT_SYMBOL(__ide_mm_outsl); EXPORT_SYMBOL(_insb); EXPORT_SYMBOL(_outsb); -EXPORT_SYMBOL(_insw); -EXPORT_SYMBOL(_outsw); -EXPORT_SYMBOL(_insl); -EXPORT_SYMBOL(_outsl); EXPORT_SYMBOL(_insw_ns); EXPORT_SYMBOL(_outsw_ns); EXPORT_SYMBOL(_insl_ns); diff --git a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S index 2fa0075f2b5f..44700bbfe7bc 100644 --- a/arch/ppc/kernel/misc.S +++ b/arch/ppc/kernel/misc.S @@ -768,90 +768,6 @@ _GLOBAL(_outsb) bdnz 00b blr -_GLOBAL(_insw) - cmpwi 0,r5,0 - mtctr r5 - subi r4,r4,2 - blelr- -00: lhbrx r5,0,r3 -01: eieio -02: sthu r5,2(r4) - ISYNC_8xx - .section .fixup,"ax" -03: blr - .text - .section __ex_table, "a" - .align 2 - .long 00b, 03b - .long 01b, 03b - .long 02b, 03b - .text - bdnz 00b - blr - -_GLOBAL(_outsw) - cmpwi 0,r5,0 - mtctr r5 - subi r4,r4,2 - blelr- -00: lhzu r5,2(r4) -01: eieio -02: sthbrx r5,0,r3 - ISYNC_8xx - .section .fixup,"ax" -03: blr - .text - .section __ex_table, "a" - .align 2 - .long 00b, 03b - .long 01b, 03b - .long 02b, 03b - .text - bdnz 00b - blr - -_GLOBAL(_insl) - cmpwi 0,r5,0 - mtctr r5 - subi r4,r4,4 - blelr- -00: lwbrx r5,0,r3 -01: eieio -02: stwu r5,4(r4) - ISYNC_8xx - .section .fixup,"ax" -03: blr - .text - .section __ex_table, "a" - .align 2 - .long 00b, 03b - .long 01b, 03b - .long 02b, 03b - .text - bdnz 00b - blr - -_GLOBAL(_outsl) - cmpwi 0,r5,0 - mtctr r5 - subi r4,r4,4 - blelr- -00: lwzu r5,4(r4) -01: stwbrx r5,0,r3 -02: eieio - ISYNC_8xx - .section .fixup,"ax" -03: blr - .text - .section __ex_table, "a" - .align 2 - .long 00b, 03b - .long 01b, 03b - .long 02b, 03b - .text - bdnz 00b - blr - _GLOBAL(__ide_mm_insw) _GLOBAL(_insw_ns) cmpwi 0,r5,0 diff --git a/arch/ppc/kernel/ppc_ksyms.c b/arch/ppc/kernel/ppc_ksyms.c index d1735401384c..2bd1f7353f56 100644 --- a/arch/ppc/kernel/ppc_ksyms.c +++ b/arch/ppc/kernel/ppc_ksyms.c @@ -122,10 +122,6 @@ EXPORT_SYMBOL(__ide_mm_outsl); EXPORT_SYMBOL(_insb); EXPORT_SYMBOL(_outsb); -EXPORT_SYMBOL(_insw); -EXPORT_SYMBOL(_outsw); -EXPORT_SYMBOL(_insl); -EXPORT_SYMBOL(_outsl); EXPORT_SYMBOL(_insw_ns); EXPORT_SYMBOL(_outsw_ns); EXPORT_SYMBOL(_insl_ns); diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h index 9aaced542624..0ee48436b1e3 100644 --- a/include/asm-powerpc/io.h +++ b/include/asm-powerpc/io.h @@ -151,10 +151,6 @@ static inline void __raw_writeq(unsigned long v, volatile void __iomem *addr) extern void _insb(volatile u8 __iomem *port, void *buf, int ns); extern void _outsb(volatile u8 __iomem *port, const void *buf, int ns); -extern void _insw(volatile u16 __iomem *port, void *buf, int ns); -extern void _outsw(volatile u16 __iomem *port, const void *buf, int ns); -extern void _insl(volatile u32 __iomem *port, void *buf, int nl); -extern void _outsl(volatile u32 __iomem *port, const void *buf, int nl); extern void _insw_ns(volatile u16 __iomem *port, void *buf, int ns); extern void _outsw_ns(volatile u16 __iomem *port, const void *buf, int ns); extern void _insl_ns(volatile u32 __iomem *port, void *buf, int nl); diff --git a/include/asm-ppc/io.h b/include/asm-ppc/io.h index fb0a8fcc51cf..9fac420f1648 100644 --- a/include/asm-ppc/io.h +++ b/include/asm-ppc/io.h @@ -329,10 +329,6 @@ __do_out_asm(outl, "stwbrx") extern void _insb(volatile u8 __iomem *port, void *buf, int ns); extern void _outsb(volatile u8 __iomem *port, const void *buf, int ns); -extern void _insw(volatile u16 __iomem *port, void *buf, int ns); -extern void _outsw(volatile u16 __iomem *port, const void *buf, int ns); -extern void _insl(volatile u32 __iomem *port, void *buf, int nl); -extern void _outsl(volatile u32 __iomem *port, const void *buf, int nl); extern void _insw_ns(volatile u16 __iomem *port, void *buf, int ns); extern void _outsw_ns(volatile u16 __iomem *port, const void *buf, int ns); extern void _insl_ns(volatile u32 __iomem *port, void *buf, int nl); From 73ea9e1bcb8eea4f3b2052fe7ccd7ee4b5a271a0 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 19 Sep 2006 17:30:20 +1000 Subject: [PATCH 0215/1063] [POWERPC] clean up ide io accessors Signed-off-by: Stephen Rothwell --- arch/powerpc/kernel/misc.S | 12 ------------ arch/powerpc/kernel/ppc_ksyms.c | 7 ------- arch/ppc/kernel/misc.S | 4 ---- arch/ppc/kernel/ppc_ksyms.c | 5 ----- include/asm-powerpc/ide.h | 12 ++++++------ include/asm-powerpc/io.h | 6 ------ 6 files changed, 6 insertions(+), 40 deletions(-) diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index dd5f8e429196..6feb391422ec 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -86,9 +86,6 @@ _GLOBAL(_outsb) sync blr -#ifdef CONFIG_PPC32 -_GLOBAL(__ide_mm_insw) -#endif _GLOBAL(_insw_ns) sync cmpwi 0,r5,0 @@ -103,9 +100,6 @@ _GLOBAL(_insw_ns) isync blr -#ifdef CONFIG_PPC32 -_GLOBAL(__ide_mm_outsw) -#endif _GLOBAL(_outsw_ns) cmpwi 0,r5,0 mtctr r5 @@ -118,9 +112,6 @@ _GLOBAL(_outsw_ns) sync blr -#ifdef CONFIG_PPC32 -_GLOBAL(__ide_mm_insl) -#endif _GLOBAL(_insl_ns) sync cmpwi 0,r5,0 @@ -135,9 +126,6 @@ _GLOBAL(_insl_ns) isync blr -#ifdef CONFIG_PPC32 -_GLOBAL(__ide_mm_outsl) -#endif _GLOBAL(_outsl_ns) cmpwi 0,r5,0 mtctr r5 diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 314d6114e6ec..75429e580518 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -95,13 +95,6 @@ EXPORT_SYMBOL(__strnlen_user); EXPORT_SYMBOL(copy_4K_page); #endif -#ifndef __powerpc64__ -EXPORT_SYMBOL(__ide_mm_insl); -EXPORT_SYMBOL(__ide_mm_outsw); -EXPORT_SYMBOL(__ide_mm_insw); -EXPORT_SYMBOL(__ide_mm_outsl); -#endif - EXPORT_SYMBOL(_insb); EXPORT_SYMBOL(_outsb); EXPORT_SYMBOL(_insw_ns); diff --git a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S index 44700bbfe7bc..50b4bbd06804 100644 --- a/arch/ppc/kernel/misc.S +++ b/arch/ppc/kernel/misc.S @@ -768,7 +768,6 @@ _GLOBAL(_outsb) bdnz 00b blr -_GLOBAL(__ide_mm_insw) _GLOBAL(_insw_ns) cmpwi 0,r5,0 mtctr r5 @@ -790,7 +789,6 @@ _GLOBAL(_insw_ns) bdnz 00b blr -_GLOBAL(__ide_mm_outsw) _GLOBAL(_outsw_ns) cmpwi 0,r5,0 mtctr r5 @@ -812,7 +810,6 @@ _GLOBAL(_outsw_ns) bdnz 00b blr -_GLOBAL(__ide_mm_insl) _GLOBAL(_insl_ns) cmpwi 0,r5,0 mtctr r5 @@ -834,7 +831,6 @@ _GLOBAL(_insl_ns) bdnz 00b blr -_GLOBAL(__ide_mm_outsl) _GLOBAL(_outsl_ns) cmpwi 0,r5,0 mtctr r5 diff --git a/arch/ppc/kernel/ppc_ksyms.c b/arch/ppc/kernel/ppc_ksyms.c index 2bd1f7353f56..c8b65ca8a350 100644 --- a/arch/ppc/kernel/ppc_ksyms.c +++ b/arch/ppc/kernel/ppc_ksyms.c @@ -115,11 +115,6 @@ EXPORT_SYMBOL(outw); EXPORT_SYMBOL(outl); EXPORT_SYMBOL(outsl);*/ -EXPORT_SYMBOL(__ide_mm_insl); -EXPORT_SYMBOL(__ide_mm_outsw); -EXPORT_SYMBOL(__ide_mm_insw); -EXPORT_SYMBOL(__ide_mm_outsl); - EXPORT_SYMBOL(_insb); EXPORT_SYMBOL(_outsb); EXPORT_SYMBOL(_insw_ns); diff --git a/include/asm-powerpc/ide.h b/include/asm-powerpc/ide.h index b09b42af6a1e..c8390f9485de 100644 --- a/include/asm-powerpc/ide.h +++ b/include/asm-powerpc/ide.h @@ -12,6 +12,7 @@ #include #include #endif +#include #ifndef MAX_HWIFS #ifdef __powerpc64__ @@ -21,15 +22,14 @@ #endif #endif +#define __ide_mm_insw(p, a, c) _insw_ns((volatile u16 __iomem *)(p), (a), (c)) +#define __ide_mm_insl(p, a, c) _insl_ns((volatile u32 __iomem *)(p), (a), (c)) +#define __ide_mm_outsw(p, a, c) _outsw_ns((volatile u16 __iomem *)(p), (a), (c)) +#define __ide_mm_outsl(p, a, c) _outsl_ns((volatile u32 __iomem *)(p), (a), (c)) + #ifndef __powerpc64__ #include #include -#include - -extern void __ide_mm_insw(void __iomem *port, void *addr, u32 count); -extern void __ide_mm_outsw(void __iomem *port, void *addr, u32 count); -extern void __ide_mm_insl(void __iomem *port, void *addr, u32 count); -extern void __ide_mm_outsl(void __iomem *port, void *addr, u32 count); struct ide_machdep_calls { int (*default_irq)(unsigned long base); diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h index 0ee48436b1e3..51a598747367 100644 --- a/include/asm-powerpc/io.h +++ b/include/asm-powerpc/io.h @@ -28,12 +28,6 @@ extern int check_legacy_ioport(unsigned long base_port); #include -#define __ide_mm_insw(p, a, c) _insw_ns((volatile u16 __iomem *)(p), (a), (c)) -#define __ide_mm_insl(p, a, c) _insl_ns((volatile u32 __iomem *)(p), (a), (c)) -#define __ide_mm_outsw(p, a, c) _outsw_ns((volatile u16 __iomem *)(p), (a), (c)) -#define __ide_mm_outsl(p, a, c) _outsl_ns((volatile u32 __iomem *)(p), (a), (c)) - - #define SIO_CONFIG_RA 0x398 #define SIO_CONFIG_RD 0x399 From 5adcaf50cf697aa4d0c731107003c1383b59b214 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 19 Sep 2006 22:17:49 +1000 Subject: [PATCH 0216/1063] [POWERPC] convert string i/o operations to C This produces essentially the same code and will make the iSeries i/o consolidation easier. The count parameter is changed to long since that will produce the same (better) code on 32 and 64 bit builds. Signed-off-by: Stephen Rothwell --- arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/io.c | 117 ++++++++++++++++++++++++++++++++ arch/powerpc/kernel/misc.S | 95 -------------------------- arch/powerpc/kernel/ppc_ksyms.c | 7 -- include/asm-powerpc/io.h | 12 ++-- include/asm-ppc/io.h | 12 ++-- 6 files changed, 130 insertions(+), 115 deletions(-) create mode 100644 arch/powerpc/kernel/io.c diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 8b3f4faf5768..8b133afbdc20 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -51,7 +51,7 @@ extra-$(CONFIG_8xx) := head_8xx.o extra-y += vmlinux.lds obj-y += time.o prom.o traps.o setup-common.o \ - udbg.o misc.o + udbg.o misc.o io.o obj-$(CONFIG_PPC32) += entry_32.o setup_32.o misc_32.o obj-$(CONFIG_PPC64) += misc_64.o dma_64.o iommu.o obj-$(CONFIG_PPC_MULTIPLATFORM) += prom_init.o diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c new file mode 100644 index 000000000000..80a3209acef4 --- /dev/null +++ b/arch/powerpc/kernel/io.c @@ -0,0 +1,117 @@ +/* + * I/O string operations + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Copyright (C) 2006 IBM Corporation + * + * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) + * and Paul Mackerras. + * + * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com) + * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) + * + * Rewritten in C by Stephen Rothwell. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include + +#include + +void _insb(volatile u8 __iomem *port, void *buf, long count) +{ + u8 *tbuf = buf; + u8 tmp; + + if (unlikely(count <= 0)) + return; + asm volatile("sync"); + do { + tmp = *port; + asm volatile("eieio"); + *tbuf++ = tmp; + } while (--count != 0); + asm volatile("twi 0,%0,0; isync" : : "r" (tmp)); +} +EXPORT_SYMBOL(_insb); + +void _outsb(volatile u8 __iomem *port, const void *buf, long count) +{ + const u8 *tbuf = buf; + + if (unlikely(count <= 0)) + return; + asm volatile("sync"); + do { + *port = *tbuf++; + } while (--count != 0); + asm volatile("sync"); +} +EXPORT_SYMBOL(_outsb); + +void _insw_ns(volatile u16 __iomem *port, void *buf, long count) +{ + u16 *tbuf = buf; + u16 tmp; + + if (unlikely(count <= 0)) + return; + asm volatile("sync"); + do { + tmp = *port; + asm volatile("eieio"); + *tbuf++ = tmp; + } while (--count != 0); + asm volatile("twi 0,%0,0; isync" : : "r" (tmp)); +} +EXPORT_SYMBOL(_insw_ns); + +void _outsw_ns(volatile u16 __iomem *port, const void *buf, long count) +{ + const u16 *tbuf = buf; + + if (unlikely(count <= 0)) + return; + asm volatile("sync"); + do { + *port = *tbuf++; + } while (--count != 0); + asm volatile("sync"); +} +EXPORT_SYMBOL(_outsw_ns); + +void _insl_ns(volatile u32 __iomem *port, void *buf, long count) +{ + u32 *tbuf = buf; + u32 tmp; + + if (unlikely(count <= 0)) + return; + asm volatile("sync"); + do { + tmp = *port; + asm volatile("eieio"); + *tbuf++ = tmp; + } while (--count != 0); + asm volatile("twi 0,%0,0; isync" : : "r" (tmp)); +} +EXPORT_SYMBOL(_insl_ns); + +void _outsl_ns(volatile u32 __iomem *port, const void *buf, long count) +{ + const u32 *tbuf = buf; + + if (unlikely(count <= 0)) + return; + asm volatile("sync"); + do { + *port = *tbuf++; + } while (--count != 0); + asm volatile("sync"); +} +EXPORT_SYMBOL(_outsl_ns); diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index 6feb391422ec..330c9dc7db86 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -43,98 +43,3 @@ _GLOBAL(add_reloc_offset) add r3,r3,r5 mtlr r0 blr - -/* - * I/O string operations - * - * insb(port, buf, len) - * outsb(port, buf, len) - * insw(port, buf, len) - * outsw(port, buf, len) - * insl(port, buf, len) - * outsl(port, buf, len) - * insw_ns(port, buf, len) - * outsw_ns(port, buf, len) - * insl_ns(port, buf, len) - * outsl_ns(port, buf, len) - * - * The *_ns versions don't do byte-swapping. - */ -_GLOBAL(_insb) - sync - cmpwi 0,r5,0 - mtctr r5 - subi r4,r4,1 - blelr- -00: lbz r5,0(r3) - eieio - stbu r5,1(r4) - bdnz 00b - twi 0,r5,0 - isync - blr - -_GLOBAL(_outsb) - cmpwi 0,r5,0 - mtctr r5 - subi r4,r4,1 - blelr- - sync -00: lbzu r5,1(r4) - stb r5,0(r3) - bdnz 00b - sync - blr - -_GLOBAL(_insw_ns) - sync - cmpwi 0,r5,0 - mtctr r5 - subi r4,r4,2 - blelr- -00: lhz r5,0(r3) - eieio - sthu r5,2(r4) - bdnz 00b - twi 0,r5,0 - isync - blr - -_GLOBAL(_outsw_ns) - cmpwi 0,r5,0 - mtctr r5 - subi r4,r4,2 - blelr- - sync -00: lhzu r5,2(r4) - sth r5,0(r3) - bdnz 00b - sync - blr - -_GLOBAL(_insl_ns) - sync - cmpwi 0,r5,0 - mtctr r5 - subi r4,r4,4 - blelr- -00: lwz r5,0(r3) - eieio - stwu r5,4(r4) - bdnz 00b - twi 0,r5,0 - isync - blr - -_GLOBAL(_outsl_ns) - cmpwi 0,r5,0 - mtctr r5 - subi r4,r4,4 - blelr- - sync -00: lwzu r5,4(r4) - stw r5,0(r3) - bdnz 00b - sync - blr - diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 75429e580518..807193a3c784 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -95,13 +95,6 @@ EXPORT_SYMBOL(__strnlen_user); EXPORT_SYMBOL(copy_4K_page); #endif -EXPORT_SYMBOL(_insb); -EXPORT_SYMBOL(_outsb); -EXPORT_SYMBOL(_insw_ns); -EXPORT_SYMBOL(_outsw_ns); -EXPORT_SYMBOL(_insl_ns); -EXPORT_SYMBOL(_outsl_ns); - #if defined(CONFIG_PPC32) && (defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)) EXPORT_SYMBOL(ppc_ide_md); #endif diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h index 51a598747367..57e7d14d6563 100644 --- a/include/asm-powerpc/io.h +++ b/include/asm-powerpc/io.h @@ -143,12 +143,12 @@ static inline void __raw_writeq(unsigned long v, volatile void __iomem *addr) #define readl_relaxed(addr) readl(addr) #define readq_relaxed(addr) readq(addr) -extern void _insb(volatile u8 __iomem *port, void *buf, int ns); -extern void _outsb(volatile u8 __iomem *port, const void *buf, int ns); -extern void _insw_ns(volatile u16 __iomem *port, void *buf, int ns); -extern void _outsw_ns(volatile u16 __iomem *port, const void *buf, int ns); -extern void _insl_ns(volatile u32 __iomem *port, void *buf, int nl); -extern void _outsl_ns(volatile u32 __iomem *port, const void *buf, int nl); +extern void _insb(volatile u8 __iomem *port, void *buf, long count); +extern void _outsb(volatile u8 __iomem *port, const void *buf, long count); +extern void _insw_ns(volatile u16 __iomem *port, void *buf, long count); +extern void _outsw_ns(volatile u16 __iomem *port, const void *buf, long count); +extern void _insl_ns(volatile u32 __iomem *port, void *buf, long count); +extern void _outsl_ns(volatile u32 __iomem *port, const void *buf, long count); static inline void mmiowb(void) { diff --git a/include/asm-ppc/io.h b/include/asm-ppc/io.h index 9fac420f1648..3d9a9e6f3321 100644 --- a/include/asm-ppc/io.h +++ b/include/asm-ppc/io.h @@ -327,12 +327,12 @@ __do_out_asm(outl, "stwbrx") #define inl_p(port) inl((port)) #define outl_p(val, port) outl((val), (port)) -extern void _insb(volatile u8 __iomem *port, void *buf, int ns); -extern void _outsb(volatile u8 __iomem *port, const void *buf, int ns); -extern void _insw_ns(volatile u16 __iomem *port, void *buf, int ns); -extern void _outsw_ns(volatile u16 __iomem *port, const void *buf, int ns); -extern void _insl_ns(volatile u32 __iomem *port, void *buf, int nl); -extern void _outsl_ns(volatile u32 __iomem *port, const void *buf, int nl); +extern void _insb(volatile u8 __iomem *port, void *buf, long count); +extern void _outsb(volatile u8 __iomem *port, const void *buf, long count); +extern void _insw_ns(volatile u16 __iomem *port, void *buf, long count); +extern void _outsw_ns(volatile u16 __iomem *port, const void *buf, long count); +extern void _insl_ns(volatile u32 __iomem *port, void *buf, long count); +extern void _outsl_ns(volatile u32 __iomem *port, const void *buf, long count); #define IO_SPACE_LIMIT ~0 From 19e59df4dc2e6f7b46190ee77ce7093769f597a7 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 14 Sep 2006 14:55:36 +1000 Subject: [PATCH 0217/1063] [POWERPC] iseries: eliminate a couple of warnings Copy and paste bug in io.h Signed-off-by: Stephen Rothwell --- include/asm-powerpc/io.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h index 57e7d14d6563..174fb89d5eda 100644 --- a/include/asm-powerpc/io.h +++ b/include/asm-powerpc/io.h @@ -68,8 +68,8 @@ extern unsigned long pci_io_base; * for older code. */ #define insb(port, buf, ns) _insb((u8 __iomem *)((port)+pci_io_base), (buf), (ns)) -#define insw(port, buf, ns) _insw_ns((u8 __iomem *)((port)+pci_io_base), (buf), (ns)) -#define insl(port, buf, nl) _insl_ns((u8 __iomem *)((port)+pci_io_base), (buf), (nl)) +#define insw(port, buf, ns) _insw_ns((u16 __iomem *)((port)+pci_io_base), (buf), (ns)) +#define insl(port, buf, nl) _insl_ns((u32 __iomem *)((port)+pci_io_base), (buf), (nl)) #else From a4dc7ff08915a2035aa6d6decc53fa1deaa410bb Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 19 Sep 2006 14:06:27 +1000 Subject: [PATCH 0218/1063] [POWERPC] Define of_read_ulong helper There are various places where we want to extract an unsigned long value from a device-tree property that can be 1 or 2 cells in length. This replaces some open-coded calculations, and one place where we assumed without checking that properties were the length we wanted, with a little of_read_ulong() helper. Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/prom.c | 19 ++----------------- arch/powerpc/kernel/setup-common.c | 13 ++++++++----- arch/powerpc/kernel/time.c | 4 +--- include/asm-powerpc/prom.h | 12 +++++++++++- 4 files changed, 22 insertions(+), 26 deletions(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index bf2005b2feb6..eb913f80bfb1 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -757,24 +757,9 @@ static int __init early_init_dt_scan_root(unsigned long node, static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp) { cell_t *p = *cellp; - unsigned long r; - /* Ignore more than 2 cells */ - while (s > sizeof(unsigned long) / 4) { - p++; - s--; - } - r = *p++; -#ifdef CONFIG_PPC64 - if (s > 1) { - r <<= 32; - r |= *(p++); - s--; - } -#endif - - *cellp = p; - return r; + *cellp = p + s; + return of_read_ulong(p, s); } diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 465e7435efbc..0af3fc1bdcc9 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -304,18 +304,21 @@ struct seq_operations cpuinfo_op = { void __init check_for_initrd(void) { #ifdef CONFIG_BLK_DEV_INITRD - const unsigned long *prop; + const unsigned int *prop; + int len; DBG(" -> check_for_initrd()\n"); if (of_chosen) { - prop = get_property(of_chosen, "linux,initrd-start", NULL); + prop = get_property(of_chosen, "linux,initrd-start", &len); if (prop != NULL) { - initrd_start = (unsigned long)__va(*prop); + initrd_start = (unsigned long) + __va(of_read_ulong(prop, len / 4)); prop = get_property(of_chosen, - "linux,initrd-end", NULL); + "linux,initrd-end", &len); if (prop != NULL) { - initrd_end = (unsigned long)__va(*prop); + initrd_end = (unsigned long) + __va(of_read_ulong(prop, len / 4)); initrd_below_start_ok = 1; } else initrd_start = 0; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index b9a2061cfdb7..7a3c3f791ade 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -870,9 +870,7 @@ static int __init get_freq(char *name, int cells, unsigned long *val) fp = get_property(cpu, name, NULL); if (fp) { found = 1; - *val = 0; - while (cells--) - *val = (*val << 32) | *fp++; + *val = of_read_ulong(fp, cells); } of_node_put(cpu); diff --git a/include/asm-powerpc/prom.h b/include/asm-powerpc/prom.h index c15e66a2e681..524629769336 100644 --- a/include/asm-powerpc/prom.h +++ b/include/asm-powerpc/prom.h @@ -197,7 +197,7 @@ extern int release_OF_resource(struct device_node* node, int index); */ -/* Helper to read a big number */ +/* Helper to read a big number; size is in cells (not bytes) */ static inline u64 of_read_number(const u32 *cell, int size) { u64 r = 0; @@ -206,6 +206,16 @@ static inline u64 of_read_number(const u32 *cell, int size) return r; } +/* Like of_read_number, but we want an unsigned long result */ +#ifdef CONFIG_PPC32 +static inline unsigned long of_read_ulong(const u32 *cell, int size) +{ + return cell[size-1]; +} +#else +#define of_read_ulong(cell, size) of_read_number(cell, size) +#endif + /* Translate an OF address block into a CPU physical address */ #define OF_BAD_ADDR ((u64)-1) From b2c5f61920eeee9c4e78698de4fde4586fe5ae79 Mon Sep 17 00:00:00 2001 From: "Mark A. Greer" Date: Tue, 19 Sep 2006 14:05:08 +1000 Subject: [PATCH 0219/1063] [POWERPC] Start arch/powerpc/boot code reorganization This abstracts the operations used in the bootwrapper, and defines the operations needed for the bootwrapper to run on an OF platform. The operations have been divided up into platform ops (platform_ops), firmware ops (fw_ops), device tree ops (dt_ops), and console ops (console_ops). The proper operations will be hooked up at runtime to provide the functionality that you need. Signed-off-by: Mark A. Greer Signed-off-by: Paul Mackerras --- arch/powerpc/boot/Makefile | 3 +- arch/powerpc/boot/flatdevtree.h | 46 +++++ arch/powerpc/boot/main.c | 258 +++++++++++++++-------------- arch/powerpc/boot/{prom.c => of.c} | 144 ++++++++++++++-- arch/powerpc/boot/ops.h | 100 +++++++++++ arch/powerpc/boot/prom.h | 41 ----- arch/powerpc/boot/stdio.c | 4 +- arch/powerpc/boot/stdio.h | 8 + arch/powerpc/boot/types.h | 23 +++ 9 files changed, 445 insertions(+), 182 deletions(-) create mode 100644 arch/powerpc/boot/flatdevtree.h rename arch/powerpc/boot/{prom.c => of.c} (54%) create mode 100644 arch/powerpc/boot/ops.h delete mode 100644 arch/powerpc/boot/prom.h create mode 100644 arch/powerpc/boot/types.h diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index afc776f821e5..e73774136b55 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -36,7 +36,8 @@ zliblinuxheader := zlib.h zconf.h zutil.h $(addprefix $(obj)/,$(zlib) main.o): $(addprefix $(obj)/,$(zliblinuxheader)) $(addprefix $(obj)/,$(zlibheader)) #$(addprefix $(obj)/,main.o): $(addprefix $(obj)/,zlib.h) -src-boot := crt0.S string.S prom.c stdio.c main.c div64.S +src-boot-$(CONFIG_PPC_MULTIPLATFORM) := of.c +src-boot := crt0.S string.S stdio.c main.c div64.S $(src-boot-y) src-boot += $(zlib) src-boot := $(addprefix $(obj)/, $(src-boot)) obj-boot := $(addsuffix .o, $(basename $(src-boot))) diff --git a/arch/powerpc/boot/flatdevtree.h b/arch/powerpc/boot/flatdevtree.h new file mode 100644 index 000000000000..761c8dc84008 --- /dev/null +++ b/arch/powerpc/boot/flatdevtree.h @@ -0,0 +1,46 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef FLATDEVTREE_H +#define FLATDEVTREE_H + +#include "types.h" + +/* Definitions used by the flattened device tree */ +#define OF_DT_HEADER 0xd00dfeed /* marker */ +#define OF_DT_BEGIN_NODE 0x1 /* Start of node, full name */ +#define OF_DT_END_NODE 0x2 /* End node */ +#define OF_DT_PROP 0x3 /* Property: name off, size, content */ +#define OF_DT_NOP 0x4 /* nop */ +#define OF_DT_END 0x9 + +#define OF_DT_VERSION 0x10 + +struct boot_param_header { + u32 magic; /* magic word OF_DT_HEADER */ + u32 totalsize; /* total size of DT block */ + u32 off_dt_struct; /* offset to structure */ + u32 off_dt_strings; /* offset to strings */ + u32 off_mem_rsvmap; /* offset to memory reserve map */ + u32 version; /* format version */ + u32 last_comp_version; /* last compatible version */ + /* version 2 fields below */ + u32 boot_cpuid_phys; /* Physical CPU id we're booting on */ + /* version 3 fields below */ + u32 dt_strings_size; /* size of the DT strings block */ +}; + +#endif /* FLATDEVTREE_H */ diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c index b66634c9ea34..d719bb9333d1 100644 --- a/arch/powerpc/boot/main.c +++ b/arch/powerpc/boot/main.c @@ -14,17 +14,12 @@ #include "page.h" #include "string.h" #include "stdio.h" -#include "prom.h" #include "zlib.h" +#include "ops.h" +#include "flatdevtree.h" extern void flush_cache(void *, unsigned long); - -/* Value picked to match that used by yaboot */ -#define PROG_START 0x01400000 /* only used on 64-bit systems */ -#define RAM_END (512<<20) /* Fixme: use OF */ -#define ONE_MB 0x100000 - extern char _start[]; extern char __bss_start[]; extern char _end[]; @@ -33,14 +28,6 @@ extern char _vmlinux_end[]; extern char _initrd_start[]; extern char _initrd_end[]; -/* A buffer that may be edited by tools operating on a zImage binary so as to - * edit the command line passed to vmlinux (by setting /chosen/bootargs). - * The buffer is put in it's own section so that tools may locate it easier. - */ -static char builtin_cmdline[512] - __attribute__((section("__builtin_cmdline"))); - - struct addr_range { unsigned long addr; unsigned long size; @@ -51,21 +38,16 @@ static struct addr_range vmlinuz; static struct addr_range initrd; static unsigned long elfoffset; +static int is_64bit; -static char scratch[46912]; /* scratch space for gunzip, from zlib_inflate_workspacesize() */ +/* scratch space for gunzip; 46912 is from zlib_inflate_workspacesize() */ +static char scratch[46912]; static char elfheader[256]; - -typedef void (*kernel_entry_t)( unsigned long, - unsigned long, - void *, - void *); - +typedef void (*kernel_entry_t)(unsigned long, unsigned long, void *); #undef DEBUG -static unsigned long claim_base; - #define HEAD_CRC 2 #define EXTRA_FIELD 4 #define ORIG_NAME 8 @@ -123,24 +105,6 @@ static void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp) zlib_inflateEnd(&s); } -static unsigned long try_claim(unsigned long size) -{ - unsigned long addr = 0; - - for(; claim_base < RAM_END; claim_base += ONE_MB) { -#ifdef DEBUG - printf(" trying: 0x%08lx\n\r", claim_base); -#endif - addr = (unsigned long)claim(claim_base, size, 0); - if ((void *)addr != (void *)-1) - break; - } - if (addr == 0) - return 0; - claim_base = PAGE_ALIGN(claim_base + size); - return addr; -} - static int is_elf64(void *hdr) { Elf64_Ehdr *elf64 = hdr; @@ -169,16 +133,7 @@ static int is_elf64(void *hdr) vmlinux.size = (unsigned long)elf64ph->p_filesz + elfoffset; vmlinux.memsize = (unsigned long)elf64ph->p_memsz + elfoffset; -#if defined(PROG_START) - /* - * Maintain a "magic" minimum address. This keeps some older - * firmware platforms running. - */ - - if (claim_base < PROG_START) - claim_base = PROG_START; -#endif - + is_64bit = 1; return 1; } @@ -212,47 +167,9 @@ static int is_elf32(void *hdr) return 1; } -void export_cmdline(void* chosen_handle) -{ - int len; - char cmdline[2] = { 0, 0 }; - - if (builtin_cmdline[0] == 0) - return; - - len = getprop(chosen_handle, "bootargs", cmdline, sizeof(cmdline)); - if (len > 0 && cmdline[0] != 0) - return; - - setprop(chosen_handle, "bootargs", builtin_cmdline, - strlen(builtin_cmdline) + 1); -} - - -void start(unsigned long a1, unsigned long a2, void *promptr, void *sp) +static void prep_kernel(unsigned long *a1, unsigned long *a2) { int len; - kernel_entry_t kernel_entry; - - memset(__bss_start, 0, _end - __bss_start); - - prom = (int (*)(void *)) promptr; - chosen_handle = finddevice("/chosen"); - if (chosen_handle == (void *) -1) - exit(); - if (getprop(chosen_handle, "stdout", &stdout, sizeof(stdout)) != 4) - exit(); - - printf("\n\rzImage starting: loaded at 0x%p (sp: 0x%p)\n\r", _start, sp); - - /* - * The first available claim_base must be above the end of the - * the loaded kernel wrapper file (_start to _end includes the - * initrd image if it is present) and rounded up to a nice - * 1 MB boundary for good measure. - */ - - claim_base = _ALIGN_UP((unsigned long)_end, ONE_MB); vmlinuz.addr = (unsigned long)_vmlinux_start; vmlinuz.size = (unsigned long)(_vmlinux_end - _vmlinux_start); @@ -263,43 +180,51 @@ void start(unsigned long a1, unsigned long a2, void *promptr, void *sp) gunzip(elfheader, sizeof(elfheader), (unsigned char *)vmlinuz.addr, &len); } else - memcpy(elfheader, (const void *)vmlinuz.addr, sizeof(elfheader)); + memcpy(elfheader, (const void *)vmlinuz.addr, + sizeof(elfheader)); if (!is_elf64(elfheader) && !is_elf32(elfheader)) { printf("Error: not a valid PPC32 or PPC64 ELF file!\n\r"); exit(); } + if (platform_ops.image_hdr) + platform_ops.image_hdr(elfheader); - /* We need to claim the memsize plus the file offset since gzip + /* We need to alloc the memsize plus the file offset since gzip * will expand the header (file offset), then the kernel, then * possible rubbish we don't care about. But the kernel bss must * be claimed (it will be zero'd by the kernel itself) */ printf("Allocating 0x%lx bytes for kernel ...\n\r", vmlinux.memsize); - vmlinux.addr = try_claim(vmlinux.memsize); + vmlinux.addr = (unsigned long)malloc(vmlinux.memsize); if (vmlinux.addr == 0) { printf("Can't allocate memory for kernel image !\n\r"); exit(); } /* - * Now we try to claim memory for the initrd (and copy it there) + * Now we try to alloc memory for the initrd (and copy it there) */ initrd.size = (unsigned long)(_initrd_end - _initrd_start); initrd.memsize = initrd.size; if ( initrd.size > 0 ) { - printf("Allocating 0x%lx bytes for initrd ...\n\r", initrd.size); - initrd.addr = try_claim(initrd.size); + printf("Allocating 0x%lx bytes for initrd ...\n\r", + initrd.size); + initrd.addr = (unsigned long)malloc((u32)initrd.size); if (initrd.addr == 0) { - printf("Can't allocate memory for initial ramdisk !\n\r"); + printf("Can't allocate memory for initial " + "ramdisk !\n\r"); exit(); } - a1 = initrd.addr; - a2 = initrd.size; - printf("initial ramdisk moving 0x%lx <- 0x%lx (0x%lx bytes)\n\r", - initrd.addr, (unsigned long)_initrd_start, initrd.size); - memmove((void *)initrd.addr, (void *)_initrd_start, initrd.size); - printf("initrd head: 0x%lx\n\r", *((unsigned long *)initrd.addr)); + *a1 = initrd.addr; + *a2 = initrd.size; + printf("initial ramdisk moving 0x%lx <- 0x%lx " + "(0x%lx bytes)\n\r", initrd.addr, + (unsigned long)_initrd_start, initrd.size); + memmove((void *)initrd.addr, (void *)_initrd_start, + initrd.size); + printf("initrd head: 0x%lx\n\r", + *((unsigned long *)initrd.addr)); } /* Eventually gunzip the kernel */ @@ -311,11 +236,10 @@ void start(unsigned long a1, unsigned long a2, void *promptr, void *sp) (unsigned char *)vmlinuz.addr, &len); printf("done 0x%lx bytes\n\r", len); } else { - memmove((void *)vmlinux.addr,(void *)vmlinuz.addr,vmlinuz.size); + memmove((void *)vmlinux.addr,(void *)vmlinuz.addr, + vmlinuz.size); } - export_cmdline(chosen_handle); - /* Skip over the ELF header */ #ifdef DEBUG printf("... skipping 0x%lx bytes of ELF header\n\r", @@ -324,23 +248,107 @@ void start(unsigned long a1, unsigned long a2, void *promptr, void *sp) vmlinux.addr += elfoffset; flush_cache((void *)vmlinux.addr, vmlinux.size); - - kernel_entry = (kernel_entry_t)vmlinux.addr; -#ifdef DEBUG - printf( "kernel:\n\r" - " entry addr = 0x%lx\n\r" - " a1 = 0x%lx,\n\r" - " a2 = 0x%lx,\n\r" - " prom = 0x%lx,\n\r" - " bi_recs = 0x%lx,\n\r", - (unsigned long)kernel_entry, a1, a2, - (unsigned long)prom, NULL); -#endif - - kernel_entry(a1, a2, prom, NULL); - - printf("Error: Linux kernel returned to zImage bootloader!\n\r"); - - exit(); } +void __attribute__ ((weak)) ft_init(void *dt_blob) +{ +} + +/* A buffer that may be edited by tools operating on a zImage binary so as to + * edit the command line passed to vmlinux (by setting /chosen/bootargs). + * The buffer is put in it's own section so that tools may locate it easier. + */ +static char builtin_cmdline[COMMAND_LINE_SIZE] + __attribute__((__section__("__builtin_cmdline"))); + +static void get_cmdline(char *buf, int size) +{ + void *devp; + int len = strlen(builtin_cmdline); + + buf[0] = '\0'; + + if (len > 0) { /* builtin_cmdline overrides dt's /chosen/bootargs */ + len = min(len, size-1); + strncpy(buf, builtin_cmdline, len); + buf[len] = '\0'; + } + else if ((devp = finddevice("/chosen"))) + getprop(devp, "bootargs", buf, size); +} + +static void set_cmdline(char *buf) +{ + void *devp; + + if ((devp = finddevice("/chosen"))) + setprop(devp, "bootargs", buf, strlen(buf) + 1); +} + +/* Section where ft can be tacked on after zImage is built */ +union blobspace { + struct boot_param_header hdr; + char space[8*1024]; +} dt_blob __attribute__((__section__("__builtin_ft"))); + +struct platform_ops platform_ops; +struct dt_ops dt_ops; +struct console_ops console_ops; + +void start(unsigned long a1, unsigned long a2, void *promptr, void *sp) +{ + int have_dt = 0; + kernel_entry_t kentry; + char cmdline[COMMAND_LINE_SIZE]; + + memset(__bss_start, 0, _end - __bss_start); + memset(&platform_ops, 0, sizeof(platform_ops)); + memset(&dt_ops, 0, sizeof(dt_ops)); + memset(&console_ops, 0, sizeof(console_ops)); + + /* Override the dt_ops and device tree if there was an flat dev + * tree attached to the zImage. + */ + if (dt_blob.hdr.magic == OF_DT_HEADER) { + have_dt = 1; + ft_init(&dt_blob); + } + + if (platform_init(promptr)) + exit(); + if (console_ops.open && (console_ops.open() < 0)) + exit(); + if (platform_ops.fixups) + platform_ops.fixups(); + + printf("\n\rzImage starting: loaded at 0x%p (sp: 0x%p)\n\r", + _start, sp); + + prep_kernel(&a1, &a2); + + /* If cmdline came from zimage wrapper or if we can edit the one + * in the dt, print it out and edit it, if possible. + */ + if ((strlen(builtin_cmdline) > 0) || console_ops.edit_cmdline) { + get_cmdline(cmdline, COMMAND_LINE_SIZE); + printf("\n\rLinux/PowerPC load: %s", cmdline); + if (console_ops.edit_cmdline) + console_ops.edit_cmdline(cmdline, COMMAND_LINE_SIZE); + printf("\n\r"); + set_cmdline(cmdline); + } + + if (console_ops.close) + console_ops.close(); + + kentry = (kernel_entry_t) vmlinux.addr; + if (have_dt) + kentry(dt_ops.ft_addr(), 0, NULL); + else + /* XXX initrd addr/size should be passed in properties */ + kentry(a1, a2, promptr); + + /* console closed so printf below may not work */ + printf("Error: Linux kernel returned to zImage boot wrapper!\n\r"); + exit(); +} diff --git a/arch/powerpc/boot/prom.c b/arch/powerpc/boot/of.c similarity index 54% rename from arch/powerpc/boot/prom.c rename to arch/powerpc/boot/of.c index fa0057736f6b..fd99f789a37b 100644 --- a/arch/powerpc/boot/prom.c +++ b/arch/powerpc/boot/of.c @@ -8,15 +8,29 @@ */ #include #include +#include "types.h" +#include "elf.h" #include "string.h" #include "stdio.h" -#include "prom.h" +#include "page.h" +#include "ops.h" -int (*prom)(void *); -phandle chosen_handle; -ihandle stdout; +typedef void *ihandle; +typedef void *phandle; -int call_prom(const char *service, int nargs, int nret, ...) +extern char _end[]; + +/* Value picked to match that used by yaboot */ +#define PROG_START 0x01400000 /* only used on 64-bit systems */ +#define RAM_END (512<<20) /* Fixme: use OF */ +#define ONE_MB 0x100000 + +int (*prom) (void *); + + +static unsigned long claim_base; + +static int call_prom(const char *service, int nargs, int nret, ...) { int i; struct prom_args { @@ -45,7 +59,7 @@ int call_prom(const char *service, int nargs, int nret, ...) return (nret > 0)? args.args[nargs]: 0; } -int call_prom_ret(const char *service, int nargs, int nret, +static int call_prom_ret(const char *service, int nargs, int nret, unsigned int *rets, ...) { int i; @@ -79,11 +93,6 @@ int call_prom_ret(const char *service, int nargs, int nret, return (nret > 0)? args.args[nargs]: 0; } -int write(void *handle, void *ptr, int nb) -{ - return call_prom("write", 3, 1, handle, ptr, nb); -} - /* * Older OF's require that when claiming a specific range of addresses, * we claim the physical space in the /memory node and the virtual @@ -142,7 +151,7 @@ static int check_of_version(void) return 1; } -void *claim(unsigned long virt, unsigned long size, unsigned long align) +static void *claim(unsigned long virt, unsigned long size, unsigned long align) { int ret; unsigned int result; @@ -151,7 +160,7 @@ void *claim(unsigned long virt, unsigned long size, unsigned long align) need_map = check_of_version(); if (align || !need_map) return (void *) call_prom("claim", 3, 1, virt, size, align); - + ret = call_prom_ret("call-method", 5, 2, &result, "claim", memory, align, size, virt); if (ret != 0 || result == -1) @@ -163,3 +172,112 @@ void *claim(unsigned long virt, unsigned long size, unsigned long align) 0x12, size, virt, virt); return (void *) virt; } + +static void *of_try_claim(u32 size) +{ + unsigned long addr = 0; + static u8 first_time = 1; + + if (first_time) { + claim_base = _ALIGN_UP((unsigned long)_end, ONE_MB); + first_time = 0; + } + + for(; claim_base < RAM_END; claim_base += ONE_MB) { +#ifdef DEBUG + printf(" trying: 0x%08lx\n\r", claim_base); +#endif + addr = (unsigned long)claim(claim_base, size, 0); + if ((void *)addr != (void *)-1) + break; + } + if (addr == 0) + return NULL; + claim_base = PAGE_ALIGN(claim_base + size); + return (void *)addr; +} + +static void of_image_hdr(const void *hdr) +{ + const Elf64_Ehdr *elf64 = hdr; + + if (elf64->e_ident[EI_CLASS] == ELFCLASS64) { + /* + * Maintain a "magic" minimum address. This keeps some older + * firmware platforms running. + */ + if (claim_base < PROG_START) + claim_base = PROG_START; + } +} + +static void of_exit(void) +{ + call_prom("exit", 0, 0); +} + +/* + * OF device tree routines + */ +static void *of_finddevice(const char *name) +{ + return (phandle) call_prom("finddevice", 1, 1, name); +} + +static int of_getprop(const void *phandle, const char *name, void *buf, + const int buflen) +{ + return call_prom("getprop", 4, 1, phandle, name, buf, buflen); +} + +static int of_setprop(const void *phandle, const char *name, const void *buf, + const int buflen) +{ + return call_prom("setprop", 4, 1, phandle, name, buf, buflen); +} + +/* + * OF console routines + */ +static void *of_stdout_handle; + +static int of_console_open(void) +{ + void *devp; + + if (((devp = finddevice("/chosen")) != NULL) + && (getprop(devp, "stdout", &of_stdout_handle, + sizeof(of_stdout_handle)) + == sizeof(of_stdout_handle))) + return 0; + + return -1; +} + +static void of_console_write(char *buf, int len) +{ + call_prom("write", 3, 1, of_stdout_handle, buf, len); +} + +int platform_init(void *promptr) +{ + platform_ops.fixups = NULL; + platform_ops.image_hdr = of_image_hdr; + platform_ops.malloc = of_try_claim; + platform_ops.free = NULL; + platform_ops.exit = of_exit; + + dt_ops.finddevice = of_finddevice; + dt_ops.getprop = of_getprop; + dt_ops.setprop = of_setprop; + dt_ops.translate_addr = NULL; + + console_ops.open = of_console_open; + console_ops.write = of_console_write; + console_ops.edit_cmdline = NULL; + console_ops.close = NULL; + console_ops.data = NULL; + + prom = (int (*)(void *))promptr; + return 0; +} diff --git a/arch/powerpc/boot/ops.h b/arch/powerpc/boot/ops.h new file mode 100644 index 000000000000..135eb4bb03b4 --- /dev/null +++ b/arch/powerpc/boot/ops.h @@ -0,0 +1,100 @@ +/* + * Global definition of all the bootwrapper operations. + * + * Author: Mark A. Greer + * + * 2006 (c) MontaVista Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + */ +#ifndef _PPC_BOOT_OPS_H_ +#define _PPC_BOOT_OPS_H_ + +#include "types.h" + +#define COMMAND_LINE_SIZE 512 +#define MAX_PATH_LEN 256 +#define MAX_PROP_LEN 256 /* What should this be? */ + +/* Platform specific operations */ +struct platform_ops { + void (*fixups)(void); + void (*image_hdr)(const void *); + void * (*malloc)(u32 size); + void (*free)(void *ptr, u32 size); + void (*exit)(void); +}; +extern struct platform_ops platform_ops; + +/* Device Tree operations */ +struct dt_ops { + void * (*finddevice)(const char *name); + int (*getprop)(const void *node, const char *name, void *buf, + const int buflen); + int (*setprop)(const void *node, const char *name, + const void *buf, const int buflen); + u64 (*translate_addr)(const char *path, const u32 *in_addr, + const u32 addr_len); + unsigned long (*ft_addr)(void); +}; +extern struct dt_ops dt_ops; + +/* Console operations */ +struct console_ops { + int (*open)(void); + void (*write)(char *buf, int len); + void (*edit_cmdline)(char *buf, int len); + void (*close)(void); + void *data; +}; +extern struct console_ops console_ops; + +/* Serial console operations */ +struct serial_console_data { + int (*open)(void); + void (*putc)(unsigned char c); + unsigned char (*getc)(void); + u8 (*tstc)(void); + void (*close)(void); +}; + +extern int platform_init(void *promptr); +extern void simple_alloc_init(void); +extern void ft_init(void *dt_blob); +extern int serial_console_init(void); + +static inline void *finddevice(const char *name) +{ + return (dt_ops.finddevice) ? dt_ops.finddevice(name) : NULL; +} + +static inline int getprop(void *devp, const char *name, void *buf, int buflen) +{ + return (dt_ops.getprop) ? dt_ops.getprop(devp, name, buf, buflen) : -1; +} + +static inline int setprop(void *devp, const char *name, void *buf, int buflen) +{ + return (dt_ops.setprop) ? dt_ops.setprop(devp, name, buf, buflen) : -1; +} + +static inline void *malloc(u32 size) +{ + return (platform_ops.malloc) ? platform_ops.malloc(size) : NULL; +} + +static inline void free(void *ptr, u32 size) +{ + if (platform_ops.free) + platform_ops.free(ptr, size); +} + +static inline void exit(void) +{ + if (platform_ops.exit) + platform_ops.exit(); + for(;;); +} + +#endif /* _PPC_BOOT_OPS_H_ */ diff --git a/arch/powerpc/boot/prom.h b/arch/powerpc/boot/prom.h deleted file mode 100644 index a57b184c564f..000000000000 --- a/arch/powerpc/boot/prom.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef _PPC_BOOT_PROM_H_ -#define _PPC_BOOT_PROM_H_ - -typedef void *phandle; -typedef void *ihandle; - -extern int (*prom) (void *); -extern phandle chosen_handle; -extern ihandle stdout; - -int call_prom(const char *service, int nargs, int nret, ...); -int call_prom_ret(const char *service, int nargs, int nret, - unsigned int *rets, ...); - -extern int write(void *handle, void *ptr, int nb); -extern void *claim(unsigned long virt, unsigned long size, unsigned long aln); - -static inline void exit(void) -{ - call_prom("exit", 0, 0); -} - -static inline phandle finddevice(const char *name) -{ - return (phandle) call_prom("finddevice", 1, 1, name); -} - -static inline int getprop(void *phandle, const char *name, - void *buf, int buflen) -{ - return call_prom("getprop", 4, 1, phandle, name, buf, buflen); -} - - -static inline int setprop(void *phandle, const char *name, - void *buf, int buflen) -{ - return call_prom("setprop", 4, 1, phandle, name, buf, buflen); -} - -#endif /* _PPC_BOOT_PROM_H_ */ diff --git a/arch/powerpc/boot/stdio.c b/arch/powerpc/boot/stdio.c index b5aa522f8b77..6d5f6382e1ce 100644 --- a/arch/powerpc/boot/stdio.c +++ b/arch/powerpc/boot/stdio.c @@ -10,7 +10,7 @@ #include #include "string.h" #include "stdio.h" -#include "prom.h" +#include "ops.h" size_t strnlen(const char * s, size_t count) { @@ -320,6 +320,6 @@ printf(const char *fmt, ...) va_start(args, fmt); n = vsprintf(sprint_buf, fmt, args); va_end(args); - write(stdout, sprint_buf, n); + console_ops.write(sprint_buf, n); return n; } diff --git a/arch/powerpc/boot/stdio.h b/arch/powerpc/boot/stdio.h index eb9e16c87aef..73b8a91bfb34 100644 --- a/arch/powerpc/boot/stdio.h +++ b/arch/powerpc/boot/stdio.h @@ -1,8 +1,16 @@ #ifndef _PPC_BOOT_STDIO_H_ #define _PPC_BOOT_STDIO_H_ +#include + +#define ENOMEM 12 /* Out of Memory */ +#define EINVAL 22 /* Invalid argument */ +#define ENOSPC 28 /* No space left on device */ + extern int printf(const char *fmt, ...); +#define fprintf(fmt, args...) printf(args) + extern int sprintf(char *buf, const char *fmt, ...); extern int vsprintf(char *buf, const char *fmt, va_list args); diff --git a/arch/powerpc/boot/types.h b/arch/powerpc/boot/types.h new file mode 100644 index 000000000000..79d26e708677 --- /dev/null +++ b/arch/powerpc/boot/types.h @@ -0,0 +1,23 @@ +#ifndef _TYPES_H_ +#define _TYPES_H_ + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; + +#define min(x,y) ({ \ + typeof(x) _x = (x); \ + typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x < _y ? _x : _y; }) + +#define max(x,y) ({ \ + typeof(x) _x = (x); \ + typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x > _y ? _x : _y; }) + +#endif /* _TYPES_H_ */ From de1a3f1ce6c4c3b2b14cf9157a22d6b4c64f708e Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 20 Sep 2006 15:58:20 +0200 Subject: [PATCH 0220/1063] [S390] EX_TABLE macro. Add EX_TABLE helper macro to simplify creation of inline assembly exception table entries. Signed-off-by: Martin Schwidefsky --- include/asm-s390/processor.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/include/asm-s390/processor.h b/include/asm-s390/processor.h index 5b71d3731723..a3a4e5fd30d7 100644 --- a/include/asm-s390/processor.h +++ b/include/asm-s390/processor.h @@ -339,4 +339,21 @@ int unregister_idle_notifier(struct notifier_block *nb); #endif +/* + * Helper macro for exception table entries + */ +#ifndef __s390x__ +#define EX_TABLE(_fault,_target) \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long " #_fault "," #_target "\n" \ + ".previous\n" +#else +#define EX_TABLE(_fault,_target) \ + ".section __ex_table,\"a\"\n" \ + " .align 8\n" \ + " .quad " #_fault "," #_target "\n" \ + ".previous\n" +#endif + #endif /* __ASM_S390_PROCESSOR_H */ From 7561b974e0cbbdca1bb880b55200afd9a1a20737 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 20 Sep 2006 15:58:22 +0200 Subject: [PATCH 0221/1063] [S390] remove old z90crypt driver. The z90crypt driver has served its term. It is replaced by the shiny new zcrypt device driver. Signed-off-by: Martin Schwidefsky --- drivers/s390/Kconfig | 9 - drivers/s390/crypto/Makefile | 2 - drivers/s390/crypto/z90common.h | 166 -- drivers/s390/crypto/z90crypt.h | 71 - drivers/s390/crypto/z90hardware.c | 2531 --------------------- drivers/s390/crypto/z90main.c | 3379 ----------------------------- include/asm-s390/z90crypt.h | 212 -- 7 files changed, 6370 deletions(-) delete mode 100644 drivers/s390/crypto/z90common.h delete mode 100644 drivers/s390/crypto/z90crypt.h delete mode 100644 drivers/s390/crypto/z90hardware.c delete mode 100644 drivers/s390/crypto/z90main.c delete mode 100644 include/asm-s390/z90crypt.h diff --git a/drivers/s390/Kconfig b/drivers/s390/Kconfig index 4d36208ff8de..f0ea550d39bc 100644 --- a/drivers/s390/Kconfig +++ b/drivers/s390/Kconfig @@ -217,13 +217,4 @@ endmenu menu "Cryptographic devices" -config Z90CRYPT - tristate "Support for PCI-attached cryptographic adapters" - default "m" - help - Select this option if you want to use a PCI-attached cryptographic - adapter like the PCI Cryptographic Accelerator (PCICA) or the PCI - Cryptographic Coprocessor (PCICC). This option is also available - as a module called z90crypt.ko. - endmenu diff --git a/drivers/s390/crypto/Makefile b/drivers/s390/crypto/Makefile index 15edebbead7f..67e75be8e4e4 100644 --- a/drivers/s390/crypto/Makefile +++ b/drivers/s390/crypto/Makefile @@ -2,5 +2,3 @@ # S/390 crypto devices # -z90crypt-objs := z90main.o z90hardware.o -obj-$(CONFIG_Z90CRYPT) += z90crypt.o diff --git a/drivers/s390/crypto/z90common.h b/drivers/s390/crypto/z90common.h deleted file mode 100644 index dbbcda3c846a..000000000000 --- a/drivers/s390/crypto/z90common.h +++ /dev/null @@ -1,166 +0,0 @@ -/* - * linux/drivers/s390/crypto/z90common.h - * - * z90crypt 1.3.3 - * - * Copyright (C) 2001, 2005 IBM Corporation - * Author(s): Robert Burroughs (burrough@us.ibm.com) - * Eric Rossman (edrossma@us.ibm.com) - * - * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef _Z90COMMON_H_ -#define _Z90COMMON_H_ - - -#define RESPBUFFSIZE 256 -#define PCI_FUNC_KEY_DECRYPT 0x5044 -#define PCI_FUNC_KEY_ENCRYPT 0x504B -extern int ext_bitlens; - -enum devstat { - DEV_GONE, - DEV_ONLINE, - DEV_QUEUE_FULL, - DEV_EMPTY, - DEV_NO_WORK, - DEV_BAD_MESSAGE, - DEV_TSQ_EXCEPTION, - DEV_RSQ_EXCEPTION, - DEV_SEN_EXCEPTION, - DEV_REC_EXCEPTION -}; - -enum hdstat { - HD_NOT_THERE, - HD_BUSY, - HD_DECONFIGURED, - HD_CHECKSTOPPED, - HD_ONLINE, - HD_TSQ_EXCEPTION -}; - -#define Z90C_NO_DEVICES 1 -#define Z90C_AMBIGUOUS_DOMAIN 2 -#define Z90C_INCORRECT_DOMAIN 3 -#define ENOTINIT 4 - -#define SEN_BUSY 7 -#define SEN_USER_ERROR 8 -#define SEN_QUEUE_FULL 11 -#define SEN_NOT_AVAIL 16 -#define SEN_PAD_ERROR 17 -#define SEN_RETRY 18 -#define SEN_RELEASED 24 - -#define REC_EMPTY 4 -#define REC_BUSY 6 -#define REC_OPERAND_INV 8 -#define REC_OPERAND_SIZE 9 -#define REC_EVEN_MOD 10 -#define REC_NO_WORK 11 -#define REC_HARDWAR_ERR 12 -#define REC_NO_RESPONSE 13 -#define REC_RETRY_DEV 14 -#define REC_USER_GONE 15 -#define REC_BAD_MESSAGE 16 -#define REC_INVALID_PAD 17 -#define REC_USE_PCICA 18 - -#define WRONG_DEVICE_TYPE 20 - -#define REC_FATAL_ERROR 32 -#define SEN_FATAL_ERROR 33 -#define TSQ_FATAL_ERROR 34 -#define RSQ_FATAL_ERROR 35 - -#define Z90CRYPT_NUM_TYPES 6 -#define PCICA 0 -#define PCICC 1 -#define PCIXCC_MCL2 2 -#define PCIXCC_MCL3 3 -#define CEX2C 4 -#define CEX2A 5 -#define NILDEV -1 -#define ANYDEV -1 -#define PCIXCC_UNK -2 - -enum hdevice_type { - PCICC_HW = 3, - PCICA_HW = 4, - PCIXCC_HW = 5, - CEX2A_HW = 6, - CEX2C_HW = 7 -}; - -struct CPRBX { - unsigned short cprb_len; - unsigned char cprb_ver_id; - unsigned char pad_000[3]; - unsigned char func_id[2]; - unsigned char cprb_flags[4]; - unsigned int req_parml; - unsigned int req_datal; - unsigned int rpl_msgbl; - unsigned int rpld_parml; - unsigned int rpl_datal; - unsigned int rpld_datal; - unsigned int req_extbl; - unsigned char pad_001[4]; - unsigned int rpld_extbl; - unsigned char req_parmb[16]; - unsigned char req_datab[16]; - unsigned char rpl_parmb[16]; - unsigned char rpl_datab[16]; - unsigned char req_extb[16]; - unsigned char rpl_extb[16]; - unsigned short ccp_rtcode; - unsigned short ccp_rscode; - unsigned int mac_data_len; - unsigned char logon_id[8]; - unsigned char mac_value[8]; - unsigned char mac_content_flgs; - unsigned char pad_002; - unsigned short domain; - unsigned char pad_003[12]; - unsigned char pad_004[36]; -}; - -#ifndef DEV_NAME -#define DEV_NAME "z90crypt" -#endif -#define PRINTK(fmt, args...) \ - printk(KERN_DEBUG DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args) -#define PRINTKN(fmt, args...) \ - printk(KERN_DEBUG DEV_NAME ": " fmt, ## args) -#define PRINTKW(fmt, args...) \ - printk(KERN_WARNING DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args) -#define PRINTKC(fmt, args...) \ - printk(KERN_CRIT DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args) - -#ifdef Z90CRYPT_DEBUG -#define PDEBUG(fmt, args...) \ - printk(KERN_DEBUG DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args) -#else -#define PDEBUG(fmt, args...) do {} while (0) -#endif - -#define UMIN(a,b) ((a) < (b) ? (a) : (b)) -#define IS_EVEN(x) ((x) == (2 * ((x) / 2))) - -#endif diff --git a/drivers/s390/crypto/z90crypt.h b/drivers/s390/crypto/z90crypt.h deleted file mode 100644 index 0ca1d126ccb6..000000000000 --- a/drivers/s390/crypto/z90crypt.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * linux/drivers/s390/crypto/z90crypt.h - * - * z90crypt 1.3.3 (kernel-private header) - * - * Copyright (C) 2001, 2005 IBM Corporation - * Author(s): Robert Burroughs (burrough@us.ibm.com) - * Eric Rossman (edrossma@us.ibm.com) - * - * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef _Z90CRYPT_H_ -#define _Z90CRYPT_H_ - -#include - -/** - * local errno definitions - */ -#define ENOBUFF 129 // filp->private_data->...>work_elem_p->buffer is NULL -#define EWORKPEND 130 // user issues ioctl while another pending -#define ERELEASED 131 // user released while ioctl pending -#define EQUIESCE 132 // z90crypt quiescing (no more work allowed) -#define ETIMEOUT 133 // request timed out -#define EUNKNOWN 134 // some unrecognized error occured (retry may succeed) -#define EGETBUFF 135 // Error getting buffer or hardware lacks capability - // (retry in software) - -/** - * DEPRECATED STRUCTURES - */ - -/** - * This structure is DEPRECATED and the corresponding ioctl() has been - * replaced with individual ioctl()s for each piece of data! - * This structure will NOT survive past version 1.3.1, so switch to the - * new ioctl()s. - */ -#define MASK_LENGTH 64 // mask length -struct ica_z90_status { - int totalcount; - int leedslitecount; // PCICA - int leeds2count; // PCICC - // int PCIXCCCount; is not in struct for backward compatibility - int requestqWaitCount; - int pendingqWaitCount; - int totalOpenCount; - int cryptoDomain; - // status: 0=not there, 1=PCICA, 2=PCICC, 3=PCIXCC_MCL2, 4=PCIXCC_MCL3, - // 5=CEX2C - unsigned char status[MASK_LENGTH]; - // qdepth: # work elements waiting for each device - unsigned char qdepth[MASK_LENGTH]; -}; - -#endif /* _Z90CRYPT_H_ */ diff --git a/drivers/s390/crypto/z90hardware.c b/drivers/s390/crypto/z90hardware.c deleted file mode 100644 index be60795f4a74..000000000000 --- a/drivers/s390/crypto/z90hardware.c +++ /dev/null @@ -1,2531 +0,0 @@ -/* - * linux/drivers/s390/crypto/z90hardware.c - * - * z90crypt 1.3.3 - * - * Copyright (C) 2001, 2005 IBM Corporation - * Author(s): Robert Burroughs (burrough@us.ibm.com) - * Eric Rossman (edrossma@us.ibm.com) - * - * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include -#include -#include -#include -#include -#include "z90crypt.h" -#include "z90common.h" - -struct cca_token_hdr { - unsigned char token_identifier; - unsigned char version; - unsigned short token_length; - unsigned char reserved[4]; -}; - -#define CCA_TKN_HDR_ID_EXT 0x1E - -struct cca_private_ext_ME_sec { - unsigned char section_identifier; - unsigned char version; - unsigned short section_length; - unsigned char private_key_hash[20]; - unsigned char reserved1[4]; - unsigned char key_format; - unsigned char reserved2; - unsigned char key_name_hash[20]; - unsigned char key_use_flags[4]; - unsigned char reserved3[6]; - unsigned char reserved4[24]; - unsigned char confounder[24]; - unsigned char exponent[128]; - unsigned char modulus[128]; -}; - -#define CCA_PVT_USAGE_ALL 0x80 - -struct cca_public_sec { - unsigned char section_identifier; - unsigned char version; - unsigned short section_length; - unsigned char reserved[2]; - unsigned short exponent_len; - unsigned short modulus_bit_len; - unsigned short modulus_byte_len; - unsigned char exponent[3]; -}; - -struct cca_private_ext_ME { - struct cca_token_hdr pvtMEHdr; - struct cca_private_ext_ME_sec pvtMESec; - struct cca_public_sec pubMESec; -}; - -struct cca_public_key { - struct cca_token_hdr pubHdr; - struct cca_public_sec pubSec; -}; - -struct cca_pvt_ext_CRT_sec { - unsigned char section_identifier; - unsigned char version; - unsigned short section_length; - unsigned char private_key_hash[20]; - unsigned char reserved1[4]; - unsigned char key_format; - unsigned char reserved2; - unsigned char key_name_hash[20]; - unsigned char key_use_flags[4]; - unsigned short p_len; - unsigned short q_len; - unsigned short dp_len; - unsigned short dq_len; - unsigned short u_len; - unsigned short mod_len; - unsigned char reserved3[4]; - unsigned short pad_len; - unsigned char reserved4[52]; - unsigned char confounder[8]; -}; - -#define CCA_PVT_EXT_CRT_SEC_ID_PVT 0x08 -#define CCA_PVT_EXT_CRT_SEC_FMT_CL 0x40 - -struct cca_private_ext_CRT { - struct cca_token_hdr pvtCrtHdr; - struct cca_pvt_ext_CRT_sec pvtCrtSec; - struct cca_public_sec pubCrtSec; -}; - -struct ap_status_word { - unsigned char q_stat_flags; - unsigned char response_code; - unsigned char reserved[2]; -}; - -#define AP_Q_STATUS_EMPTY 0x80 -#define AP_Q_STATUS_REPLIES_WAITING 0x40 -#define AP_Q_STATUS_ARRAY_FULL 0x20 - -#define AP_RESPONSE_NORMAL 0x00 -#define AP_RESPONSE_Q_NOT_AVAIL 0x01 -#define AP_RESPONSE_RESET_IN_PROGRESS 0x02 -#define AP_RESPONSE_DECONFIGURED 0x03 -#define AP_RESPONSE_CHECKSTOPPED 0x04 -#define AP_RESPONSE_BUSY 0x05 -#define AP_RESPONSE_Q_FULL 0x10 -#define AP_RESPONSE_NO_PENDING_REPLY 0x10 -#define AP_RESPONSE_INDEX_TOO_BIG 0x11 -#define AP_RESPONSE_NO_FIRST_PART 0x13 -#define AP_RESPONSE_MESSAGE_TOO_BIG 0x15 - -#define AP_MAX_CDX_BITL 4 -#define AP_RQID_RESERVED_BITL 4 -#define SKIP_BITL (AP_MAX_CDX_BITL + AP_RQID_RESERVED_BITL) - -struct type4_hdr { - unsigned char reserved1; - unsigned char msg_type_code; - unsigned short msg_len; - unsigned char request_code; - unsigned char msg_fmt; - unsigned short reserved2; -}; - -#define TYPE4_TYPE_CODE 0x04 -#define TYPE4_REQU_CODE 0x40 - -#define TYPE4_SME_LEN 0x0188 -#define TYPE4_LME_LEN 0x0308 -#define TYPE4_SCR_LEN 0x01E0 -#define TYPE4_LCR_LEN 0x03A0 - -#define TYPE4_SME_FMT 0x00 -#define TYPE4_LME_FMT 0x10 -#define TYPE4_SCR_FMT 0x40 -#define TYPE4_LCR_FMT 0x50 - -struct type4_sme { - struct type4_hdr header; - unsigned char message[128]; - unsigned char exponent[128]; - unsigned char modulus[128]; -}; - -struct type4_lme { - struct type4_hdr header; - unsigned char message[256]; - unsigned char exponent[256]; - unsigned char modulus[256]; -}; - -struct type4_scr { - struct type4_hdr header; - unsigned char message[128]; - unsigned char dp[72]; - unsigned char dq[64]; - unsigned char p[72]; - unsigned char q[64]; - unsigned char u[72]; -}; - -struct type4_lcr { - struct type4_hdr header; - unsigned char message[256]; - unsigned char dp[136]; - unsigned char dq[128]; - unsigned char p[136]; - unsigned char q[128]; - unsigned char u[136]; -}; - -union type4_msg { - struct type4_sme sme; - struct type4_lme lme; - struct type4_scr scr; - struct type4_lcr lcr; -}; - -struct type84_hdr { - unsigned char reserved1; - unsigned char code; - unsigned short len; - unsigned char reserved2[4]; -}; - -#define TYPE84_RSP_CODE 0x84 - -struct type6_hdr { - unsigned char reserved1; - unsigned char type; - unsigned char reserved2[2]; - unsigned char right[4]; - unsigned char reserved3[2]; - unsigned char reserved4[2]; - unsigned char apfs[4]; - unsigned int offset1; - unsigned int offset2; - unsigned int offset3; - unsigned int offset4; - unsigned char agent_id[16]; - unsigned char rqid[2]; - unsigned char reserved5[2]; - unsigned char function_code[2]; - unsigned char reserved6[2]; - unsigned int ToCardLen1; - unsigned int ToCardLen2; - unsigned int ToCardLen3; - unsigned int ToCardLen4; - unsigned int FromCardLen1; - unsigned int FromCardLen2; - unsigned int FromCardLen3; - unsigned int FromCardLen4; -}; - -struct CPRB { - unsigned char cprb_len[2]; - unsigned char cprb_ver_id; - unsigned char pad_000; - unsigned char srpi_rtcode[4]; - unsigned char srpi_verb; - unsigned char flags; - unsigned char func_id[2]; - unsigned char checkpoint_flag; - unsigned char resv2; - unsigned char req_parml[2]; - unsigned char req_parmp[4]; - unsigned char req_datal[4]; - unsigned char req_datap[4]; - unsigned char rpl_parml[2]; - unsigned char pad_001[2]; - unsigned char rpl_parmp[4]; - unsigned char rpl_datal[4]; - unsigned char rpl_datap[4]; - unsigned char ccp_rscode[2]; - unsigned char ccp_rtcode[2]; - unsigned char repd_parml[2]; - unsigned char mac_data_len[2]; - unsigned char repd_datal[4]; - unsigned char req_pc[2]; - unsigned char res_origin[8]; - unsigned char mac_value[8]; - unsigned char logon_id[8]; - unsigned char usage_domain[2]; - unsigned char resv3[18]; - unsigned char svr_namel[2]; - unsigned char svr_name[8]; -}; - -struct type6_msg { - struct type6_hdr header; - struct CPRB CPRB; -}; - -struct type86_hdr { - unsigned char reserved1; - unsigned char type; - unsigned char format; - unsigned char reserved2; - unsigned char reply_code; - unsigned char reserved3[3]; -}; - -#define TYPE86_RSP_CODE 0x86 -#define TYPE86_FMT2 0x02 - -struct type86_fmt2_msg { - struct type86_hdr header; - unsigned char reserved[4]; - unsigned char apfs[4]; - unsigned int count1; - unsigned int offset1; - unsigned int count2; - unsigned int offset2; - unsigned int count3; - unsigned int offset3; - unsigned int count4; - unsigned int offset4; -}; - -static struct type6_hdr static_type6_hdr = { - 0x00, - 0x06, - {0x00,0x00}, - {0x00,0x00,0x00,0x00}, - {0x00,0x00}, - {0x00,0x00}, - {0x00,0x00,0x00,0x00}, - 0x00000058, - 0x00000000, - 0x00000000, - 0x00000000, - {0x01,0x00,0x43,0x43,0x41,0x2D,0x41,0x50, - 0x50,0x4C,0x20,0x20,0x20,0x01,0x01,0x01}, - {0x00,0x00}, - {0x00,0x00}, - {0x50,0x44}, - {0x00,0x00}, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000 -}; - -static struct type6_hdr static_type6_hdrX = { - 0x00, - 0x06, - {0x00,0x00}, - {0x00,0x00,0x00,0x00}, - {0x00,0x00}, - {0x00,0x00}, - {0x00,0x00,0x00,0x00}, - 0x00000058, - 0x00000000, - 0x00000000, - 0x00000000, - {0x43,0x41,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}, - {0x00,0x00}, - {0x00,0x00}, - {0x50,0x44}, - {0x00,0x00}, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000 -}; - -static struct CPRB static_cprb = { - {0x70,0x00}, - 0x41, - 0x00, - {0x00,0x00,0x00,0x00}, - 0x00, - 0x00, - {0x54,0x32}, - 0x01, - 0x00, - {0x00,0x00}, - {0x00,0x00,0x00,0x00}, - {0x00,0x00,0x00,0x00}, - {0x00,0x00,0x00,0x00}, - {0x00,0x00}, - {0x00,0x00}, - {0x00,0x00,0x00,0x00}, - {0x00,0x00,0x00,0x00}, - {0x00,0x00,0x00,0x00}, - {0x00,0x00}, - {0x00,0x00}, - {0x00,0x00}, - {0x00,0x00}, - {0x00,0x00,0x00,0x00}, - {0x00,0x00}, - {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}, - {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}, - {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}, - {0x00,0x00}, - {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00}, - {0x08,0x00}, - {0x49,0x43,0x53,0x46,0x20,0x20,0x20,0x20} -}; - -struct function_and_rules_block { - unsigned char function_code[2]; - unsigned char ulen[2]; - unsigned char only_rule[8]; -}; - -static struct function_and_rules_block static_pkd_function_and_rules = { - {0x50,0x44}, - {0x0A,0x00}, - {'P','K','C','S','-','1','.','2'} -}; - -static struct function_and_rules_block static_pke_function_and_rules = { - {0x50,0x4B}, - {0x0A,0x00}, - {'P','K','C','S','-','1','.','2'} -}; - -struct T6_keyBlock_hdr { - unsigned char blen[2]; - unsigned char ulen[2]; - unsigned char flags[2]; -}; - -static struct T6_keyBlock_hdr static_T6_keyBlock_hdr = { - {0x89,0x01}, - {0x87,0x01}, - {0x00} -}; - -static struct CPRBX static_cprbx = { - 0x00DC, - 0x02, - {0x00,0x00,0x00}, - {0x54,0x32}, - {0x00,0x00,0x00,0x00}, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - {0x00,0x00,0x00,0x00}, - 0x00000000, - {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}, - {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}, - {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}, - {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}, - {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}, - {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}, - 0x0000, - 0x0000, - 0x00000000, - {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}, - {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}, - 0x00, - 0x00, - 0x0000, - {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}, - {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00} -}; - -static struct function_and_rules_block static_pkd_function_and_rulesX_MCL2 = { - {0x50,0x44}, - {0x00,0x0A}, - {'P','K','C','S','-','1','.','2'} -}; - -static struct function_and_rules_block static_pke_function_and_rulesX_MCL2 = { - {0x50,0x4B}, - {0x00,0x0A}, - {'Z','E','R','O','-','P','A','D'} -}; - -static struct function_and_rules_block static_pkd_function_and_rulesX = { - {0x50,0x44}, - {0x00,0x0A}, - {'Z','E','R','O','-','P','A','D'} -}; - -static struct function_and_rules_block static_pke_function_and_rulesX = { - {0x50,0x4B}, - {0x00,0x0A}, - {'M','R','P',' ',' ',' ',' ',' '} -}; - -static unsigned char static_PKE_function_code[2] = {0x50, 0x4B}; - -struct T6_keyBlock_hdrX { - unsigned short blen; - unsigned short ulen; - unsigned char flags[2]; -}; - -static unsigned char static_pad[256] = { -0x1B,0x7B,0x5D,0xB5,0x75,0x01,0x3D,0xFD,0x8D,0xD1,0xC7,0x03,0x2D,0x09,0x23,0x57, -0x89,0x49,0xB9,0x3F,0xBB,0x99,0x41,0x5B,0x75,0x21,0x7B,0x9D,0x3B,0x6B,0x51,0x39, -0xBB,0x0D,0x35,0xB9,0x89,0x0F,0x93,0xA5,0x0B,0x47,0xF1,0xD3,0xBB,0xCB,0xF1,0x9D, -0x23,0x73,0x71,0xFF,0xF3,0xF5,0x45,0xFB,0x61,0x29,0x23,0xFD,0xF1,0x29,0x3F,0x7F, -0x17,0xB7,0x1B,0xA9,0x19,0xBD,0x57,0xA9,0xD7,0x95,0xA3,0xCB,0xED,0x1D,0xDB,0x45, -0x7D,0x11,0xD1,0x51,0x1B,0xED,0x71,0xE9,0xB1,0xD1,0xAB,0xAB,0x21,0x2B,0x1B,0x9F, -0x3B,0x9F,0xF7,0xF7,0xBD,0x63,0xEB,0xAD,0xDF,0xB3,0x6F,0x5B,0xDB,0x8D,0xA9,0x5D, -0xE3,0x7D,0x77,0x49,0x47,0xF5,0xA7,0xFD,0xAB,0x2F,0x27,0x35,0x77,0xD3,0x49,0xC9, -0x09,0xEB,0xB1,0xF9,0xBF,0x4B,0xCB,0x2B,0xEB,0xEB,0x05,0xFF,0x7D,0xC7,0x91,0x8B, -0x09,0x83,0xB9,0xB9,0x69,0x33,0x39,0x6B,0x79,0x75,0x19,0xBF,0xBB,0x07,0x1D,0xBD, -0x29,0xBF,0x39,0x95,0x93,0x1D,0x35,0xC7,0xC9,0x4D,0xE5,0x97,0x0B,0x43,0x9B,0xF1, -0x16,0x93,0x03,0x1F,0xA5,0xFB,0xDB,0xF3,0x27,0x4F,0x27,0x61,0x05,0x1F,0xB9,0x23, -0x2F,0xC3,0x81,0xA9,0x23,0x71,0x55,0x55,0xEB,0xED,0x41,0xE5,0xF3,0x11,0xF1,0x43, -0x69,0x03,0xBD,0x0B,0x37,0x0F,0x51,0x8F,0x0B,0xB5,0x89,0x5B,0x67,0xA9,0xD9,0x4F, -0x01,0xF9,0x21,0x77,0x37,0x73,0x79,0xC5,0x7F,0x51,0xC1,0xCF,0x97,0xA1,0x75,0xAD, -0x35,0x9D,0xD3,0xD3,0xA7,0x9D,0x5D,0x41,0x6F,0x65,0x1B,0xCF,0xA9,0x87,0x91,0x09 -}; - -static struct cca_private_ext_ME static_pvt_me_key = { - { - 0x1E, - 0x00, - 0x0183, - {0x00,0x00,0x00,0x00} - }, - - { - 0x02, - 0x00, - 0x016C, - {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00}, - {0x00,0x00,0x00,0x00}, - 0x00, - 0x00, - {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00}, - {0x80,0x00,0x00,0x00}, - {0x00,0x00,0x00,0x00,0x00,0x00}, - {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}, - {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}, - {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}, - {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00} - }, - - { - 0x04, - 0x00, - 0x000F, - {0x00,0x00}, - 0x0003, - 0x0000, - 0x0000, - {0x01,0x00,0x01} - } -}; - -static struct cca_public_key static_public_key = { - { - 0x1E, - 0x00, - 0x0000, - {0x00,0x00,0x00,0x00} - }, - - { - 0x04, - 0x00, - 0x0000, - {0x00,0x00}, - 0x0000, - 0x0000, - 0x0000, - {0x01,0x00,0x01} - } -}; - -#define FIXED_TYPE6_ME_LEN 0x0000025F - -#define FIXED_TYPE6_ME_EN_LEN 0x000000F0 - -#define FIXED_TYPE6_ME_LENX 0x000002CB - -#define FIXED_TYPE6_ME_EN_LENX 0x0000015C - -static struct cca_public_sec static_cca_pub_sec = { - 0x04, - 0x00, - 0x000f, - {0x00,0x00}, - 0x0003, - 0x0000, - 0x0000, - {0x01,0x00,0x01} -}; - -#define FIXED_TYPE6_CR_LEN 0x00000177 - -#define FIXED_TYPE6_CR_LENX 0x000001E3 - -#define MAX_RESPONSE_SIZE 0x00000710 - -#define MAX_RESPONSEX_SIZE 0x0000077C - -#define RESPONSE_CPRB_SIZE 0x000006B8 -#define RESPONSE_CPRBX_SIZE 0x00000724 - -struct type50_hdr { - u8 reserved1; - u8 msg_type_code; - u16 msg_len; - u8 reserved2; - u8 ignored; - u16 reserved3; -}; - -#define TYPE50_TYPE_CODE 0x50 - -#define TYPE50_MEB1_LEN (sizeof(struct type50_meb1_msg)) -#define TYPE50_MEB2_LEN (sizeof(struct type50_meb2_msg)) -#define TYPE50_CRB1_LEN (sizeof(struct type50_crb1_msg)) -#define TYPE50_CRB2_LEN (sizeof(struct type50_crb2_msg)) - -#define TYPE50_MEB1_FMT 0x0001 -#define TYPE50_MEB2_FMT 0x0002 -#define TYPE50_CRB1_FMT 0x0011 -#define TYPE50_CRB2_FMT 0x0012 - -struct type50_meb1_msg { - struct type50_hdr header; - u16 keyblock_type; - u8 reserved[6]; - u8 exponent[128]; - u8 modulus[128]; - u8 message[128]; -}; - -struct type50_meb2_msg { - struct type50_hdr header; - u16 keyblock_type; - u8 reserved[6]; - u8 exponent[256]; - u8 modulus[256]; - u8 message[256]; -}; - -struct type50_crb1_msg { - struct type50_hdr header; - u16 keyblock_type; - u8 reserved[6]; - u8 p[64]; - u8 q[64]; - u8 dp[64]; - u8 dq[64]; - u8 u[64]; - u8 message[128]; -}; - -struct type50_crb2_msg { - struct type50_hdr header; - u16 keyblock_type; - u8 reserved[6]; - u8 p[128]; - u8 q[128]; - u8 dp[128]; - u8 dq[128]; - u8 u[128]; - u8 message[256]; -}; - -union type50_msg { - struct type50_meb1_msg meb1; - struct type50_meb2_msg meb2; - struct type50_crb1_msg crb1; - struct type50_crb2_msg crb2; -}; - -struct type80_hdr { - u8 reserved1; - u8 type; - u16 len; - u8 code; - u8 reserved2[3]; - u8 reserved3[8]; -}; - -#define TYPE80_RSP_CODE 0x80 - -struct error_hdr { - unsigned char reserved1; - unsigned char type; - unsigned char reserved2[2]; - unsigned char reply_code; - unsigned char reserved3[3]; -}; - -#define TYPE82_RSP_CODE 0x82 -#define TYPE88_RSP_CODE 0x88 - -#define REP82_ERROR_MACHINE_FAILURE 0x10 -#define REP82_ERROR_PREEMPT_FAILURE 0x12 -#define REP82_ERROR_CHECKPT_FAILURE 0x14 -#define REP82_ERROR_MESSAGE_TYPE 0x20 -#define REP82_ERROR_INVALID_COMM_CD 0x21 -#define REP82_ERROR_INVALID_MSG_LEN 0x23 -#define REP82_ERROR_RESERVD_FIELD 0x24 -#define REP82_ERROR_FORMAT_FIELD 0x29 -#define REP82_ERROR_INVALID_COMMAND 0x30 -#define REP82_ERROR_MALFORMED_MSG 0x40 -#define REP82_ERROR_RESERVED_FIELDO 0x50 -#define REP82_ERROR_WORD_ALIGNMENT 0x60 -#define REP82_ERROR_MESSAGE_LENGTH 0x80 -#define REP82_ERROR_OPERAND_INVALID 0x82 -#define REP82_ERROR_OPERAND_SIZE 0x84 -#define REP82_ERROR_EVEN_MOD_IN_OPND 0x85 -#define REP82_ERROR_RESERVED_FIELD 0x88 -#define REP82_ERROR_TRANSPORT_FAIL 0x90 -#define REP82_ERROR_PACKET_TRUNCATED 0xA0 -#define REP82_ERROR_ZERO_BUFFER_LEN 0xB0 - -#define REP88_ERROR_MODULE_FAILURE 0x10 -#define REP88_ERROR_MODULE_TIMEOUT 0x11 -#define REP88_ERROR_MODULE_NOTINIT 0x13 -#define REP88_ERROR_MODULE_NOTAVAIL 0x14 -#define REP88_ERROR_MODULE_DISABLED 0x15 -#define REP88_ERROR_MODULE_IN_DIAGN 0x17 -#define REP88_ERROR_FASTPATH_DISABLD 0x19 -#define REP88_ERROR_MESSAGE_TYPE 0x20 -#define REP88_ERROR_MESSAGE_MALFORMD 0x22 -#define REP88_ERROR_MESSAGE_LENGTH 0x23 -#define REP88_ERROR_RESERVED_FIELD 0x24 -#define REP88_ERROR_KEY_TYPE 0x34 -#define REP88_ERROR_INVALID_KEY 0x82 -#define REP88_ERROR_OPERAND 0x84 -#define REP88_ERROR_OPERAND_EVEN_MOD 0x85 - -#define CALLER_HEADER 12 - -static inline int -testq(int q_nr, int *q_depth, int *dev_type, struct ap_status_word *stat) -{ - int ccode; - - asm volatile -#ifdef CONFIG_64BIT - (" llgfr 0,%4 \n" - " slgr 1,1 \n" - " lgr 2,1 \n" - "0: .long 0xb2af0000 \n" - "1: ipm %0 \n" - " srl %0,28 \n" - " iihh %0,0 \n" - " iihl %0,0 \n" - " lgr %1,1 \n" - " lgr %3,2 \n" - " srl %3,24 \n" - " sll 2,24 \n" - " srl 2,24 \n" - " lgr %2,2 \n" - "2: \n" - ".section .fixup,\"ax\" \n" - "3: \n" - " lhi %0,%h5 \n" - " jg 2b \n" - ".previous \n" - ".section __ex_table,\"a\" \n" - " .align 8 \n" - " .quad 0b,3b \n" - " .quad 1b,3b \n" - ".previous" - :"=d" (ccode),"=d" (*stat),"=d" (*q_depth), "=d" (*dev_type) - :"d" (q_nr), "K" (DEV_TSQ_EXCEPTION) - :"cc","0","1","2","memory"); -#else - (" lr 0,%4 \n" - " slr 1,1 \n" - " lr 2,1 \n" - "0: .long 0xb2af0000 \n" - "1: ipm %0 \n" - " srl %0,28 \n" - " lr %1,1 \n" - " lr %3,2 \n" - " srl %3,24 \n" - " sll 2,24 \n" - " srl 2,24 \n" - " lr %2,2 \n" - "2: \n" - ".section .fixup,\"ax\" \n" - "3: \n" - " lhi %0,%h5 \n" - " bras 1,4f \n" - " .long 2b \n" - "4: \n" - " l 1,0(1) \n" - " br 1 \n" - ".previous \n" - ".section __ex_table,\"a\" \n" - " .align 4 \n" - " .long 0b,3b \n" - " .long 1b,3b \n" - ".previous" - :"=d" (ccode),"=d" (*stat),"=d" (*q_depth), "=d" (*dev_type) - :"d" (q_nr), "K" (DEV_TSQ_EXCEPTION) - :"cc","0","1","2","memory"); -#endif - return ccode; -} - -static inline int -resetq(int q_nr, struct ap_status_word *stat_p) -{ - int ccode; - - asm volatile -#ifdef CONFIG_64BIT - (" llgfr 0,%2 \n" - " lghi 1,1 \n" - " sll 1,24 \n" - " or 0,1 \n" - " slgr 1,1 \n" - " lgr 2,1 \n" - "0: .long 0xb2af0000 \n" - "1: ipm %0 \n" - " srl %0,28 \n" - " iihh %0,0 \n" - " iihl %0,0 \n" - " lgr %1,1 \n" - "2: \n" - ".section .fixup,\"ax\" \n" - "3: \n" - " lhi %0,%h3 \n" - " jg 2b \n" - ".previous \n" - ".section __ex_table,\"a\" \n" - " .align 8 \n" - " .quad 0b,3b \n" - " .quad 1b,3b \n" - ".previous" - :"=d" (ccode),"=d" (*stat_p) - :"d" (q_nr), "K" (DEV_RSQ_EXCEPTION) - :"cc","0","1","2","memory"); -#else - (" lr 0,%2 \n" - " lhi 1,1 \n" - " sll 1,24 \n" - " or 0,1 \n" - " slr 1,1 \n" - " lr 2,1 \n" - "0: .long 0xb2af0000 \n" - "1: ipm %0 \n" - " srl %0,28 \n" - " lr %1,1 \n" - "2: \n" - ".section .fixup,\"ax\" \n" - "3: \n" - " lhi %0,%h3 \n" - " bras 1,4f \n" - " .long 2b \n" - "4: \n" - " l 1,0(1) \n" - " br 1 \n" - ".previous \n" - ".section __ex_table,\"a\" \n" - " .align 4 \n" - " .long 0b,3b \n" - " .long 1b,3b \n" - ".previous" - :"=d" (ccode),"=d" (*stat_p) - :"d" (q_nr), "K" (DEV_RSQ_EXCEPTION) - :"cc","0","1","2","memory"); -#endif - return ccode; -} - -static inline int -sen(int msg_len, unsigned char *msg_ext, struct ap_status_word *stat) -{ - int ccode; - - asm volatile -#ifdef CONFIG_64BIT - (" lgr 6,%3 \n" - " llgfr 7,%2 \n" - " llgt 0,0(6) \n" - " lghi 1,64 \n" - " sll 1,24 \n" - " or 0,1 \n" - " la 6,4(6) \n" - " llgt 2,0(6) \n" - " llgt 3,4(6) \n" - " la 6,8(6) \n" - " slr 1,1 \n" - "0: .long 0xb2ad0026 \n" - "1: brc 2,0b \n" - " ipm %0 \n" - " srl %0,28 \n" - " iihh %0,0 \n" - " iihl %0,0 \n" - " lgr %1,1 \n" - "2: \n" - ".section .fixup,\"ax\" \n" - "3: \n" - " lhi %0,%h4 \n" - " jg 2b \n" - ".previous \n" - ".section __ex_table,\"a\" \n" - " .align 8 \n" - " .quad 0b,3b \n" - " .quad 1b,3b \n" - ".previous" - :"=d" (ccode),"=d" (*stat) - :"d" (msg_len),"a" (msg_ext), "K" (DEV_SEN_EXCEPTION) - :"cc","0","1","2","3","6","7","memory"); -#else - (" lr 6,%3 \n" - " lr 7,%2 \n" - " l 0,0(6) \n" - " lhi 1,64 \n" - " sll 1,24 \n" - " or 0,1 \n" - " la 6,4(6) \n" - " l 2,0(6) \n" - " l 3,4(6) \n" - " la 6,8(6) \n" - " slr 1,1 \n" - "0: .long 0xb2ad0026 \n" - "1: brc 2,0b \n" - " ipm %0 \n" - " srl %0,28 \n" - " lr %1,1 \n" - "2: \n" - ".section .fixup,\"ax\" \n" - "3: \n" - " lhi %0,%h4 \n" - " bras 1,4f \n" - " .long 2b \n" - "4: \n" - " l 1,0(1) \n" - " br 1 \n" - ".previous \n" - ".section __ex_table,\"a\" \n" - " .align 4 \n" - " .long 0b,3b \n" - " .long 1b,3b \n" - ".previous" - :"=d" (ccode),"=d" (*stat) - :"d" (msg_len),"a" (msg_ext), "K" (DEV_SEN_EXCEPTION) - :"cc","0","1","2","3","6","7","memory"); -#endif - return ccode; -} - -static inline int -rec(int q_nr, int buff_l, unsigned char *rsp, unsigned char *id, - struct ap_status_word *st) -{ - int ccode; - - asm volatile -#ifdef CONFIG_64BIT - (" llgfr 0,%2 \n" - " lgr 3,%4 \n" - " lgr 6,%3 \n" - " llgfr 7,%5 \n" - " lghi 1,128 \n" - " sll 1,24 \n" - " or 0,1 \n" - " slgr 1,1 \n" - " lgr 2,1 \n" - " lgr 4,1 \n" - " lgr 5,1 \n" - "0: .long 0xb2ae0046 \n" - "1: brc 2,0b \n" - " brc 4,0b \n" - " ipm %0 \n" - " srl %0,28 \n" - " iihh %0,0 \n" - " iihl %0,0 \n" - " lgr %1,1 \n" - " st 4,0(3) \n" - " st 5,4(3) \n" - "2: \n" - ".section .fixup,\"ax\" \n" - "3: \n" - " lhi %0,%h6 \n" - " jg 2b \n" - ".previous \n" - ".section __ex_table,\"a\" \n" - " .align 8 \n" - " .quad 0b,3b \n" - " .quad 1b,3b \n" - ".previous" - :"=d"(ccode),"=d"(*st) - :"d" (q_nr), "d" (rsp), "d" (id), "d" (buff_l), "K" (DEV_REC_EXCEPTION) - :"cc","0","1","2","3","4","5","6","7","memory"); -#else - (" lr 0,%2 \n" - " lr 3,%4 \n" - " lr 6,%3 \n" - " lr 7,%5 \n" - " lhi 1,128 \n" - " sll 1,24 \n" - " or 0,1 \n" - " slr 1,1 \n" - " lr 2,1 \n" - " lr 4,1 \n" - " lr 5,1 \n" - "0: .long 0xb2ae0046 \n" - "1: brc 2,0b \n" - " brc 4,0b \n" - " ipm %0 \n" - " srl %0,28 \n" - " lr %1,1 \n" - " st 4,0(3) \n" - " st 5,4(3) \n" - "2: \n" - ".section .fixup,\"ax\" \n" - "3: \n" - " lhi %0,%h6 \n" - " bras 1,4f \n" - " .long 2b \n" - "4: \n" - " l 1,0(1) \n" - " br 1 \n" - ".previous \n" - ".section __ex_table,\"a\" \n" - " .align 4 \n" - " .long 0b,3b \n" - " .long 1b,3b \n" - ".previous" - :"=d"(ccode),"=d"(*st) - :"d" (q_nr), "d" (rsp), "d" (id), "d" (buff_l), "K" (DEV_REC_EXCEPTION) - :"cc","0","1","2","3","4","5","6","7","memory"); -#endif - return ccode; -} - -static inline void -itoLe2(int *i_p, unsigned char *lechars) -{ - *lechars = *((unsigned char *) i_p + sizeof(int) - 1); - *(lechars + 1) = *((unsigned char *) i_p + sizeof(int) - 2); -} - -static inline void -le2toI(unsigned char *lechars, int *i_p) -{ - unsigned char *ic_p; - *i_p = 0; - ic_p = (unsigned char *) i_p; - *(ic_p + 2) = *(lechars + 1); - *(ic_p + 3) = *(lechars); -} - -static inline int -is_empty(unsigned char *ptr, int len) -{ - return !memcmp(ptr, (unsigned char *) &static_pvt_me_key+60, len); -} - -enum hdstat -query_online(int deviceNr, int cdx, int resetNr, int *q_depth, int *dev_type) -{ - int q_nr, i, t_depth, t_dev_type; - enum devstat ccode; - struct ap_status_word stat_word; - enum hdstat stat; - int break_out; - - q_nr = (deviceNr << SKIP_BITL) + cdx; - stat = HD_BUSY; - ccode = testq(q_nr, &t_depth, &t_dev_type, &stat_word); - PDEBUG("ccode %d response_code %02X\n", ccode, stat_word.response_code); - break_out = 0; - for (i = 0; i < resetNr; i++) { - if (ccode > 3) { - PRINTKC("Exception testing device %d\n", i); - return HD_TSQ_EXCEPTION; - } - switch (ccode) { - case 0: - PDEBUG("t_dev_type %d\n", t_dev_type); - break_out = 1; - stat = HD_ONLINE; - *q_depth = t_depth + 1; - switch (t_dev_type) { - case PCICA_HW: - *dev_type = PCICA; - break; - case PCICC_HW: - *dev_type = PCICC; - break; - case PCIXCC_HW: - *dev_type = PCIXCC_UNK; - break; - case CEX2C_HW: - *dev_type = CEX2C; - break; - case CEX2A_HW: - *dev_type = CEX2A; - break; - default: - *dev_type = NILDEV; - break; - } - PDEBUG("available device %d: Q depth = %d, dev " - "type = %d, stat = %02X%02X%02X%02X\n", - deviceNr, *q_depth, *dev_type, - stat_word.q_stat_flags, - stat_word.response_code, - stat_word.reserved[0], - stat_word.reserved[1]); - break; - case 3: - switch (stat_word.response_code) { - case AP_RESPONSE_NORMAL: - stat = HD_ONLINE; - break_out = 1; - *q_depth = t_depth + 1; - *dev_type = t_dev_type; - PDEBUG("cc3, available device " - "%d: Q depth = %d, dev " - "type = %d, stat = " - "%02X%02X%02X%02X\n", - deviceNr, *q_depth, - *dev_type, - stat_word.q_stat_flags, - stat_word.response_code, - stat_word.reserved[0], - stat_word.reserved[1]); - break; - case AP_RESPONSE_Q_NOT_AVAIL: - stat = HD_NOT_THERE; - break_out = 1; - break; - case AP_RESPONSE_RESET_IN_PROGRESS: - PDEBUG("device %d in reset\n", - deviceNr); - break; - case AP_RESPONSE_DECONFIGURED: - stat = HD_DECONFIGURED; - break_out = 1; - break; - case AP_RESPONSE_CHECKSTOPPED: - stat = HD_CHECKSTOPPED; - break_out = 1; - break; - case AP_RESPONSE_BUSY: - PDEBUG("device %d busy\n", - deviceNr); - break; - default: - break; - } - break; - default: - stat = HD_NOT_THERE; - break_out = 1; - break; - } - if (break_out) - break; - - udelay(5); - - ccode = testq(q_nr, &t_depth, &t_dev_type, &stat_word); - } - return stat; -} - -enum devstat -reset_device(int deviceNr, int cdx, int resetNr) -{ - int q_nr, ccode = 0, dummy_qdepth, dummy_devType, i; - struct ap_status_word stat_word; - enum devstat stat; - int break_out; - - q_nr = (deviceNr << SKIP_BITL) + cdx; - stat = DEV_GONE; - ccode = resetq(q_nr, &stat_word); - if (ccode > 3) - return DEV_RSQ_EXCEPTION; - - break_out = 0; - for (i = 0; i < resetNr; i++) { - switch (ccode) { - case 0: - stat = DEV_ONLINE; - if (stat_word.q_stat_flags & AP_Q_STATUS_EMPTY) - break_out = 1; - break; - case 3: - switch (stat_word.response_code) { - case AP_RESPONSE_NORMAL: - stat = DEV_ONLINE; - if (stat_word.q_stat_flags & AP_Q_STATUS_EMPTY) - break_out = 1; - break; - case AP_RESPONSE_Q_NOT_AVAIL: - case AP_RESPONSE_DECONFIGURED: - case AP_RESPONSE_CHECKSTOPPED: - stat = DEV_GONE; - break_out = 1; - break; - case AP_RESPONSE_RESET_IN_PROGRESS: - case AP_RESPONSE_BUSY: - default: - break; - } - break; - default: - stat = DEV_GONE; - break_out = 1; - break; - } - if (break_out == 1) - break; - udelay(5); - - ccode = testq(q_nr, &dummy_qdepth, &dummy_devType, &stat_word); - if (ccode > 3) { - stat = DEV_TSQ_EXCEPTION; - break; - } - } - PDEBUG("Number of testq's needed for reset: %d\n", i); - - if (i >= resetNr) { - stat = DEV_GONE; - } - - return stat; -} - -#ifdef DEBUG_HYDRA_MSGS -static inline void -print_buffer(unsigned char *buffer, int bufflen) -{ - int i; - for (i = 0; i < bufflen; i += 16) { - PRINTK("%04X: %02X%02X%02X%02X %02X%02X%02X%02X " - "%02X%02X%02X%02X %02X%02X%02X%02X\n", i, - buffer[i+0], buffer[i+1], buffer[i+2], buffer[i+3], - buffer[i+4], buffer[i+5], buffer[i+6], buffer[i+7], - buffer[i+8], buffer[i+9], buffer[i+10], buffer[i+11], - buffer[i+12], buffer[i+13], buffer[i+14], buffer[i+15]); - } -} -#endif - -enum devstat -send_to_AP(int dev_nr, int cdx, int msg_len, unsigned char *msg_ext) -{ - struct ap_status_word stat_word; - enum devstat stat; - int ccode; - u32 *q_nr_p = (u32 *)msg_ext; - - *q_nr_p = (dev_nr << SKIP_BITL) + cdx; - PDEBUG("msg_len passed to sen: %d\n", msg_len); - PDEBUG("q number passed to sen: %02x%02x%02x%02x\n", - msg_ext[0], msg_ext[1], msg_ext[2], msg_ext[3]); - stat = DEV_GONE; - -#ifdef DEBUG_HYDRA_MSGS - PRINTK("Request header: %02X%02X%02X%02X %02X%02X%02X%02X " - "%02X%02X%02X%02X\n", - msg_ext[0], msg_ext[1], msg_ext[2], msg_ext[3], - msg_ext[4], msg_ext[5], msg_ext[6], msg_ext[7], - msg_ext[8], msg_ext[9], msg_ext[10], msg_ext[11]); - print_buffer(msg_ext+CALLER_HEADER, msg_len); -#endif - - ccode = sen(msg_len, msg_ext, &stat_word); - if (ccode > 3) - return DEV_SEN_EXCEPTION; - - PDEBUG("nq cc: %u, st: %02x%02x%02x%02x\n", - ccode, stat_word.q_stat_flags, stat_word.response_code, - stat_word.reserved[0], stat_word.reserved[1]); - switch (ccode) { - case 0: - stat = DEV_ONLINE; - break; - case 1: - stat = DEV_GONE; - break; - case 3: - switch (stat_word.response_code) { - case AP_RESPONSE_NORMAL: - stat = DEV_ONLINE; - break; - case AP_RESPONSE_Q_FULL: - stat = DEV_QUEUE_FULL; - break; - default: - stat = DEV_GONE; - break; - } - break; - default: - stat = DEV_GONE; - break; - } - - return stat; -} - -enum devstat -receive_from_AP(int dev_nr, int cdx, int resplen, unsigned char *resp, - unsigned char *psmid) -{ - int ccode; - struct ap_status_word stat_word; - enum devstat stat; - - memset(resp, 0x00, 8); - - ccode = rec((dev_nr << SKIP_BITL) + cdx, resplen, resp, psmid, - &stat_word); - if (ccode > 3) - return DEV_REC_EXCEPTION; - - PDEBUG("dq cc: %u, st: %02x%02x%02x%02x\n", - ccode, stat_word.q_stat_flags, stat_word.response_code, - stat_word.reserved[0], stat_word.reserved[1]); - - stat = DEV_GONE; - switch (ccode) { - case 0: - stat = DEV_ONLINE; -#ifdef DEBUG_HYDRA_MSGS - print_buffer(resp, resplen); -#endif - break; - case 3: - switch (stat_word.response_code) { - case AP_RESPONSE_NORMAL: - stat = DEV_ONLINE; - break; - case AP_RESPONSE_NO_PENDING_REPLY: - if (stat_word.q_stat_flags & AP_Q_STATUS_EMPTY) - stat = DEV_EMPTY; - else - stat = DEV_NO_WORK; - break; - case AP_RESPONSE_INDEX_TOO_BIG: - case AP_RESPONSE_NO_FIRST_PART: - case AP_RESPONSE_MESSAGE_TOO_BIG: - stat = DEV_BAD_MESSAGE; - break; - default: - break; - } - break; - default: - break; - } - - return stat; -} - -static inline int -pad_msg(unsigned char *buffer, int totalLength, int msgLength) -{ - int pad_len; - - for (pad_len = 0; pad_len < (totalLength - msgLength); pad_len++) - if (buffer[pad_len] != 0x00) - break; - pad_len -= 3; - if (pad_len < 8) - return SEN_PAD_ERROR; - - buffer[0] = 0x00; - buffer[1] = 0x02; - - memcpy(buffer+2, static_pad, pad_len); - - buffer[pad_len + 2] = 0x00; - - return 0; -} - -static inline int -is_common_public_key(unsigned char *key, int len) -{ - int i; - - for (i = 0; i < len; i++) - if (key[i]) - break; - key += i; - len -= i; - if (((len == 1) && (key[0] == 3)) || - ((len == 3) && (key[0] == 1) && (key[1] == 0) && (key[2] == 1))) - return 1; - - return 0; -} - -static int -ICAMEX_msg_to_type4MEX_msg(struct ica_rsa_modexpo *icaMex_p, int *z90cMsg_l_p, - union type4_msg *z90cMsg_p) -{ - int mod_len, msg_size, mod_tgt_len, exp_tgt_len, inp_tgt_len; - unsigned char *mod_tgt, *exp_tgt, *inp_tgt; - union type4_msg *tmp_type4_msg; - - mod_len = icaMex_p->inputdatalength; - - msg_size = ((mod_len <= 128) ? TYPE4_SME_LEN : TYPE4_LME_LEN) + - CALLER_HEADER; - - memset(z90cMsg_p, 0, msg_size); - - tmp_type4_msg = (union type4_msg *) - ((unsigned char *) z90cMsg_p + CALLER_HEADER); - - tmp_type4_msg->sme.header.msg_type_code = TYPE4_TYPE_CODE; - tmp_type4_msg->sme.header.request_code = TYPE4_REQU_CODE; - - if (mod_len <= 128) { - tmp_type4_msg->sme.header.msg_fmt = TYPE4_SME_FMT; - tmp_type4_msg->sme.header.msg_len = TYPE4_SME_LEN; - mod_tgt = tmp_type4_msg->sme.modulus; - mod_tgt_len = sizeof(tmp_type4_msg->sme.modulus); - exp_tgt = tmp_type4_msg->sme.exponent; - exp_tgt_len = sizeof(tmp_type4_msg->sme.exponent); - inp_tgt = tmp_type4_msg->sme.message; - inp_tgt_len = sizeof(tmp_type4_msg->sme.message); - } else { - tmp_type4_msg->lme.header.msg_fmt = TYPE4_LME_FMT; - tmp_type4_msg->lme.header.msg_len = TYPE4_LME_LEN; - mod_tgt = tmp_type4_msg->lme.modulus; - mod_tgt_len = sizeof(tmp_type4_msg->lme.modulus); - exp_tgt = tmp_type4_msg->lme.exponent; - exp_tgt_len = sizeof(tmp_type4_msg->lme.exponent); - inp_tgt = tmp_type4_msg->lme.message; - inp_tgt_len = sizeof(tmp_type4_msg->lme.message); - } - - mod_tgt += (mod_tgt_len - mod_len); - if (copy_from_user(mod_tgt, icaMex_p->n_modulus, mod_len)) - return SEN_RELEASED; - if (is_empty(mod_tgt, mod_len)) - return SEN_USER_ERROR; - exp_tgt += (exp_tgt_len - mod_len); - if (copy_from_user(exp_tgt, icaMex_p->b_key, mod_len)) - return SEN_RELEASED; - if (is_empty(exp_tgt, mod_len)) - return SEN_USER_ERROR; - inp_tgt += (inp_tgt_len - mod_len); - if (copy_from_user(inp_tgt, icaMex_p->inputdata, mod_len)) - return SEN_RELEASED; - if (is_empty(inp_tgt, mod_len)) - return SEN_USER_ERROR; - - *z90cMsg_l_p = msg_size - CALLER_HEADER; - - return 0; -} - -static int -ICACRT_msg_to_type4CRT_msg(struct ica_rsa_modexpo_crt *icaMsg_p, - int *z90cMsg_l_p, union type4_msg *z90cMsg_p) -{ - int mod_len, short_len, long_len, tmp_size, p_tgt_len, q_tgt_len, - dp_tgt_len, dq_tgt_len, u_tgt_len, inp_tgt_len; - unsigned char *p_tgt, *q_tgt, *dp_tgt, *dq_tgt, *u_tgt, *inp_tgt; - union type4_msg *tmp_type4_msg; - - mod_len = icaMsg_p->inputdatalength; - short_len = mod_len / 2; - long_len = mod_len / 2 + 8; - - tmp_size = ((mod_len <= 128) ? TYPE4_SCR_LEN : TYPE4_LCR_LEN) + - CALLER_HEADER; - - memset(z90cMsg_p, 0, tmp_size); - - tmp_type4_msg = (union type4_msg *) - ((unsigned char *) z90cMsg_p + CALLER_HEADER); - - tmp_type4_msg->scr.header.msg_type_code = TYPE4_TYPE_CODE; - tmp_type4_msg->scr.header.request_code = TYPE4_REQU_CODE; - if (mod_len <= 128) { - tmp_type4_msg->scr.header.msg_fmt = TYPE4_SCR_FMT; - tmp_type4_msg->scr.header.msg_len = TYPE4_SCR_LEN; - p_tgt = tmp_type4_msg->scr.p; - p_tgt_len = sizeof(tmp_type4_msg->scr.p); - q_tgt = tmp_type4_msg->scr.q; - q_tgt_len = sizeof(tmp_type4_msg->scr.q); - dp_tgt = tmp_type4_msg->scr.dp; - dp_tgt_len = sizeof(tmp_type4_msg->scr.dp); - dq_tgt = tmp_type4_msg->scr.dq; - dq_tgt_len = sizeof(tmp_type4_msg->scr.dq); - u_tgt = tmp_type4_msg->scr.u; - u_tgt_len = sizeof(tmp_type4_msg->scr.u); - inp_tgt = tmp_type4_msg->scr.message; - inp_tgt_len = sizeof(tmp_type4_msg->scr.message); - } else { - tmp_type4_msg->lcr.header.msg_fmt = TYPE4_LCR_FMT; - tmp_type4_msg->lcr.header.msg_len = TYPE4_LCR_LEN; - p_tgt = tmp_type4_msg->lcr.p; - p_tgt_len = sizeof(tmp_type4_msg->lcr.p); - q_tgt = tmp_type4_msg->lcr.q; - q_tgt_len = sizeof(tmp_type4_msg->lcr.q); - dp_tgt = tmp_type4_msg->lcr.dp; - dp_tgt_len = sizeof(tmp_type4_msg->lcr.dp); - dq_tgt = tmp_type4_msg->lcr.dq; - dq_tgt_len = sizeof(tmp_type4_msg->lcr.dq); - u_tgt = tmp_type4_msg->lcr.u; - u_tgt_len = sizeof(tmp_type4_msg->lcr.u); - inp_tgt = tmp_type4_msg->lcr.message; - inp_tgt_len = sizeof(tmp_type4_msg->lcr.message); - } - - p_tgt += (p_tgt_len - long_len); - if (copy_from_user(p_tgt, icaMsg_p->np_prime, long_len)) - return SEN_RELEASED; - if (is_empty(p_tgt, long_len)) - return SEN_USER_ERROR; - q_tgt += (q_tgt_len - short_len); - if (copy_from_user(q_tgt, icaMsg_p->nq_prime, short_len)) - return SEN_RELEASED; - if (is_empty(q_tgt, short_len)) - return SEN_USER_ERROR; - dp_tgt += (dp_tgt_len - long_len); - if (copy_from_user(dp_tgt, icaMsg_p->bp_key, long_len)) - return SEN_RELEASED; - if (is_empty(dp_tgt, long_len)) - return SEN_USER_ERROR; - dq_tgt += (dq_tgt_len - short_len); - if (copy_from_user(dq_tgt, icaMsg_p->bq_key, short_len)) - return SEN_RELEASED; - if (is_empty(dq_tgt, short_len)) - return SEN_USER_ERROR; - u_tgt += (u_tgt_len - long_len); - if (copy_from_user(u_tgt, icaMsg_p->u_mult_inv, long_len)) - return SEN_RELEASED; - if (is_empty(u_tgt, long_len)) - return SEN_USER_ERROR; - inp_tgt += (inp_tgt_len - mod_len); - if (copy_from_user(inp_tgt, icaMsg_p->inputdata, mod_len)) - return SEN_RELEASED; - if (is_empty(inp_tgt, mod_len)) - return SEN_USER_ERROR; - - *z90cMsg_l_p = tmp_size - CALLER_HEADER; - - return 0; -} - -static int -ICAMEX_msg_to_type6MEX_de_msg(struct ica_rsa_modexpo *icaMsg_p, int cdx, - int *z90cMsg_l_p, struct type6_msg *z90cMsg_p) -{ - int mod_len, vud_len, tmp_size, total_CPRB_len, parmBlock_l; - unsigned char *temp; - struct type6_hdr *tp6Hdr_p; - struct CPRB *cprb_p; - struct cca_private_ext_ME *key_p; - static int deprecated_msg_count = 0; - - mod_len = icaMsg_p->inputdatalength; - tmp_size = FIXED_TYPE6_ME_LEN + mod_len; - total_CPRB_len = tmp_size - sizeof(struct type6_hdr); - parmBlock_l = total_CPRB_len - sizeof(struct CPRB); - tmp_size = 4*((tmp_size + 3)/4) + CALLER_HEADER; - - memset(z90cMsg_p, 0, tmp_size); - - temp = (unsigned char *)z90cMsg_p + CALLER_HEADER; - memcpy(temp, &static_type6_hdr, sizeof(struct type6_hdr)); - tp6Hdr_p = (struct type6_hdr *)temp; - tp6Hdr_p->ToCardLen1 = 4*((total_CPRB_len+3)/4); - tp6Hdr_p->FromCardLen1 = RESPONSE_CPRB_SIZE; - - temp += sizeof(struct type6_hdr); - memcpy(temp, &static_cprb, sizeof(struct CPRB)); - cprb_p = (struct CPRB *) temp; - cprb_p->usage_domain[0]= (unsigned char)cdx; - itoLe2(&parmBlock_l, cprb_p->req_parml); - itoLe2((int *)&(tp6Hdr_p->FromCardLen1), cprb_p->rpl_parml); - - temp += sizeof(struct CPRB); - memcpy(temp, &static_pkd_function_and_rules, - sizeof(struct function_and_rules_block)); - - temp += sizeof(struct function_and_rules_block); - vud_len = 2 + icaMsg_p->inputdatalength; - itoLe2(&vud_len, temp); - - temp += 2; - if (copy_from_user(temp, icaMsg_p->inputdata, mod_len)) - return SEN_RELEASED; - if (is_empty(temp, mod_len)) - return SEN_USER_ERROR; - - temp += mod_len; - memcpy(temp, &static_T6_keyBlock_hdr, sizeof(struct T6_keyBlock_hdr)); - - temp += sizeof(struct T6_keyBlock_hdr); - memcpy(temp, &static_pvt_me_key, sizeof(struct cca_private_ext_ME)); - key_p = (struct cca_private_ext_ME *)temp; - temp = key_p->pvtMESec.exponent + sizeof(key_p->pvtMESec.exponent) - - mod_len; - if (copy_from_user(temp, icaMsg_p->b_key, mod_len)) - return SEN_RELEASED; - if (is_empty(temp, mod_len)) - return SEN_USER_ERROR; - - if (is_common_public_key(temp, mod_len)) { - if (deprecated_msg_count < 20) { - PRINTK("Common public key used for modex decrypt\n"); - deprecated_msg_count++; - if (deprecated_msg_count == 20) - PRINTK("No longer issuing messages about common" - " public key for modex decrypt.\n"); - } - return SEN_NOT_AVAIL; - } - - temp = key_p->pvtMESec.modulus + sizeof(key_p->pvtMESec.modulus) - - mod_len; - if (copy_from_user(temp, icaMsg_p->n_modulus, mod_len)) - return SEN_RELEASED; - if (is_empty(temp, mod_len)) - return SEN_USER_ERROR; - - key_p->pubMESec.modulus_bit_len = 8 * mod_len; - - *z90cMsg_l_p = tmp_size - CALLER_HEADER; - - return 0; -} - -static int -ICAMEX_msg_to_type6MEX_en_msg(struct ica_rsa_modexpo *icaMsg_p, int cdx, - int *z90cMsg_l_p, struct type6_msg *z90cMsg_p) -{ - int mod_len, vud_len, exp_len, key_len; - int pad_len, tmp_size, total_CPRB_len, parmBlock_l, i; - unsigned char *temp_exp, *exp_p, *temp; - struct type6_hdr *tp6Hdr_p; - struct CPRB *cprb_p; - struct cca_public_key *key_p; - struct T6_keyBlock_hdr *keyb_p; - - temp_exp = kmalloc(256, GFP_KERNEL); - if (!temp_exp) - return EGETBUFF; - mod_len = icaMsg_p->inputdatalength; - if (copy_from_user(temp_exp, icaMsg_p->b_key, mod_len)) { - kfree(temp_exp); - return SEN_RELEASED; - } - if (is_empty(temp_exp, mod_len)) { - kfree(temp_exp); - return SEN_USER_ERROR; - } - - exp_p = temp_exp; - for (i = 0; i < mod_len; i++) - if (exp_p[i]) - break; - if (i >= mod_len) { - kfree(temp_exp); - return SEN_USER_ERROR; - } - - exp_len = mod_len - i; - exp_p += i; - - PDEBUG("exp_len after computation: %08x\n", exp_len); - tmp_size = FIXED_TYPE6_ME_EN_LEN + 2 * mod_len + exp_len; - total_CPRB_len = tmp_size - sizeof(struct type6_hdr); - parmBlock_l = total_CPRB_len - sizeof(struct CPRB); - tmp_size = 4*((tmp_size + 3)/4) + CALLER_HEADER; - - vud_len = 2 + mod_len; - memset(z90cMsg_p, 0, tmp_size); - - temp = (unsigned char *)z90cMsg_p + CALLER_HEADER; - memcpy(temp, &static_type6_hdr, sizeof(struct type6_hdr)); - tp6Hdr_p = (struct type6_hdr *)temp; - tp6Hdr_p->ToCardLen1 = 4*((total_CPRB_len+3)/4); - tp6Hdr_p->FromCardLen1 = RESPONSE_CPRB_SIZE; - memcpy(tp6Hdr_p->function_code, static_PKE_function_code, - sizeof(static_PKE_function_code)); - temp += sizeof(struct type6_hdr); - memcpy(temp, &static_cprb, sizeof(struct CPRB)); - cprb_p = (struct CPRB *) temp; - cprb_p->usage_domain[0]= (unsigned char)cdx; - itoLe2((int *)&(tp6Hdr_p->FromCardLen1), cprb_p->rpl_parml); - temp += sizeof(struct CPRB); - memcpy(temp, &static_pke_function_and_rules, - sizeof(struct function_and_rules_block)); - temp += sizeof(struct function_and_rules_block); - temp += 2; - if (copy_from_user(temp, icaMsg_p->inputdata, mod_len)) { - kfree(temp_exp); - return SEN_RELEASED; - } - if (is_empty(temp, mod_len)) { - kfree(temp_exp); - return SEN_USER_ERROR; - } - if ((temp[0] != 0x00) || (temp[1] != 0x02)) { - kfree(temp_exp); - return SEN_NOT_AVAIL; - } - for (i = 2; i < mod_len; i++) - if (temp[i] == 0x00) - break; - if ((i < 9) || (i > (mod_len - 2))) { - kfree(temp_exp); - return SEN_NOT_AVAIL; - } - pad_len = i + 1; - vud_len = mod_len - pad_len; - memmove(temp, temp+pad_len, vud_len); - temp -= 2; - vud_len += 2; - itoLe2(&vud_len, temp); - temp += (vud_len); - keyb_p = (struct T6_keyBlock_hdr *)temp; - temp += sizeof(struct T6_keyBlock_hdr); - memcpy(temp, &static_public_key, sizeof(static_public_key)); - key_p = (struct cca_public_key *)temp; - temp = key_p->pubSec.exponent; - memcpy(temp, exp_p, exp_len); - kfree(temp_exp); - temp += exp_len; - if (copy_from_user(temp, icaMsg_p->n_modulus, mod_len)) - return SEN_RELEASED; - if (is_empty(temp, mod_len)) - return SEN_USER_ERROR; - key_p->pubSec.modulus_bit_len = 8 * mod_len; - key_p->pubSec.modulus_byte_len = mod_len; - key_p->pubSec.exponent_len = exp_len; - key_p->pubSec.section_length = CALLER_HEADER + mod_len + exp_len; - key_len = key_p->pubSec.section_length + sizeof(struct cca_token_hdr); - key_p->pubHdr.token_length = key_len; - key_len += 4; - itoLe2(&key_len, keyb_p->ulen); - key_len += 2; - itoLe2(&key_len, keyb_p->blen); - parmBlock_l -= pad_len; - itoLe2(&parmBlock_l, cprb_p->req_parml); - *z90cMsg_l_p = tmp_size - CALLER_HEADER; - - return 0; -} - -static int -ICACRT_msg_to_type6CRT_msg(struct ica_rsa_modexpo_crt *icaMsg_p, int cdx, - int *z90cMsg_l_p, struct type6_msg *z90cMsg_p) -{ - int mod_len, vud_len, tmp_size, total_CPRB_len, parmBlock_l, short_len; - int long_len, pad_len, keyPartsLen, tmp_l; - unsigned char *tgt_p, *temp; - struct type6_hdr *tp6Hdr_p; - struct CPRB *cprb_p; - struct cca_token_hdr *keyHdr_p; - struct cca_pvt_ext_CRT_sec *pvtSec_p; - struct cca_public_sec *pubSec_p; - - mod_len = icaMsg_p->inputdatalength; - short_len = mod_len / 2; - long_len = 8 + short_len; - keyPartsLen = 3 * long_len + 2 * short_len; - pad_len = (8 - (keyPartsLen % 8)) % 8; - keyPartsLen += pad_len + mod_len; - tmp_size = FIXED_TYPE6_CR_LEN + keyPartsLen + mod_len; - total_CPRB_len = tmp_size - sizeof(struct type6_hdr); - parmBlock_l = total_CPRB_len - sizeof(struct CPRB); - vud_len = 2 + mod_len; - tmp_size = 4*((tmp_size + 3)/4) + CALLER_HEADER; - - memset(z90cMsg_p, 0, tmp_size); - tgt_p = (unsigned char *)z90cMsg_p + CALLER_HEADER; - memcpy(tgt_p, &static_type6_hdr, sizeof(struct type6_hdr)); - tp6Hdr_p = (struct type6_hdr *)tgt_p; - tp6Hdr_p->ToCardLen1 = 4*((total_CPRB_len+3)/4); - tp6Hdr_p->FromCardLen1 = RESPONSE_CPRB_SIZE; - tgt_p += sizeof(struct type6_hdr); - cprb_p = (struct CPRB *) tgt_p; - memcpy(tgt_p, &static_cprb, sizeof(struct CPRB)); - cprb_p->usage_domain[0]= *((unsigned char *)(&(cdx))+3); - itoLe2(&parmBlock_l, cprb_p->req_parml); - memcpy(cprb_p->rpl_parml, cprb_p->req_parml, - sizeof(cprb_p->req_parml)); - tgt_p += sizeof(struct CPRB); - memcpy(tgt_p, &static_pkd_function_and_rules, - sizeof(struct function_and_rules_block)); - tgt_p += sizeof(struct function_and_rules_block); - itoLe2(&vud_len, tgt_p); - tgt_p += 2; - if (copy_from_user(tgt_p, icaMsg_p->inputdata, mod_len)) - return SEN_RELEASED; - if (is_empty(tgt_p, mod_len)) - return SEN_USER_ERROR; - tgt_p += mod_len; - tmp_l = sizeof(struct T6_keyBlock_hdr) + sizeof(struct cca_token_hdr) + - sizeof(struct cca_pvt_ext_CRT_sec) + 0x0F + keyPartsLen; - itoLe2(&tmp_l, tgt_p); - temp = tgt_p + 2; - tmp_l -= 2; - itoLe2(&tmp_l, temp); - tgt_p += sizeof(struct T6_keyBlock_hdr); - keyHdr_p = (struct cca_token_hdr *)tgt_p; - keyHdr_p->token_identifier = CCA_TKN_HDR_ID_EXT; - tmp_l -= 4; - keyHdr_p->token_length = tmp_l; - tgt_p += sizeof(struct cca_token_hdr); - pvtSec_p = (struct cca_pvt_ext_CRT_sec *)tgt_p; - pvtSec_p->section_identifier = CCA_PVT_EXT_CRT_SEC_ID_PVT; - pvtSec_p->section_length = - sizeof(struct cca_pvt_ext_CRT_sec) + keyPartsLen; - pvtSec_p->key_format = CCA_PVT_EXT_CRT_SEC_FMT_CL; - pvtSec_p->key_use_flags[0] = CCA_PVT_USAGE_ALL; - pvtSec_p->p_len = long_len; - pvtSec_p->q_len = short_len; - pvtSec_p->dp_len = long_len; - pvtSec_p->dq_len = short_len; - pvtSec_p->u_len = long_len; - pvtSec_p->mod_len = mod_len; - pvtSec_p->pad_len = pad_len; - tgt_p += sizeof(struct cca_pvt_ext_CRT_sec); - if (copy_from_user(tgt_p, icaMsg_p->np_prime, long_len)) - return SEN_RELEASED; - if (is_empty(tgt_p, long_len)) - return SEN_USER_ERROR; - tgt_p += long_len; - if (copy_from_user(tgt_p, icaMsg_p->nq_prime, short_len)) - return SEN_RELEASED; - if (is_empty(tgt_p, short_len)) - return SEN_USER_ERROR; - tgt_p += short_len; - if (copy_from_user(tgt_p, icaMsg_p->bp_key, long_len)) - return SEN_RELEASED; - if (is_empty(tgt_p, long_len)) - return SEN_USER_ERROR; - tgt_p += long_len; - if (copy_from_user(tgt_p, icaMsg_p->bq_key, short_len)) - return SEN_RELEASED; - if (is_empty(tgt_p, short_len)) - return SEN_USER_ERROR; - tgt_p += short_len; - if (copy_from_user(tgt_p, icaMsg_p->u_mult_inv, long_len)) - return SEN_RELEASED; - if (is_empty(tgt_p, long_len)) - return SEN_USER_ERROR; - tgt_p += long_len; - tgt_p += pad_len; - memset(tgt_p, 0xFF, mod_len); - tgt_p += mod_len; - memcpy(tgt_p, &static_cca_pub_sec, sizeof(struct cca_public_sec)); - pubSec_p = (struct cca_public_sec *) tgt_p; - pubSec_p->modulus_bit_len = 8 * mod_len; - *z90cMsg_l_p = tmp_size - CALLER_HEADER; - - return 0; -} - -static int -ICAMEX_msg_to_type6MEX_msgX(struct ica_rsa_modexpo *icaMsg_p, int cdx, - int *z90cMsg_l_p, struct type6_msg *z90cMsg_p, - int dev_type) -{ - int mod_len, exp_len, vud_len, tmp_size, total_CPRB_len, parmBlock_l; - int key_len, i; - unsigned char *temp_exp, *tgt_p, *temp, *exp_p; - struct type6_hdr *tp6Hdr_p; - struct CPRBX *cprbx_p; - struct cca_public_key *key_p; - struct T6_keyBlock_hdrX *keyb_p; - - temp_exp = kmalloc(256, GFP_KERNEL); - if (!temp_exp) - return EGETBUFF; - mod_len = icaMsg_p->inputdatalength; - if (copy_from_user(temp_exp, icaMsg_p->b_key, mod_len)) { - kfree(temp_exp); - return SEN_RELEASED; - } - if (is_empty(temp_exp, mod_len)) { - kfree(temp_exp); - return SEN_USER_ERROR; - } - exp_p = temp_exp; - for (i = 0; i < mod_len; i++) - if (exp_p[i]) - break; - if (i >= mod_len) { - kfree(temp_exp); - return SEN_USER_ERROR; - } - exp_len = mod_len - i; - exp_p += i; - PDEBUG("exp_len after computation: %08x\n", exp_len); - tmp_size = FIXED_TYPE6_ME_EN_LENX + 2 * mod_len + exp_len; - total_CPRB_len = tmp_size - sizeof(struct type6_hdr); - parmBlock_l = total_CPRB_len - sizeof(struct CPRBX); - tmp_size = tmp_size + CALLER_HEADER; - vud_len = 2 + mod_len; - memset(z90cMsg_p, 0, tmp_size); - tgt_p = (unsigned char *)z90cMsg_p + CALLER_HEADER; - memcpy(tgt_p, &static_type6_hdrX, sizeof(struct type6_hdr)); - tp6Hdr_p = (struct type6_hdr *)tgt_p; - tp6Hdr_p->ToCardLen1 = total_CPRB_len; - tp6Hdr_p->FromCardLen1 = RESPONSE_CPRBX_SIZE; - memcpy(tp6Hdr_p->function_code, static_PKE_function_code, - sizeof(static_PKE_function_code)); - tgt_p += sizeof(struct type6_hdr); - memcpy(tgt_p, &static_cprbx, sizeof(struct CPRBX)); - cprbx_p = (struct CPRBX *) tgt_p; - cprbx_p->domain = (unsigned short)cdx; - cprbx_p->rpl_msgbl = RESPONSE_CPRBX_SIZE; - tgt_p += sizeof(struct CPRBX); - if (dev_type == PCIXCC_MCL2) - memcpy(tgt_p, &static_pke_function_and_rulesX_MCL2, - sizeof(struct function_and_rules_block)); - else - memcpy(tgt_p, &static_pke_function_and_rulesX, - sizeof(struct function_and_rules_block)); - tgt_p += sizeof(struct function_and_rules_block); - - tgt_p += 2; - if (copy_from_user(tgt_p, icaMsg_p->inputdata, mod_len)) { - kfree(temp_exp); - return SEN_RELEASED; - } - if (is_empty(tgt_p, mod_len)) { - kfree(temp_exp); - return SEN_USER_ERROR; - } - tgt_p -= 2; - *((short *)tgt_p) = (short) vud_len; - tgt_p += vud_len; - keyb_p = (struct T6_keyBlock_hdrX *)tgt_p; - tgt_p += sizeof(struct T6_keyBlock_hdrX); - memcpy(tgt_p, &static_public_key, sizeof(static_public_key)); - key_p = (struct cca_public_key *)tgt_p; - temp = key_p->pubSec.exponent; - memcpy(temp, exp_p, exp_len); - kfree(temp_exp); - temp += exp_len; - if (copy_from_user(temp, icaMsg_p->n_modulus, mod_len)) - return SEN_RELEASED; - if (is_empty(temp, mod_len)) - return SEN_USER_ERROR; - key_p->pubSec.modulus_bit_len = 8 * mod_len; - key_p->pubSec.modulus_byte_len = mod_len; - key_p->pubSec.exponent_len = exp_len; - key_p->pubSec.section_length = CALLER_HEADER + mod_len + exp_len; - key_len = key_p->pubSec.section_length + sizeof(struct cca_token_hdr); - key_p->pubHdr.token_length = key_len; - key_len += 4; - keyb_p->ulen = (unsigned short)key_len; - key_len += 2; - keyb_p->blen = (unsigned short)key_len; - cprbx_p->req_parml = parmBlock_l; - *z90cMsg_l_p = tmp_size - CALLER_HEADER; - - return 0; -} - -static int -ICACRT_msg_to_type6CRT_msgX(struct ica_rsa_modexpo_crt *icaMsg_p, int cdx, - int *z90cMsg_l_p, struct type6_msg *z90cMsg_p, - int dev_type) -{ - int mod_len, vud_len, tmp_size, total_CPRB_len, parmBlock_l, short_len; - int long_len, pad_len, keyPartsLen, tmp_l; - unsigned char *tgt_p, *temp; - struct type6_hdr *tp6Hdr_p; - struct CPRBX *cprbx_p; - struct cca_token_hdr *keyHdr_p; - struct cca_pvt_ext_CRT_sec *pvtSec_p; - struct cca_public_sec *pubSec_p; - - mod_len = icaMsg_p->inputdatalength; - short_len = mod_len / 2; - long_len = 8 + short_len; - keyPartsLen = 3 * long_len + 2 * short_len; - pad_len = (8 - (keyPartsLen % 8)) % 8; - keyPartsLen += pad_len + mod_len; - tmp_size = FIXED_TYPE6_CR_LENX + keyPartsLen + mod_len; - total_CPRB_len = tmp_size - sizeof(struct type6_hdr); - parmBlock_l = total_CPRB_len - sizeof(struct CPRBX); - vud_len = 2 + mod_len; - tmp_size = tmp_size + CALLER_HEADER; - memset(z90cMsg_p, 0, tmp_size); - tgt_p = (unsigned char *)z90cMsg_p + CALLER_HEADER; - memcpy(tgt_p, &static_type6_hdrX, sizeof(struct type6_hdr)); - tp6Hdr_p = (struct type6_hdr *)tgt_p; - tp6Hdr_p->ToCardLen1 = total_CPRB_len; - tp6Hdr_p->FromCardLen1 = RESPONSE_CPRBX_SIZE; - tgt_p += sizeof(struct type6_hdr); - cprbx_p = (struct CPRBX *) tgt_p; - memcpy(tgt_p, &static_cprbx, sizeof(struct CPRBX)); - cprbx_p->domain = (unsigned short)cdx; - cprbx_p->req_parml = parmBlock_l; - cprbx_p->rpl_msgbl = parmBlock_l; - tgt_p += sizeof(struct CPRBX); - if (dev_type == PCIXCC_MCL2) - memcpy(tgt_p, &static_pkd_function_and_rulesX_MCL2, - sizeof(struct function_and_rules_block)); - else - memcpy(tgt_p, &static_pkd_function_and_rulesX, - sizeof(struct function_and_rules_block)); - tgt_p += sizeof(struct function_and_rules_block); - *((short *)tgt_p) = (short) vud_len; - tgt_p += 2; - if (copy_from_user(tgt_p, icaMsg_p->inputdata, mod_len)) - return SEN_RELEASED; - if (is_empty(tgt_p, mod_len)) - return SEN_USER_ERROR; - tgt_p += mod_len; - tmp_l = sizeof(struct T6_keyBlock_hdr) + sizeof(struct cca_token_hdr) + - sizeof(struct cca_pvt_ext_CRT_sec) + 0x0F + keyPartsLen; - *((short *)tgt_p) = (short) tmp_l; - temp = tgt_p + 2; - tmp_l -= 2; - *((short *)temp) = (short) tmp_l; - tgt_p += sizeof(struct T6_keyBlock_hdr); - keyHdr_p = (struct cca_token_hdr *)tgt_p; - keyHdr_p->token_identifier = CCA_TKN_HDR_ID_EXT; - tmp_l -= 4; - keyHdr_p->token_length = tmp_l; - tgt_p += sizeof(struct cca_token_hdr); - pvtSec_p = (struct cca_pvt_ext_CRT_sec *)tgt_p; - pvtSec_p->section_identifier = CCA_PVT_EXT_CRT_SEC_ID_PVT; - pvtSec_p->section_length = - sizeof(struct cca_pvt_ext_CRT_sec) + keyPartsLen; - pvtSec_p->key_format = CCA_PVT_EXT_CRT_SEC_FMT_CL; - pvtSec_p->key_use_flags[0] = CCA_PVT_USAGE_ALL; - pvtSec_p->p_len = long_len; - pvtSec_p->q_len = short_len; - pvtSec_p->dp_len = long_len; - pvtSec_p->dq_len = short_len; - pvtSec_p->u_len = long_len; - pvtSec_p->mod_len = mod_len; - pvtSec_p->pad_len = pad_len; - tgt_p += sizeof(struct cca_pvt_ext_CRT_sec); - if (copy_from_user(tgt_p, icaMsg_p->np_prime, long_len)) - return SEN_RELEASED; - if (is_empty(tgt_p, long_len)) - return SEN_USER_ERROR; - tgt_p += long_len; - if (copy_from_user(tgt_p, icaMsg_p->nq_prime, short_len)) - return SEN_RELEASED; - if (is_empty(tgt_p, short_len)) - return SEN_USER_ERROR; - tgt_p += short_len; - if (copy_from_user(tgt_p, icaMsg_p->bp_key, long_len)) - return SEN_RELEASED; - if (is_empty(tgt_p, long_len)) - return SEN_USER_ERROR; - tgt_p += long_len; - if (copy_from_user(tgt_p, icaMsg_p->bq_key, short_len)) - return SEN_RELEASED; - if (is_empty(tgt_p, short_len)) - return SEN_USER_ERROR; - tgt_p += short_len; - if (copy_from_user(tgt_p, icaMsg_p->u_mult_inv, long_len)) - return SEN_RELEASED; - if (is_empty(tgt_p, long_len)) - return SEN_USER_ERROR; - tgt_p += long_len; - tgt_p += pad_len; - memset(tgt_p, 0xFF, mod_len); - tgt_p += mod_len; - memcpy(tgt_p, &static_cca_pub_sec, sizeof(struct cca_public_sec)); - pubSec_p = (struct cca_public_sec *) tgt_p; - pubSec_p->modulus_bit_len = 8 * mod_len; - *z90cMsg_l_p = tmp_size - CALLER_HEADER; - - return 0; -} - -static int -ICAMEX_msg_to_type50MEX_msg(struct ica_rsa_modexpo *icaMex_p, int *z90cMsg_l_p, - union type50_msg *z90cMsg_p) -{ - int mod_len, msg_size, mod_tgt_len, exp_tgt_len, inp_tgt_len; - unsigned char *mod_tgt, *exp_tgt, *inp_tgt; - union type50_msg *tmp_type50_msg; - - mod_len = icaMex_p->inputdatalength; - - msg_size = ((mod_len <= 128) ? TYPE50_MEB1_LEN : TYPE50_MEB2_LEN) + - CALLER_HEADER; - - memset(z90cMsg_p, 0, msg_size); - - tmp_type50_msg = (union type50_msg *) - ((unsigned char *) z90cMsg_p + CALLER_HEADER); - - tmp_type50_msg->meb1.header.msg_type_code = TYPE50_TYPE_CODE; - - if (mod_len <= 128) { - tmp_type50_msg->meb1.header.msg_len = TYPE50_MEB1_LEN; - tmp_type50_msg->meb1.keyblock_type = TYPE50_MEB1_FMT; - mod_tgt = tmp_type50_msg->meb1.modulus; - mod_tgt_len = sizeof(tmp_type50_msg->meb1.modulus); - exp_tgt = tmp_type50_msg->meb1.exponent; - exp_tgt_len = sizeof(tmp_type50_msg->meb1.exponent); - inp_tgt = tmp_type50_msg->meb1.message; - inp_tgt_len = sizeof(tmp_type50_msg->meb1.message); - } else { - tmp_type50_msg->meb2.header.msg_len = TYPE50_MEB2_LEN; - tmp_type50_msg->meb2.keyblock_type = TYPE50_MEB2_FMT; - mod_tgt = tmp_type50_msg->meb2.modulus; - mod_tgt_len = sizeof(tmp_type50_msg->meb2.modulus); - exp_tgt = tmp_type50_msg->meb2.exponent; - exp_tgt_len = sizeof(tmp_type50_msg->meb2.exponent); - inp_tgt = tmp_type50_msg->meb2.message; - inp_tgt_len = sizeof(tmp_type50_msg->meb2.message); - } - - mod_tgt += (mod_tgt_len - mod_len); - if (copy_from_user(mod_tgt, icaMex_p->n_modulus, mod_len)) - return SEN_RELEASED; - if (is_empty(mod_tgt, mod_len)) - return SEN_USER_ERROR; - exp_tgt += (exp_tgt_len - mod_len); - if (copy_from_user(exp_tgt, icaMex_p->b_key, mod_len)) - return SEN_RELEASED; - if (is_empty(exp_tgt, mod_len)) - return SEN_USER_ERROR; - inp_tgt += (inp_tgt_len - mod_len); - if (copy_from_user(inp_tgt, icaMex_p->inputdata, mod_len)) - return SEN_RELEASED; - if (is_empty(inp_tgt, mod_len)) - return SEN_USER_ERROR; - - *z90cMsg_l_p = msg_size - CALLER_HEADER; - - return 0; -} - -static int -ICACRT_msg_to_type50CRT_msg(struct ica_rsa_modexpo_crt *icaMsg_p, - int *z90cMsg_l_p, union type50_msg *z90cMsg_p) -{ - int mod_len, short_len, long_len, tmp_size, p_tgt_len, q_tgt_len, - dp_tgt_len, dq_tgt_len, u_tgt_len, inp_tgt_len, long_offset; - unsigned char *p_tgt, *q_tgt, *dp_tgt, *dq_tgt, *u_tgt, *inp_tgt, - temp[8]; - union type50_msg *tmp_type50_msg; - - mod_len = icaMsg_p->inputdatalength; - short_len = mod_len / 2; - long_len = mod_len / 2 + 8; - long_offset = 0; - - if (long_len > 128) { - memset(temp, 0x00, sizeof(temp)); - if (copy_from_user(temp, icaMsg_p->np_prime, long_len-128)) - return SEN_RELEASED; - if (!is_empty(temp, 8)) - return SEN_NOT_AVAIL; - if (copy_from_user(temp, icaMsg_p->bp_key, long_len-128)) - return SEN_RELEASED; - if (!is_empty(temp, 8)) - return SEN_NOT_AVAIL; - if (copy_from_user(temp, icaMsg_p->u_mult_inv, long_len-128)) - return SEN_RELEASED; - if (!is_empty(temp, 8)) - return SEN_NOT_AVAIL; - long_offset = long_len - 128; - long_len = 128; - } - - tmp_size = ((long_len <= 64) ? TYPE50_CRB1_LEN : TYPE50_CRB2_LEN) + - CALLER_HEADER; - - memset(z90cMsg_p, 0, tmp_size); - - tmp_type50_msg = (union type50_msg *) - ((unsigned char *) z90cMsg_p + CALLER_HEADER); - - tmp_type50_msg->crb1.header.msg_type_code = TYPE50_TYPE_CODE; - if (long_len <= 64) { - tmp_type50_msg->crb1.header.msg_len = TYPE50_CRB1_LEN; - tmp_type50_msg->crb1.keyblock_type = TYPE50_CRB1_FMT; - p_tgt = tmp_type50_msg->crb1.p; - p_tgt_len = sizeof(tmp_type50_msg->crb1.p); - q_tgt = tmp_type50_msg->crb1.q; - q_tgt_len = sizeof(tmp_type50_msg->crb1.q); - dp_tgt = tmp_type50_msg->crb1.dp; - dp_tgt_len = sizeof(tmp_type50_msg->crb1.dp); - dq_tgt = tmp_type50_msg->crb1.dq; - dq_tgt_len = sizeof(tmp_type50_msg->crb1.dq); - u_tgt = tmp_type50_msg->crb1.u; - u_tgt_len = sizeof(tmp_type50_msg->crb1.u); - inp_tgt = tmp_type50_msg->crb1.message; - inp_tgt_len = sizeof(tmp_type50_msg->crb1.message); - } else { - tmp_type50_msg->crb2.header.msg_len = TYPE50_CRB2_LEN; - tmp_type50_msg->crb2.keyblock_type = TYPE50_CRB2_FMT; - p_tgt = tmp_type50_msg->crb2.p; - p_tgt_len = sizeof(tmp_type50_msg->crb2.p); - q_tgt = tmp_type50_msg->crb2.q; - q_tgt_len = sizeof(tmp_type50_msg->crb2.q); - dp_tgt = tmp_type50_msg->crb2.dp; - dp_tgt_len = sizeof(tmp_type50_msg->crb2.dp); - dq_tgt = tmp_type50_msg->crb2.dq; - dq_tgt_len = sizeof(tmp_type50_msg->crb2.dq); - u_tgt = tmp_type50_msg->crb2.u; - u_tgt_len = sizeof(tmp_type50_msg->crb2.u); - inp_tgt = tmp_type50_msg->crb2.message; - inp_tgt_len = sizeof(tmp_type50_msg->crb2.message); - } - - p_tgt += (p_tgt_len - long_len); - if (copy_from_user(p_tgt, icaMsg_p->np_prime + long_offset, long_len)) - return SEN_RELEASED; - if (is_empty(p_tgt, long_len)) - return SEN_USER_ERROR; - q_tgt += (q_tgt_len - short_len); - if (copy_from_user(q_tgt, icaMsg_p->nq_prime, short_len)) - return SEN_RELEASED; - if (is_empty(q_tgt, short_len)) - return SEN_USER_ERROR; - dp_tgt += (dp_tgt_len - long_len); - if (copy_from_user(dp_tgt, icaMsg_p->bp_key + long_offset, long_len)) - return SEN_RELEASED; - if (is_empty(dp_tgt, long_len)) - return SEN_USER_ERROR; - dq_tgt += (dq_tgt_len - short_len); - if (copy_from_user(dq_tgt, icaMsg_p->bq_key, short_len)) - return SEN_RELEASED; - if (is_empty(dq_tgt, short_len)) - return SEN_USER_ERROR; - u_tgt += (u_tgt_len - long_len); - if (copy_from_user(u_tgt, icaMsg_p->u_mult_inv + long_offset, long_len)) - return SEN_RELEASED; - if (is_empty(u_tgt, long_len)) - return SEN_USER_ERROR; - inp_tgt += (inp_tgt_len - mod_len); - if (copy_from_user(inp_tgt, icaMsg_p->inputdata, mod_len)) - return SEN_RELEASED; - if (is_empty(inp_tgt, mod_len)) - return SEN_USER_ERROR; - - *z90cMsg_l_p = tmp_size - CALLER_HEADER; - - return 0; -} - -int -convert_request(unsigned char *buffer, int func, unsigned short function, - int cdx, int dev_type, int *msg_l_p, unsigned char *msg_p) -{ - if (dev_type == PCICA) { - if (func == ICARSACRT) - return ICACRT_msg_to_type4CRT_msg( - (struct ica_rsa_modexpo_crt *) buffer, - msg_l_p, (union type4_msg *) msg_p); - else - return ICAMEX_msg_to_type4MEX_msg( - (struct ica_rsa_modexpo *) buffer, - msg_l_p, (union type4_msg *) msg_p); - } - if (dev_type == PCICC) { - if (func == ICARSACRT) - return ICACRT_msg_to_type6CRT_msg( - (struct ica_rsa_modexpo_crt *) buffer, - cdx, msg_l_p, (struct type6_msg *)msg_p); - if (function == PCI_FUNC_KEY_ENCRYPT) - return ICAMEX_msg_to_type6MEX_en_msg( - (struct ica_rsa_modexpo *) buffer, - cdx, msg_l_p, (struct type6_msg *) msg_p); - else - return ICAMEX_msg_to_type6MEX_de_msg( - (struct ica_rsa_modexpo *) buffer, - cdx, msg_l_p, (struct type6_msg *) msg_p); - } - if ((dev_type == PCIXCC_MCL2) || - (dev_type == PCIXCC_MCL3) || - (dev_type == CEX2C)) { - if (func == ICARSACRT) - return ICACRT_msg_to_type6CRT_msgX( - (struct ica_rsa_modexpo_crt *) buffer, - cdx, msg_l_p, (struct type6_msg *) msg_p, - dev_type); - else - return ICAMEX_msg_to_type6MEX_msgX( - (struct ica_rsa_modexpo *) buffer, - cdx, msg_l_p, (struct type6_msg *) msg_p, - dev_type); - } - if (dev_type == CEX2A) { - if (func == ICARSACRT) - return ICACRT_msg_to_type50CRT_msg( - (struct ica_rsa_modexpo_crt *) buffer, - msg_l_p, (union type50_msg *) msg_p); - else - return ICAMEX_msg_to_type50MEX_msg( - (struct ica_rsa_modexpo *) buffer, - msg_l_p, (union type50_msg *) msg_p); - } - - return 0; -} - -int ext_bitlens_msg_count = 0; -static inline void -unset_ext_bitlens(void) -{ - if (!ext_bitlens_msg_count) { - PRINTK("Unable to use coprocessors for extended bitlengths. " - "Using PCICAs/CEX2As (if present) for extended " - "bitlengths. This is not an error.\n"); - ext_bitlens_msg_count++; - } - ext_bitlens = 0; -} - -int -convert_response(unsigned char *response, unsigned char *buffer, - int *respbufflen_p, unsigned char *resp_buff) -{ - struct ica_rsa_modexpo *icaMsg_p = (struct ica_rsa_modexpo *) buffer; - struct error_hdr *errh_p = (struct error_hdr *) response; - struct type80_hdr *t80h_p = (struct type80_hdr *) response; - struct type84_hdr *t84h_p = (struct type84_hdr *) response; - struct type86_fmt2_msg *t86m_p = (struct type86_fmt2_msg *) response; - int reply_code, service_rc, service_rs, src_l; - unsigned char *src_p, *tgt_p; - struct CPRB *cprb_p; - struct CPRBX *cprbx_p; - - src_p = 0; - reply_code = 0; - service_rc = 0; - service_rs = 0; - src_l = 0; - switch (errh_p->type) { - case TYPE82_RSP_CODE: - case TYPE88_RSP_CODE: - reply_code = errh_p->reply_code; - src_p = (unsigned char *)errh_p; - PRINTK("Hardware error: Type %02X Message Header: " - "%02x%02x%02x%02x%02x%02x%02x%02x\n", - errh_p->type, - src_p[0], src_p[1], src_p[2], src_p[3], - src_p[4], src_p[5], src_p[6], src_p[7]); - break; - case TYPE80_RSP_CODE: - src_l = icaMsg_p->outputdatalength; - src_p = response + (int)t80h_p->len - src_l; - break; - case TYPE84_RSP_CODE: - src_l = icaMsg_p->outputdatalength; - src_p = response + (int)t84h_p->len - src_l; - break; - case TYPE86_RSP_CODE: - reply_code = t86m_p->header.reply_code; - if (reply_code != 0) - break; - cprb_p = (struct CPRB *) - (response + sizeof(struct type86_fmt2_msg)); - cprbx_p = (struct CPRBX *) cprb_p; - if (cprb_p->cprb_ver_id != 0x02) { - le2toI(cprb_p->ccp_rtcode, &service_rc); - if (service_rc != 0) { - le2toI(cprb_p->ccp_rscode, &service_rs); - if ((service_rc == 8) && (service_rs == 66)) - PDEBUG("Bad block format on PCICC\n"); - else if ((service_rc == 8) && (service_rs == 65)) - PDEBUG("Probably an even modulus on " - "PCICC\n"); - else if ((service_rc == 8) && (service_rs == 770)) { - PDEBUG("Invalid key length on PCICC\n"); - unset_ext_bitlens(); - return REC_USE_PCICA; - } - else if ((service_rc == 8) && (service_rs == 783)) { - PDEBUG("Extended bitlengths not enabled" - "on PCICC\n"); - unset_ext_bitlens(); - return REC_USE_PCICA; - } - else - PRINTK("service rc/rs (PCICC): %d/%d\n", - service_rc, service_rs); - return REC_OPERAND_INV; - } - src_p = (unsigned char *)cprb_p + sizeof(struct CPRB); - src_p += 4; - le2toI(src_p, &src_l); - src_l -= 2; - src_p += 2; - } else { - service_rc = (int)cprbx_p->ccp_rtcode; - if (service_rc != 0) { - service_rs = (int) cprbx_p->ccp_rscode; - if ((service_rc == 8) && (service_rs == 66)) - PDEBUG("Bad block format on PCIXCC\n"); - else if ((service_rc == 8) && (service_rs == 65)) - PDEBUG("Probably an even modulus on " - "PCIXCC\n"); - else if ((service_rc == 8) && (service_rs == 770)) { - PDEBUG("Invalid key length on PCIXCC\n"); - unset_ext_bitlens(); - return REC_USE_PCICA; - } - else if ((service_rc == 8) && (service_rs == 783)) { - PDEBUG("Extended bitlengths not enabled" - "on PCIXCC\n"); - unset_ext_bitlens(); - return REC_USE_PCICA; - } - else - PRINTK("service rc/rs (PCIXCC): %d/%d\n", - service_rc, service_rs); - return REC_OPERAND_INV; - } - src_p = (unsigned char *) - cprbx_p + sizeof(struct CPRBX); - src_p += 4; - src_l = (int)(*((short *) src_p)); - src_l -= 2; - src_p += 2; - } - break; - default: - src_p = (unsigned char *)errh_p; - PRINTK("Unrecognized Message Header: " - "%02x%02x%02x%02x%02x%02x%02x%02x\n", - src_p[0], src_p[1], src_p[2], src_p[3], - src_p[4], src_p[5], src_p[6], src_p[7]); - return REC_BAD_MESSAGE; - } - - if (reply_code) - switch (reply_code) { - case REP82_ERROR_MACHINE_FAILURE: - if (errh_p->type == TYPE82_RSP_CODE) - PRINTKW("Machine check failure\n"); - else - PRINTKW("Module failure\n"); - return REC_HARDWAR_ERR; - case REP82_ERROR_OPERAND_INVALID: - return REC_OPERAND_INV; - case REP88_ERROR_MESSAGE_MALFORMD: - PRINTKW("Message malformed\n"); - return REC_OPERAND_INV; - case REP82_ERROR_OPERAND_SIZE: - return REC_OPERAND_SIZE; - case REP82_ERROR_EVEN_MOD_IN_OPND: - return REC_EVEN_MOD; - case REP82_ERROR_MESSAGE_TYPE: - return WRONG_DEVICE_TYPE; - case REP82_ERROR_TRANSPORT_FAIL: - PRINTKW("Transport failed (APFS = %02X%02X%02X%02X)\n", - t86m_p->apfs[0], t86m_p->apfs[1], - t86m_p->apfs[2], t86m_p->apfs[3]); - return REC_HARDWAR_ERR; - default: - PRINTKW("reply code = %d\n", reply_code); - return REC_HARDWAR_ERR; - } - - if (service_rc != 0) - return REC_OPERAND_INV; - - if ((src_l > icaMsg_p->outputdatalength) || - (src_l > RESPBUFFSIZE) || - (src_l <= 0)) - return REC_OPERAND_SIZE; - - PDEBUG("Length returned = %d\n", src_l); - tgt_p = resp_buff + icaMsg_p->outputdatalength - src_l; - memcpy(tgt_p, src_p, src_l); - if ((errh_p->type == TYPE86_RSP_CODE) && (resp_buff < tgt_p)) { - memset(resp_buff, 0, icaMsg_p->outputdatalength - src_l); - if (pad_msg(resp_buff, icaMsg_p->outputdatalength, src_l)) - return REC_INVALID_PAD; - } - *respbufflen_p = icaMsg_p->outputdatalength; - if (*respbufflen_p == 0) - PRINTK("Zero *respbufflen_p\n"); - - return 0; -} - diff --git a/drivers/s390/crypto/z90main.c b/drivers/s390/crypto/z90main.c deleted file mode 100644 index b2f20ab8431a..000000000000 --- a/drivers/s390/crypto/z90main.c +++ /dev/null @@ -1,3379 +0,0 @@ -/* - * linux/drivers/s390/crypto/z90main.c - * - * z90crypt 1.3.3 - * - * Copyright (C) 2001, 2005 IBM Corporation - * Author(s): Robert Burroughs (burrough@us.ibm.com) - * Eric Rossman (edrossma@us.ibm.com) - * - * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include // copy_(from|to)_user -#include -#include -#include // mdelay -#include -#include // for tasklets -#include -#include -#include -#include -#include -#include "z90crypt.h" -#include "z90common.h" - -/** - * Defaults that may be modified. - */ - -/** - * You can specify a different minor at compile time. - */ -#ifndef Z90CRYPT_MINOR -#define Z90CRYPT_MINOR MISC_DYNAMIC_MINOR -#endif - -/** - * You can specify a different domain at compile time or on the insmod - * command line. - */ -#ifndef DOMAIN_INDEX -#define DOMAIN_INDEX -1 -#endif - -/** - * This is the name under which the device is registered in /proc/modules. - */ -#define REG_NAME "z90crypt" - -/** - * Cleanup should run every CLEANUPTIME seconds and should clean up requests - * older than CLEANUPTIME seconds in the past. - */ -#ifndef CLEANUPTIME -#define CLEANUPTIME 15 -#endif - -/** - * Config should run every CONFIGTIME seconds - */ -#ifndef CONFIGTIME -#define CONFIGTIME 30 -#endif - -/** - * The first execution of the config task should take place - * immediately after initialization - */ -#ifndef INITIAL_CONFIGTIME -#define INITIAL_CONFIGTIME 1 -#endif - -/** - * Reader should run every READERTIME milliseconds - * With the 100Hz patch for s390, z90crypt can lock the system solid while - * under heavy load. We'll try to avoid that. - */ -#ifndef READERTIME -#if HZ > 1000 -#define READERTIME 2 -#else -#define READERTIME 10 -#endif -#endif - -/** - * turn long device array index into device pointer - */ -#define LONG2DEVPTR(ndx) (z90crypt.device_p[(ndx)]) - -/** - * turn short device array index into long device array index - */ -#define SHRT2LONG(ndx) (z90crypt.overall_device_x.device_index[(ndx)]) - -/** - * turn short device array index into device pointer - */ -#define SHRT2DEVPTR(ndx) LONG2DEVPTR(SHRT2LONG(ndx)) - -/** - * Status for a work-element - */ -#define STAT_DEFAULT 0x00 // request has not been processed - -#define STAT_ROUTED 0x80 // bit 7: requests get routed to specific device - // else, device is determined each write -#define STAT_FAILED 0x40 // bit 6: this bit is set if the request failed - // before being sent to the hardware. -#define STAT_WRITTEN 0x30 // bits 5-4: work to be done, not sent to device -// 0x20 // UNUSED state -#define STAT_READPEND 0x10 // bits 5-4: work done, we're returning data now -#define STAT_NOWORK 0x00 // bits off: no work on any queue -#define STAT_RDWRMASK 0x30 // mask for bits 5-4 - -/** - * Macros to check the status RDWRMASK - */ -#define CHK_RDWRMASK(statbyte) ((statbyte) & STAT_RDWRMASK) -#define SET_RDWRMASK(statbyte, newval) \ - {(statbyte) &= ~STAT_RDWRMASK; (statbyte) |= newval;} - -/** - * Audit Trail. Progress of a Work element - * audit[0]: Unless noted otherwise, these bits are all set by the process - */ -#define FP_COPYFROM 0x80 // Caller's buffer has been copied to work element -#define FP_BUFFREQ 0x40 // Low Level buffer requested -#define FP_BUFFGOT 0x20 // Low Level buffer obtained -#define FP_SENT 0x10 // Work element sent to a crypto device - // (may be set by process or by reader task) -#define FP_PENDING 0x08 // Work element placed on pending queue - // (may be set by process or by reader task) -#define FP_REQUEST 0x04 // Work element placed on request queue -#define FP_ASLEEP 0x02 // Work element about to sleep -#define FP_AWAKE 0x01 // Work element has been awakened - -/** - * audit[1]: These bits are set by the reader task and/or the cleanup task - */ -#define FP_NOTPENDING 0x80 // Work element removed from pending queue -#define FP_AWAKENING 0x40 // Caller about to be awakened -#define FP_TIMEDOUT 0x20 // Caller timed out -#define FP_RESPSIZESET 0x10 // Response size copied to work element -#define FP_RESPADDRCOPIED 0x08 // Response address copied to work element -#define FP_RESPBUFFCOPIED 0x04 // Response buffer copied to work element -#define FP_REMREQUEST 0x02 // Work element removed from request queue -#define FP_SIGNALED 0x01 // Work element was awakened by a signal - -/** - * audit[2]: unused - */ - -/** - * state of the file handle in private_data.status - */ -#define STAT_OPEN 0 -#define STAT_CLOSED 1 - -/** - * PID() expands to the process ID of the current process - */ -#define PID() (current->pid) - -/** - * Selected Constants. The number of APs and the number of devices - */ -#ifndef Z90CRYPT_NUM_APS -#define Z90CRYPT_NUM_APS 64 -#endif -#ifndef Z90CRYPT_NUM_DEVS -#define Z90CRYPT_NUM_DEVS Z90CRYPT_NUM_APS -#endif - -/** - * Buffer size for receiving responses. The maximum Response Size - * is actually the maximum request size, since in an error condition - * the request itself may be returned unchanged. - */ -#define MAX_RESPONSE_SIZE 0x0000077C - -/** - * A count and status-byte mask - */ -struct status { - int st_count; // # of enabled devices - int disabled_count; // # of disabled devices - int user_disabled_count; // # of devices disabled via proc fs - unsigned char st_mask[Z90CRYPT_NUM_APS]; // current status mask -}; - -/** - * The array of device indexes is a mechanism for fast indexing into - * a long (and sparse) array. For instance, if APs 3, 9 and 47 are - * installed, z90CDeviceIndex[0] is 3, z90CDeviceIndex[1] is 9, and - * z90CDeviceIndex[2] is 47. - */ -struct device_x { - int device_index[Z90CRYPT_NUM_DEVS]; -}; - -/** - * All devices are arranged in a single array: 64 APs - */ -struct device { - int dev_type; // PCICA, PCICC, PCIXCC_MCL2, - // PCIXCC_MCL3, CEX2C, CEX2A - enum devstat dev_stat; // current device status - int dev_self_x; // Index in array - int disabled; // Set when device is in error - int user_disabled; // Set when device is disabled by user - int dev_q_depth; // q depth - unsigned char * dev_resp_p; // Response buffer address - int dev_resp_l; // Response Buffer length - int dev_caller_count; // Number of callers - int dev_total_req_cnt; // # requests for device since load - struct list_head dev_caller_list; // List of callers -}; - -/** - * There's a struct status and a struct device_x for each device type. - */ -struct hdware_block { - struct status hdware_mask; - struct status type_mask[Z90CRYPT_NUM_TYPES]; - struct device_x type_x_addr[Z90CRYPT_NUM_TYPES]; - unsigned char device_type_array[Z90CRYPT_NUM_APS]; -}; - -/** - * z90crypt is the topmost data structure in the hierarchy. - */ -struct z90crypt { - int max_count; // Nr of possible crypto devices - struct status mask; - int q_depth_array[Z90CRYPT_NUM_DEVS]; - int dev_type_array[Z90CRYPT_NUM_DEVS]; - struct device_x overall_device_x; // array device indexes - struct device * device_p[Z90CRYPT_NUM_DEVS]; - int terminating; - int domain_established;// TRUE: domain has been found - int cdx; // Crypto Domain Index - int len; // Length of this data structure - struct hdware_block *hdware_info; -}; - -/** - * An array of these structures is pointed to from dev_caller - * The length of the array depends on the device type. For APs, - * there are 8. - * - * The caller buffer is allocated to the user at OPEN. At WRITE, - * it contains the request; at READ, the response. The function - * send_to_crypto_device converts the request to device-dependent - * form and use the caller's OPEN-allocated buffer for the response. - * - * For the contents of caller_dev_dep_req and caller_dev_dep_req_p - * because that points to it, see the discussion in z90hardware.c. - * Search for "extended request message block". - */ -struct caller { - int caller_buf_l; // length of original request - unsigned char * caller_buf_p; // Original request on WRITE - int caller_dev_dep_req_l; // len device dependent request - unsigned char * caller_dev_dep_req_p; // Device dependent form - unsigned char caller_id[8]; // caller-supplied message id - struct list_head caller_liste; - unsigned char caller_dev_dep_req[MAX_RESPONSE_SIZE]; -}; - -/** - * Function prototypes from z90hardware.c - */ -enum hdstat query_online(int deviceNr, int cdx, int resetNr, int *q_depth, - int *dev_type); -enum devstat reset_device(int deviceNr, int cdx, int resetNr); -enum devstat send_to_AP(int dev_nr, int cdx, int msg_len, unsigned char *msg_ext); -enum devstat receive_from_AP(int dev_nr, int cdx, int resplen, - unsigned char *resp, unsigned char *psmid); -int convert_request(unsigned char *buffer, int func, unsigned short function, - int cdx, int dev_type, int *msg_l_p, unsigned char *msg_p); -int convert_response(unsigned char *response, unsigned char *buffer, - int *respbufflen_p, unsigned char *resp_buff); - -/** - * Low level function prototypes - */ -static int create_z90crypt(int *cdx_p); -static int refresh_z90crypt(int *cdx_p); -static int find_crypto_devices(struct status *deviceMask); -static int create_crypto_device(int index); -static int destroy_crypto_device(int index); -static void destroy_z90crypt(void); -static int refresh_index_array(struct status *status_str, - struct device_x *index_array); -static int probe_device_type(struct device *devPtr); -static int probe_PCIXCC_type(struct device *devPtr); - -/** - * proc fs definitions - */ -static struct proc_dir_entry *z90crypt_entry; - -/** - * data structures - */ - -/** - * work_element.opener points back to this structure - */ -struct priv_data { - pid_t opener_pid; - unsigned char status; // 0: open 1: closed -}; - -/** - * A work element is allocated for each request - */ -struct work_element { - struct priv_data *priv_data; - pid_t pid; - int devindex; // index of device processing this w_e - // (If request did not specify device, - // -1 until placed onto a queue) - int devtype; - struct list_head liste; // used for requestq and pendingq - char buffer[128]; // local copy of user request - int buff_size; // size of the buffer for the request - char resp_buff[RESPBUFFSIZE]; - int resp_buff_size; - char __user * resp_addr; // address of response in user space - unsigned int funccode; // function code of request - wait_queue_head_t waitq; - unsigned long requestsent; // time at which the request was sent - atomic_t alarmrung; // wake-up signal - unsigned char caller_id[8]; // pid + counter, for this w_e - unsigned char status[1]; // bits to mark status of the request - unsigned char audit[3]; // record of work element's progress - unsigned char * requestptr; // address of request buffer - int retcode; // return code of request -}; - -/** - * High level function prototypes - */ -static int z90crypt_open(struct inode *, struct file *); -static int z90crypt_release(struct inode *, struct file *); -static ssize_t z90crypt_read(struct file *, char __user *, size_t, loff_t *); -static ssize_t z90crypt_write(struct file *, const char __user *, - size_t, loff_t *); -static long z90crypt_unlocked_ioctl(struct file *, unsigned int, unsigned long); -static long z90crypt_compat_ioctl(struct file *, unsigned int, unsigned long); - -static void z90crypt_reader_task(unsigned long); -static void z90crypt_schedule_reader_task(unsigned long); -static void z90crypt_config_task(unsigned long); -static void z90crypt_cleanup_task(unsigned long); - -static int z90crypt_status(char *, char **, off_t, int, int *, void *); -static int z90crypt_status_write(struct file *, const char __user *, - unsigned long, void *); - -/** - * Storage allocated at initialization and used throughout the life of - * this insmod - */ -static int domain = DOMAIN_INDEX; -static struct z90crypt z90crypt; -static int quiesce_z90crypt; -static spinlock_t queuespinlock; -static struct list_head request_list; -static int requestq_count; -static struct list_head pending_list; -static int pendingq_count; - -static struct tasklet_struct reader_tasklet; -static struct timer_list reader_timer; -static struct timer_list config_timer; -static struct timer_list cleanup_timer; -static atomic_t total_open; -static atomic_t z90crypt_step; - -static struct file_operations z90crypt_fops = { - .owner = THIS_MODULE, - .read = z90crypt_read, - .write = z90crypt_write, - .unlocked_ioctl = z90crypt_unlocked_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = z90crypt_compat_ioctl, -#endif - .open = z90crypt_open, - .release = z90crypt_release -}; - -static struct miscdevice z90crypt_misc_device = { - .minor = Z90CRYPT_MINOR, - .name = DEV_NAME, - .fops = &z90crypt_fops, -}; - -/** - * Documentation values. - */ -MODULE_AUTHOR("zSeries Linux Crypto Team: Robert H. Burroughs, Eric D. Rossman" - "and Jochen Roehrig"); -MODULE_DESCRIPTION("zSeries Linux Cryptographic Coprocessor device driver, " - "Copyright 2001, 2005 IBM Corporation"); -MODULE_LICENSE("GPL"); -module_param(domain, int, 0); -MODULE_PARM_DESC(domain, "domain index for device"); - -#ifdef CONFIG_COMPAT -/** - * ioctl32 conversion routines - */ -struct ica_rsa_modexpo_32 { // For 32-bit callers - compat_uptr_t inputdata; - unsigned int inputdatalength; - compat_uptr_t outputdata; - unsigned int outputdatalength; - compat_uptr_t b_key; - compat_uptr_t n_modulus; -}; - -static long -trans_modexpo32(struct file *filp, unsigned int cmd, unsigned long arg) -{ - struct ica_rsa_modexpo_32 __user *mex32u = compat_ptr(arg); - struct ica_rsa_modexpo_32 mex32k; - struct ica_rsa_modexpo __user *mex64; - long ret = 0; - unsigned int i; - - if (!access_ok(VERIFY_WRITE, mex32u, sizeof(struct ica_rsa_modexpo_32))) - return -EFAULT; - mex64 = compat_alloc_user_space(sizeof(struct ica_rsa_modexpo)); - if (!access_ok(VERIFY_WRITE, mex64, sizeof(struct ica_rsa_modexpo))) - return -EFAULT; - if (copy_from_user(&mex32k, mex32u, sizeof(struct ica_rsa_modexpo_32))) - return -EFAULT; - if (__put_user(compat_ptr(mex32k.inputdata), &mex64->inputdata) || - __put_user(mex32k.inputdatalength, &mex64->inputdatalength) || - __put_user(compat_ptr(mex32k.outputdata), &mex64->outputdata) || - __put_user(mex32k.outputdatalength, &mex64->outputdatalength) || - __put_user(compat_ptr(mex32k.b_key), &mex64->b_key) || - __put_user(compat_ptr(mex32k.n_modulus), &mex64->n_modulus)) - return -EFAULT; - ret = z90crypt_unlocked_ioctl(filp, cmd, (unsigned long)mex64); - if (!ret) - if (__get_user(i, &mex64->outputdatalength) || - __put_user(i, &mex32u->outputdatalength)) - ret = -EFAULT; - return ret; -} - -struct ica_rsa_modexpo_crt_32 { // For 32-bit callers - compat_uptr_t inputdata; - unsigned int inputdatalength; - compat_uptr_t outputdata; - unsigned int outputdatalength; - compat_uptr_t bp_key; - compat_uptr_t bq_key; - compat_uptr_t np_prime; - compat_uptr_t nq_prime; - compat_uptr_t u_mult_inv; -}; - -static long -trans_modexpo_crt32(struct file *filp, unsigned int cmd, unsigned long arg) -{ - struct ica_rsa_modexpo_crt_32 __user *crt32u = compat_ptr(arg); - struct ica_rsa_modexpo_crt_32 crt32k; - struct ica_rsa_modexpo_crt __user *crt64; - long ret = 0; - unsigned int i; - - if (!access_ok(VERIFY_WRITE, crt32u, - sizeof(struct ica_rsa_modexpo_crt_32))) - return -EFAULT; - crt64 = compat_alloc_user_space(sizeof(struct ica_rsa_modexpo_crt)); - if (!access_ok(VERIFY_WRITE, crt64, sizeof(struct ica_rsa_modexpo_crt))) - return -EFAULT; - if (copy_from_user(&crt32k, crt32u, - sizeof(struct ica_rsa_modexpo_crt_32))) - return -EFAULT; - if (__put_user(compat_ptr(crt32k.inputdata), &crt64->inputdata) || - __put_user(crt32k.inputdatalength, &crt64->inputdatalength) || - __put_user(compat_ptr(crt32k.outputdata), &crt64->outputdata) || - __put_user(crt32k.outputdatalength, &crt64->outputdatalength) || - __put_user(compat_ptr(crt32k.bp_key), &crt64->bp_key) || - __put_user(compat_ptr(crt32k.bq_key), &crt64->bq_key) || - __put_user(compat_ptr(crt32k.np_prime), &crt64->np_prime) || - __put_user(compat_ptr(crt32k.nq_prime), &crt64->nq_prime) || - __put_user(compat_ptr(crt32k.u_mult_inv), &crt64->u_mult_inv)) - return -EFAULT; - ret = z90crypt_unlocked_ioctl(filp, cmd, (unsigned long)crt64); - if (!ret) - if (__get_user(i, &crt64->outputdatalength) || - __put_user(i, &crt32u->outputdatalength)) - ret = -EFAULT; - return ret; -} - -static long -z90crypt_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - switch (cmd) { - case ICAZ90STATUS: - case Z90QUIESCE: - case Z90STAT_TOTALCOUNT: - case Z90STAT_PCICACOUNT: - case Z90STAT_PCICCCOUNT: - case Z90STAT_PCIXCCCOUNT: - case Z90STAT_PCIXCCMCL2COUNT: - case Z90STAT_PCIXCCMCL3COUNT: - case Z90STAT_CEX2CCOUNT: - case Z90STAT_REQUESTQ_COUNT: - case Z90STAT_PENDINGQ_COUNT: - case Z90STAT_TOTALOPEN_COUNT: - case Z90STAT_DOMAIN_INDEX: - case Z90STAT_STATUS_MASK: - case Z90STAT_QDEPTH_MASK: - case Z90STAT_PERDEV_REQCNT: - return z90crypt_unlocked_ioctl(filp, cmd, arg); - case ICARSAMODEXPO: - return trans_modexpo32(filp, cmd, arg); - case ICARSACRT: - return trans_modexpo_crt32(filp, cmd, arg); - default: - return -ENOIOCTLCMD; - } -} -#endif - -/** - * The module initialization code. - */ -static int __init -z90crypt_init_module(void) -{ - int result, nresult; - struct proc_dir_entry *entry; - - PDEBUG("PID %d\n", PID()); - - if ((domain < -1) || (domain > 15)) { - PRINTKW("Invalid param: domain = %d. Not loading.\n", domain); - return -EINVAL; - } - - /* Register as misc device with given minor (or get a dynamic one). */ - result = misc_register(&z90crypt_misc_device); - if (result < 0) { - PRINTKW(KERN_ERR "misc_register (minor %d) failed with %d\n", - z90crypt_misc_device.minor, result); - return result; - } - - PDEBUG("Registered " DEV_NAME " with result %d\n", result); - - result = create_z90crypt(&domain); - if (result != 0) { - PRINTKW("create_z90crypt (domain index %d) failed with %d.\n", - domain, result); - result = -ENOMEM; - goto init_module_cleanup; - } - - if (result == 0) { - PRINTKN("Version %d.%d.%d loaded, built on %s %s\n", - z90crypt_VERSION, z90crypt_RELEASE, z90crypt_VARIANT, - __DATE__, __TIME__); - PDEBUG("create_z90crypt (domain index %d) successful.\n", - domain); - } else - PRINTK("No devices at startup\n"); - - /* Initialize globals. */ - spin_lock_init(&queuespinlock); - - INIT_LIST_HEAD(&pending_list); - pendingq_count = 0; - - INIT_LIST_HEAD(&request_list); - requestq_count = 0; - - quiesce_z90crypt = 0; - - atomic_set(&total_open, 0); - atomic_set(&z90crypt_step, 0); - - /* Set up the cleanup task. */ - init_timer(&cleanup_timer); - cleanup_timer.function = z90crypt_cleanup_task; - cleanup_timer.data = 0; - cleanup_timer.expires = jiffies + (CLEANUPTIME * HZ); - add_timer(&cleanup_timer); - - /* Set up the proc file system */ - entry = create_proc_entry("driver/z90crypt", 0644, 0); - if (entry) { - entry->nlink = 1; - entry->data = 0; - entry->read_proc = z90crypt_status; - entry->write_proc = z90crypt_status_write; - } - else - PRINTK("Couldn't create z90crypt proc entry\n"); - z90crypt_entry = entry; - - /* Set up the configuration task. */ - init_timer(&config_timer); - config_timer.function = z90crypt_config_task; - config_timer.data = 0; - config_timer.expires = jiffies + (INITIAL_CONFIGTIME * HZ); - add_timer(&config_timer); - - /* Set up the reader task */ - tasklet_init(&reader_tasklet, z90crypt_reader_task, 0); - init_timer(&reader_timer); - reader_timer.function = z90crypt_schedule_reader_task; - reader_timer.data = 0; - reader_timer.expires = jiffies + (READERTIME * HZ / 1000); - add_timer(&reader_timer); - - return 0; // success - -init_module_cleanup: - if ((nresult = misc_deregister(&z90crypt_misc_device))) - PRINTK("misc_deregister failed with %d.\n", nresult); - else - PDEBUG("misc_deregister successful.\n"); - - return result; // failure -} - -/** - * The module termination code - */ -static void __exit -z90crypt_cleanup_module(void) -{ - int nresult; - - PDEBUG("PID %d\n", PID()); - - remove_proc_entry("driver/z90crypt", 0); - - if ((nresult = misc_deregister(&z90crypt_misc_device))) - PRINTK("misc_deregister failed with %d.\n", nresult); - else - PDEBUG("misc_deregister successful.\n"); - - /* Remove the tasks */ - tasklet_kill(&reader_tasklet); - del_timer(&reader_timer); - del_timer(&config_timer); - del_timer(&cleanup_timer); - - destroy_z90crypt(); - - PRINTKN("Unloaded.\n"); -} - -/** - * Functions running under a process id - * - * The I/O functions: - * z90crypt_open - * z90crypt_release - * z90crypt_read - * z90crypt_write - * z90crypt_unlocked_ioctl - * z90crypt_status - * z90crypt_status_write - * disable_card - * enable_card - * - * Helper functions: - * z90crypt_rsa - * z90crypt_prepare - * z90crypt_send - * z90crypt_process_results - * - */ -static int -z90crypt_open(struct inode *inode, struct file *filp) -{ - struct priv_data *private_data_p; - - if (quiesce_z90crypt) - return -EQUIESCE; - - private_data_p = kzalloc(sizeof(struct priv_data), GFP_KERNEL); - if (!private_data_p) { - PRINTK("Memory allocate failed\n"); - return -ENOMEM; - } - - private_data_p->status = STAT_OPEN; - private_data_p->opener_pid = PID(); - filp->private_data = private_data_p; - atomic_inc(&total_open); - - return 0; -} - -static int -z90crypt_release(struct inode *inode, struct file *filp) -{ - struct priv_data *private_data_p = filp->private_data; - - PDEBUG("PID %d (filp %p)\n", PID(), filp); - - private_data_p->status = STAT_CLOSED; - memset(private_data_p, 0, sizeof(struct priv_data)); - kfree(private_data_p); - atomic_dec(&total_open); - - return 0; -} - -/* - * there are two read functions, of which compile options will choose one - * without USE_GET_RANDOM_BYTES - * => read() always returns -EPERM; - * otherwise - * => read() uses get_random_bytes() kernel function - */ -#ifndef USE_GET_RANDOM_BYTES -/** - * z90crypt_read will not be supported beyond z90crypt 1.3.1 - */ -static ssize_t -z90crypt_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) -{ - PDEBUG("filp %p (PID %d)\n", filp, PID()); - return -EPERM; -} -#else // we want to use get_random_bytes -/** - * read() just returns a string of random bytes. Since we have no way - * to generate these cryptographically, we just execute get_random_bytes - * for the length specified. - */ -#include -static ssize_t -z90crypt_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) -{ - unsigned char *temp_buff; - - PDEBUG("filp %p (PID %d)\n", filp, PID()); - - if (quiesce_z90crypt) - return -EQUIESCE; - if (count < 0) { - PRINTK("Requested random byte count negative: %ld\n", count); - return -EINVAL; - } - if (count > RESPBUFFSIZE) { - PDEBUG("count[%d] > RESPBUFFSIZE", count); - return -EINVAL; - } - if (count == 0) - return 0; - temp_buff = kmalloc(RESPBUFFSIZE, GFP_KERNEL); - if (!temp_buff) { - PRINTK("Memory allocate failed\n"); - return -ENOMEM; - } - get_random_bytes(temp_buff, count); - - if (copy_to_user(buf, temp_buff, count) != 0) { - kfree(temp_buff); - return -EFAULT; - } - kfree(temp_buff); - return count; -} -#endif - -/** - * Write is is not allowed - */ -static ssize_t -z90crypt_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos) -{ - PDEBUG("filp %p (PID %d)\n", filp, PID()); - return -EPERM; -} - -/** - * New status functions - */ -static inline int -get_status_totalcount(void) -{ - return z90crypt.hdware_info->hdware_mask.st_count; -} - -static inline int -get_status_PCICAcount(void) -{ - return z90crypt.hdware_info->type_mask[PCICA].st_count; -} - -static inline int -get_status_PCICCcount(void) -{ - return z90crypt.hdware_info->type_mask[PCICC].st_count; -} - -static inline int -get_status_PCIXCCcount(void) -{ - return z90crypt.hdware_info->type_mask[PCIXCC_MCL2].st_count + - z90crypt.hdware_info->type_mask[PCIXCC_MCL3].st_count; -} - -static inline int -get_status_PCIXCCMCL2count(void) -{ - return z90crypt.hdware_info->type_mask[PCIXCC_MCL2].st_count; -} - -static inline int -get_status_PCIXCCMCL3count(void) -{ - return z90crypt.hdware_info->type_mask[PCIXCC_MCL3].st_count; -} - -static inline int -get_status_CEX2Ccount(void) -{ - return z90crypt.hdware_info->type_mask[CEX2C].st_count; -} - -static inline int -get_status_CEX2Acount(void) -{ - return z90crypt.hdware_info->type_mask[CEX2A].st_count; -} - -static inline int -get_status_requestq_count(void) -{ - return requestq_count; -} - -static inline int -get_status_pendingq_count(void) -{ - return pendingq_count; -} - -static inline int -get_status_totalopen_count(void) -{ - return atomic_read(&total_open); -} - -static inline int -get_status_domain_index(void) -{ - return z90crypt.cdx; -} - -static inline unsigned char * -get_status_status_mask(unsigned char status[Z90CRYPT_NUM_APS]) -{ - int i, ix; - - memcpy(status, z90crypt.hdware_info->device_type_array, - Z90CRYPT_NUM_APS); - - for (i = 0; i < get_status_totalcount(); i++) { - ix = SHRT2LONG(i); - if (LONG2DEVPTR(ix)->user_disabled) - status[ix] = 0x0d; - } - - return status; -} - -static inline unsigned char * -get_status_qdepth_mask(unsigned char qdepth[Z90CRYPT_NUM_APS]) -{ - int i, ix; - - memset(qdepth, 0, Z90CRYPT_NUM_APS); - - for (i = 0; i < get_status_totalcount(); i++) { - ix = SHRT2LONG(i); - qdepth[ix] = LONG2DEVPTR(ix)->dev_caller_count; - } - - return qdepth; -} - -static inline unsigned int * -get_status_perdevice_reqcnt(unsigned int reqcnt[Z90CRYPT_NUM_APS]) -{ - int i, ix; - - memset(reqcnt, 0, Z90CRYPT_NUM_APS * sizeof(int)); - - for (i = 0; i < get_status_totalcount(); i++) { - ix = SHRT2LONG(i); - reqcnt[ix] = LONG2DEVPTR(ix)->dev_total_req_cnt; - } - - return reqcnt; -} - -static inline void -init_work_element(struct work_element *we_p, - struct priv_data *priv_data, pid_t pid) -{ - int step; - - we_p->requestptr = (unsigned char *)we_p + sizeof(struct work_element); - /* Come up with a unique id for this caller. */ - step = atomic_inc_return(&z90crypt_step); - memcpy(we_p->caller_id+0, (void *) &pid, sizeof(pid)); - memcpy(we_p->caller_id+4, (void *) &step, sizeof(step)); - we_p->pid = pid; - we_p->priv_data = priv_data; - we_p->status[0] = STAT_DEFAULT; - we_p->audit[0] = 0x00; - we_p->audit[1] = 0x00; - we_p->audit[2] = 0x00; - we_p->resp_buff_size = 0; - we_p->retcode = 0; - we_p->devindex = -1; - we_p->devtype = -1; - atomic_set(&we_p->alarmrung, 0); - init_waitqueue_head(&we_p->waitq); - INIT_LIST_HEAD(&(we_p->liste)); -} - -static inline int -allocate_work_element(struct work_element **we_pp, - struct priv_data *priv_data_p, pid_t pid) -{ - struct work_element *we_p; - - we_p = (struct work_element *) get_zeroed_page(GFP_KERNEL); - if (!we_p) - return -ENOMEM; - init_work_element(we_p, priv_data_p, pid); - *we_pp = we_p; - return 0; -} - -static inline void -remove_device(struct device *device_p) -{ - if (!device_p || (device_p->disabled != 0)) - return; - device_p->disabled = 1; - z90crypt.hdware_info->type_mask[device_p->dev_type].disabled_count++; - z90crypt.hdware_info->hdware_mask.disabled_count++; -} - -/** - * Bitlength limits for each card - * - * There are new MCLs which allow more bitlengths. See the table for details. - * The MCL must be applied and the newer bitlengths enabled for these to work. - * - * Card Type Old limit New limit - * PCICA ??-2048 same (the lower limit is less than 128 bit...) - * PCICC 512-1024 512-2048 - * PCIXCC_MCL2 512-2048 ----- (applying any GA LIC will make an MCL3 card) - * PCIXCC_MCL3 ----- 128-2048 - * CEX2C 512-2048 128-2048 - * CEX2A ??-2048 same (the lower limit is less than 128 bit...) - * - * ext_bitlens (extended bitlengths) is a global, since you should not apply an - * MCL to just one card in a machine. We assume, at first, that all cards have - * these capabilities. - */ -int ext_bitlens = 1; // This is global -#define PCIXCC_MIN_MOD_SIZE 16 // 128 bits -#define OLD_PCIXCC_MIN_MOD_SIZE 64 // 512 bits -#define PCICC_MIN_MOD_SIZE 64 // 512 bits -#define OLD_PCICC_MAX_MOD_SIZE 128 // 1024 bits -#define MAX_MOD_SIZE 256 // 2048 bits - -static inline int -select_device_type(int *dev_type_p, int bytelength) -{ - static int count = 0; - int PCICA_avail, PCIXCC_MCL3_avail, CEX2C_avail, CEX2A_avail, - index_to_use; - struct status *stat; - if ((*dev_type_p != PCICC) && (*dev_type_p != PCICA) && - (*dev_type_p != PCIXCC_MCL2) && (*dev_type_p != PCIXCC_MCL3) && - (*dev_type_p != CEX2C) && (*dev_type_p != CEX2A) && - (*dev_type_p != ANYDEV)) - return -1; - if (*dev_type_p != ANYDEV) { - stat = &z90crypt.hdware_info->type_mask[*dev_type_p]; - if (stat->st_count > - (stat->disabled_count + stat->user_disabled_count)) - return 0; - return -1; - } - - /** - * Assumption: PCICA, PCIXCC_MCL3, CEX2C, and CEX2A are all similar in - * speed. - * - * PCICA and CEX2A do NOT co-exist, so it would be either one or the - * other present. - */ - stat = &z90crypt.hdware_info->type_mask[PCICA]; - PCICA_avail = stat->st_count - - (stat->disabled_count + stat->user_disabled_count); - stat = &z90crypt.hdware_info->type_mask[PCIXCC_MCL3]; - PCIXCC_MCL3_avail = stat->st_count - - (stat->disabled_count + stat->user_disabled_count); - stat = &z90crypt.hdware_info->type_mask[CEX2C]; - CEX2C_avail = stat->st_count - - (stat->disabled_count + stat->user_disabled_count); - stat = &z90crypt.hdware_info->type_mask[CEX2A]; - CEX2A_avail = stat->st_count - - (stat->disabled_count + stat->user_disabled_count); - if (PCICA_avail || PCIXCC_MCL3_avail || CEX2C_avail || CEX2A_avail) { - /** - * bitlength is a factor, PCICA or CEX2A are the most capable, - * even with the new MCL for PCIXCC. - */ - if ((bytelength < PCIXCC_MIN_MOD_SIZE) || - (!ext_bitlens && (bytelength < OLD_PCIXCC_MIN_MOD_SIZE))) { - if (PCICA_avail) { - *dev_type_p = PCICA; - return 0; - } - if (CEX2A_avail) { - *dev_type_p = CEX2A; - return 0; - } - return -1; - } - - index_to_use = count % (PCICA_avail + PCIXCC_MCL3_avail + - CEX2C_avail + CEX2A_avail); - if (index_to_use < PCICA_avail) - *dev_type_p = PCICA; - else if (index_to_use < (PCICA_avail + PCIXCC_MCL3_avail)) - *dev_type_p = PCIXCC_MCL3; - else if (index_to_use < (PCICA_avail + PCIXCC_MCL3_avail + - CEX2C_avail)) - *dev_type_p = CEX2C; - else - *dev_type_p = CEX2A; - count++; - return 0; - } - - /* Less than OLD_PCIXCC_MIN_MOD_SIZE cannot go to a PCIXCC_MCL2 */ - if (bytelength < OLD_PCIXCC_MIN_MOD_SIZE) - return -1; - stat = &z90crypt.hdware_info->type_mask[PCIXCC_MCL2]; - if (stat->st_count > - (stat->disabled_count + stat->user_disabled_count)) { - *dev_type_p = PCIXCC_MCL2; - return 0; - } - - /** - * Less than PCICC_MIN_MOD_SIZE or more than OLD_PCICC_MAX_MOD_SIZE - * (if we don't have the MCL applied and the newer bitlengths enabled) - * cannot go to a PCICC - */ - if ((bytelength < PCICC_MIN_MOD_SIZE) || - (!ext_bitlens && (bytelength > OLD_PCICC_MAX_MOD_SIZE))) { - return -1; - } - stat = &z90crypt.hdware_info->type_mask[PCICC]; - if (stat->st_count > - (stat->disabled_count + stat->user_disabled_count)) { - *dev_type_p = PCICC; - return 0; - } - - return -1; -} - -/** - * Try the selected number, then the selected type (can be ANYDEV) - */ -static inline int -select_device(int *dev_type_p, int *device_nr_p, int bytelength) -{ - int i, indx, devTp, low_count, low_indx; - struct device_x *index_p; - struct device *dev_ptr; - - PDEBUG("device type = %d, index = %d\n", *dev_type_p, *device_nr_p); - if ((*device_nr_p >= 0) && (*device_nr_p < Z90CRYPT_NUM_DEVS)) { - PDEBUG("trying index = %d\n", *device_nr_p); - dev_ptr = z90crypt.device_p[*device_nr_p]; - - if (dev_ptr && - (dev_ptr->dev_stat != DEV_GONE) && - (dev_ptr->disabled == 0) && - (dev_ptr->user_disabled == 0)) { - PDEBUG("selected by number, index = %d\n", - *device_nr_p); - *dev_type_p = dev_ptr->dev_type; - return *device_nr_p; - } - } - *device_nr_p = -1; - PDEBUG("trying type = %d\n", *dev_type_p); - devTp = *dev_type_p; - if (select_device_type(&devTp, bytelength) == -1) { - PDEBUG("failed to select by type\n"); - return -1; - } - PDEBUG("selected type = %d\n", devTp); - index_p = &z90crypt.hdware_info->type_x_addr[devTp]; - low_count = 0x0000FFFF; - low_indx = -1; - for (i = 0; i < z90crypt.hdware_info->type_mask[devTp].st_count; i++) { - indx = index_p->device_index[i]; - dev_ptr = z90crypt.device_p[indx]; - if (dev_ptr && - (dev_ptr->dev_stat != DEV_GONE) && - (dev_ptr->disabled == 0) && - (dev_ptr->user_disabled == 0) && - (devTp == dev_ptr->dev_type) && - (low_count > dev_ptr->dev_caller_count)) { - low_count = dev_ptr->dev_caller_count; - low_indx = indx; - } - } - *device_nr_p = low_indx; - return low_indx; -} - -static inline int -send_to_crypto_device(struct work_element *we_p) -{ - struct caller *caller_p; - struct device *device_p; - int dev_nr; - int bytelen = ((struct ica_rsa_modexpo *)we_p->buffer)->inputdatalength; - - if (!we_p->requestptr) - return SEN_FATAL_ERROR; - caller_p = (struct caller *)we_p->requestptr; - dev_nr = we_p->devindex; - if (select_device(&we_p->devtype, &dev_nr, bytelen) == -1) { - if (z90crypt.hdware_info->hdware_mask.st_count != 0) - return SEN_RETRY; - else - return SEN_NOT_AVAIL; - } - we_p->devindex = dev_nr; - device_p = z90crypt.device_p[dev_nr]; - if (!device_p) - return SEN_NOT_AVAIL; - if (device_p->dev_type != we_p->devtype) - return SEN_RETRY; - if (device_p->dev_caller_count >= device_p->dev_q_depth) - return SEN_QUEUE_FULL; - PDEBUG("device number prior to send: %d\n", dev_nr); - switch (send_to_AP(dev_nr, z90crypt.cdx, - caller_p->caller_dev_dep_req_l, - caller_p->caller_dev_dep_req_p)) { - case DEV_SEN_EXCEPTION: - PRINTKC("Exception during send to device %d\n", dev_nr); - z90crypt.terminating = 1; - return SEN_FATAL_ERROR; - case DEV_GONE: - PRINTK("Device %d not available\n", dev_nr); - remove_device(device_p); - return SEN_NOT_AVAIL; - case DEV_EMPTY: - return SEN_NOT_AVAIL; - case DEV_NO_WORK: - return SEN_FATAL_ERROR; - case DEV_BAD_MESSAGE: - return SEN_USER_ERROR; - case DEV_QUEUE_FULL: - return SEN_QUEUE_FULL; - default: - case DEV_ONLINE: - break; - } - list_add_tail(&(caller_p->caller_liste), &(device_p->dev_caller_list)); - device_p->dev_caller_count++; - return 0; -} - -/** - * Send puts the user's work on one of two queues: - * the pending queue if the send was successful - * the request queue if the send failed because device full or busy - */ -static inline int -z90crypt_send(struct work_element *we_p, const char *buf) -{ - int rv; - - PDEBUG("PID %d\n", PID()); - - if (CHK_RDWRMASK(we_p->status[0]) != STAT_NOWORK) { - PDEBUG("PID %d tried to send more work but has outstanding " - "work.\n", PID()); - return -EWORKPEND; - } - we_p->devindex = -1; // Reset device number - spin_lock_irq(&queuespinlock); - rv = send_to_crypto_device(we_p); - switch (rv) { - case 0: - we_p->requestsent = jiffies; - we_p->audit[0] |= FP_SENT; - list_add_tail(&we_p->liste, &pending_list); - ++pendingq_count; - we_p->audit[0] |= FP_PENDING; - break; - case SEN_BUSY: - case SEN_QUEUE_FULL: - rv = 0; - we_p->devindex = -1; // any device will do - we_p->requestsent = jiffies; - list_add_tail(&we_p->liste, &request_list); - ++requestq_count; - we_p->audit[0] |= FP_REQUEST; - break; - case SEN_RETRY: - rv = -ERESTARTSYS; - break; - case SEN_NOT_AVAIL: - PRINTK("*** No devices available.\n"); - rv = we_p->retcode = -ENODEV; - we_p->status[0] |= STAT_FAILED; - break; - case REC_OPERAND_INV: - case REC_OPERAND_SIZE: - case REC_EVEN_MOD: - case REC_INVALID_PAD: - rv = we_p->retcode = -EINVAL; - we_p->status[0] |= STAT_FAILED; - break; - default: - we_p->retcode = rv; - we_p->status[0] |= STAT_FAILED; - break; - } - if (rv != -ERESTARTSYS) - SET_RDWRMASK(we_p->status[0], STAT_WRITTEN); - spin_unlock_irq(&queuespinlock); - if (rv == 0) - tasklet_schedule(&reader_tasklet); - return rv; -} - -/** - * process_results copies the user's work from kernel space. - */ -static inline int -z90crypt_process_results(struct work_element *we_p, char __user *buf) -{ - int rv; - - PDEBUG("we_p %p (PID %d)\n", we_p, PID()); - - LONG2DEVPTR(we_p->devindex)->dev_total_req_cnt++; - SET_RDWRMASK(we_p->status[0], STAT_READPEND); - - rv = 0; - if (!we_p->buffer) { - PRINTK("we_p %p PID %d in STAT_READPEND: buffer NULL.\n", - we_p, PID()); - rv = -ENOBUFF; - } - - if (!rv) - if ((rv = copy_to_user(buf, we_p->buffer, we_p->buff_size))) { - PDEBUG("copy_to_user failed: rv = %d\n", rv); - rv = -EFAULT; - } - - if (!rv) - rv = we_p->retcode; - if (!rv) - if (we_p->resp_buff_size - && copy_to_user(we_p->resp_addr, we_p->resp_buff, - we_p->resp_buff_size)) - rv = -EFAULT; - - SET_RDWRMASK(we_p->status[0], STAT_NOWORK); - return rv; -} - -static unsigned char NULL_psmid[8] = -{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; - -/** - * Used in device configuration functions - */ -#define MAX_RESET 90 - -/** - * This is used only for PCICC support - */ -static inline int -is_PKCS11_padded(unsigned char *buffer, int length) -{ - int i; - if ((buffer[0] != 0x00) || (buffer[1] != 0x01)) - return 0; - for (i = 2; i < length; i++) - if (buffer[i] != 0xFF) - break; - if ((i < 10) || (i == length)) - return 0; - if (buffer[i] != 0x00) - return 0; - return 1; -} - -/** - * This is used only for PCICC support - */ -static inline int -is_PKCS12_padded(unsigned char *buffer, int length) -{ - int i; - if ((buffer[0] != 0x00) || (buffer[1] != 0x02)) - return 0; - for (i = 2; i < length; i++) - if (buffer[i] == 0x00) - break; - if ((i < 10) || (i == length)) - return 0; - if (buffer[i] != 0x00) - return 0; - return 1; -} - -/** - * builds struct caller and converts message from generic format to - * device-dependent format - * func is ICARSAMODEXPO or ICARSACRT - * function is PCI_FUNC_KEY_ENCRYPT or PCI_FUNC_KEY_DECRYPT - */ -static inline int -build_caller(struct work_element *we_p, short function) -{ - int rv; - struct caller *caller_p = (struct caller *)we_p->requestptr; - - if ((we_p->devtype != PCICC) && (we_p->devtype != PCICA) && - (we_p->devtype != PCIXCC_MCL2) && (we_p->devtype != PCIXCC_MCL3) && - (we_p->devtype != CEX2C) && (we_p->devtype != CEX2A)) - return SEN_NOT_AVAIL; - - memcpy(caller_p->caller_id, we_p->caller_id, - sizeof(caller_p->caller_id)); - caller_p->caller_dev_dep_req_p = caller_p->caller_dev_dep_req; - caller_p->caller_dev_dep_req_l = MAX_RESPONSE_SIZE; - caller_p->caller_buf_p = we_p->buffer; - INIT_LIST_HEAD(&(caller_p->caller_liste)); - - rv = convert_request(we_p->buffer, we_p->funccode, function, - z90crypt.cdx, we_p->devtype, - &caller_p->caller_dev_dep_req_l, - caller_p->caller_dev_dep_req_p); - if (rv) { - if (rv == SEN_NOT_AVAIL) - PDEBUG("request can't be processed on hdwr avail\n"); - else - PRINTK("Error from convert_request: %d\n", rv); - } - else - memcpy(&(caller_p->caller_dev_dep_req_p[4]), we_p->caller_id,8); - return rv; -} - -static inline void -unbuild_caller(struct device *device_p, struct caller *caller_p) -{ - if (!caller_p) - return; - if (caller_p->caller_liste.next && caller_p->caller_liste.prev) - if (!list_empty(&caller_p->caller_liste)) { - list_del_init(&caller_p->caller_liste); - device_p->dev_caller_count--; - } - memset(caller_p->caller_id, 0, sizeof(caller_p->caller_id)); -} - -static inline int -get_crypto_request_buffer(struct work_element *we_p) -{ - struct ica_rsa_modexpo *mex_p; - struct ica_rsa_modexpo_crt *crt_p; - unsigned char *temp_buffer; - short function; - int rv; - - mex_p = (struct ica_rsa_modexpo *) we_p->buffer; - crt_p = (struct ica_rsa_modexpo_crt *) we_p->buffer; - - PDEBUG("device type input = %d\n", we_p->devtype); - - if (z90crypt.terminating) - return REC_NO_RESPONSE; - if (memcmp(we_p->caller_id, NULL_psmid, 8) == 0) { - PRINTK("psmid zeroes\n"); - return SEN_FATAL_ERROR; - } - if (!we_p->buffer) { - PRINTK("buffer pointer NULL\n"); - return SEN_USER_ERROR; - } - if (!we_p->requestptr) { - PRINTK("caller pointer NULL\n"); - return SEN_USER_ERROR; - } - - if ((we_p->devtype != PCICA) && (we_p->devtype != PCICC) && - (we_p->devtype != PCIXCC_MCL2) && (we_p->devtype != PCIXCC_MCL3) && - (we_p->devtype != CEX2C) && (we_p->devtype != CEX2A) && - (we_p->devtype != ANYDEV)) { - PRINTK("invalid device type\n"); - return SEN_USER_ERROR; - } - - if ((mex_p->inputdatalength < 1) || - (mex_p->inputdatalength > MAX_MOD_SIZE)) { - PRINTK("inputdatalength[%d] is not valid\n", - mex_p->inputdatalength); - return SEN_USER_ERROR; - } - - if (mex_p->outputdatalength < mex_p->inputdatalength) { - PRINTK("outputdatalength[%d] < inputdatalength[%d]\n", - mex_p->outputdatalength, mex_p->inputdatalength); - return SEN_USER_ERROR; - } - - if (!mex_p->inputdata || !mex_p->outputdata) { - PRINTK("inputdata[%p] or outputdata[%p] is NULL\n", - mex_p->outputdata, mex_p->inputdata); - return SEN_USER_ERROR; - } - - /** - * As long as outputdatalength is big enough, we can set the - * outputdatalength equal to the inputdatalength, since that is the - * number of bytes we will copy in any case - */ - mex_p->outputdatalength = mex_p->inputdatalength; - - rv = 0; - switch (we_p->funccode) { - case ICARSAMODEXPO: - if (!mex_p->b_key || !mex_p->n_modulus) - rv = SEN_USER_ERROR; - break; - case ICARSACRT: - if (!IS_EVEN(crt_p->inputdatalength)) { - PRINTK("inputdatalength[%d] is odd, CRT form\n", - crt_p->inputdatalength); - rv = SEN_USER_ERROR; - break; - } - if (!crt_p->bp_key || - !crt_p->bq_key || - !crt_p->np_prime || - !crt_p->nq_prime || - !crt_p->u_mult_inv) { - PRINTK("CRT form, bad data: %p/%p/%p/%p/%p\n", - crt_p->bp_key, crt_p->bq_key, - crt_p->np_prime, crt_p->nq_prime, - crt_p->u_mult_inv); - rv = SEN_USER_ERROR; - } - break; - default: - PRINTK("bad func = %d\n", we_p->funccode); - rv = SEN_USER_ERROR; - break; - } - if (rv != 0) - return rv; - - if (select_device_type(&we_p->devtype, mex_p->inputdatalength) < 0) - return SEN_NOT_AVAIL; - - temp_buffer = (unsigned char *)we_p + sizeof(struct work_element) + - sizeof(struct caller); - if (copy_from_user(temp_buffer, mex_p->inputdata, - mex_p->inputdatalength) != 0) - return SEN_RELEASED; - - function = PCI_FUNC_KEY_ENCRYPT; - switch (we_p->devtype) { - /* PCICA and CEX2A do everything with a simple RSA mod-expo operation */ - case PCICA: - case CEX2A: - function = PCI_FUNC_KEY_ENCRYPT; - break; - /** - * PCIXCC_MCL2 does all Mod-Expo form with a simple RSA mod-expo - * operation, and all CRT forms with a PKCS-1.2 format decrypt. - * PCIXCC_MCL3 and CEX2C do all Mod-Expo and CRT forms with a simple RSA - * mod-expo operation - */ - case PCIXCC_MCL2: - if (we_p->funccode == ICARSAMODEXPO) - function = PCI_FUNC_KEY_ENCRYPT; - else - function = PCI_FUNC_KEY_DECRYPT; - break; - case PCIXCC_MCL3: - case CEX2C: - if (we_p->funccode == ICARSAMODEXPO) - function = PCI_FUNC_KEY_ENCRYPT; - else - function = PCI_FUNC_KEY_DECRYPT; - break; - /** - * PCICC does everything as a PKCS-1.2 format request - */ - case PCICC: - /* PCICC cannot handle input that is is PKCS#1.1 padded */ - if (is_PKCS11_padded(temp_buffer, mex_p->inputdatalength)) { - return SEN_NOT_AVAIL; - } - if (we_p->funccode == ICARSAMODEXPO) { - if (is_PKCS12_padded(temp_buffer, - mex_p->inputdatalength)) - function = PCI_FUNC_KEY_ENCRYPT; - else - function = PCI_FUNC_KEY_DECRYPT; - } else - /* all CRT forms are decrypts */ - function = PCI_FUNC_KEY_DECRYPT; - break; - } - PDEBUG("function: %04x\n", function); - rv = build_caller(we_p, function); - PDEBUG("rv from build_caller = %d\n", rv); - return rv; -} - -static inline int -z90crypt_prepare(struct work_element *we_p, unsigned int funccode, - const char __user *buffer) -{ - int rv; - - we_p->devindex = -1; - if (funccode == ICARSAMODEXPO) - we_p->buff_size = sizeof(struct ica_rsa_modexpo); - else - we_p->buff_size = sizeof(struct ica_rsa_modexpo_crt); - - if (copy_from_user(we_p->buffer, buffer, we_p->buff_size)) - return -EFAULT; - - we_p->audit[0] |= FP_COPYFROM; - SET_RDWRMASK(we_p->status[0], STAT_WRITTEN); - we_p->funccode = funccode; - we_p->devtype = -1; - we_p->audit[0] |= FP_BUFFREQ; - rv = get_crypto_request_buffer(we_p); - switch (rv) { - case 0: - we_p->audit[0] |= FP_BUFFGOT; - break; - case SEN_USER_ERROR: - rv = -EINVAL; - break; - case SEN_QUEUE_FULL: - rv = 0; - break; - case SEN_RELEASED: - rv = -EFAULT; - break; - case REC_NO_RESPONSE: - rv = -ENODEV; - break; - case SEN_NOT_AVAIL: - case EGETBUFF: - rv = -EGETBUFF; - break; - default: - PRINTK("rv = %d\n", rv); - rv = -EGETBUFF; - break; - } - if (CHK_RDWRMASK(we_p->status[0]) == STAT_WRITTEN) - SET_RDWRMASK(we_p->status[0], STAT_DEFAULT); - return rv; -} - -static inline void -purge_work_element(struct work_element *we_p) -{ - struct list_head *lptr; - - spin_lock_irq(&queuespinlock); - list_for_each(lptr, &request_list) { - if (lptr == &we_p->liste) { - list_del_init(lptr); - requestq_count--; - break; - } - } - list_for_each(lptr, &pending_list) { - if (lptr == &we_p->liste) { - list_del_init(lptr); - pendingq_count--; - break; - } - } - spin_unlock_irq(&queuespinlock); -} - -/** - * Build the request and send it. - */ -static inline int -z90crypt_rsa(struct priv_data *private_data_p, pid_t pid, - unsigned int cmd, unsigned long arg) -{ - struct work_element *we_p; - int rv; - - if ((rv = allocate_work_element(&we_p, private_data_p, pid))) { - PDEBUG("PID %d: allocate_work_element returned ENOMEM\n", pid); - return rv; - } - if ((rv = z90crypt_prepare(we_p, cmd, (const char __user *)arg))) - PDEBUG("PID %d: rv = %d from z90crypt_prepare\n", pid, rv); - if (!rv) - if ((rv = z90crypt_send(we_p, (const char *)arg))) - PDEBUG("PID %d: rv %d from z90crypt_send.\n", pid, rv); - if (!rv) { - we_p->audit[0] |= FP_ASLEEP; - wait_event(we_p->waitq, atomic_read(&we_p->alarmrung)); - we_p->audit[0] |= FP_AWAKE; - rv = we_p->retcode; - } - if (!rv) - rv = z90crypt_process_results(we_p, (char __user *)arg); - - if ((we_p->status[0] & STAT_FAILED)) { - switch (rv) { - /** - * EINVAL *after* receive is almost always a padding error or - * length error issued by a coprocessor (not an accelerator). - * We convert this return value to -EGETBUFF which should - * trigger a fallback to software. - */ - case -EINVAL: - if ((we_p->devtype != PCICA) && - (we_p->devtype != CEX2A)) - rv = -EGETBUFF; - break; - case -ETIMEOUT: - if (z90crypt.mask.st_count > 0) - rv = -ERESTARTSYS; // retry with another - else - rv = -ENODEV; // no cards left - /* fall through to clean up request queue */ - case -ERESTARTSYS: - case -ERELEASED: - switch (CHK_RDWRMASK(we_p->status[0])) { - case STAT_WRITTEN: - purge_work_element(we_p); - break; - case STAT_READPEND: - case STAT_NOWORK: - default: - break; - } - break; - default: - we_p->status[0] ^= STAT_FAILED; - break; - } - } - free_page((long)we_p); - return rv; -} - -/** - * This function is a little long, but it's really just one large switch - * statement. - */ -static long -z90crypt_unlocked_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - struct priv_data *private_data_p = filp->private_data; - unsigned char *status; - unsigned char *qdepth; - unsigned int *reqcnt; - struct ica_z90_status *pstat; - int ret, i, loopLim, tempstat; - static int deprecated_msg_count1 = 0; - static int deprecated_msg_count2 = 0; - - PDEBUG("filp %p (PID %d), cmd 0x%08X\n", filp, PID(), cmd); - PDEBUG("cmd 0x%08X: dir %s, size 0x%04X, type 0x%02X, nr 0x%02X\n", - cmd, - !_IOC_DIR(cmd) ? "NO" - : ((_IOC_DIR(cmd) == (_IOC_READ|_IOC_WRITE)) ? "RW" - : ((_IOC_DIR(cmd) == _IOC_READ) ? "RD" - : "WR")), - _IOC_SIZE(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd)); - - if (_IOC_TYPE(cmd) != Z90_IOCTL_MAGIC) { - PRINTK("cmd 0x%08X contains bad magic\n", cmd); - return -ENOTTY; - } - - ret = 0; - switch (cmd) { - case ICARSAMODEXPO: - case ICARSACRT: - if (quiesce_z90crypt) { - ret = -EQUIESCE; - break; - } - ret = -ENODEV; // Default if no devices - loopLim = z90crypt.hdware_info->hdware_mask.st_count - - (z90crypt.hdware_info->hdware_mask.disabled_count + - z90crypt.hdware_info->hdware_mask.user_disabled_count); - for (i = 0; i < loopLim; i++) { - ret = z90crypt_rsa(private_data_p, PID(), cmd, arg); - if (ret != -ERESTARTSYS) - break; - } - if (ret == -ERESTARTSYS) - ret = -ENODEV; - break; - - case Z90STAT_TOTALCOUNT: - tempstat = get_status_totalcount(); - if (copy_to_user((int __user *)arg, &tempstat,sizeof(int)) != 0) - ret = -EFAULT; - break; - - case Z90STAT_PCICACOUNT: - tempstat = get_status_PCICAcount(); - if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0) - ret = -EFAULT; - break; - - case Z90STAT_PCICCCOUNT: - tempstat = get_status_PCICCcount(); - if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0) - ret = -EFAULT; - break; - - case Z90STAT_PCIXCCMCL2COUNT: - tempstat = get_status_PCIXCCMCL2count(); - if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0) - ret = -EFAULT; - break; - - case Z90STAT_PCIXCCMCL3COUNT: - tempstat = get_status_PCIXCCMCL3count(); - if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0) - ret = -EFAULT; - break; - - case Z90STAT_CEX2CCOUNT: - tempstat = get_status_CEX2Ccount(); - if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0) - ret = -EFAULT; - break; - - case Z90STAT_CEX2ACOUNT: - tempstat = get_status_CEX2Acount(); - if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0) - ret = -EFAULT; - break; - - case Z90STAT_REQUESTQ_COUNT: - tempstat = get_status_requestq_count(); - if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0) - ret = -EFAULT; - break; - - case Z90STAT_PENDINGQ_COUNT: - tempstat = get_status_pendingq_count(); - if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0) - ret = -EFAULT; - break; - - case Z90STAT_TOTALOPEN_COUNT: - tempstat = get_status_totalopen_count(); - if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0) - ret = -EFAULT; - break; - - case Z90STAT_DOMAIN_INDEX: - tempstat = get_status_domain_index(); - if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0) - ret = -EFAULT; - break; - - case Z90STAT_STATUS_MASK: - status = kmalloc(Z90CRYPT_NUM_APS, GFP_KERNEL); - if (!status) { - PRINTK("kmalloc for status failed!\n"); - ret = -ENOMEM; - break; - } - get_status_status_mask(status); - if (copy_to_user((char __user *) arg, status, Z90CRYPT_NUM_APS) - != 0) - ret = -EFAULT; - kfree(status); - break; - - case Z90STAT_QDEPTH_MASK: - qdepth = kmalloc(Z90CRYPT_NUM_APS, GFP_KERNEL); - if (!qdepth) { - PRINTK("kmalloc for qdepth failed!\n"); - ret = -ENOMEM; - break; - } - get_status_qdepth_mask(qdepth); - if (copy_to_user((char __user *) arg, qdepth, Z90CRYPT_NUM_APS) != 0) - ret = -EFAULT; - kfree(qdepth); - break; - - case Z90STAT_PERDEV_REQCNT: - reqcnt = kmalloc(sizeof(int) * Z90CRYPT_NUM_APS, GFP_KERNEL); - if (!reqcnt) { - PRINTK("kmalloc for reqcnt failed!\n"); - ret = -ENOMEM; - break; - } - get_status_perdevice_reqcnt(reqcnt); - if (copy_to_user((char __user *) arg, reqcnt, - Z90CRYPT_NUM_APS * sizeof(int)) != 0) - ret = -EFAULT; - kfree(reqcnt); - break; - - /* THIS IS DEPRECATED. USE THE NEW STATUS CALLS */ - case ICAZ90STATUS: - if (deprecated_msg_count1 < 20) { - PRINTK("deprecated call to ioctl (ICAZ90STATUS)!\n"); - deprecated_msg_count1++; - if (deprecated_msg_count1 == 20) - PRINTK("No longer issuing messages related to " - "deprecated call to ICAZ90STATUS.\n"); - } - - pstat = kmalloc(sizeof(struct ica_z90_status), GFP_KERNEL); - if (!pstat) { - PRINTK("kmalloc for pstat failed!\n"); - ret = -ENOMEM; - break; - } - - pstat->totalcount = get_status_totalcount(); - pstat->leedslitecount = get_status_PCICAcount(); - pstat->leeds2count = get_status_PCICCcount(); - pstat->requestqWaitCount = get_status_requestq_count(); - pstat->pendingqWaitCount = get_status_pendingq_count(); - pstat->totalOpenCount = get_status_totalopen_count(); - pstat->cryptoDomain = get_status_domain_index(); - get_status_status_mask(pstat->status); - get_status_qdepth_mask(pstat->qdepth); - - if (copy_to_user((struct ica_z90_status __user *) arg, pstat, - sizeof(struct ica_z90_status)) != 0) - ret = -EFAULT; - kfree(pstat); - break; - - /* THIS IS DEPRECATED. USE THE NEW STATUS CALLS */ - case Z90STAT_PCIXCCCOUNT: - if (deprecated_msg_count2 < 20) { - PRINTK("deprecated ioctl (Z90STAT_PCIXCCCOUNT)!\n"); - deprecated_msg_count2++; - if (deprecated_msg_count2 == 20) - PRINTK("No longer issuing messages about depre" - "cated ioctl Z90STAT_PCIXCCCOUNT.\n"); - } - - tempstat = get_status_PCIXCCcount(); - if (copy_to_user((int *)arg, &tempstat, sizeof(int)) != 0) - ret = -EFAULT; - break; - - case Z90QUIESCE: - if (current->euid != 0) { - PRINTK("QUIESCE fails: euid %d\n", - current->euid); - ret = -EACCES; - } else { - PRINTK("QUIESCE device from PID %d\n", PID()); - quiesce_z90crypt = 1; - } - break; - - default: - /* user passed an invalid IOCTL number */ - PDEBUG("cmd 0x%08X contains invalid ioctl code\n", cmd); - ret = -ENOTTY; - break; - } - - return ret; -} - -static inline int -sprintcl(unsigned char *outaddr, unsigned char *addr, unsigned int len) -{ - int hl, i; - - hl = 0; - for (i = 0; i < len; i++) - hl += sprintf(outaddr+hl, "%01x", (unsigned int) addr[i]); - hl += sprintf(outaddr+hl, " "); - - return hl; -} - -static inline int -sprintrw(unsigned char *outaddr, unsigned char *addr, unsigned int len) -{ - int hl, inl, c, cx; - - hl = sprintf(outaddr, " "); - inl = 0; - for (c = 0; c < (len / 16); c++) { - hl += sprintcl(outaddr+hl, addr+inl, 16); - inl += 16; - } - - cx = len%16; - if (cx) { - hl += sprintcl(outaddr+hl, addr+inl, cx); - inl += cx; - } - - hl += sprintf(outaddr+hl, "\n"); - - return hl; -} - -static inline int -sprinthx(unsigned char *title, unsigned char *outaddr, - unsigned char *addr, unsigned int len) -{ - int hl, inl, r, rx; - - hl = sprintf(outaddr, "\n%s\n", title); - inl = 0; - for (r = 0; r < (len / 64); r++) { - hl += sprintrw(outaddr+hl, addr+inl, 64); - inl += 64; - } - rx = len % 64; - if (rx) { - hl += sprintrw(outaddr+hl, addr+inl, rx); - inl += rx; - } - - hl += sprintf(outaddr+hl, "\n"); - - return hl; -} - -static inline int -sprinthx4(unsigned char *title, unsigned char *outaddr, - unsigned int *array, unsigned int len) -{ - int hl, r; - - hl = sprintf(outaddr, "\n%s\n", title); - - for (r = 0; r < len; r++) { - if ((r % 8) == 0) - hl += sprintf(outaddr+hl, " "); - hl += sprintf(outaddr+hl, "%08X ", array[r]); - if ((r % 8) == 7) - hl += sprintf(outaddr+hl, "\n"); - } - - hl += sprintf(outaddr+hl, "\n"); - - return hl; -} - -static int -z90crypt_status(char *resp_buff, char **start, off_t offset, - int count, int *eof, void *data) -{ - unsigned char *workarea; - int len; - - /* resp_buff is a page. Use the right half for a work area */ - workarea = resp_buff+2000; - len = 0; - len += sprintf(resp_buff+len, "\nz90crypt version: %d.%d.%d\n", - z90crypt_VERSION, z90crypt_RELEASE, z90crypt_VARIANT); - len += sprintf(resp_buff+len, "Cryptographic domain: %d\n", - get_status_domain_index()); - len += sprintf(resp_buff+len, "Total device count: %d\n", - get_status_totalcount()); - len += sprintf(resp_buff+len, "PCICA count: %d\n", - get_status_PCICAcount()); - len += sprintf(resp_buff+len, "PCICC count: %d\n", - get_status_PCICCcount()); - len += sprintf(resp_buff+len, "PCIXCC MCL2 count: %d\n", - get_status_PCIXCCMCL2count()); - len += sprintf(resp_buff+len, "PCIXCC MCL3 count: %d\n", - get_status_PCIXCCMCL3count()); - len += sprintf(resp_buff+len, "CEX2C count: %d\n", - get_status_CEX2Ccount()); - len += sprintf(resp_buff+len, "CEX2A count: %d\n", - get_status_CEX2Acount()); - len += sprintf(resp_buff+len, "requestq count: %d\n", - get_status_requestq_count()); - len += sprintf(resp_buff+len, "pendingq count: %d\n", - get_status_pendingq_count()); - len += sprintf(resp_buff+len, "Total open handles: %d\n\n", - get_status_totalopen_count()); - len += sprinthx( - "Online devices: 1=PCICA 2=PCICC 3=PCIXCC(MCL2) " - "4=PCIXCC(MCL3) 5=CEX2C 6=CEX2A", - resp_buff+len, - get_status_status_mask(workarea), - Z90CRYPT_NUM_APS); - len += sprinthx("Waiting work element counts", - resp_buff+len, - get_status_qdepth_mask(workarea), - Z90CRYPT_NUM_APS); - len += sprinthx4( - "Per-device successfully completed request counts", - resp_buff+len, - get_status_perdevice_reqcnt((unsigned int *)workarea), - Z90CRYPT_NUM_APS); - *eof = 1; - memset(workarea, 0, Z90CRYPT_NUM_APS * sizeof(unsigned int)); - return len; -} - -static inline void -disable_card(int card_index) -{ - struct device *devp; - - devp = LONG2DEVPTR(card_index); - if (!devp || devp->user_disabled) - return; - devp->user_disabled = 1; - z90crypt.hdware_info->hdware_mask.user_disabled_count++; - if (devp->dev_type == -1) - return; - z90crypt.hdware_info->type_mask[devp->dev_type].user_disabled_count++; -} - -static inline void -enable_card(int card_index) -{ - struct device *devp; - - devp = LONG2DEVPTR(card_index); - if (!devp || !devp->user_disabled) - return; - devp->user_disabled = 0; - z90crypt.hdware_info->hdware_mask.user_disabled_count--; - if (devp->dev_type == -1) - return; - z90crypt.hdware_info->type_mask[devp->dev_type].user_disabled_count--; -} - -static int -z90crypt_status_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) -{ - int j, eol; - unsigned char *lbuf, *ptr; - unsigned int local_count; - -#define LBUFSIZE 1200 - lbuf = kmalloc(LBUFSIZE, GFP_KERNEL); - if (!lbuf) { - PRINTK("kmalloc failed!\n"); - return 0; - } - - if (count <= 0) - return 0; - - local_count = UMIN((unsigned int)count, LBUFSIZE-1); - - if (copy_from_user(lbuf, buffer, local_count) != 0) { - kfree(lbuf); - return -EFAULT; - } - - lbuf[local_count] = '\0'; - - ptr = strstr(lbuf, "Online devices"); - if (ptr == 0) { - PRINTK("Unable to parse data (missing \"Online devices\")\n"); - kfree(lbuf); - return count; - } - - ptr = strstr(ptr, "\n"); - if (ptr == 0) { - PRINTK("Unable to parse data (missing newline after \"Online devices\")\n"); - kfree(lbuf); - return count; - } - ptr++; - - if (strstr(ptr, "Waiting work element counts") == NULL) { - PRINTK("Unable to parse data (missing \"Waiting work element counts\")\n"); - kfree(lbuf); - return count; - } - - j = 0; - eol = 0; - while ((j < 64) && (*ptr != '\0')) { - switch (*ptr) { - case '\t': - case ' ': - break; - case '\n': - default: - eol = 1; - break; - case '0': // no device - case '1': // PCICA - case '2': // PCICC - case '3': // PCIXCC_MCL2 - case '4': // PCIXCC_MCL3 - case '5': // CEX2C - case '6': // CEX2A - j++; - break; - case 'd': - case 'D': - disable_card(j); - j++; - break; - case 'e': - case 'E': - enable_card(j); - j++; - break; - } - if (eol) - break; - ptr++; - } - - kfree(lbuf); - return count; -} - -/** - * Functions that run under a timer, with no process id - * - * The task functions: - * z90crypt_reader_task - * helper_send_work - * helper_handle_work_element - * helper_receive_rc - * z90crypt_config_task - * z90crypt_cleanup_task - * - * Helper functions: - * z90crypt_schedule_reader_timer - * z90crypt_schedule_reader_task - * z90crypt_schedule_config_task - * z90crypt_schedule_cleanup_task - */ -static inline int -receive_from_crypto_device(int index, unsigned char *psmid, int *buff_len_p, - unsigned char *buff, unsigned char __user **dest_p_p) -{ - int dv, rv; - struct device *dev_ptr; - struct caller *caller_p; - struct ica_rsa_modexpo *icaMsg_p; - struct list_head *ptr, *tptr; - - memcpy(psmid, NULL_psmid, sizeof(NULL_psmid)); - - if (z90crypt.terminating) - return REC_FATAL_ERROR; - - caller_p = 0; - dev_ptr = z90crypt.device_p[index]; - rv = 0; - do { - if (!dev_ptr || dev_ptr->disabled) { - rv = REC_NO_WORK; // a disabled device can't return work - break; - } - if (dev_ptr->dev_self_x != index) { - PRINTKC("Corrupt dev ptr\n"); - z90crypt.terminating = 1; - rv = REC_FATAL_ERROR; - break; - } - if (!dev_ptr->dev_resp_l || !dev_ptr->dev_resp_p) { - dv = DEV_REC_EXCEPTION; - PRINTK("dev_resp_l = %d, dev_resp_p = %p\n", - dev_ptr->dev_resp_l, dev_ptr->dev_resp_p); - } else { - PDEBUG("Dequeue called for device %d\n", index); - dv = receive_from_AP(index, z90crypt.cdx, - dev_ptr->dev_resp_l, - dev_ptr->dev_resp_p, psmid); - } - switch (dv) { - case DEV_REC_EXCEPTION: - rv = REC_FATAL_ERROR; - z90crypt.terminating = 1; - PRINTKC("Exception in receive from device %d\n", - index); - break; - case DEV_ONLINE: - rv = 0; - break; - case DEV_EMPTY: - rv = REC_EMPTY; - break; - case DEV_NO_WORK: - rv = REC_NO_WORK; - break; - case DEV_BAD_MESSAGE: - case DEV_GONE: - case REC_HARDWAR_ERR: - default: - rv = REC_NO_RESPONSE; - break; - } - if (rv) - break; - if (dev_ptr->dev_caller_count <= 0) { - rv = REC_USER_GONE; - break; - } - - list_for_each_safe(ptr, tptr, &dev_ptr->dev_caller_list) { - caller_p = list_entry(ptr, struct caller, caller_liste); - if (!memcmp(caller_p->caller_id, psmid, - sizeof(caller_p->caller_id))) { - if (!list_empty(&caller_p->caller_liste)) { - list_del_init(ptr); - dev_ptr->dev_caller_count--; - break; - } - } - caller_p = 0; - } - if (!caller_p) { - PRINTKW("Unable to locate PSMID %02X%02X%02X%02X%02X" - "%02X%02X%02X in device list\n", - psmid[0], psmid[1], psmid[2], psmid[3], - psmid[4], psmid[5], psmid[6], psmid[7]); - rv = REC_USER_GONE; - break; - } - - PDEBUG("caller_p after successful receive: %p\n", caller_p); - rv = convert_response(dev_ptr->dev_resp_p, - caller_p->caller_buf_p, buff_len_p, buff); - switch (rv) { - case REC_USE_PCICA: - break; - case REC_OPERAND_INV: - case REC_OPERAND_SIZE: - case REC_EVEN_MOD: - case REC_INVALID_PAD: - PDEBUG("device %d: 'user error' %d\n", index, rv); - break; - case WRONG_DEVICE_TYPE: - case REC_HARDWAR_ERR: - case REC_BAD_MESSAGE: - PRINTKW("device %d: hardware error %d\n", index, rv); - rv = REC_NO_RESPONSE; - break; - default: - PDEBUG("device %d: rv = %d\n", index, rv); - break; - } - } while (0); - - switch (rv) { - case 0: - PDEBUG("Successful receive from device %d\n", index); - icaMsg_p = (struct ica_rsa_modexpo *)caller_p->caller_buf_p; - *dest_p_p = icaMsg_p->outputdata; - if (*buff_len_p == 0) - PRINTK("Zero *buff_len_p\n"); - break; - case REC_NO_RESPONSE: - PRINTKW("Removing device %d from availability\n", index); - remove_device(dev_ptr); - break; - } - - if (caller_p) - unbuild_caller(dev_ptr, caller_p); - - return rv; -} - -static inline void -helper_send_work(int index) -{ - struct work_element *rq_p; - int rv; - - if (list_empty(&request_list)) - return; - requestq_count--; - rq_p = list_entry(request_list.next, struct work_element, liste); - list_del_init(&rq_p->liste); - rq_p->audit[1] |= FP_REMREQUEST; - if (rq_p->devtype == SHRT2DEVPTR(index)->dev_type) { - rq_p->devindex = SHRT2LONG(index); - rv = send_to_crypto_device(rq_p); - if (rv == 0) { - rq_p->requestsent = jiffies; - rq_p->audit[0] |= FP_SENT; - list_add_tail(&rq_p->liste, &pending_list); - ++pendingq_count; - rq_p->audit[0] |= FP_PENDING; - } else { - switch (rv) { - case REC_OPERAND_INV: - case REC_OPERAND_SIZE: - case REC_EVEN_MOD: - case REC_INVALID_PAD: - rq_p->retcode = -EINVAL; - break; - case SEN_NOT_AVAIL: - case SEN_RETRY: - case REC_NO_RESPONSE: - default: - if (z90crypt.mask.st_count > 1) - rq_p->retcode = - -ERESTARTSYS; - else - rq_p->retcode = -ENODEV; - break; - } - rq_p->status[0] |= STAT_FAILED; - rq_p->audit[1] |= FP_AWAKENING; - atomic_set(&rq_p->alarmrung, 1); - wake_up(&rq_p->waitq); - } - } else { - if (z90crypt.mask.st_count > 1) - rq_p->retcode = -ERESTARTSYS; - else - rq_p->retcode = -ENODEV; - rq_p->status[0] |= STAT_FAILED; - rq_p->audit[1] |= FP_AWAKENING; - atomic_set(&rq_p->alarmrung, 1); - wake_up(&rq_p->waitq); - } -} - -static inline void -helper_handle_work_element(int index, unsigned char psmid[8], int rc, - int buff_len, unsigned char *buff, - unsigned char __user *resp_addr) -{ - struct work_element *pq_p; - struct list_head *lptr, *tptr; - - pq_p = 0; - list_for_each_safe(lptr, tptr, &pending_list) { - pq_p = list_entry(lptr, struct work_element, liste); - if (!memcmp(pq_p->caller_id, psmid, sizeof(pq_p->caller_id))) { - list_del_init(lptr); - pendingq_count--; - pq_p->audit[1] |= FP_NOTPENDING; - break; - } - pq_p = 0; - } - - if (!pq_p) { - PRINTK("device %d has work but no caller exists on pending Q\n", - SHRT2LONG(index)); - return; - } - - switch (rc) { - case 0: - pq_p->resp_buff_size = buff_len; - pq_p->audit[1] |= FP_RESPSIZESET; - if (buff_len) { - pq_p->resp_addr = resp_addr; - pq_p->audit[1] |= FP_RESPADDRCOPIED; - memcpy(pq_p->resp_buff, buff, buff_len); - pq_p->audit[1] |= FP_RESPBUFFCOPIED; - } - break; - case REC_OPERAND_INV: - case REC_OPERAND_SIZE: - case REC_EVEN_MOD: - case REC_INVALID_PAD: - PDEBUG("-EINVAL after application error %d\n", rc); - pq_p->retcode = -EINVAL; - pq_p->status[0] |= STAT_FAILED; - break; - case REC_USE_PCICA: - pq_p->retcode = -ERESTARTSYS; - pq_p->status[0] |= STAT_FAILED; - break; - case REC_NO_RESPONSE: - default: - if (z90crypt.mask.st_count > 1) - pq_p->retcode = -ERESTARTSYS; - else - pq_p->retcode = -ENODEV; - pq_p->status[0] |= STAT_FAILED; - break; - } - if ((pq_p->status[0] != STAT_FAILED) || (pq_p->retcode != -ERELEASED)) { - pq_p->audit[1] |= FP_AWAKENING; - atomic_set(&pq_p->alarmrung, 1); - wake_up(&pq_p->waitq); - } -} - -/** - * return TRUE if the work element should be removed from the queue - */ -static inline int -helper_receive_rc(int index, int *rc_p) -{ - switch (*rc_p) { - case 0: - case REC_OPERAND_INV: - case REC_OPERAND_SIZE: - case REC_EVEN_MOD: - case REC_INVALID_PAD: - case REC_USE_PCICA: - break; - - case REC_BUSY: - case REC_NO_WORK: - case REC_EMPTY: - case REC_RETRY_DEV: - case REC_FATAL_ERROR: - return 0; - - case REC_NO_RESPONSE: - break; - - default: - PRINTK("rc %d, device %d converted to REC_NO_RESPONSE\n", - *rc_p, SHRT2LONG(index)); - *rc_p = REC_NO_RESPONSE; - break; - } - return 1; -} - -static inline void -z90crypt_schedule_reader_timer(void) -{ - if (timer_pending(&reader_timer)) - return; - if (mod_timer(&reader_timer, jiffies+(READERTIME*HZ/1000)) != 0) - PRINTK("Timer pending while modifying reader timer\n"); -} - -static void -z90crypt_reader_task(unsigned long ptr) -{ - int workavail, index, rc, buff_len; - unsigned char psmid[8]; - unsigned char __user *resp_addr; - static unsigned char buff[1024]; - - /** - * we use workavail = 2 to ensure 2 passes with nothing dequeued before - * exiting the loop. If (pendingq_count+requestq_count) == 0 after the - * loop, there is no work remaining on the queues. - */ - resp_addr = 0; - workavail = 2; - buff_len = 0; - while (workavail) { - workavail--; - rc = 0; - spin_lock_irq(&queuespinlock); - memset(buff, 0x00, sizeof(buff)); - - /* Dequeue once from each device in round robin. */ - for (index = 0; index < z90crypt.mask.st_count; index++) { - PDEBUG("About to receive.\n"); - rc = receive_from_crypto_device(SHRT2LONG(index), - psmid, - &buff_len, - buff, - &resp_addr); - PDEBUG("Dequeued: rc = %d.\n", rc); - - if (helper_receive_rc(index, &rc)) { - if (rc != REC_NO_RESPONSE) { - helper_send_work(index); - workavail = 2; - } - - helper_handle_work_element(index, psmid, rc, - buff_len, buff, - resp_addr); - } - - if (rc == REC_FATAL_ERROR) - PRINTKW("REC_FATAL_ERROR from device %d!\n", - SHRT2LONG(index)); - } - spin_unlock_irq(&queuespinlock); - } - - if (pendingq_count + requestq_count) - z90crypt_schedule_reader_timer(); -} - -static inline void -z90crypt_schedule_config_task(unsigned int expiration) -{ - if (timer_pending(&config_timer)) - return; - if (mod_timer(&config_timer, jiffies+(expiration*HZ)) != 0) - PRINTK("Timer pending while modifying config timer\n"); -} - -static void -z90crypt_config_task(unsigned long ptr) -{ - int rc; - - PDEBUG("jiffies %ld\n", jiffies); - - if ((rc = refresh_z90crypt(&z90crypt.cdx))) - PRINTK("Error %d detected in refresh_z90crypt.\n", rc); - /* If return was fatal, don't bother reconfiguring */ - if ((rc != TSQ_FATAL_ERROR) && (rc != RSQ_FATAL_ERROR)) - z90crypt_schedule_config_task(CONFIGTIME); -} - -static inline void -z90crypt_schedule_cleanup_task(void) -{ - if (timer_pending(&cleanup_timer)) - return; - if (mod_timer(&cleanup_timer, jiffies+(CLEANUPTIME*HZ)) != 0) - PRINTK("Timer pending while modifying cleanup timer\n"); -} - -static inline void -helper_drain_queues(void) -{ - struct work_element *pq_p; - struct list_head *lptr, *tptr; - - list_for_each_safe(lptr, tptr, &pending_list) { - pq_p = list_entry(lptr, struct work_element, liste); - pq_p->retcode = -ENODEV; - pq_p->status[0] |= STAT_FAILED; - unbuild_caller(LONG2DEVPTR(pq_p->devindex), - (struct caller *)pq_p->requestptr); - list_del_init(lptr); - pendingq_count--; - pq_p->audit[1] |= FP_NOTPENDING; - pq_p->audit[1] |= FP_AWAKENING; - atomic_set(&pq_p->alarmrung, 1); - wake_up(&pq_p->waitq); - } - - list_for_each_safe(lptr, tptr, &request_list) { - pq_p = list_entry(lptr, struct work_element, liste); - pq_p->retcode = -ENODEV; - pq_p->status[0] |= STAT_FAILED; - list_del_init(lptr); - requestq_count--; - pq_p->audit[1] |= FP_REMREQUEST; - pq_p->audit[1] |= FP_AWAKENING; - atomic_set(&pq_p->alarmrung, 1); - wake_up(&pq_p->waitq); - } -} - -static inline void -helper_timeout_requests(void) -{ - struct work_element *pq_p; - struct list_head *lptr, *tptr; - long timelimit; - - timelimit = jiffies - (CLEANUPTIME * HZ); - /* The list is in strict chronological order */ - list_for_each_safe(lptr, tptr, &pending_list) { - pq_p = list_entry(lptr, struct work_element, liste); - if (pq_p->requestsent >= timelimit) - break; - PRINTKW("Purging(PQ) PSMID %02X%02X%02X%02X%02X%02X%02X%02X\n", - ((struct caller *)pq_p->requestptr)->caller_id[0], - ((struct caller *)pq_p->requestptr)->caller_id[1], - ((struct caller *)pq_p->requestptr)->caller_id[2], - ((struct caller *)pq_p->requestptr)->caller_id[3], - ((struct caller *)pq_p->requestptr)->caller_id[4], - ((struct caller *)pq_p->requestptr)->caller_id[5], - ((struct caller *)pq_p->requestptr)->caller_id[6], - ((struct caller *)pq_p->requestptr)->caller_id[7]); - pq_p->retcode = -ETIMEOUT; - pq_p->status[0] |= STAT_FAILED; - /* get this off any caller queue it may be on */ - unbuild_caller(LONG2DEVPTR(pq_p->devindex), - (struct caller *) pq_p->requestptr); - list_del_init(lptr); - pendingq_count--; - pq_p->audit[1] |= FP_TIMEDOUT; - pq_p->audit[1] |= FP_NOTPENDING; - pq_p->audit[1] |= FP_AWAKENING; - atomic_set(&pq_p->alarmrung, 1); - wake_up(&pq_p->waitq); - } - - /** - * If pending count is zero, items left on the request queue may - * never be processed. - */ - if (pendingq_count <= 0) { - list_for_each_safe(lptr, tptr, &request_list) { - pq_p = list_entry(lptr, struct work_element, liste); - if (pq_p->requestsent >= timelimit) - break; - PRINTKW("Purging(RQ) PSMID %02X%02X%02X%02X%02X%02X%02X%02X\n", - ((struct caller *)pq_p->requestptr)->caller_id[0], - ((struct caller *)pq_p->requestptr)->caller_id[1], - ((struct caller *)pq_p->requestptr)->caller_id[2], - ((struct caller *)pq_p->requestptr)->caller_id[3], - ((struct caller *)pq_p->requestptr)->caller_id[4], - ((struct caller *)pq_p->requestptr)->caller_id[5], - ((struct caller *)pq_p->requestptr)->caller_id[6], - ((struct caller *)pq_p->requestptr)->caller_id[7]); - pq_p->retcode = -ETIMEOUT; - pq_p->status[0] |= STAT_FAILED; - list_del_init(lptr); - requestq_count--; - pq_p->audit[1] |= FP_TIMEDOUT; - pq_p->audit[1] |= FP_REMREQUEST; - pq_p->audit[1] |= FP_AWAKENING; - atomic_set(&pq_p->alarmrung, 1); - wake_up(&pq_p->waitq); - } - } -} - -static void -z90crypt_cleanup_task(unsigned long ptr) -{ - PDEBUG("jiffies %ld\n", jiffies); - spin_lock_irq(&queuespinlock); - if (z90crypt.mask.st_count <= 0) // no devices! - helper_drain_queues(); - else - helper_timeout_requests(); - spin_unlock_irq(&queuespinlock); - z90crypt_schedule_cleanup_task(); -} - -static void -z90crypt_schedule_reader_task(unsigned long ptr) -{ - tasklet_schedule(&reader_tasklet); -} - -/** - * Lowlevel Functions: - * - * create_z90crypt: creates and initializes basic data structures - * refresh_z90crypt: re-initializes basic data structures - * find_crypto_devices: returns a count and mask of hardware status - * create_crypto_device: builds the descriptor for a device - * destroy_crypto_device: unallocates the descriptor for a device - * destroy_z90crypt: drains all work, unallocates structs - */ - -/** - * build the z90crypt root structure using the given domain index - */ -static int -create_z90crypt(int *cdx_p) -{ - struct hdware_block *hdware_blk_p; - - memset(&z90crypt, 0x00, sizeof(struct z90crypt)); - z90crypt.domain_established = 0; - z90crypt.len = sizeof(struct z90crypt); - z90crypt.max_count = Z90CRYPT_NUM_DEVS; - z90crypt.cdx = *cdx_p; - - hdware_blk_p = kzalloc(sizeof(struct hdware_block), GFP_ATOMIC); - if (!hdware_blk_p) { - PDEBUG("kmalloc for hardware block failed\n"); - return ENOMEM; - } - z90crypt.hdware_info = hdware_blk_p; - - return 0; -} - -static inline int -helper_scan_devices(int cdx_array[16], int *cdx_p, int *correct_cdx_found) -{ - enum hdstat hd_stat; - int q_depth, dev_type; - int indx, chkdom, numdomains; - - q_depth = dev_type = numdomains = 0; - for (chkdom = 0; chkdom <= 15; cdx_array[chkdom++] = -1); - for (indx = 0; indx < z90crypt.max_count; indx++) { - hd_stat = HD_NOT_THERE; - numdomains = 0; - for (chkdom = 0; chkdom <= 15; chkdom++) { - hd_stat = query_online(indx, chkdom, MAX_RESET, - &q_depth, &dev_type); - if (hd_stat == HD_TSQ_EXCEPTION) { - z90crypt.terminating = 1; - PRINTKC("exception taken!\n"); - break; - } - if (hd_stat == HD_ONLINE) { - cdx_array[numdomains++] = chkdom; - if (*cdx_p == chkdom) { - *correct_cdx_found = 1; - break; - } - } - } - if ((*correct_cdx_found == 1) || (numdomains != 0)) - break; - if (z90crypt.terminating) - break; - } - return numdomains; -} - -static inline int -probe_crypto_domain(int *cdx_p) -{ - int cdx_array[16]; - char cdx_array_text[53], temp[5]; - int correct_cdx_found, numdomains; - - correct_cdx_found = 0; - numdomains = helper_scan_devices(cdx_array, cdx_p, &correct_cdx_found); - - if (z90crypt.terminating) - return TSQ_FATAL_ERROR; - - if (correct_cdx_found) - return 0; - - if (numdomains == 0) { - PRINTKW("Unable to find crypto domain: No devices found\n"); - return Z90C_NO_DEVICES; - } - - if (numdomains == 1) { - if (*cdx_p == -1) { - *cdx_p = cdx_array[0]; - return 0; - } - PRINTKW("incorrect domain: specified = %d, found = %d\n", - *cdx_p, cdx_array[0]); - return Z90C_INCORRECT_DOMAIN; - } - - numdomains--; - sprintf(cdx_array_text, "%d", cdx_array[numdomains]); - while (numdomains) { - numdomains--; - sprintf(temp, ", %d", cdx_array[numdomains]); - strcat(cdx_array_text, temp); - } - - PRINTKW("ambiguous domain detected: specified = %d, found array = %s\n", - *cdx_p, cdx_array_text); - return Z90C_AMBIGUOUS_DOMAIN; -} - -static int -refresh_z90crypt(int *cdx_p) -{ - int i, j, indx, rv; - static struct status local_mask; - struct device *devPtr; - unsigned char oldStat, newStat; - int return_unchanged; - - if (z90crypt.len != sizeof(z90crypt)) - return ENOTINIT; - if (z90crypt.terminating) - return TSQ_FATAL_ERROR; - rv = 0; - if (!z90crypt.hdware_info->hdware_mask.st_count && - !z90crypt.domain_established) { - rv = probe_crypto_domain(cdx_p); - if (z90crypt.terminating) - return TSQ_FATAL_ERROR; - if (rv == Z90C_NO_DEVICES) - return 0; // try later - if (rv) - return rv; - z90crypt.cdx = *cdx_p; - z90crypt.domain_established = 1; - } - rv = find_crypto_devices(&local_mask); - if (rv) { - PRINTK("find crypto devices returned %d\n", rv); - return rv; - } - if (!memcmp(&local_mask, &z90crypt.hdware_info->hdware_mask, - sizeof(struct status))) { - return_unchanged = 1; - for (i = 0; i < Z90CRYPT_NUM_TYPES; i++) { - /** - * Check for disabled cards. If any device is marked - * disabled, destroy it. - */ - for (j = 0; - j < z90crypt.hdware_info->type_mask[i].st_count; - j++) { - indx = z90crypt.hdware_info->type_x_addr[i]. - device_index[j]; - devPtr = z90crypt.device_p[indx]; - if (devPtr && devPtr->disabled) { - local_mask.st_mask[indx] = HD_NOT_THERE; - return_unchanged = 0; - } - } - } - if (return_unchanged == 1) - return 0; - } - - spin_lock_irq(&queuespinlock); - for (i = 0; i < z90crypt.max_count; i++) { - oldStat = z90crypt.hdware_info->hdware_mask.st_mask[i]; - newStat = local_mask.st_mask[i]; - if ((oldStat == HD_ONLINE) && (newStat != HD_ONLINE)) - destroy_crypto_device(i); - else if ((oldStat != HD_ONLINE) && (newStat == HD_ONLINE)) { - rv = create_crypto_device(i); - if (rv >= REC_FATAL_ERROR) - return rv; - if (rv != 0) { - local_mask.st_mask[i] = HD_NOT_THERE; - local_mask.st_count--; - } - } - } - memcpy(z90crypt.hdware_info->hdware_mask.st_mask, local_mask.st_mask, - sizeof(local_mask.st_mask)); - z90crypt.hdware_info->hdware_mask.st_count = local_mask.st_count; - z90crypt.hdware_info->hdware_mask.disabled_count = - local_mask.disabled_count; - refresh_index_array(&z90crypt.mask, &z90crypt.overall_device_x); - for (i = 0; i < Z90CRYPT_NUM_TYPES; i++) - refresh_index_array(&(z90crypt.hdware_info->type_mask[i]), - &(z90crypt.hdware_info->type_x_addr[i])); - spin_unlock_irq(&queuespinlock); - - return rv; -} - -static int -find_crypto_devices(struct status *deviceMask) -{ - int i, q_depth, dev_type; - enum hdstat hd_stat; - - deviceMask->st_count = 0; - deviceMask->disabled_count = 0; - deviceMask->user_disabled_count = 0; - - for (i = 0; i < z90crypt.max_count; i++) { - hd_stat = query_online(i, z90crypt.cdx, MAX_RESET, &q_depth, - &dev_type); - if (hd_stat == HD_TSQ_EXCEPTION) { - z90crypt.terminating = 1; - PRINTKC("Exception during probe for crypto devices\n"); - return TSQ_FATAL_ERROR; - } - deviceMask->st_mask[i] = hd_stat; - if (hd_stat == HD_ONLINE) { - PDEBUG("Got an online crypto!: %d\n", i); - PDEBUG("Got a queue depth of %d\n", q_depth); - PDEBUG("Got a device type of %d\n", dev_type); - if (q_depth <= 0) - return TSQ_FATAL_ERROR; - deviceMask->st_count++; - z90crypt.q_depth_array[i] = q_depth; - z90crypt.dev_type_array[i] = dev_type; - } - } - - return 0; -} - -static int -refresh_index_array(struct status *status_str, struct device_x *index_array) -{ - int i, count; - enum devstat stat; - - i = -1; - count = 0; - do { - stat = status_str->st_mask[++i]; - if (stat == DEV_ONLINE) - index_array->device_index[count++] = i; - } while ((i < Z90CRYPT_NUM_DEVS) && (count < status_str->st_count)); - - return count; -} - -static int -create_crypto_device(int index) -{ - int rv, devstat, total_size; - struct device *dev_ptr; - struct status *type_str_p; - int deviceType; - - dev_ptr = z90crypt.device_p[index]; - if (!dev_ptr) { - total_size = sizeof(struct device) + - z90crypt.q_depth_array[index] * sizeof(int); - - dev_ptr = kzalloc(total_size, GFP_ATOMIC); - if (!dev_ptr) { - PRINTK("kmalloc device %d failed\n", index); - return ENOMEM; - } - dev_ptr->dev_resp_p = kmalloc(MAX_RESPONSE_SIZE, GFP_ATOMIC); - if (!dev_ptr->dev_resp_p) { - kfree(dev_ptr); - PRINTK("kmalloc device %d rec buffer failed\n", index); - return ENOMEM; - } - dev_ptr->dev_resp_l = MAX_RESPONSE_SIZE; - INIT_LIST_HEAD(&(dev_ptr->dev_caller_list)); - } - - devstat = reset_device(index, z90crypt.cdx, MAX_RESET); - if (devstat == DEV_RSQ_EXCEPTION) { - PRINTK("exception during reset device %d\n", index); - kfree(dev_ptr->dev_resp_p); - kfree(dev_ptr); - return RSQ_FATAL_ERROR; - } - if (devstat == DEV_ONLINE) { - dev_ptr->dev_self_x = index; - dev_ptr->dev_type = z90crypt.dev_type_array[index]; - if (dev_ptr->dev_type == NILDEV) { - rv = probe_device_type(dev_ptr); - if (rv) { - PRINTK("rv = %d from probe_device_type %d\n", - rv, index); - kfree(dev_ptr->dev_resp_p); - kfree(dev_ptr); - return rv; - } - } - if (dev_ptr->dev_type == PCIXCC_UNK) { - rv = probe_PCIXCC_type(dev_ptr); - if (rv) { - PRINTK("rv = %d from probe_PCIXCC_type %d\n", - rv, index); - kfree(dev_ptr->dev_resp_p); - kfree(dev_ptr); - return rv; - } - } - deviceType = dev_ptr->dev_type; - z90crypt.dev_type_array[index] = deviceType; - if (deviceType == PCICA) - z90crypt.hdware_info->device_type_array[index] = 1; - else if (deviceType == PCICC) - z90crypt.hdware_info->device_type_array[index] = 2; - else if (deviceType == PCIXCC_MCL2) - z90crypt.hdware_info->device_type_array[index] = 3; - else if (deviceType == PCIXCC_MCL3) - z90crypt.hdware_info->device_type_array[index] = 4; - else if (deviceType == CEX2C) - z90crypt.hdware_info->device_type_array[index] = 5; - else if (deviceType == CEX2A) - z90crypt.hdware_info->device_type_array[index] = 6; - else // No idea how this would happen. - z90crypt.hdware_info->device_type_array[index] = -1; - } - - /** - * 'q_depth' returned by the hardware is one less than - * the actual depth - */ - dev_ptr->dev_q_depth = z90crypt.q_depth_array[index]; - dev_ptr->dev_type = z90crypt.dev_type_array[index]; - dev_ptr->dev_stat = devstat; - dev_ptr->disabled = 0; - z90crypt.device_p[index] = dev_ptr; - - if (devstat == DEV_ONLINE) { - if (z90crypt.mask.st_mask[index] != DEV_ONLINE) { - z90crypt.mask.st_mask[index] = DEV_ONLINE; - z90crypt.mask.st_count++; - } - deviceType = dev_ptr->dev_type; - type_str_p = &z90crypt.hdware_info->type_mask[deviceType]; - if (type_str_p->st_mask[index] != DEV_ONLINE) { - type_str_p->st_mask[index] = DEV_ONLINE; - type_str_p->st_count++; - } - } - - return 0; -} - -static int -destroy_crypto_device(int index) -{ - struct device *dev_ptr; - int t, disabledFlag; - - dev_ptr = z90crypt.device_p[index]; - - /* remember device type; get rid of device struct */ - if (dev_ptr) { - disabledFlag = dev_ptr->disabled; - t = dev_ptr->dev_type; - kfree(dev_ptr->dev_resp_p); - kfree(dev_ptr); - } else { - disabledFlag = 0; - t = -1; - } - z90crypt.device_p[index] = 0; - - /* if the type is valid, remove the device from the type_mask */ - if ((t != -1) && z90crypt.hdware_info->type_mask[t].st_mask[index]) { - z90crypt.hdware_info->type_mask[t].st_mask[index] = 0x00; - z90crypt.hdware_info->type_mask[t].st_count--; - if (disabledFlag == 1) - z90crypt.hdware_info->type_mask[t].disabled_count--; - } - if (z90crypt.mask.st_mask[index] != DEV_GONE) { - z90crypt.mask.st_mask[index] = DEV_GONE; - z90crypt.mask.st_count--; - } - z90crypt.hdware_info->device_type_array[index] = 0; - - return 0; -} - -static void -destroy_z90crypt(void) -{ - int i; - - for (i = 0; i < z90crypt.max_count; i++) - if (z90crypt.device_p[i]) - destroy_crypto_device(i); - kfree(z90crypt.hdware_info); - memset((void *)&z90crypt, 0, sizeof(z90crypt)); -} - -static unsigned char static_testmsg[384] = { -0x00,0x00,0x00,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x00,0x06,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x58, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x43,0x43, -0x41,0x2d,0x41,0x50,0x50,0x4c,0x20,0x20,0x20,0x01,0x01,0x01,0x00,0x00,0x00,0x00, -0x50,0x4b,0x00,0x00,0x00,0x00,0x01,0x1c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x05,0xb8,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x70,0x00,0x41,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x54,0x32, -0x01,0x00,0xa0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xb8,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x0a,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,0x49,0x43,0x53,0x46, -0x20,0x20,0x20,0x20,0x50,0x4b,0x0a,0x00,0x50,0x4b,0x43,0x53,0x2d,0x31,0x2e,0x32, -0x37,0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88,0x99,0x00,0x11,0x22,0x33,0x44, -0x55,0x66,0x77,0x88,0x99,0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88,0x99,0x00, -0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88,0x99,0x00,0x11,0x22,0x33,0x44,0x55,0x66, -0x77,0x88,0x99,0x00,0x11,0x22,0x33,0x5d,0x00,0x5b,0x00,0x77,0x88,0x1e,0x00,0x00, -0x57,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x4f,0x00,0x00,0x00,0x03,0x02,0x00,0x00, -0x40,0x01,0x00,0x01,0xce,0x02,0x68,0x2d,0x5f,0xa9,0xde,0x0c,0xf6,0xd2,0x7b,0x58, -0x4b,0xf9,0x28,0x68,0x3d,0xb4,0xf4,0xef,0x78,0xd5,0xbe,0x66,0x63,0x42,0xef,0xf8, -0xfd,0xa4,0xf8,0xb0,0x8e,0x29,0xc2,0xc9,0x2e,0xd8,0x45,0xb8,0x53,0x8c,0x6f,0x4e, -0x72,0x8f,0x6c,0x04,0x9c,0x88,0xfc,0x1e,0xc5,0x83,0x55,0x57,0xf7,0xdd,0xfd,0x4f, -0x11,0x36,0x95,0x5d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 -}; - -static int -probe_device_type(struct device *devPtr) -{ - int rv, dv, i, index, length; - unsigned char psmid[8]; - static unsigned char loc_testmsg[sizeof(static_testmsg)]; - - index = devPtr->dev_self_x; - rv = 0; - do { - memcpy(loc_testmsg, static_testmsg, sizeof(static_testmsg)); - length = sizeof(static_testmsg) - 24; - /* the -24 allows for the header */ - dv = send_to_AP(index, z90crypt.cdx, length, loc_testmsg); - if (dv) { - PDEBUG("dv returned by send during probe: %d\n", dv); - if (dv == DEV_SEN_EXCEPTION) { - rv = SEN_FATAL_ERROR; - PRINTKC("exception in send to AP %d\n", index); - break; - } - PDEBUG("return value from send_to_AP: %d\n", rv); - switch (dv) { - case DEV_GONE: - PDEBUG("dev %d not available\n", index); - rv = SEN_NOT_AVAIL; - break; - case DEV_ONLINE: - rv = 0; - break; - case DEV_EMPTY: - rv = SEN_NOT_AVAIL; - break; - case DEV_NO_WORK: - rv = SEN_FATAL_ERROR; - break; - case DEV_BAD_MESSAGE: - rv = SEN_USER_ERROR; - break; - case DEV_QUEUE_FULL: - rv = SEN_QUEUE_FULL; - break; - default: - PRINTK("unknown dv=%d for dev %d\n", dv, index); - rv = SEN_NOT_AVAIL; - break; - } - } - - if (rv) - break; - - for (i = 0; i < 6; i++) { - mdelay(300); - dv = receive_from_AP(index, z90crypt.cdx, - devPtr->dev_resp_l, - devPtr->dev_resp_p, psmid); - PDEBUG("dv returned by DQ = %d\n", dv); - if (dv == DEV_REC_EXCEPTION) { - rv = REC_FATAL_ERROR; - PRINTKC("exception in dequeue %d\n", - index); - break; - } - switch (dv) { - case DEV_ONLINE: - rv = 0; - break; - case DEV_EMPTY: - rv = REC_EMPTY; - break; - case DEV_NO_WORK: - rv = REC_NO_WORK; - break; - case DEV_BAD_MESSAGE: - case DEV_GONE: - default: - rv = REC_NO_RESPONSE; - break; - } - if ((rv != 0) && (rv != REC_NO_WORK)) - break; - if (rv == 0) - break; - } - if (rv) - break; - rv = (devPtr->dev_resp_p[0] == 0x00) && - (devPtr->dev_resp_p[1] == 0x86); - if (rv) - devPtr->dev_type = PCICC; - else - devPtr->dev_type = PCICA; - rv = 0; - } while (0); - /* In a general error case, the card is not marked online */ - return rv; -} - -static unsigned char MCL3_testmsg[] = { -0x00,0x00,0x00,0x00,0xEE,0xEE,0xEE,0xEE,0xEE,0xEE,0xEE,0xEE, -0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x43,0x41,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x50,0x4B,0x00,0x00,0x00,0x00,0x01,0xC4,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x07,0x24,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xDC,0x02,0x00,0x00,0x00,0x54,0x32, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xE8,0x00,0x00,0x00,0x00,0x00,0x00,0x07,0x24, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x50,0x4B,0x00,0x0A,0x4D,0x52,0x50,0x20,0x20,0x20,0x20,0x20, -0x00,0x42,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D, -0x0E,0x0F,0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88,0x99,0xAA,0xBB,0xCC,0xDD, -0xEE,0xFF,0xFF,0xEE,0xDD,0xCC,0xBB,0xAA,0x99,0x88,0x77,0x66,0x55,0x44,0x33,0x22, -0x11,0x00,0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF,0xFE,0xDC,0xBA,0x98,0x76,0x54, -0x32,0x10,0x00,0x9A,0x00,0x98,0x00,0x00,0x1E,0x00,0x00,0x94,0x00,0x00,0x00,0x00, -0x04,0x00,0x00,0x8C,0x00,0x00,0x00,0x40,0x02,0x00,0x00,0x40,0xBA,0xE8,0x23,0x3C, -0x75,0xF3,0x91,0x61,0xD6,0x73,0x39,0xCF,0x7B,0x6D,0x8E,0x61,0x97,0x63,0x9E,0xD9, -0x60,0x55,0xD6,0xC7,0xEF,0xF8,0x1E,0x63,0x95,0x17,0xCC,0x28,0x45,0x60,0x11,0xC5, -0xC4,0x4E,0x66,0xC6,0xE6,0xC3,0xDE,0x8A,0x19,0x30,0xCF,0x0E,0xD7,0xAA,0xDB,0x01, -0xD8,0x00,0xBB,0x8F,0x39,0x9F,0x64,0x28,0xF5,0x7A,0x77,0x49,0xCC,0x6B,0xA3,0x91, -0x97,0x70,0xE7,0x60,0x1E,0x39,0xE1,0xE5,0x33,0xE1,0x15,0x63,0x69,0x08,0x80,0x4C, -0x67,0xC4,0x41,0x8F,0x48,0xDF,0x26,0x98,0xF1,0xD5,0x8D,0x88,0xD9,0x6A,0xA4,0x96, -0xC5,0x84,0xD9,0x30,0x49,0x67,0x7D,0x19,0xB1,0xB3,0x45,0x4D,0xB2,0x53,0x9A,0x47, -0x3C,0x7C,0x55,0xBF,0xCC,0x85,0x00,0x36,0xF1,0x3D,0x93,0x53 -}; - -static int -probe_PCIXCC_type(struct device *devPtr) -{ - int rv, dv, i, index, length; - unsigned char psmid[8]; - static unsigned char loc_testmsg[548]; - struct CPRBX *cprbx_p; - - index = devPtr->dev_self_x; - rv = 0; - do { - memcpy(loc_testmsg, MCL3_testmsg, sizeof(MCL3_testmsg)); - length = sizeof(MCL3_testmsg) - 0x0C; - dv = send_to_AP(index, z90crypt.cdx, length, loc_testmsg); - if (dv) { - PDEBUG("dv returned = %d\n", dv); - if (dv == DEV_SEN_EXCEPTION) { - rv = SEN_FATAL_ERROR; - PRINTKC("exception in send to AP %d\n", index); - break; - } - PDEBUG("return value from send_to_AP: %d\n", rv); - switch (dv) { - case DEV_GONE: - PDEBUG("dev %d not available\n", index); - rv = SEN_NOT_AVAIL; - break; - case DEV_ONLINE: - rv = 0; - break; - case DEV_EMPTY: - rv = SEN_NOT_AVAIL; - break; - case DEV_NO_WORK: - rv = SEN_FATAL_ERROR; - break; - case DEV_BAD_MESSAGE: - rv = SEN_USER_ERROR; - break; - case DEV_QUEUE_FULL: - rv = SEN_QUEUE_FULL; - break; - default: - PRINTK("unknown dv=%d for dev %d\n", dv, index); - rv = SEN_NOT_AVAIL; - break; - } - } - - if (rv) - break; - - for (i = 0; i < 6; i++) { - mdelay(300); - dv = receive_from_AP(index, z90crypt.cdx, - devPtr->dev_resp_l, - devPtr->dev_resp_p, psmid); - PDEBUG("dv returned by DQ = %d\n", dv); - if (dv == DEV_REC_EXCEPTION) { - rv = REC_FATAL_ERROR; - PRINTKC("exception in dequeue %d\n", - index); - break; - } - switch (dv) { - case DEV_ONLINE: - rv = 0; - break; - case DEV_EMPTY: - rv = REC_EMPTY; - break; - case DEV_NO_WORK: - rv = REC_NO_WORK; - break; - case DEV_BAD_MESSAGE: - case DEV_GONE: - default: - rv = REC_NO_RESPONSE; - break; - } - if ((rv != 0) && (rv != REC_NO_WORK)) - break; - if (rv == 0) - break; - } - if (rv) - break; - cprbx_p = (struct CPRBX *) (devPtr->dev_resp_p + 48); - if ((cprbx_p->ccp_rtcode == 8) && (cprbx_p->ccp_rscode == 33)) { - devPtr->dev_type = PCIXCC_MCL2; - PDEBUG("device %d is MCL2\n", index); - } else { - devPtr->dev_type = PCIXCC_MCL3; - PDEBUG("device %d is MCL3\n", index); - } - } while (0); - /* In a general error case, the card is not marked online */ - return rv; -} - -module_init(z90crypt_init_module); -module_exit(z90crypt_cleanup_module); diff --git a/include/asm-s390/z90crypt.h b/include/asm-s390/z90crypt.h deleted file mode 100644 index 31a2439b07bd..000000000000 --- a/include/asm-s390/z90crypt.h +++ /dev/null @@ -1,212 +0,0 @@ -/* - * include/asm-s390/z90crypt.h - * - * z90crypt 1.3.3 (user-visible header) - * - * Copyright (C) 2001, 2005 IBM Corporation - * Author(s): Robert Burroughs - * Eric Rossman (edrossma@us.ibm.com) - * - * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef __ASM_S390_Z90CRYPT_H -#define __ASM_S390_Z90CRYPT_H -#include - -#define z90crypt_VERSION 1 -#define z90crypt_RELEASE 3 // 2 = PCIXCC, 3 = rewrite for coding standards -#define z90crypt_VARIANT 3 // 3 = CEX2A support - -/** - * struct ica_rsa_modexpo - * - * Requirements: - * - outputdatalength is at least as large as inputdatalength. - * - All key parts are right justified in their fields, padded on - * the left with zeroes. - * - length(b_key) = inputdatalength - * - length(n_modulus) = inputdatalength - */ -struct ica_rsa_modexpo { - char __user * inputdata; - unsigned int inputdatalength; - char __user * outputdata; - unsigned int outputdatalength; - char __user * b_key; - char __user * n_modulus; -}; - -/** - * struct ica_rsa_modexpo_crt - * - * Requirements: - * - inputdatalength is even. - * - outputdatalength is at least as large as inputdatalength. - * - All key parts are right justified in their fields, padded on - * the left with zeroes. - * - length(bp_key) = inputdatalength/2 + 8 - * - length(bq_key) = inputdatalength/2 - * - length(np_key) = inputdatalength/2 + 8 - * - length(nq_key) = inputdatalength/2 - * - length(u_mult_inv) = inputdatalength/2 + 8 - */ -struct ica_rsa_modexpo_crt { - char __user * inputdata; - unsigned int inputdatalength; - char __user * outputdata; - unsigned int outputdatalength; - char __user * bp_key; - char __user * bq_key; - char __user * np_prime; - char __user * nq_prime; - char __user * u_mult_inv; -}; - -#define Z90_IOCTL_MAGIC 'z' // NOTE: Need to allocate from linux folks - -/** - * Interface notes: - * - * The ioctl()s which are implemented (along with relevant details) - * are: - * - * ICARSAMODEXPO - * Perform an RSA operation using a Modulus-Exponent pair - * This takes an ica_rsa_modexpo struct as its arg. - * - * NOTE: please refer to the comments preceding this structure - * for the implementation details for the contents of the - * block - * - * ICARSACRT - * Perform an RSA operation using a Chinese-Remainder Theorem key - * This takes an ica_rsa_modexpo_crt struct as its arg. - * - * NOTE: please refer to the comments preceding this structure - * for the implementation details for the contents of the - * block - * - * Z90STAT_TOTALCOUNT - * Return an integer count of all device types together. - * - * Z90STAT_PCICACOUNT - * Return an integer count of all PCICAs. - * - * Z90STAT_PCICCCOUNT - * Return an integer count of all PCICCs. - * - * Z90STAT_PCIXCCMCL2COUNT - * Return an integer count of all MCL2 PCIXCCs. - * - * Z90STAT_PCIXCCMCL3COUNT - * Return an integer count of all MCL3 PCIXCCs. - * - * Z90STAT_CEX2CCOUNT - * Return an integer count of all CEX2Cs. - * - * Z90STAT_CEX2ACOUNT - * Return an integer count of all CEX2As. - * - * Z90STAT_REQUESTQ_COUNT - * Return an integer count of the number of entries waiting to be - * sent to a device. - * - * Z90STAT_PENDINGQ_COUNT - * Return an integer count of the number of entries sent to a - * device awaiting the reply. - * - * Z90STAT_TOTALOPEN_COUNT - * Return an integer count of the number of open file handles. - * - * Z90STAT_DOMAIN_INDEX - * Return the integer value of the Cryptographic Domain. - * - * Z90STAT_STATUS_MASK - * Return an 64 element array of unsigned chars for the status of - * all devices. - * 0x01: PCICA - * 0x02: PCICC - * 0x03: PCIXCC_MCL2 - * 0x04: PCIXCC_MCL3 - * 0x05: CEX2C - * 0x06: CEX2A - * 0x0d: device is disabled via the proc filesystem - * - * Z90STAT_QDEPTH_MASK - * Return an 64 element array of unsigned chars for the queue - * depth of all devices. - * - * Z90STAT_PERDEV_REQCNT - * Return an 64 element array of unsigned integers for the number - * of successfully completed requests per device since the device - * was detected and made available. - * - * ICAZ90STATUS (deprecated) - * Return some device driver status in a ica_z90_status struct - * This takes an ica_z90_status struct as its arg. - * - * NOTE: this ioctl() is deprecated, and has been replaced with - * single ioctl()s for each type of status being requested - * - * Z90STAT_PCIXCCCOUNT (deprecated) - * Return an integer count of all PCIXCCs (MCL2 + MCL3). - * This is DEPRECATED now that MCL3 PCIXCCs are treated differently from - * MCL2 PCIXCCs. - * - * Z90QUIESCE (not recommended) - * Quiesce the driver. This is intended to stop all new - * requests from being processed. Its use is NOT recommended, - * except in circumstances where there is no other way to stop - * callers from accessing the driver. Its original use was to - * allow the driver to be "drained" of work in preparation for - * a system shutdown. - * - * NOTE: once issued, this ban on new work cannot be undone - * except by unloading and reloading the driver. - */ - -/** - * Supported ioctl calls - */ -#define ICARSAMODEXPO _IOC(_IOC_READ|_IOC_WRITE, Z90_IOCTL_MAGIC, 0x05, 0) -#define ICARSACRT _IOC(_IOC_READ|_IOC_WRITE, Z90_IOCTL_MAGIC, 0x06, 0) - -/* DEPRECATED status calls (bound for removal at some point) */ -#define ICAZ90STATUS _IOR(Z90_IOCTL_MAGIC, 0x10, struct ica_z90_status) -#define Z90STAT_PCIXCCCOUNT _IOR(Z90_IOCTL_MAGIC, 0x43, int) - -/* unrelated to ICA callers */ -#define Z90QUIESCE _IO(Z90_IOCTL_MAGIC, 0x11) - -/* New status calls */ -#define Z90STAT_TOTALCOUNT _IOR(Z90_IOCTL_MAGIC, 0x40, int) -#define Z90STAT_PCICACOUNT _IOR(Z90_IOCTL_MAGIC, 0x41, int) -#define Z90STAT_PCICCCOUNT _IOR(Z90_IOCTL_MAGIC, 0x42, int) -#define Z90STAT_PCIXCCMCL2COUNT _IOR(Z90_IOCTL_MAGIC, 0x4b, int) -#define Z90STAT_PCIXCCMCL3COUNT _IOR(Z90_IOCTL_MAGIC, 0x4c, int) -#define Z90STAT_CEX2CCOUNT _IOR(Z90_IOCTL_MAGIC, 0x4d, int) -#define Z90STAT_CEX2ACOUNT _IOR(Z90_IOCTL_MAGIC, 0x4e, int) -#define Z90STAT_REQUESTQ_COUNT _IOR(Z90_IOCTL_MAGIC, 0x44, int) -#define Z90STAT_PENDINGQ_COUNT _IOR(Z90_IOCTL_MAGIC, 0x45, int) -#define Z90STAT_TOTALOPEN_COUNT _IOR(Z90_IOCTL_MAGIC, 0x46, int) -#define Z90STAT_DOMAIN_INDEX _IOR(Z90_IOCTL_MAGIC, 0x47, int) -#define Z90STAT_STATUS_MASK _IOR(Z90_IOCTL_MAGIC, 0x48, char[64]) -#define Z90STAT_QDEPTH_MASK _IOR(Z90_IOCTL_MAGIC, 0x49, char[64]) -#define Z90STAT_PERDEV_REQCNT _IOR(Z90_IOCTL_MAGIC, 0x4a, int[64]) - -#endif /* __ASM_S390_Z90CRYPT_H */ From 1534c3820c26aca4e2567f97b8add8bea40e7e2b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 20 Sep 2006 15:58:25 +0200 Subject: [PATCH 0222/1063] [S390] zcrypt adjunct processor bus. Add a bus for the adjunct processor interface. Up to 64 devices can be connect to the ap bus interface, each device with 16 domains. That makes 1024 message queues. The interface is asynchronous, the answer to a message sent to a queue needs to be received at some later point in time. Unfortunately the interface does not provide interrupts when a message reply is pending. So the ap bus needs to implement some fancy polling, each active queue is polled once per 1/HZ second or continuously if an idle cpus exsists and the poll thread is activ (see poll_thread parameter). The ap bus uses the sysfs path /sys/bus/ap and has two bus attributes, ap_domain and config_time. The ap_domain selects one of the 16 domains to be used for this system. This limits the maximum number of ap devices to 64. The config_time attribute contains the number of seconds between two ap bus scans to find new devices. The ap bus uses the modalias entries of the form "ap:tN" to autoload the ap driver for hardware type N. Currently known types are: 3 - PCICC, 4 - PCICA, 5 - PCIXCC, 6 - CEX2A and 7 - CEX2C. Signed-off-by: Cornelia Huck Signed-off-by: Ralph Wuerthner Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_bus.c | 1221 +++++++++++++++++++++++++++++++ drivers/s390/crypto/ap_bus.h | 158 ++++ include/linux/mod_devicetable.h | 11 + scripts/mod/file2alias.c | 12 + 4 files changed, 1402 insertions(+) create mode 100644 drivers/s390/crypto/ap_bus.c create mode 100644 drivers/s390/crypto/ap_bus.h diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c new file mode 100644 index 000000000000..6ed0985c0c91 --- /dev/null +++ b/drivers/s390/crypto/ap_bus.c @@ -0,0 +1,1221 @@ +/* + * linux/drivers/s390/crypto/ap_bus.c + * + * Copyright (C) 2006 IBM Corporation + * Author(s): Cornelia Huck + * Martin Schwidefsky + * Ralph Wuerthner + * + * Adjunct processor bus. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ap_bus.h" + +/* Some prototypes. */ +static void ap_scan_bus(void *); +static void ap_poll_all(unsigned long); +static void ap_poll_timeout(unsigned long); +static int ap_poll_thread_start(void); +static void ap_poll_thread_stop(void); + +/** + * Module description. + */ +MODULE_AUTHOR("IBM Corporation"); +MODULE_DESCRIPTION("Adjunct Processor Bus driver, " + "Copyright 2006 IBM Corporation"); +MODULE_LICENSE("GPL"); + +/** + * Module parameter + */ +int ap_domain_index = -1; /* Adjunct Processor Domain Index */ +module_param_named(domain, ap_domain_index, int, 0000); +MODULE_PARM_DESC(domain, "domain index for ap devices"); +EXPORT_SYMBOL(ap_domain_index); + +static int ap_thread_flag = 1; +module_param_named(poll_thread, ap_thread_flag, int, 0000); +MODULE_PARM_DESC(poll_thread, "Turn on/off poll thread, default is 1 (on)."); + +static struct device *ap_root_device = NULL; + +/** + * Workqueue & timer for bus rescan. + */ +static struct workqueue_struct *ap_work_queue; +static struct timer_list ap_config_timer; +static int ap_config_time = AP_CONFIG_TIME; +static DECLARE_WORK(ap_config_work, ap_scan_bus, NULL); + +/** + * Tasklet & timer for AP request polling. + */ +static struct timer_list ap_poll_timer = TIMER_INITIALIZER(ap_poll_timeout,0,0); +static DECLARE_TASKLET(ap_tasklet, ap_poll_all, 0); +static atomic_t ap_poll_requests = ATOMIC_INIT(0); +static DECLARE_WAIT_QUEUE_HEAD(ap_poll_wait); +static struct task_struct *ap_poll_kthread = NULL; +static DEFINE_MUTEX(ap_poll_thread_mutex); + +/** + * Test if ap instructions are available. + * + * Returns 0 if the ap instructions are installed. + */ +static inline int ap_instructions_available(void) +{ + register unsigned long reg0 asm ("0") = AP_MKQID(0,0); + register unsigned long reg1 asm ("1") = -ENODEV; + register unsigned long reg2 asm ("2") = 0UL; + + asm volatile( + " .long 0xb2af0000\n" /* PQAP(TAPQ) */ + "0: la %1,0\n" + "1:\n" + EX_TABLE(0b, 1b) + : "+d" (reg0), "+d" (reg1), "+d" (reg2) : : "cc" ); + return reg1; +} + +/** + * Test adjunct processor queue. + * @qid: the ap queue number + * @queue_depth: pointer to queue depth value + * @device_type: pointer to device type value + * + * Returns ap queue status structure. + */ +static inline struct ap_queue_status +ap_test_queue(ap_qid_t qid, int *queue_depth, int *device_type) +{ + register unsigned long reg0 asm ("0") = qid; + register struct ap_queue_status reg1 asm ("1"); + register unsigned long reg2 asm ("2") = 0UL; + + asm volatile(".long 0xb2af0000" /* PQAP(TAPQ) */ + : "+d" (reg0), "=d" (reg1), "+d" (reg2) : : "cc"); + *device_type = (int) (reg2 >> 24); + *queue_depth = (int) (reg2 & 0xff); + return reg1; +} + +/** + * Reset adjunct processor queue. + * @qid: the ap queue number + * + * Returns ap queue status structure. + */ +static inline struct ap_queue_status ap_reset_queue(ap_qid_t qid) +{ + register unsigned long reg0 asm ("0") = qid | 0x01000000UL; + register struct ap_queue_status reg1 asm ("1"); + register unsigned long reg2 asm ("2") = 0UL; + + asm volatile( + ".long 0xb2af0000" /* PQAP(RAPQ) */ + : "+d" (reg0), "=d" (reg1), "+d" (reg2) : : "cc"); + return reg1; +} + +/** + * Send message to adjunct processor queue. + * @qid: the ap queue number + * @psmid: the program supplied message identifier + * @msg: the message text + * @length: the message length + * + * Returns ap queue status structure. + * + * Condition code 1 on NQAP can't happen because the L bit is 1. + * + * Condition code 2 on NQAP also means the send is incomplete, + * because a segment boundary was reached. The NQAP is repeated. + */ +static inline struct ap_queue_status +__ap_send(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length) +{ + typedef struct { char _[length]; } msgblock; + register unsigned long reg0 asm ("0") = qid | 0x40000000UL; + register struct ap_queue_status reg1 asm ("1"); + register unsigned long reg2 asm ("2") = (unsigned long) msg; + register unsigned long reg3 asm ("3") = (unsigned long) length; + register unsigned long reg4 asm ("4") = (unsigned int) (psmid >> 32); + register unsigned long reg5 asm ("5") = (unsigned int) psmid; + + asm volatile ( + "0: .long 0xb2ad0042\n" /* DQAP */ + " brc 2,0b" + : "+d" (reg0), "=d" (reg1), "+d" (reg2), "+d" (reg3) + : "d" (reg4), "d" (reg5), "m" (*(msgblock *) msg) + : "cc" ); + return reg1; +} + +int ap_send(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length) +{ + struct ap_queue_status status; + + status = __ap_send(qid, psmid, msg, length); + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + return 0; + case AP_RESPONSE_Q_FULL: + return -EBUSY; + default: /* Device is gone. */ + return -ENODEV; + } +} +EXPORT_SYMBOL(ap_send); + +/* + * Receive message from adjunct processor queue. + * @qid: the ap queue number + * @psmid: pointer to program supplied message identifier + * @msg: the message text + * @length: the message length + * + * Returns ap queue status structure. + * + * Condition code 1 on DQAP means the receive has taken place + * but only partially. The response is incomplete, hence the + * DQAP is repeated. + * + * Condition code 2 on DQAP also means the receive is incomplete, + * this time because a segment boundary was reached. Again, the + * DQAP is repeated. + * + * Note that gpr2 is used by the DQAP instruction to keep track of + * any 'residual' length, in case the instruction gets interrupted. + * Hence it gets zeroed before the instruction. + */ +static inline struct ap_queue_status +__ap_recv(ap_qid_t qid, unsigned long long *psmid, void *msg, size_t length) +{ + typedef struct { char _[length]; } msgblock; + register unsigned long reg0 asm("0") = qid | 0x80000000UL; + register struct ap_queue_status reg1 asm ("1"); + register unsigned long reg2 asm("2") = 0UL; + register unsigned long reg4 asm("4") = (unsigned long) msg; + register unsigned long reg5 asm("5") = (unsigned long) length; + register unsigned long reg6 asm("6") = 0UL; + register unsigned long reg7 asm("7") = 0UL; + + + asm volatile( + "0: .long 0xb2ae0064\n" + " brc 6,0b\n" + : "+d" (reg0), "=d" (reg1), "+d" (reg2), + "+d" (reg4), "+d" (reg5), "+d" (reg6), "+d" (reg7), + "=m" (*(msgblock *) msg) : : "cc" ); + *psmid = (((unsigned long long) reg6) << 32) + reg7; + return reg1; +} + +int ap_recv(ap_qid_t qid, unsigned long long *psmid, void *msg, size_t length) +{ + struct ap_queue_status status; + + status = __ap_recv(qid, psmid, msg, length); + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + return 0; + case AP_RESPONSE_NO_PENDING_REPLY: + if (status.queue_empty) + return -ENOENT; + return -EBUSY; + default: + return -ENODEV; + } +} +EXPORT_SYMBOL(ap_recv); + +/** + * Check if an AP queue is available. The test is repeated for + * AP_MAX_RESET times. + * @qid: the ap queue number + * @queue_depth: pointer to queue depth value + * @device_type: pointer to device type value + */ +static int ap_query_queue(ap_qid_t qid, int *queue_depth, int *device_type) +{ + struct ap_queue_status status; + int t_depth, t_device_type, rc, i; + + rc = -EBUSY; + for (i = 0; i < AP_MAX_RESET; i++) { + status = ap_test_queue(qid, &t_depth, &t_device_type); + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + *queue_depth = t_depth + 1; + *device_type = t_device_type; + rc = 0; + break; + case AP_RESPONSE_Q_NOT_AVAIL: + rc = -ENODEV; + break; + case AP_RESPONSE_RESET_IN_PROGRESS: + break; + case AP_RESPONSE_DECONFIGURED: + rc = -ENODEV; + break; + case AP_RESPONSE_CHECKSTOPPED: + rc = -ENODEV; + break; + case AP_RESPONSE_BUSY: + break; + default: + BUG(); + } + if (rc != -EBUSY) + break; + if (i < AP_MAX_RESET - 1) + udelay(5); + } + return rc; +} + +/** + * Reset an AP queue and wait for it to become available again. + * @qid: the ap queue number + */ +static int ap_init_queue(ap_qid_t qid) +{ + struct ap_queue_status status; + int rc, dummy, i; + + rc = -ENODEV; + status = ap_reset_queue(qid); + for (i = 0; i < AP_MAX_RESET; i++) { + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + if (status.queue_empty) + rc = 0; + break; + case AP_RESPONSE_Q_NOT_AVAIL: + case AP_RESPONSE_DECONFIGURED: + case AP_RESPONSE_CHECKSTOPPED: + i = AP_MAX_RESET; /* return with -ENODEV */ + break; + case AP_RESPONSE_RESET_IN_PROGRESS: + case AP_RESPONSE_BUSY: + default: + break; + } + if (rc != -ENODEV) + break; + if (i < AP_MAX_RESET - 1) { + udelay(5); + status = ap_test_queue(qid, &dummy, &dummy); + } + } + return rc; +} + +/** + * AP device related attributes. + */ +static ssize_t ap_hwtype_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ap_device *ap_dev = to_ap_dev(dev); + return snprintf(buf, PAGE_SIZE, "%d\n", ap_dev->device_type); +} +static DEVICE_ATTR(hwtype, 0444, ap_hwtype_show, NULL); + +static ssize_t ap_depth_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct ap_device *ap_dev = to_ap_dev(dev); + return snprintf(buf, PAGE_SIZE, "%d\n", ap_dev->queue_depth); +} +static DEVICE_ATTR(depth, 0444, ap_depth_show, NULL); + +static ssize_t ap_request_count_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ap_device *ap_dev = to_ap_dev(dev); + int rc; + + spin_lock_bh(&ap_dev->lock); + rc = snprintf(buf, PAGE_SIZE, "%d\n", ap_dev->total_request_count); + spin_unlock_bh(&ap_dev->lock); + return rc; +} + +static DEVICE_ATTR(request_count, 0444, ap_request_count_show, NULL); + +static ssize_t ap_modalias_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "ap:t%02X", to_ap_dev(dev)->device_type); +} + +static DEVICE_ATTR(modalias, 0444, ap_modalias_show, NULL); + +static struct attribute *ap_dev_attrs[] = { + &dev_attr_hwtype.attr, + &dev_attr_depth.attr, + &dev_attr_request_count.attr, + &dev_attr_modalias.attr, + NULL +}; +static struct attribute_group ap_dev_attr_group = { + .attrs = ap_dev_attrs +}; + +/** + * AP bus driver registration/unregistration. + */ +static int ap_bus_match(struct device *dev, struct device_driver *drv) +{ + struct ap_device *ap_dev = to_ap_dev(dev); + struct ap_driver *ap_drv = to_ap_drv(drv); + struct ap_device_id *id; + + /** + * Compare device type of the device with the list of + * supported types of the device_driver. + */ + for (id = ap_drv->ids; id->match_flags; id++) { + if ((id->match_flags & AP_DEVICE_ID_MATCH_DEVICE_TYPE) && + (id->dev_type != ap_dev->device_type)) + continue; + return 1; + } + return 0; +} + +/** + * uevent function for AP devices. It sets up a single environment + * variable DEV_TYPE which contains the hardware device type. + */ +static int ap_uevent (struct device *dev, char **envp, int num_envp, + char *buffer, int buffer_size) +{ + struct ap_device *ap_dev = to_ap_dev(dev); + int length; + + if (!ap_dev) + return -ENODEV; + + /* Set up DEV_TYPE environment variable. */ + envp[0] = buffer; + length = scnprintf(buffer, buffer_size, "DEV_TYPE=%04X", + ap_dev->device_type); + if (buffer_size - length <= 0) + return -ENOMEM; + envp[1] = 0; + return 0; +} + +static struct bus_type ap_bus_type = { + .name = "ap", + .match = &ap_bus_match, + .uevent = &ap_uevent, +}; + +static int ap_device_probe(struct device *dev) +{ + struct ap_device *ap_dev = to_ap_dev(dev); + struct ap_driver *ap_drv = to_ap_drv(dev->driver); + int rc; + + ap_dev->drv = ap_drv; + rc = ap_drv->probe ? ap_drv->probe(ap_dev) : -ENODEV; + if (rc) + ap_dev->unregistered = 1; + return rc; +} + +/** + * Flush all requests from the request/pending queue of an AP device. + * @ap_dev: pointer to the AP device. + */ +static inline void __ap_flush_queue(struct ap_device *ap_dev) +{ + struct ap_message *ap_msg, *next; + + list_for_each_entry_safe(ap_msg, next, &ap_dev->pendingq, list) { + list_del_init(&ap_msg->list); + ap_dev->pendingq_count--; + ap_dev->drv->receive(ap_dev, ap_msg, ERR_PTR(-ENODEV)); + } + list_for_each_entry_safe(ap_msg, next, &ap_dev->requestq, list) { + list_del_init(&ap_msg->list); + ap_dev->requestq_count--; + ap_dev->drv->receive(ap_dev, ap_msg, ERR_PTR(-ENODEV)); + } +} + +void ap_flush_queue(struct ap_device *ap_dev) +{ + spin_lock_bh(&ap_dev->lock); + __ap_flush_queue(ap_dev); + spin_unlock_bh(&ap_dev->lock); +} +EXPORT_SYMBOL(ap_flush_queue); + +static int ap_device_remove(struct device *dev) +{ + struct ap_device *ap_dev = to_ap_dev(dev); + struct ap_driver *ap_drv = ap_dev->drv; + + spin_lock_bh(&ap_dev->lock); + __ap_flush_queue(ap_dev); + /** + * set ->unregistered to 1 while holding the lock. This prevents + * new messages to be put on the queue from now on. + */ + ap_dev->unregistered = 1; + spin_unlock_bh(&ap_dev->lock); + if (ap_drv->remove) + ap_drv->remove(ap_dev); + return 0; +} + +int ap_driver_register(struct ap_driver *ap_drv, struct module *owner, + char *name) +{ + struct device_driver *drv = &ap_drv->driver; + + drv->bus = &ap_bus_type; + drv->probe = ap_device_probe; + drv->remove = ap_device_remove; + drv->owner = owner; + drv->name = name; + return driver_register(drv); +} +EXPORT_SYMBOL(ap_driver_register); + +void ap_driver_unregister(struct ap_driver *ap_drv) +{ + driver_unregister(&ap_drv->driver); +} +EXPORT_SYMBOL(ap_driver_unregister); + +/** + * AP bus attributes. + */ +static ssize_t ap_domain_show(struct bus_type *bus, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", ap_domain_index); +} + +static BUS_ATTR(ap_domain, 0444, ap_domain_show, NULL); + +static ssize_t ap_config_time_show(struct bus_type *bus, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", ap_config_time); +} + +static ssize_t ap_config_time_store(struct bus_type *bus, + const char *buf, size_t count) +{ + int time; + + if (sscanf(buf, "%d\n", &time) != 1 || time < 5 || time > 120) + return -EINVAL; + ap_config_time = time; + if (!timer_pending(&ap_config_timer) || + !mod_timer(&ap_config_timer, jiffies + ap_config_time * HZ)) { + ap_config_timer.expires = jiffies + ap_config_time * HZ; + add_timer(&ap_config_timer); + } + return count; +} + +static BUS_ATTR(config_time, 0644, ap_config_time_show, ap_config_time_store); + +static ssize_t ap_poll_thread_show(struct bus_type *bus, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", ap_poll_kthread ? 1 : 0); +} + +static ssize_t ap_poll_thread_store(struct bus_type *bus, + const char *buf, size_t count) +{ + int flag, rc; + + if (sscanf(buf, "%d\n", &flag) != 1) + return -EINVAL; + if (flag) { + rc = ap_poll_thread_start(); + if (rc) + return rc; + } + else + ap_poll_thread_stop(); + return count; +} + +static BUS_ATTR(poll_thread, 0644, ap_poll_thread_show, ap_poll_thread_store); + +static struct bus_attribute *const ap_bus_attrs[] = { + &bus_attr_ap_domain, + &bus_attr_config_time, + &bus_attr_poll_thread, + NULL +}; + +/** + * Pick one of the 16 ap domains. + */ +static inline int ap_select_domain(void) +{ + int queue_depth, device_type, count, max_count, best_domain; + int rc, i, j; + + /** + * We want to use a single domain. Either the one specified with + * the "domain=" parameter or the domain with the maximum number + * of devices. + */ + if (ap_domain_index >= 0 && ap_domain_index < AP_DOMAINS) + /* Domain has already been selected. */ + return 0; + best_domain = -1; + max_count = 0; + for (i = 0; i < AP_DOMAINS; i++) { + count = 0; + for (j = 0; j < AP_DEVICES; j++) { + ap_qid_t qid = AP_MKQID(j, i); + rc = ap_query_queue(qid, &queue_depth, &device_type); + if (rc) + continue; + count++; + } + if (count > max_count) { + max_count = count; + best_domain = i; + } + } + if (best_domain >= 0){ + ap_domain_index = best_domain; + return 0; + } + return -ENODEV; +} + +/** + * Find the device type if query queue returned a device type of 0. + * @ap_dev: pointer to the AP device. + */ +static int ap_probe_device_type(struct ap_device *ap_dev) +{ + static unsigned char msg[] = { + 0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x01,0x00,0x43,0x43,0x41,0x2d,0x41,0x50, + 0x50,0x4c,0x20,0x20,0x20,0x01,0x01,0x01, + 0x00,0x00,0x00,0x00,0x50,0x4b,0x00,0x00, + 0x00,0x00,0x01,0x1c,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x05,0xb8,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x70,0x00,0x41,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x54,0x32,0x01,0x00,0xa0,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0xb8,0x05,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x0a,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00, + 0x49,0x43,0x53,0x46,0x20,0x20,0x20,0x20, + 0x50,0x4b,0x0a,0x00,0x50,0x4b,0x43,0x53, + 0x2d,0x31,0x2e,0x32,0x37,0x00,0x11,0x22, + 0x33,0x44,0x55,0x66,0x77,0x88,0x99,0x00, + 0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88, + 0x99,0x00,0x11,0x22,0x33,0x44,0x55,0x66, + 0x77,0x88,0x99,0x00,0x11,0x22,0x33,0x44, + 0x55,0x66,0x77,0x88,0x99,0x00,0x11,0x22, + 0x33,0x44,0x55,0x66,0x77,0x88,0x99,0x00, + 0x11,0x22,0x33,0x5d,0x00,0x5b,0x00,0x77, + 0x88,0x1e,0x00,0x00,0x57,0x00,0x00,0x00, + 0x00,0x04,0x00,0x00,0x4f,0x00,0x00,0x00, + 0x03,0x02,0x00,0x00,0x40,0x01,0x00,0x01, + 0xce,0x02,0x68,0x2d,0x5f,0xa9,0xde,0x0c, + 0xf6,0xd2,0x7b,0x58,0x4b,0xf9,0x28,0x68, + 0x3d,0xb4,0xf4,0xef,0x78,0xd5,0xbe,0x66, + 0x63,0x42,0xef,0xf8,0xfd,0xa4,0xf8,0xb0, + 0x8e,0x29,0xc2,0xc9,0x2e,0xd8,0x45,0xb8, + 0x53,0x8c,0x6f,0x4e,0x72,0x8f,0x6c,0x04, + 0x9c,0x88,0xfc,0x1e,0xc5,0x83,0x55,0x57, + 0xf7,0xdd,0xfd,0x4f,0x11,0x36,0x95,0x5d, + }; + struct ap_queue_status status; + unsigned long long psmid; + char *reply; + int rc, i; + + reply = (void *) get_zeroed_page(GFP_KERNEL); + if (!reply) { + rc = -ENOMEM; + goto out; + } + + status = __ap_send(ap_dev->qid, 0x0102030405060708ULL, + msg, sizeof(msg)); + if (status.response_code != AP_RESPONSE_NORMAL) { + rc = -ENODEV; + goto out_free; + } + + /* Wait for the test message to complete. */ + for (i = 0; i < 6; i++) { + mdelay(300); + status = __ap_recv(ap_dev->qid, &psmid, reply, 4096); + if (status.response_code == AP_RESPONSE_NORMAL && + psmid == 0x0102030405060708ULL) + break; + } + if (i < 6) { + /* Got an answer. */ + if (reply[0] == 0x00 && reply[1] == 0x86) + ap_dev->device_type = AP_DEVICE_TYPE_PCICC; + else + ap_dev->device_type = AP_DEVICE_TYPE_PCICA; + rc = 0; + } else + rc = -ENODEV; + +out_free: + free_page((unsigned long) reply); +out: + return rc; +} + +/** + * Scan the ap bus for new devices. + */ +static int __ap_scan_bus(struct device *dev, void *data) +{ + return to_ap_dev(dev)->qid == (ap_qid_t)(unsigned long) data; +} + +static void ap_device_release(struct device *dev) +{ + struct ap_device *ap_dev = to_ap_dev(dev); + + kfree(ap_dev); +} + +static void ap_scan_bus(void *data) +{ + struct ap_device *ap_dev; + struct device *dev; + ap_qid_t qid; + int queue_depth, device_type; + int rc, i; + + if (ap_select_domain() != 0) + return; + for (i = 0; i < AP_DEVICES; i++) { + qid = AP_MKQID(i, ap_domain_index); + dev = bus_find_device(&ap_bus_type, NULL, + (void *)(unsigned long)qid, + __ap_scan_bus); + if (dev) { + put_device(dev); + continue; + } + rc = ap_query_queue(qid, &queue_depth, &device_type); + if (rc) + continue; + rc = ap_init_queue(qid); + if (rc) + continue; + ap_dev = kzalloc(sizeof(*ap_dev), GFP_KERNEL); + if (!ap_dev) + break; + ap_dev->qid = qid; + ap_dev->queue_depth = queue_depth; + spin_lock_init(&ap_dev->lock); + INIT_LIST_HEAD(&ap_dev->pendingq); + INIT_LIST_HEAD(&ap_dev->requestq); + if (device_type == 0) + ap_probe_device_type(ap_dev); + else + ap_dev->device_type = device_type; + + ap_dev->device.bus = &ap_bus_type; + ap_dev->device.parent = ap_root_device; + snprintf(ap_dev->device.bus_id, BUS_ID_SIZE, "card%02x", + AP_QID_DEVICE(ap_dev->qid)); + ap_dev->device.release = ap_device_release; + rc = device_register(&ap_dev->device); + if (rc) { + kfree(ap_dev); + continue; + } + /* Add device attributes. */ + rc = sysfs_create_group(&ap_dev->device.kobj, + &ap_dev_attr_group); + if (rc) + device_unregister(&ap_dev->device); + } +} + +static void +ap_config_timeout(unsigned long ptr) +{ + queue_work(ap_work_queue, &ap_config_work); + ap_config_timer.expires = jiffies + ap_config_time * HZ; + add_timer(&ap_config_timer); +} + +/** + * Set up the timer to run the poll tasklet + */ +static inline void ap_schedule_poll_timer(void) +{ + if (timer_pending(&ap_poll_timer)) + return; + mod_timer(&ap_poll_timer, jiffies + AP_POLL_TIME); +} + +/** + * Receive pending reply messages from an AP device. + * @ap_dev: pointer to the AP device + * @flags: pointer to control flags, bit 2^0 is set if another poll is + * required, bit 2^1 is set if the poll timer needs to get armed + * Returns 0 if the device is still present, -ENODEV if not. + */ +static inline int ap_poll_read(struct ap_device *ap_dev, unsigned long *flags) +{ + struct ap_queue_status status; + struct ap_message *ap_msg; + + if (ap_dev->queue_count <= 0) + return 0; + status = __ap_recv(ap_dev->qid, &ap_dev->reply->psmid, + ap_dev->reply->message, ap_dev->reply->length); + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + atomic_dec(&ap_poll_requests); + ap_dev->queue_count--; + list_for_each_entry(ap_msg, &ap_dev->pendingq, list) { + if (ap_msg->psmid != ap_dev->reply->psmid) + continue; + list_del_init(&ap_msg->list); + ap_dev->pendingq_count--; + ap_dev->drv->receive(ap_dev, ap_msg, ap_dev->reply); + break; + } + if (ap_dev->queue_count > 0) + *flags |= 1; + break; + case AP_RESPONSE_NO_PENDING_REPLY: + if (status.queue_empty) { + /* The card shouldn't forget requests but who knows. */ + ap_dev->queue_count = 0; + list_splice_init(&ap_dev->pendingq, &ap_dev->requestq); + ap_dev->requestq_count += ap_dev->pendingq_count; + ap_dev->pendingq_count = 0; + } else + *flags |= 2; + break; + default: + return -ENODEV; + } + return 0; +} + +/** + * Send messages from the request queue to an AP device. + * @ap_dev: pointer to the AP device + * @flags: pointer to control flags, bit 2^0 is set if another poll is + * required, bit 2^1 is set if the poll timer needs to get armed + * Returns 0 if the device is still present, -ENODEV if not. + */ +static inline int ap_poll_write(struct ap_device *ap_dev, unsigned long *flags) +{ + struct ap_queue_status status; + struct ap_message *ap_msg; + + if (ap_dev->requestq_count <= 0 || + ap_dev->queue_count >= ap_dev->queue_depth) + return 0; + /* Start the next request on the queue. */ + ap_msg = list_entry(ap_dev->requestq.next, struct ap_message, list); + status = __ap_send(ap_dev->qid, ap_msg->psmid, + ap_msg->message, ap_msg->length); + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + atomic_inc(&ap_poll_requests); + ap_dev->queue_count++; + list_move_tail(&ap_msg->list, &ap_dev->pendingq); + ap_dev->requestq_count--; + ap_dev->pendingq_count++; + if (ap_dev->queue_count < ap_dev->queue_depth && + ap_dev->requestq_count > 0) + *flags |= 1; + *flags |= 2; + break; + case AP_RESPONSE_Q_FULL: + *flags |= 2; + break; + case AP_RESPONSE_MESSAGE_TOO_BIG: + return -EINVAL; + default: + return -ENODEV; + } + return 0; +} + +/** + * Poll AP device for pending replies and send new messages. If either + * ap_poll_read or ap_poll_write returns -ENODEV unregister the device. + * @ap_dev: pointer to the bus device + * @flags: pointer to control flags, bit 2^0 is set if another poll is + * required, bit 2^1 is set if the poll timer needs to get armed + * Returns 0. + */ +static inline int ap_poll_queue(struct ap_device *ap_dev, unsigned long *flags) +{ + int rc; + + rc = ap_poll_read(ap_dev, flags); + if (rc) + return rc; + return ap_poll_write(ap_dev, flags); +} + +/** + * Queue a message to a device. + * @ap_dev: pointer to the AP device + * @ap_msg: the message to be queued + */ +static int __ap_queue_message(struct ap_device *ap_dev, struct ap_message *ap_msg) +{ + struct ap_queue_status status; + + if (list_empty(&ap_dev->requestq) && + ap_dev->queue_count < ap_dev->queue_depth) { + status = __ap_send(ap_dev->qid, ap_msg->psmid, + ap_msg->message, ap_msg->length); + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + list_add_tail(&ap_msg->list, &ap_dev->pendingq); + atomic_inc(&ap_poll_requests); + ap_dev->pendingq_count++; + ap_dev->queue_count++; + ap_dev->total_request_count++; + break; + case AP_RESPONSE_Q_FULL: + list_add_tail(&ap_msg->list, &ap_dev->requestq); + ap_dev->requestq_count++; + ap_dev->total_request_count++; + return -EBUSY; + case AP_RESPONSE_MESSAGE_TOO_BIG: + ap_dev->drv->receive(ap_dev, ap_msg, ERR_PTR(-EINVAL)); + return -EINVAL; + default: /* Device is gone. */ + ap_dev->drv->receive(ap_dev, ap_msg, ERR_PTR(-ENODEV)); + return -ENODEV; + } + } else { + list_add_tail(&ap_msg->list, &ap_dev->requestq); + ap_dev->requestq_count++; + ap_dev->total_request_count++; + return -EBUSY; + } + ap_schedule_poll_timer(); + return 0; +} + +void ap_queue_message(struct ap_device *ap_dev, struct ap_message *ap_msg) +{ + unsigned long flags; + int rc; + + spin_lock_bh(&ap_dev->lock); + if (!ap_dev->unregistered) { + /* Make room on the queue by polling for finished requests. */ + rc = ap_poll_queue(ap_dev, &flags); + if (!rc) + rc = __ap_queue_message(ap_dev, ap_msg); + if (!rc) + wake_up(&ap_poll_wait); + } else { + ap_dev->drv->receive(ap_dev, ap_msg, ERR_PTR(-ENODEV)); + rc = 0; + } + spin_unlock_bh(&ap_dev->lock); + if (rc == -ENODEV) + device_unregister(&ap_dev->device); +} +EXPORT_SYMBOL(ap_queue_message); + +/** + * Cancel a crypto request. This is done by removing the request + * from the devive pendingq or requestq queue. Note that the + * request stays on the AP queue. When it finishes the message + * reply will be discarded because the psmid can't be found. + * @ap_dev: AP device that has the message queued + * @ap_msg: the message that is to be removed + */ +void ap_cancel_message(struct ap_device *ap_dev, struct ap_message *ap_msg) +{ + struct ap_message *tmp; + + spin_lock_bh(&ap_dev->lock); + if (!list_empty(&ap_msg->list)) { + list_for_each_entry(tmp, &ap_dev->pendingq, list) + if (tmp->psmid == ap_msg->psmid) { + ap_dev->pendingq_count--; + goto found; + } + ap_dev->requestq_count--; + found: + list_del_init(&ap_msg->list); + } + spin_unlock_bh(&ap_dev->lock); +} +EXPORT_SYMBOL(ap_cancel_message); + +/** + * AP receive polling for finished AP requests + */ +static void ap_poll_timeout(unsigned long unused) +{ + tasklet_schedule(&ap_tasklet); +} + +/** + * Poll all AP devices on the bus in a round robin fashion. Continue + * polling until bit 2^0 of the control flags is not set. If bit 2^1 + * of the control flags has been set arm the poll timer. + */ +static int __ap_poll_all(struct device *dev, void *data) +{ + struct ap_device *ap_dev = to_ap_dev(dev); + int rc; + + spin_lock(&ap_dev->lock); + if (!ap_dev->unregistered) { + rc = ap_poll_queue(to_ap_dev(dev), (unsigned long *) data); + } else + rc = 0; + spin_unlock(&ap_dev->lock); + if (rc) + device_unregister(&ap_dev->device); + return 0; +} + +static void ap_poll_all(unsigned long dummy) +{ + unsigned long flags; + + do { + flags = 0; + bus_for_each_dev(&ap_bus_type, NULL, &flags, __ap_poll_all); + } while (flags & 1); + if (flags & 2) + ap_schedule_poll_timer(); +} + +/** + * AP bus poll thread. The purpose of this thread is to poll for + * finished requests in a loop if there is a "free" cpu - that is + * a cpu that doesn't have anything better to do. The polling stops + * as soon as there is another task or if all messages have been + * delivered. + */ +static int ap_poll_thread(void *data) +{ + DECLARE_WAITQUEUE(wait, current); + unsigned long flags; + int requests; + + set_user_nice(current, -20); + while (1) { + if (need_resched()) { + schedule(); + continue; + } + add_wait_queue(&ap_poll_wait, &wait); + set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; + requests = atomic_read(&ap_poll_requests); + if (requests <= 0) + schedule(); + set_current_state(TASK_RUNNING); + remove_wait_queue(&ap_poll_wait, &wait); + + local_bh_disable(); + flags = 0; + bus_for_each_dev(&ap_bus_type, NULL, &flags, __ap_poll_all); + local_bh_enable(); + } + set_current_state(TASK_RUNNING); + remove_wait_queue(&ap_poll_wait, &wait); + return 0; +} + +static int ap_poll_thread_start(void) +{ + int rc; + + mutex_lock(&ap_poll_thread_mutex); + if (!ap_poll_kthread) { + ap_poll_kthread = kthread_run(ap_poll_thread, NULL, "appoll"); + rc = IS_ERR(ap_poll_kthread) ? PTR_ERR(ap_poll_kthread) : 0; + if (rc) + ap_poll_kthread = NULL; + } + else + rc = 0; + mutex_unlock(&ap_poll_thread_mutex); + return rc; +} + +static void ap_poll_thread_stop(void) +{ + mutex_lock(&ap_poll_thread_mutex); + if (ap_poll_kthread) { + kthread_stop(ap_poll_kthread); + ap_poll_kthread = NULL; + } + mutex_unlock(&ap_poll_thread_mutex); +} + +/** + * The module initialization code. + */ +int __init ap_module_init(void) +{ + int rc, i; + + if (ap_domain_index < -1 || ap_domain_index >= AP_DOMAINS) { + printk(KERN_WARNING "Invalid param: domain = %d. " + " Not loading.\n", ap_domain_index); + return -EINVAL; + } + if (ap_instructions_available() != 0) { + printk(KERN_WARNING "AP instructions not installed.\n"); + return -ENODEV; + } + + /* Create /sys/bus/ap. */ + rc = bus_register(&ap_bus_type); + if (rc) + goto out; + for (i = 0; ap_bus_attrs[i]; i++) { + rc = bus_create_file(&ap_bus_type, ap_bus_attrs[i]); + if (rc) + goto out_bus; + } + + /* Create /sys/devices/ap. */ + ap_root_device = s390_root_dev_register("ap"); + rc = IS_ERR(ap_root_device) ? PTR_ERR(ap_root_device) : 0; + if (rc) + goto out_bus; + + ap_work_queue = create_singlethread_workqueue("kapwork"); + if (!ap_work_queue) { + rc = -ENOMEM; + goto out_root; + } + + if (ap_select_domain() == 0) + ap_scan_bus(NULL); + + /* Setup the ap bus rescan timer. */ + init_timer(&ap_config_timer); + ap_config_timer.function = ap_config_timeout; + ap_config_timer.data = 0; + ap_config_timer.expires = jiffies + ap_config_time * HZ; + add_timer(&ap_config_timer); + + /* Start the low priority AP bus poll thread. */ + if (ap_thread_flag) { + rc = ap_poll_thread_start(); + if (rc) + goto out_work; + } + + return 0; + +out_work: + del_timer_sync(&ap_config_timer); + del_timer_sync(&ap_poll_timer); + destroy_workqueue(ap_work_queue); +out_root: + s390_root_dev_unregister(ap_root_device); +out_bus: + while (i--) + bus_remove_file(&ap_bus_type, ap_bus_attrs[i]); + bus_unregister(&ap_bus_type); +out: + return rc; +} + +static int __ap_match_all(struct device *dev, void *data) +{ + return 1; +} + +/** + * The module termination code + */ +void ap_module_exit(void) +{ + int i; + struct device *dev; + + ap_poll_thread_stop(); + del_timer_sync(&ap_config_timer); + del_timer_sync(&ap_poll_timer); + destroy_workqueue(ap_work_queue); + s390_root_dev_unregister(ap_root_device); + while ((dev = bus_find_device(&ap_bus_type, NULL, NULL, + __ap_match_all))) + { + device_unregister(dev); + put_device(dev); + } + for (i = 0; ap_bus_attrs[i]; i++) + bus_remove_file(&ap_bus_type, ap_bus_attrs[i]); + bus_unregister(&ap_bus_type); +} + +#ifndef CONFIG_ZCRYPT_MONOLITHIC +module_init(ap_module_init); +module_exit(ap_module_exit); +#endif diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h new file mode 100644 index 000000000000..83b69c01cd6e --- /dev/null +++ b/drivers/s390/crypto/ap_bus.h @@ -0,0 +1,158 @@ +/* + * linux/drivers/s390/crypto/ap_bus.h + * + * Copyright (C) 2006 IBM Corporation + * Author(s): Cornelia Huck + * Martin Schwidefsky + * Ralph Wuerthner + * + * Adjunct processor bus header file. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _AP_BUS_H_ +#define _AP_BUS_H_ + +#include +#include +#include + +#define AP_DEVICES 64 /* Number of AP devices. */ +#define AP_DOMAINS 16 /* Number of AP domains. */ +#define AP_MAX_RESET 90 /* Maximum number of resets. */ +#define AP_CONFIG_TIME 30 /* Time in seconds between AP bus rescans. */ +#define AP_POLL_TIME 1 /* Time in ticks between receive polls. */ + +extern int ap_domain_index; + +/** + * The ap_qid_t identifier of an ap queue. It contains a + * 6 bit device index and a 4 bit queue index (domain). + */ +typedef unsigned int ap_qid_t; + +#define AP_MKQID(_device,_queue) (((_device) & 63) << 8 | ((_queue) & 15)) +#define AP_QID_DEVICE(_qid) (((_qid) >> 8) & 63) +#define AP_QID_QUEUE(_qid) ((_qid) & 15) + +/** + * The ap queue status word is returned by all three AP functions + * (PQAP, NQAP and DQAP). There's a set of flags in the first + * byte, followed by a 1 byte response code. + */ +struct ap_queue_status { + unsigned int queue_empty : 1; + unsigned int replies_waiting : 1; + unsigned int queue_full : 1; + unsigned int pad1 : 5; + unsigned int response_code : 8; + unsigned int pad2 : 16; +}; + +#define AP_RESPONSE_NORMAL 0x00 +#define AP_RESPONSE_Q_NOT_AVAIL 0x01 +#define AP_RESPONSE_RESET_IN_PROGRESS 0x02 +#define AP_RESPONSE_DECONFIGURED 0x03 +#define AP_RESPONSE_CHECKSTOPPED 0x04 +#define AP_RESPONSE_BUSY 0x05 +#define AP_RESPONSE_Q_FULL 0x10 +#define AP_RESPONSE_NO_PENDING_REPLY 0x10 +#define AP_RESPONSE_INDEX_TOO_BIG 0x11 +#define AP_RESPONSE_NO_FIRST_PART 0x13 +#define AP_RESPONSE_MESSAGE_TOO_BIG 0x15 + +/** + * Known device types + */ +#define AP_DEVICE_TYPE_PCICC 3 +#define AP_DEVICE_TYPE_PCICA 4 +#define AP_DEVICE_TYPE_PCIXCC 5 +#define AP_DEVICE_TYPE_CEX2A 6 +#define AP_DEVICE_TYPE_CEX2C 7 + +struct ap_device; +struct ap_message; + +struct ap_driver { + struct device_driver driver; + struct ap_device_id *ids; + + int (*probe)(struct ap_device *); + void (*remove)(struct ap_device *); + /* receive is called from tasklet context */ + void (*receive)(struct ap_device *, struct ap_message *, + struct ap_message *); +}; + +#define to_ap_drv(x) container_of((x), struct ap_driver, driver) + +int ap_driver_register(struct ap_driver *, struct module *, char *); +void ap_driver_unregister(struct ap_driver *); + +struct ap_device { + struct device device; + struct ap_driver *drv; /* Pointer to AP device driver. */ + spinlock_t lock; /* Per device lock. */ + + ap_qid_t qid; /* AP queue id. */ + int queue_depth; /* AP queue depth.*/ + int device_type; /* AP device type. */ + int unregistered; /* marks AP device as unregistered */ + + int queue_count; /* # messages currently on AP queue. */ + + struct list_head pendingq; /* List of message sent to AP queue. */ + int pendingq_count; /* # requests on pendingq list. */ + struct list_head requestq; /* List of message yet to be sent. */ + int requestq_count; /* # requests on requestq list. */ + int total_request_count; /* # requests ever for this AP device. */ + + struct ap_message *reply; /* Per device reply message. */ + + void *private; /* ap driver private pointer. */ +}; + +#define to_ap_dev(x) container_of((x), struct ap_device, device) + +struct ap_message { + struct list_head list; /* Request queueing. */ + unsigned long long psmid; /* Message id. */ + void *message; /* Pointer to message buffer. */ + size_t length; /* Message length. */ + + void *private; /* ap driver private pointer. */ +}; + +#define AP_DEVICE(dt) \ + .dev_type=(dt), \ + .match_flags=AP_DEVICE_ID_MATCH_DEVICE_TYPE, + +/** + * Note: don't use ap_send/ap_recv after using ap_queue_message + * for the first time. Otherwise the ap message queue will get + * confused. + */ +int ap_send(ap_qid_t, unsigned long long, void *, size_t); +int ap_recv(ap_qid_t, unsigned long long *, void *, size_t); + +void ap_queue_message(struct ap_device *ap_dev, struct ap_message *ap_msg); +void ap_cancel_message(struct ap_device *ap_dev, struct ap_message *ap_msg); +void ap_flush_queue(struct ap_device *ap_dev); + +int ap_module_init(void); +void ap_module_exit(void); + +#endif /* _AP_BUS_H_ */ diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index f6977708585c..f7ca0b09075d 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -148,6 +148,17 @@ struct ccw_device_id { #define CCW_DEVICE_ID_MATCH_DEVICE_TYPE 0x04 #define CCW_DEVICE_ID_MATCH_DEVICE_MODEL 0x08 +/* s390 AP bus devices */ +struct ap_device_id { + __u16 match_flags; /* which fields to match against */ + __u8 dev_type; /* device type */ + __u8 pad1; + __u32 pad2; + kernel_ulong_t driver_info; +}; + +#define AP_DEVICE_ID_MATCH_DEVICE_TYPE 0x01 + #define PNP_ID_LEN 8 #define PNP_MAX_DEVICES 8 diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index e2de650d3dbf..de76da80443f 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -265,6 +265,14 @@ static int do_ccw_entry(const char *filename, return 1; } +/* looks like: "ap:tN" */ +static int do_ap_entry(const char *filename, + struct ap_device_id *id, char *alias) +{ + sprintf(alias, "ap:t%02X", id->dev_type); + return 1; +} + /* Looks like: "serio:tyNprNidNexN" */ static int do_serio_entry(const char *filename, struct serio_device_id *id, char *alias) @@ -503,6 +511,10 @@ void handle_moddevtable(struct module *mod, struct elf_info *info, do_table(symval, sym->st_size, sizeof(struct ccw_device_id), "ccw", do_ccw_entry, mod); + else if (sym_is(symname, "__mod_ap_device_table")) + do_table(symval, sym->st_size, + sizeof(struct ap_device_id), "ap", + do_ap_entry, mod); else if (sym_is(symname, "__mod_serio_device_table")) do_table(symval, sym->st_size, sizeof(struct serio_device_id), "serio", From 2dbc2418bac32a18a372ae9aec386f0fe9174389 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 20 Sep 2006 15:58:27 +0200 Subject: [PATCH 0223/1063] [S390] zcrypt user space interface. The user space interface of the zcrypt device driver implements the old user space interface as defined by the old z90crypt driver. Everything is there, the /dev/z90crypt misc character device, all the lovely ioctls and the /proc file. Even writing to the z90crypt proc file to configure the crypto device still works. It stands to reason to remove the proc write function someday since a much cleaner configuration via the sysfs is now available. The ap bus device drivers register crypto cards to the zcrypt user space interface. The request router of the user space interface picks one of the registered cards based on the predicted latency for the request and calls the driver via a callback found in the zcrypt_ops of the device. The request router only knows which operations the card can do and the minimum / maximum number of bits a request can have. Signed-off-by: Cornelia Huck Signed-off-by: Ralph Wuerthner Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/zcrypt_api.c | 981 +++++++++++++++++++++++++++++++ drivers/s390/crypto/zcrypt_api.h | 140 +++++ include/asm-s390/zcrypt.h | 207 +++++++ 3 files changed, 1328 insertions(+) create mode 100644 drivers/s390/crypto/zcrypt_api.c create mode 100644 drivers/s390/crypto/zcrypt_api.h create mode 100644 include/asm-s390/zcrypt.h diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c new file mode 100644 index 000000000000..b3fe003b3d2d --- /dev/null +++ b/drivers/s390/crypto/zcrypt_api.c @@ -0,0 +1,981 @@ +/* + * linux/drivers/s390/crypto/zcrypt_api.c + * + * zcrypt 2.0.0 + * + * Copyright (C) 2001, 2006 IBM Corporation + * Author(s): Robert Burroughs + * Eric Rossman (edrossma@us.ibm.com) + * Cornelia Huck + * + * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) + * Major cleanup & driver split: Martin Schwidefsky + * Ralph Wuerthner + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zcrypt_api.h" + +/** + * Module description. + */ +MODULE_AUTHOR("IBM Corporation"); +MODULE_DESCRIPTION("Cryptographic Coprocessor interface, " + "Copyright 2001, 2006 IBM Corporation"); +MODULE_LICENSE("GPL"); + +static DEFINE_SPINLOCK(zcrypt_device_lock); +static LIST_HEAD(zcrypt_device_list); +static int zcrypt_device_count = 0; +static atomic_t zcrypt_open_count = ATOMIC_INIT(0); + +/** + * Device attributes common for all crypto devices. + */ +static ssize_t zcrypt_type_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zcrypt_device *zdev = to_ap_dev(dev)->private; + return snprintf(buf, PAGE_SIZE, "%s\n", zdev->type_string); +} + +static DEVICE_ATTR(type, 0444, zcrypt_type_show, NULL); + +static ssize_t zcrypt_online_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zcrypt_device *zdev = to_ap_dev(dev)->private; + return snprintf(buf, PAGE_SIZE, "%d\n", zdev->online); +} + +static ssize_t zcrypt_online_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct zcrypt_device *zdev = to_ap_dev(dev)->private; + int online; + + if (sscanf(buf, "%d\n", &online) != 1 || online < 0 || online > 1) + return -EINVAL; + zdev->online = online; + if (!online) + ap_flush_queue(zdev->ap_dev); + return count; +} + +static DEVICE_ATTR(online, 0644, zcrypt_online_show, zcrypt_online_store); + +static struct attribute * zcrypt_device_attrs[] = { + &dev_attr_type.attr, + &dev_attr_online.attr, + NULL, +}; + +static struct attribute_group zcrypt_device_attr_group = { + .attrs = zcrypt_device_attrs, +}; + +/** + * Move the device towards the head of the device list. + * Need to be called while holding the zcrypt device list lock. + * Note: cards with speed_rating of 0 are kept at the end of the list. + */ +static void __zcrypt_increase_preference(struct zcrypt_device *zdev) +{ + struct zcrypt_device *tmp; + struct list_head *l; + + if (zdev->speed_rating == 0) + return; + for (l = zdev->list.prev; l != &zcrypt_device_list; l = l->prev) { + tmp = list_entry(l, struct zcrypt_device, list); + if ((tmp->request_count + 1) * tmp->speed_rating <= + (zdev->request_count + 1) * zdev->speed_rating && + tmp->speed_rating != 0) + break; + } + if (l == zdev->list.prev) + return; + /* Move zdev behind l */ + list_del(&zdev->list); + list_add(&zdev->list, l); +} + +/** + * Move the device towards the tail of the device list. + * Need to be called while holding the zcrypt device list lock. + * Note: cards with speed_rating of 0 are kept at the end of the list. + */ +static void __zcrypt_decrease_preference(struct zcrypt_device *zdev) +{ + struct zcrypt_device *tmp; + struct list_head *l; + + if (zdev->speed_rating == 0) + return; + for (l = zdev->list.next; l != &zcrypt_device_list; l = l->next) { + tmp = list_entry(l, struct zcrypt_device, list); + if ((tmp->request_count + 1) * tmp->speed_rating > + (zdev->request_count + 1) * zdev->speed_rating || + tmp->speed_rating == 0) + break; + } + if (l == zdev->list.next) + return; + /* Move zdev before l */ + list_del(&zdev->list); + list_add_tail(&zdev->list, l); +} + +static void zcrypt_device_release(struct kref *kref) +{ + struct zcrypt_device *zdev = + container_of(kref, struct zcrypt_device, refcount); + zcrypt_device_free(zdev); +} + +void zcrypt_device_get(struct zcrypt_device *zdev) +{ + kref_get(&zdev->refcount); +} +EXPORT_SYMBOL(zcrypt_device_get); + +int zcrypt_device_put(struct zcrypt_device *zdev) +{ + return kref_put(&zdev->refcount, zcrypt_device_release); +} +EXPORT_SYMBOL(zcrypt_device_put); + +struct zcrypt_device *zcrypt_device_alloc(size_t max_response_size) +{ + struct zcrypt_device *zdev; + + zdev = kzalloc(sizeof(struct zcrypt_device), GFP_KERNEL); + if (!zdev) + return NULL; + zdev->reply.message = kmalloc(max_response_size, GFP_KERNEL); + if (!zdev->reply.message) + goto out_free; + zdev->reply.length = max_response_size; + spin_lock_init(&zdev->lock); + INIT_LIST_HEAD(&zdev->list); + return zdev; + +out_free: + kfree(zdev); + return NULL; +} +EXPORT_SYMBOL(zcrypt_device_alloc); + +void zcrypt_device_free(struct zcrypt_device *zdev) +{ + kfree(zdev->reply.message); + kfree(zdev); +} +EXPORT_SYMBOL(zcrypt_device_free); + +/** + * Register a crypto device. + */ +int zcrypt_device_register(struct zcrypt_device *zdev) +{ + int rc; + + rc = sysfs_create_group(&zdev->ap_dev->device.kobj, + &zcrypt_device_attr_group); + if (rc) + goto out; + get_device(&zdev->ap_dev->device); + kref_init(&zdev->refcount); + spin_lock_bh(&zcrypt_device_lock); + zdev->online = 1; /* New devices are online by default. */ + list_add_tail(&zdev->list, &zcrypt_device_list); + __zcrypt_increase_preference(zdev); + zcrypt_device_count++; + spin_unlock_bh(&zcrypt_device_lock); +out: + return rc; +} +EXPORT_SYMBOL(zcrypt_device_register); + +/** + * Unregister a crypto device. + */ +void zcrypt_device_unregister(struct zcrypt_device *zdev) +{ + spin_lock_bh(&zcrypt_device_lock); + zcrypt_device_count--; + list_del_init(&zdev->list); + spin_unlock_bh(&zcrypt_device_lock); + sysfs_remove_group(&zdev->ap_dev->device.kobj, + &zcrypt_device_attr_group); + put_device(&zdev->ap_dev->device); + zcrypt_device_put(zdev); +} +EXPORT_SYMBOL(zcrypt_device_unregister); + +/** + * zcrypt_read is not be supported beyond zcrypt 1.3.1 + */ +static ssize_t zcrypt_read(struct file *filp, char __user *buf, + size_t count, loff_t *f_pos) +{ + return -EPERM; +} + +/** + * Write is is not allowed + */ +static ssize_t zcrypt_write(struct file *filp, const char __user *buf, + size_t count, loff_t *f_pos) +{ + return -EPERM; +} + +/** + * Device open/close functions to count number of users. + */ +static int zcrypt_open(struct inode *inode, struct file *filp) +{ + atomic_inc(&zcrypt_open_count); + return 0; +} + +static int zcrypt_release(struct inode *inode, struct file *filp) +{ + atomic_dec(&zcrypt_open_count); + return 0; +} + +/** + * zcrypt ioctls. + */ +static long zcrypt_rsa_modexpo(struct ica_rsa_modexpo *mex) +{ + struct zcrypt_device *zdev; + int rc; + + if (mex->outputdatalength < mex->inputdatalength) + return -EINVAL; + /** + * As long as outputdatalength is big enough, we can set the + * outputdatalength equal to the inputdatalength, since that is the + * number of bytes we will copy in any case + */ + mex->outputdatalength = mex->inputdatalength; + + spin_lock_bh(&zcrypt_device_lock); + list_for_each_entry(zdev, &zcrypt_device_list, list) { + if (!zdev->online || + !zdev->ops->rsa_modexpo || + zdev->min_mod_size > mex->inputdatalength || + zdev->max_mod_size < mex->inputdatalength) + continue; + zcrypt_device_get(zdev); + get_device(&zdev->ap_dev->device); + zdev->request_count++; + __zcrypt_decrease_preference(zdev); + spin_unlock_bh(&zcrypt_device_lock); + if (try_module_get(zdev->ap_dev->drv->driver.owner)) { + rc = zdev->ops->rsa_modexpo(zdev, mex); + module_put(zdev->ap_dev->drv->driver.owner); + } + else + rc = -EAGAIN; + spin_lock_bh(&zcrypt_device_lock); + zdev->request_count--; + __zcrypt_increase_preference(zdev); + put_device(&zdev->ap_dev->device); + zcrypt_device_put(zdev); + spin_unlock_bh(&zcrypt_device_lock); + return rc; + } + spin_unlock_bh(&zcrypt_device_lock); + return -ENODEV; +} + +static long zcrypt_rsa_crt(struct ica_rsa_modexpo_crt *crt) +{ + struct zcrypt_device *zdev; + unsigned long long z1, z2, z3; + int rc, copied; + + if (crt->outputdatalength < crt->inputdatalength || + (crt->inputdatalength & 1)) + return -EINVAL; + /** + * As long as outputdatalength is big enough, we can set the + * outputdatalength equal to the inputdatalength, since that is the + * number of bytes we will copy in any case + */ + crt->outputdatalength = crt->inputdatalength; + + copied = 0; + restart: + spin_lock_bh(&zcrypt_device_lock); + list_for_each_entry(zdev, &zcrypt_device_list, list) { + if (!zdev->online || + !zdev->ops->rsa_modexpo_crt || + zdev->min_mod_size > crt->inputdatalength || + zdev->max_mod_size < crt->inputdatalength) + continue; + if (zdev->short_crt && crt->inputdatalength > 240) { + /** + * Check inputdata for leading zeros for cards + * that can't handle np_prime, bp_key, or + * u_mult_inv > 128 bytes. + */ + if (copied == 0) { + int len; + spin_unlock_bh(&zcrypt_device_lock); + /* len is max 256 / 2 - 120 = 8 */ + len = crt->inputdatalength / 2 - 120; + z1 = z2 = z3 = 0; + if (copy_from_user(&z1, crt->np_prime, len) || + copy_from_user(&z2, crt->bp_key, len) || + copy_from_user(&z3, crt->u_mult_inv, len)) + return -EFAULT; + copied = 1; + /** + * We have to restart device lookup - + * the device list may have changed by now. + */ + goto restart; + } + if (z1 != 0ULL || z2 != 0ULL || z3 != 0ULL) + /* The device can't handle this request. */ + continue; + } + zcrypt_device_get(zdev); + get_device(&zdev->ap_dev->device); + zdev->request_count++; + __zcrypt_decrease_preference(zdev); + spin_unlock_bh(&zcrypt_device_lock); + if (try_module_get(zdev->ap_dev->drv->driver.owner)) { + rc = zdev->ops->rsa_modexpo_crt(zdev, crt); + module_put(zdev->ap_dev->drv->driver.owner); + } + else + rc = -EAGAIN; + spin_lock_bh(&zcrypt_device_lock); + zdev->request_count--; + __zcrypt_increase_preference(zdev); + put_device(&zdev->ap_dev->device); + zcrypt_device_put(zdev); + spin_unlock_bh(&zcrypt_device_lock); + return rc; + } + spin_unlock_bh(&zcrypt_device_lock); + return -ENODEV; +} + +static void zcrypt_status_mask(char status[AP_DEVICES]) +{ + struct zcrypt_device *zdev; + + memset(status, 0, sizeof(char) * AP_DEVICES); + spin_lock_bh(&zcrypt_device_lock); + list_for_each_entry(zdev, &zcrypt_device_list, list) + status[AP_QID_DEVICE(zdev->ap_dev->qid)] = + zdev->online ? zdev->user_space_type : 0x0d; + spin_unlock_bh(&zcrypt_device_lock); +} + +static void zcrypt_qdepth_mask(char qdepth[AP_DEVICES]) +{ + struct zcrypt_device *zdev; + + memset(qdepth, 0, sizeof(char) * AP_DEVICES); + spin_lock_bh(&zcrypt_device_lock); + list_for_each_entry(zdev, &zcrypt_device_list, list) { + spin_lock(&zdev->ap_dev->lock); + qdepth[AP_QID_DEVICE(zdev->ap_dev->qid)] = + zdev->ap_dev->pendingq_count + + zdev->ap_dev->requestq_count; + spin_unlock(&zdev->ap_dev->lock); + } + spin_unlock_bh(&zcrypt_device_lock); +} + +static void zcrypt_perdev_reqcnt(int reqcnt[AP_DEVICES]) +{ + struct zcrypt_device *zdev; + + memset(reqcnt, 0, sizeof(int) * AP_DEVICES); + spin_lock_bh(&zcrypt_device_lock); + list_for_each_entry(zdev, &zcrypt_device_list, list) { + spin_lock(&zdev->ap_dev->lock); + reqcnt[AP_QID_DEVICE(zdev->ap_dev->qid)] = + zdev->ap_dev->total_request_count; + spin_unlock(&zdev->ap_dev->lock); + } + spin_unlock_bh(&zcrypt_device_lock); +} + +static int zcrypt_pendingq_count(void) +{ + struct zcrypt_device *zdev; + int pendingq_count = 0; + + spin_lock_bh(&zcrypt_device_lock); + list_for_each_entry(zdev, &zcrypt_device_list, list) { + spin_lock(&zdev->ap_dev->lock); + pendingq_count += zdev->ap_dev->pendingq_count; + spin_unlock(&zdev->ap_dev->lock); + } + spin_unlock_bh(&zcrypt_device_lock); + return pendingq_count; +} + +static int zcrypt_requestq_count(void) +{ + struct zcrypt_device *zdev; + int requestq_count = 0; + + spin_lock_bh(&zcrypt_device_lock); + list_for_each_entry(zdev, &zcrypt_device_list, list) { + spin_lock(&zdev->ap_dev->lock); + requestq_count += zdev->ap_dev->requestq_count; + spin_unlock(&zdev->ap_dev->lock); + } + spin_unlock_bh(&zcrypt_device_lock); + return requestq_count; +} + +static int zcrypt_count_type(int type) +{ + struct zcrypt_device *zdev; + int device_count = 0; + + spin_lock_bh(&zcrypt_device_lock); + list_for_each_entry(zdev, &zcrypt_device_list, list) + if (zdev->user_space_type == type) + device_count++; + spin_unlock_bh(&zcrypt_device_lock); + return device_count; +} + +/** + * Old, deprecated combi status call. + */ +static long zcrypt_ica_status(struct file *filp, unsigned long arg) +{ + struct ica_z90_status *pstat; + int ret; + + pstat = kzalloc(sizeof(*pstat), GFP_KERNEL); + if (!pstat) + return -ENOMEM; + pstat->totalcount = zcrypt_device_count; + pstat->leedslitecount = zcrypt_count_type(ZCRYPT_PCICA); + pstat->leeds2count = zcrypt_count_type(ZCRYPT_PCICC); + pstat->requestqWaitCount = zcrypt_requestq_count(); + pstat->pendingqWaitCount = zcrypt_pendingq_count(); + pstat->totalOpenCount = atomic_read(&zcrypt_open_count); + pstat->cryptoDomain = ap_domain_index; + zcrypt_status_mask(pstat->status); + zcrypt_qdepth_mask(pstat->qdepth); + ret = 0; + if (copy_to_user((void __user *) arg, pstat, sizeof(*pstat))) + ret = -EFAULT; + kfree(pstat); + return ret; +} + +static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + int rc; + + switch (cmd) { + case ICARSAMODEXPO: { + struct ica_rsa_modexpo __user *umex = (void __user *) arg; + struct ica_rsa_modexpo mex; + if (copy_from_user(&mex, umex, sizeof(mex))) + return -EFAULT; + do { + rc = zcrypt_rsa_modexpo(&mex); + } while (rc == -EAGAIN); + if (rc) + return rc; + return put_user(mex.outputdatalength, &umex->outputdatalength); + } + case ICARSACRT: { + struct ica_rsa_modexpo_crt __user *ucrt = (void __user *) arg; + struct ica_rsa_modexpo_crt crt; + if (copy_from_user(&crt, ucrt, sizeof(crt))) + return -EFAULT; + do { + rc = zcrypt_rsa_crt(&crt); + } while (rc == -EAGAIN); + if (rc) + return rc; + return put_user(crt.outputdatalength, &ucrt->outputdatalength); + } + case Z90STAT_STATUS_MASK: { + char status[AP_DEVICES]; + zcrypt_status_mask(status); + if (copy_to_user((char __user *) arg, status, + sizeof(char) * AP_DEVICES)) + return -EFAULT; + return 0; + } + case Z90STAT_QDEPTH_MASK: { + char qdepth[AP_DEVICES]; + zcrypt_qdepth_mask(qdepth); + if (copy_to_user((char __user *) arg, qdepth, + sizeof(char) * AP_DEVICES)) + return -EFAULT; + return 0; + } + case Z90STAT_PERDEV_REQCNT: { + int reqcnt[AP_DEVICES]; + zcrypt_perdev_reqcnt(reqcnt); + if (copy_to_user((int __user *) arg, reqcnt, + sizeof(int) * AP_DEVICES)) + return -EFAULT; + return 0; + } + case Z90STAT_REQUESTQ_COUNT: + return put_user(zcrypt_requestq_count(), (int __user *) arg); + case Z90STAT_PENDINGQ_COUNT: + return put_user(zcrypt_pendingq_count(), (int __user *) arg); + case Z90STAT_TOTALOPEN_COUNT: + return put_user(atomic_read(&zcrypt_open_count), + (int __user *) arg); + case Z90STAT_DOMAIN_INDEX: + return put_user(ap_domain_index, (int __user *) arg); + /** + * Deprecated ioctls. Don't add another device count ioctl, + * you can count them yourself in the user space with the + * output of the Z90STAT_STATUS_MASK ioctl. + */ + case ICAZ90STATUS: + return zcrypt_ica_status(filp, arg); + case Z90STAT_TOTALCOUNT: + return put_user(zcrypt_device_count, (int __user *) arg); + case Z90STAT_PCICACOUNT: + return put_user(zcrypt_count_type(ZCRYPT_PCICA), + (int __user *) arg); + case Z90STAT_PCICCCOUNT: + return put_user(zcrypt_count_type(ZCRYPT_PCICC), + (int __user *) arg); + case Z90STAT_PCIXCCMCL2COUNT: + return put_user(zcrypt_count_type(ZCRYPT_PCIXCC_MCL2), + (int __user *) arg); + case Z90STAT_PCIXCCMCL3COUNT: + return put_user(zcrypt_count_type(ZCRYPT_PCIXCC_MCL3), + (int __user *) arg); + case Z90STAT_PCIXCCCOUNT: + return put_user(zcrypt_count_type(ZCRYPT_PCIXCC_MCL2) + + zcrypt_count_type(ZCRYPT_PCIXCC_MCL3), + (int __user *) arg); + case Z90STAT_CEX2CCOUNT: + return put_user(zcrypt_count_type(ZCRYPT_CEX2C), + (int __user *) arg); + case Z90STAT_CEX2ACOUNT: + return put_user(zcrypt_count_type(ZCRYPT_CEX2A), + (int __user *) arg); + default: + /* unknown ioctl number */ + return -ENOIOCTLCMD; + } +} + +#ifdef CONFIG_COMPAT +/** + * ioctl32 conversion routines + */ +struct compat_ica_rsa_modexpo { + compat_uptr_t inputdata; + unsigned int inputdatalength; + compat_uptr_t outputdata; + unsigned int outputdatalength; + compat_uptr_t b_key; + compat_uptr_t n_modulus; +}; + +static long trans_modexpo32(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct compat_ica_rsa_modexpo __user *umex32 = compat_ptr(arg); + struct compat_ica_rsa_modexpo mex32; + struct ica_rsa_modexpo mex64; + long rc; + + if (copy_from_user(&mex32, umex32, sizeof(mex32))) + return -EFAULT; + mex64.inputdata = compat_ptr(mex32.inputdata); + mex64.inputdatalength = mex32.inputdatalength; + mex64.outputdata = compat_ptr(mex32.outputdata); + mex64.outputdatalength = mex32.outputdatalength; + mex64.b_key = compat_ptr(mex32.b_key); + mex64.n_modulus = compat_ptr(mex32.n_modulus); + do { + rc = zcrypt_rsa_modexpo(&mex64); + } while (rc == -EAGAIN); + if (!rc) + rc = put_user(mex64.outputdatalength, + &umex32->outputdatalength); + return rc; +} + +struct compat_ica_rsa_modexpo_crt { + compat_uptr_t inputdata; + unsigned int inputdatalength; + compat_uptr_t outputdata; + unsigned int outputdatalength; + compat_uptr_t bp_key; + compat_uptr_t bq_key; + compat_uptr_t np_prime; + compat_uptr_t nq_prime; + compat_uptr_t u_mult_inv; +}; + +static long trans_modexpo_crt32(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct compat_ica_rsa_modexpo_crt __user *ucrt32 = compat_ptr(arg); + struct compat_ica_rsa_modexpo_crt crt32; + struct ica_rsa_modexpo_crt crt64; + long rc; + + if (copy_from_user(&crt32, ucrt32, sizeof(crt32))) + return -EFAULT; + crt64.inputdata = compat_ptr(crt32.inputdata); + crt64.inputdatalength = crt32.inputdatalength; + crt64.outputdata= compat_ptr(crt32.outputdata); + crt64.outputdatalength = crt32.outputdatalength; + crt64.bp_key = compat_ptr(crt32.bp_key); + crt64.bq_key = compat_ptr(crt32.bq_key); + crt64.np_prime = compat_ptr(crt32.np_prime); + crt64.nq_prime = compat_ptr(crt32.nq_prime); + crt64.u_mult_inv = compat_ptr(crt32.u_mult_inv); + do { + rc = zcrypt_rsa_crt(&crt64); + } while (rc == -EAGAIN); + if (!rc) + rc = put_user(crt64.outputdatalength, + &ucrt32->outputdatalength); + return rc; +} + +long zcrypt_compat_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + if (cmd == ICARSAMODEXPO) + return trans_modexpo32(filp, cmd, arg); + if (cmd == ICARSACRT) + return trans_modexpo_crt32(filp, cmd, arg); + return zcrypt_unlocked_ioctl(filp, cmd, arg); +} +#endif + +/** + * Misc device file operations. + */ +static struct file_operations zcrypt_fops = { + .owner = THIS_MODULE, + .read = zcrypt_read, + .write = zcrypt_write, + .unlocked_ioctl = zcrypt_unlocked_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = zcrypt_compat_ioctl, +#endif + .open = zcrypt_open, + .release = zcrypt_release +}; + +/** + * Misc device. + */ +static struct miscdevice zcrypt_misc_device = { + .minor = MISC_DYNAMIC_MINOR, + .name = "z90crypt", + .fops = &zcrypt_fops, +}; + +/** + * Deprecated /proc entry support. + */ +static struct proc_dir_entry *zcrypt_entry; + +static inline int sprintcl(unsigned char *outaddr, unsigned char *addr, + unsigned int len) +{ + int hl, i; + + hl = 0; + for (i = 0; i < len; i++) + hl += sprintf(outaddr+hl, "%01x", (unsigned int) addr[i]); + hl += sprintf(outaddr+hl, " "); + return hl; +} + +static inline int sprintrw(unsigned char *outaddr, unsigned char *addr, + unsigned int len) +{ + int hl, inl, c, cx; + + hl = sprintf(outaddr, " "); + inl = 0; + for (c = 0; c < (len / 16); c++) { + hl += sprintcl(outaddr+hl, addr+inl, 16); + inl += 16; + } + cx = len%16; + if (cx) { + hl += sprintcl(outaddr+hl, addr+inl, cx); + inl += cx; + } + hl += sprintf(outaddr+hl, "\n"); + return hl; +} + +static inline int sprinthx(unsigned char *title, unsigned char *outaddr, + unsigned char *addr, unsigned int len) +{ + int hl, inl, r, rx; + + hl = sprintf(outaddr, "\n%s\n", title); + inl = 0; + for (r = 0; r < (len / 64); r++) { + hl += sprintrw(outaddr+hl, addr+inl, 64); + inl += 64; + } + rx = len % 64; + if (rx) { + hl += sprintrw(outaddr+hl, addr+inl, rx); + inl += rx; + } + hl += sprintf(outaddr+hl, "\n"); + return hl; +} + +static inline int sprinthx4(unsigned char *title, unsigned char *outaddr, + unsigned int *array, unsigned int len) +{ + int hl, r; + + hl = sprintf(outaddr, "\n%s\n", title); + for (r = 0; r < len; r++) { + if ((r % 8) == 0) + hl += sprintf(outaddr+hl, " "); + hl += sprintf(outaddr+hl, "%08X ", array[r]); + if ((r % 8) == 7) + hl += sprintf(outaddr+hl, "\n"); + } + hl += sprintf(outaddr+hl, "\n"); + return hl; +} + +static int zcrypt_status_read(char *resp_buff, char **start, off_t offset, + int count, int *eof, void *data) +{ + unsigned char *workarea; + int len; + + len = 0; + + /* resp_buff is a page. Use the right half for a work area */ + workarea = resp_buff + 2000; + len += sprintf(resp_buff + len, "\nzcrypt version: %d.%d.%d\n", + ZCRYPT_VERSION, ZCRYPT_RELEASE, ZCRYPT_VARIANT); + len += sprintf(resp_buff + len, "Cryptographic domain: %d\n", + ap_domain_index); + len += sprintf(resp_buff + len, "Total device count: %d\n", + zcrypt_device_count); + len += sprintf(resp_buff + len, "PCICA count: %d\n", + zcrypt_count_type(ZCRYPT_PCICA)); + len += sprintf(resp_buff + len, "PCICC count: %d\n", + zcrypt_count_type(ZCRYPT_PCICC)); + len += sprintf(resp_buff + len, "PCIXCC MCL2 count: %d\n", + zcrypt_count_type(ZCRYPT_PCIXCC_MCL2)); + len += sprintf(resp_buff + len, "PCIXCC MCL3 count: %d\n", + zcrypt_count_type(ZCRYPT_PCIXCC_MCL3)); + len += sprintf(resp_buff + len, "CEX2C count: %d\n", + zcrypt_count_type(ZCRYPT_CEX2C)); + len += sprintf(resp_buff + len, "CEX2A count: %d\n", + zcrypt_count_type(ZCRYPT_CEX2A)); + len += sprintf(resp_buff + len, "requestq count: %d\n", + zcrypt_requestq_count()); + len += sprintf(resp_buff + len, "pendingq count: %d\n", + zcrypt_pendingq_count()); + len += sprintf(resp_buff + len, "Total open handles: %d\n\n", + atomic_read(&zcrypt_open_count)); + zcrypt_status_mask(workarea); + len += sprinthx("Online devices: 1=PCICA 2=PCICC 3=PCIXCC(MCL2) " + "4=PCIXCC(MCL3) 5=CEX2C 6=CEX2A", + resp_buff+len, workarea, AP_DEVICES); + zcrypt_qdepth_mask(workarea); + len += sprinthx("Waiting work element counts", + resp_buff+len, workarea, AP_DEVICES); + zcrypt_perdev_reqcnt((unsigned int *) workarea); + len += sprinthx4("Per-device successfully completed request counts", + resp_buff+len,(unsigned int *) workarea, AP_DEVICES); + *eof = 1; + memset((void *) workarea, 0x00, AP_DEVICES * sizeof(unsigned int)); + return len; +} + +static void zcrypt_disable_card(int index) +{ + struct zcrypt_device *zdev; + + spin_lock_bh(&zcrypt_device_lock); + list_for_each_entry(zdev, &zcrypt_device_list, list) + if (AP_QID_DEVICE(zdev->ap_dev->qid) == index) { + zdev->online = 0; + ap_flush_queue(zdev->ap_dev); + break; + } + spin_unlock_bh(&zcrypt_device_lock); +} + +static void zcrypt_enable_card(int index) +{ + struct zcrypt_device *zdev; + + spin_lock_bh(&zcrypt_device_lock); + list_for_each_entry(zdev, &zcrypt_device_list, list) + if (AP_QID_DEVICE(zdev->ap_dev->qid) == index) { + zdev->online = 1; + break; + } + spin_unlock_bh(&zcrypt_device_lock); +} + +static int zcrypt_status_write(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + unsigned char *lbuf, *ptr; + unsigned long local_count; + int j; + + if (count <= 0) + return 0; + +#define LBUFSIZE 1200UL + lbuf = kmalloc(LBUFSIZE, GFP_KERNEL); + if (!lbuf) { + PRINTK("kmalloc failed!\n"); + return 0; + } + + local_count = min(LBUFSIZE - 1, count); + if (copy_from_user(lbuf, buffer, local_count) != 0) { + kfree(lbuf); + return -EFAULT; + } + lbuf[local_count] = '\0'; + + ptr = strstr(lbuf, "Online devices"); + if (!ptr) { + PRINTK("Unable to parse data (missing \"Online devices\")\n"); + goto out; + } + ptr = strstr(ptr, "\n"); + if (!ptr) { + PRINTK("Unable to parse data (missing newline " + "after \"Online devices\")\n"); + goto out; + } + ptr++; + + if (strstr(ptr, "Waiting work element counts") == NULL) { + PRINTK("Unable to parse data (missing " + "\"Waiting work element counts\")\n"); + goto out; + } + + for (j = 0; j < 64 && *ptr; ptr++) { + /** + * '0' for no device, '1' for PCICA, '2' for PCICC, + * '3' for PCIXCC_MCL2, '4' for PCIXCC_MCL3, + * '5' for CEX2C and '6' for CEX2A' + */ + if (*ptr >= '0' && *ptr <= '6') + j++; + else if (*ptr == 'd' || *ptr == 'D') + zcrypt_disable_card(j++); + else if (*ptr == 'e' || *ptr == 'E') + zcrypt_enable_card(j++); + else if (*ptr != ' ' && *ptr != '\t') + break; + } +out: + kfree(lbuf); + return count; +} + +/** + * The module initialization code. + */ +int __init zcrypt_api_init(void) +{ + int rc; + + /* Register the request sprayer. */ + rc = misc_register(&zcrypt_misc_device); + if (rc < 0) { + PRINTKW(KERN_ERR "misc_register (minor %d) failed with %d\n", + zcrypt_misc_device.minor, rc); + goto out; + } + + /* Set up the proc file system */ + zcrypt_entry = create_proc_entry("driver/z90crypt", 0644, NULL); + if (!zcrypt_entry) { + PRINTK("Couldn't create z90crypt proc entry\n"); + rc = -ENOMEM; + goto out_misc; + } + zcrypt_entry->nlink = 1; + zcrypt_entry->data = NULL; + zcrypt_entry->read_proc = zcrypt_status_read; + zcrypt_entry->write_proc = zcrypt_status_write; + + return 0; + +out_misc: + misc_deregister(&zcrypt_misc_device); +out: + return rc; +} + +/** + * The module termination code. + */ +void zcrypt_api_exit(void) +{ + remove_proc_entry("driver/z90crypt", NULL); + misc_deregister(&zcrypt_misc_device); +} + +#ifndef CONFIG_ZCRYPT_MONOLITHIC +module_init(zcrypt_api_init); +module_exit(zcrypt_api_exit); +#endif diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h new file mode 100644 index 000000000000..1f0e61f2e9b4 --- /dev/null +++ b/drivers/s390/crypto/zcrypt_api.h @@ -0,0 +1,140 @@ +/* + * linux/drivers/s390/crypto/zcrypt_api.h + * + * zcrypt 2.0.0 + * + * Copyright (C) 2001, 2006 IBM Corporation + * Author(s): Robert Burroughs + * Eric Rossman (edrossma@us.ibm.com) + * Cornelia Huck + * + * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) + * Major cleanup & driver split: Martin Schwidefsky + * Ralph Wuerthner + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _ZCRYPT_API_H_ +#define _ZCRYPT_API_H_ + +/** + * Macro definitions + * + * PDEBUG debugs in the form "zcrypt: function_name -> message" + * + * PRINTK is like PDEBUG, except that it is always enabled + * PRINTKN is like PRINTK, except that it does not include the function name + * PRINTKW is like PRINTK, except that it uses KERN_WARNING + * PRINTKC is like PRINTK, except that it uses KERN_CRIT + */ +#define DEV_NAME "zcrypt" + +#define PRINTK(fmt, args...) \ + printk(KERN_DEBUG DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args) +#define PRINTKN(fmt, args...) \ + printk(KERN_DEBUG DEV_NAME ": " fmt, ## args) +#define PRINTKW(fmt, args...) \ + printk(KERN_WARNING DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args) +#define PRINTKC(fmt, args...) \ + printk(KERN_CRIT DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args) + +#ifdef ZCRYPT_DEBUG +#define PDEBUG(fmt, args...) \ + printk(KERN_DEBUG DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args) +#else +#define PDEBUG(fmt, args...) do {} while (0) +#endif + +#include "ap_bus.h" +#include + +/* deprecated status calls */ +#define ICAZ90STATUS _IOR(ZCRYPT_IOCTL_MAGIC, 0x10, struct ica_z90_status) +#define Z90STAT_PCIXCCCOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x43, int) + +/** + * This structure is deprecated and the corresponding ioctl() has been + * replaced with individual ioctl()s for each piece of data! + */ +struct ica_z90_status { + int totalcount; + int leedslitecount; // PCICA + int leeds2count; // PCICC + // int PCIXCCCount; is not in struct for backward compatibility + int requestqWaitCount; + int pendingqWaitCount; + int totalOpenCount; + int cryptoDomain; + // status: 0=not there, 1=PCICA, 2=PCICC, 3=PCIXCC_MCL2, 4=PCIXCC_MCL3, + // 5=CEX2C + unsigned char status[64]; + // qdepth: # work elements waiting for each device + unsigned char qdepth[64]; +}; + +/** + * device type for an actual device is either PCICA, PCICC, PCIXCC_MCL2, + * PCIXCC_MCL3, CEX2C, or CEX2A + * + * NOTE: PCIXCC_MCL3 refers to a PCIXCC with May 2004 version of Licensed + * Internal Code (LIC) (EC J12220 level 29). + * PCIXCC_MCL2 refers to any LIC before this level. + */ +#define ZCRYPT_PCICA 1 +#define ZCRYPT_PCICC 2 +#define ZCRYPT_PCIXCC_MCL2 3 +#define ZCRYPT_PCIXCC_MCL3 4 +#define ZCRYPT_CEX2C 5 +#define ZCRYPT_CEX2A 6 + +struct zcrypt_device; + +struct zcrypt_ops { + long (*rsa_modexpo)(struct zcrypt_device *, struct ica_rsa_modexpo *); + long (*rsa_modexpo_crt)(struct zcrypt_device *, + struct ica_rsa_modexpo_crt *); +}; + +struct zcrypt_device { + struct list_head list; /* Device list. */ + spinlock_t lock; /* Per device lock. */ + struct kref refcount; /* device refcounting */ + struct ap_device *ap_dev; /* The "real" ap device. */ + struct zcrypt_ops *ops; /* Crypto operations. */ + int online; /* User online/offline */ + + int user_space_type; /* User space device id. */ + char *type_string; /* User space device name. */ + int min_mod_size; /* Min number of bits. */ + int max_mod_size; /* Max number of bits. */ + int short_crt; /* Card has crt length restriction. */ + int speed_rating; /* Speed of the crypto device. */ + + int request_count; /* # current requests. */ + + struct ap_message reply; /* Per-device reply structure. */ +}; + +struct zcrypt_device *zcrypt_device_alloc(size_t); +void zcrypt_device_free(struct zcrypt_device *); +void zcrypt_device_get(struct zcrypt_device *); +int zcrypt_device_put(struct zcrypt_device *); +int zcrypt_device_register(struct zcrypt_device *); +void zcrypt_device_unregister(struct zcrypt_device *); +int zcrypt_api_init(void); +void zcrypt_api_exit(void); + +#endif /* _ZCRYPT_API_H_ */ diff --git a/include/asm-s390/zcrypt.h b/include/asm-s390/zcrypt.h new file mode 100644 index 000000000000..0d6a3e2a3349 --- /dev/null +++ b/include/asm-s390/zcrypt.h @@ -0,0 +1,207 @@ +/* + * include/asm-s390/zcrypt.h + * + * zcrypt 2.0.0 (user-visible header) + * + * Copyright (C) 2001, 2006 IBM Corporation + * Author(s): Robert Burroughs + * Eric Rossman (edrossma@us.ibm.com) + * + * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __ASM_S390_ZCRYPT_H +#define __ASM_S390_ZCRYPT_H + +#define ZCRYPT_VERSION 2 +#define ZCRYPT_RELEASE 1 +#define ZCRYPT_VARIANT 0 + +#include +#include + +/** + * struct ica_rsa_modexpo + * + * Requirements: + * - outputdatalength is at least as large as inputdatalength. + * - All key parts are right justified in their fields, padded on + * the left with zeroes. + * - length(b_key) = inputdatalength + * - length(n_modulus) = inputdatalength + */ +struct ica_rsa_modexpo { + char __user * inputdata; + unsigned int inputdatalength; + char __user * outputdata; + unsigned int outputdatalength; + char __user * b_key; + char __user * n_modulus; +}; + +/** + * struct ica_rsa_modexpo_crt + * + * Requirements: + * - inputdatalength is even. + * - outputdatalength is at least as large as inputdatalength. + * - All key parts are right justified in their fields, padded on + * the left with zeroes. + * - length(bp_key) = inputdatalength/2 + 8 + * - length(bq_key) = inputdatalength/2 + * - length(np_key) = inputdatalength/2 + 8 + * - length(nq_key) = inputdatalength/2 + * - length(u_mult_inv) = inputdatalength/2 + 8 + */ +struct ica_rsa_modexpo_crt { + char __user * inputdata; + unsigned int inputdatalength; + char __user * outputdata; + unsigned int outputdatalength; + char __user * bp_key; + char __user * bq_key; + char __user * np_prime; + char __user * nq_prime; + char __user * u_mult_inv; +}; + +#define ZCRYPT_IOCTL_MAGIC 'z' + +/** + * Interface notes: + * + * The ioctl()s which are implemented (along with relevant details) + * are: + * + * ICARSAMODEXPO + * Perform an RSA operation using a Modulus-Exponent pair + * This takes an ica_rsa_modexpo struct as its arg. + * + * NOTE: please refer to the comments preceding this structure + * for the implementation details for the contents of the + * block + * + * ICARSACRT + * Perform an RSA operation using a Chinese-Remainder Theorem key + * This takes an ica_rsa_modexpo_crt struct as its arg. + * + * NOTE: please refer to the comments preceding this structure + * for the implementation details for the contents of the + * block + * + * Z90STAT_TOTALCOUNT + * Return an integer count of all device types together. + * + * Z90STAT_PCICACOUNT + * Return an integer count of all PCICAs. + * + * Z90STAT_PCICCCOUNT + * Return an integer count of all PCICCs. + * + * Z90STAT_PCIXCCMCL2COUNT + * Return an integer count of all MCL2 PCIXCCs. + * + * Z90STAT_PCIXCCMCL3COUNT + * Return an integer count of all MCL3 PCIXCCs. + * + * Z90STAT_CEX2CCOUNT + * Return an integer count of all CEX2Cs. + * + * Z90STAT_CEX2ACOUNT + * Return an integer count of all CEX2As. + * + * Z90STAT_REQUESTQ_COUNT + * Return an integer count of the number of entries waiting to be + * sent to a device. + * + * Z90STAT_PENDINGQ_COUNT + * Return an integer count of the number of entries sent to a + * device awaiting the reply. + * + * Z90STAT_TOTALOPEN_COUNT + * Return an integer count of the number of open file handles. + * + * Z90STAT_DOMAIN_INDEX + * Return the integer value of the Cryptographic Domain. + * + * Z90STAT_STATUS_MASK + * Return an 64 element array of unsigned chars for the status of + * all devices. + * 0x01: PCICA + * 0x02: PCICC + * 0x03: PCIXCC_MCL2 + * 0x04: PCIXCC_MCL3 + * 0x05: CEX2C + * 0x06: CEX2A + * 0x0d: device is disabled via the proc filesystem + * + * Z90STAT_QDEPTH_MASK + * Return an 64 element array of unsigned chars for the queue + * depth of all devices. + * + * Z90STAT_PERDEV_REQCNT + * Return an 64 element array of unsigned integers for the number + * of successfully completed requests per device since the device + * was detected and made available. + * + * ICAZ90STATUS (deprecated) + * Return some device driver status in a ica_z90_status struct + * This takes an ica_z90_status struct as its arg. + * + * NOTE: this ioctl() is deprecated, and has been replaced with + * single ioctl()s for each type of status being requested + * + * Z90STAT_PCIXCCCOUNT (deprecated) + * Return an integer count of all PCIXCCs (MCL2 + MCL3). + * This is DEPRECATED now that MCL3 PCIXCCs are treated differently from + * MCL2 PCIXCCs. + * + * Z90QUIESCE (not recommended) + * Quiesce the driver. This is intended to stop all new + * requests from being processed. Its use is NOT recommended, + * except in circumstances where there is no other way to stop + * callers from accessing the driver. Its original use was to + * allow the driver to be "drained" of work in preparation for + * a system shutdown. + * + * NOTE: once issued, this ban on new work cannot be undone + * except by unloading and reloading the driver. + */ + +/** + * Supported ioctl calls + */ +#define ICARSAMODEXPO _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x05, 0) +#define ICARSACRT _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0) + +/* New status calls */ +#define Z90STAT_TOTALCOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x40, int) +#define Z90STAT_PCICACOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x41, int) +#define Z90STAT_PCICCCOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x42, int) +#define Z90STAT_PCIXCCMCL2COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4b, int) +#define Z90STAT_PCIXCCMCL3COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4c, int) +#define Z90STAT_CEX2CCOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4d, int) +#define Z90STAT_CEX2ACOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4e, int) +#define Z90STAT_REQUESTQ_COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x44, int) +#define Z90STAT_PENDINGQ_COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x45, int) +#define Z90STAT_TOTALOPEN_COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x46, int) +#define Z90STAT_DOMAIN_INDEX _IOR(ZCRYPT_IOCTL_MAGIC, 0x47, int) +#define Z90STAT_STATUS_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x48, char[64]) +#define Z90STAT_QDEPTH_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x49, char[64]) +#define Z90STAT_PERDEV_REQCNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4a, int[64]) + +#endif /* __ASM_S390_ZCRYPT_H */ From 963ed931c3fd18082bfde0e8704a28955663abf4 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 20 Sep 2006 15:58:29 +0200 Subject: [PATCH 0224/1063] [S390] zcrypt CEX2A, CEX2C, PCICA accelerator card ap bus drivers. Signed-off-by: Ralph Wuerthner Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/zcrypt_cex2a.c | 435 +++++++++++++++++++++++++++++ drivers/s390/crypto/zcrypt_cex2a.h | 126 +++++++++ drivers/s390/crypto/zcrypt_error.h | 133 +++++++++ drivers/s390/crypto/zcrypt_pcica.c | 418 +++++++++++++++++++++++++++ drivers/s390/crypto/zcrypt_pcica.h | 117 ++++++++ 5 files changed, 1229 insertions(+) create mode 100644 drivers/s390/crypto/zcrypt_cex2a.c create mode 100644 drivers/s390/crypto/zcrypt_cex2a.h create mode 100644 drivers/s390/crypto/zcrypt_error.h create mode 100644 drivers/s390/crypto/zcrypt_pcica.c create mode 100644 drivers/s390/crypto/zcrypt_pcica.h diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c new file mode 100644 index 000000000000..350248e5cd93 --- /dev/null +++ b/drivers/s390/crypto/zcrypt_cex2a.c @@ -0,0 +1,435 @@ +/* + * linux/drivers/s390/crypto/zcrypt_cex2a.c + * + * zcrypt 2.0.0 + * + * Copyright (C) 2001, 2006 IBM Corporation + * Author(s): Robert Burroughs + * Eric Rossman (edrossma@us.ibm.com) + * + * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) + * Major cleanup & driver split: Martin Schwidefsky + * Ralph Wuerthner + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include + +#include "ap_bus.h" +#include "zcrypt_api.h" +#include "zcrypt_error.h" +#include "zcrypt_cex2a.h" + +#define CEX2A_MIN_MOD_SIZE 1 /* 8 bits */ +#define CEX2A_MAX_MOD_SIZE 256 /* 2048 bits */ + +#define CEX2A_SPEED_RATING 970 + +#define CEX2A_MAX_MESSAGE_SIZE 0x390 /* sizeof(struct type50_crb2_msg) */ +#define CEX2A_MAX_RESPONSE_SIZE 0x110 /* max outputdatalength + type80_hdr */ + +#define CEX2A_CLEANUP_TIME (15*HZ) + +static struct ap_device_id zcrypt_cex2a_ids[] = { + { AP_DEVICE(AP_DEVICE_TYPE_CEX2A) }, + { /* end of list */ }, +}; + +#ifndef CONFIG_ZCRYPT_MONOLITHIC +MODULE_DEVICE_TABLE(ap, zcrypt_cex2a_ids); +MODULE_AUTHOR("IBM Corporation"); +MODULE_DESCRIPTION("CEX2A Cryptographic Coprocessor device driver, " + "Copyright 2001, 2006 IBM Corporation"); +MODULE_LICENSE("GPL"); +#endif + +static int zcrypt_cex2a_probe(struct ap_device *ap_dev); +static void zcrypt_cex2a_remove(struct ap_device *ap_dev); +static void zcrypt_cex2a_receive(struct ap_device *, struct ap_message *, + struct ap_message *); + +static struct ap_driver zcrypt_cex2a_driver = { + .probe = zcrypt_cex2a_probe, + .remove = zcrypt_cex2a_remove, + .receive = zcrypt_cex2a_receive, + .ids = zcrypt_cex2a_ids, +}; + +/** + * Convert a ICAMEX message to a type50 MEX message. + * + * @zdev: crypto device pointer + * @zreq: crypto request pointer + * @mex: pointer to user input data + * + * Returns 0 on success or -EFAULT. + */ +static int ICAMEX_msg_to_type50MEX_msg(struct zcrypt_device *zdev, + struct ap_message *ap_msg, + struct ica_rsa_modexpo *mex) +{ + unsigned char *mod, *exp, *inp; + int mod_len; + + mod_len = mex->inputdatalength; + + if (mod_len <= 128) { + struct type50_meb1_msg *meb1 = ap_msg->message; + memset(meb1, 0, sizeof(*meb1)); + ap_msg->length = sizeof(*meb1); + meb1->header.msg_type_code = TYPE50_TYPE_CODE; + meb1->header.msg_len = sizeof(*meb1); + meb1->keyblock_type = TYPE50_MEB1_FMT; + mod = meb1->modulus + sizeof(meb1->modulus) - mod_len; + exp = meb1->exponent + sizeof(meb1->exponent) - mod_len; + inp = meb1->message + sizeof(meb1->message) - mod_len; + } else { + struct type50_meb2_msg *meb2 = ap_msg->message; + memset(meb2, 0, sizeof(*meb2)); + ap_msg->length = sizeof(*meb2); + meb2->header.msg_type_code = TYPE50_TYPE_CODE; + meb2->header.msg_len = sizeof(*meb2); + meb2->keyblock_type = TYPE50_MEB2_FMT; + mod = meb2->modulus + sizeof(meb2->modulus) - mod_len; + exp = meb2->exponent + sizeof(meb2->exponent) - mod_len; + inp = meb2->message + sizeof(meb2->message) - mod_len; + } + + if (copy_from_user(mod, mex->n_modulus, mod_len) || + copy_from_user(exp, mex->b_key, mod_len) || + copy_from_user(inp, mex->inputdata, mod_len)) + return -EFAULT; + return 0; +} + +/** + * Convert a ICACRT message to a type50 CRT message. + * + * @zdev: crypto device pointer + * @zreq: crypto request pointer + * @crt: pointer to user input data + * + * Returns 0 on success or -EFAULT. + */ +static int ICACRT_msg_to_type50CRT_msg(struct zcrypt_device *zdev, + struct ap_message *ap_msg, + struct ica_rsa_modexpo_crt *crt) +{ + int mod_len, short_len, long_len, long_offset; + unsigned char *p, *q, *dp, *dq, *u, *inp; + + mod_len = crt->inputdatalength; + short_len = mod_len / 2; + long_len = mod_len / 2 + 8; + + /* + * CEX2A cannot handle p, dp, or U > 128 bytes. + * If we have one of these, we need to do extra checking. + */ + if (long_len > 128) { + /* + * zcrypt_rsa_crt already checked for the leading + * zeroes of np_prime, bp_key and u_mult_inc. + */ + long_offset = long_len - 128; + long_len = 128; + } else + long_offset = 0; + + /* + * Instead of doing extra work for p, dp, U > 64 bytes, we'll just use + * the larger message structure. + */ + if (long_len <= 64) { + struct type50_crb1_msg *crb1 = ap_msg->message; + memset(crb1, 0, sizeof(*crb1)); + ap_msg->length = sizeof(*crb1); + crb1->header.msg_type_code = TYPE50_TYPE_CODE; + crb1->header.msg_len = sizeof(*crb1); + crb1->keyblock_type = TYPE50_CRB1_FMT; + p = crb1->p + sizeof(crb1->p) - long_len; + q = crb1->q + sizeof(crb1->q) - short_len; + dp = crb1->dp + sizeof(crb1->dp) - long_len; + dq = crb1->dq + sizeof(crb1->dq) - short_len; + u = crb1->u + sizeof(crb1->u) - long_len; + inp = crb1->message + sizeof(crb1->message) - mod_len; + } else { + struct type50_crb2_msg *crb2 = ap_msg->message; + memset(crb2, 0, sizeof(*crb2)); + ap_msg->length = sizeof(*crb2); + crb2->header.msg_type_code = TYPE50_TYPE_CODE; + crb2->header.msg_len = sizeof(*crb2); + crb2->keyblock_type = TYPE50_CRB2_FMT; + p = crb2->p + sizeof(crb2->p) - long_len; + q = crb2->q + sizeof(crb2->q) - short_len; + dp = crb2->dp + sizeof(crb2->dp) - long_len; + dq = crb2->dq + sizeof(crb2->dq) - short_len; + u = crb2->u + sizeof(crb2->u) - long_len; + inp = crb2->message + sizeof(crb2->message) - mod_len; + } + + if (copy_from_user(p, crt->np_prime + long_offset, long_len) || + copy_from_user(q, crt->nq_prime, short_len) || + copy_from_user(dp, crt->bp_key + long_offset, long_len) || + copy_from_user(dq, crt->bq_key, short_len) || + copy_from_user(u, crt->u_mult_inv + long_offset, long_len) || + copy_from_user(inp, crt->inputdata, mod_len)) + return -EFAULT; + + + return 0; +} + +/** + * Copy results from a type 80 reply message back to user space. + * + * @zdev: crypto device pointer + * @reply: reply AP message. + * @data: pointer to user output data + * @length: size of user output data + * + * Returns 0 on success or -EFAULT. + */ +static int convert_type80(struct zcrypt_device *zdev, + struct ap_message *reply, + char __user *outputdata, + unsigned int outputdatalength) +{ + struct type80_hdr *t80h = reply->message; + unsigned char *data; + + if (t80h->len < sizeof(*t80h) + outputdatalength) { + /* The result is too short, the CEX2A card may not do that.. */ + zdev->online = 0; + return -EAGAIN; /* repeat the request on a different device. */ + } + BUG_ON(t80h->len > CEX2A_MAX_RESPONSE_SIZE); + data = reply->message + t80h->len - outputdatalength; + if (copy_to_user(outputdata, data, outputdatalength)) + return -EFAULT; + return 0; +} + +static int convert_response(struct zcrypt_device *zdev, + struct ap_message *reply, + char __user *outputdata, + unsigned int outputdatalength) +{ + /* Response type byte is the second byte in the response. */ + switch (((unsigned char *) reply->message)[1]) { + case TYPE82_RSP_CODE: + case TYPE88_RSP_CODE: + return convert_error(zdev, reply); + case TYPE80_RSP_CODE: + return convert_type80(zdev, reply, + outputdata, outputdatalength); + default: /* Unknown response type, this should NEVER EVER happen */ + PRINTK("Unrecognized Message Header: %08x%08x\n", + *(unsigned int *) reply->message, + *(unsigned int *) (reply->message+4)); + zdev->online = 0; + return -EAGAIN; /* repeat the request on a different device. */ + } +} + +/** + * This function is called from the AP bus code after a crypto request + * "msg" has finished with the reply message "reply". + * It is called from tasklet context. + * @ap_dev: pointer to the AP device + * @msg: pointer to the AP message + * @reply: pointer to the AP reply message + */ +static void zcrypt_cex2a_receive(struct ap_device *ap_dev, + struct ap_message *msg, + struct ap_message *reply) +{ + static struct error_hdr error_reply = { + .type = TYPE82_RSP_CODE, + .reply_code = REP82_ERROR_MACHINE_FAILURE, + }; + struct type80_hdr *t80h = reply->message; + int length; + + /* Copy the reply message to the request message buffer. */ + if (IS_ERR(reply)) + memcpy(msg->message, &error_reply, sizeof(error_reply)); + else if (t80h->type == TYPE80_RSP_CODE) { + length = min(CEX2A_MAX_RESPONSE_SIZE, (int) t80h->len); + memcpy(msg->message, reply->message, length); + } else + memcpy(msg->message, reply->message, sizeof error_reply); + complete((struct completion *) msg->private); +} + +static atomic_t zcrypt_step = ATOMIC_INIT(0); + +/** + * The request distributor calls this function if it picked the CEX2A + * device to handle a modexpo request. + * @zdev: pointer to zcrypt_device structure that identifies the + * CEX2A device to the request distributor + * @mex: pointer to the modexpo request buffer + */ +static long zcrypt_cex2a_modexpo(struct zcrypt_device *zdev, + struct ica_rsa_modexpo *mex) +{ + struct ap_message ap_msg; + struct completion work; + int rc; + + ap_msg.message = (void *) kmalloc(CEX2A_MAX_MESSAGE_SIZE, GFP_KERNEL); + if (!ap_msg.message) + return -ENOMEM; + ap_msg.psmid = (((unsigned long long) current->pid) << 32) + + atomic_inc_return(&zcrypt_step); + ap_msg.private = &work; + rc = ICAMEX_msg_to_type50MEX_msg(zdev, &ap_msg, mex); + if (rc) + goto out_free; + init_completion(&work); + ap_queue_message(zdev->ap_dev, &ap_msg); + rc = wait_for_completion_interruptible_timeout( + &work, CEX2A_CLEANUP_TIME); + if (rc > 0) + rc = convert_response(zdev, &ap_msg, mex->outputdata, + mex->outputdatalength); + else { + /* Signal pending or message timed out. */ + ap_cancel_message(zdev->ap_dev, &ap_msg); + if (rc == 0) + /* Message timed out. */ + rc = -ETIME; + } +out_free: + kfree(ap_msg.message); + return rc; +} + +/** + * The request distributor calls this function if it picked the CEX2A + * device to handle a modexpo_crt request. + * @zdev: pointer to zcrypt_device structure that identifies the + * CEX2A device to the request distributor + * @crt: pointer to the modexpoc_crt request buffer + */ +static long zcrypt_cex2a_modexpo_crt(struct zcrypt_device *zdev, + struct ica_rsa_modexpo_crt *crt) +{ + struct ap_message ap_msg; + struct completion work; + int rc; + + ap_msg.message = (void *) kmalloc(CEX2A_MAX_MESSAGE_SIZE, GFP_KERNEL); + if (!ap_msg.message) + return -ENOMEM; + ap_msg.psmid = (((unsigned long long) current->pid) << 32) + + atomic_inc_return(&zcrypt_step); + ap_msg.private = &work; + rc = ICACRT_msg_to_type50CRT_msg(zdev, &ap_msg, crt); + if (rc) + goto out_free; + init_completion(&work); + ap_queue_message(zdev->ap_dev, &ap_msg); + rc = wait_for_completion_interruptible_timeout( + &work, CEX2A_CLEANUP_TIME); + if (rc > 0) + rc = convert_response(zdev, &ap_msg, crt->outputdata, + crt->outputdatalength); + else { + /* Signal pending or message timed out. */ + ap_cancel_message(zdev->ap_dev, &ap_msg); + if (rc == 0) + /* Message timed out. */ + rc = -ETIME; + } +out_free: + kfree(ap_msg.message); + return rc; +} + +/** + * The crypto operations for a CEX2A card. + */ +static struct zcrypt_ops zcrypt_cex2a_ops = { + .rsa_modexpo = zcrypt_cex2a_modexpo, + .rsa_modexpo_crt = zcrypt_cex2a_modexpo_crt, +}; + +/** + * Probe function for CEX2A cards. It always accepts the AP device + * since the bus_match already checked the hardware type. + * @ap_dev: pointer to the AP device. + */ +static int zcrypt_cex2a_probe(struct ap_device *ap_dev) +{ + struct zcrypt_device *zdev; + int rc; + + zdev = zcrypt_device_alloc(CEX2A_MAX_RESPONSE_SIZE); + if (!zdev) + return -ENOMEM; + zdev->ap_dev = ap_dev; + zdev->ops = &zcrypt_cex2a_ops; + zdev->online = 1; + zdev->user_space_type = ZCRYPT_CEX2A; + zdev->type_string = "CEX2A"; + zdev->min_mod_size = CEX2A_MIN_MOD_SIZE; + zdev->max_mod_size = CEX2A_MAX_MOD_SIZE; + zdev->short_crt = 1; + zdev->speed_rating = CEX2A_SPEED_RATING; + ap_dev->reply = &zdev->reply; + ap_dev->private = zdev; + rc = zcrypt_device_register(zdev); + if (rc) + goto out_free; + return 0; + +out_free: + ap_dev->private = NULL; + zcrypt_device_free(zdev); + return rc; +} + +/** + * This is called to remove the extended CEX2A driver information + * if an AP device is removed. + */ +static void zcrypt_cex2a_remove(struct ap_device *ap_dev) +{ + struct zcrypt_device *zdev = ap_dev->private; + + zcrypt_device_unregister(zdev); +} + +int __init zcrypt_cex2a_init(void) +{ + return ap_driver_register(&zcrypt_cex2a_driver, THIS_MODULE, "cex2a"); +} + +void __exit zcrypt_cex2a_exit(void) +{ + ap_driver_unregister(&zcrypt_cex2a_driver); +} + +#ifndef CONFIG_ZCRYPT_MONOLITHIC +module_init(zcrypt_cex2a_init); +module_exit(zcrypt_cex2a_exit); +#endif diff --git a/drivers/s390/crypto/zcrypt_cex2a.h b/drivers/s390/crypto/zcrypt_cex2a.h new file mode 100644 index 000000000000..61a78c32dce4 --- /dev/null +++ b/drivers/s390/crypto/zcrypt_cex2a.h @@ -0,0 +1,126 @@ +/* + * linux/drivers/s390/crypto/zcrypt_cex2a.h + * + * zcrypt 2.0.0 + * + * Copyright (C) 2001, 2006 IBM Corporation + * Author(s): Robert Burroughs + * Eric Rossman (edrossma@us.ibm.com) + * + * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) + * Major cleanup & driver split: Martin Schwidefsky + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _ZCRYPT_CEX2A_H_ +#define _ZCRYPT_CEX2A_H_ + +/** + * The type 50 message family is associated with a CEX2A card. + * + * The four members of the family are described below. + * + * Note that all unsigned char arrays are right-justified and left-padded + * with zeroes. + * + * Note that all reserved fields must be zeroes. + */ +struct type50_hdr { + unsigned char reserved1; + unsigned char msg_type_code; /* 0x50 */ + unsigned short msg_len; + unsigned char reserved2; + unsigned char ignored; + unsigned short reserved3; +} __attribute__((packed)); + +#define TYPE50_TYPE_CODE 0x50 + +#define TYPE50_MEB1_FMT 0x0001 +#define TYPE50_MEB2_FMT 0x0002 +#define TYPE50_CRB1_FMT 0x0011 +#define TYPE50_CRB2_FMT 0x0012 + +/* Mod-Exp, with a small modulus */ +struct type50_meb1_msg { + struct type50_hdr header; + unsigned short keyblock_type; /* 0x0001 */ + unsigned char reserved[6]; + unsigned char exponent[128]; + unsigned char modulus[128]; + unsigned char message[128]; +} __attribute__((packed)); + +/* Mod-Exp, with a large modulus */ +struct type50_meb2_msg { + struct type50_hdr header; + unsigned short keyblock_type; /* 0x0002 */ + unsigned char reserved[6]; + unsigned char exponent[256]; + unsigned char modulus[256]; + unsigned char message[256]; +} __attribute__((packed)); + +/* CRT, with a small modulus */ +struct type50_crb1_msg { + struct type50_hdr header; + unsigned short keyblock_type; /* 0x0011 */ + unsigned char reserved[6]; + unsigned char p[64]; + unsigned char q[64]; + unsigned char dp[64]; + unsigned char dq[64]; + unsigned char u[64]; + unsigned char message[128]; +} __attribute__((packed)); + +/* CRT, with a large modulus */ +struct type50_crb2_msg { + struct type50_hdr header; + unsigned short keyblock_type; /* 0x0012 */ + unsigned char reserved[6]; + unsigned char p[128]; + unsigned char q[128]; + unsigned char dp[128]; + unsigned char dq[128]; + unsigned char u[128]; + unsigned char message[256]; +} __attribute__((packed)); + +/** + * The type 80 response family is associated with a CEX2A card. + * + * Note that all unsigned char arrays are right-justified and left-padded + * with zeroes. + * + * Note that all reserved fields must be zeroes. + */ + +#define TYPE80_RSP_CODE 0x80 + +struct type80_hdr { + unsigned char reserved1; + unsigned char type; /* 0x80 */ + unsigned short len; + unsigned char code; /* 0x00 */ + unsigned char reserved2[3]; + unsigned char reserved3[8]; +} __attribute__((packed)); + +int zcrypt_cex2a_init(void); +void zcrypt_cex2a_exit(void); + +#endif /* _ZCRYPT_CEX2A_H_ */ diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h new file mode 100644 index 000000000000..b22bd055a03b --- /dev/null +++ b/drivers/s390/crypto/zcrypt_error.h @@ -0,0 +1,133 @@ +/* + * linux/drivers/s390/crypto/zcrypt_error.h + * + * zcrypt 2.0.0 + * + * Copyright (C) 2001, 2006 IBM Corporation + * Author(s): Robert Burroughs + * Eric Rossman (edrossma@us.ibm.com) + * + * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) + * Major cleanup & driver split: Martin Schwidefsky + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _ZCRYPT_ERROR_H_ +#define _ZCRYPT_ERROR_H_ + +#include "zcrypt_api.h" + +/** + * Reply Messages + * + * Error reply messages are of two types: + * 82: Error (see below) + * 88: Error (see below) + * Both type 82 and type 88 have the same structure in the header. + * + * Request reply messages are of three known types: + * 80: Reply from a Type 50 Request (see CEX2A-RELATED STRUCTS) + * 84: Reply from a Type 4 Request (see PCICA-RELATED STRUCTS) + * 86: Reply from a Type 6 Request (see PCICC/PCIXCC/CEX2C-RELATED STRUCTS) + * + */ +struct error_hdr { + unsigned char reserved1; /* 0x00 */ + unsigned char type; /* 0x82 or 0x88 */ + unsigned char reserved2[2]; /* 0x0000 */ + unsigned char reply_code; /* reply code */ + unsigned char reserved3[3]; /* 0x000000 */ +}; + +#define TYPE82_RSP_CODE 0x82 +#define TYPE88_RSP_CODE 0x88 + +#define REP82_ERROR_MACHINE_FAILURE 0x10 +#define REP82_ERROR_PREEMPT_FAILURE 0x12 +#define REP82_ERROR_CHECKPT_FAILURE 0x14 +#define REP82_ERROR_MESSAGE_TYPE 0x20 +#define REP82_ERROR_INVALID_COMM_CD 0x21 /* Type 84 */ +#define REP82_ERROR_INVALID_MSG_LEN 0x23 +#define REP82_ERROR_RESERVD_FIELD 0x24 /* was 0x50 */ +#define REP82_ERROR_FORMAT_FIELD 0x29 +#define REP82_ERROR_INVALID_COMMAND 0x30 +#define REP82_ERROR_MALFORMED_MSG 0x40 +#define REP82_ERROR_RESERVED_FIELDO 0x50 /* old value */ +#define REP82_ERROR_WORD_ALIGNMENT 0x60 +#define REP82_ERROR_MESSAGE_LENGTH 0x80 +#define REP82_ERROR_OPERAND_INVALID 0x82 +#define REP82_ERROR_OPERAND_SIZE 0x84 +#define REP82_ERROR_EVEN_MOD_IN_OPND 0x85 +#define REP82_ERROR_RESERVED_FIELD 0x88 +#define REP82_ERROR_TRANSPORT_FAIL 0x90 +#define REP82_ERROR_PACKET_TRUNCATED 0xA0 +#define REP82_ERROR_ZERO_BUFFER_LEN 0xB0 + +#define REP88_ERROR_MODULE_FAILURE 0x10 + +#define REP88_ERROR_MESSAGE_TYPE 0x20 +#define REP88_ERROR_MESSAGE_MALFORMD 0x22 +#define REP88_ERROR_MESSAGE_LENGTH 0x23 +#define REP88_ERROR_RESERVED_FIELD 0x24 +#define REP88_ERROR_KEY_TYPE 0x34 +#define REP88_ERROR_INVALID_KEY 0x82 /* CEX2A */ +#define REP88_ERROR_OPERAND 0x84 /* CEX2A */ +#define REP88_ERROR_OPERAND_EVEN_MOD 0x85 /* CEX2A */ + +static inline int convert_error(struct zcrypt_device *zdev, + struct ap_message *reply) +{ + struct error_hdr *ehdr = reply->message; + + PRINTK("Hardware error : Type %02x Message Header: %08x%08x\n", + ehdr->type, *(unsigned int *) reply->message, + *(unsigned int *) (reply->message + 4)); + + switch (ehdr->reply_code) { + case REP82_ERROR_OPERAND_INVALID: + case REP82_ERROR_OPERAND_SIZE: + case REP82_ERROR_EVEN_MOD_IN_OPND: + case REP88_ERROR_MESSAGE_MALFORMD: + // REP88_ERROR_INVALID_KEY // '82' CEX2A + // REP88_ERROR_OPERAND // '84' CEX2A + // REP88_ERROR_OPERAND_EVEN_MOD // '85' CEX2A + /* Invalid input data. */ + return -EINVAL; + case REP82_ERROR_MESSAGE_TYPE: + // REP88_ERROR_MESSAGE_TYPE // '20' CEX2A + /** + * To sent a message of the wrong type is a bug in the + * device driver. Warn about it, disable the device + * and then repeat the request. + */ + WARN_ON(1); + zdev->online = 0; + return -EAGAIN; + case REP82_ERROR_TRANSPORT_FAIL: + case REP82_ERROR_MACHINE_FAILURE: + // REP88_ERROR_MODULE_FAILURE // '10' CEX2A + /* If a card fails disable it and repeat the request. */ + zdev->online = 0; + return -EAGAIN; + default: + PRINTKW("unknown type %02x reply code = %d\n", + ehdr->type, ehdr->reply_code); + zdev->online = 0; + return -EAGAIN; /* repeat the request on a different device. */ + } +} + +#endif /* _ZCRYPT_ERROR_H_ */ diff --git a/drivers/s390/crypto/zcrypt_pcica.c b/drivers/s390/crypto/zcrypt_pcica.c new file mode 100644 index 000000000000..0ff56e86caae --- /dev/null +++ b/drivers/s390/crypto/zcrypt_pcica.c @@ -0,0 +1,418 @@ +/* + * linux/drivers/s390/crypto/zcrypt_pcica.c + * + * zcrypt 2.0.0 + * + * Copyright (C) 2001, 2006 IBM Corporation + * Author(s): Robert Burroughs + * Eric Rossman (edrossma@us.ibm.com) + * + * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) + * Major cleanup & driver split: Martin Schwidefsky + * Ralph Wuerthner + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include + +#include "ap_bus.h" +#include "zcrypt_api.h" +#include "zcrypt_error.h" +#include "zcrypt_pcica.h" + +#define PCICA_MIN_MOD_SIZE 1 /* 8 bits */ +#define PCICA_MAX_MOD_SIZE 256 /* 2048 bits */ + +#define PCICA_SPEED_RATING 2800 + +#define PCICA_MAX_MESSAGE_SIZE 0x3a0 /* sizeof(struct type4_lcr) */ +#define PCICA_MAX_RESPONSE_SIZE 0x110 /* max outputdatalength + type80_hdr */ + +#define PCICA_CLEANUP_TIME (15*HZ) + +static struct ap_device_id zcrypt_pcica_ids[] = { + { AP_DEVICE(AP_DEVICE_TYPE_PCICA) }, + { /* end of list */ }, +}; + +#ifndef CONFIG_ZCRYPT_MONOLITHIC +MODULE_DEVICE_TABLE(ap, zcrypt_pcica_ids); +MODULE_AUTHOR("IBM Corporation"); +MODULE_DESCRIPTION("PCICA Cryptographic Coprocessor device driver, " + "Copyright 2001, 2006 IBM Corporation"); +MODULE_LICENSE("GPL"); +#endif + +static int zcrypt_pcica_probe(struct ap_device *ap_dev); +static void zcrypt_pcica_remove(struct ap_device *ap_dev); +static void zcrypt_pcica_receive(struct ap_device *, struct ap_message *, + struct ap_message *); + +static struct ap_driver zcrypt_pcica_driver = { + .probe = zcrypt_pcica_probe, + .remove = zcrypt_pcica_remove, + .receive = zcrypt_pcica_receive, + .ids = zcrypt_pcica_ids, +}; + +/** + * Convert a ICAMEX message to a type4 MEX message. + * + * @zdev: crypto device pointer + * @zreq: crypto request pointer + * @mex: pointer to user input data + * + * Returns 0 on success or -EFAULT. + */ +static int ICAMEX_msg_to_type4MEX_msg(struct zcrypt_device *zdev, + struct ap_message *ap_msg, + struct ica_rsa_modexpo *mex) +{ + unsigned char *modulus, *exponent, *message; + int mod_len; + + mod_len = mex->inputdatalength; + + if (mod_len <= 128) { + struct type4_sme *sme = ap_msg->message; + memset(sme, 0, sizeof(*sme)); + ap_msg->length = sizeof(*sme); + sme->header.msg_fmt = TYPE4_SME_FMT; + sme->header.msg_len = sizeof(*sme); + sme->header.msg_type_code = TYPE4_TYPE_CODE; + sme->header.request_code = TYPE4_REQU_CODE; + modulus = sme->modulus + sizeof(sme->modulus) - mod_len; + exponent = sme->exponent + sizeof(sme->exponent) - mod_len; + message = sme->message + sizeof(sme->message) - mod_len; + } else { + struct type4_lme *lme = ap_msg->message; + memset(lme, 0, sizeof(*lme)); + ap_msg->length = sizeof(*lme); + lme->header.msg_fmt = TYPE4_LME_FMT; + lme->header.msg_len = sizeof(*lme); + lme->header.msg_type_code = TYPE4_TYPE_CODE; + lme->header.request_code = TYPE4_REQU_CODE; + modulus = lme->modulus + sizeof(lme->modulus) - mod_len; + exponent = lme->exponent + sizeof(lme->exponent) - mod_len; + message = lme->message + sizeof(lme->message) - mod_len; + } + + if (copy_from_user(modulus, mex->n_modulus, mod_len) || + copy_from_user(exponent, mex->b_key, mod_len) || + copy_from_user(message, mex->inputdata, mod_len)) + return -EFAULT; + return 0; +} + +/** + * Convert a ICACRT message to a type4 CRT message. + * + * @zdev: crypto device pointer + * @zreq: crypto request pointer + * @crt: pointer to user input data + * + * Returns 0 on success or -EFAULT. + */ +static int ICACRT_msg_to_type4CRT_msg(struct zcrypt_device *zdev, + struct ap_message *ap_msg, + struct ica_rsa_modexpo_crt *crt) +{ + unsigned char *p, *q, *dp, *dq, *u, *inp; + int mod_len, short_len, long_len; + + mod_len = crt->inputdatalength; + short_len = mod_len / 2; + long_len = mod_len / 2 + 8; + + if (mod_len <= 128) { + struct type4_scr *scr = ap_msg->message; + memset(scr, 0, sizeof(*scr)); + ap_msg->length = sizeof(*scr); + scr->header.msg_type_code = TYPE4_TYPE_CODE; + scr->header.request_code = TYPE4_REQU_CODE; + scr->header.msg_fmt = TYPE4_SCR_FMT; + scr->header.msg_len = sizeof(*scr); + p = scr->p + sizeof(scr->p) - long_len; + q = scr->q + sizeof(scr->q) - short_len; + dp = scr->dp + sizeof(scr->dp) - long_len; + dq = scr->dq + sizeof(scr->dq) - short_len; + u = scr->u + sizeof(scr->u) - long_len; + inp = scr->message + sizeof(scr->message) - mod_len; + } else { + struct type4_lcr *lcr = ap_msg->message; + memset(lcr, 0, sizeof(*lcr)); + ap_msg->length = sizeof(*lcr); + lcr->header.msg_type_code = TYPE4_TYPE_CODE; + lcr->header.request_code = TYPE4_REQU_CODE; + lcr->header.msg_fmt = TYPE4_LCR_FMT; + lcr->header.msg_len = sizeof(*lcr); + p = lcr->p + sizeof(lcr->p) - long_len; + q = lcr->q + sizeof(lcr->q) - short_len; + dp = lcr->dp + sizeof(lcr->dp) - long_len; + dq = lcr->dq + sizeof(lcr->dq) - short_len; + u = lcr->u + sizeof(lcr->u) - long_len; + inp = lcr->message + sizeof(lcr->message) - mod_len; + } + + if (copy_from_user(p, crt->np_prime, long_len) || + copy_from_user(q, crt->nq_prime, short_len) || + copy_from_user(dp, crt->bp_key, long_len) || + copy_from_user(dq, crt->bq_key, short_len) || + copy_from_user(u, crt->u_mult_inv, long_len) || + copy_from_user(inp, crt->inputdata, mod_len)) + return -EFAULT; + return 0; +} + +/** + * Copy results from a type 84 reply message back to user space. + * + * @zdev: crypto device pointer + * @reply: reply AP message. + * @data: pointer to user output data + * @length: size of user output data + * + * Returns 0 on success or -EFAULT. + */ +static inline int convert_type84(struct zcrypt_device *zdev, + struct ap_message *reply, + char __user *outputdata, + unsigned int outputdatalength) +{ + struct type84_hdr *t84h = reply->message; + char *data; + + if (t84h->len < sizeof(*t84h) + outputdatalength) { + /* The result is too short, the PCICA card may not do that.. */ + zdev->online = 0; + return -EAGAIN; /* repeat the request on a different device. */ + } + BUG_ON(t84h->len > PCICA_MAX_RESPONSE_SIZE); + data = reply->message + t84h->len - outputdatalength; + if (copy_to_user(outputdata, data, outputdatalength)) + return -EFAULT; + return 0; +} + +static int convert_response(struct zcrypt_device *zdev, + struct ap_message *reply, + char __user *outputdata, + unsigned int outputdatalength) +{ + /* Response type byte is the second byte in the response. */ + switch (((unsigned char *) reply->message)[1]) { + case TYPE82_RSP_CODE: + case TYPE88_RSP_CODE: + return convert_error(zdev, reply); + case TYPE84_RSP_CODE: + return convert_type84(zdev, reply, + outputdata, outputdatalength); + default: /* Unknown response type, this should NEVER EVER happen */ + PRINTK("Unrecognized Message Header: %08x%08x\n", + *(unsigned int *) reply->message, + *(unsigned int *) (reply->message+4)); + zdev->online = 0; + return -EAGAIN; /* repeat the request on a different device. */ + } +} + +/** + * This function is called from the AP bus code after a crypto request + * "msg" has finished with the reply message "reply". + * It is called from tasklet context. + * @ap_dev: pointer to the AP device + * @msg: pointer to the AP message + * @reply: pointer to the AP reply message + */ +static void zcrypt_pcica_receive(struct ap_device *ap_dev, + struct ap_message *msg, + struct ap_message *reply) +{ + static struct error_hdr error_reply = { + .type = TYPE82_RSP_CODE, + .reply_code = REP82_ERROR_MACHINE_FAILURE, + }; + struct type84_hdr *t84h = reply->message; + int length; + + /* Copy the reply message to the request message buffer. */ + if (IS_ERR(reply)) + memcpy(msg->message, &error_reply, sizeof(error_reply)); + else if (t84h->code == TYPE84_RSP_CODE) { + length = min(PCICA_MAX_RESPONSE_SIZE, (int) t84h->len); + memcpy(msg->message, reply->message, length); + } else + memcpy(msg->message, reply->message, sizeof error_reply); + complete((struct completion *) msg->private); +} + +static atomic_t zcrypt_step = ATOMIC_INIT(0); + +/** + * The request distributor calls this function if it picked the PCICA + * device to handle a modexpo request. + * @zdev: pointer to zcrypt_device structure that identifies the + * PCICA device to the request distributor + * @mex: pointer to the modexpo request buffer + */ +static long zcrypt_pcica_modexpo(struct zcrypt_device *zdev, + struct ica_rsa_modexpo *mex) +{ + struct ap_message ap_msg; + struct completion work; + int rc; + + ap_msg.message = (void *) kmalloc(PCICA_MAX_MESSAGE_SIZE, GFP_KERNEL); + if (!ap_msg.message) + return -ENOMEM; + ap_msg.psmid = (((unsigned long long) current->pid) << 32) + + atomic_inc_return(&zcrypt_step); + ap_msg.private = &work; + rc = ICAMEX_msg_to_type4MEX_msg(zdev, &ap_msg, mex); + if (rc) + goto out_free; + init_completion(&work); + ap_queue_message(zdev->ap_dev, &ap_msg); + rc = wait_for_completion_interruptible_timeout( + &work, PCICA_CLEANUP_TIME); + if (rc > 0) + rc = convert_response(zdev, &ap_msg, mex->outputdata, + mex->outputdatalength); + else { + /* Signal pending or message timed out. */ + ap_cancel_message(zdev->ap_dev, &ap_msg); + if (rc == 0) + /* Message timed out. */ + rc = -ETIME; + } +out_free: + kfree(ap_msg.message); + return rc; +} + +/** + * The request distributor calls this function if it picked the PCICA + * device to handle a modexpo_crt request. + * @zdev: pointer to zcrypt_device structure that identifies the + * PCICA device to the request distributor + * @crt: pointer to the modexpoc_crt request buffer + */ +static long zcrypt_pcica_modexpo_crt(struct zcrypt_device *zdev, + struct ica_rsa_modexpo_crt *crt) +{ + struct ap_message ap_msg; + struct completion work; + int rc; + + ap_msg.message = (void *) kmalloc(PCICA_MAX_MESSAGE_SIZE, GFP_KERNEL); + if (!ap_msg.message) + return -ENOMEM; + ap_msg.psmid = (((unsigned long long) current->pid) << 32) + + atomic_inc_return(&zcrypt_step); + ap_msg.private = &work; + rc = ICACRT_msg_to_type4CRT_msg(zdev, &ap_msg, crt); + if (rc) + goto out_free; + init_completion(&work); + ap_queue_message(zdev->ap_dev, &ap_msg); + rc = wait_for_completion_interruptible_timeout( + &work, PCICA_CLEANUP_TIME); + if (rc > 0) + rc = convert_response(zdev, &ap_msg, crt->outputdata, + crt->outputdatalength); + else { + /* Signal pending or message timed out. */ + ap_cancel_message(zdev->ap_dev, &ap_msg); + if (rc == 0) + /* Message timed out. */ + rc = -ETIME; + } +out_free: + kfree(ap_msg.message); + return rc; +} + +/** + * The crypto operations for a PCICA card. + */ +static struct zcrypt_ops zcrypt_pcica_ops = { + .rsa_modexpo = zcrypt_pcica_modexpo, + .rsa_modexpo_crt = zcrypt_pcica_modexpo_crt, +}; + +/** + * Probe function for PCICA cards. It always accepts the AP device + * since the bus_match already checked the hardware type. + * @ap_dev: pointer to the AP device. + */ +static int zcrypt_pcica_probe(struct ap_device *ap_dev) +{ + struct zcrypt_device *zdev; + int rc; + + zdev = zcrypt_device_alloc(PCICA_MAX_RESPONSE_SIZE); + if (!zdev) + return -ENOMEM; + zdev->ap_dev = ap_dev; + zdev->ops = &zcrypt_pcica_ops; + zdev->online = 1; + zdev->user_space_type = ZCRYPT_PCICA; + zdev->type_string = "PCICA"; + zdev->min_mod_size = PCICA_MIN_MOD_SIZE; + zdev->max_mod_size = PCICA_MAX_MOD_SIZE; + zdev->speed_rating = PCICA_SPEED_RATING; + ap_dev->reply = &zdev->reply; + ap_dev->private = zdev; + rc = zcrypt_device_register(zdev); + if (rc) + goto out_free; + return 0; + +out_free: + ap_dev->private = NULL; + zcrypt_device_free(zdev); + return rc; +} + +/** + * This is called to remove the extended PCICA driver information + * if an AP device is removed. + */ +static void zcrypt_pcica_remove(struct ap_device *ap_dev) +{ + struct zcrypt_device *zdev = ap_dev->private; + + zcrypt_device_unregister(zdev); +} + +int __init zcrypt_pcica_init(void) +{ + return ap_driver_register(&zcrypt_pcica_driver, THIS_MODULE, "pcica"); +} + +void zcrypt_pcica_exit(void) +{ + ap_driver_unregister(&zcrypt_pcica_driver); +} + +#ifndef CONFIG_ZCRYPT_MONOLITHIC +module_init(zcrypt_pcica_init); +module_exit(zcrypt_pcica_exit); +#endif diff --git a/drivers/s390/crypto/zcrypt_pcica.h b/drivers/s390/crypto/zcrypt_pcica.h new file mode 100644 index 000000000000..a08a4f8c33c9 --- /dev/null +++ b/drivers/s390/crypto/zcrypt_pcica.h @@ -0,0 +1,117 @@ +/* + * linux/drivers/s390/crypto/zcrypt_pcica.h + * + * zcrypt 2.0.0 + * + * Copyright (C) 2001, 2006 IBM Corporation + * Author(s): Robert Burroughs + * Eric Rossman (edrossma@us.ibm.com) + * + * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) + * Major cleanup & driver split: Martin Schwidefsky + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _ZCRYPT_PCICA_H_ +#define _ZCRYPT_PCICA_H_ + +/** + * The type 4 message family is associated with a PCICA card. + * + * The four members of the family are described below. + * + * Note that all unsigned char arrays are right-justified and left-padded + * with zeroes. + * + * Note that all reserved fields must be zeroes. + */ +struct type4_hdr { + unsigned char reserved1; + unsigned char msg_type_code; /* 0x04 */ + unsigned short msg_len; + unsigned char request_code; /* 0x40 */ + unsigned char msg_fmt; + unsigned short reserved2; +} __attribute__((packed)); + +#define TYPE4_TYPE_CODE 0x04 +#define TYPE4_REQU_CODE 0x40 + +#define TYPE4_SME_FMT 0x00 +#define TYPE4_LME_FMT 0x10 +#define TYPE4_SCR_FMT 0x40 +#define TYPE4_LCR_FMT 0x50 + +/* Mod-Exp, with a small modulus */ +struct type4_sme { + struct type4_hdr header; + unsigned char message[128]; + unsigned char exponent[128]; + unsigned char modulus[128]; +} __attribute__((packed)); + +/* Mod-Exp, with a large modulus */ +struct type4_lme { + struct type4_hdr header; + unsigned char message[256]; + unsigned char exponent[256]; + unsigned char modulus[256]; +} __attribute__((packed)); + +/* CRT, with a small modulus */ +struct type4_scr { + struct type4_hdr header; + unsigned char message[128]; + unsigned char dp[72]; + unsigned char dq[64]; + unsigned char p[72]; + unsigned char q[64]; + unsigned char u[72]; +} __attribute__((packed)); + +/* CRT, with a large modulus */ +struct type4_lcr { + struct type4_hdr header; + unsigned char message[256]; + unsigned char dp[136]; + unsigned char dq[128]; + unsigned char p[136]; + unsigned char q[128]; + unsigned char u[136]; +} __attribute__((packed)); + +/** + * The type 84 response family is associated with a PCICA card. + * + * Note that all unsigned char arrays are right-justified and left-padded + * with zeroes. + * + * Note that all reserved fields must be zeroes. + */ + +struct type84_hdr { + unsigned char reserved1; + unsigned char code; + unsigned short len; + unsigned char reserved2[4]; +} __attribute__((packed)); + +#define TYPE84_RSP_CODE 0x84 + +int zcrypt_pcica_init(void); +void zcrypt_pcica_exit(void); + +#endif /* _ZCRYPT_PCICA_H_ */ From 6684af1a07a1f88f3970bc90e5aed173d39168db Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 20 Sep 2006 15:58:32 +0200 Subject: [PATCH 0225/1063] [S390] zcrypt PCICC, PCIXCC coprocessor card ap bus drivers. Signed-off-by: Ralph Wuerthner Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/zcrypt_cca_key.h | 350 +++++++++++++ drivers/s390/crypto/zcrypt_pcicc.c | 630 +++++++++++++++++++++++ drivers/s390/crypto/zcrypt_pcicc.h | 176 +++++++ drivers/s390/crypto/zcrypt_pcixcc.c | 714 +++++++++++++++++++++++++++ drivers/s390/crypto/zcrypt_pcixcc.h | 79 +++ 5 files changed, 1949 insertions(+) create mode 100644 drivers/s390/crypto/zcrypt_cca_key.h create mode 100644 drivers/s390/crypto/zcrypt_pcicc.c create mode 100644 drivers/s390/crypto/zcrypt_pcicc.h create mode 100644 drivers/s390/crypto/zcrypt_pcixcc.c create mode 100644 drivers/s390/crypto/zcrypt_pcixcc.h diff --git a/drivers/s390/crypto/zcrypt_cca_key.h b/drivers/s390/crypto/zcrypt_cca_key.h new file mode 100644 index 000000000000..c80f40d44197 --- /dev/null +++ b/drivers/s390/crypto/zcrypt_cca_key.h @@ -0,0 +1,350 @@ +/* + * linux/drivers/s390/crypto/zcrypt_cca_key.h + * + * zcrypt 2.0.0 + * + * Copyright (C) 2001, 2006 IBM Corporation + * Author(s): Robert Burroughs + * Eric Rossman (edrossma@us.ibm.com) + * + * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) + * Major cleanup & driver split: Martin Schwidefsky + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _ZCRYPT_CCA_KEY_H_ +#define _ZCRYPT_CCA_KEY_H_ + +struct T6_keyBlock_hdr { + unsigned short blen; + unsigned short ulen; + unsigned short flags; +}; + +/** + * mapping for the cca private ME key token. + * Three parts of interest here: the header, the private section and + * the public section. + * + * mapping for the cca key token header + */ +struct cca_token_hdr { + unsigned char token_identifier; + unsigned char version; + unsigned short token_length; + unsigned char reserved[4]; +} __attribute__((packed)); + +#define CCA_TKN_HDR_ID_EXT 0x1E + +/** + * mapping for the cca private ME section + */ +struct cca_private_ext_ME_sec { + unsigned char section_identifier; + unsigned char version; + unsigned short section_length; + unsigned char private_key_hash[20]; + unsigned char reserved1[4]; + unsigned char key_format; + unsigned char reserved2; + unsigned char key_name_hash[20]; + unsigned char key_use_flags[4]; + unsigned char reserved3[6]; + unsigned char reserved4[24]; + unsigned char confounder[24]; + unsigned char exponent[128]; + unsigned char modulus[128]; +} __attribute__((packed)); + +#define CCA_PVT_USAGE_ALL 0x80 + +/** + * mapping for the cca public section + * In a private key, the modulus doesn't appear in the public + * section. So, an arbitrary public exponent of 0x010001 will be + * used, for a section length of 0x0F always. + */ +struct cca_public_sec { + unsigned char section_identifier; + unsigned char version; + unsigned short section_length; + unsigned char reserved[2]; + unsigned short exponent_len; + unsigned short modulus_bit_len; + unsigned short modulus_byte_len; /* In a private key, this is 0 */ +} __attribute__((packed)); + +/** + * mapping for the cca private CRT key 'token' + * The first three parts (the only parts considered in this release) + * are: the header, the private section and the public section. + * The header and public section are the same as for the + * struct cca_private_ext_ME + * + * Following the structure are the quantities p, q, dp, dq, u, pad, + * and modulus, in that order, where pad_len is the modulo 8 + * complement of the residue modulo 8 of the sum of + * (p_len + q_len + dp_len + dq_len + u_len). + */ +struct cca_pvt_ext_CRT_sec { + unsigned char section_identifier; + unsigned char version; + unsigned short section_length; + unsigned char private_key_hash[20]; + unsigned char reserved1[4]; + unsigned char key_format; + unsigned char reserved2; + unsigned char key_name_hash[20]; + unsigned char key_use_flags[4]; + unsigned short p_len; + unsigned short q_len; + unsigned short dp_len; + unsigned short dq_len; + unsigned short u_len; + unsigned short mod_len; + unsigned char reserved3[4]; + unsigned short pad_len; + unsigned char reserved4[52]; + unsigned char confounder[8]; +} __attribute__((packed)); + +#define CCA_PVT_EXT_CRT_SEC_ID_PVT 0x08 +#define CCA_PVT_EXT_CRT_SEC_FMT_CL 0x40 + +/** + * Set up private key fields of a type6 MEX message. + * Note that all numerics in the key token are big-endian, + * while the entries in the key block header are little-endian. + * + * @mex: pointer to user input data + * @p: pointer to memory area for the key + * + * Returns the size of the key area or -EFAULT + */ +static inline int zcrypt_type6_mex_key_de(struct ica_rsa_modexpo *mex, + void *p, int big_endian) +{ + static struct cca_token_hdr static_pvt_me_hdr = { + .token_identifier = 0x1E, + .token_length = 0x0183, + }; + static struct cca_private_ext_ME_sec static_pvt_me_sec = { + .section_identifier = 0x02, + .section_length = 0x016C, + .key_use_flags = {0x80,0x00,0x00,0x00}, + }; + static struct cca_public_sec static_pub_me_sec = { + .section_identifier = 0x04, + .section_length = 0x000F, + .exponent_len = 0x0003, + }; + static char pk_exponent[3] = { 0x01, 0x00, 0x01 }; + struct { + struct T6_keyBlock_hdr t6_hdr; + struct cca_token_hdr pvtMeHdr; + struct cca_private_ext_ME_sec pvtMeSec; + struct cca_public_sec pubMeSec; + char exponent[3]; + } __attribute__((packed)) *key = p; + unsigned char *temp; + + memset(key, 0, sizeof(*key)); + + if (big_endian) { + key->t6_hdr.blen = cpu_to_be16(0x189); + key->t6_hdr.ulen = cpu_to_be16(0x189 - 2); + } else { + key->t6_hdr.blen = cpu_to_le16(0x189); + key->t6_hdr.ulen = cpu_to_le16(0x189 - 2); + } + key->pvtMeHdr = static_pvt_me_hdr; + key->pvtMeSec = static_pvt_me_sec; + key->pubMeSec = static_pub_me_sec; + /** + * In a private key, the modulus doesn't appear in the public + * section. So, an arbitrary public exponent of 0x010001 will be + * used. + */ + memcpy(key->exponent, pk_exponent, 3); + + /* key parameter block */ + temp = key->pvtMeSec.exponent + + sizeof(key->pvtMeSec.exponent) - mex->inputdatalength; + if (copy_from_user(temp, mex->b_key, mex->inputdatalength)) + return -EFAULT; + + /* modulus */ + temp = key->pvtMeSec.modulus + + sizeof(key->pvtMeSec.modulus) - mex->inputdatalength; + if (copy_from_user(temp, mex->n_modulus, mex->inputdatalength)) + return -EFAULT; + key->pubMeSec.modulus_bit_len = 8 * mex->inputdatalength; + return sizeof(*key); +} + +/** + * Set up private key fields of a type6 MEX message. The _pad variant + * strips leading zeroes from the b_key. + * Note that all numerics in the key token are big-endian, + * while the entries in the key block header are little-endian. + * + * @mex: pointer to user input data + * @p: pointer to memory area for the key + * + * Returns the size of the key area or -EFAULT + */ +static inline int zcrypt_type6_mex_key_en(struct ica_rsa_modexpo *mex, + void *p, int big_endian) +{ + static struct cca_token_hdr static_pub_hdr = { + .token_identifier = 0x1E, + }; + static struct cca_public_sec static_pub_sec = { + .section_identifier = 0x04, + }; + struct { + struct T6_keyBlock_hdr t6_hdr; + struct cca_token_hdr pubHdr; + struct cca_public_sec pubSec; + char exponent[0]; + } __attribute__((packed)) *key = p; + unsigned char *temp; + int i; + + memset(key, 0, sizeof(*key)); + + key->pubHdr = static_pub_hdr; + key->pubSec = static_pub_sec; + + /* key parameter block */ + temp = key->exponent; + if (copy_from_user(temp, mex->b_key, mex->inputdatalength)) + return -EFAULT; + /* Strip leading zeroes from b_key. */ + for (i = 0; i < mex->inputdatalength; i++) + if (temp[i]) + break; + if (i >= mex->inputdatalength) + return -EINVAL; + memmove(temp, temp + i, mex->inputdatalength - i); + temp += mex->inputdatalength - i; + /* modulus */ + if (copy_from_user(temp, mex->n_modulus, mex->inputdatalength)) + return -EFAULT; + + key->pubSec.modulus_bit_len = 8 * mex->inputdatalength; + key->pubSec.modulus_byte_len = mex->inputdatalength; + key->pubSec.exponent_len = mex->inputdatalength - i; + key->pubSec.section_length = sizeof(key->pubSec) + + 2*mex->inputdatalength - i; + key->pubHdr.token_length = + key->pubSec.section_length + sizeof(key->pubHdr); + if (big_endian) { + key->t6_hdr.ulen = cpu_to_be16(key->pubHdr.token_length + 4); + key->t6_hdr.blen = cpu_to_be16(key->pubHdr.token_length + 6); + } else { + key->t6_hdr.ulen = cpu_to_le16(key->pubHdr.token_length + 4); + key->t6_hdr.blen = cpu_to_le16(key->pubHdr.token_length + 6); + } + return sizeof(*key) + 2*mex->inputdatalength - i; +} + +/** + * Set up private key fields of a type6 CRT message. + * Note that all numerics in the key token are big-endian, + * while the entries in the key block header are little-endian. + * + * @mex: pointer to user input data + * @p: pointer to memory area for the key + * + * Returns the size of the key area or -EFAULT + */ +static inline int zcrypt_type6_crt_key(struct ica_rsa_modexpo_crt *crt, + void *p, int big_endian) +{ + static struct cca_public_sec static_cca_pub_sec = { + .section_identifier = 4, + .section_length = 0x000f, + .exponent_len = 0x0003, + }; + static char pk_exponent[3] = { 0x01, 0x00, 0x01 }; + struct { + struct T6_keyBlock_hdr t6_hdr; + struct cca_token_hdr token; + struct cca_pvt_ext_CRT_sec pvt; + char key_parts[0]; + } __attribute__((packed)) *key = p; + struct cca_public_sec *pub; + int short_len, long_len, pad_len, key_len, size; + + memset(key, 0, sizeof(*key)); + + short_len = crt->inputdatalength / 2; + long_len = short_len + 8; + pad_len = -(3*long_len + 2*short_len) & 7; + key_len = 3*long_len + 2*short_len + pad_len + crt->inputdatalength; + size = sizeof(*key) + key_len + sizeof(*pub) + 3; + + /* parameter block.key block */ + if (big_endian) { + key->t6_hdr.blen = cpu_to_be16(size); + key->t6_hdr.ulen = cpu_to_be16(size - 2); + } else { + key->t6_hdr.blen = cpu_to_le16(size); + key->t6_hdr.ulen = cpu_to_le16(size - 2); + } + + /* key token header */ + key->token.token_identifier = CCA_TKN_HDR_ID_EXT; + key->token.token_length = size - 6; + + /* private section */ + key->pvt.section_identifier = CCA_PVT_EXT_CRT_SEC_ID_PVT; + key->pvt.section_length = sizeof(key->pvt) + key_len; + key->pvt.key_format = CCA_PVT_EXT_CRT_SEC_FMT_CL; + key->pvt.key_use_flags[0] = CCA_PVT_USAGE_ALL; + key->pvt.p_len = key->pvt.dp_len = key->pvt.u_len = long_len; + key->pvt.q_len = key->pvt.dq_len = short_len; + key->pvt.mod_len = crt->inputdatalength; + key->pvt.pad_len = pad_len; + + /* key parts */ + if (copy_from_user(key->key_parts, crt->np_prime, long_len) || + copy_from_user(key->key_parts + long_len, + crt->nq_prime, short_len) || + copy_from_user(key->key_parts + long_len + short_len, + crt->bp_key, long_len) || + copy_from_user(key->key_parts + 2*long_len + short_len, + crt->bq_key, short_len) || + copy_from_user(key->key_parts + 2*long_len + 2*short_len, + crt->u_mult_inv, long_len)) + return -EFAULT; + memset(key->key_parts + 3*long_len + 2*short_len + pad_len, + 0xff, crt->inputdatalength); + pub = (struct cca_public_sec *)(key->key_parts + key_len); + *pub = static_cca_pub_sec; + pub->modulus_bit_len = 8 * crt->inputdatalength; + /** + * In a private key, the modulus doesn't appear in the public + * section. So, an arbitrary public exponent of 0x010001 will be + * used. + */ + memcpy((char *) (pub + 1), pk_exponent, 3); + return size; +} + +#endif /* _ZCRYPT_CCA_KEY_H_ */ diff --git a/drivers/s390/crypto/zcrypt_pcicc.c b/drivers/s390/crypto/zcrypt_pcicc.c new file mode 100644 index 000000000000..900362983fec --- /dev/null +++ b/drivers/s390/crypto/zcrypt_pcicc.c @@ -0,0 +1,630 @@ +/* + * linux/drivers/s390/crypto/zcrypt_pcicc.c + * + * zcrypt 2.0.0 + * + * Copyright (C) 2001, 2006 IBM Corporation + * Author(s): Robert Burroughs + * Eric Rossman (edrossma@us.ibm.com) + * + * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) + * Major cleanup & driver split: Martin Schwidefsky + * Ralph Wuerthner + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include + +#include "ap_bus.h" +#include "zcrypt_api.h" +#include "zcrypt_error.h" +#include "zcrypt_pcicc.h" +#include "zcrypt_cca_key.h" + +#define PCICC_MIN_MOD_SIZE 64 /* 512 bits */ +#define PCICC_MAX_MOD_SIZE_OLD 128 /* 1024 bits */ +#define PCICC_MAX_MOD_SIZE 256 /* 2048 bits */ + +/** + * PCICC cards need a speed rating of 0. This keeps them at the end of + * the zcrypt device list (see zcrypt_api.c). PCICC cards are only + * used if no other cards are present because they are slow and can only + * cope with PKCS12 padded requests. The logic is queer. PKCS11 padded + * requests are rejected. The modexpo function encrypts PKCS12 padded data + * and decrypts any non-PKCS12 padded data (except PKCS11) in the assumption + * that it's encrypted PKCS12 data. The modexpo_crt function always decrypts + * the data in the assumption that its PKCS12 encrypted data. + */ +#define PCICC_SPEED_RATING 0 + +#define PCICC_MAX_MESSAGE_SIZE 0x710 /* max size type6 v1 crt message */ +#define PCICC_MAX_RESPONSE_SIZE 0x710 /* max size type86 v1 reply */ + +#define PCICC_CLEANUP_TIME (15*HZ) + +static struct ap_device_id zcrypt_pcicc_ids[] = { + { AP_DEVICE(AP_DEVICE_TYPE_PCICC) }, + { /* end of list */ }, +}; + +#ifndef CONFIG_ZCRYPT_MONOLITHIC +MODULE_DEVICE_TABLE(ap, zcrypt_pcicc_ids); +MODULE_AUTHOR("IBM Corporation"); +MODULE_DESCRIPTION("PCICC Cryptographic Coprocessor device driver, " + "Copyright 2001, 2006 IBM Corporation"); +MODULE_LICENSE("GPL"); +#endif + +static int zcrypt_pcicc_probe(struct ap_device *ap_dev); +static void zcrypt_pcicc_remove(struct ap_device *ap_dev); +static void zcrypt_pcicc_receive(struct ap_device *, struct ap_message *, + struct ap_message *); + +static struct ap_driver zcrypt_pcicc_driver = { + .probe = zcrypt_pcicc_probe, + .remove = zcrypt_pcicc_remove, + .receive = zcrypt_pcicc_receive, + .ids = zcrypt_pcicc_ids, +}; + +/** + * The following is used to initialize the CPRB passed to the PCICC card + * in a type6 message. The 3 fields that must be filled in at execution + * time are req_parml, rpl_parml and usage_domain. Note that all three + * fields are *little*-endian. Actually, everything about this interface + * is ascii/little-endian, since the device has 'Intel inside'. + * + * The CPRB is followed immediately by the parm block. + * The parm block contains: + * - function code ('PD' 0x5044 or 'PK' 0x504B) + * - rule block (0x0A00 'PKCS-1.2' or 0x0A00 'ZERO-PAD') + * - VUD block + */ +static struct CPRB static_cprb = { + .cprb_len = __constant_cpu_to_le16(0x0070), + .cprb_ver_id = 0x41, + .func_id = {0x54,0x32}, + .checkpoint_flag= 0x01, + .svr_namel = __constant_cpu_to_le16(0x0008), + .svr_name = {'I','C','S','F',' ',' ',' ',' '} +}; + +/** + * Check the message for PKCS11 padding. + */ +static inline int is_PKCS11_padded(unsigned char *buffer, int length) +{ + int i; + if ((buffer[0] != 0x00) || (buffer[1] != 0x01)) + return 0; + for (i = 2; i < length; i++) + if (buffer[i] != 0xFF) + break; + if (i < 10 || i == length) + return 0; + if (buffer[i] != 0x00) + return 0; + return 1; +} + +/** + * Check the message for PKCS12 padding. + */ +static inline int is_PKCS12_padded(unsigned char *buffer, int length) +{ + int i; + if ((buffer[0] != 0x00) || (buffer[1] != 0x02)) + return 0; + for (i = 2; i < length; i++) + if (buffer[i] == 0x00) + break; + if ((i < 10) || (i == length)) + return 0; + if (buffer[i] != 0x00) + return 0; + return 1; +} + +/** + * Convert a ICAMEX message to a type6 MEX message. + * + * @zdev: crypto device pointer + * @zreq: crypto request pointer + * @mex: pointer to user input data + * + * Returns 0 on success or -EFAULT. + */ +static int ICAMEX_msg_to_type6MEX_msg(struct zcrypt_device *zdev, + struct ap_message *ap_msg, + struct ica_rsa_modexpo *mex) +{ + static struct type6_hdr static_type6_hdr = { + .type = 0x06, + .offset1 = 0x00000058, + .agent_id = {0x01,0x00,0x43,0x43,0x41,0x2D,0x41,0x50, + 0x50,0x4C,0x20,0x20,0x20,0x01,0x01,0x01}, + .function_code = {'P','K'}, + }; + static struct function_and_rules_block static_pke_function_and_rules ={ + .function_code = {'P','K'}, + .ulen = __constant_cpu_to_le16(10), + .only_rule = {'P','K','C','S','-','1','.','2'} + }; + struct { + struct type6_hdr hdr; + struct CPRB cprb; + struct function_and_rules_block fr; + unsigned short length; + char text[0]; + } __attribute__((packed)) *msg = ap_msg->message; + int vud_len, pad_len, size; + + /* VUD.ciphertext */ + if (copy_from_user(msg->text, mex->inputdata, mex->inputdatalength)) + return -EFAULT; + + if (is_PKCS11_padded(msg->text, mex->inputdatalength)) + return -EINVAL; + + /* static message header and f&r */ + msg->hdr = static_type6_hdr; + msg->fr = static_pke_function_and_rules; + + if (is_PKCS12_padded(msg->text, mex->inputdatalength)) { + /* strip the padding and adjust the data length */ + pad_len = strnlen(msg->text + 2, mex->inputdatalength - 2) + 3; + if (pad_len <= 9 || pad_len >= mex->inputdatalength) + return -ENODEV; + vud_len = mex->inputdatalength - pad_len; + memmove(msg->text, msg->text + pad_len, vud_len); + msg->length = cpu_to_le16(vud_len + 2); + + /* Set up key after the variable length text. */ + size = zcrypt_type6_mex_key_en(mex, msg->text + vud_len, 0); + if (size < 0) + return size; + size += sizeof(*msg) + vud_len; /* total size of msg */ + } else { + vud_len = mex->inputdatalength; + msg->length = cpu_to_le16(2 + vud_len); + + msg->hdr.function_code[1] = 'D'; + msg->fr.function_code[1] = 'D'; + + /* Set up key after the variable length text. */ + size = zcrypt_type6_mex_key_de(mex, msg->text + vud_len, 0); + if (size < 0) + return size; + size += sizeof(*msg) + vud_len; /* total size of msg */ + } + + /* message header, cprb and f&r */ + msg->hdr.ToCardLen1 = (size - sizeof(msg->hdr) + 3) & -4; + msg->hdr.FromCardLen1 = PCICC_MAX_RESPONSE_SIZE - sizeof(msg->hdr); + + msg->cprb = static_cprb; + msg->cprb.usage_domain[0]= AP_QID_QUEUE(zdev->ap_dev->qid); + msg->cprb.req_parml = cpu_to_le16(size - sizeof(msg->hdr) - + sizeof(msg->cprb)); + msg->cprb.rpl_parml = cpu_to_le16(msg->hdr.FromCardLen1); + + ap_msg->length = (size + 3) & -4; + return 0; +} + +/** + * Convert a ICACRT message to a type6 CRT message. + * + * @zdev: crypto device pointer + * @zreq: crypto request pointer + * @crt: pointer to user input data + * + * Returns 0 on success or -EFAULT. + */ +static int ICACRT_msg_to_type6CRT_msg(struct zcrypt_device *zdev, + struct ap_message *ap_msg, + struct ica_rsa_modexpo_crt *crt) +{ + static struct type6_hdr static_type6_hdr = { + .type = 0x06, + .offset1 = 0x00000058, + .agent_id = {0x01,0x00,0x43,0x43,0x41,0x2D,0x41,0x50, + 0x50,0x4C,0x20,0x20,0x20,0x01,0x01,0x01}, + .function_code = {'P','D'}, + }; + static struct function_and_rules_block static_pkd_function_and_rules ={ + .function_code = {'P','D'}, + .ulen = __constant_cpu_to_le16(10), + .only_rule = {'P','K','C','S','-','1','.','2'} + }; + struct { + struct type6_hdr hdr; + struct CPRB cprb; + struct function_and_rules_block fr; + unsigned short length; + char text[0]; + } __attribute__((packed)) *msg = ap_msg->message; + int size; + + /* VUD.ciphertext */ + msg->length = cpu_to_le16(2 + crt->inputdatalength); + if (copy_from_user(msg->text, crt->inputdata, crt->inputdatalength)) + return -EFAULT; + + if (is_PKCS11_padded(msg->text, crt->inputdatalength)) + return -EINVAL; + + /* Set up key after the variable length text. */ + size = zcrypt_type6_crt_key(crt, msg->text + crt->inputdatalength, 0); + if (size < 0) + return size; + size += sizeof(*msg) + crt->inputdatalength; /* total size of msg */ + + /* message header, cprb and f&r */ + msg->hdr = static_type6_hdr; + msg->hdr.ToCardLen1 = (size - sizeof(msg->hdr) + 3) & -4; + msg->hdr.FromCardLen1 = PCICC_MAX_RESPONSE_SIZE - sizeof(msg->hdr); + + msg->cprb = static_cprb; + msg->cprb.usage_domain[0] = AP_QID_QUEUE(zdev->ap_dev->qid); + msg->cprb.req_parml = msg->cprb.rpl_parml = + cpu_to_le16(size - sizeof(msg->hdr) - sizeof(msg->cprb)); + + msg->fr = static_pkd_function_and_rules; + + ap_msg->length = (size + 3) & -4; + return 0; +} + +/** + * Copy results from a type 86 reply message back to user space. + * + * @zdev: crypto device pointer + * @reply: reply AP message. + * @data: pointer to user output data + * @length: size of user output data + * + * Returns 0 on success or -EINVAL, -EFAULT, -EAGAIN in case of an error. + */ +struct type86_reply { + struct type86_hdr hdr; + struct type86_fmt2_ext fmt2; + struct CPRB cprb; + unsigned char pad[4]; /* 4 byte function code/rules block ? */ + unsigned short length; + char text[0]; +} __attribute__((packed)); + +static int convert_type86(struct zcrypt_device *zdev, + struct ap_message *reply, + char __user *outputdata, + unsigned int outputdatalength) +{ + static unsigned char static_pad[] = { + 0x00,0x02, + 0x1B,0x7B,0x5D,0xB5,0x75,0x01,0x3D,0xFD, + 0x8D,0xD1,0xC7,0x03,0x2D,0x09,0x23,0x57, + 0x89,0x49,0xB9,0x3F,0xBB,0x99,0x41,0x5B, + 0x75,0x21,0x7B,0x9D,0x3B,0x6B,0x51,0x39, + 0xBB,0x0D,0x35,0xB9,0x89,0x0F,0x93,0xA5, + 0x0B,0x47,0xF1,0xD3,0xBB,0xCB,0xF1,0x9D, + 0x23,0x73,0x71,0xFF,0xF3,0xF5,0x45,0xFB, + 0x61,0x29,0x23,0xFD,0xF1,0x29,0x3F,0x7F, + 0x17,0xB7,0x1B,0xA9,0x19,0xBD,0x57,0xA9, + 0xD7,0x95,0xA3,0xCB,0xED,0x1D,0xDB,0x45, + 0x7D,0x11,0xD1,0x51,0x1B,0xED,0x71,0xE9, + 0xB1,0xD1,0xAB,0xAB,0x21,0x2B,0x1B,0x9F, + 0x3B,0x9F,0xF7,0xF7,0xBD,0x63,0xEB,0xAD, + 0xDF,0xB3,0x6F,0x5B,0xDB,0x8D,0xA9,0x5D, + 0xE3,0x7D,0x77,0x49,0x47,0xF5,0xA7,0xFD, + 0xAB,0x2F,0x27,0x35,0x77,0xD3,0x49,0xC9, + 0x09,0xEB,0xB1,0xF9,0xBF,0x4B,0xCB,0x2B, + 0xEB,0xEB,0x05,0xFF,0x7D,0xC7,0x91,0x8B, + 0x09,0x83,0xB9,0xB9,0x69,0x33,0x39,0x6B, + 0x79,0x75,0x19,0xBF,0xBB,0x07,0x1D,0xBD, + 0x29,0xBF,0x39,0x95,0x93,0x1D,0x35,0xC7, + 0xC9,0x4D,0xE5,0x97,0x0B,0x43,0x9B,0xF1, + 0x16,0x93,0x03,0x1F,0xA5,0xFB,0xDB,0xF3, + 0x27,0x4F,0x27,0x61,0x05,0x1F,0xB9,0x23, + 0x2F,0xC3,0x81,0xA9,0x23,0x71,0x55,0x55, + 0xEB,0xED,0x41,0xE5,0xF3,0x11,0xF1,0x43, + 0x69,0x03,0xBD,0x0B,0x37,0x0F,0x51,0x8F, + 0x0B,0xB5,0x89,0x5B,0x67,0xA9,0xD9,0x4F, + 0x01,0xF9,0x21,0x77,0x37,0x73,0x79,0xC5, + 0x7F,0x51,0xC1,0xCF,0x97,0xA1,0x75,0xAD, + 0x35,0x9D,0xD3,0xD3,0xA7,0x9D,0x5D,0x41, + 0x6F,0x65,0x1B,0xCF,0xA9,0x87,0x91,0x09 + }; + struct type86_reply *msg = reply->message; + unsigned short service_rc, service_rs; + unsigned int reply_len, pad_len; + char *data; + + service_rc = le16_to_cpu(msg->cprb.ccp_rtcode); + if (unlikely(service_rc != 0)) { + service_rs = le16_to_cpu(msg->cprb.ccp_rscode); + if (service_rc == 8 && service_rs == 66) { + PDEBUG("Bad block format on PCICC\n"); + return -EINVAL; + } + if (service_rc == 8 && service_rs == 65) { + PDEBUG("Probably an even modulus on PCICC\n"); + return -EINVAL; + } + if (service_rc == 8 && service_rs == 770) { + PDEBUG("Invalid key length on PCICC\n"); + zdev->max_mod_size = PCICC_MAX_MOD_SIZE_OLD; + return -EAGAIN; + } + if (service_rc == 8 && service_rs == 783) { + PDEBUG("Extended bitlengths not enabled on PCICC\n"); + zdev->max_mod_size = PCICC_MAX_MOD_SIZE_OLD; + return -EAGAIN; + } + PRINTK("Unknown service rc/rs (PCICC): %d/%d\n", + service_rc, service_rs); + zdev->online = 0; + return -EAGAIN; /* repeat the request on a different device. */ + } + data = msg->text; + reply_len = le16_to_cpu(msg->length) - 2; + if (reply_len > outputdatalength) + return -EINVAL; + /** + * For all encipher requests, the length of the ciphertext (reply_len) + * will always equal the modulus length. For MEX decipher requests + * the output needs to get padded. Minimum pad size is 10. + * + * Currently, the cases where padding will be added is for: + * - PCIXCC_MCL2 using a CRT form token (since PKD didn't support + * ZERO-PAD and CRT is only supported for PKD requests) + * - PCICC, always + */ + pad_len = outputdatalength - reply_len; + if (pad_len > 0) { + if (pad_len < 10) + return -EINVAL; + /* 'restore' padding left in the PCICC/PCIXCC card. */ + if (copy_to_user(outputdata, static_pad, pad_len - 1)) + return -EFAULT; + if (put_user(0, outputdata + pad_len - 1)) + return -EFAULT; + } + /* Copy the crypto response to user space. */ + if (copy_to_user(outputdata + pad_len, data, reply_len)) + return -EFAULT; + return 0; +} + +static int convert_response(struct zcrypt_device *zdev, + struct ap_message *reply, + char __user *outputdata, + unsigned int outputdatalength) +{ + struct type86_reply *msg = reply->message; + + /* Response type byte is the second byte in the response. */ + switch (msg->hdr.type) { + case TYPE82_RSP_CODE: + case TYPE88_RSP_CODE: + return convert_error(zdev, reply); + case TYPE86_RSP_CODE: + if (msg->hdr.reply_code) + return convert_error(zdev, reply); + if (msg->cprb.cprb_ver_id == 0x01) + return convert_type86(zdev, reply, + outputdata, outputdatalength); + /* no break, incorrect cprb version is an unknown response */ + default: /* Unknown response type, this should NEVER EVER happen */ + PRINTK("Unrecognized Message Header: %08x%08x\n", + *(unsigned int *) reply->message, + *(unsigned int *) (reply->message+4)); + zdev->online = 0; + return -EAGAIN; /* repeat the request on a different device. */ + } +} + +/** + * This function is called from the AP bus code after a crypto request + * "msg" has finished with the reply message "reply". + * It is called from tasklet context. + * @ap_dev: pointer to the AP device + * @msg: pointer to the AP message + * @reply: pointer to the AP reply message + */ +static void zcrypt_pcicc_receive(struct ap_device *ap_dev, + struct ap_message *msg, + struct ap_message *reply) +{ + static struct error_hdr error_reply = { + .type = TYPE82_RSP_CODE, + .reply_code = REP82_ERROR_MACHINE_FAILURE, + }; + struct type86_reply *t86r = reply->message; + int length; + + /* Copy the reply message to the request message buffer. */ + if (IS_ERR(reply)) + memcpy(msg->message, &error_reply, sizeof(error_reply)); + else if (t86r->hdr.type == TYPE86_RSP_CODE && + t86r->cprb.cprb_ver_id == 0x01) { + length = sizeof(struct type86_reply) + t86r->length - 2; + length = min(PCICC_MAX_RESPONSE_SIZE, length); + memcpy(msg->message, reply->message, length); + } else + memcpy(msg->message, reply->message, sizeof error_reply); + complete((struct completion *) msg->private); +} + +static atomic_t zcrypt_step = ATOMIC_INIT(0); + +/** + * The request distributor calls this function if it picked the PCICC + * device to handle a modexpo request. + * @zdev: pointer to zcrypt_device structure that identifies the + * PCICC device to the request distributor + * @mex: pointer to the modexpo request buffer + */ +static long zcrypt_pcicc_modexpo(struct zcrypt_device *zdev, + struct ica_rsa_modexpo *mex) +{ + struct ap_message ap_msg; + struct completion work; + int rc; + + ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL); + if (!ap_msg.message) + return -ENOMEM; + ap_msg.length = PAGE_SIZE; + ap_msg.psmid = (((unsigned long long) current->pid) << 32) + + atomic_inc_return(&zcrypt_step); + ap_msg.private = &work; + rc = ICAMEX_msg_to_type6MEX_msg(zdev, &ap_msg, mex); + if (rc) + goto out_free; + init_completion(&work); + ap_queue_message(zdev->ap_dev, &ap_msg); + rc = wait_for_completion_interruptible_timeout( + &work, PCICC_CLEANUP_TIME); + if (rc > 0) + rc = convert_response(zdev, &ap_msg, mex->outputdata, + mex->outputdatalength); + else { + /* Signal pending or message timed out. */ + ap_cancel_message(zdev->ap_dev, &ap_msg); + if (rc == 0) + /* Message timed out. */ + rc = -ETIME; + } +out_free: + free_page((unsigned long) ap_msg.message); + return rc; +} + +/** + * The request distributor calls this function if it picked the PCICC + * device to handle a modexpo_crt request. + * @zdev: pointer to zcrypt_device structure that identifies the + * PCICC device to the request distributor + * @crt: pointer to the modexpoc_crt request buffer + */ +static long zcrypt_pcicc_modexpo_crt(struct zcrypt_device *zdev, + struct ica_rsa_modexpo_crt *crt) +{ + struct ap_message ap_msg; + struct completion work; + int rc; + + ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL); + if (!ap_msg.message) + return -ENOMEM; + ap_msg.length = PAGE_SIZE; + ap_msg.psmid = (((unsigned long long) current->pid) << 32) + + atomic_inc_return(&zcrypt_step); + ap_msg.private = &work; + rc = ICACRT_msg_to_type6CRT_msg(zdev, &ap_msg, crt); + if (rc) + goto out_free; + init_completion(&work); + ap_queue_message(zdev->ap_dev, &ap_msg); + rc = wait_for_completion_interruptible_timeout( + &work, PCICC_CLEANUP_TIME); + if (rc > 0) + rc = convert_response(zdev, &ap_msg, crt->outputdata, + crt->outputdatalength); + else { + /* Signal pending or message timed out. */ + ap_cancel_message(zdev->ap_dev, &ap_msg); + if (rc == 0) + /* Message timed out. */ + rc = -ETIME; + } +out_free: + free_page((unsigned long) ap_msg.message); + return rc; +} + +/** + * The crypto operations for a PCICC card. + */ +static struct zcrypt_ops zcrypt_pcicc_ops = { + .rsa_modexpo = zcrypt_pcicc_modexpo, + .rsa_modexpo_crt = zcrypt_pcicc_modexpo_crt, +}; + +/** + * Probe function for PCICC cards. It always accepts the AP device + * since the bus_match already checked the hardware type. + * @ap_dev: pointer to the AP device. + */ +static int zcrypt_pcicc_probe(struct ap_device *ap_dev) +{ + struct zcrypt_device *zdev; + int rc; + + zdev = zcrypt_device_alloc(PCICC_MAX_RESPONSE_SIZE); + if (!zdev) + return -ENOMEM; + zdev->ap_dev = ap_dev; + zdev->ops = &zcrypt_pcicc_ops; + zdev->online = 1; + zdev->user_space_type = ZCRYPT_PCICC; + zdev->type_string = "PCICC"; + zdev->min_mod_size = PCICC_MIN_MOD_SIZE; + zdev->max_mod_size = PCICC_MAX_MOD_SIZE; + zdev->speed_rating = PCICC_SPEED_RATING; + ap_dev->reply = &zdev->reply; + ap_dev->private = zdev; + rc = zcrypt_device_register(zdev); + if (rc) + goto out_free; + return 0; + + out_free: + ap_dev->private = NULL; + zcrypt_device_free(zdev); + return rc; +} + +/** + * This is called to remove the extended PCICC driver information + * if an AP device is removed. + */ +static void zcrypt_pcicc_remove(struct ap_device *ap_dev) +{ + struct zcrypt_device *zdev = ap_dev->private; + + zcrypt_device_unregister(zdev); +} + +int __init zcrypt_pcicc_init(void) +{ + return ap_driver_register(&zcrypt_pcicc_driver, THIS_MODULE, "pcicc"); +} + +void zcrypt_pcicc_exit(void) +{ + ap_driver_unregister(&zcrypt_pcicc_driver); +} + +#ifndef CONFIG_ZCRYPT_MONOLITHIC +module_init(zcrypt_pcicc_init); +module_exit(zcrypt_pcicc_exit); +#endif diff --git a/drivers/s390/crypto/zcrypt_pcicc.h b/drivers/s390/crypto/zcrypt_pcicc.h new file mode 100644 index 000000000000..027bafc7312a --- /dev/null +++ b/drivers/s390/crypto/zcrypt_pcicc.h @@ -0,0 +1,176 @@ +/* + * linux/drivers/s390/crypto/zcrypt_pcicc.h + * + * zcrypt 2.0.0 + * + * Copyright (C) 2001, 2006 IBM Corporation + * Author(s): Robert Burroughs + * Eric Rossman (edrossma@us.ibm.com) + * + * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) + * Major cleanup & driver split: Martin Schwidefsky + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _ZCRYPT_PCICC_H_ +#define _ZCRYPT_PCICC_H_ + +/** + * The type 6 message family is associated with PCICC or PCIXCC cards. + * + * It contains a message header followed by a CPRB, both of which + * are described below. + * + * Note that all reserved fields must be zeroes. + */ +struct type6_hdr { + unsigned char reserved1; /* 0x00 */ + unsigned char type; /* 0x06 */ + unsigned char reserved2[2]; /* 0x0000 */ + unsigned char right[4]; /* 0x00000000 */ + unsigned char reserved3[2]; /* 0x0000 */ + unsigned char reserved4[2]; /* 0x0000 */ + unsigned char apfs[4]; /* 0x00000000 */ + unsigned int offset1; /* 0x00000058 (offset to CPRB) */ + unsigned int offset2; /* 0x00000000 */ + unsigned int offset3; /* 0x00000000 */ + unsigned int offset4; /* 0x00000000 */ + unsigned char agent_id[16]; /* PCICC: */ + /* 0x0100 */ + /* 0x4343412d4150504c202020 */ + /* 0x010101 */ + /* PCIXCC: */ + /* 0x4341000000000000 */ + /* 0x0000000000000000 */ + unsigned char rqid[2]; /* rqid. internal to 603 */ + unsigned char reserved5[2]; /* 0x0000 */ + unsigned char function_code[2]; /* for PKD, 0x5044 (ascii 'PD') */ + unsigned char reserved6[2]; /* 0x0000 */ + unsigned int ToCardLen1; /* (request CPRB len + 3) & -4 */ + unsigned int ToCardLen2; /* db len 0x00000000 for PKD */ + unsigned int ToCardLen3; /* 0x00000000 */ + unsigned int ToCardLen4; /* 0x00000000 */ + unsigned int FromCardLen1; /* response buffer length */ + unsigned int FromCardLen2; /* db len 0x00000000 for PKD */ + unsigned int FromCardLen3; /* 0x00000000 */ + unsigned int FromCardLen4; /* 0x00000000 */ +} __attribute__((packed)); + +/** + * CPRB + * Note that all shorts, ints and longs are little-endian. + * All pointer fields are 32-bits long, and mean nothing + * + * A request CPRB is followed by a request_parameter_block. + * + * The request (or reply) parameter block is organized thus: + * function code + * VUD block + * key block + */ +struct CPRB { + unsigned short cprb_len; /* CPRB length */ + unsigned char cprb_ver_id; /* CPRB version id. */ + unsigned char pad_000; /* Alignment pad byte. */ + unsigned char srpi_rtcode[4]; /* SRPI return code LELONG */ + unsigned char srpi_verb; /* SRPI verb type */ + unsigned char flags; /* flags */ + unsigned char func_id[2]; /* function id */ + unsigned char checkpoint_flag; /* */ + unsigned char resv2; /* reserved */ + unsigned short req_parml; /* request parameter buffer */ + /* length 16-bit little endian */ + unsigned char req_parmp[4]; /* request parameter buffer * + * pointer (means nothing: the * + * parameter buffer follows * + * the CPRB). */ + unsigned char req_datal[4]; /* request data buffer */ + /* length ULELONG */ + unsigned char req_datap[4]; /* request data buffer */ + /* pointer */ + unsigned short rpl_parml; /* reply parameter buffer */ + /* length 16-bit little endian */ + unsigned char pad_001[2]; /* Alignment pad bytes. ULESHORT */ + unsigned char rpl_parmp[4]; /* reply parameter buffer * + * pointer (means nothing: the * + * parameter buffer follows * + * the CPRB). */ + unsigned char rpl_datal[4]; /* reply data buffer len ULELONG */ + unsigned char rpl_datap[4]; /* reply data buffer */ + /* pointer */ + unsigned short ccp_rscode; /* server reason code ULESHORT */ + unsigned short ccp_rtcode; /* server return code ULESHORT */ + unsigned char repd_parml[2]; /* replied parameter len ULESHORT*/ + unsigned char mac_data_len[2]; /* Mac Data Length ULESHORT */ + unsigned char repd_datal[4]; /* replied data length ULELONG */ + unsigned char req_pc[2]; /* PC identifier */ + unsigned char res_origin[8]; /* resource origin */ + unsigned char mac_value[8]; /* Mac Value */ + unsigned char logon_id[8]; /* Logon Identifier */ + unsigned char usage_domain[2]; /* cdx */ + unsigned char resv3[18]; /* reserved for requestor */ + unsigned short svr_namel; /* server name length ULESHORT */ + unsigned char svr_name[8]; /* server name */ +} __attribute__((packed)); + +/** + * The type 86 message family is associated with PCICC and PCIXCC cards. + * + * It contains a message header followed by a CPRB. The CPRB is + * the same as the request CPRB, which is described above. + * + * If format is 1, an error condition exists and no data beyond + * the 8-byte message header is of interest. + * + * The non-error message is shown below. + * + * Note that all reserved fields must be zeroes. + */ +struct type86_hdr { + unsigned char reserved1; /* 0x00 */ + unsigned char type; /* 0x86 */ + unsigned char format; /* 0x01 (error) or 0x02 (ok) */ + unsigned char reserved2; /* 0x00 */ + unsigned char reply_code; /* reply code (see above) */ + unsigned char reserved3[3]; /* 0x000000 */ +} __attribute__((packed)); + +#define TYPE86_RSP_CODE 0x86 +#define TYPE86_FMT2 0x02 + +struct type86_fmt2_ext { + unsigned char reserved[4]; /* 0x00000000 */ + unsigned char apfs[4]; /* final status */ + unsigned int count1; /* length of CPRB + parameters */ + unsigned int offset1; /* offset to CPRB */ + unsigned int count2; /* 0x00000000 */ + unsigned int offset2; /* db offset 0x00000000 for PKD */ + unsigned int count3; /* 0x00000000 */ + unsigned int offset3; /* 0x00000000 */ + unsigned int count4; /* 0x00000000 */ + unsigned int offset4; /* 0x00000000 */ +} __attribute__((packed)); + +struct function_and_rules_block { + unsigned char function_code[2]; + unsigned short ulen; + unsigned char only_rule[8]; +} __attribute__((packed)); + +int zcrypt_pcicc_init(void); +void zcrypt_pcicc_exit(void); + +#endif /* _ZCRYPT_PCICC_H_ */ diff --git a/drivers/s390/crypto/zcrypt_pcixcc.c b/drivers/s390/crypto/zcrypt_pcixcc.c new file mode 100644 index 000000000000..6064cf58be43 --- /dev/null +++ b/drivers/s390/crypto/zcrypt_pcixcc.c @@ -0,0 +1,714 @@ +/* + * linux/drivers/s390/crypto/zcrypt_pcixcc.c + * + * zcrypt 2.0.0 + * + * Copyright (C) 2001, 2006 IBM Corporation + * Author(s): Robert Burroughs + * Eric Rossman (edrossma@us.ibm.com) + * + * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) + * Major cleanup & driver split: Martin Schwidefsky + * Ralph Wuerthner + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include + +#include "ap_bus.h" +#include "zcrypt_api.h" +#include "zcrypt_error.h" +#include "zcrypt_pcicc.h" +#include "zcrypt_pcixcc.h" +#include "zcrypt_cca_key.h" + +#define PCIXCC_MIN_MOD_SIZE 16 /* 128 bits */ +#define PCIXCC_MIN_MOD_SIZE_OLD 64 /* 512 bits */ +#define PCIXCC_MAX_MOD_SIZE 256 /* 2048 bits */ + +#define PCIXCC_MCL2_SPEED_RATING 7870 /* FIXME: needs finetuning */ +#define PCIXCC_MCL3_SPEED_RATING 7870 +#define CEX2C_SPEED_RATING 8540 + +#define PCIXCC_MAX_ICA_MESSAGE_SIZE 0x77c /* max size type6 v2 crt message */ +#define PCIXCC_MAX_ICA_RESPONSE_SIZE 0x77c /* max size type86 v2 reply */ + +#define PCIXCC_MAX_XCRB_MESSAGE_SIZE (12*1024) +#define PCIXCC_MAX_XCRB_RESPONSE_SIZE PCIXCC_MAX_XCRB_MESSAGE_SIZE +#define PCIXCC_MAX_XCRB_DATA_SIZE (11*1024) +#define PCIXCC_MAX_XCRB_REPLY_SIZE (5*1024) + +#define PCIXCC_MAX_RESPONSE_SIZE PCIXCC_MAX_XCRB_RESPONSE_SIZE + +#define PCIXCC_CLEANUP_TIME (15*HZ) + +static struct ap_device_id zcrypt_pcixcc_ids[] = { + { AP_DEVICE(AP_DEVICE_TYPE_PCIXCC) }, + { AP_DEVICE(AP_DEVICE_TYPE_CEX2C) }, + { /* end of list */ }, +}; + +#ifndef CONFIG_ZCRYPT_MONOLITHIC +MODULE_DEVICE_TABLE(ap, zcrypt_pcixcc_ids); +MODULE_AUTHOR("IBM Corporation"); +MODULE_DESCRIPTION("PCIXCC Cryptographic Coprocessor device driver, " + "Copyright 2001, 2006 IBM Corporation"); +MODULE_LICENSE("GPL"); +#endif + +static int zcrypt_pcixcc_probe(struct ap_device *ap_dev); +static void zcrypt_pcixcc_remove(struct ap_device *ap_dev); +static void zcrypt_pcixcc_receive(struct ap_device *, struct ap_message *, + struct ap_message *); + +static struct ap_driver zcrypt_pcixcc_driver = { + .probe = zcrypt_pcixcc_probe, + .remove = zcrypt_pcixcc_remove, + .receive = zcrypt_pcixcc_receive, + .ids = zcrypt_pcixcc_ids, +}; + +/** + * The following is used to initialize the CPRBX passed to the PCIXCC/CEX2C + * card in a type6 message. The 3 fields that must be filled in at execution + * time are req_parml, rpl_parml and usage_domain. + * Everything about this interface is ascii/big-endian, since the + * device does *not* have 'Intel inside'. + * + * The CPRBX is followed immediately by the parm block. + * The parm block contains: + * - function code ('PD' 0x5044 or 'PK' 0x504B) + * - rule block (one of:) + * + 0x000A 'PKCS-1.2' (MCL2 'PD') + * + 0x000A 'ZERO-PAD' (MCL2 'PK') + * + 0x000A 'ZERO-PAD' (MCL3 'PD' or CEX2C 'PD') + * + 0x000A 'MRP ' (MCL3 'PK' or CEX2C 'PK') + * - VUD block + */ +static struct CPRBX static_cprbx = { + .cprb_len = 0x00DC, + .cprb_ver_id = 0x02, + .func_id = {0x54,0x32}, +}; + +/** + * Convert a ICAMEX message to a type6 MEX message. + * + * @zdev: crypto device pointer + * @ap_msg: pointer to AP message + * @mex: pointer to user input data + * + * Returns 0 on success or -EFAULT. + */ +static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_device *zdev, + struct ap_message *ap_msg, + struct ica_rsa_modexpo *mex) +{ + static struct type6_hdr static_type6_hdrX = { + .type = 0x06, + .offset1 = 0x00000058, + .agent_id = {'C','A',}, + .function_code = {'P','K'}, + }; + static struct function_and_rules_block static_pke_fnr = { + .function_code = {'P','K'}, + .ulen = 10, + .only_rule = {'M','R','P',' ',' ',' ',' ',' '} + }; + static struct function_and_rules_block static_pke_fnr_MCL2 = { + .function_code = {'P','K'}, + .ulen = 10, + .only_rule = {'Z','E','R','O','-','P','A','D'} + }; + struct { + struct type6_hdr hdr; + struct CPRBX cprbx; + struct function_and_rules_block fr; + unsigned short length; + char text[0]; + } __attribute__((packed)) *msg = ap_msg->message; + int size; + + /* VUD.ciphertext */ + msg->length = mex->inputdatalength + 2; + if (copy_from_user(msg->text, mex->inputdata, mex->inputdatalength)) + return -EFAULT; + + /* Set up key which is located after the variable length text. */ + size = zcrypt_type6_mex_key_en(mex, msg->text+mex->inputdatalength, 1); + if (size < 0) + return size; + size += sizeof(*msg) + mex->inputdatalength; + + /* message header, cprbx and f&r */ + msg->hdr = static_type6_hdrX; + msg->hdr.ToCardLen1 = size - sizeof(msg->hdr); + msg->hdr.FromCardLen1 = PCIXCC_MAX_ICA_RESPONSE_SIZE - sizeof(msg->hdr); + + msg->cprbx = static_cprbx; + msg->cprbx.domain = AP_QID_QUEUE(zdev->ap_dev->qid); + msg->cprbx.rpl_msgbl = msg->hdr.FromCardLen1; + + msg->fr = (zdev->user_space_type == ZCRYPT_PCIXCC_MCL2) ? + static_pke_fnr_MCL2 : static_pke_fnr; + + msg->cprbx.req_parml = size - sizeof(msg->hdr) - sizeof(msg->cprbx); + + ap_msg->length = size; + return 0; +} + +/** + * Convert a ICACRT message to a type6 CRT message. + * + * @zdev: crypto device pointer + * @ap_msg: pointer to AP message + * @crt: pointer to user input data + * + * Returns 0 on success or -EFAULT. + */ +static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_device *zdev, + struct ap_message *ap_msg, + struct ica_rsa_modexpo_crt *crt) +{ + static struct type6_hdr static_type6_hdrX = { + .type = 0x06, + .offset1 = 0x00000058, + .agent_id = {'C','A',}, + .function_code = {'P','D'}, + }; + static struct function_and_rules_block static_pkd_fnr = { + .function_code = {'P','D'}, + .ulen = 10, + .only_rule = {'Z','E','R','O','-','P','A','D'} + }; + + static struct function_and_rules_block static_pkd_fnr_MCL2 = { + .function_code = {'P','D'}, + .ulen = 10, + .only_rule = {'P','K','C','S','-','1','.','2'} + }; + struct { + struct type6_hdr hdr; + struct CPRBX cprbx; + struct function_and_rules_block fr; + unsigned short length; + char text[0]; + } __attribute__((packed)) *msg = ap_msg->message; + int size; + + /* VUD.ciphertext */ + msg->length = crt->inputdatalength + 2; + if (copy_from_user(msg->text, crt->inputdata, crt->inputdatalength)) + return -EFAULT; + + /* Set up key which is located after the variable length text. */ + size = zcrypt_type6_crt_key(crt, msg->text + crt->inputdatalength, 1); + if (size < 0) + return size; + size += sizeof(*msg) + crt->inputdatalength; /* total size of msg */ + + /* message header, cprbx and f&r */ + msg->hdr = static_type6_hdrX; + msg->hdr.ToCardLen1 = size - sizeof(msg->hdr); + msg->hdr.FromCardLen1 = PCIXCC_MAX_ICA_RESPONSE_SIZE - sizeof(msg->hdr); + + msg->cprbx = static_cprbx; + msg->cprbx.domain = AP_QID_QUEUE(zdev->ap_dev->qid); + msg->cprbx.req_parml = msg->cprbx.rpl_msgbl = + size - sizeof(msg->hdr) - sizeof(msg->cprbx); + + msg->fr = (zdev->user_space_type == ZCRYPT_PCIXCC_MCL2) ? + static_pkd_fnr_MCL2 : static_pkd_fnr; + + ap_msg->length = size; + return 0; +} + +/** + * Copy results from a type 86 ICA reply message back to user space. + * + * @zdev: crypto device pointer + * @reply: reply AP message. + * @data: pointer to user output data + * @length: size of user output data + * + * Returns 0 on success or -EINVAL, -EFAULT, -EAGAIN in case of an error. + */ +struct type86x_reply { + struct type86_hdr hdr; + struct type86_fmt2_ext fmt2; + struct CPRBX cprbx; + unsigned char pad[4]; /* 4 byte function code/rules block ? */ + unsigned short length; + char text[0]; +} __attribute__((packed)); + +static int convert_type86_ica(struct zcrypt_device *zdev, + struct ap_message *reply, + char __user *outputdata, + unsigned int outputdatalength) +{ + static unsigned char static_pad[] = { + 0x00,0x02, + 0x1B,0x7B,0x5D,0xB5,0x75,0x01,0x3D,0xFD, + 0x8D,0xD1,0xC7,0x03,0x2D,0x09,0x23,0x57, + 0x89,0x49,0xB9,0x3F,0xBB,0x99,0x41,0x5B, + 0x75,0x21,0x7B,0x9D,0x3B,0x6B,0x51,0x39, + 0xBB,0x0D,0x35,0xB9,0x89,0x0F,0x93,0xA5, + 0x0B,0x47,0xF1,0xD3,0xBB,0xCB,0xF1,0x9D, + 0x23,0x73,0x71,0xFF,0xF3,0xF5,0x45,0xFB, + 0x61,0x29,0x23,0xFD,0xF1,0x29,0x3F,0x7F, + 0x17,0xB7,0x1B,0xA9,0x19,0xBD,0x57,0xA9, + 0xD7,0x95,0xA3,0xCB,0xED,0x1D,0xDB,0x45, + 0x7D,0x11,0xD1,0x51,0x1B,0xED,0x71,0xE9, + 0xB1,0xD1,0xAB,0xAB,0x21,0x2B,0x1B,0x9F, + 0x3B,0x9F,0xF7,0xF7,0xBD,0x63,0xEB,0xAD, + 0xDF,0xB3,0x6F,0x5B,0xDB,0x8D,0xA9,0x5D, + 0xE3,0x7D,0x77,0x49,0x47,0xF5,0xA7,0xFD, + 0xAB,0x2F,0x27,0x35,0x77,0xD3,0x49,0xC9, + 0x09,0xEB,0xB1,0xF9,0xBF,0x4B,0xCB,0x2B, + 0xEB,0xEB,0x05,0xFF,0x7D,0xC7,0x91,0x8B, + 0x09,0x83,0xB9,0xB9,0x69,0x33,0x39,0x6B, + 0x79,0x75,0x19,0xBF,0xBB,0x07,0x1D,0xBD, + 0x29,0xBF,0x39,0x95,0x93,0x1D,0x35,0xC7, + 0xC9,0x4D,0xE5,0x97,0x0B,0x43,0x9B,0xF1, + 0x16,0x93,0x03,0x1F,0xA5,0xFB,0xDB,0xF3, + 0x27,0x4F,0x27,0x61,0x05,0x1F,0xB9,0x23, + 0x2F,0xC3,0x81,0xA9,0x23,0x71,0x55,0x55, + 0xEB,0xED,0x41,0xE5,0xF3,0x11,0xF1,0x43, + 0x69,0x03,0xBD,0x0B,0x37,0x0F,0x51,0x8F, + 0x0B,0xB5,0x89,0x5B,0x67,0xA9,0xD9,0x4F, + 0x01,0xF9,0x21,0x77,0x37,0x73,0x79,0xC5, + 0x7F,0x51,0xC1,0xCF,0x97,0xA1,0x75,0xAD, + 0x35,0x9D,0xD3,0xD3,0xA7,0x9D,0x5D,0x41, + 0x6F,0x65,0x1B,0xCF,0xA9,0x87,0x91,0x09 + }; + struct type86x_reply *msg = reply->message; + unsigned short service_rc, service_rs; + unsigned int reply_len, pad_len; + char *data; + + service_rc = msg->cprbx.ccp_rtcode; + if (unlikely(service_rc != 0)) { + service_rs = msg->cprbx.ccp_rscode; + if (service_rc == 8 && service_rs == 66) { + PDEBUG("Bad block format on PCIXCC/CEX2C\n"); + return -EINVAL; + } + if (service_rc == 8 && service_rs == 65) { + PDEBUG("Probably an even modulus on PCIXCC/CEX2C\n"); + return -EINVAL; + } + if (service_rc == 8 && service_rs == 770) { + PDEBUG("Invalid key length on PCIXCC/CEX2C\n"); + zdev->min_mod_size = PCIXCC_MIN_MOD_SIZE_OLD; + return -EAGAIN; + } + if (service_rc == 8 && service_rs == 783) { + PDEBUG("Extended bitlengths not enabled on PCIXCC/CEX2C\n"); + zdev->min_mod_size = PCIXCC_MIN_MOD_SIZE_OLD; + return -EAGAIN; + } + PRINTK("Unknown service rc/rs (PCIXCC/CEX2C): %d/%d\n", + service_rc, service_rs); + zdev->online = 0; + return -EAGAIN; /* repeat the request on a different device. */ + } + data = msg->text; + reply_len = msg->length - 2; + if (reply_len > outputdatalength) + return -EINVAL; + /** + * For all encipher requests, the length of the ciphertext (reply_len) + * will always equal the modulus length. For MEX decipher requests + * the output needs to get padded. Minimum pad size is 10. + * + * Currently, the cases where padding will be added is for: + * - PCIXCC_MCL2 using a CRT form token (since PKD didn't support + * ZERO-PAD and CRT is only supported for PKD requests) + * - PCICC, always + */ + pad_len = outputdatalength - reply_len; + if (pad_len > 0) { + if (pad_len < 10) + return -EINVAL; + /* 'restore' padding left in the PCICC/PCIXCC card. */ + if (copy_to_user(outputdata, static_pad, pad_len - 1)) + return -EFAULT; + if (put_user(0, outputdata + pad_len - 1)) + return -EFAULT; + } + /* Copy the crypto response to user space. */ + if (copy_to_user(outputdata + pad_len, data, reply_len)) + return -EFAULT; + return 0; +} + +static int convert_response_ica(struct zcrypt_device *zdev, + struct ap_message *reply, + char __user *outputdata, + unsigned int outputdatalength) +{ + struct type86x_reply *msg = reply->message; + + /* Response type byte is the second byte in the response. */ + switch (((unsigned char *) reply->message)[1]) { + case TYPE82_RSP_CODE: + case TYPE88_RSP_CODE: + return convert_error(zdev, reply); + case TYPE86_RSP_CODE: + if (msg->hdr.reply_code) + return convert_error(zdev, reply); + if (msg->cprbx.cprb_ver_id == 0x02) + return convert_type86_ica(zdev, reply, + outputdata, outputdatalength); + /* no break, incorrect cprb version is an unknown response */ + default: /* Unknown response type, this should NEVER EVER happen */ + PRINTK("Unrecognized Message Header: %08x%08x\n", + *(unsigned int *) reply->message, + *(unsigned int *) (reply->message+4)); + zdev->online = 0; + return -EAGAIN; /* repeat the request on a different device. */ + } +} + +/** + * This function is called from the AP bus code after a crypto request + * "msg" has finished with the reply message "reply". + * It is called from tasklet context. + * @ap_dev: pointer to the AP device + * @msg: pointer to the AP message + * @reply: pointer to the AP reply message + */ +static void zcrypt_pcixcc_receive(struct ap_device *ap_dev, + struct ap_message *msg, + struct ap_message *reply) +{ + static struct error_hdr error_reply = { + .type = TYPE82_RSP_CODE, + .reply_code = REP82_ERROR_MACHINE_FAILURE, + }; + struct type86x_reply *t86r = reply->message; + int length; + + /* Copy the reply message to the request message buffer. */ + if (IS_ERR(reply)) + memcpy(msg->message, &error_reply, sizeof(error_reply)); + else if (t86r->hdr.type == TYPE86_RSP_CODE && + t86r->cprbx.cprb_ver_id == 0x02) { + length = sizeof(struct type86x_reply) + t86r->length - 2; + length = min(PCIXCC_MAX_ICA_RESPONSE_SIZE, length); + memcpy(msg->message, reply->message, length); + } else + memcpy(msg->message, reply->message, sizeof error_reply); + complete((struct completion *) msg->private); +} + +static atomic_t zcrypt_step = ATOMIC_INIT(0); + +/** + * The request distributor calls this function if it picked the PCIXCC/CEX2C + * device to handle a modexpo request. + * @zdev: pointer to zcrypt_device structure that identifies the + * PCIXCC/CEX2C device to the request distributor + * @mex: pointer to the modexpo request buffer + */ +static long zcrypt_pcixcc_modexpo(struct zcrypt_device *zdev, + struct ica_rsa_modexpo *mex) +{ + struct ap_message ap_msg; + struct completion work; + int rc; + + ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL); + if (!ap_msg.message) + return -ENOMEM; + ap_msg.psmid = (((unsigned long long) current->pid) << 32) + + atomic_inc_return(&zcrypt_step); + ap_msg.private = &work; + rc = ICAMEX_msg_to_type6MEX_msgX(zdev, &ap_msg, mex); + if (rc) + goto out_free; + init_completion(&work); + ap_queue_message(zdev->ap_dev, &ap_msg); + rc = wait_for_completion_interruptible_timeout( + &work, PCIXCC_CLEANUP_TIME); + if (rc > 0) + rc = convert_response_ica(zdev, &ap_msg, mex->outputdata, + mex->outputdatalength); + else { + /* Signal pending or message timed out. */ + ap_cancel_message(zdev->ap_dev, &ap_msg); + if (rc == 0) + /* Message timed out. */ + rc = -ETIME; + } +out_free: + free_page((unsigned long) ap_msg.message); + return rc; +} + +/** + * The request distributor calls this function if it picked the PCIXCC/CEX2C + * device to handle a modexpo_crt request. + * @zdev: pointer to zcrypt_device structure that identifies the + * PCIXCC/CEX2C device to the request distributor + * @crt: pointer to the modexpoc_crt request buffer + */ +static long zcrypt_pcixcc_modexpo_crt(struct zcrypt_device *zdev, + struct ica_rsa_modexpo_crt *crt) +{ + struct ap_message ap_msg; + struct completion work; + int rc; + + ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL); + if (!ap_msg.message) + return -ENOMEM; + ap_msg.psmid = (((unsigned long long) current->pid) << 32) + + atomic_inc_return(&zcrypt_step); + ap_msg.private = &work; + rc = ICACRT_msg_to_type6CRT_msgX(zdev, &ap_msg, crt); + if (rc) + goto out_free; + init_completion(&work); + ap_queue_message(zdev->ap_dev, &ap_msg); + rc = wait_for_completion_interruptible_timeout( + &work, PCIXCC_CLEANUP_TIME); + if (rc > 0) + rc = convert_response_ica(zdev, &ap_msg, crt->outputdata, + crt->outputdatalength); + else { + /* Signal pending or message timed out. */ + ap_cancel_message(zdev->ap_dev, &ap_msg); + if (rc == 0) + /* Message timed out. */ + rc = -ETIME; + } +out_free: + free_page((unsigned long) ap_msg.message); + return rc; +} + +/** + * The crypto operations for a PCIXCC/CEX2C card. + */ +static struct zcrypt_ops zcrypt_pcixcc_ops = { + .rsa_modexpo = zcrypt_pcixcc_modexpo, + .rsa_modexpo_crt = zcrypt_pcixcc_modexpo_crt, +}; + +/** + * Micro-code detection function. Its sends a message to a pcixcc card + * to find out the microcode level. + * @ap_dev: pointer to the AP device. + */ +static int zcrypt_pcixcc_mcl(struct ap_device *ap_dev) +{ + static unsigned char msg[] = { + 0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x43,0x41,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x50,0x4B,0x00,0x00, + 0x00,0x00,0x01,0xC4,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x07,0x24,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0xDC,0x02,0x00,0x00,0x00,0x54,0x32, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xE8, + 0x00,0x00,0x00,0x00,0x00,0x00,0x07,0x24, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x50,0x4B,0x00,0x0A, + 0x4D,0x52,0x50,0x20,0x20,0x20,0x20,0x20, + 0x00,0x42,0x00,0x01,0x02,0x03,0x04,0x05, + 0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D, + 0x0E,0x0F,0x00,0x11,0x22,0x33,0x44,0x55, + 0x66,0x77,0x88,0x99,0xAA,0xBB,0xCC,0xDD, + 0xEE,0xFF,0xFF,0xEE,0xDD,0xCC,0xBB,0xAA, + 0x99,0x88,0x77,0x66,0x55,0x44,0x33,0x22, + 0x11,0x00,0x01,0x23,0x45,0x67,0x89,0xAB, + 0xCD,0xEF,0xFE,0xDC,0xBA,0x98,0x76,0x54, + 0x32,0x10,0x00,0x9A,0x00,0x98,0x00,0x00, + 0x1E,0x00,0x00,0x94,0x00,0x00,0x00,0x00, + 0x04,0x00,0x00,0x8C,0x00,0x00,0x00,0x40, + 0x02,0x00,0x00,0x40,0xBA,0xE8,0x23,0x3C, + 0x75,0xF3,0x91,0x61,0xD6,0x73,0x39,0xCF, + 0x7B,0x6D,0x8E,0x61,0x97,0x63,0x9E,0xD9, + 0x60,0x55,0xD6,0xC7,0xEF,0xF8,0x1E,0x63, + 0x95,0x17,0xCC,0x28,0x45,0x60,0x11,0xC5, + 0xC4,0x4E,0x66,0xC6,0xE6,0xC3,0xDE,0x8A, + 0x19,0x30,0xCF,0x0E,0xD7,0xAA,0xDB,0x01, + 0xD8,0x00,0xBB,0x8F,0x39,0x9F,0x64,0x28, + 0xF5,0x7A,0x77,0x49,0xCC,0x6B,0xA3,0x91, + 0x97,0x70,0xE7,0x60,0x1E,0x39,0xE1,0xE5, + 0x33,0xE1,0x15,0x63,0x69,0x08,0x80,0x4C, + 0x67,0xC4,0x41,0x8F,0x48,0xDF,0x26,0x98, + 0xF1,0xD5,0x8D,0x88,0xD9,0x6A,0xA4,0x96, + 0xC5,0x84,0xD9,0x30,0x49,0x67,0x7D,0x19, + 0xB1,0xB3,0x45,0x4D,0xB2,0x53,0x9A,0x47, + 0x3C,0x7C,0x55,0xBF,0xCC,0x85,0x00,0x36, + 0xF1,0x3D,0x93,0x53 + }; + unsigned long long psmid; + struct CPRBX *cprbx; + char *reply; + int rc, i; + + reply = (void *) get_zeroed_page(GFP_KERNEL); + if (!reply) + return -ENOMEM; + + rc = ap_send(ap_dev->qid, 0x0102030405060708ULL, msg, sizeof(msg)); + if (rc) + goto out_free; + + /* Wait for the test message to complete. */ + for (i = 0; i < 6; i++) { + mdelay(300); + rc = ap_recv(ap_dev->qid, &psmid, reply, 4096); + if (rc == 0 && psmid == 0x0102030405060708ULL) + break; + } + + if (i >= 6) { + /* Got no answer. */ + rc = -ENODEV; + goto out_free; + } + + cprbx = (struct CPRBX *) (reply + 48); + if (cprbx->ccp_rtcode == 8 && cprbx->ccp_rscode == 33) + rc = ZCRYPT_PCIXCC_MCL2; + else + rc = ZCRYPT_PCIXCC_MCL3; +out_free: + free_page((unsigned long) reply); + return rc; +} + +/** + * Probe function for PCIXCC/CEX2C cards. It always accepts the AP device + * since the bus_match already checked the hardware type. The PCIXCC + * cards come in two flavours: micro code level 2 and micro code level 3. + * This is checked by sending a test message to the device. + * @ap_dev: pointer to the AP device. + */ +static int zcrypt_pcixcc_probe(struct ap_device *ap_dev) +{ + struct zcrypt_device *zdev; + int rc; + + zdev = zcrypt_device_alloc(PCIXCC_MAX_RESPONSE_SIZE); + if (!zdev) + return -ENOMEM; + zdev->ap_dev = ap_dev; + zdev->ops = &zcrypt_pcixcc_ops; + zdev->online = 1; + if (ap_dev->device_type == AP_DEVICE_TYPE_PCIXCC) { + rc = zcrypt_pcixcc_mcl(ap_dev); + if (rc < 0) { + zcrypt_device_free(zdev); + return rc; + } + zdev->user_space_type = rc; + if (rc == ZCRYPT_PCIXCC_MCL2) { + zdev->type_string = "PCIXCC_MCL2"; + zdev->speed_rating = PCIXCC_MCL2_SPEED_RATING; + zdev->min_mod_size = PCIXCC_MIN_MOD_SIZE_OLD; + zdev->max_mod_size = PCIXCC_MAX_MOD_SIZE; + } else { + zdev->type_string = "PCIXCC_MCL3"; + zdev->speed_rating = PCIXCC_MCL3_SPEED_RATING; + zdev->min_mod_size = PCIXCC_MIN_MOD_SIZE; + zdev->max_mod_size = PCIXCC_MAX_MOD_SIZE; + } + } else { + zdev->user_space_type = ZCRYPT_CEX2C; + zdev->type_string = "CEX2C"; + zdev->speed_rating = CEX2C_SPEED_RATING; + zdev->min_mod_size = PCIXCC_MIN_MOD_SIZE; + zdev->max_mod_size = PCIXCC_MAX_MOD_SIZE; + } + ap_dev->reply = &zdev->reply; + ap_dev->private = zdev; + rc = zcrypt_device_register(zdev); + if (rc) + goto out_free; + return 0; + + out_free: + ap_dev->private = NULL; + zcrypt_device_free(zdev); + return rc; +} + +/** + * This is called to remove the extended PCIXCC/CEX2C driver information + * if an AP device is removed. + */ +static void zcrypt_pcixcc_remove(struct ap_device *ap_dev) +{ + struct zcrypt_device *zdev = ap_dev->private; + + zcrypt_device_unregister(zdev); +} + +int __init zcrypt_pcixcc_init(void) +{ + return ap_driver_register(&zcrypt_pcixcc_driver, THIS_MODULE, "pcixcc"); +} + +void zcrypt_pcixcc_exit(void) +{ + ap_driver_unregister(&zcrypt_pcixcc_driver); +} + +#ifndef CONFIG_ZCRYPT_MONOLITHIC +module_init(zcrypt_pcixcc_init); +module_exit(zcrypt_pcixcc_exit); +#endif diff --git a/drivers/s390/crypto/zcrypt_pcixcc.h b/drivers/s390/crypto/zcrypt_pcixcc.h new file mode 100644 index 000000000000..d4c44c4d7ad0 --- /dev/null +++ b/drivers/s390/crypto/zcrypt_pcixcc.h @@ -0,0 +1,79 @@ +/* + * linux/drivers/s390/crypto/zcrypt_pcixcc.h + * + * zcrypt 2.0.0 + * + * Copyright (C) 2001, 2006 IBM Corporation + * Author(s): Robert Burroughs + * Eric Rossman (edrossma@us.ibm.com) + * + * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) + * Major cleanup & driver split: Martin Schwidefsky + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _ZCRYPT_PCIXCC_H_ +#define _ZCRYPT_PCIXCC_H_ + +/** + * CPRBX + * Note that all shorts and ints are big-endian. + * All pointer fields are 16 bytes long, and mean nothing. + * + * A request CPRB is followed by a request_parameter_block. + * + * The request (or reply) parameter block is organized thus: + * function code + * VUD block + * key block + */ +struct CPRBX { + unsigned short cprb_len; /* CPRB length 220 */ + unsigned char cprb_ver_id; /* CPRB version id. 0x02 */ + unsigned char pad_000[3]; /* Alignment pad bytes */ + unsigned char func_id[2]; /* function id 0x5432 */ + unsigned char cprb_flags[4]; /* Flags */ + unsigned int req_parml; /* request parameter buffer len */ + unsigned int req_datal; /* request data buffer */ + unsigned int rpl_msgbl; /* reply message block length */ + unsigned int rpld_parml; /* replied parameter block len */ + unsigned int rpl_datal; /* reply data block len */ + unsigned int rpld_datal; /* replied data block len */ + unsigned int req_extbl; /* request extension block len */ + unsigned char pad_001[4]; /* reserved */ + unsigned int rpld_extbl; /* replied extension block len */ + unsigned char req_parmb[16]; /* request parm block 'address' */ + unsigned char req_datab[16]; /* request data block 'address' */ + unsigned char rpl_parmb[16]; /* reply parm block 'address' */ + unsigned char rpl_datab[16]; /* reply data block 'address' */ + unsigned char req_extb[16]; /* request extension block 'addr'*/ + unsigned char rpl_extb[16]; /* reply extension block 'addres'*/ + unsigned short ccp_rtcode; /* server return code */ + unsigned short ccp_rscode; /* server reason code */ + unsigned int mac_data_len; /* Mac Data Length */ + unsigned char logon_id[8]; /* Logon Identifier */ + unsigned char mac_value[8]; /* Mac Value */ + unsigned char mac_content_flgs;/* Mac content flag byte */ + unsigned char pad_002; /* Alignment */ + unsigned short domain; /* Domain */ + unsigned char pad_003[12]; /* Domain masks */ + unsigned char pad_004[36]; /* reserved */ +} __attribute__((packed)); + +int zcrypt_pcixcc_init(void); +void zcrypt_pcixcc_exit(void); + +#endif /* _ZCRYPT_PCIXCC_H_ */ From fe3a1be59c851aba2330387596c6134bc5ec8397 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 20 Sep 2006 15:58:34 +0200 Subject: [PATCH 0226/1063] [S390] zcrypt driver Makefile, Kconfig and monolithic build. The Makefile and Kconfig changes should be obvious. The monolithic build option is there to create an old-style z90crypt module for backward compatability to older distributions. Signed-off-by: Ralph Wuerthner Signed-off-by: Martin Schwidefsky --- drivers/s390/Kconfig | 21 +++++++ drivers/s390/crypto/Makefile | 13 ++++ drivers/s390/crypto/zcrypt_mono.c | 100 ++++++++++++++++++++++++++++++ 3 files changed, 134 insertions(+) create mode 100644 drivers/s390/crypto/zcrypt_mono.c diff --git a/drivers/s390/Kconfig b/drivers/s390/Kconfig index f0ea550d39bc..bc4261e8b606 100644 --- a/drivers/s390/Kconfig +++ b/drivers/s390/Kconfig @@ -217,4 +217,25 @@ endmenu menu "Cryptographic devices" +config ZCRYPT + tristate "Support for PCI-attached cryptographic adapters" + select ZCRYPT_MONOLITHIC if ZCRYPT="y" + default "m" + help + Select this option if you want to use a PCI-attached cryptographic + adapter like: + + PCI Cryptographic Accelerator (PCICA) + + PCI Cryptographic Coprocessor (PCICC) + + PCI-X Cryptographic Coprocessor (PCIXCC) + + Crypto Express2 Coprocessor (CEX2C) + + Crypto Express2 Accelerator (CEX2A) + +config ZCRYPT_MONOLITHIC + bool "Monolithic zcrypt module" + depends on ZCRYPT="m" + help + Select this option if you want to have a single module z90crypt.ko + that contains all parts of the crypto device driver (ap bus, + request router and all the card drivers). + endmenu diff --git a/drivers/s390/crypto/Makefile b/drivers/s390/crypto/Makefile index 67e75be8e4e4..f0a12d2eb780 100644 --- a/drivers/s390/crypto/Makefile +++ b/drivers/s390/crypto/Makefile @@ -2,3 +2,16 @@ # S/390 crypto devices # +ifdef CONFIG_ZCRYPT_MONOLITHIC + +z90crypt-objs := zcrypt_mono.o ap_bus.o zcrypt_api.o \ + zcrypt_pcica.o zcrypt_pcicc.o zcrypt_pcixcc.o zcrypt_cex2a.o +obj-$(CONFIG_ZCRYPT) += z90crypt.o + +else + +ap-objs := ap_bus.o +obj-$(CONFIG_ZCRYPT) += ap.o zcrypt_api.o zcrypt_pcicc.o zcrypt_pcixcc.o +obj-$(CONFIG_ZCRYPT) += zcrypt_pcica.o zcrypt_cex2a.o + +endif diff --git a/drivers/s390/crypto/zcrypt_mono.c b/drivers/s390/crypto/zcrypt_mono.c new file mode 100644 index 000000000000..f48b61a6126c --- /dev/null +++ b/drivers/s390/crypto/zcrypt_mono.c @@ -0,0 +1,100 @@ +/* + * linux/drivers/s390/crypto/zcrypt_mono.c + * + * zcrypt 2.0.0 + * + * Copyright (C) 2001, 2006 IBM Corporation + * Author(s): Robert Burroughs + * Eric Rossman (edrossma@us.ibm.com) + * + * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) + * Major cleanup & driver split: Martin Schwidefsky + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ap_bus.h" +#include "zcrypt_api.h" +#include "zcrypt_pcica.h" +#include "zcrypt_pcicc.h" +#include "zcrypt_pcixcc.h" +#include "zcrypt_cex2a.h" + +/** + * The module initialization code. + */ +int __init zcrypt_init(void) +{ + int rc; + + rc = ap_module_init(); + if (rc) + goto out; + rc = zcrypt_api_init(); + if (rc) + goto out_ap; + rc = zcrypt_pcica_init(); + if (rc) + goto out_api; + rc = zcrypt_pcicc_init(); + if (rc) + goto out_pcica; + rc = zcrypt_pcixcc_init(); + if (rc) + goto out_pcicc; + rc = zcrypt_cex2a_init(); + if (rc) + goto out_pcixcc; + return 0; + +out_pcixcc: + zcrypt_pcixcc_exit(); +out_pcicc: + zcrypt_pcicc_exit(); +out_pcica: + zcrypt_pcica_exit(); +out_api: + zcrypt_api_exit(); +out_ap: + ap_module_exit(); +out: + return rc; +} + +/** + * The module termination code. + */ +void __exit zcrypt_exit(void) +{ + zcrypt_cex2a_exit(); + zcrypt_pcixcc_exit(); + zcrypt_pcicc_exit(); + zcrypt_pcica_exit(); + zcrypt_api_exit(); + ap_module_exit(); +} + +module_init(zcrypt_init); +module_exit(zcrypt_exit); From 5432114baf0300286a6ca1b0aea549492a379432 Mon Sep 17 00:00:00 2001 From: Ralph Wuerthner Date: Wed, 20 Sep 2006 15:58:36 +0200 Subject: [PATCH 0227/1063] [S390] zcrypt secure key cryptography extension. Allow the user space to send extended cprb messages directly to the PCIXCC / CEX2C cards. This allows the CCA library to construct special crypto requests that use "secure" keys that are stored on the card. Signed-off-by: Ralph Wuerthner Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/zcrypt_api.c | 112 +++++++++++- drivers/s390/crypto/zcrypt_api.h | 3 +- drivers/s390/crypto/zcrypt_cca_key.h | 2 +- drivers/s390/crypto/zcrypt_cex2a.c | 2 +- drivers/s390/crypto/zcrypt_cex2a.h | 2 +- drivers/s390/crypto/zcrypt_error.h | 2 +- drivers/s390/crypto/zcrypt_mono.c | 2 +- drivers/s390/crypto/zcrypt_pcica.c | 2 +- drivers/s390/crypto/zcrypt_pcica.h | 2 +- drivers/s390/crypto/zcrypt_pcicc.c | 2 +- drivers/s390/crypto/zcrypt_pcicc.h | 2 +- drivers/s390/crypto/zcrypt_pcixcc.c | 263 +++++++++++++++++++++++++-- drivers/s390/crypto/zcrypt_pcixcc.h | 2 +- include/asm-s390/zcrypt.h | 80 +++++++- 14 files changed, 452 insertions(+), 26 deletions(-) diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index b3fe003b3d2d..1edc10a7a6f2 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -1,7 +1,7 @@ /* * linux/drivers/s390/crypto/zcrypt_api.c * - * zcrypt 2.0.0 + * zcrypt 2.1.0 * * Copyright (C) 2001, 2006 IBM Corporation * Author(s): Robert Burroughs @@ -392,6 +392,41 @@ static long zcrypt_rsa_crt(struct ica_rsa_modexpo_crt *crt) return -ENODEV; } +static long zcrypt_send_cprb(struct ica_xcRB *xcRB) +{ + struct zcrypt_device *zdev; + int rc; + + spin_lock_bh(&zcrypt_device_lock); + list_for_each_entry(zdev, &zcrypt_device_list, list) { + if (!zdev->online || !zdev->ops->send_cprb || + (xcRB->user_defined != AUTOSELECT && + AP_QID_DEVICE(zdev->ap_dev->qid) != xcRB->user_defined) + ) + continue; + zcrypt_device_get(zdev); + get_device(&zdev->ap_dev->device); + zdev->request_count++; + __zcrypt_decrease_preference(zdev); + spin_unlock_bh(&zcrypt_device_lock); + if (try_module_get(zdev->ap_dev->drv->driver.owner)) { + rc = zdev->ops->send_cprb(zdev, xcRB); + module_put(zdev->ap_dev->drv->driver.owner); + } + else + rc = -EAGAIN; + spin_lock_bh(&zcrypt_device_lock); + zdev->request_count--; + __zcrypt_increase_preference(zdev); + put_device(&zdev->ap_dev->device); + zcrypt_device_put(zdev); + spin_unlock_bh(&zcrypt_device_lock); + return rc; + } + spin_unlock_bh(&zcrypt_device_lock); + return -ENODEV; +} + static void zcrypt_status_mask(char status[AP_DEVICES]) { struct zcrypt_device *zdev; @@ -535,6 +570,18 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd, return rc; return put_user(crt.outputdatalength, &ucrt->outputdatalength); } + case ZSECSENDCPRB: { + struct ica_xcRB __user *uxcRB = (void __user *) arg; + struct ica_xcRB xcRB; + if (copy_from_user(&xcRB, uxcRB, sizeof(xcRB))) + return -EFAULT; + do { + rc = zcrypt_send_cprb(&xcRB); + } while (rc == -EAGAIN); + if (copy_to_user(uxcRB, &xcRB, sizeof(xcRB))) + return -EFAULT; + return rc; + } case Z90STAT_STATUS_MASK: { char status[AP_DEVICES]; zcrypt_status_mask(status); @@ -683,6 +730,67 @@ static long trans_modexpo_crt32(struct file *filp, unsigned int cmd, return rc; } +struct compat_ica_xcRB { + unsigned short agent_ID; + unsigned int user_defined; + unsigned short request_ID; + unsigned int request_control_blk_length; + unsigned char padding1[16 - sizeof (compat_uptr_t)]; + compat_uptr_t request_control_blk_addr; + unsigned int request_data_length; + char padding2[16 - sizeof (compat_uptr_t)]; + compat_uptr_t request_data_address; + unsigned int reply_control_blk_length; + char padding3[16 - sizeof (compat_uptr_t)]; + compat_uptr_t reply_control_blk_addr; + unsigned int reply_data_length; + char padding4[16 - sizeof (compat_uptr_t)]; + compat_uptr_t reply_data_addr; + unsigned short priority_window; + unsigned int status; +} __attribute__((packed)); + +static long trans_xcRB32(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct compat_ica_xcRB __user *uxcRB32 = compat_ptr(arg); + struct compat_ica_xcRB xcRB32; + struct ica_xcRB xcRB64; + long rc; + + if (copy_from_user(&xcRB32, uxcRB32, sizeof(xcRB32))) + return -EFAULT; + xcRB64.agent_ID = xcRB32.agent_ID; + xcRB64.user_defined = xcRB32.user_defined; + xcRB64.request_ID = xcRB32.request_ID; + xcRB64.request_control_blk_length = + xcRB32.request_control_blk_length; + xcRB64.request_control_blk_addr = + compat_ptr(xcRB32.request_control_blk_addr); + xcRB64.request_data_length = + xcRB32.request_data_length; + xcRB64.request_data_address = + compat_ptr(xcRB32.request_data_address); + xcRB64.reply_control_blk_length = + xcRB32.reply_control_blk_length; + xcRB64.reply_control_blk_addr = + compat_ptr(xcRB32.reply_control_blk_addr); + xcRB64.reply_data_length = xcRB32.reply_data_length; + xcRB64.reply_data_addr = + compat_ptr(xcRB32.reply_data_addr); + xcRB64.priority_window = xcRB32.priority_window; + xcRB64.status = xcRB32.status; + do { + rc = zcrypt_send_cprb(&xcRB64); + } while (rc == -EAGAIN); + xcRB32.reply_control_blk_length = xcRB64.reply_control_blk_length; + xcRB32.reply_data_length = xcRB64.reply_data_length; + xcRB32.status = xcRB64.status; + if (copy_to_user(uxcRB32, &xcRB32, sizeof(xcRB32))) + return -EFAULT; + return rc; +} + long zcrypt_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -690,6 +798,8 @@ long zcrypt_compat_ioctl(struct file *filp, unsigned int cmd, return trans_modexpo32(filp, cmd, arg); if (cmd == ICARSACRT) return trans_modexpo_crt32(filp, cmd, arg); + if (cmd == ZSECSENDCPRB) + return trans_xcRB32(filp, cmd, arg); return zcrypt_unlocked_ioctl(filp, cmd, arg); } #endif diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h index 1f0e61f2e9b4..de4877ee618f 100644 --- a/drivers/s390/crypto/zcrypt_api.h +++ b/drivers/s390/crypto/zcrypt_api.h @@ -1,7 +1,7 @@ /* * linux/drivers/s390/crypto/zcrypt_api.h * - * zcrypt 2.0.0 + * zcrypt 2.1.0 * * Copyright (C) 2001, 2006 IBM Corporation * Author(s): Robert Burroughs @@ -106,6 +106,7 @@ struct zcrypt_ops { long (*rsa_modexpo)(struct zcrypt_device *, struct ica_rsa_modexpo *); long (*rsa_modexpo_crt)(struct zcrypt_device *, struct ica_rsa_modexpo_crt *); + long (*send_cprb)(struct zcrypt_device *, struct ica_xcRB *); }; struct zcrypt_device { diff --git a/drivers/s390/crypto/zcrypt_cca_key.h b/drivers/s390/crypto/zcrypt_cca_key.h index c80f40d44197..8dbcf0eef3e5 100644 --- a/drivers/s390/crypto/zcrypt_cca_key.h +++ b/drivers/s390/crypto/zcrypt_cca_key.h @@ -1,7 +1,7 @@ /* * linux/drivers/s390/crypto/zcrypt_cca_key.h * - * zcrypt 2.0.0 + * zcrypt 2.1.0 * * Copyright (C) 2001, 2006 IBM Corporation * Author(s): Robert Burroughs diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c index 350248e5cd93..a62b00083d0c 100644 --- a/drivers/s390/crypto/zcrypt_cex2a.c +++ b/drivers/s390/crypto/zcrypt_cex2a.c @@ -1,7 +1,7 @@ /* * linux/drivers/s390/crypto/zcrypt_cex2a.c * - * zcrypt 2.0.0 + * zcrypt 2.1.0 * * Copyright (C) 2001, 2006 IBM Corporation * Author(s): Robert Burroughs diff --git a/drivers/s390/crypto/zcrypt_cex2a.h b/drivers/s390/crypto/zcrypt_cex2a.h index 61a78c32dce4..8f69d1dacab8 100644 --- a/drivers/s390/crypto/zcrypt_cex2a.h +++ b/drivers/s390/crypto/zcrypt_cex2a.h @@ -1,7 +1,7 @@ /* * linux/drivers/s390/crypto/zcrypt_cex2a.h * - * zcrypt 2.0.0 + * zcrypt 2.1.0 * * Copyright (C) 2001, 2006 IBM Corporation * Author(s): Robert Burroughs diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h index b22bd055a03b..2cb616ba8bec 100644 --- a/drivers/s390/crypto/zcrypt_error.h +++ b/drivers/s390/crypto/zcrypt_error.h @@ -1,7 +1,7 @@ /* * linux/drivers/s390/crypto/zcrypt_error.h * - * zcrypt 2.0.0 + * zcrypt 2.1.0 * * Copyright (C) 2001, 2006 IBM Corporation * Author(s): Robert Burroughs diff --git a/drivers/s390/crypto/zcrypt_mono.c b/drivers/s390/crypto/zcrypt_mono.c index f48b61a6126c..2a9349ad68b7 100644 --- a/drivers/s390/crypto/zcrypt_mono.c +++ b/drivers/s390/crypto/zcrypt_mono.c @@ -1,7 +1,7 @@ /* * linux/drivers/s390/crypto/zcrypt_mono.c * - * zcrypt 2.0.0 + * zcrypt 2.1.0 * * Copyright (C) 2001, 2006 IBM Corporation * Author(s): Robert Burroughs diff --git a/drivers/s390/crypto/zcrypt_pcica.c b/drivers/s390/crypto/zcrypt_pcica.c index 0ff56e86caae..b6a4ecdc8025 100644 --- a/drivers/s390/crypto/zcrypt_pcica.c +++ b/drivers/s390/crypto/zcrypt_pcica.c @@ -1,7 +1,7 @@ /* * linux/drivers/s390/crypto/zcrypt_pcica.c * - * zcrypt 2.0.0 + * zcrypt 2.1.0 * * Copyright (C) 2001, 2006 IBM Corporation * Author(s): Robert Burroughs diff --git a/drivers/s390/crypto/zcrypt_pcica.h b/drivers/s390/crypto/zcrypt_pcica.h index a08a4f8c33c9..3be11187f6df 100644 --- a/drivers/s390/crypto/zcrypt_pcica.h +++ b/drivers/s390/crypto/zcrypt_pcica.h @@ -1,7 +1,7 @@ /* * linux/drivers/s390/crypto/zcrypt_pcica.h * - * zcrypt 2.0.0 + * zcrypt 2.1.0 * * Copyright (C) 2001, 2006 IBM Corporation * Author(s): Robert Burroughs diff --git a/drivers/s390/crypto/zcrypt_pcicc.c b/drivers/s390/crypto/zcrypt_pcicc.c index 900362983fec..f295a403b29a 100644 --- a/drivers/s390/crypto/zcrypt_pcicc.c +++ b/drivers/s390/crypto/zcrypt_pcicc.c @@ -1,7 +1,7 @@ /* * linux/drivers/s390/crypto/zcrypt_pcicc.c * - * zcrypt 2.0.0 + * zcrypt 2.1.0 * * Copyright (C) 2001, 2006 IBM Corporation * Author(s): Robert Burroughs diff --git a/drivers/s390/crypto/zcrypt_pcicc.h b/drivers/s390/crypto/zcrypt_pcicc.h index 027bafc7312a..6d4454846c8f 100644 --- a/drivers/s390/crypto/zcrypt_pcicc.h +++ b/drivers/s390/crypto/zcrypt_pcicc.h @@ -1,7 +1,7 @@ /* * linux/drivers/s390/crypto/zcrypt_pcicc.h * - * zcrypt 2.0.0 + * zcrypt 2.1.0 * * Copyright (C) 2001, 2006 IBM Corporation * Author(s): Robert Burroughs diff --git a/drivers/s390/crypto/zcrypt_pcixcc.c b/drivers/s390/crypto/zcrypt_pcixcc.c index 6064cf58be43..2da8b9381407 100644 --- a/drivers/s390/crypto/zcrypt_pcixcc.c +++ b/drivers/s390/crypto/zcrypt_pcixcc.c @@ -1,7 +1,7 @@ /* * linux/drivers/s390/crypto/zcrypt_pcixcc.c * - * zcrypt 2.0.0 + * zcrypt 2.1.0 * * Copyright (C) 2001, 2006 IBM Corporation * Author(s): Robert Burroughs @@ -60,6 +60,15 @@ #define PCIXCC_CLEANUP_TIME (15*HZ) +#define CEIL4(x) ((((x)+3)/4)*4) + +struct response_type { + struct completion work; + int type; +}; +#define PCIXCC_RESPONSE_TYPE_ICA 0 +#define PCIXCC_RESPONSE_TYPE_XCRB 1 + static struct ap_device_id zcrypt_pcixcc_ids[] = { { AP_DEVICE(AP_DEVICE_TYPE_PCIXCC) }, { AP_DEVICE(AP_DEVICE_TYPE_CEX2C) }, @@ -243,6 +252,108 @@ static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_device *zdev, return 0; } +/** + * Convert a XCRB message to a type6 CPRB message. + * + * @zdev: crypto device pointer + * @ap_msg: pointer to AP message + * @xcRB: pointer to user input data + * + * Returns 0 on success or -EFAULT. + */ +struct type86_fmt2_msg { + struct type86_hdr hdr; + struct type86_fmt2_ext fmt2; +} __attribute__((packed)); + +static int XCRB_msg_to_type6CPRB_msgX(struct zcrypt_device *zdev, + struct ap_message *ap_msg, + struct ica_xcRB *xcRB) +{ + static struct type6_hdr static_type6_hdrX = { + .type = 0x06, + .offset1 = 0x00000058, + }; + struct { + struct type6_hdr hdr; + struct ica_CPRBX cprbx; + } __attribute__((packed)) *msg = ap_msg->message; + + int rcblen = CEIL4(xcRB->request_control_blk_length); + int replylen; + char *req_data = ap_msg->message + sizeof(struct type6_hdr) + rcblen; + char *function_code; + + /* length checks */ + ap_msg->length = sizeof(struct type6_hdr) + + CEIL4(xcRB->request_control_blk_length) + + xcRB->request_data_length; + if (ap_msg->length > PCIXCC_MAX_XCRB_MESSAGE_SIZE) { + PRINTK("Combined message is too large (%ld/%d/%d).\n", + sizeof(struct type6_hdr), + xcRB->request_control_blk_length, + xcRB->request_data_length); + return -EFAULT; + } + if (CEIL4(xcRB->reply_control_blk_length) > + PCIXCC_MAX_XCRB_REPLY_SIZE) { + PDEBUG("Reply CPRB length is too large (%d).\n", + xcRB->request_control_blk_length); + return -EFAULT; + } + if (CEIL4(xcRB->reply_data_length) > PCIXCC_MAX_XCRB_DATA_SIZE) { + PDEBUG("Reply data block length is too large (%d).\n", + xcRB->reply_data_length); + return -EFAULT; + } + replylen = CEIL4(xcRB->reply_control_blk_length) + + CEIL4(xcRB->reply_data_length) + + sizeof(struct type86_fmt2_msg); + if (replylen > PCIXCC_MAX_XCRB_RESPONSE_SIZE) { + PDEBUG("Reply CPRB + data block > PCIXCC_MAX_XCRB_RESPONSE_SIZE" + " (%d/%d/%d).\n", + sizeof(struct type86_fmt2_msg), + xcRB->reply_control_blk_length, + xcRB->reply_data_length); + xcRB->reply_control_blk_length = PCIXCC_MAX_XCRB_RESPONSE_SIZE - + (sizeof(struct type86_fmt2_msg) + + CEIL4(xcRB->reply_data_length)); + PDEBUG("Capping Reply CPRB length at %d\n", + xcRB->reply_control_blk_length); + } + + /* prepare type6 header */ + msg->hdr = static_type6_hdrX; + memcpy(msg->hdr.agent_id , &(xcRB->agent_ID), sizeof(xcRB->agent_ID)); + msg->hdr.ToCardLen1 = xcRB->request_control_blk_length; + if (xcRB->request_data_length) { + msg->hdr.offset2 = msg->hdr.offset1 + rcblen; + msg->hdr.ToCardLen2 = xcRB->request_data_length; + } + msg->hdr.FromCardLen1 = xcRB->reply_control_blk_length; + msg->hdr.FromCardLen2 = xcRB->reply_data_length; + + /* prepare CPRB */ + if (copy_from_user(&(msg->cprbx), xcRB->request_control_blk_addr, + xcRB->request_control_blk_length)) + return -EFAULT; + if (msg->cprbx.cprb_len + sizeof(msg->hdr.function_code) > + xcRB->request_control_blk_length) { + PDEBUG("cprb_len too large (%d/%d)\n", msg->cprbx.cprb_len, + xcRB->request_control_blk_length); + return -EFAULT; + } + function_code = ((unsigned char *)&msg->cprbx) + msg->cprbx.cprb_len; + memcpy(msg->hdr.function_code, function_code, sizeof(msg->hdr.function_code)); + + /* copy data block */ + if (xcRB->request_data_length && + copy_from_user(req_data, xcRB->request_data_address, + xcRB->request_data_length)) + return -EFAULT; + return 0; +} + /** * Copy results from a type 86 ICA reply message back to user space. * @@ -363,6 +474,37 @@ static int convert_type86_ica(struct zcrypt_device *zdev, return 0; } +/** + * Copy results from a type 86 XCRB reply message back to user space. + * + * @zdev: crypto device pointer + * @reply: reply AP message. + * @xcRB: pointer to XCRB + * + * Returns 0 on success or -EINVAL, -EFAULT, -EAGAIN in case of an error. + */ +static int convert_type86_xcrb(struct zcrypt_device *zdev, + struct ap_message *reply, + struct ica_xcRB *xcRB) +{ + struct type86_fmt2_msg *msg = reply->message; + char *data = reply->message; + + /* Copy CPRB to user */ + if (copy_to_user(xcRB->reply_control_blk_addr, + data + msg->fmt2.offset1, msg->fmt2.count1)) + return -EFAULT; + xcRB->reply_control_blk_length = msg->fmt2.count1; + + /* Copy data buffer to user */ + if (msg->fmt2.count2) + if (copy_to_user(xcRB->reply_data_addr, + data + msg->fmt2.offset2, msg->fmt2.count2)) + return -EFAULT; + xcRB->reply_data_length = msg->fmt2.count2; + return 0; +} + static int convert_response_ica(struct zcrypt_device *zdev, struct ap_message *reply, char __user *outputdata, @@ -391,6 +533,36 @@ static int convert_response_ica(struct zcrypt_device *zdev, } } +static int convert_response_xcrb(struct zcrypt_device *zdev, + struct ap_message *reply, + struct ica_xcRB *xcRB) +{ + struct type86x_reply *msg = reply->message; + + /* Response type byte is the second byte in the response. */ + switch (((unsigned char *) reply->message)[1]) { + case TYPE82_RSP_CODE: + case TYPE88_RSP_CODE: + xcRB->status = 0x0008044DL; /* HDD_InvalidParm */ + return convert_error(zdev, reply); + case TYPE86_RSP_CODE: + if (msg->hdr.reply_code) { + memcpy(&(xcRB->status), msg->fmt2.apfs, sizeof(u32)); + return convert_error(zdev, reply); + } + if (msg->cprbx.cprb_ver_id == 0x02) + return convert_type86_xcrb(zdev, reply, xcRB); + /* no break, incorrect cprb version is an unknown response */ + default: /* Unknown response type, this should NEVER EVER happen */ + PRINTK("Unrecognized Message Header: %08x%08x\n", + *(unsigned int *) reply->message, + *(unsigned int *) (reply->message+4)); + xcRB->status = 0x0008044DL; /* HDD_InvalidParm */ + zdev->online = 0; + return -EAGAIN; /* repeat the request on a different device. */ + } +} + /** * This function is called from the AP bus code after a crypto request * "msg" has finished with the reply message "reply". @@ -407,6 +579,8 @@ static void zcrypt_pcixcc_receive(struct ap_device *ap_dev, .type = TYPE82_RSP_CODE, .reply_code = REP82_ERROR_MACHINE_FAILURE, }; + struct response_type *resp_type = + (struct response_type *) msg->private; struct type86x_reply *t86r = reply->message; int length; @@ -415,12 +589,27 @@ static void zcrypt_pcixcc_receive(struct ap_device *ap_dev, memcpy(msg->message, &error_reply, sizeof(error_reply)); else if (t86r->hdr.type == TYPE86_RSP_CODE && t86r->cprbx.cprb_ver_id == 0x02) { - length = sizeof(struct type86x_reply) + t86r->length - 2; - length = min(PCIXCC_MAX_ICA_RESPONSE_SIZE, length); - memcpy(msg->message, reply->message, length); + switch (resp_type->type) { + case PCIXCC_RESPONSE_TYPE_ICA: + length = sizeof(struct type86x_reply) + + t86r->length - 2; + length = min(PCIXCC_MAX_ICA_RESPONSE_SIZE, length); + memcpy(msg->message, reply->message, length); + break; + case PCIXCC_RESPONSE_TYPE_XCRB: + length = t86r->fmt2.offset2 + t86r->fmt2.count2; + length = min(PCIXCC_MAX_XCRB_RESPONSE_SIZE, length); + memcpy(msg->message, reply->message, length); + break; + default: + PRINTK("Invalid internal response type: %i\n", + resp_type->type); + memcpy(msg->message, &error_reply, + sizeof error_reply); + } } else memcpy(msg->message, reply->message, sizeof error_reply); - complete((struct completion *) msg->private); + complete(&(resp_type->work)); } static atomic_t zcrypt_step = ATOMIC_INIT(0); @@ -436,7 +625,9 @@ static long zcrypt_pcixcc_modexpo(struct zcrypt_device *zdev, struct ica_rsa_modexpo *mex) { struct ap_message ap_msg; - struct completion work; + struct response_type resp_type = { + .type = PCIXCC_RESPONSE_TYPE_ICA, + }; int rc; ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL); @@ -444,14 +635,14 @@ static long zcrypt_pcixcc_modexpo(struct zcrypt_device *zdev, return -ENOMEM; ap_msg.psmid = (((unsigned long long) current->pid) << 32) + atomic_inc_return(&zcrypt_step); - ap_msg.private = &work; + ap_msg.private = &resp_type; rc = ICAMEX_msg_to_type6MEX_msgX(zdev, &ap_msg, mex); if (rc) goto out_free; - init_completion(&work); + init_completion(&resp_type.work); ap_queue_message(zdev->ap_dev, &ap_msg); rc = wait_for_completion_interruptible_timeout( - &work, PCIXCC_CLEANUP_TIME); + &resp_type.work, PCIXCC_CLEANUP_TIME); if (rc > 0) rc = convert_response_ica(zdev, &ap_msg, mex->outputdata, mex->outputdatalength); @@ -478,7 +669,9 @@ static long zcrypt_pcixcc_modexpo_crt(struct zcrypt_device *zdev, struct ica_rsa_modexpo_crt *crt) { struct ap_message ap_msg; - struct completion work; + struct response_type resp_type = { + .type = PCIXCC_RESPONSE_TYPE_ICA, + }; int rc; ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL); @@ -486,14 +679,14 @@ static long zcrypt_pcixcc_modexpo_crt(struct zcrypt_device *zdev, return -ENOMEM; ap_msg.psmid = (((unsigned long long) current->pid) << 32) + atomic_inc_return(&zcrypt_step); - ap_msg.private = &work; + ap_msg.private = &resp_type; rc = ICACRT_msg_to_type6CRT_msgX(zdev, &ap_msg, crt); if (rc) goto out_free; - init_completion(&work); + init_completion(&resp_type.work); ap_queue_message(zdev->ap_dev, &ap_msg); rc = wait_for_completion_interruptible_timeout( - &work, PCIXCC_CLEANUP_TIME); + &resp_type.work, PCIXCC_CLEANUP_TIME); if (rc > 0) rc = convert_response_ica(zdev, &ap_msg, crt->outputdata, crt->outputdatalength); @@ -509,12 +702,56 @@ out_free: return rc; } +/** + * The request distributor calls this function if it picked the PCIXCC/CEX2C + * device to handle a send_cprb request. + * @zdev: pointer to zcrypt_device structure that identifies the + * PCIXCC/CEX2C device to the request distributor + * @xcRB: pointer to the send_cprb request buffer + */ +long zcrypt_pcixcc_send_cprb(struct zcrypt_device *zdev, struct ica_xcRB *xcRB) +{ + struct ap_message ap_msg; + struct response_type resp_type = { + .type = PCIXCC_RESPONSE_TYPE_XCRB, + }; + int rc; + + ap_msg.message = (void *) kmalloc(PCIXCC_MAX_XCRB_MESSAGE_SIZE, GFP_KERNEL); + if (!ap_msg.message) + return -ENOMEM; + ap_msg.psmid = (((unsigned long long) current->pid) << 32) + + atomic_inc_return(&zcrypt_step); + ap_msg.private = &resp_type; + rc = XCRB_msg_to_type6CPRB_msgX(zdev, &ap_msg, xcRB); + if (rc) + goto out_free; + init_completion(&resp_type.work); + ap_queue_message(zdev->ap_dev, &ap_msg); + rc = wait_for_completion_interruptible_timeout( + &resp_type.work, PCIXCC_CLEANUP_TIME); + if (rc > 0) + rc = convert_response_xcrb(zdev, &ap_msg, xcRB); + else { + /* Signal pending or message timed out. */ + ap_cancel_message(zdev->ap_dev, &ap_msg); + if (rc == 0) + /* Message timed out. */ + rc = -ETIME; + } +out_free: + memset(ap_msg.message, 0x0, ap_msg.length); + kfree(ap_msg.message); + return rc; +} + /** * The crypto operations for a PCIXCC/CEX2C card. */ static struct zcrypt_ops zcrypt_pcixcc_ops = { .rsa_modexpo = zcrypt_pcixcc_modexpo, .rsa_modexpo_crt = zcrypt_pcixcc_modexpo_crt, + .send_cprb = zcrypt_pcixcc_send_cprb, }; /** diff --git a/drivers/s390/crypto/zcrypt_pcixcc.h b/drivers/s390/crypto/zcrypt_pcixcc.h index d4c44c4d7ad0..a78ff307fd19 100644 --- a/drivers/s390/crypto/zcrypt_pcixcc.h +++ b/drivers/s390/crypto/zcrypt_pcixcc.h @@ -1,7 +1,7 @@ /* * linux/drivers/s390/crypto/zcrypt_pcixcc.h * - * zcrypt 2.0.0 + * zcrypt 2.1.0 * * Copyright (C) 2001, 2006 IBM Corporation * Author(s): Robert Burroughs diff --git a/include/asm-s390/zcrypt.h b/include/asm-s390/zcrypt.h index 0d6a3e2a3349..7244c68464f2 100644 --- a/include/asm-s390/zcrypt.h +++ b/include/asm-s390/zcrypt.h @@ -1,7 +1,7 @@ /* * include/asm-s390/zcrypt.h * - * zcrypt 2.0.0 (user-visible header) + * zcrypt 2.1.0 (user-visible header) * * Copyright (C) 2001, 2006 IBM Corporation * Author(s): Robert Burroughs @@ -79,6 +79,83 @@ struct ica_rsa_modexpo_crt { char __user * u_mult_inv; }; +/** + * CPRBX + * Note that all shorts and ints are big-endian. + * All pointer fields are 16 bytes long, and mean nothing. + * + * A request CPRB is followed by a request_parameter_block. + * + * The request (or reply) parameter block is organized thus: + * function code + * VUD block + * key block + */ +struct ica_CPRBX { + unsigned short cprb_len; /* CPRB length 220 */ + unsigned char cprb_ver_id; /* CPRB version id. 0x02 */ + unsigned char pad_000[3]; /* Alignment pad bytes */ + unsigned char func_id[2]; /* function id 0x5432 */ + unsigned char cprb_flags[4]; /* Flags */ + unsigned int req_parml; /* request parameter buffer len */ + unsigned int req_datal; /* request data buffer */ + unsigned int rpl_msgbl; /* reply message block length */ + unsigned int rpld_parml; /* replied parameter block len */ + unsigned int rpl_datal; /* reply data block len */ + unsigned int rpld_datal; /* replied data block len */ + unsigned int req_extbl; /* request extension block len */ + unsigned char pad_001[4]; /* reserved */ + unsigned int rpld_extbl; /* replied extension block len */ + unsigned char padx000[16 - sizeof (char *)]; + unsigned char * req_parmb; /* request parm block 'address' */ + unsigned char padx001[16 - sizeof (char *)]; + unsigned char * req_datab; /* request data block 'address' */ + unsigned char padx002[16 - sizeof (char *)]; + unsigned char * rpl_parmb; /* reply parm block 'address' */ + unsigned char padx003[16 - sizeof (char *)]; + unsigned char * rpl_datab; /* reply data block 'address' */ + unsigned char padx004[16 - sizeof (char *)]; + unsigned char * req_extb; /* request extension block 'addr'*/ + unsigned char padx005[16 - sizeof (char *)]; + unsigned char * rpl_extb; /* reply extension block 'addres'*/ + unsigned short ccp_rtcode; /* server return code */ + unsigned short ccp_rscode; /* server reason code */ + unsigned int mac_data_len; /* Mac Data Length */ + unsigned char logon_id[8]; /* Logon Identifier */ + unsigned char mac_value[8]; /* Mac Value */ + unsigned char mac_content_flgs;/* Mac content flag byte */ + unsigned char pad_002; /* Alignment */ + unsigned short domain; /* Domain */ + unsigned char usage_domain[4];/* Usage domain */ + unsigned char cntrl_domain[4];/* Control domain */ + unsigned char S390enf_mask[4];/* S/390 enforcement mask */ + unsigned char pad_004[36]; /* reserved */ +}; + +/** + * xcRB + */ +struct ica_xcRB { + unsigned short agent_ID; + unsigned int user_defined; + unsigned short request_ID; + unsigned int request_control_blk_length; + unsigned char padding1[16 - sizeof (char *)]; + char __user * request_control_blk_addr; + unsigned int request_data_length; + char padding2[16 - sizeof (char *)]; + char __user * request_data_address; + unsigned int reply_control_blk_length; + char padding3[16 - sizeof (char *)]; + char __user * reply_control_blk_addr; + unsigned int reply_data_length; + char padding4[16 - sizeof (char *)]; + char __user * reply_data_addr; + unsigned short priority_window; + unsigned int status; +} __attribute__((packed)); +#define AUTOSELECT ((unsigned int)0xFFFFFFFF) + #define ZCRYPT_IOCTL_MAGIC 'z' /** @@ -187,6 +264,7 @@ struct ica_rsa_modexpo_crt { */ #define ICARSAMODEXPO _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x05, 0) #define ICARSACRT _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0) +#define ZSECSENDCPRB _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x81, 0) /* New status calls */ #define Z90STAT_TOTALCOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x40, int) From 4ba069b802c29eee066385f9826e2d83716626b4 Mon Sep 17 00:00:00 2001 From: Michael Grundy Date: Wed, 20 Sep 2006 15:58:39 +0200 Subject: [PATCH 0228/1063] [S390] add kprobes support. Signed-off-by: Michael Grundy Signed-off-by: David Wilder Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 14 + arch/s390/kernel/Makefile | 1 + arch/s390/kernel/entry.S | 12 + arch/s390/kernel/entry64.S | 12 + arch/s390/kernel/kprobes.c | 657 +++++++++++++++++++++++++++++++++ arch/s390/kernel/traps.c | 31 +- arch/s390/kernel/vmlinux.lds.S | 1 + arch/s390/mm/fault.c | 40 +- include/asm-s390/kdebug.h | 59 +++ include/asm-s390/kprobes.h | 114 ++++++ 10 files changed, 937 insertions(+), 4 deletions(-) create mode 100644 arch/s390/kernel/kprobes.c create mode 100644 include/asm-s390/kdebug.h create mode 100644 include/asm-s390/kprobes.h diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 2f4f70c4dbb2..76122ce1e6cb 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -487,8 +487,22 @@ source "drivers/net/Kconfig" source "fs/Kconfig" +menu "Instrumentation Support" + source "arch/s390/oprofile/Kconfig" +config KPROBES + bool "Kprobes (EXPERIMENTAL)" + depends on EXPERIMENTAL && MODULES + help + Kprobes allows you to trap at almost any kernel address and + execute a callback function. register_kprobe() establishes + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + +endmenu + source "arch/s390/Kconfig.debug" source "security/Kconfig" diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 9a33ed6ca696..33a5069c0e16 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \ obj-$(CONFIG_VIRT_TIMER) += vtime.o obj-$(CONFIG_STACKTRACE) += stacktrace.o +obj-$(CONFIG_KPROBES) += kprobes.o # Kexec part S390_KEXEC_OBJS := machine_kexec.o crash.o diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 5b5799ac8f83..0c712b78a7e8 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -505,6 +505,8 @@ pgm_no_vtime2: mvc __THREAD_per+__PER_address(4,%r1),__LC_PER_ADDRESS mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID oi __TI_flags+3(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP + tm SP_PSW+1(%r15),0x01 # kernel per event ? + bz BASED(kernel_per) l %r3,__LC_PGM_ILC # load program interruption code la %r8,0x7f nr %r8,%r3 # clear per-event-bit and ilc @@ -536,6 +538,16 @@ pgm_no_vtime3: stosm __SF_EMPTY(%r15),0x03 # reenable interrupts b BASED(sysc_do_svc) +# +# per was called from kernel, must be kprobes +# +kernel_per: + mvi SP_TRAP+1(%r15),0x28 # set trap indication to pgm check + la %r2,SP_PTREGS(%r15) # address of register-save area + l %r1,BASED(.Lhandle_per) # load adr. of per handler + la %r14,BASED(sysc_leave) # load adr. of system return + br %r1 # branch to do_single_step + /* * IO interrupt handler routine */ diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 56f5f613b868..8b956d1538f5 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -518,6 +518,8 @@ pgm_no_vtime2: #endif lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct lg %r1,__TI_task(%r9) + tm SP_PSW+1(%r15),0x01 # kernel per event ? + jz kernel_per mvc __THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID mvc __THREAD_per+__PER_address(8,%r1),__LC_PER_ADDRESS mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID @@ -553,6 +555,16 @@ pgm_no_vtime3: stosm __SF_EMPTY(%r15),0x03 # reenable interrupts j sysc_do_svc +# +# per was called from kernel, must be kprobes +# +kernel_per: + lhi %r0,__LC_PGM_OLD_PSW + sth %r0,SP_TRAP(%r15) # set trap indication to pgm check + la %r2,SP_PTREGS(%r15) # address of register-save area + larl %r14,sysc_leave # load adr. of system ret, no work + jg do_single_step # branch to do_single_step + /* * IO interrupt handler routine */ diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c new file mode 100644 index 000000000000..ca28fb0b3790 --- /dev/null +++ b/arch/s390/kernel/kprobes.c @@ -0,0 +1,657 @@ +/* + * Kernel Probes (KProbes) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2006 + * + * s390 port, used ppc64 as template. Mike Grundy + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + /* Make sure the probe isn't going on a difficult instruction */ + if (is_prohibited_opcode((kprobe_opcode_t *) p->addr)) + return -EINVAL; + + if ((unsigned long)p->addr & 0x01) { + printk("Attempt to register kprobe at an unaligned address\n"); + return -EINVAL; + } + + /* Use the get_insn_slot() facility for correctness */ + if (!(p->ainsn.insn = get_insn_slot())) + return -ENOMEM; + + memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + + get_instruction_type(&p->ainsn); + p->opcode = *p->addr; + return 0; +} + +int __kprobes is_prohibited_opcode(kprobe_opcode_t *instruction) +{ + switch (*(__u8 *) instruction) { + case 0x0c: /* bassm */ + case 0x0b: /* bsm */ + case 0x83: /* diag */ + case 0x44: /* ex */ + return -EINVAL; + } + switch (*(__u16 *) instruction) { + case 0x0101: /* pr */ + case 0xb25a: /* bsa */ + case 0xb240: /* bakr */ + case 0xb258: /* bsg */ + case 0xb218: /* pc */ + case 0xb228: /* pt */ + return -EINVAL; + } + return 0; +} + +void __kprobes get_instruction_type(struct arch_specific_insn *ainsn) +{ + /* default fixup method */ + ainsn->fixup = FIXUP_PSW_NORMAL; + + /* save r1 operand */ + ainsn->reg = (*ainsn->insn & 0xf0) >> 4; + + /* save the instruction length (pop 5-5) in bytes */ + switch (*(__u8 *) (ainsn->insn) >> 4) { + case 0: + ainsn->ilen = 2; + break; + case 1: + case 2: + ainsn->ilen = 4; + break; + case 3: + ainsn->ilen = 6; + break; + } + + switch (*(__u8 *) ainsn->insn) { + case 0x05: /* balr */ + case 0x0d: /* basr */ + ainsn->fixup = FIXUP_RETURN_REGISTER; + /* if r2 = 0, no branch will be taken */ + if ((*ainsn->insn & 0x0f) == 0) + ainsn->fixup |= FIXUP_BRANCH_NOT_TAKEN; + break; + case 0x06: /* bctr */ + case 0x07: /* bcr */ + ainsn->fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + case 0x45: /* bal */ + case 0x4d: /* bas */ + ainsn->fixup = FIXUP_RETURN_REGISTER; + break; + case 0x47: /* bc */ + case 0x46: /* bct */ + case 0x86: /* bxh */ + case 0x87: /* bxle */ + ainsn->fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + case 0x82: /* lpsw */ + ainsn->fixup = FIXUP_NOT_REQUIRED; + break; + case 0xb2: /* lpswe */ + if (*(((__u8 *) ainsn->insn) + 1) == 0xb2) { + ainsn->fixup = FIXUP_NOT_REQUIRED; + } + break; + case 0xa7: /* bras */ + if ((*ainsn->insn & 0x0f) == 0x05) { + ainsn->fixup |= FIXUP_RETURN_REGISTER; + } + break; + case 0xc0: + if ((*ainsn->insn & 0x0f) == 0x00 /* larl */ + || (*ainsn->insn & 0x0f) == 0x05) /* brasl */ + ainsn->fixup |= FIXUP_RETURN_REGISTER; + break; + case 0xeb: + if (*(((__u8 *) ainsn->insn) + 5 ) == 0x44 || /* bxhg */ + *(((__u8 *) ainsn->insn) + 5) == 0x45) {/* bxleg */ + ainsn->fixup = FIXUP_BRANCH_NOT_TAKEN; + } + break; + case 0xe3: /* bctg */ + if (*(((__u8 *) ainsn->insn) + 5) == 0x46) { + ainsn->fixup = FIXUP_BRANCH_NOT_TAKEN; + } + break; + } +} + +static int __kprobes swap_instruction(void *aref) +{ + struct ins_replace_args *args = aref; + int err = -EFAULT; + + asm volatile( + "0: mvc 0(2,%2),0(%3)\n" + "1: la %0,0\n" + "2:\n" + EX_TABLE(0b,2b) + : "+d" (err), "=m" (*args->ptr) + : "a" (args->ptr), "a" (&args->new), "m" (args->new)); + return err; +} + +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + unsigned long status = kcb->kprobe_status; + struct ins_replace_args args; + + args.ptr = p->addr; + args.old = p->opcode; + args.new = BREAKPOINT_INSTRUCTION; + + kcb->kprobe_status = KPROBE_SWAP_INST; + stop_machine_run(swap_instruction, &args, NR_CPUS); + kcb->kprobe_status = status; +} + +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + unsigned long status = kcb->kprobe_status; + struct ins_replace_args args; + + args.ptr = p->addr; + args.old = BREAKPOINT_INSTRUCTION; + args.new = p->opcode; + + kcb->kprobe_status = KPROBE_SWAP_INST; + stop_machine_run(swap_instruction, &args, NR_CPUS); + kcb->kprobe_status = status; +} + +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ + mutex_lock(&kprobe_mutex); + free_insn_slot(p->ainsn.insn); + mutex_unlock(&kprobe_mutex); +} + +static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +{ + per_cr_bits kprobe_per_regs[1]; + + memset(kprobe_per_regs, 0, sizeof(per_cr_bits)); + regs->psw.addr = (unsigned long)p->ainsn.insn | PSW_ADDR_AMODE; + + /* Set up the per control reg info, will pass to lctl */ + kprobe_per_regs[0].em_instruction_fetch = 1; + kprobe_per_regs[0].starting_addr = (unsigned long)p->ainsn.insn; + kprobe_per_regs[0].ending_addr = (unsigned long)p->ainsn.insn + 1; + + /* Set the PER control regs, turns on single step for this address */ + __ctl_load(kprobe_per_regs, 9, 11); + regs->psw.mask |= PSW_MASK_PER; + regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK); +} + +static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; + kcb->prev_kprobe.kprobe_saved_imask = kcb->kprobe_saved_imask; + memcpy(kcb->prev_kprobe.kprobe_saved_ctl, kcb->kprobe_saved_ctl, + sizeof(kcb->kprobe_saved_ctl)); +} + +static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + kcb->kprobe_status = kcb->prev_kprobe.status; + kcb->kprobe_saved_imask = kcb->prev_kprobe.kprobe_saved_imask; + memcpy(kcb->kprobe_saved_ctl, kcb->prev_kprobe.kprobe_saved_ctl, + sizeof(kcb->kprobe_saved_ctl)); +} + +static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + __get_cpu_var(current_kprobe) = p; + /* Save the interrupt and per flags */ + kcb->kprobe_saved_imask = regs->psw.mask & + (PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK); + /* Save the control regs that govern PER */ + __ctl_store(kcb->kprobe_saved_ctl, 9, 11); +} + +/* Called with kretprobe_lock held */ +void __kprobes arch_prepare_kretprobe(struct kretprobe *rp, + struct pt_regs *regs) +{ + struct kretprobe_instance *ri; + + if ((ri = get_free_rp_inst(rp)) != NULL) { + ri->rp = rp; + ri->task = current; + ri->ret_addr = (kprobe_opcode_t *) regs->gprs[14]; + + /* Replace the return addr with trampoline addr */ + regs->gprs[14] = (unsigned long)&kretprobe_trampoline; + + add_rp_inst(ri); + } else { + rp->nmissed++; + } +} + +static int __kprobes kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *p; + int ret = 0; + unsigned long *addr = (unsigned long *) + ((regs->psw.addr & PSW_ADDR_INSN) - 2); + struct kprobe_ctlblk *kcb; + + /* + * We don't want to be preempted for the entire + * duration of kprobe processing + */ + preempt_disable(); + kcb = get_kprobe_ctlblk(); + + /* Check we're not actually recursing */ + if (kprobe_running()) { + p = get_kprobe(addr); + if (p) { + if (kcb->kprobe_status == KPROBE_HIT_SS && + *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { + regs->psw.mask &= ~PSW_MASK_PER; + regs->psw.mask |= kcb->kprobe_saved_imask; + goto no_kprobe; + } + /* We have reentered the kprobe_handler(), since + * another probe was hit while within the handler. + * We here save the original kprobes variables and + * just single step on the instruction of the new probe + * without calling any user handlers. + */ + save_previous_kprobe(kcb); + set_current_kprobe(p, regs, kcb); + kprobes_inc_nmissed_count(p); + prepare_singlestep(p, regs); + kcb->kprobe_status = KPROBE_REENTER; + return 1; + } else { + p = __get_cpu_var(current_kprobe); + if (p->break_handler && p->break_handler(p, regs)) { + goto ss_probe; + } + } + goto no_kprobe; + } + + p = get_kprobe(addr); + if (!p) { + if (*addr != BREAKPOINT_INSTRUCTION) { + /* + * The breakpoint instruction was removed right + * after we hit it. Another cpu has removed + * either a probepoint or a debugger breakpoint + * at this address. In either case, no further + * handling of this interrupt is appropriate. + * + */ + ret = 1; + } + /* Not one of ours: let kernel handle it */ + goto no_kprobe; + } + + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + set_current_kprobe(p, regs, kcb); + if (p->pre_handler && p->pre_handler(p, regs)) + /* handler has already set things up, so skip ss setup */ + return 1; + +ss_probe: + prepare_singlestep(p, regs); + kcb->kprobe_status = KPROBE_HIT_SS; + return 1; + +no_kprobe: + preempt_enable_no_resched(); + return ret; +} + +/* + * Function return probe trampoline: + * - init_kprobes() establishes a probepoint here + * - When the probed function returns, this probe + * causes the handlers to fire + */ +void __kprobes kretprobe_trampoline_holder(void) +{ + asm volatile(".global kretprobe_trampoline\n" + "kretprobe_trampoline: bcr 0,0\n"); +} + +/* + * Called when the probe at kretprobe trampoline is hit + */ +int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kretprobe_instance *ri = NULL; + struct hlist_head *head; + struct hlist_node *node, *tmp; + unsigned long flags, orig_ret_address = 0; + unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; + + spin_lock_irqsave(&kretprobe_lock, flags); + head = kretprobe_inst_table_head(current); + + /* + * It is possible to have multiple instances associated with a given + * task either because an multiple functions in the call path + * have a return probe installed on them, and/or more then one return + * return probe was registered for a target function. + * + * We can handle this because: + * - instances are always inserted at the head of the list + * - when multiple return probes are registered for the same + * function, the first instance's ret_addr will point to the + * real return address, and all the rest will point to + * kretprobe_trampoline + */ + hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + if (ri->rp && ri->rp->handler) + ri->rp->handler(ri, regs); + + orig_ret_address = (unsigned long)ri->ret_addr; + recycle_rp_inst(ri); + + if (orig_ret_address != trampoline_address) { + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + } + BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address)); + regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE; + + reset_current_kprobe(); + spin_unlock_irqrestore(&kretprobe_lock, flags); + preempt_enable_no_resched(); + + /* + * By returning a non-zero value, we are telling + * kprobe_handler() that we don't want the post_handler + * to run (and have re-enabled preemption) + */ + return 1; +} + +/* + * Called after single-stepping. p->addr is the address of the + * instruction whose first byte has been replaced by the "breakpoint" + * instruction. To avoid the SMP problems that can occur when we + * temporarily put back the original opcode to single-step, we + * single-stepped a copy of the instruction. The address of this + * copy is p->ainsn.insn. + */ +static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + regs->psw.addr &= PSW_ADDR_INSN; + + if (p->ainsn.fixup & FIXUP_PSW_NORMAL) + regs->psw.addr = (unsigned long)p->addr + + ((unsigned long)regs->psw.addr - + (unsigned long)p->ainsn.insn); + + if (p->ainsn.fixup & FIXUP_BRANCH_NOT_TAKEN) + if ((unsigned long)regs->psw.addr - + (unsigned long)p->ainsn.insn == p->ainsn.ilen) + regs->psw.addr = (unsigned long)p->addr + p->ainsn.ilen; + + if (p->ainsn.fixup & FIXUP_RETURN_REGISTER) + regs->gprs[p->ainsn.reg] = ((unsigned long)p->addr + + (regs->gprs[p->ainsn.reg] - + (unsigned long)p->ainsn.insn)) + | PSW_ADDR_AMODE; + + regs->psw.addr |= PSW_ADDR_AMODE; + /* turn off PER mode */ + regs->psw.mask &= ~PSW_MASK_PER; + /* Restore the original per control regs */ + __ctl_load(kcb->kprobe_saved_ctl, 9, 11); + regs->psw.mask |= kcb->kprobe_saved_imask; +} + +static int __kprobes post_kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (!cur) + return 0; + + if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + cur->post_handler(cur, regs, 0); + } + + resume_execution(cur, regs); + + /*Restore back the original saved kprobes variables and continue. */ + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); + goto out; + } + reset_current_kprobe(); +out: + preempt_enable_no_resched(); + + /* + * if somebody else is singlestepping across a probe point, psw mask + * will have PER set, in which case, continue the remaining processing + * of do_single_step, as if this is not a probe hit. + */ + if (regs->psw.mask & PSW_MASK_PER) { + return 0; + } + + return 1; +} + +static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + const struct exception_table_entry *entry; + + switch(kcb->kprobe_status) { + case KPROBE_SWAP_INST: + /* We are here because the instruction replacement failed */ + return 0; + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the nip points back to the probe address + * and allow the page fault handler to continue as a + * normal page fault. + */ + regs->psw.addr = (unsigned long)cur->addr | PSW_ADDR_AMODE; + regs->psw.mask &= ~PSW_MASK_PER; + regs->psw.mask |= kcb->kprobe_saved_imask; + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); + preempt_enable_no_resched(); + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * We increment the nmissed count for accounting, + * we can also use npre/npostfault count for accouting + * these specific fault cases. + */ + kprobes_inc_nmissed_count(cur); + + /* + * We come here because instructions in the pre/post + * handler caused the page_fault, this could happen + * if handler tries to access user space by + * copy_from_user(), get_user() etc. Let the + * user-specified handler try to fix it first. + */ + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) + return 1; + + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); + if (entry) { + regs->psw.addr = entry->fixup | PSW_ADDR_AMODE; + return 1; + } + + /* + * fixup_exception() could not handle it, + * Let do_page_fault() fix it. + */ + break; + default: + break; + } + return 0; +} + +/* + * Wrapper routine to for handling exceptions. + */ +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + + switch (val) { + case DIE_BPT: + if (kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + case DIE_SSTEP: + if (post_kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + case DIE_TRAP: + case DIE_PAGE_FAULT: + /* kprobe_running() needs smp_processor_id() */ + preempt_disable(); + if (kprobe_running() && + kprobe_fault_handler(args->regs, args->trapnr)) + ret = NOTIFY_STOP; + preempt_enable(); + break; + default: + break; + } + return ret; +} + +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct jprobe *jp = container_of(p, struct jprobe, kp); + unsigned long addr; + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs)); + + /* setup return addr to the jprobe handler routine */ + regs->psw.addr = (unsigned long)(jp->entry) | PSW_ADDR_AMODE; + + /* r14 is the function return address */ + kcb->jprobe_saved_r14 = (unsigned long)regs->gprs[14]; + /* r15 is the stack pointer */ + kcb->jprobe_saved_r15 = (unsigned long)regs->gprs[15]; + addr = (unsigned long)kcb->jprobe_saved_r15; + + memcpy(kcb->jprobes_stack, (kprobe_opcode_t *) addr, + MIN_STACK_SIZE(addr)); + return 1; +} + +void __kprobes jprobe_return(void) +{ + asm volatile(".word 0x0002"); +} + +void __kprobes jprobe_return_end(void) +{ + asm volatile("bcr 0,0"); +} + +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_r15); + + /* Put the regs back */ + memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs)); + /* put the stack back */ + memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack, + MIN_STACK_SIZE(stack_addr)); + preempt_enable_no_resched(); + return 1; +} + +static struct kprobe trampoline_p = { + .addr = (kprobe_opcode_t *) & kretprobe_trampoline, + .pre_handler = trampoline_probe_handler +}; + +int __init arch_init_kprobes(void) +{ + return register_kprobe(&trampoline_p); +} diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index bde1d1d59858..c4982c963424 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -39,6 +40,7 @@ #include #include #include +#include /* Called from entry.S only */ extern void handle_per_exception(struct pt_regs *regs); @@ -74,6 +76,20 @@ static int kstack_depth_to_print = 12; static int kstack_depth_to_print = 20; #endif /* CONFIG_64BIT */ +ATOMIC_NOTIFIER_HEAD(s390die_chain); + +int register_die_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&s390die_chain, nb); +} +EXPORT_SYMBOL(register_die_notifier); + +int unregister_die_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&s390die_chain, nb); +} +EXPORT_SYMBOL(unregister_die_notifier); + /* * For show_trace we have tree different stack to consider: * - the panic stack which is used if the kernel stack has overflown @@ -305,8 +321,9 @@ report_user_fault(long interruption_code, struct pt_regs *regs) #endif } -static void inline do_trap(long interruption_code, int signr, char *str, - struct pt_regs *regs, siginfo_t *info) +static void __kprobes inline do_trap(long interruption_code, int signr, + char *str, struct pt_regs *regs, + siginfo_t *info) { /* * We got all needed information from the lowcore and can @@ -315,6 +332,10 @@ static void inline do_trap(long interruption_code, int signr, char *str, if (regs->psw.mask & PSW_MASK_PSTATE) local_irq_enable(); + if (notify_die(DIE_TRAP, str, regs, interruption_code, + interruption_code, signr) == NOTIFY_STOP) + return; + if (regs->psw.mask & PSW_MASK_PSTATE) { struct task_struct *tsk = current; @@ -336,8 +357,12 @@ static inline void __user *get_check_address(struct pt_regs *regs) return (void __user *)((regs->psw.addr-S390_lowcore.pgm_ilc) & PSW_ADDR_INSN); } -void do_single_step(struct pt_regs *regs) +void __kprobes do_single_step(struct pt_regs *regs) { + if (notify_die(DIE_SSTEP, "sstep", regs, 0, 0, + SIGTRAP) == NOTIFY_STOP){ + return; + } if ((current->ptrace & PT_PTRACED) != 0) force_sig(SIGTRAP, current); } diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index ff5f7bb34f75..df0c16ab8e92 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -24,6 +24,7 @@ SECTIONS *(.text) SCHED_TEXT LOCK_TEXT + KPROBES_TEXT *(.fixup) *(.gnu.warning) } = 0x0700 diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 7cd82575813d..44f0cda7e72e 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -25,10 +25,12 @@ #include #include #include +#include #include #include #include +#include #ifndef CONFIG_64BIT #define __FAIL_ADDR_MASK 0x7ffff000 @@ -48,6 +50,38 @@ extern int sysctl_userprocess_debug; extern void die(const char *,struct pt_regs *,long); +#ifdef CONFIG_KPROBES +ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); +int register_page_fault_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(¬ify_page_fault_chain, nb); +} + +int unregister_page_fault_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(¬ify_page_fault_chain, nb); +} + +static inline int notify_page_fault(enum die_val val, const char *str, + struct pt_regs *regs, long err, int trap, int sig) +{ + struct die_args args = { + .regs = regs, + .str = str, + .err = err, + .trapnr = trap, + .signr = sig + }; + return atomic_notifier_call_chain(¬ify_page_fault_chain, val, &args); +} +#else +static inline int notify_page_fault(enum die_val val, const char *str, + struct pt_regs *regs, long err, int trap, int sig) +{ + return NOTIFY_DONE; +} +#endif + extern spinlock_t timerlist_lock; /* @@ -159,7 +193,7 @@ static void do_sigsegv(struct pt_regs *regs, unsigned long error_code, * 11 Page translation -> Not present (nullification) * 3b Region third trans. -> Not present (nullification) */ -static inline void +static inline void __kprobes do_exception(struct pt_regs *regs, unsigned long error_code, int is_protection) { struct task_struct *tsk; @@ -173,6 +207,10 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int is_protection) tsk = current; mm = tsk->mm; + if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, + SIGSEGV) == NOTIFY_STOP) + return; + /* * Check for low-address protection. This needs to be treated * as a special case because the translation exception code diff --git a/include/asm-s390/kdebug.h b/include/asm-s390/kdebug.h new file mode 100644 index 000000000000..40cc68025e01 --- /dev/null +++ b/include/asm-s390/kdebug.h @@ -0,0 +1,59 @@ +#ifndef _S390_KDEBUG_H +#define _S390_KDEBUG_H + +/* + * Feb 2006 Ported to s390 + */ +#include + +struct pt_regs; + +struct die_args { + struct pt_regs *regs; + const char *str; + long err; + int trapnr; + int signr; +}; + +/* Note - you should never unregister because that can race with NMIs. + * If you really want to do it first unregister - then synchronize_sched + * - then free. + */ +extern int register_die_notifier(struct notifier_block *); +extern int unregister_die_notifier(struct notifier_block *); +extern int register_page_fault_notifier(struct notifier_block *); +extern int unregister_page_fault_notifier(struct notifier_block *); +extern struct atomic_notifier_head s390die_chain; + + +enum die_val { + DIE_OOPS = 1, + DIE_BPT, + DIE_SSTEP, + DIE_PANIC, + DIE_NMI, + DIE_DIE, + DIE_NMIWATCHDOG, + DIE_KERNELDEBUG, + DIE_TRAP, + DIE_GPF, + DIE_CALL, + DIE_NMI_IPI, + DIE_PAGE_FAULT, +}; + +static inline int notify_die(enum die_val val, const char *str, + struct pt_regs *regs, long err, int trap, int sig) +{ + struct die_args args = { + .regs = regs, + .str = str, + .err = err, + .trapnr = trap, + .signr = sig + }; + return atomic_notifier_call_chain(&s390die_chain, val, &args); +} + +#endif diff --git a/include/asm-s390/kprobes.h b/include/asm-s390/kprobes.h new file mode 100644 index 000000000000..b847ff0ec3fa --- /dev/null +++ b/include/asm-s390/kprobes.h @@ -0,0 +1,114 @@ +#ifndef _ASM_S390_KPROBES_H +#define _ASM_S390_KPROBES_H +/* + * Kernel Probes (KProbes) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2006 + * + * 2002-Oct Created by Vamsi Krishna S Kernel + * Probes initial implementation ( includes suggestions from + * Rusty Russell). + * 2004-Nov Modified for PPC64 by Ananth N Mavinakayanahalli + * + * 2005-Dec Used as a template for s390 by Mike Grundy + * + */ +#include +#include +#include + +#define __ARCH_WANT_KPROBES_INSN_SLOT +struct pt_regs; +struct kprobe; + +typedef u16 kprobe_opcode_t; +#define BREAKPOINT_INSTRUCTION 0x0002 + +/* Maximum instruction size is 3 (16bit) halfwords: */ +#define MAX_INSN_SIZE 0x0003 +#define MAX_STACK_SIZE 64 +#define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \ + (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) \ + ? (MAX_STACK_SIZE) \ + : (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) + +#define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)(pentry) + +#define ARCH_SUPPORTS_KRETPROBES +#define ARCH_INACTIVE_KPROBE_COUNT 0 + +#define KPROBE_SWAP_INST 0x10 + +#define FIXUP_PSW_NORMAL 0x08 +#define FIXUP_BRANCH_NOT_TAKEN 0x04 +#define FIXUP_RETURN_REGISTER 0x02 +#define FIXUP_NOT_REQUIRED 0x01 + +/* Architecture specific copy of original instruction */ +struct arch_specific_insn { + /* copy of original instruction */ + kprobe_opcode_t *insn; + int fixup; + int ilen; + int reg; +}; + +struct ins_replace_args { + kprobe_opcode_t *ptr; + kprobe_opcode_t old; + kprobe_opcode_t new; +}; +struct prev_kprobe { + struct kprobe *kp; + unsigned long status; + unsigned long saved_psw; + unsigned long kprobe_saved_imask; + unsigned long kprobe_saved_ctl[3]; +}; + +/* per-cpu kprobe control block */ +struct kprobe_ctlblk { + unsigned long kprobe_status; + unsigned long kprobe_saved_imask; + unsigned long kprobe_saved_ctl[3]; + struct pt_regs jprobe_saved_regs; + unsigned long jprobe_saved_r14; + unsigned long jprobe_saved_r15; + struct prev_kprobe prev_kprobe; + kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE]; +}; + +void arch_remove_kprobe(struct kprobe *p); +void kretprobe_trampoline(void); +int is_prohibited_opcode(kprobe_opcode_t *instruction); +void get_instruction_type(struct arch_specific_insn *ainsn); + +#define flush_insn_slot(p) do { } while (0) + +#endif /* _ASM_S390_KPROBES_H */ + +#ifdef CONFIG_KPROBES + +extern int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); +#else /* !CONFIG_KPROBES */ +static inline int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + return 0; +} +#endif From 65912a84c0f33304fa5ea004c7b6ee58d5f5572e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 20 Sep 2006 15:58:41 +0200 Subject: [PATCH 0229/1063] [S390] initrd vs. bootmem bitmap. Move initrd if the bitmap of the bootmem allocator would overwrite it. In addition this patch sets the default size and address of the initrd to 0. Therefore all boot loaders must set the initrd size and address correctly. This is especially relevant for ftp boot via HMC/SE, where this change requires a special patch file entry in the .ins file which sets these two values contained at address 0x10408 and 0x10410. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/head.S | 10 ++++++---- arch/s390/kernel/head31.S | 4 ++-- arch/s390/kernel/head64.S | 4 ++-- arch/s390/kernel/setup.c | 41 ++++++++++++++++++++++++++++++++++++--- include/asm-s390/setup.h | 2 -- 5 files changed, 48 insertions(+), 13 deletions(-) diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index adad8863ee2f..a6e9bdb53591 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -272,7 +272,7 @@ iplstart: # load parameter file from ipl device # .Lagain1: - l %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) # ramdisk loc. is temp + l %r2,.Linitrd # ramdisk loc. is temp bas %r14,.Lloader # load parameter file ltr %r2,%r2 # got anything ? bz .Lnopf @@ -280,7 +280,7 @@ iplstart: bnh .Lnotrunc la %r2,895 .Lnotrunc: - l %r4,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) + l %r4,.Linitrd clc 0(3,%r4),.L_hdr # if it is HDRx bz .Lagain1 # skip dataset header clc 0(3,%r4),.L_eof # if it is EOFx @@ -323,14 +323,15 @@ iplstart: # load ramdisk from ipl device # .Lagain2: - l %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) # addr of ramdisk + l %r2,.Linitrd # addr of ramdisk + st %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) bas %r14,.Lloader # load ramdisk st %r2,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r12) # store size of ramdisk ltr %r2,%r2 bnz .Lrdcont st %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) # no ramdisk found .Lrdcont: - l %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) + l %r2,.Linitrd clc 0(3,%r2),.L_hdr # skip HDRx and EOFx bz .Lagain2 @@ -379,6 +380,7 @@ iplstart: l %r1,.Lstartup br %r1 +.Linitrd:.long _end + 0x400000 # default address of initrd .Lparm: .long PARMAREA .Lstartup: .long startup .Lcvtab:.long _ebcasc # ebcdic to ascii table diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S index a4dc61f3285e..0e46077d7140 100644 --- a/arch/s390/kernel/head31.S +++ b/arch/s390/kernel/head31.S @@ -26,8 +26,8 @@ startup:basr %r13,0 # get base # .org PARMAREA .long 0,0 # IPL_DEVICE - .long 0,RAMDISK_ORIGIN # INITRD_START - .long 0,RAMDISK_SIZE # INITRD_SIZE + .long 0,0 # INITRD_START + .long 0,0 # INITRD_SIZE .org COMMAND_LINE .byte "root=/dev/ram0 ro" diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 9d80c5b1ef95..3e0341acd04e 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -26,8 +26,8 @@ startup:basr %r13,0 # get base # .org PARMAREA .quad 0 # IPL_DEVICE - .quad RAMDISK_ORIGIN # INITRD_START - .quad RAMDISK_SIZE # INITRD_SIZE + .quad 0 # INITRD_START + .quad 0 # INITRD_SIZE .org COMMAND_LINE .byte "root=/dev/ram0 ro" diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index c902f059c7aa..89051e8a5d8d 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -501,13 +502,47 @@ setup_memory(void) * partially used pages are not usable - thus * we are rounding upwards: */ - start_pfn = (__pa(&_end) + PAGE_SIZE - 1) >> PAGE_SHIFT; - end_pfn = max_pfn = memory_end >> PAGE_SHIFT; + start_pfn = PFN_UP(__pa(&_end)); + end_pfn = max_pfn = PFN_DOWN(memory_end); /* Initialize storage key for kernel pages */ for (init_pfn = 0 ; init_pfn < start_pfn; init_pfn++) page_set_storage_key(init_pfn << PAGE_SHIFT, PAGE_DEFAULT_KEY); +#ifdef CONFIG_BLK_DEV_INITRD + /* + * Move the initrd in case the bitmap of the bootmem allocater + * would overwrite it. + */ + + if (INITRD_START && INITRD_SIZE) { + unsigned long bmap_size; + unsigned long start; + + bmap_size = bootmem_bootmap_pages(end_pfn - start_pfn + 1); + bmap_size = PFN_PHYS(bmap_size); + + if (PFN_PHYS(start_pfn) + bmap_size > INITRD_START) { + start = PFN_PHYS(start_pfn) + bmap_size + PAGE_SIZE; + + if (start + INITRD_SIZE > memory_end) { + printk("initrd extends beyond end of memory " + "(0x%08lx > 0x%08lx)\n" + "disabling initrd\n", + start + INITRD_SIZE, memory_end); + INITRD_START = INITRD_SIZE = 0; + } else { + printk("Moving initrd (0x%08lx -> 0x%08lx, " + "size: %ld)\n", + INITRD_START, start, INITRD_SIZE); + memmove((void *) start, (void *) INITRD_START, + INITRD_SIZE); + INITRD_START = start; + } + } + } +#endif + /* * Initialize the boot-time allocator (with low memory only): */ @@ -559,7 +594,7 @@ setup_memory(void) reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size); #ifdef CONFIG_BLK_DEV_INITRD - if (INITRD_START) { + if (INITRD_START && INITRD_SIZE) { if (INITRD_START + INITRD_SIZE <= memory_end) { reserve_bootmem(INITRD_START, INITRD_SIZE); initrd_start = INITRD_START; diff --git a/include/asm-s390/setup.h b/include/asm-s390/setup.h index 19e31979309a..02c96d57f0cf 100644 --- a/include/asm-s390/setup.h +++ b/include/asm-s390/setup.h @@ -14,8 +14,6 @@ #define PARMAREA 0x10400 #define COMMAND_LINE_SIZE 896 -#define RAMDISK_ORIGIN 0x800000 -#define RAMDISK_SIZE 0x800000 #define MEMORY_CHUNKS 16 /* max 0x7fff */ #define IPL_PARMBLOCK_ORIGIN 0x2000 From f19bfb2c9b8675590fbecb43e5ce3b34ee321185 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 20 Sep 2006 15:58:44 +0200 Subject: [PATCH 0230/1063] [S390] hypfs comment cleanup. Correct some comments in the hypervisor filesystem. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/hypfs/hypfs.h | 2 +- arch/s390/hypfs/hypfs_diag.c | 2 +- arch/s390/hypfs/hypfs_diag.h | 2 +- arch/s390/hypfs/inode.c | 2 +- drivers/base/hypervisor.c | 3 ++- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h index ea5567be00fc..f3dbd91965c6 100644 --- a/arch/s390/hypfs/hypfs.h +++ b/arch/s390/hypfs/hypfs.h @@ -1,5 +1,5 @@ /* - * fs/hypfs/hypfs.h + * arch/s390/hypfs/hypfs.h * Hypervisor filesystem for Linux on s390. * * Copyright (C) IBM Corp. 2006 diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 1785bce2b919..874d761c9810 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -1,5 +1,5 @@ /* - * fs/hypfs/hypfs_diag.c + * arch/s390/hypfs/hypfs_diag.c * Hypervisor filesystem for Linux on s390. Diag 204 and 224 * implementation. * diff --git a/arch/s390/hypfs/hypfs_diag.h b/arch/s390/hypfs/hypfs_diag.h index 793dea6b9bb6..256b384aebe1 100644 --- a/arch/s390/hypfs/hypfs_diag.h +++ b/arch/s390/hypfs/hypfs_diag.h @@ -1,5 +1,5 @@ /* - * fs/hypfs/hypfs_diag.h + * arch/s390/hypfs_diag.h * Hypervisor filesystem for Linux on s390. * * Copyright (C) IBM Corp. 2006 diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 18c091925ea5..bdcad2ea1ff4 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -1,5 +1,5 @@ /* - * fs/hypfs/inode.c + * arch/s390/hypfs/inode.c * Hypervisor filesystem for Linux on s390. * * Copyright (C) IBM Corp. 2006 diff --git a/drivers/base/hypervisor.c b/drivers/base/hypervisor.c index 0c85e9d6a448..7080b413ddc9 100644 --- a/drivers/base/hypervisor.c +++ b/drivers/base/hypervisor.c @@ -1,8 +1,9 @@ /* * hypervisor.c - /sys/hypervisor subsystem. * - * This file is released under the GPLv2 + * Copyright (C) IBM Corp. 2006 * + * This file is released under the GPLv2 */ #include From 331c982d4a6b43cdc0d056956a1cae8a7d6237bf Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 20 Sep 2006 15:58:47 +0200 Subject: [PATCH 0231/1063] [S390] hypfs compiler warnings. Add casts to avoid compiler warnings. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/hypfs/hypfs_diag.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 874d761c9810..fee5aee605f6 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -432,12 +432,14 @@ static int diag204_probe(void) buf = diag204_get_buffer(INFO_EXT, &pages); if (!IS_ERR(buf)) { - if (diag204(SUBC_STIB7 | INFO_EXT, pages, buf) >= 0) { + if (diag204((unsigned long)SUBC_STIB7 | + (unsigned long)INFO_EXT, pages, buf) >= 0) { diag204_store_sc = SUBC_STIB7; diag204_info_type = INFO_EXT; goto out; } - if (diag204(SUBC_STIB6 | INFO_EXT, pages, buf) >= 0) { + if (diag204((unsigned long)SUBC_STIB6 | + (unsigned long)INFO_EXT, pages, buf) >= 0) { diag204_store_sc = SUBC_STIB7; diag204_info_type = INFO_EXT; goto out; @@ -452,7 +454,8 @@ static int diag204_probe(void) rc = PTR_ERR(buf); goto fail_alloc; } - if (diag204(SUBC_STIB4 | INFO_SIMPLE, pages, buf) >= 0) { + if (diag204((unsigned long)SUBC_STIB4 | + (unsigned long)INFO_SIMPLE, pages, buf) >= 0) { diag204_store_sc = SUBC_STIB4; diag204_info_type = INFO_SIMPLE; goto out; @@ -476,7 +479,8 @@ static void *diag204_store(void) buf = diag204_get_buffer(diag204_info_type, &pages); if (IS_ERR(buf)) goto out; - if (diag204(diag204_store_sc | diag204_info_type, pages, buf) < 0) + if (diag204((unsigned long)diag204_store_sc | + (unsigned long)diag204_info_type, pages, buf) < 0) return ERR_PTR(-ENOSYS); out: return buf; From ff6b8ea68f4b7353f88b97024f28127e2148aa00 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 20 Sep 2006 15:58:49 +0200 Subject: [PATCH 0232/1063] [S390] ipl/dump on panic. It is now possible to specify a ccw/fcp dump device which is used to automatically create a system dump in case of a kernel panic. The dump device can be configured under /sys/firmware/dump. In addition it is now possible to specify a ccw/fcp device which is used for the next reboot of Linux. The reipl device can be configured under /sys/firmware/reipl. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/head31.S | 3 + arch/s390/kernel/head64.S | 3 + arch/s390/kernel/ipl.c | 942 ++++++++++++++++++++++++++++++++++ arch/s390/kernel/reipl.S | 33 +- arch/s390/kernel/reipl64.S | 34 +- arch/s390/kernel/reipl_diag.c | 39 -- arch/s390/kernel/setup.c | 220 +------- arch/s390/kernel/smp.c | 10 +- drivers/s390/cio/cio.c | 54 +- include/asm-s390/cio.h | 7 + include/asm-s390/lowcore.h | 13 +- include/asm-s390/setup.h | 58 ++- 13 files changed, 1103 insertions(+), 315 deletions(-) create mode 100644 arch/s390/kernel/ipl.c delete mode 100644 arch/s390/kernel/reipl_diag.c diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 33a5069c0e16..aa978978d3d1 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -6,7 +6,7 @@ EXTRA_AFLAGS := -traditional obj-y := bitmap.o traps.o time.o process.o \ setup.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \ - semaphore.o s390_ext.o debug.o profile.o irq.o reipl_diag.o + semaphore.o s390_ext.o debug.o profile.o irq.o ipl.o obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o) obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o) diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S index 0e46077d7140..d8bb68a72527 100644 --- a/arch/s390/kernel/head31.S +++ b/arch/s390/kernel/head31.S @@ -38,6 +38,7 @@ startup:basr %r13,0 # get base startup_continue: basr %r13,0 # get base .LPG1: GET_IPL_DEVICE + mvi __LC_AR_MODE_ID,0 # set ESA flag (mode 0) lctl %c0,%c15,.Lctl-.LPG1(%r13) # load control registers l %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area # move IPL device to lowcore @@ -274,6 +275,8 @@ startup_continue: .Lparmaddr: .long PARMAREA .Lsccbaddr: .long .Lsccb .org 0x12000 +.globl s390_readinfo_sccb +s390_readinfo_sccb: .Lsccb: .hword 0x1000 # length, one page .byte 0x00,0x00,0x00 diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 3e0341acd04e..c2005101fee1 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -41,6 +41,7 @@ startup_continue: srl %r13,1 GET_IPL_DEVICE lhi %r1,1 # mode 1 = esame + mvi __LC_AR_MODE_ID,1 # set esame flag slr %r0,%r0 # set cpuid to zero sigp %r1,%r0,0x12 # switch to esame mode sam64 # switch to 64 bit mode @@ -269,6 +270,8 @@ startup_continue: .quad PARMAREA .org 0x12000 +.globl s390_readinfo_sccb +s390_readinfo_sccb: .Lsccb: .hword 0x1000 # length, one page .byte 0x00,0x00,0x00 diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c new file mode 100644 index 000000000000..105ee15a2b31 --- /dev/null +++ b/arch/s390/kernel/ipl.c @@ -0,0 +1,942 @@ +/* + * arch/s390/kernel/ipl.c + * ipl/reipl/dump support for Linux on s390. + * + * Copyright (C) IBM Corp. 2005,2006 + * Author(s): Michael Holzheu + * Heiko Carstens + * Volker Sameske + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define IPL_PARM_BLOCK_VERSION 0 + +enum ipl_type { + IPL_TYPE_NONE = 1, + IPL_TYPE_UNKNOWN = 2, + IPL_TYPE_CCW = 4, + IPL_TYPE_FCP = 8, +}; + +#define IPL_NONE_STR "none" +#define IPL_UNKNOWN_STR "unknown" +#define IPL_CCW_STR "ccw" +#define IPL_FCP_STR "fcp" + +static char *ipl_type_str(enum ipl_type type) +{ + switch (type) { + case IPL_TYPE_NONE: + return IPL_NONE_STR; + case IPL_TYPE_CCW: + return IPL_CCW_STR; + case IPL_TYPE_FCP: + return IPL_FCP_STR; + case IPL_TYPE_UNKNOWN: + default: + return IPL_UNKNOWN_STR; + } +} + +enum ipl_method { + IPL_METHOD_NONE, + IPL_METHOD_CCW_CIO, + IPL_METHOD_CCW_DIAG, + IPL_METHOD_CCW_VM, + IPL_METHOD_FCP_RO_DIAG, + IPL_METHOD_FCP_RW_DIAG, + IPL_METHOD_FCP_RO_VM, +}; + +enum shutdown_action { + SHUTDOWN_REIPL, + SHUTDOWN_DUMP, + SHUTDOWN_STOP, +}; + +#define SHUTDOWN_REIPL_STR "reipl" +#define SHUTDOWN_DUMP_STR "dump" +#define SHUTDOWN_STOP_STR "stop" + +static char *shutdown_action_str(enum shutdown_action action) +{ + switch (action) { + case SHUTDOWN_REIPL: + return SHUTDOWN_REIPL_STR; + case SHUTDOWN_DUMP: + return SHUTDOWN_DUMP_STR; + case SHUTDOWN_STOP: + return SHUTDOWN_STOP_STR; + default: + BUG(); + } +} + +enum diag308_subcode { + DIAG308_IPL = 3, + DIAG308_DUMP = 4, + DIAG308_SET = 5, + DIAG308_STORE = 6, +}; + +enum diag308_ipl_type { + DIAG308_IPL_TYPE_FCP = 0, + DIAG308_IPL_TYPE_CCW = 2, +}; + +enum diag308_opt { + DIAG308_IPL_OPT_IPL = 0x10, + DIAG308_IPL_OPT_DUMP = 0x20, +}; + +enum diag308_rc { + DIAG308_RC_OK = 1, +}; + +static int diag308_set_works = 0; + +static int reipl_capabilities = IPL_TYPE_UNKNOWN; +static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN; +static enum ipl_method reipl_method = IPL_METHOD_NONE; +static struct ipl_parameter_block *reipl_block_fcp; +static struct ipl_parameter_block *reipl_block_ccw; + +static int dump_capabilities = IPL_TYPE_NONE; +static enum ipl_type dump_type = IPL_TYPE_NONE; +static enum ipl_method dump_method = IPL_METHOD_NONE; +static struct ipl_parameter_block *dump_block_fcp; +static struct ipl_parameter_block *dump_block_ccw; + +static enum shutdown_action on_panic_action = SHUTDOWN_STOP; + +static int diag308(unsigned long subcode, void *addr) +{ + register unsigned long _addr asm("0") = (unsigned long)addr; + register unsigned long _rc asm("1") = 0; + + asm volatile ( + " diag %0,%2,0x308\n" + "0: \n" + ".section __ex_table,\"a\"\n" +#ifdef CONFIG_64BIT + " .align 8\n" + " .quad 0b, 0b\n" +#else + " .align 4\n" + " .long 0b, 0b\n" +#endif + ".previous\n" + : "+d" (_addr), "+d" (_rc) + : "d" (subcode) : "cc", "memory" ); + + return _rc; +} + +/* SYSFS */ + +#define DEFINE_IPL_ATTR_RO(_prefix, _name, _format, _value) \ +static ssize_t sys_##_prefix##_##_name##_show(struct subsystem *subsys, \ + char *page) \ +{ \ + return sprintf(page, _format, _value); \ +} \ +static struct subsys_attribute sys_##_prefix##_##_name##_attr = \ + __ATTR(_name, S_IRUGO, sys_##_prefix##_##_name##_show, NULL); + +#define DEFINE_IPL_ATTR_RW(_prefix, _name, _fmt_out, _fmt_in, _value) \ +static ssize_t sys_##_prefix##_##_name##_show(struct subsystem *subsys, \ + char *page) \ +{ \ + return sprintf(page, _fmt_out, \ + (unsigned long long) _value); \ +} \ +static ssize_t sys_##_prefix##_##_name##_store(struct subsystem *subsys,\ + const char *buf, size_t len) \ +{ \ + unsigned long long value; \ + if (sscanf(buf, _fmt_in, &value) != 1) \ + return -EINVAL; \ + _value = value; \ + return len; \ +} \ +static struct subsys_attribute sys_##_prefix##_##_name##_attr = \ + __ATTR(_name,(S_IRUGO | S_IWUSR), \ + sys_##_prefix##_##_name##_show, \ + sys_##_prefix##_##_name##_store); + +static void make_attrs_ro(struct attribute **attrs) +{ + while (*attrs) { + (*attrs)->mode = S_IRUGO; + attrs++; + } +} + +/* + * ipl section + */ + +static enum ipl_type ipl_get_type(void) +{ + struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START; + + if (!IPL_DEVNO_VALID) + return IPL_TYPE_UNKNOWN; + if (!IPL_PARMBLOCK_VALID) + return IPL_TYPE_CCW; + if (ipl->hdr.version > IPL_MAX_SUPPORTED_VERSION) + return IPL_TYPE_UNKNOWN; + if (ipl->hdr.pbt != DIAG308_IPL_TYPE_FCP) + return IPL_TYPE_UNKNOWN; + return IPL_TYPE_FCP; +} + +static ssize_t ipl_type_show(struct subsystem *subsys, char *page) +{ + return sprintf(page, "%s\n", ipl_type_str(ipl_get_type())); +} + +static struct subsys_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type); + +static ssize_t sys_ipl_device_show(struct subsystem *subsys, char *page) +{ + struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START; + + switch (ipl_get_type()) { + case IPL_TYPE_CCW: + return sprintf(page, "0.0.%04x\n", ipl_devno); + case IPL_TYPE_FCP: + return sprintf(page, "0.0.%04x\n", ipl->ipl_info.fcp.devno); + default: + return 0; + } +} + +static struct subsys_attribute sys_ipl_device_attr = + __ATTR(device, S_IRUGO, sys_ipl_device_show, NULL); + +static ssize_t ipl_parameter_read(struct kobject *kobj, char *buf, loff_t off, + size_t count) +{ + unsigned int size = IPL_PARMBLOCK_SIZE; + + if (off > size) + return 0; + if (off + count > size) + count = size - off; + memcpy(buf, (void *)IPL_PARMBLOCK_START + off, count); + return count; +} + +static struct bin_attribute ipl_parameter_attr = { + .attr = { + .name = "binary_parameter", + .mode = S_IRUGO, + .owner = THIS_MODULE, + }, + .size = PAGE_SIZE, + .read = &ipl_parameter_read, +}; + +static ssize_t ipl_scp_data_read(struct kobject *kobj, char *buf, loff_t off, + size_t count) +{ + unsigned int size = IPL_PARMBLOCK_START->ipl_info.fcp.scp_data_len; + void *scp_data = &IPL_PARMBLOCK_START->ipl_info.fcp.scp_data; + + if (off > size) + return 0; + if (off + count > size) + count = size - off; + memcpy(buf, scp_data + off, count); + return count; +} + +static struct bin_attribute ipl_scp_data_attr = { + .attr = { + .name = "scp_data", + .mode = S_IRUGO, + .owner = THIS_MODULE, + }, + .size = PAGE_SIZE, + .read = &ipl_scp_data_read, +}; + +/* FCP ipl device attributes */ + +DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n", (unsigned long long) + IPL_PARMBLOCK_START->ipl_info.fcp.wwpn); +DEFINE_IPL_ATTR_RO(ipl_fcp, lun, "0x%016llx\n", (unsigned long long) + IPL_PARMBLOCK_START->ipl_info.fcp.lun); +DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n", (unsigned long long) + IPL_PARMBLOCK_START->ipl_info.fcp.bootprog); +DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n", (unsigned long long) + IPL_PARMBLOCK_START->ipl_info.fcp.br_lba); + +static struct attribute *ipl_fcp_attrs[] = { + &sys_ipl_type_attr.attr, + &sys_ipl_device_attr.attr, + &sys_ipl_fcp_wwpn_attr.attr, + &sys_ipl_fcp_lun_attr.attr, + &sys_ipl_fcp_bootprog_attr.attr, + &sys_ipl_fcp_br_lba_attr.attr, + NULL, +}; + +static struct attribute_group ipl_fcp_attr_group = { + .attrs = ipl_fcp_attrs, +}; + +/* CCW ipl device attributes */ + +static struct attribute *ipl_ccw_attrs[] = { + &sys_ipl_type_attr.attr, + &sys_ipl_device_attr.attr, + NULL, +}; + +static struct attribute_group ipl_ccw_attr_group = { + .attrs = ipl_ccw_attrs, +}; + +/* UNKNOWN ipl device attributes */ + +static struct attribute *ipl_unknown_attrs[] = { + &sys_ipl_type_attr.attr, + NULL, +}; + +static struct attribute_group ipl_unknown_attr_group = { + .attrs = ipl_unknown_attrs, +}; + +static decl_subsys(ipl, NULL, NULL); + +/* + * reipl section + */ + +/* FCP reipl device attributes */ + +DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%016llx\n", + reipl_block_fcp->ipl_info.fcp.wwpn); +DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%016llx\n", + reipl_block_fcp->ipl_info.fcp.lun); +DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n", + reipl_block_fcp->ipl_info.fcp.bootprog); +DEFINE_IPL_ATTR_RW(reipl_fcp, br_lba, "%lld\n", "%lld\n", + reipl_block_fcp->ipl_info.fcp.br_lba); +DEFINE_IPL_ATTR_RW(reipl_fcp, device, "0.0.%04llx\n", "0.0.%llx\n", + reipl_block_fcp->ipl_info.fcp.devno); + +static struct attribute *reipl_fcp_attrs[] = { + &sys_reipl_fcp_device_attr.attr, + &sys_reipl_fcp_wwpn_attr.attr, + &sys_reipl_fcp_lun_attr.attr, + &sys_reipl_fcp_bootprog_attr.attr, + &sys_reipl_fcp_br_lba_attr.attr, + NULL, +}; + +static struct attribute_group reipl_fcp_attr_group = { + .name = IPL_FCP_STR, + .attrs = reipl_fcp_attrs, +}; + +/* CCW reipl device attributes */ + +DEFINE_IPL_ATTR_RW(reipl_ccw, device, "0.0.%04llx\n", "0.0.%llx\n", + reipl_block_ccw->ipl_info.ccw.devno); + +static struct attribute *reipl_ccw_attrs[] = { + &sys_reipl_ccw_device_attr.attr, + NULL, +}; + +static struct attribute_group reipl_ccw_attr_group = { + .name = IPL_CCW_STR, + .attrs = reipl_ccw_attrs, +}; + +/* reipl type */ + +static int reipl_set_type(enum ipl_type type) +{ + if (!(reipl_capabilities & type)) + return -EINVAL; + + switch(type) { + case IPL_TYPE_CCW: + if (MACHINE_IS_VM) + reipl_method = IPL_METHOD_CCW_VM; + else + reipl_method = IPL_METHOD_CCW_CIO; + break; + case IPL_TYPE_FCP: + if (diag308_set_works) + reipl_method = IPL_METHOD_FCP_RW_DIAG; + else if (MACHINE_IS_VM) + reipl_method = IPL_METHOD_FCP_RO_VM; + else + reipl_method = IPL_METHOD_FCP_RO_DIAG; + break; + default: + reipl_method = IPL_METHOD_NONE; + } + reipl_type = type; + return 0; +} + +static ssize_t reipl_type_show(struct subsystem *subsys, char *page) +{ + return sprintf(page, "%s\n", ipl_type_str(reipl_type)); +} + +static ssize_t reipl_type_store(struct subsystem *subsys, const char *buf, + size_t len) +{ + int rc = -EINVAL; + + if (strncmp(buf, IPL_CCW_STR, strlen(IPL_CCW_STR)) == 0) + rc = reipl_set_type(IPL_TYPE_CCW); + else if (strncmp(buf, IPL_FCP_STR, strlen(IPL_FCP_STR)) == 0) + rc = reipl_set_type(IPL_TYPE_FCP); + return (rc != 0) ? rc : len; +} + +static struct subsys_attribute reipl_type_attr = + __ATTR(reipl_type, 0644, reipl_type_show, reipl_type_store); + +static decl_subsys(reipl, NULL, NULL); + +/* + * dump section + */ + +/* FCP dump device attributes */ + +DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%016llx\n", + dump_block_fcp->ipl_info.fcp.wwpn); +DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%016llx\n", + dump_block_fcp->ipl_info.fcp.lun); +DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n", + dump_block_fcp->ipl_info.fcp.bootprog); +DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n", + dump_block_fcp->ipl_info.fcp.br_lba); +DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n", + dump_block_fcp->ipl_info.fcp.devno); + +static struct attribute *dump_fcp_attrs[] = { + &sys_dump_fcp_device_attr.attr, + &sys_dump_fcp_wwpn_attr.attr, + &sys_dump_fcp_lun_attr.attr, + &sys_dump_fcp_bootprog_attr.attr, + &sys_dump_fcp_br_lba_attr.attr, + NULL, +}; + +static struct attribute_group dump_fcp_attr_group = { + .name = IPL_FCP_STR, + .attrs = dump_fcp_attrs, +}; + +/* CCW dump device attributes */ + +DEFINE_IPL_ATTR_RW(dump_ccw, device, "0.0.%04llx\n", "0.0.%llx\n", + dump_block_ccw->ipl_info.ccw.devno); + +static struct attribute *dump_ccw_attrs[] = { + &sys_dump_ccw_device_attr.attr, + NULL, +}; + +static struct attribute_group dump_ccw_attr_group = { + .name = IPL_CCW_STR, + .attrs = dump_ccw_attrs, +}; + +/* dump type */ + +static int dump_set_type(enum ipl_type type) +{ + if (!(dump_capabilities & type)) + return -EINVAL; + switch(type) { + case IPL_TYPE_CCW: + if (MACHINE_IS_VM) + dump_method = IPL_METHOD_CCW_VM; + else + dump_method = IPL_METHOD_CCW_CIO; + break; + case IPL_TYPE_FCP: + dump_method = IPL_METHOD_FCP_RW_DIAG; + break; + default: + dump_method = IPL_METHOD_NONE; + } + dump_type = type; + return 0; +} + +static ssize_t dump_type_show(struct subsystem *subsys, char *page) +{ + return sprintf(page, "%s\n", ipl_type_str(dump_type)); +} + +static ssize_t dump_type_store(struct subsystem *subsys, const char *buf, + size_t len) +{ + int rc = -EINVAL; + + if (strncmp(buf, IPL_NONE_STR, strlen(IPL_NONE_STR)) == 0) + rc = dump_set_type(IPL_TYPE_NONE); + else if (strncmp(buf, IPL_CCW_STR, strlen(IPL_CCW_STR)) == 0) + rc = dump_set_type(IPL_TYPE_CCW); + else if (strncmp(buf, IPL_FCP_STR, strlen(IPL_FCP_STR)) == 0) + rc = dump_set_type(IPL_TYPE_FCP); + return (rc != 0) ? rc : len; +} + +static struct subsys_attribute dump_type_attr = + __ATTR(dump_type, 0644, dump_type_show, dump_type_store); + +static decl_subsys(dump, NULL, NULL); + +#ifdef CONFIG_SMP +static void dump_smp_stop_all(void) +{ + int cpu; + preempt_disable(); + for_each_online_cpu(cpu) { + if (cpu == smp_processor_id()) + continue; + while (signal_processor(cpu, sigp_stop) == sigp_busy) + udelay(10); + } + preempt_enable(); +} +#else +#define dump_smp_stop_all() do { } while (0) +#endif + +/* + * Shutdown actions section + */ + +static decl_subsys(shutdown_actions, NULL, NULL); + +/* on panic */ + +static ssize_t on_panic_show(struct subsystem *subsys, char *page) +{ + return sprintf(page, "%s\n", shutdown_action_str(on_panic_action)); +} + +static ssize_t on_panic_store(struct subsystem *subsys, const char *buf, + size_t len) +{ + if (strncmp(buf, SHUTDOWN_REIPL_STR, strlen(SHUTDOWN_REIPL_STR)) == 0) + on_panic_action = SHUTDOWN_REIPL; + else if (strncmp(buf, SHUTDOWN_DUMP_STR, + strlen(SHUTDOWN_DUMP_STR)) == 0) + on_panic_action = SHUTDOWN_DUMP; + else if (strncmp(buf, SHUTDOWN_STOP_STR, + strlen(SHUTDOWN_STOP_STR)) == 0) + on_panic_action = SHUTDOWN_STOP; + else + return -EINVAL; + + return len; +} + +static struct subsys_attribute on_panic_attr = + __ATTR(on_panic, 0644, on_panic_show, on_panic_store); + +static void print_fcp_block(struct ipl_parameter_block *fcp_block) +{ + printk(KERN_EMERG "wwpn: %016llx\n", + (unsigned long long)fcp_block->ipl_info.fcp.wwpn); + printk(KERN_EMERG "lun: %016llx\n", + (unsigned long long)fcp_block->ipl_info.fcp.lun); + printk(KERN_EMERG "bootprog: %lld\n", + (unsigned long long)fcp_block->ipl_info.fcp.bootprog); + printk(KERN_EMERG "br_lba: %lld\n", + (unsigned long long)fcp_block->ipl_info.fcp.br_lba); + printk(KERN_EMERG "device: %llx\n", + (unsigned long long)fcp_block->ipl_info.fcp.devno); + printk(KERN_EMERG "opt: %x\n", fcp_block->ipl_info.fcp.opt); +} + +void do_reipl(void) +{ + struct ccw_dev_id devid; + static char buf[100]; + + switch (reipl_type) { + case IPL_TYPE_CCW: + printk(KERN_EMERG "reboot on ccw device: 0.0.%04x\n", + reipl_block_ccw->ipl_info.ccw.devno); + break; + case IPL_TYPE_FCP: + printk(KERN_EMERG "reboot on fcp device:\n"); + print_fcp_block(reipl_block_fcp); + break; + default: + break; + } + + switch (reipl_method) { + case IPL_METHOD_CCW_CIO: + devid.devno = reipl_block_ccw->ipl_info.ccw.devno; + devid.ssid = 0; + reipl_ccw_dev(&devid); + break; + case IPL_METHOD_CCW_VM: + sprintf(buf, "IPL %X", reipl_block_ccw->ipl_info.ccw.devno); + cpcmd(buf, NULL, 0, NULL); + break; + case IPL_METHOD_CCW_DIAG: + diag308(DIAG308_SET, reipl_block_ccw); + diag308(DIAG308_IPL, NULL); + break; + case IPL_METHOD_FCP_RW_DIAG: + diag308(DIAG308_SET, reipl_block_fcp); + diag308(DIAG308_IPL, NULL); + break; + case IPL_METHOD_FCP_RO_DIAG: + diag308(DIAG308_IPL, NULL); + break; + case IPL_METHOD_FCP_RO_VM: + cpcmd("IPL", NULL, 0, NULL); + break; + case IPL_METHOD_NONE: + default: + if (MACHINE_IS_VM) + cpcmd("IPL", NULL, 0, NULL); + diag308(DIAG308_IPL, NULL); + break; + } + panic("reipl failed!\n"); +} + +static void do_dump(void) +{ + struct ccw_dev_id devid; + static char buf[100]; + + switch (dump_type) { + case IPL_TYPE_CCW: + printk(KERN_EMERG "Automatic dump on ccw device: 0.0.%04x\n", + dump_block_ccw->ipl_info.ccw.devno); + break; + case IPL_TYPE_FCP: + printk(KERN_EMERG "Automatic dump on fcp device:\n"); + print_fcp_block(dump_block_fcp); + break; + default: + return; + } + + switch (dump_method) { + case IPL_METHOD_CCW_CIO: + dump_smp_stop_all(); + devid.devno = dump_block_ccw->ipl_info.ccw.devno; + devid.ssid = 0; + reipl_ccw_dev(&devid); + break; + case IPL_METHOD_CCW_VM: + dump_smp_stop_all(); + sprintf(buf, "STORE STATUS"); + cpcmd(buf, NULL, 0, NULL); + sprintf(buf, "IPL %X", dump_block_ccw->ipl_info.ccw.devno); + cpcmd(buf, NULL, 0, NULL); + break; + case IPL_METHOD_CCW_DIAG: + diag308(DIAG308_SET, dump_block_ccw); + diag308(DIAG308_DUMP, NULL); + break; + case IPL_METHOD_FCP_RW_DIAG: + diag308(DIAG308_SET, dump_block_fcp); + diag308(DIAG308_DUMP, NULL); + break; + case IPL_METHOD_NONE: + default: + return; + } + printk(KERN_EMERG "Dump failed!\n"); +} + +/* init functions */ + +static int __init ipl_register_fcp_files(void) +{ + int rc; + + rc = sysfs_create_group(&ipl_subsys.kset.kobj, + &ipl_fcp_attr_group); + if (rc) + goto out; + rc = sysfs_create_bin_file(&ipl_subsys.kset.kobj, + &ipl_parameter_attr); + if (rc) + goto out_ipl_parm; + rc = sysfs_create_bin_file(&ipl_subsys.kset.kobj, + &ipl_scp_data_attr); + if (!rc) + goto out; + + sysfs_remove_bin_file(&ipl_subsys.kset.kobj, &ipl_parameter_attr); + +out_ipl_parm: + sysfs_remove_group(&ipl_subsys.kset.kobj, &ipl_fcp_attr_group); +out: + return rc; +} + +static int __init ipl_init(void) +{ + int rc; + + rc = firmware_register(&ipl_subsys); + if (rc) + return rc; + switch (ipl_get_type()) { + case IPL_TYPE_CCW: + rc = sysfs_create_group(&ipl_subsys.kset.kobj, + &ipl_ccw_attr_group); + break; + case IPL_TYPE_FCP: + rc = ipl_register_fcp_files(); + break; + default: + rc = sysfs_create_group(&ipl_subsys.kset.kobj, + &ipl_unknown_attr_group); + break; + } + if (rc) + firmware_unregister(&ipl_subsys); + return rc; +} + +static void __init reipl_probe(void) +{ + void *buffer; + + buffer = (void *) get_zeroed_page(GFP_KERNEL); + if (!buffer) + return; + if (diag308(DIAG308_STORE, buffer) == DIAG308_RC_OK) + diag308_set_works = 1; + free_page((unsigned long)buffer); +} + +static int __init reipl_ccw_init(void) +{ + int rc; + + reipl_block_ccw = (void *) get_zeroed_page(GFP_KERNEL); + if (!reipl_block_ccw) + return -ENOMEM; + rc = sysfs_create_group(&reipl_subsys.kset.kobj, &reipl_ccw_attr_group); + if (rc) { + free_page((unsigned long)reipl_block_ccw); + return rc; + } + reipl_block_ccw->hdr.len = IPL_PARM_BLK_CCW_LEN; + reipl_block_ccw->hdr.version = IPL_PARM_BLOCK_VERSION; + reipl_block_ccw->hdr.blk0_len = sizeof(reipl_block_ccw->ipl_info.ccw); + reipl_block_ccw->hdr.pbt = DIAG308_IPL_TYPE_CCW; + if (ipl_get_type() == IPL_TYPE_CCW) + reipl_block_ccw->ipl_info.ccw.devno = ipl_devno; + reipl_capabilities |= IPL_TYPE_CCW; + return 0; +} + +static int __init reipl_fcp_init(void) +{ + int rc; + + if ((!diag308_set_works) && (ipl_get_type() != IPL_TYPE_FCP)) + return 0; + if ((!diag308_set_works) && (ipl_get_type() == IPL_TYPE_FCP)) + make_attrs_ro(reipl_fcp_attrs); + + reipl_block_fcp = (void *) get_zeroed_page(GFP_KERNEL); + if (!reipl_block_fcp) + return -ENOMEM; + rc = sysfs_create_group(&reipl_subsys.kset.kobj, &reipl_fcp_attr_group); + if (rc) { + free_page((unsigned long)reipl_block_fcp); + return rc; + } + if (ipl_get_type() == IPL_TYPE_FCP) { + memcpy(reipl_block_fcp, IPL_PARMBLOCK_START, PAGE_SIZE); + } else { + reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN; + reipl_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION; + reipl_block_fcp->hdr.blk0_len = + sizeof(reipl_block_fcp->ipl_info.fcp); + reipl_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP; + reipl_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_IPL; + } + reipl_capabilities |= IPL_TYPE_FCP; + return 0; +} + +static int __init reipl_init(void) +{ + int rc; + + rc = firmware_register(&reipl_subsys); + if (rc) + return rc; + rc = subsys_create_file(&reipl_subsys, &reipl_type_attr); + if (rc) { + firmware_unregister(&reipl_subsys); + return rc; + } + rc = reipl_ccw_init(); + if (rc) + return rc; + rc = reipl_fcp_init(); + if (rc) + return rc; + rc = reipl_set_type(ipl_get_type()); + if (rc) + return rc; + return 0; +} + +static int __init dump_ccw_init(void) +{ + int rc; + + dump_block_ccw = (void *) get_zeroed_page(GFP_KERNEL); + if (!dump_block_ccw) + return -ENOMEM; + rc = sysfs_create_group(&dump_subsys.kset.kobj, &dump_ccw_attr_group); + if (rc) { + free_page((unsigned long)dump_block_ccw); + return rc; + } + dump_block_ccw->hdr.len = IPL_PARM_BLK_CCW_LEN; + dump_block_ccw->hdr.version = IPL_PARM_BLOCK_VERSION; + dump_block_ccw->hdr.blk0_len = sizeof(reipl_block_ccw->ipl_info.ccw); + dump_block_ccw->hdr.pbt = DIAG308_IPL_TYPE_CCW; + dump_capabilities |= IPL_TYPE_CCW; + return 0; +} + +extern char s390_readinfo_sccb[]; + +static int __init dump_fcp_init(void) +{ + int rc; + + if(!(s390_readinfo_sccb[91] & 0x2)) + return 0; /* LDIPL DUMP is not installed */ + if (!diag308_set_works) + return 0; + dump_block_fcp = (void *) get_zeroed_page(GFP_KERNEL); + if (!dump_block_fcp) + return -ENOMEM; + rc = sysfs_create_group(&dump_subsys.kset.kobj, &dump_fcp_attr_group); + if (rc) { + free_page((unsigned long)dump_block_fcp); + return rc; + } + dump_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN; + dump_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION; + dump_block_fcp->hdr.blk0_len = sizeof(dump_block_fcp->ipl_info.fcp); + dump_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP; + dump_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_DUMP; + dump_capabilities |= IPL_TYPE_FCP; + return 0; +} + +#define SHUTDOWN_ON_PANIC_PRIO 0 + +static int shutdown_on_panic_notify(struct notifier_block *self, + unsigned long event, void *data) +{ + if (on_panic_action == SHUTDOWN_DUMP) + do_dump(); + else if (on_panic_action == SHUTDOWN_REIPL) + do_reipl(); + return NOTIFY_OK; +} + +static struct notifier_block shutdown_on_panic_nb = { + .notifier_call = shutdown_on_panic_notify, + .priority = SHUTDOWN_ON_PANIC_PRIO +}; + +static int __init dump_init(void) +{ + int rc; + + rc = firmware_register(&dump_subsys); + if (rc) + return rc; + rc = subsys_create_file(&dump_subsys, &dump_type_attr); + if (rc) { + firmware_unregister(&dump_subsys); + return rc; + } + rc = dump_ccw_init(); + if (rc) + return rc; + rc = dump_fcp_init(); + if (rc) + return rc; + dump_set_type(IPL_TYPE_NONE); + return 0; +} + +static int __init shutdown_actions_init(void) +{ + int rc; + + rc = firmware_register(&shutdown_actions_subsys); + if (rc) + return rc; + rc = subsys_create_file(&shutdown_actions_subsys, &on_panic_attr); + if (rc) { + firmware_unregister(&shutdown_actions_subsys); + return rc; + } + atomic_notifier_chain_register(&panic_notifier_list, + &shutdown_on_panic_nb); + return 0; +} + +static int __init s390_ipl_init(void) +{ + int rc; + + reipl_probe(); + rc = ipl_init(); + if (rc) + return rc; + rc = reipl_init(); + if (rc) + return rc; + rc = dump_init(); + if (rc) + return rc; + rc = shutdown_actions_init(); + if (rc) + return rc; + return 0; +} + +__initcall(s390_ipl_init); diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S index 658e5ac484f9..4562cdbce8eb 100644 --- a/arch/s390/kernel/reipl.S +++ b/arch/s390/kernel/reipl.S @@ -8,13 +8,30 @@ #include - .globl do_reipl -do_reipl: basr %r13,0 + .globl do_reipl_asm +do_reipl_asm: basr %r13,0 .Lpg0: lpsw .Lnewpsw-.Lpg0(%r13) -.Lpg1: lctl %c6,%c6,.Lall-.Lpg0(%r13) - stctl %c0,%c0,.Lctlsave-.Lpg0(%r13) - ni .Lctlsave-.Lpg0(%r13),0xef - lctl %c0,%c0,.Lctlsave-.Lpg0(%r13) + + # switch off lowcore protection + +.Lpg1: stctl %c0,%c0,.Lctlsave1-.Lpg0(%r13) + stctl %c0,%c0,.Lctlsave2-.Lpg0(%r13) + ni .Lctlsave1-.Lpg0(%r13),0xef + lctl %c0,%c0,.Lctlsave1-.Lpg0(%r13) + + # do store status of all registers + + stm %r0,%r15,__LC_GPREGS_SAVE_AREA + stctl %c0,%c15,__LC_CREGS_SAVE_AREA + mvc __LC_CREGS_SAVE_AREA(4),.Lctlsave2-.Lpg0(%r13) + stam %a0,%a15,__LC_AREGS_SAVE_AREA + stpx __LC_PREFIX_SAVE_AREA + stckc .Lclkcmp-.Lpg0(%r13) + mvc __LC_CLOCK_COMP_SAVE_AREA(8),.Lclkcmp-.Lpg0(%r13) + stpt __LC_CPU_TIMER_SAVE_AREA + st %r13, __LC_PSW_SAVE_AREA+4 + + lctl %c6,%c6,.Lall-.Lpg0(%r13) lr %r1,%r2 mvc __LC_PGM_NEW_PSW(8),.Lpcnew-.Lpg0(%r13) stsch .Lschib-.Lpg0(%r13) @@ -46,9 +63,11 @@ do_reipl: basr %r13,0 .Ldisab: st %r14,.Ldispsw+4-.Lpg0(%r13) lpsw .Ldispsw-.Lpg0(%r13) .align 8 +.Lclkcmp: .quad 0x0000000000000000 .Lall: .long 0xff000000 .Lnull: .long 0x00000000 -.Lctlsave: .long 0x00000000 +.Lctlsave1: .long 0x00000000 +.Lctlsave2: .long 0x00000000 .align 8 .Lnewpsw: .long 0x00080000,0x80000000+.Lpg1 .Lpcnew: .long 0x00080000,0x80000000+.Lecs diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S index 4d090d60f3ef..95bd1e234f63 100644 --- a/arch/s390/kernel/reipl64.S +++ b/arch/s390/kernel/reipl64.S @@ -8,13 +8,30 @@ */ #include - .globl do_reipl -do_reipl: basr %r13,0 -.Lpg0: lpswe .Lnewpsw-.Lpg0(%r13) + .globl do_reipl_asm +do_reipl_asm: basr %r13,0 + + # do store status of all registers + +.Lpg0: stg %r1,.Lregsave-.Lpg0(%r13) + lghi %r1,0x1000 + stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-0x1000(%r1) + lg %r0,.Lregsave-.Lpg0(%r13) + stg %r0,__LC_GPREGS_SAVE_AREA-0x1000+8(%r1) + stctg %c0,%c15,__LC_CREGS_SAVE_AREA-0x1000(%r1) + stam %a0,%a15,__LC_AREGS_SAVE_AREA-0x1000(%r1) + stpx __LC_PREFIX_SAVE_AREA-0x1000(%r1) + stfpc __LC_FP_CREG_SAVE_AREA-0x1000(%r1) + stckc .Lclkcmp-.Lpg0(%r13) + mvc __LC_CLOCK_COMP_SAVE_AREA-0x1000(8,%r1),.Lclkcmp-.Lpg0(%r13) + stpt __LC_CPU_TIMER_SAVE_AREA-0x1000(%r1) + stg %r13, __LC_PSW_SAVE_AREA-0x1000+8(%r1) + + lpswe .Lnewpsw-.Lpg0(%r13) .Lpg1: lctlg %c6,%c6,.Lall-.Lpg0(%r13) - stctg %c0,%c0,.Lctlsave-.Lpg0(%r13) - ni .Lctlsave+4-.Lpg0(%r13),0xef - lctlg %c0,%c0,.Lctlsave-.Lpg0(%r13) + stctg %c0,%c0,.Lregsave-.Lpg0(%r13) + ni .Lregsave+4-.Lpg0(%r13),0xef + lctlg %c0,%c0,.Lregsave-.Lpg0(%r13) lgr %r1,%r2 mvc __LC_PGM_NEW_PSW(16),.Lpcnew-.Lpg0(%r13) stsch .Lschib-.Lpg0(%r13) @@ -50,8 +67,9 @@ do_reipl: basr %r13,0 st %r14,.Ldispsw+12-.Lpg0(%r13) lpswe .Ldispsw-.Lpg0(%r13) .align 8 +.Lclkcmp: .quad 0x0000000000000000 .Lall: .quad 0x00000000ff000000 -.Lctlsave: .quad 0x0000000000000000 +.Lregsave: .quad 0x0000000000000000 .Lnull: .long 0x0000000000000000 .align 16 /* @@ -92,5 +110,3 @@ do_reipl: basr %r13,0 .long 0x00000000,0x00000000 .long 0x00000000,0x00000000 - - diff --git a/arch/s390/kernel/reipl_diag.c b/arch/s390/kernel/reipl_diag.c deleted file mode 100644 index 1f33951ba439..000000000000 --- a/arch/s390/kernel/reipl_diag.c +++ /dev/null @@ -1,39 +0,0 @@ -/* - * This file contains the implementation of the - * Linux re-IPL support - * - * (C) Copyright IBM Corp. 2005 - * - * Author(s): Volker Sameske (sameske@de.ibm.com) - * - */ - -#include - -static unsigned int reipl_diag_rc1; -static unsigned int reipl_diag_rc2; - -/* - * re-IPL the system using the last used IPL parameters - */ -void reipl_diag(void) -{ - asm volatile ( - " la %%r4,0\n" - " la %%r5,0\n" - " diag %%r4,%2,0x308\n" - "0:\n" - " st %%r4,%0\n" - " st %%r5,%1\n" - ".section __ex_table,\"a\"\n" -#ifdef CONFIG_64BIT - " .align 8\n" - " .quad 0b, 0b\n" -#else - " .align 4\n" - " .long 0b, 0b\n" -#endif - ".previous\n" - : "=m" (reipl_diag_rc1), "=m" (reipl_diag_rc2) - : "d" (3) : "cc", "4", "5" ); -} diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 89051e8a5d8d..f2a9165ca4f8 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -285,16 +285,9 @@ void (*_machine_power_off)(void) = machine_power_off_smp; /* * Reboot, halt and power_off routines for non SMP. */ -extern void reipl(unsigned long devno); -extern void reipl_diag(void); static void do_machine_restart_nonsmp(char * __unused) { - reipl_diag(); - - if (MACHINE_IS_VM) - cpcmd ("IPL", NULL, 0, NULL); - else - reipl (0x10000 | S390_lowcore.ipl_device); + do_reipl(); } static void do_machine_halt_nonsmp(void) @@ -755,214 +748,3 @@ struct seq_operations cpuinfo_op = { .show = show_cpuinfo, }; -#define DEFINE_IPL_ATTR(_name, _format, _value) \ -static ssize_t ipl_##_name##_show(struct subsystem *subsys, \ - char *page) \ -{ \ - return sprintf(page, _format, _value); \ -} \ -static struct subsys_attribute ipl_##_name##_attr = \ - __ATTR(_name, S_IRUGO, ipl_##_name##_show, NULL); - -DEFINE_IPL_ATTR(wwpn, "0x%016llx\n", (unsigned long long) - IPL_PARMBLOCK_START->fcp.wwpn); -DEFINE_IPL_ATTR(lun, "0x%016llx\n", (unsigned long long) - IPL_PARMBLOCK_START->fcp.lun); -DEFINE_IPL_ATTR(bootprog, "%lld\n", (unsigned long long) - IPL_PARMBLOCK_START->fcp.bootprog); -DEFINE_IPL_ATTR(br_lba, "%lld\n", (unsigned long long) - IPL_PARMBLOCK_START->fcp.br_lba); - -enum ipl_type_type { - ipl_type_unknown, - ipl_type_ccw, - ipl_type_fcp, -}; - -static enum ipl_type_type -get_ipl_type(void) -{ - struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START; - - if (!IPL_DEVNO_VALID) - return ipl_type_unknown; - if (!IPL_PARMBLOCK_VALID) - return ipl_type_ccw; - if (ipl->hdr.header.version > IPL_MAX_SUPPORTED_VERSION) - return ipl_type_unknown; - if (ipl->fcp.pbt != IPL_TYPE_FCP) - return ipl_type_unknown; - return ipl_type_fcp; -} - -static ssize_t -ipl_type_show(struct subsystem *subsys, char *page) -{ - switch (get_ipl_type()) { - case ipl_type_ccw: - return sprintf(page, "ccw\n"); - case ipl_type_fcp: - return sprintf(page, "fcp\n"); - default: - return sprintf(page, "unknown\n"); - } -} - -static struct subsys_attribute ipl_type_attr = __ATTR_RO(ipl_type); - -static ssize_t -ipl_device_show(struct subsystem *subsys, char *page) -{ - struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START; - - switch (get_ipl_type()) { - case ipl_type_ccw: - return sprintf(page, "0.0.%04x\n", ipl_devno); - case ipl_type_fcp: - return sprintf(page, "0.0.%04x\n", ipl->fcp.devno); - default: - return 0; - } -} - -static struct subsys_attribute ipl_device_attr = - __ATTR(device, S_IRUGO, ipl_device_show, NULL); - -static struct attribute *ipl_fcp_attrs[] = { - &ipl_type_attr.attr, - &ipl_device_attr.attr, - &ipl_wwpn_attr.attr, - &ipl_lun_attr.attr, - &ipl_bootprog_attr.attr, - &ipl_br_lba_attr.attr, - NULL, -}; - -static struct attribute_group ipl_fcp_attr_group = { - .attrs = ipl_fcp_attrs, -}; - -static struct attribute *ipl_ccw_attrs[] = { - &ipl_type_attr.attr, - &ipl_device_attr.attr, - NULL, -}; - -static struct attribute_group ipl_ccw_attr_group = { - .attrs = ipl_ccw_attrs, -}; - -static struct attribute *ipl_unknown_attrs[] = { - &ipl_type_attr.attr, - NULL, -}; - -static struct attribute_group ipl_unknown_attr_group = { - .attrs = ipl_unknown_attrs, -}; - -static ssize_t -ipl_parameter_read(struct kobject *kobj, char *buf, loff_t off, size_t count) -{ - unsigned int size = IPL_PARMBLOCK_SIZE; - - if (off > size) - return 0; - if (off + count > size) - count = size - off; - - memcpy(buf, (void *) IPL_PARMBLOCK_START + off, count); - return count; -} - -static struct bin_attribute ipl_parameter_attr = { - .attr = { - .name = "binary_parameter", - .mode = S_IRUGO, - .owner = THIS_MODULE, - }, - .size = PAGE_SIZE, - .read = &ipl_parameter_read, -}; - -static ssize_t -ipl_scp_data_read(struct kobject *kobj, char *buf, loff_t off, size_t count) -{ - unsigned int size = IPL_PARMBLOCK_START->fcp.scp_data_len; - void *scp_data = &IPL_PARMBLOCK_START->fcp.scp_data; - - if (off > size) - return 0; - if (off + count > size) - count = size - off; - - memcpy(buf, scp_data + off, count); - return count; -} - -static struct bin_attribute ipl_scp_data_attr = { - .attr = { - .name = "scp_data", - .mode = S_IRUGO, - .owner = THIS_MODULE, - }, - .size = PAGE_SIZE, - .read = &ipl_scp_data_read, -}; - -static decl_subsys(ipl, NULL, NULL); - -static int ipl_register_fcp_files(void) -{ - int rc; - - rc = sysfs_create_group(&ipl_subsys.kset.kobj, - &ipl_fcp_attr_group); - if (rc) - goto out; - rc = sysfs_create_bin_file(&ipl_subsys.kset.kobj, - &ipl_parameter_attr); - if (rc) - goto out_ipl_parm; - rc = sysfs_create_bin_file(&ipl_subsys.kset.kobj, - &ipl_scp_data_attr); - if (!rc) - goto out; - - sysfs_remove_bin_file(&ipl_subsys.kset.kobj, &ipl_parameter_attr); - -out_ipl_parm: - sysfs_remove_group(&ipl_subsys.kset.kobj, &ipl_fcp_attr_group); -out: - return rc; -} - -static int __init -ipl_device_sysfs_register(void) { - int rc; - - rc = firmware_register(&ipl_subsys); - if (rc) - goto out; - - switch (get_ipl_type()) { - case ipl_type_ccw: - rc = sysfs_create_group(&ipl_subsys.kset.kobj, - &ipl_ccw_attr_group); - break; - case ipl_type_fcp: - rc = ipl_register_fcp_files(); - break; - default: - rc = sysfs_create_group(&ipl_subsys.kset.kobj, - &ipl_unknown_attr_group); - break; - } - - if (rc) - firmware_unregister(&ipl_subsys); -out: - return rc; -} - -__initcall(ipl_device_sysfs_register); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 8e03219eea76..b2e6f4c8d382 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -59,9 +59,6 @@ static struct task_struct *current_set[NR_CPUS]; extern char vmhalt_cmd[]; extern char vmpoff_cmd[]; -extern void reipl(unsigned long devno); -extern void reipl_diag(void); - static void smp_ext_bitcall(int, ec_bit_sig); static void smp_ext_bitcall_others(ec_bit_sig); @@ -279,12 +276,7 @@ static void do_machine_restart(void * __unused) * interrupted by an external interrupt and s390irq * locks are always held disabled). */ - reipl_diag(); - - if (MACHINE_IS_VM) - cpcmd ("IPL", NULL, 0, NULL); - else - reipl (0x10000 | S390_lowcore.ipl_device); + do_reipl(); } void machine_restart_smp(char * __unused) diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 89320c1ad825..050963f15802 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -841,14 +841,26 @@ __clear_subchannel_easy(struct subchannel_id schid) return -EBUSY; } -extern void do_reipl(unsigned long devno); -static int -__shutdown_subchannel_easy(struct subchannel_id schid, void *data) +struct sch_match_id { + struct subchannel_id schid; + struct ccw_dev_id devid; + int rc; +}; + +static int __shutdown_subchannel_easy_and_match(struct subchannel_id schid, + void *data) { struct schib schib; + struct sch_match_id *match_id = data; if (stsch_err(schid, &schib)) return -ENXIO; + if (match_id && schib.pmcw.dnv && + (schib.pmcw.dev == match_id->devid.devno) && + (schid.ssid == match_id->devid.ssid)) { + match_id->schid = schid; + match_id->rc = 0; + } if (!schib.pmcw.ena) return 0; switch(__disable_subchannel_easy(schid, &schib)) { @@ -864,18 +876,36 @@ __shutdown_subchannel_easy(struct subchannel_id schid, void *data) return 0; } -void -clear_all_subchannels(void) +static int clear_all_subchannels_and_match(struct ccw_dev_id *devid, + struct subchannel_id *schid) { + struct sch_match_id match_id; + + match_id.devid = *devid; + match_id.rc = -ENODEV; local_irq_disable(); - for_each_subchannel(__shutdown_subchannel_easy, NULL); + for_each_subchannel(__shutdown_subchannel_easy_and_match, &match_id); + if (match_id.rc == 0) + *schid = match_id.schid; + return match_id.rc; } -/* Make sure all subchannels are quiet before we re-ipl an lpar. */ -void -reipl(unsigned long devno) + +void clear_all_subchannels(void) { - clear_all_subchannels(); - cio_reset_channel_paths(); - do_reipl(devno); + local_irq_disable(); + for_each_subchannel(__shutdown_subchannel_easy_and_match, NULL); +} + +extern void do_reipl_asm(__u32 schid); + +/* Make sure all subchannels are quiet before we re-ipl an lpar. */ +void reipl_ccw_dev(struct ccw_dev_id *devid) +{ + struct subchannel_id schid; + + if (clear_all_subchannels_and_match(devid, &schid)) + panic("IPL Device not found\n"); + cio_reset_channel_paths(); + do_reipl_asm(*((__u32*)&schid)); } diff --git a/include/asm-s390/cio.h b/include/asm-s390/cio.h index 28fdd6e2b8ba..da063cd5f0a0 100644 --- a/include/asm-s390/cio.h +++ b/include/asm-s390/cio.h @@ -270,6 +270,11 @@ struct diag210 { __u32 vrdccrft : 8; /* real device feature (output) */ } __attribute__ ((packed,aligned(4))); +struct ccw_dev_id { + u8 ssid; + u16 devno; +}; + extern int diag210(struct diag210 *addr); extern void wait_cons_dev(void); @@ -280,6 +285,8 @@ extern void cio_reset_channel_paths(void); extern void css_schedule_reprobe(void); +extern void reipl_ccw_dev(struct ccw_dev_id *id); + #endif #endif diff --git a/include/asm-s390/lowcore.h b/include/asm-s390/lowcore.h index 596c8b172104..2e3d4cca5e21 100644 --- a/include/asm-s390/lowcore.h +++ b/include/asm-s390/lowcore.h @@ -47,6 +47,7 @@ #define __LC_PER_ATMID 0x096 #define __LC_PER_ADDRESS 0x098 #define __LC_PER_ACCESS_ID 0x0A1 +#define __LC_AR_MODE_ID 0x0A3 #define __LC_SUBCHANNEL_ID 0x0B8 #define __LC_SUBCHANNEL_NR 0x0BA @@ -106,18 +107,28 @@ #define __LC_INT_CLOCK 0xDE8 #endif /* __s390x__ */ -#define __LC_PANIC_MAGIC 0xE00 +#define __LC_PANIC_MAGIC 0xE00 #ifndef __s390x__ #define __LC_PFAULT_INTPARM 0x080 #define __LC_CPU_TIMER_SAVE_AREA 0x0D8 +#define __LC_CLOCK_COMP_SAVE_AREA 0x0E0 +#define __LC_PSW_SAVE_AREA 0x100 +#define __LC_PREFIX_SAVE_AREA 0x108 #define __LC_AREGS_SAVE_AREA 0x120 +#define __LC_FPREGS_SAVE_AREA 0x160 #define __LC_GPREGS_SAVE_AREA 0x180 #define __LC_CREGS_SAVE_AREA 0x1C0 #else /* __s390x__ */ #define __LC_PFAULT_INTPARM 0x11B8 +#define __LC_FPREGS_SAVE_AREA 0x1200 #define __LC_GPREGS_SAVE_AREA 0x1280 +#define __LC_PSW_SAVE_AREA 0x1300 +#define __LC_PREFIX_SAVE_AREA 0x1318 +#define __LC_FP_CREG_SAVE_AREA 0x131C +#define __LC_TODREG_SAVE_AREA 0x1324 #define __LC_CPU_TIMER_SAVE_AREA 0x1328 +#define __LC_CLOCK_COMP_SAVE_AREA 0x1331 #define __LC_AREGS_SAVE_AREA 0x1340 #define __LC_CREGS_SAVE_AREA 0x1380 #endif /* __s390x__ */ diff --git a/include/asm-s390/setup.h b/include/asm-s390/setup.h index 02c96d57f0cf..4a1126d8439a 100644 --- a/include/asm-s390/setup.h +++ b/include/asm-s390/setup.h @@ -68,39 +68,59 @@ extern unsigned int console_irq; #define SET_CONSOLE_3215 do { console_mode = 2; } while (0) #define SET_CONSOLE_3270 do { console_mode = 3; } while (0) -struct ipl_list_header { - u32 length; - u8 reserved[3]; + +struct ipl_list_hdr { + u32 len; + u8 reserved1[3]; u8 version; + u32 blk0_len; + u8 pbt; + u8 flags; + u16 reserved2; } __attribute__((packed)); struct ipl_block_fcp { - u32 length; - u8 pbt; - u8 reserved1[322-1]; + u8 reserved1[313-1]; + u8 opt; + u8 reserved2[3]; + u16 reserved3; u16 devno; - u8 reserved2[4]; + u8 reserved4[4]; u64 wwpn; u64 lun; u32 bootprog; - u8 reserved3[12]; + u8 reserved5[12]; u64 br_lba; u32 scp_data_len; - u8 reserved4[260]; + u8 reserved6[260]; u8 scp_data[]; } __attribute__((packed)); -struct ipl_parameter_block { - union { - u32 length; - struct ipl_list_header header; - } hdr; - struct ipl_block_fcp fcp; +struct ipl_block_ccw { + u8 load_param[8]; + u8 reserved1[84]; + u8 reserved2[2]; + u16 devno; + u8 vm_flags; + u8 reserved3[3]; + u32 vm_parm_len; } __attribute__((packed)); -#define IPL_MAX_SUPPORTED_VERSION (0) +struct ipl_parameter_block { + struct ipl_list_hdr hdr; + union { + struct ipl_block_fcp fcp; + struct ipl_block_ccw ccw; + } ipl_info; +} __attribute__((packed)); -#define IPL_TYPE_FCP (0) +#define IPL_PARM_BLK_FCP_LEN (sizeof(struct ipl_list_hdr) + \ + sizeof(struct ipl_block_fcp)) + +#define IPL_PARM_BLK_CCW_LEN (sizeof(struct ipl_list_hdr) + \ + sizeof(struct ipl_block_ccw)) + +#define IPL_MAX_SUPPORTED_VERSION (0) /* * IPL validity flags and parameters as detected in head.S @@ -108,12 +128,14 @@ struct ipl_parameter_block { extern u32 ipl_parameter_flags; extern u16 ipl_devno; +void do_reipl(void); + #define IPL_DEVNO_VALID (ipl_parameter_flags & 1) #define IPL_PARMBLOCK_VALID (ipl_parameter_flags & 2) #define IPL_PARMBLOCK_START ((struct ipl_parameter_block *) \ IPL_PARMBLOCK_ORIGIN) -#define IPL_PARMBLOCK_SIZE (IPL_PARMBLOCK_START->hdr.length) +#define IPL_PARMBLOCK_SIZE (IPL_PARMBLOCK_START->hdr.len) #else /* __ASSEMBLY__ */ From 39b083fe1c3c7b88939f6fa1b0b96e579f12e96f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 20 Sep 2006 15:58:51 +0200 Subject: [PATCH 0233/1063] [S390] empty function defines. Use do { } while (0) constructs instead of empty defines to avoid subtle compile bugs. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/qdio.h | 16 ++++++++-------- drivers/s390/scsi/zfcp_def.h | 8 ++++---- include/asm-s390/dma.h | 2 +- include/asm-s390/io.h | 2 +- include/asm-s390/smp.h | 2 +- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index ceb3ab31ee08..124569362f02 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -191,49 +191,49 @@ enum qdio_irq_states { #if QDIO_VERBOSE_LEVEL>8 #define QDIO_PRINT_STUPID(x...) printk( KERN_DEBUG QDIO_PRINTK_HEADER x) #else -#define QDIO_PRINT_STUPID(x...) +#define QDIO_PRINT_STUPID(x...) do { } while (0) #endif #if QDIO_VERBOSE_LEVEL>7 #define QDIO_PRINT_ALL(x...) printk( QDIO_PRINTK_HEADER x) #else -#define QDIO_PRINT_ALL(x...) +#define QDIO_PRINT_ALL(x...) do { } while (0) #endif #if QDIO_VERBOSE_LEVEL>6 #define QDIO_PRINT_INFO(x...) printk( QDIO_PRINTK_HEADER x) #else -#define QDIO_PRINT_INFO(x...) +#define QDIO_PRINT_INFO(x...) do { } while (0) #endif #if QDIO_VERBOSE_LEVEL>5 #define QDIO_PRINT_WARN(x...) printk( QDIO_PRINTK_HEADER x) #else -#define QDIO_PRINT_WARN(x...) +#define QDIO_PRINT_WARN(x...) do { } while (0) #endif #if QDIO_VERBOSE_LEVEL>4 #define QDIO_PRINT_ERR(x...) printk( QDIO_PRINTK_HEADER x) #else -#define QDIO_PRINT_ERR(x...) +#define QDIO_PRINT_ERR(x...) do { } while (0) #endif #if QDIO_VERBOSE_LEVEL>3 #define QDIO_PRINT_CRIT(x...) printk( QDIO_PRINTK_HEADER x) #else -#define QDIO_PRINT_CRIT(x...) +#define QDIO_PRINT_CRIT(x...) do { } while (0) #endif #if QDIO_VERBOSE_LEVEL>2 #define QDIO_PRINT_ALERT(x...) printk( QDIO_PRINTK_HEADER x) #else -#define QDIO_PRINT_ALERT(x...) +#define QDIO_PRINT_ALERT(x...) do { } while (0) #endif #if QDIO_VERBOSE_LEVEL>1 #define QDIO_PRINT_EMERG(x...) printk( QDIO_PRINTK_HEADER x) #else -#define QDIO_PRINT_EMERG(x...) +#define QDIO_PRINT_EMERG(x...) do { } while (0) #endif #define HEXDUMP16(importance,header,ptr) \ diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h index 94d1b74db356..7c84b3d4bd94 100644 --- a/drivers/s390/scsi/zfcp_def.h +++ b/drivers/s390/scsi/zfcp_def.h @@ -543,7 +543,7 @@ do { \ } while (0) #if ZFCP_LOG_LEVEL_LIMIT < ZFCP_LOG_LEVEL_NORMAL -# define ZFCP_LOG_NORMAL(fmt, args...) +# define ZFCP_LOG_NORMAL(fmt, args...) do { } while (0) #else # define ZFCP_LOG_NORMAL(fmt, args...) \ do { \ @@ -553,7 +553,7 @@ do { \ #endif #if ZFCP_LOG_LEVEL_LIMIT < ZFCP_LOG_LEVEL_INFO -# define ZFCP_LOG_INFO(fmt, args...) +# define ZFCP_LOG_INFO(fmt, args...) do { } while (0) #else # define ZFCP_LOG_INFO(fmt, args...) \ do { \ @@ -563,14 +563,14 @@ do { \ #endif #if ZFCP_LOG_LEVEL_LIMIT < ZFCP_LOG_LEVEL_DEBUG -# define ZFCP_LOG_DEBUG(fmt, args...) +# define ZFCP_LOG_DEBUG(fmt, args...) do { } while (0) #else # define ZFCP_LOG_DEBUG(fmt, args...) \ ZFCP_LOG(ZFCP_LOG_LEVEL_DEBUG, fmt , ##args) #endif #if ZFCP_LOG_LEVEL_LIMIT < ZFCP_LOG_LEVEL_TRACE -# define ZFCP_LOG_TRACE(fmt, args...) +# define ZFCP_LOG_TRACE(fmt, args...) do { } while (0) #else # define ZFCP_LOG_TRACE(fmt, args...) \ ZFCP_LOG(ZFCP_LOG_LEVEL_TRACE, fmt , ##args) diff --git a/include/asm-s390/dma.h b/include/asm-s390/dma.h index 02720c449cd8..7425c6af6cd4 100644 --- a/include/asm-s390/dma.h +++ b/include/asm-s390/dma.h @@ -11,6 +11,6 @@ #define MAX_DMA_ADDRESS 0x80000000 -#define free_dma(x) +#define free_dma(x) do { } while (0) #endif /* _ASM_DMA_H */ diff --git a/include/asm-s390/io.h b/include/asm-s390/io.h index d4614b35f423..a6cc27e77007 100644 --- a/include/asm-s390/io.h +++ b/include/asm-s390/io.h @@ -116,7 +116,7 @@ extern void iounmap(void *addr); #define outb(x,addr) ((void) writeb(x,addr)) #define outb_p(x,addr) outb(x,addr) -#define mmiowb() +#define mmiowb() do { } while (0) /* * Convert a physical pointer to a virtual kernel pointer for /dev/mem diff --git a/include/asm-s390/smp.h b/include/asm-s390/smp.h index 657646054c5e..9fb02e9779c9 100644 --- a/include/asm-s390/smp.h +++ b/include/asm-s390/smp.h @@ -104,7 +104,7 @@ smp_call_function_on(void (*func) (void *info), void *info, #define smp_cpu_not_running(cpu) 1 #define smp_get_cpu(cpu) ({ 0; }) #define smp_put_cpu(cpu) ({ 0; }) -#define smp_setup_cpu_possible_map() +#define smp_setup_cpu_possible_map() do { } while (0) #endif #endif From 8427082a506f7ae0abf82ce0047a045ec4309e59 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 20 Sep 2006 15:58:54 +0200 Subject: [PATCH 0234/1063] [S390] fix syscall restart handling. If do_signal() gets called several times before returning to user space and no signal is pending (e.g. cancelled by a debugger) syscall restart handling could be done several times. This would change the user space PSW to an address prior to the syscall instruction. Fix this by making sure that syscall restart handling is only done once. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/signal.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index a887b686f279..dd05423f87a8 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -457,6 +457,7 @@ void do_signal(struct pt_regs *regs) case -ERESTART_RESTARTBLOCK: regs->gprs[2] = -EINTR; } + regs->trap = -1; /* Don't deal with this again. */ } /* Get signal to deliver. When running under ptrace, at this point From ba8ce5c6f0a15f08eae39880a0de296007f4a4e7 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 20 Sep 2006 15:58:56 +0200 Subject: [PATCH 0235/1063] [S390] #undef in unistd.h Avoid using #undef in unistd.h. Signed-off-by: Martin Schwidefsky --- include/asm-s390/unistd.h | 170 +++++++++++++------------------------- 1 file changed, 59 insertions(+), 111 deletions(-) diff --git a/include/asm-s390/unistd.h b/include/asm-s390/unistd.h index aa7a243862e1..02b942d85c37 100644 --- a/include/asm-s390/unistd.h +++ b/include/asm-s390/unistd.h @@ -25,17 +25,12 @@ #define __NR_unlink 10 #define __NR_execve 11 #define __NR_chdir 12 -#define __NR_time 13 #define __NR_mknod 14 #define __NR_chmod 15 -#define __NR_lchown 16 #define __NR_lseek 19 #define __NR_getpid 20 #define __NR_mount 21 #define __NR_umount 22 -#define __NR_setuid 23 -#define __NR_getuid 24 -#define __NR_stime 25 #define __NR_ptrace 26 #define __NR_alarm 27 #define __NR_pause 29 @@ -51,11 +46,7 @@ #define __NR_pipe 42 #define __NR_times 43 #define __NR_brk 45 -#define __NR_setgid 46 -#define __NR_getgid 47 #define __NR_signal 48 -#define __NR_geteuid 49 -#define __NR_getegid 50 #define __NR_acct 51 #define __NR_umount2 52 #define __NR_ioctl 54 @@ -69,18 +60,13 @@ #define __NR_getpgrp 65 #define __NR_setsid 66 #define __NR_sigaction 67 -#define __NR_setreuid 70 -#define __NR_setregid 71 #define __NR_sigsuspend 72 #define __NR_sigpending 73 #define __NR_sethostname 74 #define __NR_setrlimit 75 -#define __NR_getrlimit 76 #define __NR_getrusage 77 #define __NR_gettimeofday 78 #define __NR_settimeofday 79 -#define __NR_getgroups 80 -#define __NR_setgroups 81 #define __NR_symlink 83 #define __NR_readlink 85 #define __NR_uselib 86 @@ -92,12 +78,10 @@ #define __NR_truncate 92 #define __NR_ftruncate 93 #define __NR_fchmod 94 -#define __NR_fchown 95 #define __NR_getpriority 96 #define __NR_setpriority 97 #define __NR_statfs 99 #define __NR_fstatfs 100 -#define __NR_ioperm 101 #define __NR_socketcall 102 #define __NR_syslog 103 #define __NR_setitimer 104 @@ -131,11 +115,7 @@ #define __NR_sysfs 135 #define __NR_personality 136 #define __NR_afs_syscall 137 /* Syscall for Andrew File System */ -#define __NR_setfsuid 138 -#define __NR_setfsgid 139 -#define __NR__llseek 140 #define __NR_getdents 141 -#define __NR__newselect 142 #define __NR_flock 143 #define __NR_msync 144 #define __NR_readv 145 @@ -157,13 +137,9 @@ #define __NR_sched_rr_get_interval 161 #define __NR_nanosleep 162 #define __NR_mremap 163 -#define __NR_setresuid 164 -#define __NR_getresuid 165 #define __NR_query_module 167 #define __NR_poll 168 #define __NR_nfsservctl 169 -#define __NR_setresgid 170 -#define __NR_getresgid 171 #define __NR_prctl 172 #define __NR_rt_sigreturn 173 #define __NR_rt_sigaction 174 @@ -174,7 +150,6 @@ #define __NR_rt_sigsuspend 179 #define __NR_pread64 180 #define __NR_pwrite64 181 -#define __NR_chown 182 #define __NR_getcwd 183 #define __NR_capget 184 #define __NR_capset 185 @@ -183,39 +158,11 @@ #define __NR_getpmsg 188 #define __NR_putpmsg 189 #define __NR_vfork 190 -#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ -#define __NR_mmap2 192 -#define __NR_truncate64 193 -#define __NR_ftruncate64 194 -#define __NR_stat64 195 -#define __NR_lstat64 196 -#define __NR_fstat64 197 -#define __NR_lchown32 198 -#define __NR_getuid32 199 -#define __NR_getgid32 200 -#define __NR_geteuid32 201 -#define __NR_getegid32 202 -#define __NR_setreuid32 203 -#define __NR_setregid32 204 -#define __NR_getgroups32 205 -#define __NR_setgroups32 206 -#define __NR_fchown32 207 -#define __NR_setresuid32 208 -#define __NR_getresuid32 209 -#define __NR_setresgid32 210 -#define __NR_getresgid32 211 -#define __NR_chown32 212 -#define __NR_setuid32 213 -#define __NR_setgid32 214 -#define __NR_setfsuid32 215 -#define __NR_setfsgid32 216 #define __NR_pivot_root 217 #define __NR_mincore 218 #define __NR_madvise 219 #define __NR_getdents64 220 -#define __NR_fcntl64 221 #define __NR_readahead 222 -#define __NR_sendfile64 223 #define __NR_setxattr 224 #define __NR_lsetxattr 225 #define __NR_fsetxattr 226 @@ -256,7 +203,6 @@ #define __NR_clock_getres (__NR_timer_create+7) #define __NR_clock_nanosleep (__NR_timer_create+8) /* Number 263 is reserved for vserver */ -#define __NR_fadvise64_64 264 #define __NR_statfs64 265 #define __NR_fstatfs64 266 #define __NR_remap_file_pages 267 @@ -285,7 +231,6 @@ #define __NR_mknodat 290 #define __NR_fchownat 291 #define __NR_futimesat 292 -#define __NR_fstatat64 293 #define __NR_unlinkat 294 #define __NR_renameat 295 #define __NR_linkat 296 @@ -310,62 +255,65 @@ * have a different name although they do the same (e.g. __NR_chown32 * is __NR_chown on 64 bit). */ -#ifdef __s390x__ -#undef __NR_time -#undef __NR_lchown -#undef __NR_setuid -#undef __NR_getuid -#undef __NR_stime -#undef __NR_setgid -#undef __NR_getgid -#undef __NR_geteuid -#undef __NR_getegid -#undef __NR_setreuid -#undef __NR_setregid -#undef __NR_getrlimit -#undef __NR_getgroups -#undef __NR_setgroups -#undef __NR_fchown -#undef __NR_ioperm -#undef __NR_setfsuid -#undef __NR_setfsgid -#undef __NR__llseek -#undef __NR__newselect -#undef __NR_setresuid -#undef __NR_getresuid -#undef __NR_setresgid -#undef __NR_getresgid -#undef __NR_chown -#undef __NR_ugetrlimit -#undef __NR_mmap2 -#undef __NR_truncate64 -#undef __NR_ftruncate64 -#undef __NR_stat64 -#undef __NR_lstat64 -#undef __NR_fstat64 -#undef __NR_lchown32 -#undef __NR_getuid32 -#undef __NR_getgid32 -#undef __NR_geteuid32 -#undef __NR_getegid32 -#undef __NR_setreuid32 -#undef __NR_setregid32 -#undef __NR_getgroups32 -#undef __NR_setgroups32 -#undef __NR_fchown32 -#undef __NR_setresuid32 -#undef __NR_getresuid32 -#undef __NR_setresgid32 -#undef __NR_getresgid32 -#undef __NR_chown32 -#undef __NR_setuid32 -#undef __NR_setgid32 -#undef __NR_setfsuid32 -#undef __NR_setfsgid32 -#undef __NR_fcntl64 -#undef __NR_sendfile64 -#undef __NR_fadvise64_64 -#undef __NR_fstatat64 +#ifndef __s390x__ + +#define __NR_time 13 +#define __NR_lchown 16 +#define __NR_setuid 23 +#define __NR_getuid 24 +#define __NR_stime 25 +#define __NR_setgid 46 +#define __NR_getgid 47 +#define __NR_geteuid 49 +#define __NR_getegid 50 +#define __NR_setreuid 70 +#define __NR_setregid 71 +#define __NR_getrlimit 76 +#define __NR_getgroups 80 +#define __NR_setgroups 81 +#define __NR_fchown 95 +#define __NR_ioperm 101 +#define __NR_setfsuid 138 +#define __NR_setfsgid 139 +#define __NR__llseek 140 +#define __NR__newselect 142 +#define __NR_setresuid 164 +#define __NR_getresuid 165 +#define __NR_setresgid 170 +#define __NR_getresgid 171 +#define __NR_chown 182 +#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ +#define __NR_mmap2 192 +#define __NR_truncate64 193 +#define __NR_ftruncate64 194 +#define __NR_stat64 195 +#define __NR_lstat64 196 +#define __NR_fstat64 197 +#define __NR_lchown32 198 +#define __NR_getuid32 199 +#define __NR_getgid32 200 +#define __NR_geteuid32 201 +#define __NR_getegid32 202 +#define __NR_setreuid32 203 +#define __NR_setregid32 204 +#define __NR_getgroups32 205 +#define __NR_setgroups32 206 +#define __NR_fchown32 207 +#define __NR_setresuid32 208 +#define __NR_getresuid32 209 +#define __NR_setresgid32 210 +#define __NR_getresgid32 211 +#define __NR_chown32 212 +#define __NR_setuid32 213 +#define __NR_setgid32 214 +#define __NR_setfsuid32 215 +#define __NR_setfsgid32 216 +#define __NR_fcntl64 221 +#define __NR_sendfile64 223 +#define __NR_fadvise64_64 264 +#define __NR_fstatat64 293 + +#else #define __NR_select 142 #define __NR_getrlimit 191 /* SuS compliant getrlimit */ From 8301425534b87bae9990261f3008f39999be738c Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 20 Sep 2006 15:58:58 +0200 Subject: [PATCH 0236/1063] [S390] architecture co-maintainer. Add Heiko Carstens as co-maintainer for the s390 architecture. Signed-off-by: Martin Schwidefsky --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index a34c53c08742..576a7f230840 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2445,6 +2445,8 @@ S: Maintained S390 P: Martin Schwidefsky M: schwidefsky@de.ibm.com +P: Heiko Carstens +M: heiko.carstens@de.ibm.com M: linux390@de.ibm.com L: linux-390@vm.marist.edu W: http://www.ibm.com/developerworks/linux/linux390/ From 7d5d688f724dd5a651d1ce7bc3ea7c03d28137a1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 20 Sep 2006 15:59:00 +0200 Subject: [PATCH 0237/1063] [S390] Use simple_strtoul instead of own cmm_strtoul wrapper. Fix compile warning with some configurations: arch/s390/mm/cmm.c:58: warning: 'cmm_strtoul' defined but not used Originally cmm_strtoul was introduced because simple_strtoul couldn't handle strings with hexadecimal numbers that contained a capital 'X'. Since this is no longer true cmm_strtoul can be removed. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/cmm.c | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index ceea51cff03b..786a44dba5bf 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -52,22 +52,6 @@ static struct timer_list cmm_timer; static void cmm_timer_fn(unsigned long); static void cmm_set_timer(void); -static long -cmm_strtoul(const char *cp, char **endp) -{ - unsigned int base = 10; - - if (*cp == '0') { - base = 8; - cp++; - if ((*cp == 'x' || *cp == 'X') && isxdigit(cp[1])) { - base = 16; - cp++; - } - } - return simple_strtoul(cp, endp, base); -} - static long cmm_alloc_pages(long pages, long *counter, struct cmm_page_array **list) { @@ -276,7 +260,7 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp, return -EFAULT; buf[sizeof(buf) - 1] = '\0'; cmm_skip_blanks(buf, &p); - pages = cmm_strtoul(p, &p); + pages = simple_strtoul(p, &p, 0); if (ctl == &cmm_table[0]) cmm_set_pages(pages); else @@ -317,9 +301,9 @@ cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp, return -EFAULT; buf[sizeof(buf) - 1] = '\0'; cmm_skip_blanks(buf, &p); - pages = cmm_strtoul(p, &p); + pages = simple_strtoul(p, &p, 0); cmm_skip_blanks(p, &p); - seconds = cmm_strtoul(p, &p); + seconds = simple_strtoul(p, &p, 0); cmm_set_timeout(pages, seconds); } else { len = sprintf(buf, "%ld %ld\n", @@ -382,24 +366,24 @@ cmm_smsg_target(char *from, char *msg) if (strncmp(msg, "SHRINK", 6) == 0) { if (!cmm_skip_blanks(msg + 6, &msg)) return; - pages = cmm_strtoul(msg, &msg); + pages = simple_strtoul(msg, &msg, 0); cmm_skip_blanks(msg, &msg); if (*msg == '\0') cmm_set_pages(pages); } else if (strncmp(msg, "RELEASE", 7) == 0) { if (!cmm_skip_blanks(msg + 7, &msg)) return; - pages = cmm_strtoul(msg, &msg); + pages = simple_strtoul(msg, &msg, 0); cmm_skip_blanks(msg, &msg); if (*msg == '\0') cmm_add_timed_pages(pages); } else if (strncmp(msg, "REUSE", 5) == 0) { if (!cmm_skip_blanks(msg + 5, &msg)) return; - pages = cmm_strtoul(msg, &msg); + pages = simple_strtoul(msg, &msg, 0); if (!cmm_skip_blanks(msg, &msg)) return; - seconds = cmm_strtoul(msg, &msg); + seconds = simple_strtoul(msg, &msg, 0); cmm_skip_blanks(msg, &msg); if (*msg == '\0') cmm_set_timeout(pages, seconds); From 47addc84b450fd5e391ab118e178645cb0bbd89d Mon Sep 17 00:00:00 2001 From: Frank Pavlic Date: Wed, 20 Sep 2006 15:59:03 +0200 Subject: [PATCH 0238/1063] [S390] qdio_get_micros return value. qdio_get_micros is supposed to return microseconds. The get_clock() return value needs to be shifted by 12 to get to microseconds. Signed-off-by: Frank Pavlic Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/qdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/cio/qdio.c b/drivers/s390/cio/qdio.c index 7c93a8798d23..16e3715d5c0d 100644 --- a/drivers/s390/cio/qdio.c +++ b/drivers/s390/cio/qdio.c @@ -115,7 +115,7 @@ qdio_min(int a,int b) static inline __u64 qdio_get_micros(void) { - return (get_clock() >> 10); /* time>>12 is microseconds */ + return (get_clock() >> 12); /* time>>12 is microseconds */ } /* From a00bfd7147c0c5c04a59f7adcb0e6d8948b90a6e Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 20 Sep 2006 15:59:05 +0200 Subject: [PATCH 0239/1063] [S390] dasd deadlock after state change pending interrupt. The dasd_device_from_cdev function is called from interrupt context to get the struct dasd_device associated with a ccw device. The driver_data of the ccw device points to the dasd_devmap structure which contains the pointer to the dasd_device structure. The lock that protects the dasd_devmap structure is acquire with out irqsave. To prevent the deadlock in dasd_device_from_cdev if it is called from interrupt context the dependency to the dasd_devmap structure needs to be removed. Let the driver_data of the ccw device point to the dasd_device structure directly and use the ccw device lock to protect the access. Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 4 +- drivers/s390/block/dasd_devmap.c | 74 +++++++++++++++++++++----------- drivers/s390/block/dasd_int.h | 1 + 3 files changed, 51 insertions(+), 28 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 25c1ef6dfd44..3cd87f85f702 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -893,7 +893,7 @@ dasd_handle_killed_request(struct ccw_device *cdev, unsigned long intparm) device = (struct dasd_device *) cqr->device; if (device == NULL || - device != dasd_device_from_cdev(cdev) || + device != dasd_device_from_cdev_locked(cdev) || strncmp(device->discipline->ebcname, (char *) &cqr->magic, 4)) { MESSAGE(KERN_DEBUG, "invalid device in request: bus_id %s", cdev->dev.bus_id); @@ -970,7 +970,7 @@ dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, /* first of all check for state change pending interrupt */ mask = DEV_STAT_ATTENTION | DEV_STAT_DEV_END | DEV_STAT_UNIT_EXCEP; if ((irb->scsw.dstat & mask) == mask) { - device = dasd_device_from_cdev(cdev); + device = dasd_device_from_cdev_locked(cdev); if (!IS_ERR(device)) { dasd_handle_state_change_pending(device); dasd_put_device(device); diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index 9af02c79ce8a..80cf0999465a 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -523,17 +523,17 @@ dasd_create_device(struct ccw_device *cdev) { struct dasd_devmap *devmap; struct dasd_device *device; + unsigned long flags; int rc; devmap = dasd_devmap_from_cdev(cdev); if (IS_ERR(devmap)) return (void *) devmap; - cdev->dev.driver_data = devmap; device = dasd_alloc_device(); if (IS_ERR(device)) return device; - atomic_set(&device->ref_count, 2); + atomic_set(&device->ref_count, 3); spin_lock(&dasd_devmap_lock); if (!devmap->device) { @@ -552,6 +552,11 @@ dasd_create_device(struct ccw_device *cdev) dasd_free_device(device); return ERR_PTR(rc); } + + spin_lock_irqsave(get_ccwdev_lock(cdev), flags); + cdev->dev.driver_data = device; + spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); + return device; } @@ -569,6 +574,7 @@ dasd_delete_device(struct dasd_device *device) { struct ccw_device *cdev; struct dasd_devmap *devmap; + unsigned long flags; /* First remove device pointer from devmap. */ devmap = dasd_find_busid(device->cdev->dev.bus_id); @@ -582,9 +588,16 @@ dasd_delete_device(struct dasd_device *device) devmap->device = NULL; spin_unlock(&dasd_devmap_lock); - /* Drop ref_count by 2, one for the devmap reference and - * one for the passed reference. */ - atomic_sub(2, &device->ref_count); + /* Disconnect dasd_device structure from ccw_device structure. */ + spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); + device->cdev->dev.driver_data = NULL; + spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); + + /* + * Drop ref_count by 3, one for the devmap reference, one for + * the cdev reference and one for the passed reference. + */ + atomic_sub(3, &device->ref_count); /* Wait for reference counter to drop to zero. */ wait_event(dasd_delete_wq, atomic_read(&device->ref_count) == 0); @@ -593,9 +606,6 @@ dasd_delete_device(struct dasd_device *device) cdev = device->cdev; device->cdev = NULL; - /* Disconnect dasd_devmap structure from ccw_device structure. */ - cdev->dev.driver_data = NULL; - /* Put ccw_device structure. */ put_device(&cdev->dev); @@ -613,23 +623,34 @@ dasd_put_device_wake(struct dasd_device *device) wake_up(&dasd_delete_wq); } +/* + * Return dasd_device structure associated with cdev. + * This function needs to be called with the ccw device + * lock held. It can be used from interrupt context. + */ +struct dasd_device * +dasd_device_from_cdev_locked(struct ccw_device *cdev) +{ + struct dasd_device *device = cdev->dev.driver_data; + + if (!device) + return ERR_PTR(-ENODEV); + dasd_get_device(device); + return device; +} + /* * Return dasd_device structure associated with cdev. */ struct dasd_device * dasd_device_from_cdev(struct ccw_device *cdev) { - struct dasd_devmap *devmap; struct dasd_device *device; + unsigned long flags; - device = ERR_PTR(-ENODEV); - spin_lock(&dasd_devmap_lock); - devmap = cdev->dev.driver_data; - if (devmap && devmap->device) { - device = devmap->device; - dasd_get_device(device); - } - spin_unlock(&dasd_devmap_lock); + spin_lock_irqsave(get_ccwdev_lock(cdev), flags); + device = dasd_device_from_cdev_locked(cdev); + spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); return device; } @@ -730,16 +751,17 @@ static ssize_t dasd_discipline_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct dasd_devmap *devmap; - char *dname; + struct dasd_device *device; + ssize_t len; - spin_lock(&dasd_devmap_lock); - dname = "none"; - devmap = dev->driver_data; - if (devmap && devmap->device && devmap->device->discipline) - dname = devmap->device->discipline->name; - spin_unlock(&dasd_devmap_lock); - return snprintf(buf, PAGE_SIZE, "%s\n", dname); + device = dasd_device_from_cdev(to_ccwdev(dev)); + if (!IS_ERR(device) && device->discipline) { + len = snprintf(buf, PAGE_SIZE, "%s\n", + device->discipline->name); + dasd_put_device(device); + } else + len = snprintf(buf, PAGE_SIZE, "none\n"); + return len; } static DEVICE_ATTR(discipline, 0444, dasd_discipline_show, NULL); diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 3ccf06d28ba1..9f52004f6fc2 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -534,6 +534,7 @@ int dasd_add_sysfs_files(struct ccw_device *); void dasd_remove_sysfs_files(struct ccw_device *); struct dasd_device *dasd_device_from_cdev(struct ccw_device *); +struct dasd_device *dasd_device_from_cdev_locked(struct ccw_device *); struct dasd_device *dasd_device_from_devindex(int); int dasd_parse(void); From b0035f127e007ea0afc8baad740093eb124f7b0b Mon Sep 17 00:00:00 2001 From: Horst Hummel Date: Wed, 20 Sep 2006 15:59:07 +0200 Subject: [PATCH 0240/1063] [S390] dasd default debug level. Enhanced default DBF level to get most important messages in debug feature files. Signed-off-by: Horst Hummel Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 3cd87f85f702..d0647d116eaa 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -184,7 +184,7 @@ dasd_state_known_to_basic(struct dasd_device * device) device->debug_area = debug_register(device->cdev->dev.bus_id, 1, 2, 8 * sizeof (long)); debug_register_view(device->debug_area, &debug_sprintf_view); - debug_set_level(device->debug_area, DBF_EMERG); + debug_set_level(device->debug_area, DBF_WARNING); DBF_DEV_EVENT(DBF_EMERG, device, "%s", "debug area created"); device->state = DASD_STATE_BASIC; @@ -2169,7 +2169,7 @@ dasd_init(void) goto failed; } debug_register_view(dasd_debug_area, &debug_sprintf_view); - debug_set_level(dasd_debug_area, DBF_EMERG); + debug_set_level(dasd_debug_area, DBF_WARNING); DBF_EVENT(DBF_EMERG, "%s", "debug area created"); From 0fee644ada12c524abbf723132fbea6a082ecfc2 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 20 Sep 2006 15:59:10 +0200 Subject: [PATCH 0241/1063] [S390] cleanup sysinfo and add system z9 specific extensions. With System z9 additional fields have been added to the output of the store system information instruction. This patch adds the new model information field and the alternate cpu capability fields to the output of /proc/sysinfo. While we at it clean up the code as well. Signed-off-by: Martin Schwidefsky --- drivers/s390/sysinfo.c | 475 +++++++++++++++++++++-------------------- 1 file changed, 241 insertions(+), 234 deletions(-) diff --git a/drivers/s390/sysinfo.c b/drivers/s390/sysinfo.c index d1c1e75bfd60..1e788e815ce7 100644 --- a/drivers/s390/sysinfo.c +++ b/drivers/s390/sysinfo.c @@ -11,19 +11,18 @@ #include #include -struct sysinfo_1_1_1 -{ +struct sysinfo_1_1_1 { char reserved_0[32]; char manufacturer[16]; char type[4]; char reserved_1[12]; - char model[16]; + char model_capacity[16]; char sequence[16]; char plant[4]; + char model[16]; }; -struct sysinfo_1_2_1 -{ +struct sysinfo_1_2_1 { char reserved_0[80]; char sequence[16]; char plant[4]; @@ -31,9 +30,12 @@ struct sysinfo_1_2_1 unsigned short cpu_address; }; -struct sysinfo_1_2_2 -{ - char reserved_0[32]; +struct sysinfo_1_2_2 { + char format; + char reserved_0[1]; + unsigned short acc_offset; + char reserved_1[24]; + unsigned int secondary_capability; unsigned int capability; unsigned short cpus_total; unsigned short cpus_configured; @@ -42,8 +44,12 @@ struct sysinfo_1_2_2 unsigned short adjustment[0]; }; -struct sysinfo_2_2_1 -{ +struct sysinfo_1_2_2_extension { + unsigned int alt_capability; + unsigned short alt_adjustment[0]; +}; + +struct sysinfo_2_2_1 { char reserved_0[80]; char sequence[16]; char plant[4]; @@ -51,15 +57,11 @@ struct sysinfo_2_2_1 unsigned short cpu_address; }; -struct sysinfo_2_2_2 -{ +struct sysinfo_2_2_2 { char reserved_0[32]; unsigned short lpar_number; char reserved_1; unsigned char characteristics; - #define LPAR_CHAR_DEDICATED (1 << 7) - #define LPAR_CHAR_SHARED (1 << 6) - #define LPAR_CHAR_LIMITED (1 << 5) unsigned short cpus_total; unsigned short cpus_configured; unsigned short cpus_standby; @@ -71,12 +73,14 @@ struct sysinfo_2_2_2 unsigned short cpus_shared; }; -struct sysinfo_3_2_2 -{ +#define LPAR_CHAR_DEDICATED (1 << 7) +#define LPAR_CHAR_SHARED (1 << 6) +#define LPAR_CHAR_LIMITED (1 << 5) + +struct sysinfo_3_2_2 { char reserved_0[31]; unsigned char count; - struct - { + struct { char reserved_0[4]; unsigned short cpus_total; unsigned short cpus_configured; @@ -90,136 +94,223 @@ struct sysinfo_3_2_2 } vm[8]; }; -union s390_sysinfo +static inline int stsi(void *sysinfo, int fc, int sel1, int sel2) { - struct sysinfo_1_1_1 sysinfo_1_1_1; - struct sysinfo_1_2_1 sysinfo_1_2_1; - struct sysinfo_1_2_2 sysinfo_1_2_2; - struct sysinfo_2_2_1 sysinfo_2_2_1; - struct sysinfo_2_2_2 sysinfo_2_2_2; - struct sysinfo_3_2_2 sysinfo_3_2_2; -}; + register int r0 asm("0") = (fc << 28) | sel1; + register int r1 asm("1") = sel2; -static inline int stsi (void *sysinfo, - int fc, int sel1, int sel2) -{ - int cc, retv; - -#ifndef CONFIG_64BIT - __asm__ __volatile__ ( "lr\t0,%2\n" - "\tlr\t1,%3\n" - "\tstsi\t0(%4)\n" - "0:\tipm\t%0\n" - "\tsrl\t%0,28\n" - "1:lr\t%1,0\n" - ".section .fixup,\"ax\"\n" - "2:\tlhi\t%0,3\n" - "\tbras\t1,3f\n" - "\t.long 1b\n" - "3:\tl\t1,0(1)\n" - "\tbr\t1\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - "\t.align 4\n" - "\t.long 0b,2b\n" - ".previous\n" - : "=d" (cc), "=d" (retv) - : "d" ((fc << 28) | sel1), "d" (sel2), "a" (sysinfo) - : "cc", "memory", "0", "1" ); -#else - __asm__ __volatile__ ( "lr\t0,%2\n" - "lr\t1,%3\n" - "\tstsi\t0(%4)\n" - "0:\tipm\t%0\n" - "\tsrl\t%0,28\n" - "1:lr\t%1,0\n" - ".section .fixup,\"ax\"\n" - "2:\tlhi\t%0,3\n" - "\tjg\t1b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - "\t.align 8\n" - "\t.quad 0b,2b\n" - ".previous\n" - : "=d" (cc), "=d" (retv) - : "d" ((fc << 28) | sel1), "d" (sel2), "a" (sysinfo) - : "cc", "memory", "0", "1" ); -#endif - - return cc? -1 : retv; + asm volatile( + " stsi 0(%2)\n" + "0: jz 2f\n" + "1: lhi %0,%3\n" + "2:\n" + EX_TABLE(0b,1b) + : "+d" (r0) : "d" (r1), "a" (sysinfo), "K" (-ENOSYS) + : "cc", "memory" ); + return r0; } -static inline int stsi_0 (void) +static inline int stsi_0(void) { int rc = stsi (NULL, 0, 0, 0); - return rc == -1 ? rc : (((unsigned int)rc) >> 28); + return rc == -ENOSYS ? rc : (((unsigned int) rc) >> 28); } -static inline int stsi_1_1_1 (struct sysinfo_1_1_1 *info) +static int stsi_1_1_1(struct sysinfo_1_1_1 *info, char *page, int len) { - int rc = stsi (info, 1, 1, 1); - if (rc != -1) - { - EBCASC (info->manufacturer, sizeof(info->manufacturer)); - EBCASC (info->type, sizeof(info->type)); - EBCASC (info->model, sizeof(info->model)); - EBCASC (info->sequence, sizeof(info->sequence)); - EBCASC (info->plant, sizeof(info->plant)); + if (stsi(info, 1, 1, 1) == -ENOSYS) + return len; + + EBCASC(info->manufacturer, sizeof(info->manufacturer)); + EBCASC(info->type, sizeof(info->type)); + EBCASC(info->model, sizeof(info->model)); + EBCASC(info->sequence, sizeof(info->sequence)); + EBCASC(info->plant, sizeof(info->plant)); + EBCASC(info->model_capacity, sizeof(info->model_capacity)); + len += sprintf(page + len, "Manufacturer: %-16.16s\n", + info->manufacturer); + len += sprintf(page + len, "Type: %-4.4s\n", + info->type); + if (info->model[0] != '\0') + /* + * Sigh: the model field has been renamed with System z9 + * to model_capacity and a new model field has been added + * after the plant field. To avoid confusing older programs + * the "Model:" prints "model_capacity model" or just + * "model_capacity" if the model string is empty . + */ + len += sprintf(page + len, + "Model: %-16.16s %-16.16s\n", + info->model_capacity, info->model); + else + len += sprintf(page + len, "Model: %-16.16s\n", + info->model_capacity); + len += sprintf(page + len, "Sequence Code: %-16.16s\n", + info->sequence); + len += sprintf(page + len, "Plant: %-4.4s\n", + info->plant); + len += sprintf(page + len, "Model Capacity: %-16.16s\n", + info->model_capacity); + return len; +} + +#if 0 /* Currently unused */ +static int stsi_1_2_1(struct sysinfo_1_2_1 *info, char *page, int len) +{ + if (stsi(info, 1, 2, 1) == -ENOSYS) + return len; + + len += sprintf(page + len, "\n"); + EBCASC(info->sequence, sizeof(info->sequence)); + EBCASC(info->plant, sizeof(info->plant)); + len += sprintf(page + len, "Sequence Code of CPU: %-16.16s\n", + info->sequence); + len += sprintf(page + len, "Plant of CPU: %-16.16s\n", + info->plant); + return len; +} +#endif + +static int stsi_1_2_2(struct sysinfo_1_2_2 *info, char *page, int len) +{ + struct sysinfo_1_2_2_extension *ext; + int i; + + if (stsi(info, 1, 2, 2) == -ENOSYS) + return len; + ext = (struct sysinfo_1_2_2_extension *) + ((unsigned long) info + info->acc_offset); + + len += sprintf(page + len, "\n"); + len += sprintf(page + len, "CPUs Total: %d\n", + info->cpus_total); + len += sprintf(page + len, "CPUs Configured: %d\n", + info->cpus_configured); + len += sprintf(page + len, "CPUs Standby: %d\n", + info->cpus_standby); + len += sprintf(page + len, "CPUs Reserved: %d\n", + info->cpus_reserved); + + if (info->format == 1) { + /* + * Sigh 2. According to the specification the alternate + * capability field is a 32 bit floating point number + * if the higher order 8 bits are not zero. Printing + * a floating point number in the kernel is a no-no, + * always print the number as 32 bit unsigned integer. + * The user-space needs to know about the stange + * encoding of the alternate cpu capability. + */ + len += sprintf(page + len, "Capability: %u %u\n", + info->capability, ext->alt_capability); + for (i = 2; i <= info->cpus_total; i++) + len += sprintf(page + len, + "Adjustment %02d-way: %u %u\n", + i, info->adjustment[i-2], + ext->alt_adjustment[i-2]); + + } else { + len += sprintf(page + len, "Capability: %u\n", + info->capability); + for (i = 2; i <= info->cpus_total; i++) + len += sprintf(page + len, + "Adjustment %02d-way: %u\n", + i, info->adjustment[i-2]); } - return rc == -1 ? rc : 0; + + if (info->secondary_capability != 0) + len += sprintf(page + len, "Secondary Capability: %d\n", + info->secondary_capability); + + return len; } -static inline int stsi_1_2_1 (struct sysinfo_1_2_1 *info) +#if 0 /* Currently unused */ +static int stsi_2_2_1(struct sysinfo_2_2_1 *info, char *page, int len) { - int rc = stsi (info, 1, 2, 1); - if (rc != -1) - { - EBCASC (info->sequence, sizeof(info->sequence)); - EBCASC (info->plant, sizeof(info->plant)); + if (stsi(info, 2, 2, 1) == -ENOSYS) + return len; + + len += sprintf(page + len, "\n"); + EBCASC (info->sequence, sizeof(info->sequence)); + EBCASC (info->plant, sizeof(info->plant)); + len += sprintf(page + len, "Sequence Code of logical CPU: %-16.16s\n", + info->sequence); + len += sprintf(page + len, "Plant of logical CPU: %-16.16s\n", + info->plant); + return len; +} +#endif + +static int stsi_2_2_2(struct sysinfo_2_2_2 *info, char *page, int len) +{ + if (stsi(info, 2, 2, 2) == -ENOSYS) + return len; + + EBCASC (info->name, sizeof(info->name)); + + len += sprintf(page + len, "\n"); + len += sprintf(page + len, "LPAR Number: %d\n", + info->lpar_number); + + len += sprintf(page + len, "LPAR Characteristics: "); + if (info->characteristics & LPAR_CHAR_DEDICATED) + len += sprintf(page + len, "Dedicated "); + if (info->characteristics & LPAR_CHAR_SHARED) + len += sprintf(page + len, "Shared "); + if (info->characteristics & LPAR_CHAR_LIMITED) + len += sprintf(page + len, "Limited "); + len += sprintf(page + len, "\n"); + + len += sprintf(page + len, "LPAR Name: %-8.8s\n", + info->name); + + len += sprintf(page + len, "LPAR Adjustment: %d\n", + info->caf); + + len += sprintf(page + len, "LPAR CPUs Total: %d\n", + info->cpus_total); + len += sprintf(page + len, "LPAR CPUs Configured: %d\n", + info->cpus_configured); + len += sprintf(page + len, "LPAR CPUs Standby: %d\n", + info->cpus_standby); + len += sprintf(page + len, "LPAR CPUs Reserved: %d\n", + info->cpus_reserved); + len += sprintf(page + len, "LPAR CPUs Dedicated: %d\n", + info->cpus_dedicated); + len += sprintf(page + len, "LPAR CPUs Shared: %d\n", + info->cpus_shared); + return len; +} + +static int stsi_3_2_2(struct sysinfo_3_2_2 *info, char *page, int len) +{ + int i; + + if (stsi(info, 3, 2, 2) == -ENOSYS) + return len; + for (i = 0; i < info->count; i++) { + EBCASC (info->vm[i].name, sizeof(info->vm[i].name)); + EBCASC (info->vm[i].cpi, sizeof(info->vm[i].cpi)); + len += sprintf(page + len, "\n"); + len += sprintf(page + len, "VM%02d Name: %-8.8s\n", + i, info->vm[i].name); + len += sprintf(page + len, "VM%02d Control Program: %-16.16s\n", + i, info->vm[i].cpi); + + len += sprintf(page + len, "VM%02d Adjustment: %d\n", + i, info->vm[i].caf); + + len += sprintf(page + len, "VM%02d CPUs Total: %d\n", + i, info->vm[i].cpus_total); + len += sprintf(page + len, "VM%02d CPUs Configured: %d\n", + i, info->vm[i].cpus_configured); + len += sprintf(page + len, "VM%02d CPUs Standby: %d\n", + i, info->vm[i].cpus_standby); + len += sprintf(page + len, "VM%02d CPUs Reserved: %d\n", + i, info->vm[i].cpus_reserved); } - return rc == -1 ? rc : 0; -} - -static inline int stsi_1_2_2 (struct sysinfo_1_2_2 *info) -{ - int rc = stsi (info, 1, 2, 2); - return rc == -1 ? rc : 0; -} - -static inline int stsi_2_2_1 (struct sysinfo_2_2_1 *info) -{ - int rc = stsi (info, 2, 2, 1); - if (rc != -1) - { - EBCASC (info->sequence, sizeof(info->sequence)); - EBCASC (info->plant, sizeof(info->plant)); - } - return rc == -1 ? rc : 0; -} - -static inline int stsi_2_2_2 (struct sysinfo_2_2_2 *info) -{ - int rc = stsi (info, 2, 2, 2); - if (rc != -1) - { - EBCASC (info->name, sizeof(info->name)); - } - return rc == -1 ? rc : 0; -} - -static inline int stsi_3_2_2 (struct sysinfo_3_2_2 *info) -{ - int rc = stsi (info, 3, 2, 2); - if (rc != -1) - { - int i; - for (i = 0; i < info->count; i++) - { - EBCASC (info->vm[i].name, sizeof(info->vm[i].name)); - EBCASC (info->vm[i].cpi, sizeof(info->vm[i].cpi)); - } - } - return rc == -1 ? rc : 0; + return len; } @@ -227,118 +318,34 @@ static int proc_read_sysinfo(char *page, char **start, off_t off, int count, int *eof, void *data) { - unsigned long info_page = get_zeroed_page (GFP_KERNEL); - union s390_sysinfo *info = (union s390_sysinfo *) info_page; - int len = 0; - int level; - int i; + unsigned long info = get_zeroed_page (GFP_KERNEL); + int level, len; if (!info) return 0; - level = stsi_0 (); + len = 0; + level = stsi_0(); + if (level >= 1) + len = stsi_1_1_1((struct sysinfo_1_1_1 *) info, page, len); - if (level >= 1 && stsi_1_1_1 (&info->sysinfo_1_1_1) == 0) - { - len += sprintf (page+len, "Manufacturer: %-16.16s\n", - info->sysinfo_1_1_1.manufacturer); - len += sprintf (page+len, "Type: %-4.4s\n", - info->sysinfo_1_1_1.type); - len += sprintf (page+len, "Model: %-16.16s\n", - info->sysinfo_1_1_1.model); - len += sprintf (page+len, "Sequence Code: %-16.16s\n", - info->sysinfo_1_1_1.sequence); - len += sprintf (page+len, "Plant: %-4.4s\n", - info->sysinfo_1_1_1.plant); - } + if (level >= 1) + len = stsi_1_2_2((struct sysinfo_1_2_2 *) info, page, len); - if (level >= 1 && stsi_1_2_2 (&info->sysinfo_1_2_2) == 0) - { - len += sprintf (page+len, "\n"); - len += sprintf (page+len, "CPUs Total: %d\n", - info->sysinfo_1_2_2.cpus_total); - len += sprintf (page+len, "CPUs Configured: %d\n", - info->sysinfo_1_2_2.cpus_configured); - len += sprintf (page+len, "CPUs Standby: %d\n", - info->sysinfo_1_2_2.cpus_standby); - len += sprintf (page+len, "CPUs Reserved: %d\n", - info->sysinfo_1_2_2.cpus_reserved); - - len += sprintf (page+len, "Capability: %d\n", - info->sysinfo_1_2_2.capability); + if (level >= 2) + len = stsi_2_2_2((struct sysinfo_2_2_2 *) info, page, len); - for (i = 2; i <= info->sysinfo_1_2_2.cpus_total; i++) - len += sprintf (page+len, "Adjustment %02d-way: %d\n", - i, info->sysinfo_1_2_2.adjustment[i-2]); - } + if (level >= 3) + len = stsi_3_2_2((struct sysinfo_3_2_2 *) info, page, len); - if (level >= 2 && stsi_2_2_2 (&info->sysinfo_2_2_2) == 0) - { - len += sprintf (page+len, "\n"); - len += sprintf (page+len, "LPAR Number: %d\n", - info->sysinfo_2_2_2.lpar_number); - - len += sprintf (page+len, "LPAR Characteristics: "); - if (info->sysinfo_2_2_2.characteristics & LPAR_CHAR_DEDICATED) - len += sprintf (page+len, "Dedicated "); - if (info->sysinfo_2_2_2.characteristics & LPAR_CHAR_SHARED) - len += sprintf (page+len, "Shared "); - if (info->sysinfo_2_2_2.characteristics & LPAR_CHAR_LIMITED) - len += sprintf (page+len, "Limited "); - len += sprintf (page+len, "\n"); - - len += sprintf (page+len, "LPAR Name: %-8.8s\n", - info->sysinfo_2_2_2.name); - - len += sprintf (page+len, "LPAR Adjustment: %d\n", - info->sysinfo_2_2_2.caf); - - len += sprintf (page+len, "LPAR CPUs Total: %d\n", - info->sysinfo_2_2_2.cpus_total); - len += sprintf (page+len, "LPAR CPUs Configured: %d\n", - info->sysinfo_2_2_2.cpus_configured); - len += sprintf (page+len, "LPAR CPUs Standby: %d\n", - info->sysinfo_2_2_2.cpus_standby); - len += sprintf (page+len, "LPAR CPUs Reserved: %d\n", - info->sysinfo_2_2_2.cpus_reserved); - len += sprintf (page+len, "LPAR CPUs Dedicated: %d\n", - info->sysinfo_2_2_2.cpus_dedicated); - len += sprintf (page+len, "LPAR CPUs Shared: %d\n", - info->sysinfo_2_2_2.cpus_shared); - } - - if (level >= 3 && stsi_3_2_2 (&info->sysinfo_3_2_2) == 0) - { - for (i = 0; i < info->sysinfo_3_2_2.count; i++) - { - len += sprintf (page+len, "\n"); - len += sprintf (page+len, "VM%02d Name: %-8.8s\n", - i, info->sysinfo_3_2_2.vm[i].name); - len += sprintf (page+len, "VM%02d Control Program: %-16.16s\n", - i, info->sysinfo_3_2_2.vm[i].cpi); - - len += sprintf (page+len, "VM%02d Adjustment: %d\n", - i, info->sysinfo_3_2_2.vm[i].caf); - - len += sprintf (page+len, "VM%02d CPUs Total: %d\n", - i, info->sysinfo_3_2_2.vm[i].cpus_total); - len += sprintf (page+len, "VM%02d CPUs Configured: %d\n", - i, info->sysinfo_3_2_2.vm[i].cpus_configured); - len += sprintf (page+len, "VM%02d CPUs Standby: %d\n", - i, info->sysinfo_3_2_2.vm[i].cpus_standby); - len += sprintf (page+len, "VM%02d CPUs Reserved: %d\n", - i, info->sysinfo_3_2_2.vm[i].cpus_reserved); - } - } - - free_page (info_page); + free_page (info); return len; } static __init int create_proc_sysinfo(void) { - create_proc_read_entry ("sysinfo", 0444, NULL, - proc_read_sysinfo, NULL); + create_proc_read_entry("sysinfo", 0444, NULL, + proc_read_sysinfo, NULL); return 0; } From 1375fc1fb0434a26f93c59b1b9f3fdb8bf90bba5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 20 Sep 2006 15:59:12 +0200 Subject: [PATCH 0242/1063] [S390] __exit cleanup. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/hypfs/hypfs_diag.c | 2 +- arch/s390/kernel/vmlinux.lds.S | 2 +- drivers/s390/block/dasd_eer.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index fee5aee605f6..75144efbb92b 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -535,7 +535,7 @@ __init int hypfs_diag_init(void) return rc; } -__exit void hypfs_diag_exit(void) +void hypfs_diag_exit(void) { diag224_delete_name_table(); diag204_free_buffer(); diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index df0c16ab8e92..af9e69a03011 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -118,7 +118,7 @@ SECTIONS /* Sections to be discarded */ /DISCARD/ : { - *(.exitcall.exit) + *(.exit.text) *(.exit.data) *(.exitcall.exit) } /* Stabs debugging sections. */ diff --git a/drivers/s390/block/dasd_eer.c b/drivers/s390/block/dasd_eer.c index da65f1b032f5..e0bf30ebb215 100644 --- a/drivers/s390/block/dasd_eer.c +++ b/drivers/s390/block/dasd_eer.c @@ -678,7 +678,7 @@ int __init dasd_eer_init(void) return 0; } -void __exit dasd_eer_exit(void) +void dasd_eer_exit(void) { WARN_ON(misc_deregister(&dasd_eer_dev) != 0); } From e87bfe51b5ca2db99dd680bbb1e8fe3c94b607df Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 20 Sep 2006 15:59:15 +0200 Subject: [PATCH 0243/1063] [S390] convert some assembler to C. Convert GET_IPL_DEVICE assembler macro to C function. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/head.S | 59 -------------------------------------- arch/s390/kernel/head31.S | 43 ++++++++++++++++++--------- arch/s390/kernel/head64.S | 39 ++++++++++++++++--------- arch/s390/kernel/ipl.c | 4 +-- drivers/s390/cio/cio.c | 38 ++++++++++++++++++++++-- include/asm-s390/lowcore.h | 1 + include/asm-s390/setup.h | 8 ++++-- 7 files changed, 99 insertions(+), 93 deletions(-) diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index a6e9bdb53591..0f1db268a8a9 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -481,65 +481,6 @@ start: .byte 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7 .byte 0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff -.macro GET_IPL_DEVICE -.Lget_ipl_device: - l %r1,0xb8 # get sid - sll %r1,15 # test if subchannel is enabled - srl %r1,31 - ltr %r1,%r1 - bz 2f-.LPG1(%r13) # subchannel disabled - l %r1,0xb8 - la %r5,.Lipl_schib-.LPG1(%r13) - stsch 0(%r5) # get schib of subchannel - bnz 2f-.LPG1(%r13) # schib not available - tm 5(%r5),0x01 # devno valid? - bno 2f-.LPG1(%r13) - la %r6,ipl_parameter_flags-.LPG1(%r13) - oi 3(%r6),0x01 # set flag - la %r2,ipl_devno-.LPG1(%r13) - mvc 0(2,%r2),6(%r5) # store devno - tm 4(%r5),0x80 # qdio capable device? - bno 2f-.LPG1(%r13) - oi 3(%r6),0x02 # set flag - - # copy ipl parameters - - lhi %r0,4096 - l %r2,20(%r0) # get address of parameter list - lhi %r3,IPL_PARMBLOCK_ORIGIN - st %r3,20(%r0) - lhi %r4,1 - cr %r2,%r3 # start parameters < destination ? - jl 0f - lhi %r1,1 # copy direction is upwards - j 1f -0: lhi %r1,-1 # copy direction is downwards - ar %r2,%r0 - ar %r3,%r0 - ar %r2,%r1 - ar %r3,%r1 -1: mvc 0(1,%r3),0(%r2) # finally copy ipl parameters - ar %r3,%r1 - ar %r2,%r1 - sr %r0,%r4 - jne 1b - b 2f-.LPG1(%r13) - - .align 4 -.Lipl_schib: - .rept 13 - .long 0 - .endr - - .globl ipl_parameter_flags -ipl_parameter_flags: - .long 0 - .globl ipl_devno -ipl_devno: - .word 0 -2: -.endm - #ifdef CONFIG_64BIT #include "head64.S" #else diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S index d8bb68a72527..1fa9fa1ca740 100644 --- a/arch/s390/kernel/head31.S +++ b/arch/s390/kernel/head31.S @@ -37,13 +37,23 @@ startup:basr %r13,0 # get base startup_continue: basr %r13,0 # get base -.LPG1: GET_IPL_DEVICE - mvi __LC_AR_MODE_ID,0 # set ESA flag (mode 0) +.LPG1: mvi __LC_AR_MODE_ID,0 # set ESA flag (mode 0) lctl %c0,%c15,.Lctl-.LPG1(%r13) # load control registers l %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area # move IPL device to lowcore mvc __LC_IPLDEV(4),IPL_DEVICE-PARMAREA(%r12) +# +# Setup stack +# + l %r15,.Linittu-.LPG1(%r13) + mvc __LC_CURRENT(4),__TI_task(%r15) + ahi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union+THREAD_SIZE + st %r15,__LC_KERNEL_STACK # set end of kernel stack + ahi %r15,-96 + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear backchain + l %r14,.Lipl_save_parameters-.LPG1(%r13) + basr %r14,%r14 # # clear bss memory # @@ -115,6 +125,10 @@ startup_continue: b .Lfchunk-.LPG1(%r13) .align 4 +.Lipl_save_parameters: + .long ipl_save_parameters +.Linittu: + .long init_thread_union .Lpmask: .byte 0 .align 8 @@ -274,6 +288,20 @@ startup_continue: .Lbss_end: .long _end .Lparmaddr: .long PARMAREA .Lsccbaddr: .long .Lsccb + + .globl ipl_schib +ipl_schib: + .rept 13 + .long 0 + .endr + + .globl ipl_flags +ipl_flags: + .long 0 + .globl ipl_devno +ipl_devno: + .word 0 + .org 0x12000 .globl s390_readinfo_sccb s390_readinfo_sccb: @@ -305,16 +333,6 @@ s390_readinfo_sccb: .globl _stext _stext: basr %r13,0 # get base .LPG3: -# -# Setup stack -# - l %r15,.Linittu-.LPG3(%r13) - mvc __LC_CURRENT(4),__TI_task(%r15) - ahi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union+THREAD_SIZE - st %r15,__LC_KERNEL_STACK # set end of kernel stack - ahi %r15,-96 - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear backchain - # check control registers stctl %c0,%c15,0(%r15) oi 2(%r15),0x40 # enable sigp emergency signal @@ -333,6 +351,5 @@ _stext: basr %r13,0 # get base # .align 8 .Ldw: .long 0x000a0000,0x00000000 -.Linittu:.long init_thread_union .Lstart:.long start_kernel .Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index c2005101fee1..1ebaa338aa7e 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -39,7 +39,6 @@ startup_continue: basr %r13,0 # get base .LPG1: sll %r13,1 # remove high order bit srl %r13,1 - GET_IPL_DEVICE lhi %r1,1 # mode 1 = esame mvi __LC_AR_MODE_ID,1 # set esame flag slr %r0,%r0 # set cpuid to zero @@ -49,7 +48,18 @@ startup_continue: lg %r12,.Lparmaddr-.LPG1(%r13)# pointer to parameter area # move IPL device to lowcore mvc __LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12) +# +# Setup stack +# + larl %r15,init_thread_union + lg %r14,__TI_task(%r15) # cache current in lowcore + stg %r14,__LC_CURRENT + aghi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union + THREAD_SIZE + stg %r15,__LC_KERNEL_STACK # set end of kernel stack + aghi %r15,-160 + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear backchain + brasl %r14,ipl_save_parameters # # clear bss memory # @@ -269,6 +279,19 @@ startup_continue: .Lparmaddr: .quad PARMAREA + .globl ipl_schib +ipl_schib: + .rept 13 + .long 0 + .endr + + .globl ipl_flags +ipl_flags: + .long 0 + .globl ipl_devno +ipl_devno: + .word 0 + .org 0x12000 .globl s390_readinfo_sccb s390_readinfo_sccb: @@ -300,24 +323,12 @@ s390_readinfo_sccb: .globl _stext _stext: basr %r13,0 # get base .LPG3: -# -# Setup stack -# - larl %r15,init_thread_union - lg %r14,__TI_task(%r15) # cache current in lowcore - stg %r14,__LC_CURRENT - aghi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union + THREAD_SIZE - stg %r15,__LC_KERNEL_STACK # set end of kernel stack - aghi %r15,-160 - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear backchain - # check control registers stctg %c0,%c15,0(%r15) oi 6(%r15),0x40 # enable sigp emergency signal oi 4(%r15),0x10 # switch on low address proctection lctlg %c0,%c15,0(%r15) -# lam 0,15,.Laregs-.LPG3(%r13) # load access regs needed by uaccess brasl %r14,start_kernel # go to C code # @@ -325,7 +336,7 @@ _stext: basr %r13,0 # get base # basr %r13,0 lpswe .Ldw-.(%r13) # load disabled wait psw -# + .align 8 .Ldw: .quad 0x0002000180000000,0x0000000000000000 .Laregs: .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 105ee15a2b31..6555cc48e28f 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -189,9 +189,9 @@ static enum ipl_type ipl_get_type(void) { struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START; - if (!IPL_DEVNO_VALID) + if (!(ipl_flags & IPL_DEVNO_VALID)) return IPL_TYPE_UNKNOWN; - if (!IPL_PARMBLOCK_VALID) + if (!(ipl_flags & IPL_PARMBLOCK_VALID)) return IPL_TYPE_CCW; if (ipl->hdr.version > IPL_MAX_SUPPORTED_VERSION) return IPL_TYPE_UNKNOWN; diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 050963f15802..61eb7caa1567 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -16,11 +16,10 @@ #include #include #include - #include #include #include - +#include #include "airq.h" #include "cio.h" #include "css.h" @@ -909,3 +908,38 @@ void reipl_ccw_dev(struct ccw_dev_id *devid) cio_reset_channel_paths(); do_reipl_asm(*((__u32*)&schid)); } + +extern struct schib ipl_schib; + +/* + * ipl_save_parameters gets called very early. It is not allowed to access + * anything in the bss section at all. The bss section is not cleared yet, + * but may contain some ipl parameters written by the firmware. + * These parameters (if present) are copied to 0x2000. + * To avoid corruption of the ipl parameters, all variables used by this + * function must reside on the stack or in the data section. + */ +void ipl_save_parameters(void) +{ + struct subchannel_id schid; + unsigned int *ipl_ptr; + void *src, *dst; + + schid = *(struct subchannel_id *)__LC_SUBCHANNEL_ID; + if (!schid.one) + return; + if (stsch(schid, &ipl_schib)) + return; + if (!ipl_schib.pmcw.dnv) + return; + ipl_devno = ipl_schib.pmcw.dev; + ipl_flags |= IPL_DEVNO_VALID; + if (!ipl_schib.pmcw.qf) + return; + ipl_flags |= IPL_PARMBLOCK_VALID; + ipl_ptr = (unsigned int *)__LC_IPL_PARMBLOCK_PTR; + src = (void *)(unsigned long)*ipl_ptr; + dst = (void *)IPL_PARMBLOCK_ORIGIN; + memmove(dst, src, PAGE_SIZE); + *ipl_ptr = IPL_PARMBLOCK_ORIGIN; +} diff --git a/include/asm-s390/lowcore.h b/include/asm-s390/lowcore.h index 2e3d4cca5e21..18695d10dedf 100644 --- a/include/asm-s390/lowcore.h +++ b/include/asm-s390/lowcore.h @@ -35,6 +35,7 @@ #define __LC_IO_NEW_PSW 0x01f0 #endif /* !__s390x__ */ +#define __LC_IPL_PARMBLOCK_PTR 0x014 #define __LC_EXT_PARAMS 0x080 #define __LC_CPU_ADDRESS 0x084 #define __LC_EXT_INT_CODE 0x086 diff --git a/include/asm-s390/setup.h b/include/asm-s390/setup.h index 4a1126d8439a..00c03e46689b 100644 --- a/include/asm-s390/setup.h +++ b/include/asm-s390/setup.h @@ -125,13 +125,15 @@ struct ipl_parameter_block { /* * IPL validity flags and parameters as detected in head.S */ -extern u32 ipl_parameter_flags; +extern u32 ipl_flags; extern u16 ipl_devno; void do_reipl(void); -#define IPL_DEVNO_VALID (ipl_parameter_flags & 1) -#define IPL_PARMBLOCK_VALID (ipl_parameter_flags & 2) +enum { + IPL_DEVNO_VALID = 1, + IPL_PARMBLOCK_VALID = 2, +}; #define IPL_PARMBLOCK_START ((struct ipl_parameter_block *) \ IPL_PARMBLOCK_ORIGIN) From 81388d2a45b89c890b981cfc83b01ec15ae3483b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 20 Sep 2006 15:59:17 +0200 Subject: [PATCH 0244/1063] [S390] Missing initialization in common i/o layer. Previous patch that was intended to reduce stack usage within common i/o layer didn't consider implicit memset(..., 0, ...) used with the initializations used before. Add these missing memsets wherever it's not obvious that the concerned memory region is zeroed. This should give the same semantics as before. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/device_fsm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 35e162ba6d54..7756f324fb6f 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -267,6 +267,7 @@ ccw_device_recog_done(struct ccw_device *cdev, int state) notify = 1; } /* fill out sense information */ + memset(&cdev->id, 0, sizeof(cdev->id)); cdev->id.cu_type = cdev->private->senseid.cu_type; cdev->id.cu_model = cdev->private->senseid.cu_model; cdev->id.dev_type = cdev->private->senseid.dev_type; From 6981e936aa156c747bb3e6aea414bba673457115 Mon Sep 17 00:00:00 2001 From: Frank Pavlic Date: Wed, 20 Sep 2006 15:59:19 +0200 Subject: [PATCH 0245/1063] [S390] qdio slsb processing state. The last SLSB has to be set to STATE_PROCESSING if we really want to use the PROCESSING feature. Signed-off-by: Frank Pavlic Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/qdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/cio/qdio.c b/drivers/s390/cio/qdio.c index 16e3715d5c0d..cde822d8b5c8 100644 --- a/drivers/s390/cio/qdio.c +++ b/drivers/s390/cio/qdio.c @@ -1129,7 +1129,7 @@ out: #ifdef QDIO_USE_PROCESSING_STATE if (last_position>=0) - set_slsb(q, &last_position, SLSB_P_INPUT_NOT_INIT, &count); + set_slsb(q, &last_position, SLSB_P_INPUT_PROCESSING, &count); #endif /* QDIO_USE_PROCESSING_STATE */ QDIO_DBF_HEX4(0,trace,&q->first_to_check,sizeof(int)); From 9514e2311be97a01e8669c4de78e9fea37489f09 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 20 Sep 2006 15:59:22 +0200 Subject: [PATCH 0246/1063] [S390] Kernel stack overflow handling. Substract the size of the initial stack frame from the correct register. Otherwise we will end up in a program check loop. Fix the offset into the save area as well. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 8b956d1538f5..29bbfbab7332 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -827,7 +827,7 @@ restart_go: */ stack_overflow: lg %r15,__LC_PANIC_STACK # change to panic stack - aghi %r1,-SP_SIZE + aghi %r15,-SP_SIZE mvc SP_PSW(16,%r15),0(%r12) # move user PSW to stack stmg %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack la %r1,__LC_SAVE_AREA @@ -835,7 +835,7 @@ stack_overflow: je 0f chi %r12,__LC_PGM_OLD_PSW je 0f - la %r1,__LC_SAVE_AREA+16 + la %r1,__LC_SAVE_AREA+32 0: mvc SP_R12(32,%r15),0(%r1) # move %r12-%r15 to stack xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # clear back chain la %r2,SP_PTREGS(%r15) # load pt_regs From 45af3af8761a3f790fe414c017de039a08ccd780 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 20 Sep 2006 15:59:24 +0200 Subject: [PATCH 0247/1063] [S390] fix typo in vmcp. Fix comment typo in vmcp, it is z/VM and not v/VM. Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- drivers/s390/char/vmcp.c | 2 +- drivers/s390/char/vmcp.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/char/vmcp.c b/drivers/s390/char/vmcp.c index 19762f3476aa..1678b6c757ec 100644 --- a/drivers/s390/char/vmcp.c +++ b/drivers/s390/char/vmcp.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2004,2005 IBM Corporation - * Interface implementation for communication with the v/VM control program + * Interface implementation for communication with the z/VM control program * Author(s): Christian Borntraeger * * diff --git a/drivers/s390/char/vmcp.h b/drivers/s390/char/vmcp.h index 87389e730465..8a5975f3dad7 100644 --- a/drivers/s390/char/vmcp.h +++ b/drivers/s390/char/vmcp.h @@ -1,6 +1,6 @@ /* * Copyright (C) 2004, 2005 IBM Corporation - * Interface implementation for communication with the v/VM control program + * Interface implementation for communication with the z/VM control program * Version 1.0 * Author(s): Christian Borntraeger * From 1f38d61347203055b55e34083cce7a9cd8c529a9 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 20 Sep 2006 15:59:26 +0200 Subject: [PATCH 0248/1063] [S390] cleanup appldata. Introduce asm header that contains the appldata data structures and the diag inline assembly. Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/appldata/appldata.h | 16 ------ arch/s390/appldata/appldata_base.c | 81 ++++----------------------- arch/s390/appldata/appldata_os.c | 1 + include/asm-s390/appldata.h | 90 ++++++++++++++++++++++++++++++ 4 files changed, 103 insertions(+), 85 deletions(-) create mode 100644 include/asm-s390/appldata.h diff --git a/arch/s390/appldata/appldata.h b/arch/s390/appldata/appldata.h index 71d65eb30650..0429481dea63 100644 --- a/arch/s390/appldata/appldata.h +++ b/arch/s390/appldata/appldata.h @@ -29,22 +29,6 @@ #define CTL_APPLDATA_NET_SUM 2125 #define CTL_APPLDATA_PROC 2126 -#ifndef CONFIG_64BIT - -#define APPLDATA_START_INTERVAL_REC 0x00 /* Function codes for */ -#define APPLDATA_STOP_REC 0x01 /* DIAG 0xDC */ -#define APPLDATA_GEN_EVENT_RECORD 0x02 -#define APPLDATA_START_CONFIG_REC 0x03 - -#else - -#define APPLDATA_START_INTERVAL_REC 0x80 -#define APPLDATA_STOP_REC 0x81 -#define APPLDATA_GEN_EVENT_RECORD 0x82 -#define APPLDATA_START_CONFIG_REC 0x83 - -#endif /* CONFIG_64BIT */ - #define P_INFO(x...) printk(KERN_INFO MY_PRINT_NAME " info: " x) #define P_ERROR(x...) printk(KERN_ERR MY_PRINT_NAME " error: " x) #define P_WARNING(x...) printk(KERN_WARNING MY_PRINT_NAME " status: " x) diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index a0a94e0ef8d1..b69ed742f981 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -14,20 +14,20 @@ #include #include #include -#include -#include -#include #include #include #include #include #include #include -#include -//#include #include #include #include +#include +#include +#include +#include +#include #include "appldata.h" @@ -39,34 +39,6 @@ #define TOD_MICRO 0x01000 /* nr. of TOD clock units for 1 microsecond */ - -/* - * Parameter list for DIAGNOSE X'DC' - */ -#ifndef CONFIG_64BIT -struct appldata_parameter_list { - u16 diag; /* The DIAGNOSE code X'00DC' */ - u8 function; /* The function code for the DIAGNOSE */ - u8 parlist_length; /* Length of the parameter list */ - u32 product_id_addr; /* Address of the 16-byte product ID */ - u16 reserved; - u16 buffer_length; /* Length of the application data buffer */ - u32 buffer_addr; /* Address of the application data buffer */ -}; -#else -struct appldata_parameter_list { - u16 diag; - u8 function; - u8 parlist_length; - u32 unused01; - u16 reserved; - u16 buffer_length; - u32 unused02; - u64 product_id_addr; - u64 buffer_addr; -}; -#endif /* CONFIG_64BIT */ - /* * /proc entries (sysctl) */ @@ -181,46 +153,17 @@ static void appldata_work_fn(void *data) int appldata_diag(char record_nr, u16 function, unsigned long buffer, u16 length, char *mod_lvl) { - unsigned long ry; - struct appldata_product_id { - char prod_nr[7]; /* product nr. */ - char prod_fn[2]; /* product function */ - char record_nr; /* record nr. */ - char version_nr[2]; /* version */ - char release_nr[2]; /* release */ - char mod_lvl[2]; /* modification lvl. */ - } appldata_product_id = { - /* all strings are EBCDIC, record_nr is byte */ + struct appldata_product_id id = { .prod_nr = {0xD3, 0xC9, 0xD5, 0xE4, - 0xE7, 0xD2, 0xD9}, /* "LINUXKR" */ - .prod_fn = {0xD5, 0xD3}, /* "NL" */ + 0xE7, 0xD2, 0xD9}, /* "LINUXKR" */ + .prod_fn = 0xD5D3, /* "NL" */ .record_nr = record_nr, - .version_nr = {0xF2, 0xF6}, /* "26" */ - .release_nr = {0xF0, 0xF1}, /* "01" */ - .mod_lvl = {mod_lvl[0], mod_lvl[1]}, - }; - struct appldata_parameter_list appldata_parameter_list = { - .diag = 0xDC, - .function = function, - .parlist_length = - sizeof(appldata_parameter_list), - .buffer_length = length, - .product_id_addr = - (unsigned long) &appldata_product_id, - .buffer_addr = virt_to_phys((void *) buffer) + .version_nr = 0xF2F6, /* "26" */ + .release_nr = 0xF0F1, /* "01" */ + .mod_lvl = (mod_lvl[0]) << 8 | mod_lvl[1], }; - if (!MACHINE_IS_VM) - return -ENOSYS; - ry = -1; - asm volatile( - "diag %1,%0,0xDC\n\t" - : "=d" (ry) - : "d" (&appldata_parameter_list), - "m" (appldata_parameter_list), - "m" (appldata_product_id) - : "cc"); - return (int) ry; + return appldata_asm(&id, function, (void *) buffer, length); } /************************ timer, work, DIAG ****************************/ diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index 161acc5c8a1b..76a15523ae9e 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "appldata.h" diff --git a/include/asm-s390/appldata.h b/include/asm-s390/appldata.h new file mode 100644 index 000000000000..b1770703b706 --- /dev/null +++ b/include/asm-s390/appldata.h @@ -0,0 +1,90 @@ +/* + * include/asm-s390/appldata.h + * + * Copyright (C) IBM Corp. 2006 + * + * Author(s): Melissa Howland + */ + +#ifndef _ASM_S390_APPLDATA_H +#define _ASM_S390_APPLDATA_H + +#include + +#ifndef CONFIG_64BIT + +#define APPLDATA_START_INTERVAL_REC 0x00 /* Function codes for */ +#define APPLDATA_STOP_REC 0x01 /* DIAG 0xDC */ +#define APPLDATA_GEN_EVENT_REC 0x02 +#define APPLDATA_START_CONFIG_REC 0x03 + +/* + * Parameter list for DIAGNOSE X'DC' + */ +struct appldata_parameter_list { + u16 diag; /* The DIAGNOSE code X'00DC' */ + u8 function; /* The function code for the DIAGNOSE */ + u8 parlist_length; /* Length of the parameter list */ + u32 product_id_addr; /* Address of the 16-byte product ID */ + u16 reserved; + u16 buffer_length; /* Length of the application data buffer */ + u32 buffer_addr; /* Address of the application data buffer */ +} __attribute__ ((packed)); + +#else /* CONFIG_64BIT */ + +#define APPLDATA_START_INTERVAL_REC 0x80 +#define APPLDATA_STOP_REC 0x81 +#define APPLDATA_GEN_EVENT_REC 0x82 +#define APPLDATA_START_CONFIG_REC 0x83 + +/* + * Parameter list for DIAGNOSE X'DC' + */ +struct appldata_parameter_list { + u16 diag; + u8 function; + u8 parlist_length; + u32 unused01; + u16 reserved; + u16 buffer_length; + u32 unused02; + u64 product_id_addr; + u64 buffer_addr; +} __attribute__ ((packed)); + +#endif /* CONFIG_64BIT */ + +struct appldata_product_id { + char prod_nr[7]; /* product number */ + u16 prod_fn; /* product function */ + u8 record_nr; /* record number */ + u16 version_nr; /* version */ + u16 release_nr; /* release */ + u16 mod_lvl; /* modification level */ +} __attribute__ ((packed)); + +static inline int appldata_asm(struct appldata_product_id *id, + unsigned short fn, void *buffer, + unsigned short length) +{ + struct appldata_parameter_list parm_list; + int ry; + + if (!MACHINE_IS_VM) + return -ENOSYS; + parm_list.diag = 0xdc; + parm_list.function = fn; + parm_list.parlist_length = sizeof(parm_list); + parm_list.buffer_length = length; + parm_list.product_id_addr = (unsigned long) id; + parm_list.buffer_addr = virt_to_phys(buffer); + asm volatile( + "diag %1,%0,0xdc" + : "=d" (ry) + : "d" (&parm_list), "m" (parm_list), "m" (*id) + : "cc"); + return ry; +} + +#endif /* _ASM_S390_APPLDATA_H */ From 07d43ce6a2ba0bb914078c3b066a7a3bab57599d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 20 Sep 2006 15:59:29 +0200 Subject: [PATCH 0249/1063] [S390] Remove kexec experimental flag. Follow other architectures and remove kexec experimental flag. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 76122ce1e6cb..b216ca659cdf 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -460,8 +460,7 @@ config S390_HYPFS_FS information in an s390 hypervisor environment. config KEXEC - bool "kexec system call (EXPERIMENTAL)" - depends on EXPERIMENTAL + bool "kexec system call" help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot From e620c4940002348417e8d317d65bc7b152646493 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 20 Sep 2006 15:59:32 +0200 Subject: [PATCH 0250/1063] [S390] xpram off by one error. The xpram driver shows and uses 4096 bytes less than available. Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- drivers/s390/block/xpram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index ca7d51f7eccc..cab2c736683a 100644 --- a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -453,7 +453,7 @@ static int __init xpram_init(void) PRINT_WARN("No expanded memory available\n"); return -ENODEV; } - xpram_pages = xpram_highest_page_index(); + xpram_pages = xpram_highest_page_index() + 1; PRINT_INFO(" %u pages expanded memory found (%lu KB).\n", xpram_pages, (unsigned long) xpram_pages*4); rc = xpram_setup_sizes(xpram_pages); From 31b58088292c7f00f0b81088bfb557285b0b6247 Mon Sep 17 00:00:00 2001 From: Melissa Howland Date: Wed, 20 Sep 2006 15:59:34 +0200 Subject: [PATCH 0251/1063] [S390] Linux API for writing z/VM APPLDATA Monitor records. This patch delivers a new Linux API in the form of a misc char device that is useable from user space and allows write access to the z/VM APPLDATA Monitor Records collected by the *MONITOR System Service of z/VM. Signed-off-by: Melissa Howland Signed-off-by: Martin Schwidefsky --- arch/s390/defconfig | 1 + drivers/s390/Kconfig | 6 + drivers/s390/char/Makefile | 1 + drivers/s390/char/monwriter.c | 292 ++++++++++++++++++++++++++++++++++ include/asm-s390/Kbuild | 2 +- include/asm-s390/monwriter.h | 33 ++++ 6 files changed, 334 insertions(+), 1 deletion(-) create mode 100644 drivers/s390/char/monwriter.c create mode 100644 include/asm-s390/monwriter.h diff --git a/arch/s390/defconfig b/arch/s390/defconfig index f1d4591eddbb..35da53986b1b 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -428,6 +428,7 @@ CONFIG_S390_TAPE_34XX=m # CONFIG_VMLOGRDR is not set # CONFIG_VMCP is not set # CONFIG_MONREADER is not set +CONFIG_MONWRITER=m # # Cryptographic devices diff --git a/drivers/s390/Kconfig b/drivers/s390/Kconfig index bc4261e8b606..ae89b9b88743 100644 --- a/drivers/s390/Kconfig +++ b/drivers/s390/Kconfig @@ -213,6 +213,12 @@ config MONREADER help Character device driver for reading z/VM monitor service records +config MONWRITER + tristate "API for writing z/VM monitor service records" + default "m" + help + Character device driver for writing z/VM monitor service records + endmenu menu "Cryptographic devices" diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile index 0c0162ff6c0c..c3e97b4fc186 100644 --- a/drivers/s390/char/Makefile +++ b/drivers/s390/char/Makefile @@ -28,3 +28,4 @@ obj-$(CONFIG_S390_TAPE) += tape.o tape_class.o obj-$(CONFIG_S390_TAPE_34XX) += tape_34xx.o obj-$(CONFIG_S390_TAPE_3590) += tape_3590.o obj-$(CONFIG_MONREADER) += monreader.o +obj-$(CONFIG_MONWRITER) += monwriter.o diff --git a/drivers/s390/char/monwriter.c b/drivers/s390/char/monwriter.c new file mode 100644 index 000000000000..1e3939aeb8ab --- /dev/null +++ b/drivers/s390/char/monwriter.c @@ -0,0 +1,292 @@ +/* + * drivers/s390/char/monwriter.c + * + * Character device driver for writing z/VM *MONITOR service records. + * + * Copyright (C) IBM Corp. 2006 + * + * Author(s): Melissa Howland + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MONWRITE_MAX_DATALEN 4024 + +static int mon_max_bufs = 255; + +struct mon_buf { + struct list_head list; + struct monwrite_hdr hdr; + int diag_done; + char *data; +}; + +struct mon_private { + struct list_head list; + struct monwrite_hdr hdr; + size_t hdr_to_read; + size_t data_to_read; + struct mon_buf *current_buf; + int mon_buf_count; +}; + +/* + * helper functions + */ + +static int monwrite_diag(struct monwrite_hdr *myhdr, char *buffer, int fcn) +{ + struct appldata_product_id id; + int rc; + + strcpy(id.prod_nr, "LNXAPPL"); + id.prod_fn = myhdr->applid; + id.record_nr = myhdr->record_num; + id.version_nr = myhdr->version; + id.release_nr = myhdr->release; + id.mod_lvl = myhdr->mod_level; + rc = appldata_asm(&id, fcn, (void *) buffer, myhdr->datalen); + if (rc <= 0) + return rc; + if (rc == 5) + return -EPERM; + printk("DIAG X'DC' error with return code: %i\n", rc); + return -EINVAL; +} + +static inline struct mon_buf *monwrite_find_hdr(struct mon_private *monpriv, + struct monwrite_hdr *monhdr) +{ + struct mon_buf *entry, *next; + + list_for_each_entry_safe(entry, next, &monpriv->list, list) + if (entry->hdr.applid == monhdr->applid && + entry->hdr.record_num == monhdr->record_num && + entry->hdr.version == monhdr->version && + entry->hdr.release == monhdr->release && + entry->hdr.mod_level == monhdr->mod_level) + return entry; + return NULL; +} + +static int monwrite_new_hdr(struct mon_private *monpriv) +{ + struct monwrite_hdr *monhdr = &monpriv->hdr; + struct mon_buf *monbuf; + int rc; + + if (monhdr->datalen > MONWRITE_MAX_DATALEN || + monhdr->mon_function > MONWRITE_START_CONFIG || + monhdr->hdrlen != sizeof(struct monwrite_hdr)) + return -EINVAL; + monbuf = monwrite_find_hdr(monpriv, monhdr); + if (monbuf) { + if (monhdr->mon_function == MONWRITE_STOP_INTERVAL) { + monhdr->datalen = monbuf->hdr.datalen; + rc = monwrite_diag(monhdr, monbuf->data, + APPLDATA_STOP_REC); + list_del(&monbuf->list); + monpriv->mon_buf_count--; + kfree(monbuf->data); + kfree(monbuf); + monbuf = NULL; + } + } else { + if (monpriv->mon_buf_count >= mon_max_bufs) + return -ENOSPC; + monbuf = kzalloc(sizeof(struct mon_buf), GFP_KERNEL); + if (!monbuf) + return -ENOMEM; + monbuf->data = kzalloc(monbuf->hdr.datalen, + GFP_KERNEL | GFP_DMA); + if (!monbuf->data) { + kfree(monbuf); + return -ENOMEM; + } + monbuf->hdr = *monhdr; + list_add_tail(&monbuf->list, &monpriv->list); + monpriv->mon_buf_count++; + } + monpriv->current_buf = monbuf; + return 0; +} + +static int monwrite_new_data(struct mon_private *monpriv) +{ + struct monwrite_hdr *monhdr = &monpriv->hdr; + struct mon_buf *monbuf = monpriv->current_buf; + int rc = 0; + + switch (monhdr->mon_function) { + case MONWRITE_START_INTERVAL: + if (!monbuf->diag_done) { + rc = monwrite_diag(monhdr, monbuf->data, + APPLDATA_START_INTERVAL_REC); + monbuf->diag_done = 1; + } + break; + case MONWRITE_START_CONFIG: + if (!monbuf->diag_done) { + rc = monwrite_diag(monhdr, monbuf->data, + APPLDATA_START_CONFIG_REC); + monbuf->diag_done = 1; + } + break; + case MONWRITE_GEN_EVENT: + rc = monwrite_diag(monhdr, monbuf->data, + APPLDATA_GEN_EVENT_REC); + list_del(&monpriv->current_buf->list); + kfree(monpriv->current_buf->data); + kfree(monpriv->current_buf); + monpriv->current_buf = NULL; + break; + default: + /* monhdr->mon_function is checked in monwrite_new_hdr */ + BUG(); + } + return rc; +} + +/* + * file operations + */ + +static int monwrite_open(struct inode *inode, struct file *filp) +{ + struct mon_private *monpriv; + + monpriv = kzalloc(sizeof(struct mon_private), GFP_KERNEL); + if (!monpriv) + return -ENOMEM; + INIT_LIST_HEAD(&monpriv->list); + monpriv->hdr_to_read = sizeof(monpriv->hdr); + filp->private_data = monpriv; + return nonseekable_open(inode, filp); +} + +static int monwrite_close(struct inode *inode, struct file *filp) +{ + struct mon_private *monpriv = filp->private_data; + struct mon_buf *entry, *next; + + list_for_each_entry_safe(entry, next, &monpriv->list, list) { + if (entry->hdr.mon_function != MONWRITE_GEN_EVENT) + monwrite_diag(&entry->hdr, entry->data, + APPLDATA_STOP_REC); + monpriv->mon_buf_count--; + list_del(&entry->list); + kfree(entry->data); + kfree(entry); + } + kfree(monpriv); + return 0; +} + +static ssize_t monwrite_write(struct file *filp, const char __user *data, + size_t count, loff_t *ppos) +{ + struct mon_private *monpriv = filp->private_data; + size_t len, written; + void *to; + int rc; + + for (written = 0; written < count; ) { + if (monpriv->hdr_to_read) { + len = min(count - written, monpriv->hdr_to_read); + to = (char *) &monpriv->hdr + + sizeof(monpriv->hdr) - monpriv->hdr_to_read; + if (copy_from_user(to, data + written, len)) { + rc = -EFAULT; + goto out_error; + } + monpriv->hdr_to_read -= len; + written += len; + if (monpriv->hdr_to_read > 0) + continue; + rc = monwrite_new_hdr(monpriv); + if (rc) + goto out_error; + monpriv->data_to_read = monpriv->current_buf ? + monpriv->current_buf->hdr.datalen : 0; + } + + if (monpriv->data_to_read) { + len = min(count - written, monpriv->data_to_read); + to = monpriv->current_buf->data + + monpriv->hdr.datalen - monpriv->data_to_read; + if (copy_from_user(to, data + written, len)) { + rc = -EFAULT; + goto out_error; + } + monpriv->data_to_read -= len; + written += len; + if (monpriv->data_to_read > 0) + continue; + rc = monwrite_new_data(monpriv); + if (rc) + goto out_error; + } + monpriv->hdr_to_read = sizeof(monpriv->hdr); + } + return written; + +out_error: + monpriv->data_to_read = 0; + monpriv->hdr_to_read = sizeof(struct monwrite_hdr); + return rc; +} + +static struct file_operations monwrite_fops = { + .owner = THIS_MODULE, + .open = &monwrite_open, + .release = &monwrite_close, + .write = &monwrite_write, +}; + +static struct miscdevice mon_dev = { + .name = "monwriter", + .fops = &monwrite_fops, + .minor = MISC_DYNAMIC_MINOR, +}; + +/* + * module init/exit + */ + +static int __init mon_init(void) +{ + if (MACHINE_IS_VM) + return misc_register(&mon_dev); + else + return -ENODEV; +} + +static void __exit mon_exit(void) +{ + WARN_ON(misc_deregister(&mon_dev) != 0); +} + +module_init(mon_init); +module_exit(mon_exit); + +module_param_named(max_bufs, mon_max_bufs, int, 0644); +MODULE_PARM_DESC(max_bufs, "Maximum number of sample monitor data buffers" + "that can be active at one time"); + +MODULE_AUTHOR("Melissa Howland "); +MODULE_DESCRIPTION("Character device driver for writing z/VM " + "APPLDATA monitor records."); +MODULE_LICENSE("GPL"); diff --git a/include/asm-s390/Kbuild b/include/asm-s390/Kbuild index ed8955f49e47..979145026a29 100644 --- a/include/asm-s390/Kbuild +++ b/include/asm-s390/Kbuild @@ -1,4 +1,4 @@ include include/asm-generic/Kbuild.asm unifdef-y += cmb.h debug.h -header-y += dasd.h qeth.h tape390.h ucontext.h vtoc.h z90crypt.h +header-y += dasd.h monwriter.h qeth.h tape390.h ucontext.h vtoc.h z90crypt.h diff --git a/include/asm-s390/monwriter.h b/include/asm-s390/monwriter.h new file mode 100644 index 000000000000..f0cbf96c52e6 --- /dev/null +++ b/include/asm-s390/monwriter.h @@ -0,0 +1,33 @@ +/* + * include/asm-s390/monwriter.h + * + * Copyright (C) IBM Corp. 2006 + * Character device driver for writing z/VM APPLDATA monitor records + * Version 1.0 + * Author(s): Melissa Howland + * + */ + +#ifndef _ASM_390_MONWRITER_H +#define _ASM_390_MONWRITER_H + +/* mon_function values */ +#define MONWRITE_START_INTERVAL 0x00 /* start interval recording */ +#define MONWRITE_STOP_INTERVAL 0x01 /* stop interval or config recording */ +#define MONWRITE_GEN_EVENT 0x02 /* generate event record */ +#define MONWRITE_START_CONFIG 0x03 /* start configuration recording */ + +/* the header the app uses in its write() data */ +struct monwrite_hdr { + unsigned char mon_function; + unsigned short applid; + unsigned char record_num; + unsigned short version; + unsigned short release; + unsigned short mod_level; + unsigned short datalen; + unsigned char hdrlen; + +} __attribute__((packed)); + +#endif /* _ASM_390_MONWRITER_H */ From 9282ed929758b82f448a40d3c17319d794970624 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 20 Sep 2006 15:59:37 +0200 Subject: [PATCH 0252/1063] [S390] Cleanup in page table related code. Changed and simplified some page table related #defines and code. Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/mm/init.c | 36 +++++------ include/asm-s390/pgalloc.h | 65 ++++++++++---------- include/asm-s390/pgtable.h | 122 +++++++++++++++++-------------------- 3 files changed, 104 insertions(+), 119 deletions(-) diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 6e6b6de77770..cfd9b8f7a523 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -108,16 +108,23 @@ void __init paging_init(void) unsigned long pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERNSEG_TABLE; static const int ssm_mask = 0x04000000L; unsigned long ro_start_pfn, ro_end_pfn; + unsigned long zones_size[MAX_NR_ZONES]; ro_start_pfn = PFN_DOWN((unsigned long)&__start_rodata); ro_end_pfn = PFN_UP((unsigned long)&__end_rodata); + memset(zones_size, 0, sizeof(zones_size)); + zones_size[ZONE_DMA] = max_low_pfn; + free_area_init_node(0, &contig_page_data, zones_size, + __pa(PAGE_OFFSET) >> PAGE_SHIFT, + zholes_size); + /* unmap whole virtual address space */ pg_dir = swapper_pg_dir; - for (i=0;ipgd0 = (_PAGE_TABLE | __pa(pg_table)); - pg_dir->pgd1 = (_PAGE_TABLE | (__pa(pg_table)+1024)); - pg_dir->pgd2 = (_PAGE_TABLE | (__pa(pg_table)+2048)); - pg_dir->pgd3 = (_PAGE_TABLE | (__pa(pg_table)+3072)); + pmd_populate_kernel(&init_mm, (pmd_t *) pg_dir, pg_table); pg_dir++; for (tmp = 0 ; tmp < PTRS_PER_PTE ; tmp++,pg_table++) { @@ -143,8 +147,8 @@ void __init paging_init(void) else pte = pfn_pte(pfn, PAGE_KERNEL); if (pfn >= max_low_pfn) - pte_clear(&init_mm, 0, &pte); - set_pte(pg_table, pte); + pte_val(pte) = _PAGE_TYPE_EMPTY; + set_pte(pg_table, pte); pfn++; } } @@ -159,16 +163,6 @@ void __init paging_init(void) : : "m" (pgdir_k), "m" (ssm_mask)); local_flush_tlb(); - - { - unsigned long zones_size[MAX_NR_ZONES]; - - memset(zones_size, 0, sizeof(zones_size)); - zones_size[ZONE_DMA] = max_low_pfn; - free_area_init_node(0, &contig_page_data, zones_size, - __pa(PAGE_OFFSET) >> PAGE_SHIFT, - zholes_size); - } return; } @@ -236,10 +230,8 @@ void __init paging_init(void) pte = pfn_pte(pfn, __pgprot(_PAGE_RO)); else pte = pfn_pte(pfn, PAGE_KERNEL); - if (pfn >= max_low_pfn) { - pte_clear(&init_mm, 0, &pte); - continue; - } + if (pfn >= max_low_pfn) + pte_val(pte) = _PAGE_TYPE_EMPTY; set_pte(pt_dir, pte); pfn++; } diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h index a78e853e0dd5..803bc7064418 100644 --- a/include/asm-s390/pgalloc.h +++ b/include/asm-s390/pgalloc.h @@ -21,6 +21,16 @@ extern void diag10(unsigned long addr); +/* + * Page allocation orders. + */ +#ifndef __s390x__ +# define PGD_ALLOC_ORDER 1 +#else /* __s390x__ */ +# define PMD_ALLOC_ORDER 2 +# define PGD_ALLOC_ORDER 2 +#endif /* __s390x__ */ + /* * Allocate and free page tables. The xxx_kernel() versions are * used to allocate a kernel page table - this turns on ASN bits @@ -29,30 +39,23 @@ extern void diag10(unsigned long addr); static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - pgd_t *pgd; + pgd_t *pgd = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_ALLOC_ORDER); int i; + if (!pgd) + return NULL; + for (i = 0; i < PTRS_PER_PGD; i++) #ifndef __s390x__ - pgd = (pgd_t *) __get_free_pages(GFP_KERNEL,1); - if (pgd != NULL) - for (i = 0; i < USER_PTRS_PER_PGD; i++) - pmd_clear(pmd_offset(pgd + i, i*PGDIR_SIZE)); -#else /* __s390x__ */ - pgd = (pgd_t *) __get_free_pages(GFP_KERNEL,2); - if (pgd != NULL) - for (i = 0; i < PTRS_PER_PGD; i++) - pgd_clear(pgd + i); -#endif /* __s390x__ */ + pmd_clear(pmd_offset(pgd + i, i*PGDIR_SIZE)); +#else + pgd_clear(pgd + i); +#endif return pgd; } static inline void pgd_free(pgd_t *pgd) { -#ifndef __s390x__ - free_pages((unsigned long) pgd, 1); -#else /* __s390x__ */ - free_pages((unsigned long) pgd, 2); -#endif /* __s390x__ */ + free_pages((unsigned long) pgd, PGD_ALLOC_ORDER); } #ifndef __s390x__ @@ -68,20 +71,19 @@ static inline void pgd_free(pgd_t *pgd) #else /* __s390x__ */ static inline pmd_t * pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) { - pmd_t *pmd; - int i; + pmd_t *pmd = (pmd_t *) __get_free_pages(GFP_KERNEL, PMD_ALLOC_ORDER); + int i; - pmd = (pmd_t *) __get_free_pages(GFP_KERNEL, 2); - if (pmd != NULL) { - for (i=0; i < PTRS_PER_PMD; i++) - pmd_clear(pmd+i); - } + if (!pmd) + return NULL; + for (i=0; i < PTRS_PER_PMD; i++) + pmd_clear(pmd + i); return pmd; } static inline void pmd_free (pmd_t *pmd) { - free_pages((unsigned long) pmd, 2); + free_pages((unsigned long) pmd, PMD_ALLOC_ORDER); } #define __pmd_free_tlb(tlb,pmd) \ @@ -123,15 +125,14 @@ pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *page) static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long vmaddr) { - pte_t *pte; - int i; + pte_t *pte = (pte_t *) __get_free_page(GFP_KERNEL|__GFP_REPEAT); + int i; - pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); - if (pte != NULL) { - for (i=0; i < PTRS_PER_PTE; i++) { - pte_clear(mm, vmaddr, pte+i); - vmaddr += PAGE_SIZE; - } + if (!pte) + return NULL; + for (i=0; i < PTRS_PER_PTE; i++) { + pte_clear(mm, vmaddr, pte + i); + vmaddr += PAGE_SIZE; } return pte; } diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h index 24312387fa24..1a07028d575e 100644 --- a/include/asm-s390/pgtable.h +++ b/include/asm-s390/pgtable.h @@ -89,19 +89,6 @@ extern char empty_zero_page[PAGE_SIZE]; # define PTRS_PER_PGD 2048 #endif /* __s390x__ */ -/* - * pgd entries used up by user/kernel: - */ -#ifndef __s390x__ -# define USER_PTRS_PER_PGD 512 -# define USER_PGD_PTRS 512 -# define KERNEL_PGD_PTRS 512 -#else /* __s390x__ */ -# define USER_PTRS_PER_PGD 2048 -# define USER_PGD_PTRS 2048 -# define KERNEL_PGD_PTRS 2048 -#endif /* __s390x__ */ - #define FIRST_USER_ADDRESS 0 #define pte_ERROR(e) \ @@ -216,12 +203,14 @@ extern char empty_zero_page[PAGE_SIZE]; #define _PAGE_RO 0x200 /* HW read-only */ #define _PAGE_INVALID 0x400 /* HW invalid */ -/* Mask and four different kinds of invalid pages. */ -#define _PAGE_INVALID_MASK 0x601 -#define _PAGE_INVALID_EMPTY 0x400 -#define _PAGE_INVALID_NONE 0x401 -#define _PAGE_INVALID_SWAP 0x600 -#define _PAGE_INVALID_FILE 0x601 +/* Mask and six different types of pages. */ +#define _PAGE_TYPE_MASK 0x601 +#define _PAGE_TYPE_EMPTY 0x400 +#define _PAGE_TYPE_NONE 0x401 +#define _PAGE_TYPE_SWAP 0x600 +#define _PAGE_TYPE_FILE 0x601 +#define _PAGE_TYPE_RO 0x200 +#define _PAGE_TYPE_RW 0x000 #ifndef __s390x__ @@ -280,15 +269,14 @@ extern char empty_zero_page[PAGE_SIZE]; #endif /* __s390x__ */ /* - * No mapping available + * Page protection definitions. */ -#define PAGE_NONE_SHARED __pgprot(_PAGE_INVALID_NONE) -#define PAGE_NONE_PRIVATE __pgprot(_PAGE_INVALID_NONE) -#define PAGE_RO_SHARED __pgprot(_PAGE_RO) -#define PAGE_RO_PRIVATE __pgprot(_PAGE_RO) -#define PAGE_COPY __pgprot(_PAGE_RO) -#define PAGE_SHARED __pgprot(0) -#define PAGE_KERNEL __pgprot(0) +#define PAGE_NONE __pgprot(_PAGE_TYPE_NONE) +#define PAGE_RO __pgprot(_PAGE_TYPE_RO) +#define PAGE_RW __pgprot(_PAGE_TYPE_RW) + +#define PAGE_KERNEL PAGE_RW +#define PAGE_COPY PAGE_RO /* * The S390 can't do page protection for execute, and considers that the @@ -296,23 +284,23 @@ extern char empty_zero_page[PAGE_SIZE]; * the closest we can get.. */ /*xwr*/ -#define __P000 PAGE_NONE_PRIVATE -#define __P001 PAGE_RO_PRIVATE -#define __P010 PAGE_COPY -#define __P011 PAGE_COPY -#define __P100 PAGE_RO_PRIVATE -#define __P101 PAGE_RO_PRIVATE -#define __P110 PAGE_COPY -#define __P111 PAGE_COPY +#define __P000 PAGE_NONE +#define __P001 PAGE_RO +#define __P010 PAGE_RO +#define __P011 PAGE_RO +#define __P100 PAGE_RO +#define __P101 PAGE_RO +#define __P110 PAGE_RO +#define __P111 PAGE_RO -#define __S000 PAGE_NONE_SHARED -#define __S001 PAGE_RO_SHARED -#define __S010 PAGE_SHARED -#define __S011 PAGE_SHARED -#define __S100 PAGE_RO_SHARED -#define __S101 PAGE_RO_SHARED -#define __S110 PAGE_SHARED -#define __S111 PAGE_SHARED +#define __S000 PAGE_NONE +#define __S001 PAGE_RO +#define __S010 PAGE_RW +#define __S011 PAGE_RW +#define __S100 PAGE_RO +#define __S101 PAGE_RO +#define __S110 PAGE_RW +#define __S111 PAGE_RW /* * Certain architectures need to do special things when PTEs @@ -377,18 +365,18 @@ static inline int pmd_bad(pmd_t pmd) static inline int pte_none(pte_t pte) { - return (pte_val(pte) & _PAGE_INVALID_MASK) == _PAGE_INVALID_EMPTY; + return (pte_val(pte) & _PAGE_TYPE_MASK) == _PAGE_TYPE_EMPTY; } static inline int pte_present(pte_t pte) { return !(pte_val(pte) & _PAGE_INVALID) || - (pte_val(pte) & _PAGE_INVALID_MASK) == _PAGE_INVALID_NONE; + (pte_val(pte) & _PAGE_TYPE_MASK) == _PAGE_TYPE_NONE; } static inline int pte_file(pte_t pte) { - return (pte_val(pte) & _PAGE_INVALID_MASK) == _PAGE_INVALID_FILE; + return (pte_val(pte) & _PAGE_TYPE_MASK) == _PAGE_TYPE_FILE; } #define pte_same(a,b) (pte_val(a) == pte_val(b)) @@ -461,7 +449,7 @@ static inline void pmd_clear(pmd_t * pmdp) static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_val(*ptep) = _PAGE_INVALID_EMPTY; + pte_val(*ptep) = _PAGE_TYPE_EMPTY; } /* @@ -477,7 +465,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) static inline pte_t pte_wrprotect(pte_t pte) { - /* Do not clobber _PAGE_INVALID_NONE pages! */ + /* Do not clobber _PAGE_TYPE_NONE pages! */ if (!(pte_val(pte) & _PAGE_INVALID)) pte_val(pte) |= _PAGE_RO; return pte; @@ -556,26 +544,30 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, return pte; } +static inline void __ptep_ipte(unsigned long address, pte_t *ptep) +{ + if (!(pte_val(*ptep) & _PAGE_INVALID)) { +#ifndef __s390x__ + /* S390 has 1mb segments, we are emulating 4MB segments */ + pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00); +#else + /* ipte in zarch mode can do the math */ + pte_t *pto = ptep; +#endif + asm volatile ("ipte %2,%3" + : "=m" (*ptep) : "m" (*ptep), + "a" (pto), "a" (address) ); + } + pte_val(*ptep) = _PAGE_TYPE_EMPTY; +} + static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { pte_t pte = *ptep; -#ifndef __s390x__ - if (!(pte_val(pte) & _PAGE_INVALID)) { - /* S390 has 1mb segments, we are emulating 4MB segments */ - pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00); - __asm__ __volatile__ ("ipte %2,%3" - : "=m" (*ptep) : "m" (*ptep), - "a" (pto), "a" (address) ); - } -#else /* __s390x__ */ - if (!(pte_val(pte) & _PAGE_INVALID)) - __asm__ __volatile__ ("ipte %2,%3" - : "=m" (*ptep) : "m" (*ptep), - "a" (ptep), "a" (address) ); -#endif /* __s390x__ */ - pte_val(*ptep) = _PAGE_INVALID_EMPTY; + + __ptep_ipte(address, ptep); return pte; } @@ -755,7 +747,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) { pte_t pte; offset &= __SWP_OFFSET_MASK; - pte_val(pte) = _PAGE_INVALID_SWAP | ((type & 0x1f) << 2) | + pte_val(pte) = _PAGE_TYPE_SWAP | ((type & 0x1f) << 2) | ((offset & 1UL) << 7) | ((offset & ~1UL) << 11); return pte; } @@ -778,7 +770,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) #define pgoff_to_pte(__off) \ ((pte_t) { ((((__off) & 0x7f) << 1) + (((__off) >> 7) << 12)) \ - | _PAGE_INVALID_FILE }) + | _PAGE_TYPE_FILE }) #endif /* !__ASSEMBLY__ */ From 6837a8c352efcc5efc70424e9bfd94ff9bfa9a47 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 20 Sep 2006 15:59:39 +0200 Subject: [PATCH 0253/1063] [S390] Cleanup in signal handling code. Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/signal.c | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index dd05423f87a8..642095ec7c07 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -114,29 +114,26 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) { unsigned long old_mask = regs->psw.mask; - int err; - + _sigregs user_sregs; + save_access_regs(current->thread.acrs); /* Copy a 'clean' PSW mask to the user to avoid leaking information about whether PER is currently on. */ regs->psw.mask = PSW_MASK_MERGE(PSW_USER_BITS, regs->psw.mask); - err = __copy_to_user(&sregs->regs.psw, ®s->psw, - sizeof(sregs->regs.psw)+sizeof(sregs->regs.gprs)); + memcpy(&user_sregs.regs.psw, ®s->psw, sizeof(sregs->regs.psw) + + sizeof(sregs->regs.gprs)); regs->psw.mask = old_mask; - if (err != 0) - return err; - err = __copy_to_user(&sregs->regs.acrs, current->thread.acrs, - sizeof(sregs->regs.acrs)); - if (err != 0) - return err; + memcpy(&user_sregs.regs.acrs, current->thread.acrs, + sizeof(sregs->regs.acrs)); /* * We have to store the fp registers to current->thread.fp_regs * to merge them with the emulated registers. */ save_fp_regs(¤t->thread.fp_regs); - return __copy_to_user(&sregs->fpregs, ¤t->thread.fp_regs, - sizeof(s390_fp_regs)); + memcpy(&user_sregs.fpregs, ¤t->thread.fp_regs, + sizeof(s390_fp_regs)); + return __copy_to_user(sregs, &user_sregs, sizeof(_sigregs)); } /* Returns positive number on error */ @@ -144,27 +141,25 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) { unsigned long old_mask = regs->psw.mask; int err; + _sigregs user_sregs; /* Alwys make any pending restarted system call return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; - err = __copy_from_user(®s->psw, &sregs->regs.psw, - sizeof(sregs->regs.psw)+sizeof(sregs->regs.gprs)); + err = __copy_from_user(&user_sregs, sregs, sizeof(_sigregs)); regs->psw.mask = PSW_MASK_MERGE(old_mask, regs->psw.mask); regs->psw.addr |= PSW_ADDR_AMODE; if (err) return err; - err = __copy_from_user(¤t->thread.acrs, &sregs->regs.acrs, - sizeof(sregs->regs.acrs)); - if (err) - return err; + memcpy(®s->psw, &user_sregs.regs.psw, sizeof(sregs->regs.psw) + + sizeof(sregs->regs.gprs)); + memcpy(¤t->thread.acrs, &user_sregs.regs.acrs, + sizeof(sregs->regs.acrs)); restore_access_regs(current->thread.acrs); - err = __copy_from_user(¤t->thread.fp_regs, &sregs->fpregs, - sizeof(s390_fp_regs)); + memcpy(¤t->thread.fp_regs, &user_sregs.fpregs, + sizeof(s390_fp_regs)); current->thread.fp_regs.fpc &= FPC_VALID_MASK; - if (err) - return err; restore_fp_regs(¤t->thread.fp_regs); regs->trap = -1; /* disable syscall checks */ From d02765d1af743567398eb6d523dea0ba5e5e7e8e Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 20 Sep 2006 15:59:42 +0200 Subject: [PATCH 0254/1063] [S390] Make user-copy operations run-time configurable. Introduces a struct uaccess_ops which allows setting user-copy operations at run-time. Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/s390_ksyms.c | 6 - arch/s390/kernel/setup.c | 7 + arch/s390/lib/Makefile | 3 +- arch/s390/lib/uaccess.S | 211 --------------------- arch/s390/lib/uaccess64.S | 207 --------------------- arch/s390/lib/uaccess_std.c | 340 ++++++++++++++++++++++++++++++++++ include/asm-s390/futex.h | 87 +-------- include/asm-s390/uaccess.h | 171 ++++++----------- 8 files changed, 411 insertions(+), 621 deletions(-) delete mode 100644 arch/s390/lib/uaccess.S delete mode 100644 arch/s390/lib/uaccess64.S create mode 100644 arch/s390/lib/uaccess_std.c diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c index c73a45467fa4..9f19e833a562 100644 --- a/arch/s390/kernel/s390_ksyms.c +++ b/arch/s390/kernel/s390_ksyms.c @@ -25,12 +25,6 @@ EXPORT_SYMBOL(_oi_bitmap); EXPORT_SYMBOL(_ni_bitmap); EXPORT_SYMBOL(_zb_findmap); EXPORT_SYMBOL(_sb_findmap); -EXPORT_SYMBOL(__copy_from_user_asm); -EXPORT_SYMBOL(__copy_to_user_asm); -EXPORT_SYMBOL(__copy_in_user_asm); -EXPORT_SYMBOL(__clear_user_asm); -EXPORT_SYMBOL(__strncpy_from_user_asm); -EXPORT_SYMBOL(__strnlen_user_asm); EXPORT_SYMBOL(diag10); /* diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f2a9165ca4f8..e229af59976c 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -50,6 +50,12 @@ #include #include +/* + * User copy operations. + */ +struct uaccess_ops uaccess; +EXPORT_SYMBOL_GPL(uaccess); + /* * Machine setup.. */ @@ -641,6 +647,7 @@ setup_arch(char **cmdline_p) memory_end = memory_size; + memcpy(&uaccess, &uaccess_std, sizeof(uaccess)); parse_early_param(); #ifndef CONFIG_64BIT diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index e05d087a6eae..96c82424d88b 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -4,6 +4,5 @@ EXTRA_AFLAGS := -traditional -lib-y += delay.o string.o -lib-y += $(if $(CONFIG_64BIT),uaccess64.o,uaccess.o) +lib-y += delay.o string.o uaccess_std.o lib-$(CONFIG_SMP) += spinlock.o diff --git a/arch/s390/lib/uaccess.S b/arch/s390/lib/uaccess.S deleted file mode 100644 index 837275284d9f..000000000000 --- a/arch/s390/lib/uaccess.S +++ /dev/null @@ -1,211 +0,0 @@ -/* - * arch/s390/lib/uaccess.S - * __copy_{from|to}_user functions. - * - * s390 - * Copyright (C) 2000,2002 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Authors(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * These functions have standard call interface - */ - -#include -#include -#include - - .text - .align 4 - .globl __copy_from_user_asm - # %r2 = to, %r3 = n, %r4 = from -__copy_from_user_asm: - slr %r0,%r0 -0: mvcp 0(%r3,%r2),0(%r4),%r0 - jnz 1f - slr %r2,%r2 - br %r14 -1: la %r2,256(%r2) - la %r4,256(%r4) - ahi %r3,-256 -2: mvcp 0(%r3,%r2),0(%r4),%r0 - jnz 1b -3: slr %r2,%r2 - br %r14 -4: lhi %r0,-4096 - lr %r5,%r4 - slr %r5,%r0 - nr %r5,%r0 # %r5 = (%r4 + 4096) & -4096 - slr %r5,%r4 # %r5 = #bytes to next user page boundary - clr %r3,%r5 # copy crosses next page boundary ? - jnh 6f # no, the current page faulted - # move with the reduced length which is < 256 -5: mvcp 0(%r5,%r2),0(%r4),%r0 - slr %r3,%r5 -6: lr %r2,%r3 - br %r14 - .section __ex_table,"a" - .long 0b,4b - .long 2b,4b - .long 5b,6b - .previous - - .align 4 - .text - .globl __copy_to_user_asm - # %r2 = from, %r3 = n, %r4 = to -__copy_to_user_asm: - slr %r0,%r0 -0: mvcs 0(%r3,%r4),0(%r2),%r0 - jnz 1f - slr %r2,%r2 - br %r14 -1: la %r2,256(%r2) - la %r4,256(%r4) - ahi %r3,-256 -2: mvcs 0(%r3,%r4),0(%r2),%r0 - jnz 1b -3: slr %r2,%r2 - br %r14 -4: lhi %r0,-4096 - lr %r5,%r4 - slr %r5,%r0 - nr %r5,%r0 # %r5 = (%r4 + 4096) & -4096 - slr %r5,%r4 # %r5 = #bytes to next user page boundary - clr %r3,%r5 # copy crosses next page boundary ? - jnh 6f # no, the current page faulted - # move with the reduced length which is < 256 -5: mvcs 0(%r5,%r4),0(%r2),%r0 - slr %r3,%r5 -6: lr %r2,%r3 - br %r14 - .section __ex_table,"a" - .long 0b,4b - .long 2b,4b - .long 5b,6b - .previous - - .align 4 - .text - .globl __copy_in_user_asm - # %r2 = from, %r3 = n, %r4 = to -__copy_in_user_asm: - ahi %r3,-1 - jo 6f - sacf 256 - bras %r1,4f -0: ahi %r3,257 -1: mvc 0(1,%r4),0(%r2) - la %r2,1(%r2) - la %r4,1(%r4) - ahi %r3,-1 - jnz 1b -2: lr %r2,%r3 - br %r14 -3: mvc 0(256,%r4),0(%r2) - la %r2,256(%r2) - la %r4,256(%r4) -4: ahi %r3,-256 - jnm 3b -5: ex %r3,4(%r1) - sacf 0 -6: slr %r2,%r2 - br %r14 - .section __ex_table,"a" - .long 1b,2b - .long 3b,0b - .long 5b,0b - .previous - - .align 4 - .text - .globl __clear_user_asm - # %r2 = to, %r3 = n -__clear_user_asm: - bras %r5,0f - .long empty_zero_page -0: l %r5,0(%r5) - slr %r0,%r0 -1: mvcs 0(%r3,%r2),0(%r5),%r0 - jnz 2f - slr %r2,%r2 - br %r14 -2: la %r2,256(%r2) - ahi %r3,-256 -3: mvcs 0(%r3,%r2),0(%r5),%r0 - jnz 2b -4: slr %r2,%r2 - br %r14 -5: lhi %r0,-4096 - lr %r4,%r2 - slr %r4,%r0 - nr %r4,%r0 # %r4 = (%r2 + 4096) & -4096 - slr %r4,%r2 # %r4 = #bytes to next user page boundary - clr %r3,%r4 # clear crosses next page boundary ? - jnh 7f # no, the current page faulted - # clear with the reduced length which is < 256 -6: mvcs 0(%r4,%r2),0(%r5),%r0 - slr %r3,%r4 -7: lr %r2,%r3 - br %r14 - .section __ex_table,"a" - .long 1b,5b - .long 3b,5b - .long 6b,7b - .previous - - .align 4 - .text - .globl __strncpy_from_user_asm - # %r2 = count, %r3 = dst, %r4 = src -__strncpy_from_user_asm: - lhi %r0,0 - lr %r1,%r4 - la %r4,0(%r4) # clear high order bit from %r4 - la %r2,0(%r2,%r4) # %r2 points to first byte after string - sacf 256 -0: srst %r2,%r1 - jo 0b - sacf 0 - lr %r1,%r2 - jh 1f # \0 found in string ? - ahi %r1,1 # include \0 in copy -1: slr %r1,%r4 # %r1 = copy length (without \0) - slr %r2,%r4 # %r2 = return length (including \0) -2: mvcp 0(%r1,%r3),0(%r4),%r0 - jnz 3f - br %r14 -3: la %r3,256(%r3) - la %r4,256(%r4) - ahi %r1,-256 - mvcp 0(%r1,%r3),0(%r4),%r0 - jnz 3b - br %r14 -4: sacf 0 - lhi %r2,-EFAULT - br %r14 - .section __ex_table,"a" - .long 0b,4b - .previous - - .align 4 - .text - .globl __strnlen_user_asm - # %r2 = count, %r3 = src -__strnlen_user_asm: - lhi %r0,0 - lr %r1,%r3 - la %r3,0(%r3) # clear high order bit from %r4 - la %r2,0(%r2,%r3) # %r2 points to first byte after string - sacf 256 -0: srst %r2,%r1 - jo 0b - sacf 0 - ahi %r2,1 # strnlen_user result includes the \0 - # or return count+1 if \0 not found - slr %r2,%r3 - br %r14 -2: sacf 0 - slr %r2,%r2 # return 0 on exception - br %r14 - .section __ex_table,"a" - .long 0b,2b - .previous diff --git a/arch/s390/lib/uaccess64.S b/arch/s390/lib/uaccess64.S deleted file mode 100644 index 1f755be22f92..000000000000 --- a/arch/s390/lib/uaccess64.S +++ /dev/null @@ -1,207 +0,0 @@ -/* - * arch/s390x/lib/uaccess.S - * __copy_{from|to}_user functions. - * - * s390 - * Copyright (C) 2000,2002 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Authors(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * These functions have standard call interface - */ - -#include -#include -#include - - .text - .align 4 - .globl __copy_from_user_asm - # %r2 = to, %r3 = n, %r4 = from -__copy_from_user_asm: - slgr %r0,%r0 -0: mvcp 0(%r3,%r2),0(%r4),%r0 - jnz 1f - slgr %r2,%r2 - br %r14 -1: la %r2,256(%r2) - la %r4,256(%r4) - aghi %r3,-256 -2: mvcp 0(%r3,%r2),0(%r4),%r0 - jnz 1b -3: slgr %r2,%r2 - br %r14 -4: lghi %r0,-4096 - lgr %r5,%r4 - slgr %r5,%r0 - ngr %r5,%r0 # %r5 = (%r4 + 4096) & -4096 - slgr %r5,%r4 # %r5 = #bytes to next user page boundary - clgr %r3,%r5 # copy crosses next page boundary ? - jnh 6f # no, the current page faulted - # move with the reduced length which is < 256 -5: mvcp 0(%r5,%r2),0(%r4),%r0 - slgr %r3,%r5 -6: lgr %r2,%r3 - br %r14 - .section __ex_table,"a" - .quad 0b,4b - .quad 2b,4b - .quad 5b,6b - .previous - - .align 4 - .text - .globl __copy_to_user_asm - # %r2 = from, %r3 = n, %r4 = to -__copy_to_user_asm: - slgr %r0,%r0 -0: mvcs 0(%r3,%r4),0(%r2),%r0 - jnz 1f - slgr %r2,%r2 - br %r14 -1: la %r2,256(%r2) - la %r4,256(%r4) - aghi %r3,-256 -2: mvcs 0(%r3,%r4),0(%r2),%r0 - jnz 1b -3: slgr %r2,%r2 - br %r14 -4: lghi %r0,-4096 - lgr %r5,%r4 - slgr %r5,%r0 - ngr %r5,%r0 # %r5 = (%r4 + 4096) & -4096 - slgr %r5,%r4 # %r5 = #bytes to next user page boundary - clgr %r3,%r5 # copy crosses next page boundary ? - jnh 6f # no, the current page faulted - # move with the reduced length which is < 256 -5: mvcs 0(%r5,%r4),0(%r2),%r0 - slgr %r3,%r5 -6: lgr %r2,%r3 - br %r14 - .section __ex_table,"a" - .quad 0b,4b - .quad 2b,4b - .quad 5b,6b - .previous - - .align 4 - .text - .globl __copy_in_user_asm - # %r2 = from, %r3 = n, %r4 = to -__copy_in_user_asm: - aghi %r3,-1 - jo 6f - sacf 256 - bras %r1,4f -0: aghi %r3,257 -1: mvc 0(1,%r4),0(%r2) - la %r2,1(%r2) - la %r4,1(%r4) - aghi %r3,-1 - jnz 1b -2: lgr %r2,%r3 - br %r14 -3: mvc 0(256,%r4),0(%r2) - la %r2,256(%r2) - la %r4,256(%r4) -4: aghi %r3,-256 - jnm 3b -5: ex %r3,4(%r1) - sacf 0 -6: slgr %r2,%r2 - br 14 - .section __ex_table,"a" - .quad 1b,2b - .quad 3b,0b - .quad 5b,0b - .previous - - .align 4 - .text - .globl __clear_user_asm - # %r2 = to, %r3 = n -__clear_user_asm: - slgr %r0,%r0 - larl %r5,empty_zero_page -1: mvcs 0(%r3,%r2),0(%r5),%r0 - jnz 2f - slgr %r2,%r2 - br %r14 -2: la %r2,256(%r2) - aghi %r3,-256 -3: mvcs 0(%r3,%r2),0(%r5),%r0 - jnz 2b -4: slgr %r2,%r2 - br %r14 -5: lghi %r0,-4096 - lgr %r4,%r2 - slgr %r4,%r0 - ngr %r4,%r0 # %r4 = (%r2 + 4096) & -4096 - slgr %r4,%r2 # %r4 = #bytes to next user page boundary - clgr %r3,%r4 # clear crosses next page boundary ? - jnh 7f # no, the current page faulted - # clear with the reduced length which is < 256 -6: mvcs 0(%r4,%r2),0(%r5),%r0 - slgr %r3,%r4 -7: lgr %r2,%r3 - br %r14 - .section __ex_table,"a" - .quad 1b,5b - .quad 3b,5b - .quad 6b,7b - .previous - - .align 4 - .text - .globl __strncpy_from_user_asm - # %r2 = count, %r3 = dst, %r4 = src -__strncpy_from_user_asm: - lghi %r0,0 - lgr %r1,%r4 - la %r2,0(%r2,%r4) # %r2 points to first byte after string - sacf 256 -0: srst %r2,%r1 - jo 0b - sacf 0 - lgr %r1,%r2 - jh 1f # \0 found in string ? - aghi %r1,1 # include \0 in copy -1: slgr %r1,%r4 # %r1 = copy length (without \0) - slgr %r2,%r4 # %r2 = return length (including \0) -2: mvcp 0(%r1,%r3),0(%r4),%r0 - jnz 3f - br %r14 -3: la %r3,256(%r3) - la %r4,256(%r4) - aghi %r1,-256 - mvcp 0(%r1,%r3),0(%r4),%r0 - jnz 3b - br %r14 -4: sacf 0 - lghi %r2,-EFAULT - br %r14 - .section __ex_table,"a" - .quad 0b,4b - .previous - - .align 4 - .text - .globl __strnlen_user_asm - # %r2 = count, %r3 = src -__strnlen_user_asm: - lghi %r0,0 - lgr %r1,%r3 - la %r2,0(%r2,%r3) # %r2 points to first byte after string - sacf 256 -0: srst %r2,%r1 - jo 0b - sacf 0 - aghi %r2,1 # strnlen_user result includes the \0 - # or return count+1 if \0 not found - slgr %r2,%r3 - br %r14 -2: sacf 0 - slgr %r2,%r2 # return 0 on exception - br %r14 - .section __ex_table,"a" - .quad 0b,2b - .previous diff --git a/arch/s390/lib/uaccess_std.c b/arch/s390/lib/uaccess_std.c new file mode 100644 index 000000000000..9a4d4a29ea79 --- /dev/null +++ b/arch/s390/lib/uaccess_std.c @@ -0,0 +1,340 @@ +/* + * arch/s390/lib/uaccess_std.c + * + * Standard user space access functions based on mvcp/mvcs and doing + * interesting things in the secondary space mode. + * + * Copyright (C) IBM Corp. 2006 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * Gerald Schaefer (gerald.schaefer@de.ibm.com) + */ + +#include +#include +#include +#include + +#ifndef __s390x__ +#define AHI "ahi" +#define ALR "alr" +#define CLR "clr" +#define LHI "lhi" +#define SLR "slr" +#else +#define AHI "aghi" +#define ALR "algr" +#define CLR "clgr" +#define LHI "lghi" +#define SLR "slgr" +#endif + +size_t copy_from_user_std(size_t size, const void __user *ptr, void *x) +{ + unsigned long tmp1, tmp2; + + tmp1 = -256UL; + asm volatile( + "0: mvcp 0(%0,%2),0(%1),%3\n" + " jz 5f\n" + "1:"ALR" %0,%3\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + "2: mvcp 0(%0,%2),0(%1),%3\n" + " jnz 1b\n" + " j 5f\n" + "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ + " "LHI" %3,-4096\n" + " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */ + " "SLR" %4,%1\n" + " "CLR" %0,%4\n" /* copy crosses next page boundary? */ + " jnh 6f\n" + "4: mvcp 0(%4,%2),0(%1),%3\n" + " "SLR" %0,%4\n" + " j 6f\n" + "5:"SLR" %0,%0\n" + "6: \n" + EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b) + : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) + : : "cc", "memory"); + return size; +} + +size_t copy_from_user_std_small(size_t size, const void __user *ptr, void *x) +{ + unsigned long tmp1, tmp2; + + tmp1 = 0UL; + asm volatile( + "0: mvcp 0(%0,%2),0(%1),%3\n" + " "SLR" %0,%0\n" + " j 3f\n" + "1: la %4,255(%1)\n" /* %4 = ptr + 255 */ + " "LHI" %3,-4096\n" + " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */ + " "SLR" %4,%1\n" + " "CLR" %0,%4\n" /* copy crosses next page boundary? */ + " jnh 3f\n" + "2: mvcp 0(%4,%2),0(%1),%3\n" + " "SLR" %0,%4\n" + "3:\n" + EX_TABLE(0b,1b) EX_TABLE(2b,3b) + : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) + : : "cc", "memory"); + return size; +} + +size_t copy_to_user_std(size_t size, void __user *ptr, const void *x) +{ + unsigned long tmp1, tmp2; + + tmp1 = -256UL; + asm volatile( + "0: mvcs 0(%0,%1),0(%2),%3\n" + " jz 5f\n" + "1:"ALR" %0,%3\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + "2: mvcs 0(%0,%1),0(%2),%3\n" + " jnz 1b\n" + " j 5f\n" + "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ + " "LHI" %3,-4096\n" + " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */ + " "SLR" %4,%1\n" + " "CLR" %0,%4\n" /* copy crosses next page boundary? */ + " jnh 6f\n" + "4: mvcs 0(%4,%1),0(%2),%3\n" + " "SLR" %0,%4\n" + " j 6f\n" + "5:"SLR" %0,%0\n" + "6: \n" + EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b) + : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) + : : "cc", "memory"); + return size; +} + +size_t copy_to_user_std_small(size_t size, void __user *ptr, const void *x) +{ + unsigned long tmp1, tmp2; + + tmp1 = 0UL; + asm volatile( + "0: mvcs 0(%0,%1),0(%2),%3\n" + " "SLR" %0,%0\n" + " j 3f\n" + "1: la %4,255(%1)\n" /* ptr + 255 */ + " "LHI" %3,-4096\n" + " nr %4,%3\n" /* (ptr + 255) & -4096UL */ + " "SLR" %4,%1\n" + " "CLR" %0,%4\n" /* copy crosses next page boundary? */ + " jnh 3f\n" + "2: mvcs 0(%4,%1),0(%2),%3\n" + " "SLR" %0,%4\n" + "3:\n" + EX_TABLE(0b,1b) EX_TABLE(2b,3b) + : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) + : : "cc", "memory"); + return size; +} + +size_t copy_in_user_std(size_t size, void __user *to, const void __user *from) +{ + unsigned long tmp1; + + asm volatile( + " "AHI" %0,-1\n" + " jo 5f\n" + " sacf 256\n" + " bras %3,3f\n" + "0:"AHI" %0,257\n" + "1: mvc 0(1,%1),0(%2)\n" + " la %1,1(%1)\n" + " la %2,1(%2)\n" + " "AHI" %0,-1\n" + " jnz 1b\n" + " j 5f\n" + "2: mvc 0(256,%1),0(%2)\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + "3:"AHI" %0,-256\n" + " jnm 2b\n" + "4: ex %0,1b-0b(%3)\n" + " sacf 0\n" + "5: "SLR" %0,%0\n" + "6:\n" + EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) + : "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1) + : : "cc", "memory"); + return size; +} + +size_t clear_user_std(size_t size, void __user *to) +{ + unsigned long tmp1, tmp2; + + asm volatile( + " "AHI" %0,-1\n" + " jo 5f\n" + " sacf 256\n" + " bras %3,3f\n" + " xc 0(1,%1),0(%1)\n" + "0:"AHI" %0,257\n" + " la %2,255(%1)\n" /* %2 = ptr + 255 */ + " srl %2,12\n" + " sll %2,12\n" /* %2 = (ptr + 255) & -4096 */ + " "SLR" %2,%1\n" + " "CLR" %0,%2\n" /* clear crosses next page boundary? */ + " jnh 5f\n" + " "AHI" %2,-1\n" + "1: ex %2,0(%3)\n" + " "AHI" %2,1\n" + " "SLR" %0,%2\n" + " j 5f\n" + "2: xc 0(256,%1),0(%1)\n" + " la %1,256(%1)\n" + "3:"AHI" %0,-256\n" + " jnm 2b\n" + "4: ex %0,0(%3)\n" + " sacf 0\n" + "5: "SLR" %0,%0\n" + "6:\n" + EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) + : "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2) + : : "cc", "memory"); + return size; +} + +size_t strnlen_user_std(size_t size, const char __user *src) +{ + register unsigned long reg0 asm("0") = 0UL; + unsigned long tmp1, tmp2; + + asm volatile( + " la %2,0(%1)\n" + " la %3,0(%0,%1)\n" + " "SLR" %0,%0\n" + " sacf 256\n" + "0: srst %3,%2\n" + " jo 0b\n" + " la %0,1(%3)\n" /* strnlen_user results includes \0 */ + " "SLR" %0,%1\n" + "1: sacf 0\n" + EX_TABLE(0b,1b) + : "+a" (size), "+a" (src), "=a" (tmp1), "=a" (tmp2) + : "d" (reg0) : "cc", "memory"); + return size; +} + +size_t strncpy_from_user_std(size_t size, const char __user *src, char *dst) +{ + register unsigned long reg0 asm("0") = 0UL; + unsigned long tmp1, tmp2; + + asm volatile( + " la %3,0(%1)\n" + " la %4,0(%0,%1)\n" + " sacf 256\n" + "0: srst %4,%3\n" + " jo 0b\n" + " sacf 0\n" + " la %0,0(%4)\n" + " jh 1f\n" /* found \0 in string ? */ + " "AHI" %4,1\n" /* include \0 in copy */ + "1:"SLR" %0,%1\n" /* %0 = return length (without \0) */ + " "SLR" %4,%1\n" /* %4 = copy length (including \0) */ + "2: mvcp 0(%4,%2),0(%1),%5\n" + " jz 9f\n" + "3:"AHI" %4,-256\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + "4: mvcp 0(%4,%2),0(%1),%5\n" + " jnz 3b\n" + " j 9f\n" + "7: sacf 0\n" + "8:"LHI" %0,%6\n" + "9:\n" + EX_TABLE(0b,7b) EX_TABLE(2b,8b) EX_TABLE(4b,8b) + : "+a" (size), "+a" (src), "+d" (dst), "=a" (tmp1), "=a" (tmp2) + : "d" (reg0), "K" (-EFAULT) : "cc", "memory"); + return size; +} + +#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \ + asm volatile( \ + " sacf 256\n" \ + "0: l %1,0(%6)\n" \ + "1:"insn \ + "2: cs %1,%2,0(%6)\n" \ + "3: jl 1b\n" \ + " lhi %0,0\n" \ + "4: sacf 0\n" \ + EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \ + : "=d" (ret), "=&d" (oldval), "=&d" (newval), \ + "=m" (*uaddr) \ + : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ + "m" (*uaddr) : "cc"); + +int futex_atomic_op(int op, int __user *uaddr, int oparg, int *old) +{ + int oldval = 0, newval, ret; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op("lr %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op("lr %2,%1\nar %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op("lr %2,%1\nor %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op("lr %2,%1\nnr %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op("lr %2,%1\nxr %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + default: + ret = -ENOSYS; + } + dec_preempt_count(); + *old = oldval; + return ret; +} + +int futex_atomic_cmpxchg(int __user *uaddr, int oldval, int newval) +{ + int ret; + + asm volatile( + " sacf 256\n" + " cs %1,%4,0(%5)\n" + "0: lr %0,%1\n" + "1: sacf 0\n" + EX_TABLE(0b,1b) + : "=d" (ret), "+d" (oldval), "=m" (*uaddr) + : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) + : "cc", "memory" ); + return ret; +} + +struct uaccess_ops uaccess_std = { + .copy_from_user = copy_from_user_std, + .copy_from_user_small = copy_from_user_std_small, + .copy_to_user = copy_to_user_std, + .copy_to_user_small = copy_to_user_std_small, + .copy_in_user = copy_in_user_std, + .clear_user = clear_user_std, + .strnlen_user = strnlen_user_std, + .strncpy_from_user = strncpy_from_user_std, + .futex_atomic_op = futex_atomic_op, + .futex_atomic_cmpxchg = futex_atomic_cmpxchg, +}; diff --git a/include/asm-s390/futex.h b/include/asm-s390/futex.h index ffedf14f89f6..5e261e1de671 100644 --- a/include/asm-s390/futex.h +++ b/include/asm-s390/futex.h @@ -7,75 +7,21 @@ #include #include -#ifndef __s390x__ -#define __futex_atomic_fixup \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,4b,2b,4b,3b,4b\n" \ - ".previous" -#else /* __s390x__ */ -#define __futex_atomic_fixup \ - ".section __ex_table,\"a\"\n" \ - " .align 8\n" \ - " .quad 0b,4b,2b,4b,3b,4b\n" \ - ".previous" -#endif /* __s390x__ */ - -#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \ - asm volatile(" sacf 256\n" \ - "0: l %1,0(%6)\n" \ - "1: " insn \ - "2: cs %1,%2,0(%6)\n" \ - "3: jl 1b\n" \ - " lhi %0,0\n" \ - "4: sacf 0\n" \ - __futex_atomic_fixup \ - : "=d" (ret), "=&d" (oldval), "=&d" (newval), \ - "=m" (*uaddr) \ - : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ - "m" (*uaddr) : "cc" ); - static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, newval, ret; + int oldval, ret; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - inc_preempt_count(); - - switch (op) { - case FUTEX_OP_SET: - __futex_atomic_op("lr %2,%5\n", - ret, oldval, newval, uaddr, oparg); - break; - case FUTEX_OP_ADD: - __futex_atomic_op("lr %2,%1\nar %2,%5\n", - ret, oldval, newval, uaddr, oparg); - break; - case FUTEX_OP_OR: - __futex_atomic_op("lr %2,%1\nor %2,%5\n", - ret, oldval, newval, uaddr, oparg); - break; - case FUTEX_OP_ANDN: - __futex_atomic_op("lr %2,%1\nnr %2,%5\n", - ret, oldval, newval, uaddr, oparg); - break; - case FUTEX_OP_XOR: - __futex_atomic_op("lr %2,%1\nxr %2,%5\n", - ret, oldval, newval, uaddr, oparg); - break; - default: - ret = -ENOSYS; - } - - dec_preempt_count(); + ret = uaccess.futex_atomic_op(op, uaddr, oparg, &oldval); if (!ret) { switch (cmp) { @@ -91,32 +37,13 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) return ret; } -static inline int -futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, + int oldval, int newval) { - int ret; - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - asm volatile(" sacf 256\n" - " cs %1,%4,0(%5)\n" - "0: lr %0,%1\n" - "1: sacf 0\n" -#ifndef __s390x__ - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 0b,1b\n" - ".previous" -#else /* __s390x__ */ - ".section __ex_table,\"a\"\n" - " .align 8\n" - " .quad 0b,1b\n" - ".previous" -#endif /* __s390x__ */ - : "=d" (ret), "+d" (oldval), "=m" (*uaddr) - : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) - : "cc", "memory" ); - return oldval; + + return uaccess.futex_atomic_cmpxchg(uaddr, oldval, newval); } #endif /* __KERNEL__ */ diff --git a/include/asm-s390/uaccess.h b/include/asm-s390/uaccess.h index 0b7c0ca4c3d7..39a2716ae188 100644 --- a/include/asm-s390/uaccess.h +++ b/include/asm-s390/uaccess.h @@ -47,7 +47,7 @@ S390_lowcore.user_asce : S390_lowcore.kernel_asce; \ asm volatile ("lctlg 7,7,%0" : : "m" (__pto) ); \ }) -#else +#else /* __s390x__ */ #define set_fs(x) \ ({ \ unsigned long __pto; \ @@ -56,7 +56,7 @@ S390_lowcore.user_asce : S390_lowcore.kernel_asce; \ asm volatile ("lctl 7,7,%0" : : "m" (__pto) ); \ }) -#endif +#endif /* __s390x__ */ #define segment_eq(a,b) ((a).ar4 == (b).ar4) @@ -85,76 +85,50 @@ struct exception_table_entry unsigned long insn, fixup; }; -#ifndef __s390x__ -#define __uaccess_fixup \ - ".section .fixup,\"ax\"\n" \ - "2: lhi %0,%4\n" \ - " bras 1,3f\n" \ - " .long 1b\n" \ - "3: l 1,0(1)\n" \ - " br 1\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,2b\n" \ - ".previous" -#define __uaccess_clobber "cc", "1" -#else /* __s390x__ */ -#define __uaccess_fixup \ - ".section .fixup,\"ax\"\n" \ - "2: lghi %0,%4\n" \ - " jg 1b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 8\n" \ - " .quad 0b,2b\n" \ - ".previous" -#define __uaccess_clobber "cc" -#endif /* __s390x__ */ +struct uaccess_ops { + size_t (*copy_from_user)(size_t, const void __user *, void *); + size_t (*copy_from_user_small)(size_t, const void __user *, void *); + size_t (*copy_to_user)(size_t, void __user *, const void *); + size_t (*copy_to_user_small)(size_t, void __user *, const void *); + size_t (*copy_in_user)(size_t, void __user *, const void __user *); + size_t (*clear_user)(size_t, void __user *); + size_t (*strnlen_user)(size_t, const char __user *); + size_t (*strncpy_from_user)(size_t, const char __user *, char *); + int (*futex_atomic_op)(int op, int __user *, int oparg, int *old); + int (*futex_atomic_cmpxchg)(int __user *, int old, int new); +}; + +extern struct uaccess_ops uaccess; +extern struct uaccess_ops uaccess_std; + +static inline int __put_user_fn(size_t size, void __user *ptr, void *x) +{ + size = uaccess.copy_to_user_small(size, ptr, x); + return size ? -EFAULT : size; +} + +static inline int __get_user_fn(size_t size, const void __user *ptr, void *x) +{ + size = uaccess.copy_from_user_small(size, ptr, x); + return size ? -EFAULT : size; +} /* * These are the main single-value transfer routines. They automatically * use the right size if we just have the right pointer type. */ -#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2) -#define __put_user_asm(x, ptr, err) \ -({ \ - err = 0; \ - asm volatile( \ - "0: mvcs 0(%1,%2),%3,%0\n" \ - "1:\n" \ - __uaccess_fixup \ - : "+&d" (err) \ - : "d" (sizeof(*(ptr))), "a" (ptr), "Q" (x), \ - "K" (-EFAULT) \ - : __uaccess_clobber ); \ -}) -#else -#define __put_user_asm(x, ptr, err) \ -({ \ - err = 0; \ - asm volatile( \ - "0: mvcs 0(%1,%2),0(%3),%0\n" \ - "1:\n" \ - __uaccess_fixup \ - : "+&d" (err) \ - : "d" (sizeof(*(ptr))), "a" (ptr), "a" (&(x)), \ - "K" (-EFAULT), "m" (x) \ - : __uaccess_clobber ); \ -}) -#endif - #define __put_user(x, ptr) \ ({ \ __typeof__(*(ptr)) __x = (x); \ - int __pu_err; \ + int __pu_err = -EFAULT; \ __chk_user_ptr(ptr); \ switch (sizeof (*(ptr))) { \ case 1: \ case 2: \ case 4: \ case 8: \ - __put_user_asm(__x, ptr, __pu_err); \ + __pu_err = __put_user_fn(sizeof (*(ptr)), \ + ptr, &__x); \ break; \ default: \ __put_user_bad(); \ @@ -172,60 +146,36 @@ struct exception_table_entry extern int __put_user_bad(void) __attribute__((noreturn)); -#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2) -#define __get_user_asm(x, ptr, err) \ -({ \ - err = 0; \ - asm volatile ( \ - "0: mvcp %O1(%2,%R1),0(%3),%0\n" \ - "1:\n" \ - __uaccess_fixup \ - : "+&d" (err), "=Q" (x) \ - : "d" (sizeof(*(ptr))), "a" (ptr), \ - "K" (-EFAULT) \ - : __uaccess_clobber ); \ -}) -#else -#define __get_user_asm(x, ptr, err) \ -({ \ - err = 0; \ - asm volatile ( \ - "0: mvcp 0(%2,%5),0(%3),%0\n" \ - "1:\n" \ - __uaccess_fixup \ - : "+&d" (err), "=m" (x) \ - : "d" (sizeof(*(ptr))), "a" (ptr), \ - "K" (-EFAULT), "a" (&(x)) \ - : __uaccess_clobber ); \ -}) -#endif - #define __get_user(x, ptr) \ ({ \ - int __gu_err; \ - __chk_user_ptr(ptr); \ + int __gu_err = -EFAULT; \ + __chk_user_ptr(ptr); \ switch (sizeof(*(ptr))) { \ case 1: { \ unsigned char __x; \ - __get_user_asm(__x, ptr, __gu_err); \ + __gu_err = __get_user_fn(sizeof (*(ptr)), \ + ptr, &__x); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ case 2: { \ unsigned short __x; \ - __get_user_asm(__x, ptr, __gu_err); \ + __gu_err = __get_user_fn(sizeof (*(ptr)), \ + ptr, &__x); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ case 4: { \ unsigned int __x; \ - __get_user_asm(__x, ptr, __gu_err); \ + __gu_err = __get_user_fn(sizeof (*(ptr)), \ + ptr, &__x); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ case 8: { \ unsigned long long __x; \ - __get_user_asm(__x, ptr, __gu_err); \ + __gu_err = __get_user_fn(sizeof (*(ptr)), \ + ptr, &__x); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ @@ -247,8 +197,6 @@ extern int __get_user_bad(void) __attribute__((noreturn)); #define __put_user_unaligned __put_user #define __get_user_unaligned __get_user -extern long __copy_to_user_asm(const void *from, long n, void __user *to); - /** * __copy_to_user: - Copy a block of data into user space, with less checking. * @to: Destination address, in user space. @@ -266,7 +214,10 @@ extern long __copy_to_user_asm(const void *from, long n, void __user *to); static inline unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n) { - return __copy_to_user_asm(from, n, to); + if (__builtin_constant_p(n) && (n <= 256)) + return uaccess.copy_to_user_small(n, to, from); + else + return uaccess.copy_to_user(n, to, from); } #define __copy_to_user_inatomic __copy_to_user @@ -294,8 +245,6 @@ copy_to_user(void __user *to, const void *from, unsigned long n) return n; } -extern long __copy_from_user_asm(void *to, long n, const void __user *from); - /** * __copy_from_user: - Copy a block of data from user space, with less checking. * @to: Destination address, in kernel space. @@ -316,7 +265,10 @@ extern long __copy_from_user_asm(void *to, long n, const void __user *from); static inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { - return __copy_from_user_asm(to, n, from); + if (__builtin_constant_p(n) && (n <= 256)) + return uaccess.copy_from_user_small(n, from, to); + else + return uaccess.copy_from_user(n, from, to); } /** @@ -346,13 +298,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n) return n; } -extern unsigned long __copy_in_user_asm(const void __user *from, long n, - void __user *to); - static inline unsigned long __copy_in_user(void __user *to, const void __user *from, unsigned long n) { - return __copy_in_user_asm(from, n, to); + return uaccess.copy_in_user(n, to, from); } static inline unsigned long @@ -360,34 +309,28 @@ copy_in_user(void __user *to, const void __user *from, unsigned long n) { might_sleep(); if (__access_ok(from,n) && __access_ok(to,n)) - n = __copy_in_user_asm(from, n, to); + n = __copy_in_user(to, from, n); return n; } /* * Copy a null terminated string from userspace. */ -extern long __strncpy_from_user_asm(long count, char *dst, - const char __user *src); - static inline long strncpy_from_user(char *dst, const char __user *src, long count) { long res = -EFAULT; might_sleep(); if (access_ok(VERIFY_READ, src, 1)) - res = __strncpy_from_user_asm(count, dst, src); + res = uaccess.strncpy_from_user(count, src, dst); return res; } - -extern long __strnlen_user_asm(long count, const char __user *src); - static inline unsigned long strnlen_user(const char __user * src, unsigned long n) { might_sleep(); - return __strnlen_user_asm(n, src); + return uaccess.strnlen_user(n, src); } /** @@ -410,12 +353,10 @@ strnlen_user(const char __user * src, unsigned long n) * Zero Userspace */ -extern long __clear_user_asm(void __user *to, long n); - static inline unsigned long __clear_user(void __user *to, unsigned long n) { - return __clear_user_asm(to, n); + return uaccess.clear_user(n, to); } static inline unsigned long @@ -423,7 +364,7 @@ clear_user(void __user *to, unsigned long n) { might_sleep(); if (access_ok(VERIFY_WRITE, to, n)) - n = __clear_user_asm(to, n); + n = uaccess.clear_user(n, to); return n; } From 6c2a9e6df60478e712f3c3d98b5047778a82a3d7 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 20 Sep 2006 15:59:44 +0200 Subject: [PATCH 0255/1063] [S390] Use alternative user-copy operations for new hardware. This introduces new user-copy operations which are optimized for copying more than 256 Bytes on new hardware. Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/head64.S | 13 +++ arch/s390/kernel/setup.c | 6 +- arch/s390/lib/Makefile | 1 + arch/s390/lib/uaccess_mvcos.c | 156 ++++++++++++++++++++++++++++++++++ include/asm-s390/setup.h | 2 + include/asm-s390/uaccess.h | 1 + 6 files changed, 178 insertions(+), 1 deletion(-) create mode 100644 arch/s390/lib/uaccess_mvcos.c diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 1ebaa338aa7e..a8bdd96494c7 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -250,6 +250,19 @@ startup_continue: oi 7(%r12),0x80 # set IDTE flag 0: +# +# find out if we have the MVCOS instruction +# + la %r1,0f-.LPG1(%r13) # set program check address + stg %r1,__LC_PGM_NEW_PSW+8 + .short 0xc800 # mvcos 0(%r0),0(%r0),%r0 + .short 0x0000 + .short 0x0000 +0: tm 0x8f,0x13 # special-operation exception? + bno 1f-.LPG1(%r13) # if yes, MVCOS is present + oi 6(%r12),2 # set MVCOS flag +1: + lpswe .Lentry-.LPG1(13) # jump to _stext in primary-space, # virtual and never return ... .align 16 diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index e229af59976c..e3d9325f6022 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -647,7 +647,11 @@ setup_arch(char **cmdline_p) memory_end = memory_size; - memcpy(&uaccess, &uaccess_std, sizeof(uaccess)); + if (MACHINE_HAS_MVCOS) + memcpy(&uaccess, &uaccess_mvcos, sizeof(uaccess)); + else + memcpy(&uaccess, &uaccess_std, sizeof(uaccess)); + parse_early_param(); #ifndef CONFIG_64BIT diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 96c82424d88b..c42ffedfdb49 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -5,4 +5,5 @@ EXTRA_AFLAGS := -traditional lib-y += delay.o string.o uaccess_std.o +lib-$(CONFIG_64BIT) += uaccess_mvcos.o lib-$(CONFIG_SMP) += spinlock.o diff --git a/arch/s390/lib/uaccess_mvcos.c b/arch/s390/lib/uaccess_mvcos.c new file mode 100644 index 000000000000..86c96d6c191a --- /dev/null +++ b/arch/s390/lib/uaccess_mvcos.c @@ -0,0 +1,156 @@ +/* + * arch/s390/lib/uaccess_mvcos.c + * + * Optimized user space space access functions based on mvcos. + * + * Copyright (C) IBM Corp. 2006 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * Gerald Schaefer (gerald.schaefer@de.ibm.com) + */ + +#include +#include +#include +#include + +#ifndef __s390x__ +#define AHI "ahi" +#define ALR "alr" +#define CLR "clr" +#define LHI "lhi" +#define SLR "slr" +#else +#define AHI "aghi" +#define ALR "algr" +#define CLR "clgr" +#define LHI "lghi" +#define SLR "slgr" +#endif + +size_t copy_from_user_mvcos(size_t size, const void __user *ptr, void *x) +{ + register unsigned long reg0 asm("0") = 0x81UL; + unsigned long tmp1, tmp2; + + tmp1 = -4096UL; + asm volatile( + "0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n" + " jz 4f\n" + "1:"ALR" %0,%3\n" + " "SLR" %1,%3\n" + " "SLR" %2,%3\n" + " j 0b\n" + "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */ + " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */ + " "SLR" %4,%1\n" + " "CLR" %0,%4\n" /* copy crosses next page boundary? */ + " jnh 5f\n" + "3: .insn ss,0xc80000000000,0(%4,%2),0(%1),0\n" + " "SLR" %0,%4\n" + " j 5f\n" + "4:"SLR" %0,%0\n" + "5: \n" + EX_TABLE(0b,2b) EX_TABLE(3b,5b) + : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) + : "d" (reg0) : "cc", "memory"); + return size; +} + +size_t copy_to_user_mvcos(size_t size, void __user *ptr, const void *x) +{ + register unsigned long reg0 asm("0") = 0x810000UL; + unsigned long tmp1, tmp2; + + tmp1 = -4096UL; + asm volatile( + "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n" + " jz 4f\n" + "1:"ALR" %0,%3\n" + " "SLR" %1,%3\n" + " "SLR" %2,%3\n" + " j 0b\n" + "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */ + " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */ + " "SLR" %4,%1\n" + " "CLR" %0,%4\n" /* copy crosses next page boundary? */ + " jnh 5f\n" + "3: .insn ss,0xc80000000000,0(%4,%1),0(%2),0\n" + " "SLR" %0,%4\n" + " j 5f\n" + "4:"SLR" %0,%0\n" + "5: \n" + EX_TABLE(0b,2b) EX_TABLE(3b,5b) + : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) + : "d" (reg0) : "cc", "memory"); + return size; +} + +size_t copy_in_user_mvcos(size_t size, void __user *to, const void __user *from) +{ + register unsigned long reg0 asm("0") = 0x810081UL; + unsigned long tmp1, tmp2; + + tmp1 = -4096UL; + /* FIXME: copy with reduced length. */ + asm volatile( + "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n" + " jz 2f\n" + "1:"ALR" %0,%3\n" + " "SLR" %1,%3\n" + " "SLR" %2,%3\n" + " j 0b\n" + "2:"SLR" %0,%0\n" + "3: \n" + EX_TABLE(0b,3b) + : "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2) + : "d" (reg0) : "cc", "memory"); + return size; +} + +size_t clear_user_mvcos(size_t size, void __user *to) +{ + register unsigned long reg0 asm("0") = 0x810000UL; + unsigned long tmp1, tmp2; + + tmp1 = -4096UL; + asm volatile( + "0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n" + " jz 4f\n" + "1:"ALR" %0,%2\n" + " "SLR" %1,%2\n" + " j 0b\n" + "2: la %3,4095(%1)\n"/* %4 = to + 4095 */ + " nr %3,%2\n" /* %4 = (to + 4095) & -4096 */ + " "SLR" %3,%1\n" + " "CLR" %0,%3\n" /* copy crosses next page boundary? */ + " jnh 5f\n" + "3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n" + " "SLR" %0,%3\n" + " j 5f\n" + "4:"SLR" %0,%0\n" + "5: \n" + EX_TABLE(0b,2b) EX_TABLE(3b,5b) + : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2) + : "a" (empty_zero_page), "d" (reg0) : "cc", "memory"); + return size; +} + +extern size_t copy_from_user_std_small(size_t, const void __user *, void *); +extern size_t copy_to_user_std_small(size_t, void __user *, const void *); +extern size_t strnlen_user_std(size_t, const char __user *); +extern size_t strncpy_from_user_std(size_t, const char __user *, char *); +extern int futex_atomic_op(int, int __user *, int, int *); +extern int futex_atomic_cmpxchg(int __user *, int, int); + +struct uaccess_ops uaccess_mvcos = { + .copy_from_user = copy_from_user_mvcos, + .copy_from_user_small = copy_from_user_std_small, + .copy_to_user = copy_to_user_mvcos, + .copy_to_user_small = copy_to_user_std_small, + .copy_in_user = copy_in_user_mvcos, + .clear_user = clear_user_mvcos, + .strnlen_user = strnlen_user_std, + .strncpy_from_user = strncpy_from_user_std, + .futex_atomic_op = futex_atomic_op, + .futex_atomic_cmpxchg = futex_atomic_cmpxchg, +}; diff --git a/include/asm-s390/setup.h b/include/asm-s390/setup.h index 00c03e46689b..f1959732b6fd 100644 --- a/include/asm-s390/setup.h +++ b/include/asm-s390/setup.h @@ -44,10 +44,12 @@ extern unsigned long machine_flags; #define MACHINE_HAS_IEEE (machine_flags & 2) #define MACHINE_HAS_CSP (machine_flags & 8) #define MACHINE_HAS_DIAG44 (1) +#define MACHINE_HAS_MVCOS (0) #else /* __s390x__ */ #define MACHINE_HAS_IEEE (1) #define MACHINE_HAS_CSP (1) #define MACHINE_HAS_DIAG44 (machine_flags & 32) +#define MACHINE_HAS_MVCOS (machine_flags & 512) #endif /* __s390x__ */ diff --git a/include/asm-s390/uaccess.h b/include/asm-s390/uaccess.h index 39a2716ae188..e2047b0c9092 100644 --- a/include/asm-s390/uaccess.h +++ b/include/asm-s390/uaccess.h @@ -100,6 +100,7 @@ struct uaccess_ops { extern struct uaccess_ops uaccess; extern struct uaccess_ops uaccess_std; +extern struct uaccess_ops uaccess_mvcos; static inline int __put_user_fn(size_t size, void __user *ptr, void *x) { From 250b2dc83347feb73eb6bdf7511685e72b587e68 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Wed, 20 Sep 2006 15:59:47 +0200 Subject: [PATCH 0256/1063] [S390] Get rid of DBG macro. Signed-off-by: Cornelia Huck Signed-off-by: Martin Schwidefsky --- drivers/s390/s390mach.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c index 5399c5d99b81..a914129a4da9 100644 --- a/drivers/s390/s390mach.c +++ b/drivers/s390/s390mach.c @@ -19,9 +19,6 @@ #include "s390mach.h" -#define DBG printk -// #define DBG(args,...) do {} while (0); - static struct semaphore m_sem; extern int css_process_crw(int, int); @@ -83,11 +80,11 @@ repeat: ccode = stcrw(&crw[chain]); if (ccode != 0) break; - DBG(KERN_DEBUG "crw_info : CRW reports slct=%d, oflw=%d, " - "chn=%d, rsc=%X, anc=%d, erc=%X, rsid=%X\n", - crw[chain].slct, crw[chain].oflw, crw[chain].chn, - crw[chain].rsc, crw[chain].anc, crw[chain].erc, - crw[chain].rsid); + printk(KERN_DEBUG "crw_info : CRW reports slct=%d, oflw=%d, " + "chn=%d, rsc=%X, anc=%d, erc=%X, rsid=%X\n", + crw[chain].slct, crw[chain].oflw, crw[chain].chn, + crw[chain].rsc, crw[chain].anc, crw[chain].erc, + crw[chain].rsid); /* Check for overflows. */ if (crw[chain].oflw) { pr_debug("%s: crw overflow detected!\n", __FUNCTION__); @@ -117,8 +114,8 @@ repeat: * reported to the common I/O layer. */ if (crw[chain].slct) { - DBG(KERN_INFO"solicited machine check for " - "channel path %02X\n", crw[0].rsid); + pr_debug("solicited machine check for " + "channel path %02X\n", crw[0].rsid); break; } switch (crw[0].erc) { From db0c2d59087296b3567ec408abe17108db88b385 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Wed, 20 Sep 2006 15:59:49 +0200 Subject: [PATCH 0257/1063] [S390] set modalias for ccw bus uevents. Add the MODALIAS environment variable for ccw bus uevents. Signed-off-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/device.c | 111 +++++++++++++++++++++++--------------- 1 file changed, 67 insertions(+), 44 deletions(-) diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 646da5640401..688945662c15 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -52,53 +52,81 @@ ccw_bus_match (struct device * dev, struct device_driver * drv) return 1; } -/* - * Hotplugging interface for ccw devices. - * Heavily modeled on pci and usb hotplug. - */ -static int -ccw_uevent (struct device *dev, char **envp, int num_envp, - char *buffer, int buffer_size) +/* Store modalias string delimited by prefix/suffix string into buffer with + * specified size. Return length of resulting string (excluding trailing '\0') + * even if string doesn't fit buffer (snprintf semantics). */ +static int snprint_alias(char *buf, size_t size, const char *prefix, + struct ccw_device_id *id, const char *suffix) +{ + int len; + + len = snprintf(buf, size, "%sccw:t%04Xm%02X", prefix, id->cu_type, + id->cu_model); + if (len > size) + return len; + buf += len; + size -= len; + + if (id->dev_type != 0) + len += snprintf(buf, size, "dt%04Xdm%02X%s", id->dev_type, + id->dev_model, suffix); + else + len += snprintf(buf, size, "dtdm%s", suffix); + + return len; +} + +/* Set up environment variables for ccw device uevent. Return 0 on success, + * non-zero otherwise. */ +static int ccw_uevent(struct device *dev, char **envp, int num_envp, + char *buffer, int buffer_size) { struct ccw_device *cdev = to_ccwdev(dev); + struct ccw_device_id *id = &(cdev->id); int i = 0; - int length = 0; + int len; - if (!cdev) - return -ENODEV; - - /* what we want to pass to /sbin/hotplug */ - - envp[i++] = buffer; - length += scnprintf(buffer, buffer_size - length, "CU_TYPE=%04X", - cdev->id.cu_type); - if ((buffer_size - length <= 0) || (i >= num_envp)) + /* CU_TYPE= */ + len = snprintf(buffer, buffer_size, "CU_TYPE=%04X", id->cu_type) + 1; + if (len > buffer_size || i >= num_envp) return -ENOMEM; - ++length; - buffer += length; - envp[i++] = buffer; - length += scnprintf(buffer, buffer_size - length, "CU_MODEL=%02X", - cdev->id.cu_model); - if ((buffer_size - length <= 0) || (i >= num_envp)) + buffer += len; + buffer_size -= len; + + /* CU_MODEL= */ + len = snprintf(buffer, buffer_size, "CU_MODEL=%02X", id->cu_model) + 1; + if (len > buffer_size || i >= num_envp) return -ENOMEM; - ++length; - buffer += length; + envp[i++] = buffer; + buffer += len; + buffer_size -= len; /* The next two can be zero, that's ok for us */ - envp[i++] = buffer; - length += scnprintf(buffer, buffer_size - length, "DEV_TYPE=%04X", - cdev->id.dev_type); - if ((buffer_size - length <= 0) || (i >= num_envp)) + /* DEV_TYPE= */ + len = snprintf(buffer, buffer_size, "DEV_TYPE=%04X", id->dev_type) + 1; + if (len > buffer_size || i >= num_envp) return -ENOMEM; - ++length; - buffer += length; + envp[i++] = buffer; + buffer += len; + buffer_size -= len; - envp[i++] = buffer; - length += scnprintf(buffer, buffer_size - length, "DEV_MODEL=%02X", - cdev->id.dev_model); - if ((buffer_size - length <= 0) || (i >= num_envp)) + /* DEV_MODEL= */ + len = snprintf(buffer, buffer_size, "DEV_MODEL=%02X", + (unsigned char) id->dev_model) + 1; + if (len > buffer_size || i >= num_envp) return -ENOMEM; + envp[i++] = buffer; + buffer += len; + buffer_size -= len; + + /* MODALIAS= */ + len = snprint_alias(buffer, buffer_size, "MODALIAS=", id, "") + 1; + if (len > buffer_size || i >= num_envp) + return -ENOMEM; + envp[i++] = buffer; + buffer += len; + buffer_size -= len; envp[i] = NULL; @@ -251,16 +279,11 @@ modalias_show (struct device *dev, struct device_attribute *attr, char *buf) { struct ccw_device *cdev = to_ccwdev(dev); struct ccw_device_id *id = &(cdev->id); - int ret; + int len; - ret = sprintf(buf, "ccw:t%04Xm%02X", - id->cu_type, id->cu_model); - if (id->dev_type != 0) - ret += sprintf(buf + ret, "dt%04Xdm%02X\n", - id->dev_type, id->dev_model); - else - ret += sprintf(buf + ret, "dtdm\n"); - return ret; + len = snprint_alias(buf, PAGE_SIZE, "", id, "\n") + 1; + + return len > PAGE_SIZE ? PAGE_SIZE : len; } static ssize_t From dcd707b4bdc10b4fa20efa116dbaeded21513115 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Wed, 20 Sep 2006 15:59:52 +0200 Subject: [PATCH 0258/1063] [S390] Replace nopav-message on VM. Specifying kernel parameter "dasd=nopav" on systems running under VM has no function but results in message "disable PAV mode". Correct message is "'nopav' not supported on VM". Signed-off-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_devmap.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index 80cf0999465a..91cf971f0652 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -258,8 +258,12 @@ dasd_parse_keyword( char *parsestring ) { return residual_str; } if (strncmp("nopav", parsestring, length) == 0) { - dasd_nopav = 1; - MESSAGE(KERN_INFO, "%s", "disable PAV mode"); + if (MACHINE_IS_VM) + MESSAGE(KERN_INFO, "%s", "'nopav' not supported on VM"); + else { + dasd_nopav = 1; + MESSAGE(KERN_INFO, "%s", "disable PAV mode"); + } return residual_str; } if (strncmp("fixedbuffers", parsestring, length) == 0) { From dd9963f9dd0985e16e878fd3632ecadfc54d3fbb Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Wed, 20 Sep 2006 15:59:54 +0200 Subject: [PATCH 0259/1063] [S390] cio: subchannels in no-path state. Subchannel may incorrectly remain in state no-path after channel paths have reappeared. Currently the scan for subchannels which are using a channel path ends at the first occurrence if a full link address was provided by the channel subsystem. The scan needs to continue over all subchannels. Signed-off-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/chsc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index c28444af0919..9f9134b67e40 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -378,6 +378,7 @@ __s390_process_res_acc(struct subchannel_id schid, void *data) if (chp_mask == 0) { spin_unlock_irq(&sch->lock); + put_device(&sch->dev); return 0; } old_lpm = sch->lpm; @@ -392,7 +393,7 @@ __s390_process_res_acc(struct subchannel_id schid, void *data) spin_unlock_irq(&sch->lock); put_device(&sch->dev); - return (res_data->fla_mask == 0xffff) ? -ENODEV : 0; + return 0; } From e0e32c8eba86fd5ea79eefad6f2c0b4988dfd02a Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Wed, 20 Sep 2006 15:59:57 +0200 Subject: [PATCH 0260/1063] [S390] cio: update path groups on logical CHPID changes. CHPIDs that are logically varied off will not be removed from a CCW device's path group because resign-from-pathgroup command is issued with invalid path mask of 0 because internal CCW operations are masked by the logical path mask after the relevant bits are cleared by the vary operation. Do not apply logical path mask to internal operations. Signed-off-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/cio.c | 2 +- drivers/s390/cio/device_ops.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 61eb7caa1567..54cce542a1ee 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -191,7 +191,7 @@ cio_start_key (struct subchannel *sch, /* subchannel structure */ sch->orb.pfch = sch->options.prefetch == 0; sch->orb.spnd = sch->options.suspend; sch->orb.ssic = sch->options.suspend && sch->options.inter; - sch->orb.lpm = (lpm != 0) ? (lpm & sch->opm) : sch->lpm; + sch->orb.lpm = (lpm != 0) ? lpm : sch->lpm; #ifdef CONFIG_64BIT /* * for 64 bit we always support 64 bit IDAWs with 4k page size only diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index 9e3de0bd59b5..acad8f852eda 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -96,6 +96,12 @@ ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa, ret = cio_set_options (sch, flags); if (ret) return ret; + /* Adjust requested path mask to excluded varied off paths. */ + if (lpm) { + lpm &= sch->opm; + if (lpm == 0) + return -EACCES; + } ret = cio_start_key (sch, cpa, lpm, key); if (ret == 0) cdev->private->intparm = intparm; @@ -304,7 +310,7 @@ __ccw_device_retry_loop(struct ccw_device *cdev, struct ccw1 *ccw, long magic, _ sch = to_subchannel(cdev->dev.parent); do { ret = cio_start (sch, ccw, lpm); - if ((ret == -EBUSY) || (ret == -EACCES)) { + if (ret == -EBUSY) { /* Try again later. */ spin_unlock_irq(&sch->lock); msleep(10); @@ -433,6 +439,13 @@ read_conf_data_lpm (struct ccw_device *cdev, void **buffer, int *length, __u8 lp if (!ciw || ciw->cmd == 0) return -EOPNOTSUPP; + /* Adjust requested path mask to excluded varied off paths. */ + if (lpm) { + lpm &= sch->opm; + if (lpm == 0) + return -EACCES; + } + rcd_ccw = kzalloc(sizeof(struct ccw1), GFP_KERNEL | GFP_DMA); if (!rcd_ccw) return -ENOMEM; From 28bdc6f6233f380ddc0b430cabd88ffeafea34c7 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Wed, 20 Sep 2006 15:59:59 +0200 Subject: [PATCH 0261/1063] [S390] cio: always query all paths on path verification. Reappearing channel paths are sometimes not utilized by CCW devices because path verification incorrectly relies on path-operational-mask information which is not updated until a channel path has been used again. Modify path verification procedure to always query all available paths to a device. Signed-off-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/chsc.c | 2 +- drivers/s390/cio/cio.c | 5 +-- drivers/s390/cio/device_fsm.c | 39 +++++++++------- drivers/s390/cio/device_ops.c | 2 +- drivers/s390/cio/device_pgid.c | 81 ++++++++++++++++++---------------- 5 files changed, 68 insertions(+), 61 deletions(-) diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 9f9134b67e40..3bb4e472d73d 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -256,7 +256,7 @@ s390_subchannel_remove_chpid(struct device *dev, void *data) /* trigger path verification. */ if (sch->driver && sch->driver->verify) sch->driver->verify(&sch->dev); - else if (sch->vpm == mask) + else if (sch->lpm == mask) goto out_unreg; out_unlock: spin_unlock_irq(&sch->lock); diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 54cce542a1ee..2e2882daefbb 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -569,10 +569,7 @@ cio_validate_subchannel (struct subchannel *sch, struct subchannel_id schid) sch->opm = 0xff; if (!cio_is_console(sch->schid)) chsc_validate_chpids(sch); - sch->lpm = sch->schib.pmcw.pim & - sch->schib.pmcw.pam & - sch->schib.pmcw.pom & - sch->opm; + sch->lpm = sch->schib.pmcw.pam & sch->opm; CIO_DEBUG(KERN_INFO, 0, "Detected device %04x on subchannel 0.%x.%04X" diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 7756f324fb6f..dace46fc32e8 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -232,10 +232,7 @@ ccw_device_recog_done(struct ccw_device *cdev, int state) */ old_lpm = sch->lpm; stsch(sch->schid, &sch->schib); - sch->lpm = sch->schib.pmcw.pim & - sch->schib.pmcw.pam & - sch->schib.pmcw.pom & - sch->opm; + sch->lpm = sch->schib.pmcw.pam & sch->opm; /* Check since device may again have become not operational. */ if (!sch->schib.pmcw.dnv) state = DEV_STATE_NOT_OPER; @@ -455,8 +452,8 @@ ccw_device_sense_pgid_done(struct ccw_device *cdev, int err) return; } /* Start Path Group verification. */ - sch->vpm = 0; /* Start with no path groups set. */ cdev->private->state = DEV_STATE_VERIFY; + cdev->private->flags.doverify = 0; ccw_device_verify_start(cdev); } @@ -556,7 +553,19 @@ ccw_device_nopath_notify(void *data) void ccw_device_verify_done(struct ccw_device *cdev, int err) { - cdev->private->flags.doverify = 0; + struct subchannel *sch; + + sch = to_subchannel(cdev->dev.parent); + /* Update schib - pom may have changed. */ + stsch(sch->schid, &sch->schib); + /* Update lpm with verified path mask. */ + sch->lpm = sch->vpm; + /* Repeat path verification? */ + if (cdev->private->flags.doverify) { + cdev->private->flags.doverify = 0; + ccw_device_verify_start(cdev); + return; + } switch (err) { case -EOPNOTSUPP: /* path grouping not supported, just set online. */ cdev->private->options.pgroup = 0; @@ -614,6 +623,7 @@ ccw_device_online(struct ccw_device *cdev) if (!cdev->private->options.pgroup) { /* Start initial path verification. */ cdev->private->state = DEV_STATE_VERIFY; + cdev->private->flags.doverify = 0; ccw_device_verify_start(cdev); return 0; } @@ -660,7 +670,6 @@ ccw_device_offline(struct ccw_device *cdev) /* Are we doing path grouping? */ if (!cdev->private->options.pgroup) { /* No, set state offline immediately. */ - sch->vpm = 0; ccw_device_done(cdev, DEV_STATE_OFFLINE); return 0; } @@ -781,6 +790,7 @@ ccw_device_online_verify(struct ccw_device *cdev, enum dev_event dev_event) } /* Device is idle, we can do the path verification. */ cdev->private->state = DEV_STATE_VERIFY; + cdev->private->flags.doverify = 0; ccw_device_verify_start(cdev); } @@ -1043,9 +1053,9 @@ ccw_device_wait4io_timeout(struct ccw_device *cdev, enum dev_event dev_event) } static void -ccw_device_wait4io_verify(struct ccw_device *cdev, enum dev_event dev_event) +ccw_device_delay_verify(struct ccw_device *cdev, enum dev_event dev_event) { - /* When the I/O has terminated, we have to start verification. */ + /* Start verification after current task finished. */ cdev->private->flags.doverify = 1; } @@ -1111,10 +1121,7 @@ device_trigger_reprobe(struct subchannel *sch) * The pim, pam, pom values may not be accurate, but they are the best * we have before performing device selection :/ */ - sch->lpm = sch->schib.pmcw.pim & - sch->schib.pmcw.pam & - sch->schib.pmcw.pom & - sch->opm; + sch->lpm = sch->schib.pmcw.pam & sch->opm; /* Re-set some bits in the pmcw that were lost. */ sch->schib.pmcw.isc = 3; sch->schib.pmcw.csense = 1; @@ -1238,7 +1245,7 @@ fsm_func_t *dev_jumptable[NR_DEV_STATES][NR_DEV_EVENTS] = { [DEV_EVENT_NOTOPER] = ccw_device_online_notoper, [DEV_EVENT_INTERRUPT] = ccw_device_verify_irq, [DEV_EVENT_TIMEOUT] = ccw_device_onoff_timeout, - [DEV_EVENT_VERIFY] = ccw_device_nop, + [DEV_EVENT_VERIFY] = ccw_device_delay_verify, }, [DEV_STATE_ONLINE] = { [DEV_EVENT_NOTOPER] = ccw_device_online_notoper, @@ -1281,7 +1288,7 @@ fsm_func_t *dev_jumptable[NR_DEV_STATES][NR_DEV_EVENTS] = { [DEV_EVENT_NOTOPER] = ccw_device_online_notoper, [DEV_EVENT_INTERRUPT] = ccw_device_wait4io_irq, [DEV_EVENT_TIMEOUT] = ccw_device_wait4io_timeout, - [DEV_EVENT_VERIFY] = ccw_device_wait4io_verify, + [DEV_EVENT_VERIFY] = ccw_device_delay_verify, }, [DEV_STATE_QUIESCE] = { [DEV_EVENT_NOTOPER] = ccw_device_quiesce_done, @@ -1294,7 +1301,7 @@ fsm_func_t *dev_jumptable[NR_DEV_STATES][NR_DEV_EVENTS] = { [DEV_EVENT_NOTOPER] = ccw_device_nop, [DEV_EVENT_INTERRUPT] = ccw_device_start_id, [DEV_EVENT_TIMEOUT] = ccw_device_bug, - [DEV_EVENT_VERIFY] = ccw_device_nop, + [DEV_EVENT_VERIFY] = ccw_device_start_id, }, [DEV_STATE_DISCONNECTED_SENSE_ID] = { [DEV_EVENT_NOTOPER] = ccw_device_recog_notoper, diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index acad8f852eda..93a897eebfff 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -256,7 +256,7 @@ ccw_device_get_path_mask(struct ccw_device *cdev) if (!sch) return 0; else - return sch->vpm; + return sch->lpm; } static void diff --git a/drivers/s390/cio/device_pgid.c b/drivers/s390/cio/device_pgid.c index 1693a102dcfe..8ca2d078848c 100644 --- a/drivers/s390/cio/device_pgid.c +++ b/drivers/s390/cio/device_pgid.c @@ -245,18 +245,17 @@ __ccw_device_do_pgid(struct ccw_device *cdev, __u8 func) memset(&cdev->private->irb, 0, sizeof(struct irb)); /* Try multiple times. */ - ret = -ENODEV; + ret = -EACCES; if (cdev->private->iretry > 0) { cdev->private->iretry--; ret = cio_start (sch, cdev->private->iccws, cdev->private->imask); - /* ret is 0, -EBUSY, -EACCES or -ENODEV */ - if ((ret != -EACCES) && (ret != -ENODEV)) + /* We expect an interrupt in case of success or busy + * indication. */ + if ((ret == 0) || (ret == -EBUSY)) return ret; } - /* PGID command failed on this path. Switch it off. */ - sch->lpm &= ~cdev->private->imask; - sch->vpm &= ~cdev->private->imask; + /* PGID command failed on this path. */ CIO_MSG_EVENT(2, "SPID - Device %04x on Subchannel " "0.%x.%04x, lpm %02X, became 'not operational'\n", cdev->private->devno, sch->schid.ssid, @@ -286,18 +285,17 @@ static int __ccw_device_do_nop(struct ccw_device *cdev) memset(&cdev->private->irb, 0, sizeof(struct irb)); /* Try multiple times. */ - ret = -ENODEV; + ret = -EACCES; if (cdev->private->iretry > 0) { cdev->private->iretry--; ret = cio_start (sch, cdev->private->iccws, cdev->private->imask); - /* ret is 0, -EBUSY, -EACCES or -ENODEV */ - if ((ret != -EACCES) && (ret != -ENODEV)) + /* We expect an interrupt in case of success or busy + * indication. */ + if ((ret == 0) || (ret == -EBUSY)) return ret; } - /* nop command failed on this path. Switch it off. */ - sch->lpm &= ~cdev->private->imask; - sch->vpm &= ~cdev->private->imask; + /* nop command failed on this path. */ CIO_MSG_EVENT(2, "NOP - Device %04x on Subchannel " "0.%x.%04x, lpm %02X, became 'not operational'\n", cdev->private->devno, sch->schid.ssid, @@ -372,27 +370,32 @@ static void __ccw_device_verify_start(struct ccw_device *cdev) { struct subchannel *sch; - __u8 imask, func; + __u8 func; int ret; sch = to_subchannel(cdev->dev.parent); - while (sch->vpm != sch->lpm) { - /* Find first unequal bit in vpm vs. lpm */ - for (imask = 0x80; imask != 0; imask >>= 1) - if ((sch->vpm & imask) != (sch->lpm & imask)) - break; - cdev->private->imask = imask; + /* Repeat for all paths. */ + for (; cdev->private->imask; cdev->private->imask >>= 1, + cdev->private->iretry = 5) { + if ((cdev->private->imask & sch->schib.pmcw.pam) == 0) + /* Path not available, try next. */ + continue; if (cdev->private->options.pgroup) { - func = (sch->vpm & imask) ? - SPID_FUNC_RESIGN : SPID_FUNC_ESTABLISH; + if (sch->opm & cdev->private->imask) + func = SPID_FUNC_ESTABLISH; + else + func = SPID_FUNC_RESIGN; ret = __ccw_device_do_pgid(cdev, func); } else ret = __ccw_device_do_nop(cdev); + /* We expect an interrupt in case of success or busy + * indication. */ if (ret == 0 || ret == -EBUSY) return; - cdev->private->iretry = 5; + /* Permanent path failure, try next. */ } - ccw_device_verify_done(cdev, (sch->lpm != 0) ? 0 : -ENODEV); + /* Done with all paths. */ + ccw_device_verify_done(cdev, (sch->vpm != 0) ? 0 : -ENODEV); } /* @@ -421,14 +424,14 @@ ccw_device_verify_irq(struct ccw_device *cdev, enum dev_event dev_event) else ret = __ccw_device_check_nop(cdev); memset(&cdev->private->irb, 0, sizeof(struct irb)); + switch (ret) { /* 0, -ETIME, -EAGAIN, -EOPNOTSUPP or -EACCES */ case 0: - /* Establish or Resign Path Group done. Update vpm. */ - if ((sch->lpm & cdev->private->imask) != 0) - sch->vpm |= cdev->private->imask; - else - sch->vpm &= ~cdev->private->imask; + /* Path verification ccw finished successfully, update lpm. */ + sch->vpm |= sch->opm & cdev->private->imask; + /* Go on with next path. */ + cdev->private->imask >>= 1; cdev->private->iretry = 5; __ccw_device_verify_start(cdev); break; @@ -441,6 +444,10 @@ ccw_device_verify_irq(struct ccw_device *cdev, enum dev_event dev_event) cdev->private->options.pgroup = 0; else cdev->private->flags.pgid_single = 1; + /* Retry */ + sch->vpm = 0; + cdev->private->imask = 0x80; + cdev->private->iretry = 5; /* fall through. */ case -EAGAIN: /* Try again. */ __ccw_device_verify_start(cdev); @@ -449,8 +456,7 @@ ccw_device_verify_irq(struct ccw_device *cdev, enum dev_event dev_event) ccw_device_verify_done(cdev, -ETIME); break; case -EACCES: /* channel is not operational. */ - sch->lpm &= ~cdev->private->imask; - sch->vpm &= ~cdev->private->imask; + cdev->private->imask >>= 1; cdev->private->iretry = 5; __ccw_device_verify_start(cdev); break; @@ -463,19 +469,17 @@ ccw_device_verify_start(struct ccw_device *cdev) struct subchannel *sch = to_subchannel(cdev->dev.parent); cdev->private->flags.pgid_single = 0; + cdev->private->imask = 0x80; cdev->private->iretry = 5; - /* - * Update sch->lpm with current values to catch paths becoming - * available again. - */ + + /* Start with empty vpm. */ + sch->vpm = 0; + + /* Get current pam. */ if (stsch(sch->schid, &sch->schib)) { ccw_device_verify_done(cdev, -ENODEV); return; } - sch->lpm = sch->schib.pmcw.pim & - sch->schib.pmcw.pam & - sch->schib.pmcw.pom & - sch->opm; __ccw_device_verify_start(cdev); } @@ -524,7 +528,6 @@ ccw_device_disband_irq(struct ccw_device *cdev, enum dev_event dev_event) switch (ret) { /* 0, -ETIME, -EAGAIN, -EOPNOTSUPP or -EACCES */ case 0: /* disband successful. */ - sch->vpm = 0; ccw_device_disband_done(cdev, ret); break; case -EOPNOTSUPP: From 564337f34cc10fd8f30329e4e5f14f8995db5711 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Wed, 20 Sep 2006 16:00:01 +0200 Subject: [PATCH 0262/1063] [S390] cio: subchannel evaluation function operates without lock css_evaluate_subchannel() operates subchannel without lock which can lead to erratic behavior caused by concurrent device access. Also split evaluation function to make it more readable. Signed-off-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/css.c | 207 +++++++++++++++++++++-------------------- 1 file changed, 106 insertions(+), 101 deletions(-) diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 13eeea3d547f..7086a74e9871 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -182,136 +182,141 @@ get_subchannel_by_schid(struct subchannel_id schid) return dev ? to_subchannel(dev) : NULL; } - -static inline int -css_get_subchannel_status(struct subchannel *sch, struct subchannel_id schid) +static inline int css_get_subchannel_status(struct subchannel *sch) { struct schib schib; - int cc; - cc = stsch(schid, &schib); - if (cc) + if (stsch(sch->schid, &schib) || !schib.pmcw.dnv) return CIO_GONE; - if (!schib.pmcw.dnv) - return CIO_GONE; - if (sch && sch->schib.pmcw.dnv && - (schib.pmcw.dev != sch->schib.pmcw.dev)) + if (sch->schib.pmcw.dnv && (schib.pmcw.dev != sch->schib.pmcw.dev)) return CIO_REVALIDATE; - if (sch && !sch->lpm) + if (!sch->lpm) return CIO_NO_PATH; return CIO_OPER; } - -static int -css_evaluate_subchannel(struct subchannel_id schid, int slow) + +static int css_evaluate_known_subchannel(struct subchannel *sch, int slow) { int event, ret, disc; - struct subchannel *sch; unsigned long flags; + enum { NONE, UNREGISTER, UNREGISTER_PROBE, REPROBE } action; - sch = get_subchannel_by_schid(schid); - disc = sch ? device_is_disconnected(sch) : 0; + spin_lock_irqsave(&sch->lock, flags); + disc = device_is_disconnected(sch); if (disc && slow) { - if (sch) - put_device(&sch->dev); - return 0; /* Already processed. */ + /* Disconnected devices are evaluated directly only.*/ + spin_unlock_irqrestore(&sch->lock, flags); + return 0; } - /* - * We've got a machine check, so running I/O won't get an interrupt. - * Kill any pending timers. - */ - if (sch) - device_kill_pending_timer(sch); + /* No interrupt after machine check - kill pending timers. */ + device_kill_pending_timer(sch); if (!disc && !slow) { - if (sch) - put_device(&sch->dev); - return -EAGAIN; /* Will be done on the slow path. */ + /* Non-disconnected devices are evaluated on the slow path. */ + spin_unlock_irqrestore(&sch->lock, flags); + return -EAGAIN; } - event = css_get_subchannel_status(sch, schid); + event = css_get_subchannel_status(sch); CIO_MSG_EVENT(4, "Evaluating schid 0.%x.%04x, event %d, %s, %s path.\n", - schid.ssid, schid.sch_no, event, - sch?(disc?"disconnected":"normal"):"unknown", - slow?"slow":"fast"); + sch->schid.ssid, sch->schid.sch_no, event, + disc ? "disconnected" : "normal", + slow ? "slow" : "fast"); + /* Analyze subchannel status. */ + action = NONE; switch (event) { case CIO_NO_PATH: + if (disc) { + /* Check if paths have become available. */ + action = REPROBE; + break; + } + /* fall through */ case CIO_GONE: - if (!sch) { - /* Never used this subchannel. Ignore. */ - ret = 0; - break; - } - if (disc && (event == CIO_NO_PATH)) { - /* - * Uargh, hack again. Because we don't get a machine - * check on configure on, our path bookkeeping can - * be out of date here (it's fine while we only do - * logical varying or get chsc machine checks). We - * need to force reprobing or we might miss devices - * coming operational again. It won't do harm in real - * no path situations. - */ - spin_lock_irqsave(&sch->lock, flags); - device_trigger_reprobe(sch); - spin_unlock_irqrestore(&sch->lock, flags); - ret = 0; - break; - } - if (sch->driver && sch->driver->notify && - sch->driver->notify(&sch->dev, event)) { - cio_disable_subchannel(sch); - device_set_disconnected(sch); - ret = 0; - break; - } - /* - * Unregister subchannel. - * The device will be killed automatically. - */ + /* Prevent unwanted effects when opening lock. */ cio_disable_subchannel(sch); + device_set_disconnected(sch); + /* Ask driver what to do with device. */ + action = UNREGISTER; + if (sch->driver && sch->driver->notify) { + spin_unlock_irqrestore(&sch->lock, flags); + ret = sch->driver->notify(&sch->dev, event); + spin_lock_irqsave(&sch->lock, flags); + if (ret) + action = NONE; + } + break; + case CIO_REVALIDATE: + /* Device will be removed, so no notify necessary. */ + if (disc) + /* Reprobe because immediate unregister might block. */ + action = REPROBE; + else + action = UNREGISTER_PROBE; + break; + case CIO_OPER: + if (disc) + /* Get device operational again. */ + action = REPROBE; + break; + } + /* Perform action. */ + ret = 0; + switch (action) { + case UNREGISTER: + case UNREGISTER_PROBE: + /* Unregister device (will use subchannel lock). */ + spin_unlock_irqrestore(&sch->lock, flags); css_sch_device_unregister(sch); + spin_lock_irqsave(&sch->lock, flags); + /* Reset intparm to zeroes. */ sch->schib.pmcw.intparm = 0; cio_modify(sch); - put_device(&sch->dev); - ret = 0; + + /* Probe if necessary. */ + if (action == UNREGISTER_PROBE) + ret = css_probe_device(sch->schid); break; - case CIO_REVALIDATE: - /* - * Revalidation machine check. Sick. - * We don't notify the driver since we have to throw the device - * away in any case. - */ - if (!disc) { - css_sch_device_unregister(sch); - /* Reset intparm to zeroes. */ - sch->schib.pmcw.intparm = 0; - cio_modify(sch); - put_device(&sch->dev); - ret = css_probe_device(schid); - } else { - /* - * We can't immediately deregister the disconnected - * device since it might block. - */ - spin_lock_irqsave(&sch->lock, flags); - device_trigger_reprobe(sch); - spin_unlock_irqrestore(&sch->lock, flags); - ret = 0; - } - break; - case CIO_OPER: - if (disc) { - spin_lock_irqsave(&sch->lock, flags); - /* Get device operational again. */ - device_trigger_reprobe(sch); - spin_unlock_irqrestore(&sch->lock, flags); - } - ret = sch ? 0 : css_probe_device(schid); + case REPROBE: + device_trigger_reprobe(sch); break; default: - BUG(); - ret = 0; + break; } + spin_unlock_irqrestore(&sch->lock, flags); + + return ret; +} + +static int css_evaluate_new_subchannel(struct subchannel_id schid, int slow) +{ + struct schib schib; + + if (!slow) { + /* Will be done on the slow path. */ + return -EAGAIN; + } + if (stsch(schid, &schib) || !schib.pmcw.dnv) { + /* Unusable - ignore. */ + return 0; + } + CIO_MSG_EVENT(4, "Evaluating schid 0.%x.%04x, event %d, unknown, " + "slow path.\n", schid.ssid, schid.sch_no, CIO_OPER); + + return css_probe_device(schid); +} + +static int css_evaluate_subchannel(struct subchannel_id schid, int slow) +{ + struct subchannel *sch; + int ret; + + sch = get_subchannel_by_schid(schid); + if (sch) { + ret = css_evaluate_known_subchannel(sch, slow); + put_device(&sch->dev); + } else + ret = css_evaluate_new_subchannel(schid, slow); + return ret; } From 388c571cffc4ae4e64f0786333e811308acbbc10 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 20 Sep 2006 16:00:04 +0200 Subject: [PATCH 0263/1063] [S390] hypfs crashes with invalid mount option. When an invalid mount option is specified, no root inode is created for hypfs, hypfs_fill_super() returns with -EINVAL and then hypfs_kill_super() is called. hypfs_kill_super() does not check if the root inode has been initialized. This patch adds this check. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/hypfs/inode.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index bdcad2ea1ff4..bdade5f2e325 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -312,10 +312,12 @@ static void hypfs_kill_super(struct super_block *sb) { struct hypfs_sb_info *sb_info = sb->s_fs_info; - hypfs_delete_tree(sb->s_root); - hypfs_remove(sb_info->update_file); - kfree(sb->s_fs_info); - sb->s_fs_info = NULL; + if (sb->s_root) { + hypfs_delete_tree(sb->s_root); + hypfs_remove(sb_info->update_file); + kfree(sb->s_fs_info); + sb->s_fs_info = NULL; + } kill_litter_super(sb); } From b4c98f625fffee3a6f633082e9e4be3e952ca2ab Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 13 Sep 2006 11:01:19 -0700 Subject: [PATCH 0264/1063] configfs: Prevent duplicate subsystem names. For all child objects, creation comes through mkdir(2), so duplicate names are prevented. Subsystems, though, are registered by client drivers at init_module()/__init time. This patch prevents duplicate subsystem names. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/configfs/dir.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index df025453dd97..816e8ef64560 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -86,6 +86,32 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare return sd; } +/* + * + * Return -EEXIST if there is already a configfs element with the same + * name for the same parent. + * + * called with parent inode's i_mutex held + */ +int configfs_dirent_exists(struct configfs_dirent *parent_sd, + const unsigned char *new) +{ + struct configfs_dirent * sd; + + list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { + if (sd->s_element) { + const unsigned char *existing = configfs_get_name(sd); + if (strcmp(existing, new)) + continue; + else + return -EEXIST; + } + } + + return 0; +} + + int configfs_make_dirent(struct configfs_dirent * parent_sd, struct dentry * dentry, void * element, umode_t mode, int type) @@ -136,8 +162,10 @@ static int create_dir(struct config_item * k, struct dentry * p, int error; umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; - error = configfs_make_dirent(p->d_fsdata, d, k, mode, - CONFIGFS_DIR); + error = configfs_dirent_exists(p->d_fsdata, d->d_name.name); + if (!error) + error = configfs_make_dirent(p->d_fsdata, d, k, mode, + CONFIGFS_DIR); if (!error) { error = configfs_create(d, mode, init_dir); if (!error) { From ca4d147e62df370c334898464023aa7f9126abe1 Mon Sep 17 00:00:00 2001 From: Herbert Poetzl Date: Mon, 3 Jul 2006 17:27:12 -0700 Subject: [PATCH 0265/1063] ocfs2: add ext2 attributes Support immutable, and other attributes. Some renaming and other minor fixes done by myself. Signed-off-by: Herbert Poetzl Signed-off-by: Mark Fasheh --- fs/ocfs2/Makefile | 1 + fs/ocfs2/dlmglue.c | 9 ++- fs/ocfs2/dlmglue.h | 5 +- fs/ocfs2/file.c | 3 + fs/ocfs2/inode.c | 28 ++++++++- fs/ocfs2/inode.h | 3 + fs/ocfs2/ioctl.c | 134 ++++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/ioctl.h | 16 ++++++ fs/ocfs2/ocfs2_fs.h | 24 +++++++- 9 files changed, 217 insertions(+), 6 deletions(-) create mode 100644 fs/ocfs2/ioctl.c create mode 100644 fs/ocfs2/ioctl.h diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index 7d3be845a614..9fb8132f19b0 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile @@ -16,6 +16,7 @@ ocfs2-objs := \ file.o \ heartbeat.o \ inode.o \ + ioctl.o \ journal.o \ localalloc.o \ mmap.o \ diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 762eb1fbb34d..151b41781eab 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -1330,6 +1330,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); lvb->lvb_imtime_packed = cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); + lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); mlog_meta_lvb(0, lockres); @@ -1360,6 +1361,9 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode) oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters); i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); + oi->ip_attr = be32_to_cpu(lvb->lvb_iattr); + ocfs2_set_inode_flags(inode); + /* fast-symlinks are a special case */ if (S_ISLNK(inode->i_mode) && !oi->ip_clusters) inode->i_blocks = 0; @@ -2899,8 +2903,9 @@ void ocfs2_dump_meta_lvb_info(u64 level, be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), be16_to_cpu(lvb->lvb_imode)); mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, " - "mtime_packed 0x%llx\n", be16_to_cpu(lvb->lvb_inlink), + "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink), (long long)be64_to_cpu(lvb->lvb_iatime_packed), (long long)be64_to_cpu(lvb->lvb_ictime_packed), - (long long)be64_to_cpu(lvb->lvb_imtime_packed)); + (long long)be64_to_cpu(lvb->lvb_imtime_packed), + be32_to_cpu(lvb->lvb_iattr)); } diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index 8f2d1db2d9ea..243ae862ece5 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h @@ -27,7 +27,7 @@ #ifndef DLMGLUE_H #define DLMGLUE_H -#define OCFS2_LVB_VERSION 2 +#define OCFS2_LVB_VERSION 3 struct ocfs2_meta_lvb { __be32 lvb_version; @@ -40,7 +40,8 @@ struct ocfs2_meta_lvb { __be64 lvb_isize; __be16 lvb_imode; __be16 lvb_inlink; - __be32 lvb_reserved[3]; + __be32 lvb_iattr; + __be32 lvb_reserved[2]; }; /* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */ diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index a9559c874530..2bbfa17090cf 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -44,6 +44,7 @@ #include "file.h" #include "sysfile.h" #include "inode.h" +#include "ioctl.h" #include "journal.h" #include "mmap.h" #include "suballoc.h" @@ -1227,10 +1228,12 @@ const struct file_operations ocfs2_fops = { .open = ocfs2_file_open, .aio_read = ocfs2_file_aio_read, .aio_write = ocfs2_file_aio_write, + .ioctl = ocfs2_ioctl, }; const struct file_operations ocfs2_dops = { .read = generic_read_dir, .readdir = ocfs2_readdir, .fsync = ocfs2_sync_file, + .ioctl = ocfs2_ioctl, }; diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 327a5b7b86ed..3f496c41fea8 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -71,6 +71,26 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb, struct inode *inode, struct buffer_head *fe_bh); +void ocfs2_set_inode_flags(struct inode *inode) +{ + unsigned int flags = OCFS2_I(inode)->ip_attr; + + inode->i_flags &= ~(S_IMMUTABLE | + S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); + + if (flags & OCFS2_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; + + if (flags & OCFS2_SYNC_FL) + inode->i_flags |= S_SYNC; + if (flags & OCFS2_APPEND_FL) + inode->i_flags |= S_APPEND; + if (flags & OCFS2_NOATIME_FL) + inode->i_flags |= S_NOATIME; + if (flags & OCFS2_DIRSYNC_FL) + inode->i_flags |= S_DIRSYNC; +} + struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb, u64 blkno, int delete_vote) @@ -260,7 +280,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, inode->i_blocks = ocfs2_align_bytes_to_sectors(le64_to_cpu(fe->i_size)); inode->i_mapping->a_ops = &ocfs2_aops; - inode->i_flags |= S_NOATIME; inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime); inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec); inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime); @@ -276,6 +295,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT; + OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); if (create_ino) inode->i_ino = ino_from_blkno(inode->i_sb, @@ -330,6 +350,9 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres, OCFS2_LOCK_TYPE_DATA, inode); + ocfs2_set_inode_flags(inode); + inode->i_flags |= S_NOATIME; + status = 0; bail: mlog_exit(status); @@ -1131,6 +1154,7 @@ int ocfs2_mark_inode_dirty(struct ocfs2_journal_handle *handle, spin_lock(&OCFS2_I(inode)->ip_lock); fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters); + fe->i_attr = cpu_to_le32(OCFS2_I(inode)->ip_attr); spin_unlock(&OCFS2_I(inode)->ip_lock); fe->i_size = cpu_to_le64(i_size_read(inode)); @@ -1169,6 +1193,8 @@ void ocfs2_refresh_inode(struct inode *inode, spin_lock(&OCFS2_I(inode)->ip_lock); OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); + OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); + ocfs2_set_inode_flags(inode); i_size_write(inode, le64_to_cpu(fe->i_size)); inode->i_nlink = le16_to_cpu(fe->i_links_count); inode->i_uid = le32_to_cpu(fe->i_uid); diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 35140f6cf840..4d1e53992566 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h @@ -56,6 +56,7 @@ struct ocfs2_inode_info struct ocfs2_journal_handle *ip_handle; u32 ip_flags; /* see below */ + u32 ip_attr; /* inode attributes */ /* protected by recovery_lock. */ struct inode *ip_next_orphan; @@ -142,4 +143,6 @@ int ocfs2_mark_inode_dirty(struct ocfs2_journal_handle *handle, int ocfs2_aio_read(struct file *file, struct kiocb *req, struct iocb *iocb); int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb); +void ocfs2_set_inode_flags(struct inode *inode); + #endif /* OCFS2_INODE_H */ diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c new file mode 100644 index 000000000000..68f4806d3a35 --- /dev/null +++ b/fs/ocfs2/ioctl.c @@ -0,0 +1,134 @@ +/* + * linux/fs/ocfs2/ioctl.c + * + * Copyright (C) 2006 Herbert Poetzl + * adapted from Remy Card's ext2/ioctl.c + */ + +#include +#include + +#define MLOG_MASK_PREFIX ML_INODE +#include + +#include "ocfs2.h" +#include "alloc.h" +#include "dlmglue.h" +#include "inode.h" +#include "journal.h" + +#include "ocfs2_fs.h" +#include + +static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) +{ + int status; + + status = ocfs2_meta_lock(inode, NULL, NULL, 0); + if (status < 0) { + mlog_errno(status); + return status; + } + *flags = OCFS2_I(inode)->ip_attr; + ocfs2_meta_unlock(inode, 0); + + mlog_exit(status); + return status; +} + +static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, + unsigned mask) +{ + struct ocfs2_inode_info *ocfs2_inode = OCFS2_I(inode); + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct ocfs2_journal_handle *handle = NULL; + struct buffer_head *bh = NULL; + unsigned oldflags; + int status; + + mutex_lock(&inode->i_mutex); + + status = ocfs2_meta_lock(inode, NULL, &bh, 1); + if (status < 0) { + mlog_errno(status); + goto bail; + } + + status = -EROFS; + if (IS_RDONLY(inode)) + goto bail_unlock; + + status = -EACCES; + if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + goto bail_unlock; + + if (!S_ISDIR(inode->i_mode)) + flags &= ~OCFS2_DIRSYNC_FL; + + handle = ocfs2_start_trans(osb, NULL, OCFS2_INODE_UPDATE_CREDITS); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto bail_unlock; + } + + oldflags = ocfs2_inode->ip_attr; + flags = flags & mask; + flags |= oldflags & ~mask; + + /* + * The IMMUTABLE and APPEND_ONLY flags can only be changed by + * the relevant capability. + */ + status = -EPERM; + if ((oldflags & OCFS2_IMMUTABLE_FL) || ((flags ^ oldflags) & + (OCFS2_APPEND_FL | OCFS2_IMMUTABLE_FL))) { + if (!capable(CAP_LINUX_IMMUTABLE)) + goto bail_unlock; + } + + ocfs2_inode->ip_attr = flags; + ocfs2_set_inode_flags(inode); + + status = ocfs2_mark_inode_dirty(handle, inode, bh); + if (status < 0) + mlog_errno(status); + + ocfs2_commit_trans(handle); +bail_unlock: + ocfs2_meta_unlock(inode, 1); +bail: + mutex_unlock(&inode->i_mutex); + + if (bh) + brelse(bh); + + mlog_exit(status); + return status; +} + +int ocfs2_ioctl(struct inode * inode, struct file * filp, + unsigned int cmd, unsigned long arg) +{ + unsigned int flags; + int status; + + switch (cmd) { + case OCFS2_IOC_GETFLAGS: + status = ocfs2_get_inode_attr(inode, &flags); + if (status < 0) + return status; + + flags &= OCFS2_FL_VISIBLE; + return put_user(flags, (int __user *) arg); + case OCFS2_IOC_SETFLAGS: + if (get_user(flags, (int __user *) arg)) + return -EFAULT; + + return ocfs2_set_inode_attr(inode, flags, + OCFS2_FL_MODIFIABLE); + default: + return -ENOTTY; + } +} + diff --git a/fs/ocfs2/ioctl.h b/fs/ocfs2/ioctl.h new file mode 100644 index 000000000000..4a7c82931dba --- /dev/null +++ b/fs/ocfs2/ioctl.h @@ -0,0 +1,16 @@ +/* + * ioctl.h + * + * Function prototypes + * + * Copyright (C) 2006 Herbert Poetzl + * + */ + +#ifndef OCFS2_IOCTL_H +#define OCFS2_IOCTL_H + +int ocfs2_ioctl(struct inode * inode, struct file * filp, + unsigned int cmd, unsigned long arg); + +#endif /* OCFS2_IOCTL_H */ diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index c5b1ac547c15..3330a5dc6be2 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -114,6 +114,26 @@ #define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */ #define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */ +/* Inode attributes, keep in sync with EXT2 */ +#define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */ +#define OCFS2_UNRM_FL (0x00000002) /* Undelete */ +#define OCFS2_COMPR_FL (0x00000004) /* Compress file */ +#define OCFS2_SYNC_FL (0x00000008) /* Synchronous updates */ +#define OCFS2_IMMUTABLE_FL (0x00000010) /* Immutable file */ +#define OCFS2_APPEND_FL (0x00000020) /* writes to file may only append */ +#define OCFS2_NODUMP_FL (0x00000040) /* do not dump file */ +#define OCFS2_NOATIME_FL (0x00000080) /* do not update atime */ +#define OCFS2_DIRSYNC_FL (0x00010000) /* dirsync behaviour (directories only) */ + +#define OCFS2_FL_VISIBLE (0x000100FF) /* User visible flags */ +#define OCFS2_FL_MODIFIABLE (0x000100FF) /* User modifiable flags */ + +/* + * ioctl commands + */ +#define OCFS2_IOC_GETFLAGS _IOR('f', 1, long) +#define OCFS2_IOC_SETFLAGS _IOW('f', 2, long) + /* * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) */ @@ -399,7 +419,9 @@ struct ocfs2_dinode { __le32 i_atime_nsec; __le32 i_ctime_nsec; __le32 i_mtime_nsec; -/*70*/ __le64 i_reserved1[9]; + __le32 i_attr; + __le32 i_reserved1; +/*70*/ __le64 i_reserved2[8]; /*B8*/ union { __le64 i_pad1; /* Generic way to refer to this 64bit union */ From 2d5625181fac18f572cbbd18878d28f5eebf4733 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 10 Jul 2006 01:32:51 +0200 Subject: [PATCH 0266/1063] [PATCH] fs/ocfs2/ioctl.c should #include "ioctl.h" Every file should #include the headers containing the prototypes for its global functions. Signed-off-by: Adrian Bunk Signed-off-by: Mark Fasheh --- fs/ocfs2/ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 68f4806d3a35..3663cef80689 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -18,6 +18,8 @@ #include "journal.h" #include "ocfs2_fs.h" +#include "ioctl.h" + #include static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) From 471e3f57286da7ce8820ad42c77d5f5f49d56a41 Mon Sep 17 00:00:00 2001 From: Mathieu Avila Date: Wed, 13 Sep 2006 11:11:27 -0700 Subject: [PATCH 0267/1063] ocfs2: Fix heartbeat sector calculation This fixes things for devices which set max_sectors to 8. Signed-off-by: Mark Fasheh --- fs/ocfs2/cluster/heartbeat.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 504595d6cf65..305cba3681fe 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -320,8 +320,12 @@ static int compute_max_sectors(struct block_device *bdev) max_pages = q->max_hw_segments; max_pages--; /* Handle I/Os that straddle a page */ - max_sectors = max_pages << (PAGE_SHIFT - 9); - + if (max_pages) { + max_sectors = max_pages << (PAGE_SHIFT - 9); + } else { + /* If BIO contains 1 or less than 1 page. */ + max_sectors = q->max_sectors; + } /* Why is fls() 1-based???? */ pow_two_sectors = 1 << (fls(max_sectors) - 1); From a663e30513d7ecc77dd71d474e7646bf78c0ba62 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Wed, 9 Aug 2006 11:45:07 -0700 Subject: [PATCH 0268/1063] ocfs2: move nlink check in ocfs2_mknod() The dir nlink check in ocfs2_mknod() was being done outside of the cluster lock, which means we could have been checking against a stale version of the inode. Fix this by doing the check after the cluster lock instead. Signed-off-by: Mark Fasheh --- fs/ocfs2/namei.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 0673862c8bdd..d8161a77c370 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -310,13 +310,6 @@ static int ocfs2_mknod(struct inode *dir, /* get our super block */ osb = OCFS2_SB(dir->i_sb); - if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) { - mlog(ML_ERROR, "inode %llu has i_nlink of %u\n", - (unsigned long long)OCFS2_I(dir)->ip_blkno, dir->i_nlink); - status = -EMLINK; - goto leave; - } - handle = ocfs2_alloc_handle(osb); if (handle == NULL) { status = -ENOMEM; @@ -331,6 +324,11 @@ static int ocfs2_mknod(struct inode *dir, goto leave; } + if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) { + status = -EMLINK; + goto leave; + } + dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data; if (!dirfe->i_links_count) { /* can't make a file in a deleted directory. */ From 0f62de2c9ca60a35f63122e7ea992cee8aae4bef Mon Sep 17 00:00:00 2001 From: Tiger Yang Date: Thu, 31 Aug 2006 20:39:47 -0700 Subject: [PATCH 0269/1063] ocfs2: Fix directory link count checks in ocfs2_link() Remove the redundant "i_nlink >= OCFS2_LINK_MAX" check and adds an unlinked directory check in ocfs2_link(). Signed-off-by: Tiger Yang Signed-off-by: Mark Fasheh --- fs/ocfs2/namei.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index d8161a77c370..24126476a8cc 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -641,11 +641,6 @@ static int ocfs2_link(struct dentry *old_dentry, goto bail; } - if (inode->i_nlink >= OCFS2_LINK_MAX) { - err = -EMLINK; - goto bail; - } - handle = ocfs2_alloc_handle(osb); if (handle == NULL) { err = -ENOMEM; @@ -659,6 +654,11 @@ static int ocfs2_link(struct dentry *old_dentry, goto bail; } + if (!dir->i_nlink) { + err = -ENOENT; + goto bail; + } + err = ocfs2_check_dir_for_entry(dir, dentry->d_name.name, dentry->d_name.len); if (err) From e0b4096d34fbd6b30838c417100c9d0ef73c71f2 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 11 Jul 2006 14:38:54 -0700 Subject: [PATCH 0270/1063] ocfs2: properly update i_mtime on buffered write We weren't always updating i_mtime on writes, so fix ocfs2_commit_write() to handle this. Signed-off-by: Mark Fasheh Acked-by: Zach Brown --- fs/ocfs2/aops.c | 83 ++++++++++++++++++++----------------------------- 1 file changed, 34 insertions(+), 49 deletions(-) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index f1d1c342ce01..3d7c082a8f58 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -391,31 +391,28 @@ out: static int ocfs2_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) { - int ret, extending = 0, locklevel = 0; - loff_t new_i_size; + int ret; struct buffer_head *di_bh = NULL; struct inode *inode = page->mapping->host; struct ocfs2_journal_handle *handle = NULL; + struct ocfs2_dinode *di; mlog_entry("(0x%p, 0x%p, %u, %u)\n", file, page, from, to); /* NOTE: ocfs2_file_aio_write has ensured that it's safe for - * us to sample inode->i_size here without the metadata lock: + * us to continue here without rechecking the I/O against + * changed inode values. * * 1) We're currently holding the inode alloc lock, so no * nodes can change it underneath us. * * 2) We've had to take the metadata lock at least once - * already to check for extending writes, hence insuring - * that our current copy is also up to date. + * already to check for extending writes, suid removal, etc. + * The meta data update code then ensures that we don't get a + * stale inode allocation image (i_size, i_clusters, etc). */ - new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - if (new_i_size > i_size_read(inode)) { - extending = 1; - locklevel = 1; - } - ret = ocfs2_meta_lock_with_page(inode, NULL, &di_bh, locklevel, page); + ret = ocfs2_meta_lock_with_page(inode, NULL, &di_bh, 1, page); if (ret != 0) { mlog_errno(ret); goto out; @@ -427,23 +424,20 @@ static int ocfs2_commit_write(struct file *file, struct page *page, goto out_unlock_meta; } - if (extending) { - handle = ocfs2_start_walk_page_trans(inode, page, from, to); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - handle = NULL; - goto out_unlock_data; - } + handle = ocfs2_start_walk_page_trans(inode, page, from, to); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out_unlock_data; + } - /* Mark our buffer early. We'd rather catch this error up here - * as opposed to after a successful commit_write which would - * require us to set back inode->i_size. */ - ret = ocfs2_journal_access(handle, inode, di_bh, - OCFS2_JOURNAL_ACCESS_WRITE); - if (ret < 0) { - mlog_errno(ret); - goto out_commit; - } + /* Mark our buffer early. We'd rather catch this error up here + * as opposed to after a successful commit_write which would + * require us to set back inode->i_size. */ + ret = ocfs2_journal_access(handle, inode, di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret < 0) { + mlog_errno(ret); + goto out_commit; } /* might update i_size */ @@ -453,37 +447,28 @@ static int ocfs2_commit_write(struct file *file, struct page *page, goto out_commit; } - if (extending) { - loff_t size = (u64) i_size_read(inode); - struct ocfs2_dinode *di = - (struct ocfs2_dinode *)di_bh->b_data; + di = (struct ocfs2_dinode *)di_bh->b_data; - /* ocfs2_mark_inode_dirty is too heavy to use here. */ - inode->i_blocks = ocfs2_align_bytes_to_sectors(size); - inode->i_ctime = inode->i_mtime = CURRENT_TIME; + /* ocfs2_mark_inode_dirty() is too heavy to use here. */ + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); + di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); - di->i_size = cpu_to_le64(size); - di->i_ctime = di->i_mtime = - cpu_to_le64(inode->i_mtime.tv_sec); - di->i_ctime_nsec = di->i_mtime_nsec = - cpu_to_le32(inode->i_mtime.tv_nsec); + inode->i_blocks = ocfs2_align_bytes_to_sectors((u64)(i_size_read(inode))); + di->i_size = cpu_to_le64((u64)i_size_read(inode)); - ret = ocfs2_journal_dirty(handle, di_bh); - if (ret < 0) { - mlog_errno(ret); - goto out_commit; - } + ret = ocfs2_journal_dirty(handle, di_bh); + if (ret < 0) { + mlog_errno(ret); + goto out_commit; } - BUG_ON(extending && (i_size_read(inode) != new_i_size)); - out_commit: - if (handle) - ocfs2_commit_trans(handle); + ocfs2_commit_trans(handle); out_unlock_data: ocfs2_data_unlock(inode, 1); out_unlock_meta: - ocfs2_meta_unlock(inode, locklevel); + ocfs2_meta_unlock(inode, 1); out: if (di_bh) brelse(di_bh); From aa9588741db907785e4d92c8b768dd6c9077e6f0 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Fri, 21 Apr 2006 13:49:02 -0700 Subject: [PATCH 0271/1063] ocfs2: implement directory read-ahead Uptodate.c now knows about read-ahead buffers. Use some more aggressive logic in ocfs2_readdir(). The two functions which currently use directory read-ahead are ocfs2_find_entry() and ocfs2_readdir(). Signed-off-by: Mark Fasheh --- fs/ocfs2/buffer_head_io.c | 93 ++++++++++++++++++++++++++++++--------- fs/ocfs2/buffer_head_io.h | 2 +- fs/ocfs2/dir.c | 28 +++++++----- fs/ocfs2/inode.c | 4 -- fs/ocfs2/namei.c | 10 ++--- fs/ocfs2/uptodate.c | 21 ++++++++- fs/ocfs2/uptodate.h | 2 + 7 files changed, 114 insertions(+), 46 deletions(-) diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 9a24adf9be6e..c9037414f4f6 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -100,6 +100,9 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n", (unsigned long long)block, nr, flags, inode); + BUG_ON((flags & OCFS2_BH_READAHEAD) && + (!inode || !(flags & OCFS2_BH_CACHED))); + if (osb == NULL || osb->sb == NULL || bhs == NULL) { status = -EINVAL; mlog_errno(status); @@ -140,6 +143,30 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, bh = bhs[i]; ignore_cache = 0; + /* There are three read-ahead cases here which we need to + * be concerned with. All three assume a buffer has + * previously been submitted with OCFS2_BH_READAHEAD + * and it hasn't yet completed I/O. + * + * 1) The current request is sync to disk. This rarely + * happens these days, and never when performance + * matters - the code can just wait on the buffer + * lock and re-submit. + * + * 2) The current request is cached, but not + * readahead. ocfs2_buffer_uptodate() will return + * false anyway, so we'll wind up waiting on the + * buffer lock to do I/O. We re-check the request + * with after getting the lock to avoid a re-submit. + * + * 3) The current request is readahead (and so must + * also be a caching one). We short circuit if the + * buffer is locked (under I/O) and if it's in the + * uptodate cache. The re-check from #2 catches the + * case that the previous read-ahead completes just + * before our is-it-in-flight check. + */ + if (flags & OCFS2_BH_CACHED && !ocfs2_buffer_uptodate(inode, bh)) { mlog(ML_UPTODATE, @@ -169,6 +196,14 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, continue; } + /* A read-ahead request was made - if the + * buffer is already under read-ahead from a + * previously submitted request than we are + * done here. */ + if ((flags & OCFS2_BH_READAHEAD) + && ocfs2_buffer_read_ahead(inode, bh)) + continue; + lock_buffer(bh); if (buffer_jbd(bh)) { #ifdef CATCH_BH_JBD_RACES @@ -181,13 +216,22 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, continue; #endif } + + /* Re-check ocfs2_buffer_uptodate() as a + * previously read-ahead buffer may have + * completed I/O while we were waiting for the + * buffer lock. */ + if ((flags & OCFS2_BH_CACHED) + && !(flags & OCFS2_BH_READAHEAD) + && ocfs2_buffer_uptodate(inode, bh)) { + unlock_buffer(bh); + continue; + } + clear_buffer_uptodate(bh); get_bh(bh); /* for end_buffer_read_sync() */ bh->b_end_io = end_buffer_read_sync; - if (flags & OCFS2_BH_READAHEAD) - submit_bh(READA, bh); - else - submit_bh(READ, bh); + submit_bh(READ, bh); continue; } } @@ -197,34 +241,39 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, for (i = (nr - 1); i >= 0; i--) { bh = bhs[i]; - /* We know this can't have changed as we hold the - * inode sem. Avoid doing any work on the bh if the - * journal has it. */ - if (!buffer_jbd(bh)) - wait_on_buffer(bh); + if (!(flags & OCFS2_BH_READAHEAD)) { + /* We know this can't have changed as we hold the + * inode sem. Avoid doing any work on the bh if the + * journal has it. */ + if (!buffer_jbd(bh)) + wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - /* Status won't be cleared from here on out, - * so we can safely record this and loop back - * to cleanup the other buffers. Don't need to - * remove the clustered uptodate information - * for this bh as it's not marked locally - * uptodate. */ - status = -EIO; - brelse(bh); - bhs[i] = NULL; - continue; + if (!buffer_uptodate(bh)) { + /* Status won't be cleared from here on out, + * so we can safely record this and loop back + * to cleanup the other buffers. Don't need to + * remove the clustered uptodate information + * for this bh as it's not marked locally + * uptodate. */ + status = -EIO; + brelse(bh); + bhs[i] = NULL; + continue; + } } + /* Always set the buffer in the cache, even if it was + * a forced read, or read-ahead which hasn't yet + * completed. */ if (inode) ocfs2_set_buffer_uptodate(inode, bh); } if (inode) mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); - mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s\n", + mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", (unsigned long long)block, nr, - (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes"); + (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags); bail: diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h index 6ecb90937b68..6cc20930fac3 100644 --- a/fs/ocfs2/buffer_head_io.h +++ b/fs/ocfs2/buffer_head_io.h @@ -49,7 +49,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, #define OCFS2_BH_CACHED 1 -#define OCFS2_BH_READAHEAD 8 /* use this to pass READA down to submit_bh */ +#define OCFS2_BH_READAHEAD 8 static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off, struct buffer_head **bh, int flags, diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 3d494d1a5f36..04e01915b86e 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -74,14 +74,14 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) { int error = 0; - unsigned long offset, blk; - int i, num, stored; + unsigned long offset, blk, last_ra_blk = 0; + int i, stored; struct buffer_head * bh, * tmp; struct ocfs2_dir_entry * de; int err; struct inode *inode = filp->f_dentry->d_inode; struct super_block * sb = inode->i_sb; - int have_disk_lock = 0; + unsigned int ra_sectors = 16; mlog_entry("dirino=%llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); @@ -95,9 +95,8 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) mlog_errno(error); /* we haven't got any yet, so propagate the error. */ stored = error; - goto bail; + goto bail_nolock; } - have_disk_lock = 1; offset = filp->f_pos & (sb->s_blocksize - 1); @@ -113,16 +112,21 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) continue; } - /* - * Do the readahead (8k) - */ - if (!offset) { - for (i = 16 >> (sb->s_blocksize_bits - 9), num = 0; + /* The idea here is to begin with 8k read-ahead and to stay + * 4k ahead of our current position. + * + * TODO: Use the pagecache for this. We just need to + * make sure it's cluster-safe... */ + if (!last_ra_blk + || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) { + for (i = ra_sectors >> (sb->s_blocksize_bits - 9); i > 0; i--) { tmp = ocfs2_bread(inode, ++blk, &err, 1); if (tmp) brelse(tmp); } + last_ra_blk = blk; + ra_sectors = 8; } revalidate: @@ -194,9 +198,9 @@ revalidate: stored = 0; bail: - if (have_disk_lock) - ocfs2_meta_unlock(inode, 0); + ocfs2_meta_unlock(inode, 0); +bail_nolock: mlog_exit(stored); return stored; diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 3f496c41fea8..7bcf69154592 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -1050,12 +1050,8 @@ struct buffer_head *ocfs2_bread(struct inode *inode, u64 p_blkno; int readflags = OCFS2_BH_CACHED; -#if 0 - /* only turn this on if we know we can deal with read_block - * returning nothing */ if (reada) readflags |= OCFS2_BH_READAHEAD; -#endif if (((u64)block << inode->i_sb->s_blocksize_bits) >= i_size_read(inode)) { diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 24126476a8cc..0d3e939b1f56 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -56,6 +56,7 @@ #include "journal.h" #include "namei.h" #include "suballoc.h" +#include "super.h" #include "symlink.h" #include "sysfile.h" #include "uptodate.h" @@ -1962,13 +1963,8 @@ restart: } num++; - /* XXX: questionable readahead stuff here */ bh = ocfs2_bread(dir, b++, &err, 1); bh_use[ra_max] = bh; -#if 0 // ??? - if (bh) - ll_rw_block(READ, 1, &bh); -#endif } } if ((bh = bh_use[ra_ptr++]) == NULL) @@ -1976,6 +1972,10 @@ restart: wait_on_buffer(bh); if (!buffer_uptodate(bh)) { /* read error, skip block & hope for the best */ + ocfs2_error(dir->i_sb, "reading directory %llu, " + "offset %lu\n", + (unsigned long long)OCFS2_I(dir)->ip_blkno, + block); brelse(bh); goto next; } diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c index b8a00a793326..9707ed7a3206 100644 --- a/fs/ocfs2/uptodate.c +++ b/fs/ocfs2/uptodate.c @@ -206,7 +206,10 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi, } /* Warning: even if it returns true, this does *not* guarantee that - * the block is stored in our inode metadata cache. */ + * the block is stored in our inode metadata cache. + * + * This can be called under lock_buffer() + */ int ocfs2_buffer_uptodate(struct inode *inode, struct buffer_head *bh) { @@ -226,6 +229,16 @@ int ocfs2_buffer_uptodate(struct inode *inode, return ocfs2_buffer_cached(OCFS2_I(inode), bh); } +/* + * Determine whether a buffer is currently out on a read-ahead request. + * ip_io_sem should be held to serialize submitters with the logic here. + */ +int ocfs2_buffer_read_ahead(struct inode *inode, + struct buffer_head *bh) +{ + return buffer_locked(bh) && ocfs2_buffer_cached(OCFS2_I(inode), bh); +} + /* Requires ip_lock */ static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci, sector_t block) @@ -403,7 +416,11 @@ out_free: * * Note that this function may actually fail to insert the block if * memory cannot be allocated. This is not fatal however (but may - * result in a performance penalty) */ + * result in a performance penalty) + * + * Readahead buffers can be passed in here before the I/O request is + * completed. + */ void ocfs2_set_buffer_uptodate(struct inode *inode, struct buffer_head *bh) { diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h index 01cd32d26b06..2e73206059a8 100644 --- a/fs/ocfs2/uptodate.h +++ b/fs/ocfs2/uptodate.h @@ -40,5 +40,7 @@ void ocfs2_set_new_buffer_uptodate(struct inode *inode, struct buffer_head *bh); void ocfs2_remove_from_cache(struct inode *inode, struct buffer_head *bh); +int ocfs2_buffer_read_ahead(struct inode *inode, + struct buffer_head *bh); #endif /* OCFS2_UPTODATE_H */ From 02ed8416fe5b7e33b5bbf2d73f9af1d316806822 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Thu, 14 Sep 2006 10:28:06 -0700 Subject: [PATCH 0272/1063] ocfs2: Remove EXPERIMENTAL dependency Things have been working pretty well for a while now. We should've probably done this at least one kernel revision ago, but it doesn't hurt to be paranoid. Signed-off-by: Mark Fasheh --- fs/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/Kconfig b/fs/Kconfig index 3f00a9faabcb..530581628311 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -325,8 +325,8 @@ config FS_POSIX_ACL source "fs/xfs/Kconfig" config OCFS2_FS - tristate "OCFS2 file system support (EXPERIMENTAL)" - depends on NET && SYSFS && EXPERIMENTAL + tristate "OCFS2 file system support" + depends on NET && SYSFS select CONFIGFS_FS select JBD select CRC32 From f12033d206ea48928d8124cdd5d35d8008c18935 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Wed, 13 Sep 2006 18:57:57 -0700 Subject: [PATCH 0273/1063] ocfs2: Don't print on unknown remote blocking call Signed-off-by: Mark Fasheh --- fs/ocfs2/dlm/dlmast.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index 42775e2bbe2c..f13a4bac41f0 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c @@ -367,12 +367,10 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data) goto do_ast; } - mlog(ML_ERROR, "got %sast for unknown lock! cookie=%u:%llu, " - "name=%.*s, namelen=%u\n", - past->type == DLM_AST ? "" : "b", - dlm_get_lock_cookie_node(cookie), - dlm_get_lock_cookie_seq(cookie), - locklen, name, locklen); + mlog(0, "got %sast for unknown lock! cookie=%u:%llu, " + "name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b", + dlm_get_lock_cookie_node(cookie), dlm_get_lock_cookie_seq(cookie), + locklen, name, locklen); ret = DLM_NORMAL; unlock_out: From eb35746ca5e2211569b91ebb44d55b88ec91f3b0 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Wed, 9 Aug 2006 13:23:08 -0700 Subject: [PATCH 0274/1063] ocfs2: Remove overzealous BUG_ON() The truncate code was never supposed to BUG() on an allocator it doesn't know about, but rather to ignore it. Right now, this does nothing, but when we change our allocation paths to use all suballocator files, this will allow current versions of the fs module to work fine. Signed-off-by: Mark Fasheh --- fs/ocfs2/alloc.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index edaab05a93e0..f43bc5f18a35 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -1717,17 +1717,29 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, ocfs2_remove_from_cache(inode, eb_bh); - BUG_ON(eb->h_suballoc_slot); BUG_ON(el->l_recs[0].e_clusters); BUG_ON(el->l_recs[0].e_cpos); BUG_ON(el->l_recs[0].e_blkno); - status = ocfs2_free_extent_block(handle, - tc->tc_ext_alloc_inode, - tc->tc_ext_alloc_bh, - eb); - if (status < 0) { - mlog_errno(status); - goto bail; + if (eb->h_suballoc_slot == 0) { + /* + * This code only understands how to + * lock the suballocator in slot 0, + * which is fine because allocation is + * only ever done out of that + * suballocator too. A future version + * might change that however, so avoid + * a free if we don't know how to + * handle it. This way an fs incompat + * bit will not be necessary. + */ + status = ocfs2_free_extent_block(handle, + tc->tc_ext_alloc_inode, + tc->tc_ext_alloc_bh, + eb); + if (status < 0) { + mlog_errno(status); + goto bail; + } } } brelse(eb_bh); From 799111020c66c41aef621a3b53ad112543754124 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 21 Aug 2006 21:03:52 +1000 Subject: [PATCH 0275/1063] [CRYPTO] api: Fixed crypto_tfm context alignment Previously the __aligned__ attribute was added to the crypto_tfm context member to ensure it is alinged correctly on architectures such as arm. Unfortunately kmalloc does not use the same minimum alignment rules as gcc so this is useless. This patch changes it to use kmalloc's minimum alignment. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 7f946241b879..cb1e6631b132 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -21,8 +21,9 @@ #include #include #include +#include #include -#include +#include /* * Algorithm masks and types. @@ -61,6 +62,26 @@ #define CRYPTO_DIR_ENCRYPT 1 #define CRYPTO_DIR_DECRYPT 0 +/* + * The macro CRYPTO_MINALIGN_ATTR (along with the void * type in the actual + * declaration) is used to ensure that the crypto_tfm context structure is + * aligned correctly for the given architecture so that there are no alignment + * faults for C data types. In particular, this is required on platforms such + * as arm where pointers are 32-bit aligned but there are data types such as + * u64 which require 64-bit alignment. + */ +#if defined(ARCH_KMALLOC_MINALIGN) +#define CRYPTO_MINALIGN ARCH_KMALLOC_MINALIGN +#elif defined(ARCH_SLAB_MINALIGN) +#define CRYPTO_MINALIGN ARCH_SLAB_MINALIGN +#endif + +#ifdef CRYPTO_MINALIGN +#define CRYPTO_MINALIGN_ATTR __attribute__ ((__aligned__(CRYPTO_MINALIGN))) +#else +#define CRYPTO_MINALIGN_ATTR +#endif + struct scatterlist; struct crypto_tfm; @@ -231,7 +252,7 @@ struct crypto_tfm { struct crypto_alg *__crt_alg; - char __crt_ctx[] __attribute__ ((__aligned__)); + void *__crt_ctx[] CRYPTO_MINALIGN_ATTR; }; /* From 2729bb427f686e47970406d6bde6b11892885f29 Mon Sep 17 00:00:00 2001 From: Joachim Fritschi Date: Tue, 20 Jun 2006 20:37:23 +1000 Subject: [PATCH 0276/1063] [CRYPTO] twofish: Split out common c code This patch splits up the twofish crypto routine into a common part ( key setup ) which will be uses by all twofish crypto modules ( generic-c , i586 assembler and x86_64 assembler ) and generic-c part. It also creates a new header file which will be used by all 3 modules. This eliminates all code duplication. Correctness was verified with the tcrypt module and automated test scripts. Signed-off-by: Joachim Fritschi Signed-off-by: Herbert Xu --- crypto/Kconfig | 8 + crypto/Makefile | 1 + crypto/twofish.c | 698 +----------------------------------- crypto/twofish_common.c | 744 +++++++++++++++++++++++++++++++++++++++ include/crypto/twofish.h | 23 ++ 5 files changed, 777 insertions(+), 697 deletions(-) create mode 100644 crypto/twofish_common.c create mode 100644 include/crypto/twofish.h diff --git a/crypto/Kconfig b/crypto/Kconfig index ba133d557045..5472f693e6ec 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -131,6 +131,7 @@ config CRYPTO_BLOWFISH config CRYPTO_TWOFISH tristate "Twofish cipher algorithm" depends on CRYPTO + select CRYPTO_TWOFISH_COMMON help Twofish cipher algorithm. @@ -142,6 +143,13 @@ config CRYPTO_TWOFISH See also: +config CRYPTO_TWOFISH_COMMON + tristate + depends on CRYPTO + help + Common parts of the Twofish cipher algorithm shared by the + generic c and the assembler implementations. + config CRYPTO_SERPENT tristate "Serpent cipher algorithm" depends on CRYPTO diff --git a/crypto/Makefile b/crypto/Makefile index d287b9e60c47..fe934f1001c6 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o obj-$(CONFIG_CRYPTO_DES) += des.o obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish.o obj-$(CONFIG_CRYPTO_TWOFISH) += twofish.o +obj-$(CONFIG_CRYPTO_TWOFISH_COMMON) += twofish_common.o obj-$(CONFIG_CRYPTO_SERPENT) += serpent.o obj-$(CONFIG_CRYPTO_AES) += aes.o obj-$(CONFIG_CRYPTO_CAST5) += cast5.o diff --git a/crypto/twofish.c b/crypto/twofish.c index ec2488242e2d..e3b3a0a6cb4d 100644 --- a/crypto/twofish.c +++ b/crypto/twofish.c @@ -39,6 +39,7 @@ */ #include +#include #include #include #include @@ -46,534 +47,6 @@ #include #include - -/* The large precomputed tables for the Twofish cipher (twofish.c) - * Taken from the same source as twofish.c - * Marc Mutz - */ - -/* These two tables are the q0 and q1 permutations, exactly as described in - * the Twofish paper. */ - -static const u8 q0[256] = { - 0xA9, 0x67, 0xB3, 0xE8, 0x04, 0xFD, 0xA3, 0x76, 0x9A, 0x92, 0x80, 0x78, - 0xE4, 0xDD, 0xD1, 0x38, 0x0D, 0xC6, 0x35, 0x98, 0x18, 0xF7, 0xEC, 0x6C, - 0x43, 0x75, 0x37, 0x26, 0xFA, 0x13, 0x94, 0x48, 0xF2, 0xD0, 0x8B, 0x30, - 0x84, 0x54, 0xDF, 0x23, 0x19, 0x5B, 0x3D, 0x59, 0xF3, 0xAE, 0xA2, 0x82, - 0x63, 0x01, 0x83, 0x2E, 0xD9, 0x51, 0x9B, 0x7C, 0xA6, 0xEB, 0xA5, 0xBE, - 0x16, 0x0C, 0xE3, 0x61, 0xC0, 0x8C, 0x3A, 0xF5, 0x73, 0x2C, 0x25, 0x0B, - 0xBB, 0x4E, 0x89, 0x6B, 0x53, 0x6A, 0xB4, 0xF1, 0xE1, 0xE6, 0xBD, 0x45, - 0xE2, 0xF4, 0xB6, 0x66, 0xCC, 0x95, 0x03, 0x56, 0xD4, 0x1C, 0x1E, 0xD7, - 0xFB, 0xC3, 0x8E, 0xB5, 0xE9, 0xCF, 0xBF, 0xBA, 0xEA, 0x77, 0x39, 0xAF, - 0x33, 0xC9, 0x62, 0x71, 0x81, 0x79, 0x09, 0xAD, 0x24, 0xCD, 0xF9, 0xD8, - 0xE5, 0xC5, 0xB9, 0x4D, 0x44, 0x08, 0x86, 0xE7, 0xA1, 0x1D, 0xAA, 0xED, - 0x06, 0x70, 0xB2, 0xD2, 0x41, 0x7B, 0xA0, 0x11, 0x31, 0xC2, 0x27, 0x90, - 0x20, 0xF6, 0x60, 0xFF, 0x96, 0x5C, 0xB1, 0xAB, 0x9E, 0x9C, 0x52, 0x1B, - 0x5F, 0x93, 0x0A, 0xEF, 0x91, 0x85, 0x49, 0xEE, 0x2D, 0x4F, 0x8F, 0x3B, - 0x47, 0x87, 0x6D, 0x46, 0xD6, 0x3E, 0x69, 0x64, 0x2A, 0xCE, 0xCB, 0x2F, - 0xFC, 0x97, 0x05, 0x7A, 0xAC, 0x7F, 0xD5, 0x1A, 0x4B, 0x0E, 0xA7, 0x5A, - 0x28, 0x14, 0x3F, 0x29, 0x88, 0x3C, 0x4C, 0x02, 0xB8, 0xDA, 0xB0, 0x17, - 0x55, 0x1F, 0x8A, 0x7D, 0x57, 0xC7, 0x8D, 0x74, 0xB7, 0xC4, 0x9F, 0x72, - 0x7E, 0x15, 0x22, 0x12, 0x58, 0x07, 0x99, 0x34, 0x6E, 0x50, 0xDE, 0x68, - 0x65, 0xBC, 0xDB, 0xF8, 0xC8, 0xA8, 0x2B, 0x40, 0xDC, 0xFE, 0x32, 0xA4, - 0xCA, 0x10, 0x21, 0xF0, 0xD3, 0x5D, 0x0F, 0x00, 0x6F, 0x9D, 0x36, 0x42, - 0x4A, 0x5E, 0xC1, 0xE0 -}; - -static const u8 q1[256] = { - 0x75, 0xF3, 0xC6, 0xF4, 0xDB, 0x7B, 0xFB, 0xC8, 0x4A, 0xD3, 0xE6, 0x6B, - 0x45, 0x7D, 0xE8, 0x4B, 0xD6, 0x32, 0xD8, 0xFD, 0x37, 0x71, 0xF1, 0xE1, - 0x30, 0x0F, 0xF8, 0x1B, 0x87, 0xFA, 0x06, 0x3F, 0x5E, 0xBA, 0xAE, 0x5B, - 0x8A, 0x00, 0xBC, 0x9D, 0x6D, 0xC1, 0xB1, 0x0E, 0x80, 0x5D, 0xD2, 0xD5, - 0xA0, 0x84, 0x07, 0x14, 0xB5, 0x90, 0x2C, 0xA3, 0xB2, 0x73, 0x4C, 0x54, - 0x92, 0x74, 0x36, 0x51, 0x38, 0xB0, 0xBD, 0x5A, 0xFC, 0x60, 0x62, 0x96, - 0x6C, 0x42, 0xF7, 0x10, 0x7C, 0x28, 0x27, 0x8C, 0x13, 0x95, 0x9C, 0xC7, - 0x24, 0x46, 0x3B, 0x70, 0xCA, 0xE3, 0x85, 0xCB, 0x11, 0xD0, 0x93, 0xB8, - 0xA6, 0x83, 0x20, 0xFF, 0x9F, 0x77, 0xC3, 0xCC, 0x03, 0x6F, 0x08, 0xBF, - 0x40, 0xE7, 0x2B, 0xE2, 0x79, 0x0C, 0xAA, 0x82, 0x41, 0x3A, 0xEA, 0xB9, - 0xE4, 0x9A, 0xA4, 0x97, 0x7E, 0xDA, 0x7A, 0x17, 0x66, 0x94, 0xA1, 0x1D, - 0x3D, 0xF0, 0xDE, 0xB3, 0x0B, 0x72, 0xA7, 0x1C, 0xEF, 0xD1, 0x53, 0x3E, - 0x8F, 0x33, 0x26, 0x5F, 0xEC, 0x76, 0x2A, 0x49, 0x81, 0x88, 0xEE, 0x21, - 0xC4, 0x1A, 0xEB, 0xD9, 0xC5, 0x39, 0x99, 0xCD, 0xAD, 0x31, 0x8B, 0x01, - 0x18, 0x23, 0xDD, 0x1F, 0x4E, 0x2D, 0xF9, 0x48, 0x4F, 0xF2, 0x65, 0x8E, - 0x78, 0x5C, 0x58, 0x19, 0x8D, 0xE5, 0x98, 0x57, 0x67, 0x7F, 0x05, 0x64, - 0xAF, 0x63, 0xB6, 0xFE, 0xF5, 0xB7, 0x3C, 0xA5, 0xCE, 0xE9, 0x68, 0x44, - 0xE0, 0x4D, 0x43, 0x69, 0x29, 0x2E, 0xAC, 0x15, 0x59, 0xA8, 0x0A, 0x9E, - 0x6E, 0x47, 0xDF, 0x34, 0x35, 0x6A, 0xCF, 0xDC, 0x22, 0xC9, 0xC0, 0x9B, - 0x89, 0xD4, 0xED, 0xAB, 0x12, 0xA2, 0x0D, 0x52, 0xBB, 0x02, 0x2F, 0xA9, - 0xD7, 0x61, 0x1E, 0xB4, 0x50, 0x04, 0xF6, 0xC2, 0x16, 0x25, 0x86, 0x56, - 0x55, 0x09, 0xBE, 0x91 -}; - -/* These MDS tables are actually tables of MDS composed with q0 and q1, - * because it is only ever used that way and we can save some time by - * precomputing. Of course the main saving comes from precomputing the - * GF(2^8) multiplication involved in the MDS matrix multiply; by looking - * things up in these tables we reduce the matrix multiply to four lookups - * and three XORs. Semi-formally, the definition of these tables is: - * mds[0][i] = MDS (q1[i] 0 0 0)^T mds[1][i] = MDS (0 q0[i] 0 0)^T - * mds[2][i] = MDS (0 0 q1[i] 0)^T mds[3][i] = MDS (0 0 0 q0[i])^T - * where ^T means "transpose", the matrix multiply is performed in GF(2^8) - * represented as GF(2)[x]/v(x) where v(x)=x^8+x^6+x^5+x^3+1 as described - * by Schneier et al, and I'm casually glossing over the byte/word - * conversion issues. */ - -static const u32 mds[4][256] = { - {0xBCBC3275, 0xECEC21F3, 0x202043C6, 0xB3B3C9F4, 0xDADA03DB, 0x02028B7B, - 0xE2E22BFB, 0x9E9EFAC8, 0xC9C9EC4A, 0xD4D409D3, 0x18186BE6, 0x1E1E9F6B, - 0x98980E45, 0xB2B2387D, 0xA6A6D2E8, 0x2626B74B, 0x3C3C57D6, 0x93938A32, - 0x8282EED8, 0x525298FD, 0x7B7BD437, 0xBBBB3771, 0x5B5B97F1, 0x474783E1, - 0x24243C30, 0x5151E20F, 0xBABAC6F8, 0x4A4AF31B, 0xBFBF4887, 0x0D0D70FA, - 0xB0B0B306, 0x7575DE3F, 0xD2D2FD5E, 0x7D7D20BA, 0x666631AE, 0x3A3AA35B, - 0x59591C8A, 0x00000000, 0xCDCD93BC, 0x1A1AE09D, 0xAEAE2C6D, 0x7F7FABC1, - 0x2B2BC7B1, 0xBEBEB90E, 0xE0E0A080, 0x8A8A105D, 0x3B3B52D2, 0x6464BAD5, - 0xD8D888A0, 0xE7E7A584, 0x5F5FE807, 0x1B1B1114, 0x2C2CC2B5, 0xFCFCB490, - 0x3131272C, 0x808065A3, 0x73732AB2, 0x0C0C8173, 0x79795F4C, 0x6B6B4154, - 0x4B4B0292, 0x53536974, 0x94948F36, 0x83831F51, 0x2A2A3638, 0xC4C49CB0, - 0x2222C8BD, 0xD5D5F85A, 0xBDBDC3FC, 0x48487860, 0xFFFFCE62, 0x4C4C0796, - 0x4141776C, 0xC7C7E642, 0xEBEB24F7, 0x1C1C1410, 0x5D5D637C, 0x36362228, - 0x6767C027, 0xE9E9AF8C, 0x4444F913, 0x1414EA95, 0xF5F5BB9C, 0xCFCF18C7, - 0x3F3F2D24, 0xC0C0E346, 0x7272DB3B, 0x54546C70, 0x29294CCA, 0xF0F035E3, - 0x0808FE85, 0xC6C617CB, 0xF3F34F11, 0x8C8CE4D0, 0xA4A45993, 0xCACA96B8, - 0x68683BA6, 0xB8B84D83, 0x38382820, 0xE5E52EFF, 0xADAD569F, 0x0B0B8477, - 0xC8C81DC3, 0x9999FFCC, 0x5858ED03, 0x19199A6F, 0x0E0E0A08, 0x95957EBF, - 0x70705040, 0xF7F730E7, 0x6E6ECF2B, 0x1F1F6EE2, 0xB5B53D79, 0x09090F0C, - 0x616134AA, 0x57571682, 0x9F9F0B41, 0x9D9D803A, 0x111164EA, 0x2525CDB9, - 0xAFAFDDE4, 0x4545089A, 0xDFDF8DA4, 0xA3A35C97, 0xEAEAD57E, 0x353558DA, - 0xEDEDD07A, 0x4343FC17, 0xF8F8CB66, 0xFBFBB194, 0x3737D3A1, 0xFAFA401D, - 0xC2C2683D, 0xB4B4CCF0, 0x32325DDE, 0x9C9C71B3, 0x5656E70B, 0xE3E3DA72, - 0x878760A7, 0x15151B1C, 0xF9F93AEF, 0x6363BFD1, 0x3434A953, 0x9A9A853E, - 0xB1B1428F, 0x7C7CD133, 0x88889B26, 0x3D3DA65F, 0xA1A1D7EC, 0xE4E4DF76, - 0x8181942A, 0x91910149, 0x0F0FFB81, 0xEEEEAA88, 0x161661EE, 0xD7D77321, - 0x9797F5C4, 0xA5A5A81A, 0xFEFE3FEB, 0x6D6DB5D9, 0x7878AEC5, 0xC5C56D39, - 0x1D1DE599, 0x7676A4CD, 0x3E3EDCAD, 0xCBCB6731, 0xB6B6478B, 0xEFEF5B01, - 0x12121E18, 0x6060C523, 0x6A6AB0DD, 0x4D4DF61F, 0xCECEE94E, 0xDEDE7C2D, - 0x55559DF9, 0x7E7E5A48, 0x2121B24F, 0x03037AF2, 0xA0A02665, 0x5E5E198E, - 0x5A5A6678, 0x65654B5C, 0x62624E58, 0xFDFD4519, 0x0606F48D, 0x404086E5, - 0xF2F2BE98, 0x3333AC57, 0x17179067, 0x05058E7F, 0xE8E85E05, 0x4F4F7D64, - 0x89896AAF, 0x10109563, 0x74742FB6, 0x0A0A75FE, 0x5C5C92F5, 0x9B9B74B7, - 0x2D2D333C, 0x3030D6A5, 0x2E2E49CE, 0x494989E9, 0x46467268, 0x77775544, - 0xA8A8D8E0, 0x9696044D, 0x2828BD43, 0xA9A92969, 0xD9D97929, 0x8686912E, - 0xD1D187AC, 0xF4F44A15, 0x8D8D1559, 0xD6D682A8, 0xB9B9BC0A, 0x42420D9E, - 0xF6F6C16E, 0x2F2FB847, 0xDDDD06DF, 0x23233934, 0xCCCC6235, 0xF1F1C46A, - 0xC1C112CF, 0x8585EBDC, 0x8F8F9E22, 0x7171A1C9, 0x9090F0C0, 0xAAAA539B, - 0x0101F189, 0x8B8BE1D4, 0x4E4E8CED, 0x8E8E6FAB, 0xABABA212, 0x6F6F3EA2, - 0xE6E6540D, 0xDBDBF252, 0x92927BBB, 0xB7B7B602, 0x6969CA2F, 0x3939D9A9, - 0xD3D30CD7, 0xA7A72361, 0xA2A2AD1E, 0xC3C399B4, 0x6C6C4450, 0x07070504, - 0x04047FF6, 0x272746C2, 0xACACA716, 0xD0D07625, 0x50501386, 0xDCDCF756, - 0x84841A55, 0xE1E15109, 0x7A7A25BE, 0x1313EF91}, - - {0xA9D93939, 0x67901717, 0xB3719C9C, 0xE8D2A6A6, 0x04050707, 0xFD985252, - 0xA3658080, 0x76DFE4E4, 0x9A084545, 0x92024B4B, 0x80A0E0E0, 0x78665A5A, - 0xE4DDAFAF, 0xDDB06A6A, 0xD1BF6363, 0x38362A2A, 0x0D54E6E6, 0xC6432020, - 0x3562CCCC, 0x98BEF2F2, 0x181E1212, 0xF724EBEB, 0xECD7A1A1, 0x6C774141, - 0x43BD2828, 0x7532BCBC, 0x37D47B7B, 0x269B8888, 0xFA700D0D, 0x13F94444, - 0x94B1FBFB, 0x485A7E7E, 0xF27A0303, 0xD0E48C8C, 0x8B47B6B6, 0x303C2424, - 0x84A5E7E7, 0x54416B6B, 0xDF06DDDD, 0x23C56060, 0x1945FDFD, 0x5BA33A3A, - 0x3D68C2C2, 0x59158D8D, 0xF321ECEC, 0xAE316666, 0xA23E6F6F, 0x82165757, - 0x63951010, 0x015BEFEF, 0x834DB8B8, 0x2E918686, 0xD9B56D6D, 0x511F8383, - 0x9B53AAAA, 0x7C635D5D, 0xA63B6868, 0xEB3FFEFE, 0xA5D63030, 0xBE257A7A, - 0x16A7ACAC, 0x0C0F0909, 0xE335F0F0, 0x6123A7A7, 0xC0F09090, 0x8CAFE9E9, - 0x3A809D9D, 0xF5925C5C, 0x73810C0C, 0x2C273131, 0x2576D0D0, 0x0BE75656, - 0xBB7B9292, 0x4EE9CECE, 0x89F10101, 0x6B9F1E1E, 0x53A93434, 0x6AC4F1F1, - 0xB499C3C3, 0xF1975B5B, 0xE1834747, 0xE66B1818, 0xBDC82222, 0x450E9898, - 0xE26E1F1F, 0xF4C9B3B3, 0xB62F7474, 0x66CBF8F8, 0xCCFF9999, 0x95EA1414, - 0x03ED5858, 0x56F7DCDC, 0xD4E18B8B, 0x1C1B1515, 0x1EADA2A2, 0xD70CD3D3, - 0xFB2BE2E2, 0xC31DC8C8, 0x8E195E5E, 0xB5C22C2C, 0xE9894949, 0xCF12C1C1, - 0xBF7E9595, 0xBA207D7D, 0xEA641111, 0x77840B0B, 0x396DC5C5, 0xAF6A8989, - 0x33D17C7C, 0xC9A17171, 0x62CEFFFF, 0x7137BBBB, 0x81FB0F0F, 0x793DB5B5, - 0x0951E1E1, 0xADDC3E3E, 0x242D3F3F, 0xCDA47676, 0xF99D5555, 0xD8EE8282, - 0xE5864040, 0xC5AE7878, 0xB9CD2525, 0x4D049696, 0x44557777, 0x080A0E0E, - 0x86135050, 0xE730F7F7, 0xA1D33737, 0x1D40FAFA, 0xAA346161, 0xED8C4E4E, - 0x06B3B0B0, 0x706C5454, 0xB22A7373, 0xD2523B3B, 0x410B9F9F, 0x7B8B0202, - 0xA088D8D8, 0x114FF3F3, 0x3167CBCB, 0xC2462727, 0x27C06767, 0x90B4FCFC, - 0x20283838, 0xF67F0404, 0x60784848, 0xFF2EE5E5, 0x96074C4C, 0x5C4B6565, - 0xB1C72B2B, 0xAB6F8E8E, 0x9E0D4242, 0x9CBBF5F5, 0x52F2DBDB, 0x1BF34A4A, - 0x5FA63D3D, 0x9359A4A4, 0x0ABCB9B9, 0xEF3AF9F9, 0x91EF1313, 0x85FE0808, - 0x49019191, 0xEE611616, 0x2D7CDEDE, 0x4FB22121, 0x8F42B1B1, 0x3BDB7272, - 0x47B82F2F, 0x8748BFBF, 0x6D2CAEAE, 0x46E3C0C0, 0xD6573C3C, 0x3E859A9A, - 0x6929A9A9, 0x647D4F4F, 0x2A948181, 0xCE492E2E, 0xCB17C6C6, 0x2FCA6969, - 0xFCC3BDBD, 0x975CA3A3, 0x055EE8E8, 0x7AD0EDED, 0xAC87D1D1, 0x7F8E0505, - 0xD5BA6464, 0x1AA8A5A5, 0x4BB72626, 0x0EB9BEBE, 0xA7608787, 0x5AF8D5D5, - 0x28223636, 0x14111B1B, 0x3FDE7575, 0x2979D9D9, 0x88AAEEEE, 0x3C332D2D, - 0x4C5F7979, 0x02B6B7B7, 0xB896CACA, 0xDA583535, 0xB09CC4C4, 0x17FC4343, - 0x551A8484, 0x1FF64D4D, 0x8A1C5959, 0x7D38B2B2, 0x57AC3333, 0xC718CFCF, - 0x8DF40606, 0x74695353, 0xB7749B9B, 0xC4F59797, 0x9F56ADAD, 0x72DAE3E3, - 0x7ED5EAEA, 0x154AF4F4, 0x229E8F8F, 0x12A2ABAB, 0x584E6262, 0x07E85F5F, - 0x99E51D1D, 0x34392323, 0x6EC1F6F6, 0x50446C6C, 0xDE5D3232, 0x68724646, - 0x6526A0A0, 0xBC93CDCD, 0xDB03DADA, 0xF8C6BABA, 0xC8FA9E9E, 0xA882D6D6, - 0x2BCF6E6E, 0x40507070, 0xDCEB8585, 0xFE750A0A, 0x328A9393, 0xA48DDFDF, - 0xCA4C2929, 0x10141C1C, 0x2173D7D7, 0xF0CCB4B4, 0xD309D4D4, 0x5D108A8A, - 0x0FE25151, 0x00000000, 0x6F9A1919, 0x9DE01A1A, 0x368F9494, 0x42E6C7C7, - 0x4AECC9C9, 0x5EFDD2D2, 0xC1AB7F7F, 0xE0D8A8A8}, - - {0xBC75BC32, 0xECF3EC21, 0x20C62043, 0xB3F4B3C9, 0xDADBDA03, 0x027B028B, - 0xE2FBE22B, 0x9EC89EFA, 0xC94AC9EC, 0xD4D3D409, 0x18E6186B, 0x1E6B1E9F, - 0x9845980E, 0xB27DB238, 0xA6E8A6D2, 0x264B26B7, 0x3CD63C57, 0x9332938A, - 0x82D882EE, 0x52FD5298, 0x7B377BD4, 0xBB71BB37, 0x5BF15B97, 0x47E14783, - 0x2430243C, 0x510F51E2, 0xBAF8BAC6, 0x4A1B4AF3, 0xBF87BF48, 0x0DFA0D70, - 0xB006B0B3, 0x753F75DE, 0xD25ED2FD, 0x7DBA7D20, 0x66AE6631, 0x3A5B3AA3, - 0x598A591C, 0x00000000, 0xCDBCCD93, 0x1A9D1AE0, 0xAE6DAE2C, 0x7FC17FAB, - 0x2BB12BC7, 0xBE0EBEB9, 0xE080E0A0, 0x8A5D8A10, 0x3BD23B52, 0x64D564BA, - 0xD8A0D888, 0xE784E7A5, 0x5F075FE8, 0x1B141B11, 0x2CB52CC2, 0xFC90FCB4, - 0x312C3127, 0x80A38065, 0x73B2732A, 0x0C730C81, 0x794C795F, 0x6B546B41, - 0x4B924B02, 0x53745369, 0x9436948F, 0x8351831F, 0x2A382A36, 0xC4B0C49C, - 0x22BD22C8, 0xD55AD5F8, 0xBDFCBDC3, 0x48604878, 0xFF62FFCE, 0x4C964C07, - 0x416C4177, 0xC742C7E6, 0xEBF7EB24, 0x1C101C14, 0x5D7C5D63, 0x36283622, - 0x672767C0, 0xE98CE9AF, 0x441344F9, 0x149514EA, 0xF59CF5BB, 0xCFC7CF18, - 0x3F243F2D, 0xC046C0E3, 0x723B72DB, 0x5470546C, 0x29CA294C, 0xF0E3F035, - 0x088508FE, 0xC6CBC617, 0xF311F34F, 0x8CD08CE4, 0xA493A459, 0xCAB8CA96, - 0x68A6683B, 0xB883B84D, 0x38203828, 0xE5FFE52E, 0xAD9FAD56, 0x0B770B84, - 0xC8C3C81D, 0x99CC99FF, 0x580358ED, 0x196F199A, 0x0E080E0A, 0x95BF957E, - 0x70407050, 0xF7E7F730, 0x6E2B6ECF, 0x1FE21F6E, 0xB579B53D, 0x090C090F, - 0x61AA6134, 0x57825716, 0x9F419F0B, 0x9D3A9D80, 0x11EA1164, 0x25B925CD, - 0xAFE4AFDD, 0x459A4508, 0xDFA4DF8D, 0xA397A35C, 0xEA7EEAD5, 0x35DA3558, - 0xED7AEDD0, 0x431743FC, 0xF866F8CB, 0xFB94FBB1, 0x37A137D3, 0xFA1DFA40, - 0xC23DC268, 0xB4F0B4CC, 0x32DE325D, 0x9CB39C71, 0x560B56E7, 0xE372E3DA, - 0x87A78760, 0x151C151B, 0xF9EFF93A, 0x63D163BF, 0x345334A9, 0x9A3E9A85, - 0xB18FB142, 0x7C337CD1, 0x8826889B, 0x3D5F3DA6, 0xA1ECA1D7, 0xE476E4DF, - 0x812A8194, 0x91499101, 0x0F810FFB, 0xEE88EEAA, 0x16EE1661, 0xD721D773, - 0x97C497F5, 0xA51AA5A8, 0xFEEBFE3F, 0x6DD96DB5, 0x78C578AE, 0xC539C56D, - 0x1D991DE5, 0x76CD76A4, 0x3EAD3EDC, 0xCB31CB67, 0xB68BB647, 0xEF01EF5B, - 0x1218121E, 0x602360C5, 0x6ADD6AB0, 0x4D1F4DF6, 0xCE4ECEE9, 0xDE2DDE7C, - 0x55F9559D, 0x7E487E5A, 0x214F21B2, 0x03F2037A, 0xA065A026, 0x5E8E5E19, - 0x5A785A66, 0x655C654B, 0x6258624E, 0xFD19FD45, 0x068D06F4, 0x40E54086, - 0xF298F2BE, 0x335733AC, 0x17671790, 0x057F058E, 0xE805E85E, 0x4F644F7D, - 0x89AF896A, 0x10631095, 0x74B6742F, 0x0AFE0A75, 0x5CF55C92, 0x9BB79B74, - 0x2D3C2D33, 0x30A530D6, 0x2ECE2E49, 0x49E94989, 0x46684672, 0x77447755, - 0xA8E0A8D8, 0x964D9604, 0x284328BD, 0xA969A929, 0xD929D979, 0x862E8691, - 0xD1ACD187, 0xF415F44A, 0x8D598D15, 0xD6A8D682, 0xB90AB9BC, 0x429E420D, - 0xF66EF6C1, 0x2F472FB8, 0xDDDFDD06, 0x23342339, 0xCC35CC62, 0xF16AF1C4, - 0xC1CFC112, 0x85DC85EB, 0x8F228F9E, 0x71C971A1, 0x90C090F0, 0xAA9BAA53, - 0x018901F1, 0x8BD48BE1, 0x4EED4E8C, 0x8EAB8E6F, 0xAB12ABA2, 0x6FA26F3E, - 0xE60DE654, 0xDB52DBF2, 0x92BB927B, 0xB702B7B6, 0x692F69CA, 0x39A939D9, - 0xD3D7D30C, 0xA761A723, 0xA21EA2AD, 0xC3B4C399, 0x6C506C44, 0x07040705, - 0x04F6047F, 0x27C22746, 0xAC16ACA7, 0xD025D076, 0x50865013, 0xDC56DCF7, - 0x8455841A, 0xE109E151, 0x7ABE7A25, 0x139113EF}, - - {0xD939A9D9, 0x90176790, 0x719CB371, 0xD2A6E8D2, 0x05070405, 0x9852FD98, - 0x6580A365, 0xDFE476DF, 0x08459A08, 0x024B9202, 0xA0E080A0, 0x665A7866, - 0xDDAFE4DD, 0xB06ADDB0, 0xBF63D1BF, 0x362A3836, 0x54E60D54, 0x4320C643, - 0x62CC3562, 0xBEF298BE, 0x1E12181E, 0x24EBF724, 0xD7A1ECD7, 0x77416C77, - 0xBD2843BD, 0x32BC7532, 0xD47B37D4, 0x9B88269B, 0x700DFA70, 0xF94413F9, - 0xB1FB94B1, 0x5A7E485A, 0x7A03F27A, 0xE48CD0E4, 0x47B68B47, 0x3C24303C, - 0xA5E784A5, 0x416B5441, 0x06DDDF06, 0xC56023C5, 0x45FD1945, 0xA33A5BA3, - 0x68C23D68, 0x158D5915, 0x21ECF321, 0x3166AE31, 0x3E6FA23E, 0x16578216, - 0x95106395, 0x5BEF015B, 0x4DB8834D, 0x91862E91, 0xB56DD9B5, 0x1F83511F, - 0x53AA9B53, 0x635D7C63, 0x3B68A63B, 0x3FFEEB3F, 0xD630A5D6, 0x257ABE25, - 0xA7AC16A7, 0x0F090C0F, 0x35F0E335, 0x23A76123, 0xF090C0F0, 0xAFE98CAF, - 0x809D3A80, 0x925CF592, 0x810C7381, 0x27312C27, 0x76D02576, 0xE7560BE7, - 0x7B92BB7B, 0xE9CE4EE9, 0xF10189F1, 0x9F1E6B9F, 0xA93453A9, 0xC4F16AC4, - 0x99C3B499, 0x975BF197, 0x8347E183, 0x6B18E66B, 0xC822BDC8, 0x0E98450E, - 0x6E1FE26E, 0xC9B3F4C9, 0x2F74B62F, 0xCBF866CB, 0xFF99CCFF, 0xEA1495EA, - 0xED5803ED, 0xF7DC56F7, 0xE18BD4E1, 0x1B151C1B, 0xADA21EAD, 0x0CD3D70C, - 0x2BE2FB2B, 0x1DC8C31D, 0x195E8E19, 0xC22CB5C2, 0x8949E989, 0x12C1CF12, - 0x7E95BF7E, 0x207DBA20, 0x6411EA64, 0x840B7784, 0x6DC5396D, 0x6A89AF6A, - 0xD17C33D1, 0xA171C9A1, 0xCEFF62CE, 0x37BB7137, 0xFB0F81FB, 0x3DB5793D, - 0x51E10951, 0xDC3EADDC, 0x2D3F242D, 0xA476CDA4, 0x9D55F99D, 0xEE82D8EE, - 0x8640E586, 0xAE78C5AE, 0xCD25B9CD, 0x04964D04, 0x55774455, 0x0A0E080A, - 0x13508613, 0x30F7E730, 0xD337A1D3, 0x40FA1D40, 0x3461AA34, 0x8C4EED8C, - 0xB3B006B3, 0x6C54706C, 0x2A73B22A, 0x523BD252, 0x0B9F410B, 0x8B027B8B, - 0x88D8A088, 0x4FF3114F, 0x67CB3167, 0x4627C246, 0xC06727C0, 0xB4FC90B4, - 0x28382028, 0x7F04F67F, 0x78486078, 0x2EE5FF2E, 0x074C9607, 0x4B655C4B, - 0xC72BB1C7, 0x6F8EAB6F, 0x0D429E0D, 0xBBF59CBB, 0xF2DB52F2, 0xF34A1BF3, - 0xA63D5FA6, 0x59A49359, 0xBCB90ABC, 0x3AF9EF3A, 0xEF1391EF, 0xFE0885FE, - 0x01914901, 0x6116EE61, 0x7CDE2D7C, 0xB2214FB2, 0x42B18F42, 0xDB723BDB, - 0xB82F47B8, 0x48BF8748, 0x2CAE6D2C, 0xE3C046E3, 0x573CD657, 0x859A3E85, - 0x29A96929, 0x7D4F647D, 0x94812A94, 0x492ECE49, 0x17C6CB17, 0xCA692FCA, - 0xC3BDFCC3, 0x5CA3975C, 0x5EE8055E, 0xD0ED7AD0, 0x87D1AC87, 0x8E057F8E, - 0xBA64D5BA, 0xA8A51AA8, 0xB7264BB7, 0xB9BE0EB9, 0x6087A760, 0xF8D55AF8, - 0x22362822, 0x111B1411, 0xDE753FDE, 0x79D92979, 0xAAEE88AA, 0x332D3C33, - 0x5F794C5F, 0xB6B702B6, 0x96CAB896, 0x5835DA58, 0x9CC4B09C, 0xFC4317FC, - 0x1A84551A, 0xF64D1FF6, 0x1C598A1C, 0x38B27D38, 0xAC3357AC, 0x18CFC718, - 0xF4068DF4, 0x69537469, 0x749BB774, 0xF597C4F5, 0x56AD9F56, 0xDAE372DA, - 0xD5EA7ED5, 0x4AF4154A, 0x9E8F229E, 0xA2AB12A2, 0x4E62584E, 0xE85F07E8, - 0xE51D99E5, 0x39233439, 0xC1F66EC1, 0x446C5044, 0x5D32DE5D, 0x72466872, - 0x26A06526, 0x93CDBC93, 0x03DADB03, 0xC6BAF8C6, 0xFA9EC8FA, 0x82D6A882, - 0xCF6E2BCF, 0x50704050, 0xEB85DCEB, 0x750AFE75, 0x8A93328A, 0x8DDFA48D, - 0x4C29CA4C, 0x141C1014, 0x73D72173, 0xCCB4F0CC, 0x09D4D309, 0x108A5D10, - 0xE2510FE2, 0x00000000, 0x9A196F9A, 0xE01A9DE0, 0x8F94368F, 0xE6C742E6, - 0xECC94AEC, 0xFDD25EFD, 0xAB7FC1AB, 0xD8A8E0D8} -}; - -/* The exp_to_poly and poly_to_exp tables are used to perform efficient - * operations in GF(2^8) represented as GF(2)[x]/w(x) where - * w(x)=x^8+x^6+x^3+x^2+1. We care about doing that because it's part of the - * definition of the RS matrix in the key schedule. Elements of that field - * are polynomials of degree not greater than 7 and all coefficients 0 or 1, - * which can be represented naturally by bytes (just substitute x=2). In that - * form, GF(2^8) addition is the same as bitwise XOR, but GF(2^8) - * multiplication is inefficient without hardware support. To multiply - * faster, I make use of the fact x is a generator for the nonzero elements, - * so that every element p of GF(2)[x]/w(x) is either 0 or equal to (x)^n for - * some n in 0..254. Note that that caret is exponentiation in GF(2^8), - * *not* polynomial notation. So if I want to compute pq where p and q are - * in GF(2^8), I can just say: - * 1. if p=0 or q=0 then pq=0 - * 2. otherwise, find m and n such that p=x^m and q=x^n - * 3. pq=(x^m)(x^n)=x^(m+n), so add m and n and find pq - * The translations in steps 2 and 3 are looked up in the tables - * poly_to_exp (for step 2) and exp_to_poly (for step 3). To see this - * in action, look at the CALC_S macro. As additional wrinkles, note that - * one of my operands is always a constant, so the poly_to_exp lookup on it - * is done in advance; I included the original values in the comments so - * readers can have some chance of recognizing that this *is* the RS matrix - * from the Twofish paper. I've only included the table entries I actually - * need; I never do a lookup on a variable input of zero and the biggest - * exponents I'll ever see are 254 (variable) and 237 (constant), so they'll - * never sum to more than 491. I'm repeating part of the exp_to_poly table - * so that I don't have to do mod-255 reduction in the exponent arithmetic. - * Since I know my constant operands are never zero, I only have to worry - * about zero values in the variable operand, and I do it with a simple - * conditional branch. I know conditionals are expensive, but I couldn't - * see a non-horrible way of avoiding them, and I did manage to group the - * statements so that each if covers four group multiplications. */ - -static const u8 poly_to_exp[255] = { - 0x00, 0x01, 0x17, 0x02, 0x2E, 0x18, 0x53, 0x03, 0x6A, 0x2F, 0x93, 0x19, - 0x34, 0x54, 0x45, 0x04, 0x5C, 0x6B, 0xB6, 0x30, 0xA6, 0x94, 0x4B, 0x1A, - 0x8C, 0x35, 0x81, 0x55, 0xAA, 0x46, 0x0D, 0x05, 0x24, 0x5D, 0x87, 0x6C, - 0x9B, 0xB7, 0xC1, 0x31, 0x2B, 0xA7, 0xA3, 0x95, 0x98, 0x4C, 0xCA, 0x1B, - 0xE6, 0x8D, 0x73, 0x36, 0xCD, 0x82, 0x12, 0x56, 0x62, 0xAB, 0xF0, 0x47, - 0x4F, 0x0E, 0xBD, 0x06, 0xD4, 0x25, 0xD2, 0x5E, 0x27, 0x88, 0x66, 0x6D, - 0xD6, 0x9C, 0x79, 0xB8, 0x08, 0xC2, 0xDF, 0x32, 0x68, 0x2C, 0xFD, 0xA8, - 0x8A, 0xA4, 0x5A, 0x96, 0x29, 0x99, 0x22, 0x4D, 0x60, 0xCB, 0xE4, 0x1C, - 0x7B, 0xE7, 0x3B, 0x8E, 0x9E, 0x74, 0xF4, 0x37, 0xD8, 0xCE, 0xF9, 0x83, - 0x6F, 0x13, 0xB2, 0x57, 0xE1, 0x63, 0xDC, 0xAC, 0xC4, 0xF1, 0xAF, 0x48, - 0x0A, 0x50, 0x42, 0x0F, 0xBA, 0xBE, 0xC7, 0x07, 0xDE, 0xD5, 0x78, 0x26, - 0x65, 0xD3, 0xD1, 0x5F, 0xE3, 0x28, 0x21, 0x89, 0x59, 0x67, 0xFC, 0x6E, - 0xB1, 0xD7, 0xF8, 0x9D, 0xF3, 0x7A, 0x3A, 0xB9, 0xC6, 0x09, 0x41, 0xC3, - 0xAE, 0xE0, 0xDB, 0x33, 0x44, 0x69, 0x92, 0x2D, 0x52, 0xFE, 0x16, 0xA9, - 0x0C, 0x8B, 0x80, 0xA5, 0x4A, 0x5B, 0xB5, 0x97, 0xC9, 0x2A, 0xA2, 0x9A, - 0xC0, 0x23, 0x86, 0x4E, 0xBC, 0x61, 0xEF, 0xCC, 0x11, 0xE5, 0x72, 0x1D, - 0x3D, 0x7C, 0xEB, 0xE8, 0xE9, 0x3C, 0xEA, 0x8F, 0x7D, 0x9F, 0xEC, 0x75, - 0x1E, 0xF5, 0x3E, 0x38, 0xF6, 0xD9, 0x3F, 0xCF, 0x76, 0xFA, 0x1F, 0x84, - 0xA0, 0x70, 0xED, 0x14, 0x90, 0xB3, 0x7E, 0x58, 0xFB, 0xE2, 0x20, 0x64, - 0xD0, 0xDD, 0x77, 0xAD, 0xDA, 0xC5, 0x40, 0xF2, 0x39, 0xB0, 0xF7, 0x49, - 0xB4, 0x0B, 0x7F, 0x51, 0x15, 0x43, 0x91, 0x10, 0x71, 0xBB, 0xEE, 0xBF, - 0x85, 0xC8, 0xA1 -}; - -static const u8 exp_to_poly[492] = { - 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D, 0x9A, 0x79, 0xF2, - 0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC, 0xF5, 0xA7, 0x03, - 0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3, 0x8B, 0x5B, 0xB6, - 0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52, 0xA4, 0x05, 0x0A, - 0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0, 0xED, 0x97, 0x63, - 0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1, 0x0F, 0x1E, 0x3C, - 0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A, 0xF4, 0xA5, 0x07, - 0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11, 0x22, 0x44, 0x88, - 0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51, 0xA2, 0x09, 0x12, - 0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66, 0xCC, 0xD5, 0xE7, - 0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB, 0x1B, 0x36, 0x6C, - 0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19, 0x32, 0x64, 0xC8, - 0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D, 0x5A, 0xB4, 0x25, - 0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56, 0xAC, 0x15, 0x2A, - 0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE, 0x91, 0x6F, 0xDE, - 0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9, 0x3F, 0x7E, 0xFC, - 0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE, 0xB1, 0x2F, 0x5E, - 0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41, 0x82, 0x49, 0x92, - 0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E, 0x71, 0xE2, 0x89, - 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB, 0xDB, 0xFB, 0xBB, - 0x3B, 0x76, 0xEC, 0x95, 0x67, 0xCE, 0xD1, 0xEF, 0x93, 0x6B, 0xD6, 0xE1, - 0x8F, 0x53, 0xA6, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D, - 0x9A, 0x79, 0xF2, 0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC, - 0xF5, 0xA7, 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3, - 0x8B, 0x5B, 0xB6, 0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52, - 0xA4, 0x05, 0x0A, 0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0, - 0xED, 0x97, 0x63, 0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1, - 0x0F, 0x1E, 0x3C, 0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A, - 0xF4, 0xA5, 0x07, 0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11, - 0x22, 0x44, 0x88, 0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51, - 0xA2, 0x09, 0x12, 0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66, - 0xCC, 0xD5, 0xE7, 0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB, - 0x1B, 0x36, 0x6C, 0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19, - 0x32, 0x64, 0xC8, 0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D, - 0x5A, 0xB4, 0x25, 0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56, - 0xAC, 0x15, 0x2A, 0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE, - 0x91, 0x6F, 0xDE, 0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9, - 0x3F, 0x7E, 0xFC, 0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE, - 0xB1, 0x2F, 0x5E, 0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41, - 0x82, 0x49, 0x92, 0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E, - 0x71, 0xE2, 0x89, 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB -}; - - -/* The table constants are indices of - * S-box entries, preprocessed through q0 and q1. */ -static const u8 calc_sb_tbl[512] = { - 0xA9, 0x75, 0x67, 0xF3, 0xB3, 0xC6, 0xE8, 0xF4, - 0x04, 0xDB, 0xFD, 0x7B, 0xA3, 0xFB, 0x76, 0xC8, - 0x9A, 0x4A, 0x92, 0xD3, 0x80, 0xE6, 0x78, 0x6B, - 0xE4, 0x45, 0xDD, 0x7D, 0xD1, 0xE8, 0x38, 0x4B, - 0x0D, 0xD6, 0xC6, 0x32, 0x35, 0xD8, 0x98, 0xFD, - 0x18, 0x37, 0xF7, 0x71, 0xEC, 0xF1, 0x6C, 0xE1, - 0x43, 0x30, 0x75, 0x0F, 0x37, 0xF8, 0x26, 0x1B, - 0xFA, 0x87, 0x13, 0xFA, 0x94, 0x06, 0x48, 0x3F, - 0xF2, 0x5E, 0xD0, 0xBA, 0x8B, 0xAE, 0x30, 0x5B, - 0x84, 0x8A, 0x54, 0x00, 0xDF, 0xBC, 0x23, 0x9D, - 0x19, 0x6D, 0x5B, 0xC1, 0x3D, 0xB1, 0x59, 0x0E, - 0xF3, 0x80, 0xAE, 0x5D, 0xA2, 0xD2, 0x82, 0xD5, - 0x63, 0xA0, 0x01, 0x84, 0x83, 0x07, 0x2E, 0x14, - 0xD9, 0xB5, 0x51, 0x90, 0x9B, 0x2C, 0x7C, 0xA3, - 0xA6, 0xB2, 0xEB, 0x73, 0xA5, 0x4C, 0xBE, 0x54, - 0x16, 0x92, 0x0C, 0x74, 0xE3, 0x36, 0x61, 0x51, - 0xC0, 0x38, 0x8C, 0xB0, 0x3A, 0xBD, 0xF5, 0x5A, - 0x73, 0xFC, 0x2C, 0x60, 0x25, 0x62, 0x0B, 0x96, - 0xBB, 0x6C, 0x4E, 0x42, 0x89, 0xF7, 0x6B, 0x10, - 0x53, 0x7C, 0x6A, 0x28, 0xB4, 0x27, 0xF1, 0x8C, - 0xE1, 0x13, 0xE6, 0x95, 0xBD, 0x9C, 0x45, 0xC7, - 0xE2, 0x24, 0xF4, 0x46, 0xB6, 0x3B, 0x66, 0x70, - 0xCC, 0xCA, 0x95, 0xE3, 0x03, 0x85, 0x56, 0xCB, - 0xD4, 0x11, 0x1C, 0xD0, 0x1E, 0x93, 0xD7, 0xB8, - 0xFB, 0xA6, 0xC3, 0x83, 0x8E, 0x20, 0xB5, 0xFF, - 0xE9, 0x9F, 0xCF, 0x77, 0xBF, 0xC3, 0xBA, 0xCC, - 0xEA, 0x03, 0x77, 0x6F, 0x39, 0x08, 0xAF, 0xBF, - 0x33, 0x40, 0xC9, 0xE7, 0x62, 0x2B, 0x71, 0xE2, - 0x81, 0x79, 0x79, 0x0C, 0x09, 0xAA, 0xAD, 0x82, - 0x24, 0x41, 0xCD, 0x3A, 0xF9, 0xEA, 0xD8, 0xB9, - 0xE5, 0xE4, 0xC5, 0x9A, 0xB9, 0xA4, 0x4D, 0x97, - 0x44, 0x7E, 0x08, 0xDA, 0x86, 0x7A, 0xE7, 0x17, - 0xA1, 0x66, 0x1D, 0x94, 0xAA, 0xA1, 0xED, 0x1D, - 0x06, 0x3D, 0x70, 0xF0, 0xB2, 0xDE, 0xD2, 0xB3, - 0x41, 0x0B, 0x7B, 0x72, 0xA0, 0xA7, 0x11, 0x1C, - 0x31, 0xEF, 0xC2, 0xD1, 0x27, 0x53, 0x90, 0x3E, - 0x20, 0x8F, 0xF6, 0x33, 0x60, 0x26, 0xFF, 0x5F, - 0x96, 0xEC, 0x5C, 0x76, 0xB1, 0x2A, 0xAB, 0x49, - 0x9E, 0x81, 0x9C, 0x88, 0x52, 0xEE, 0x1B, 0x21, - 0x5F, 0xC4, 0x93, 0x1A, 0x0A, 0xEB, 0xEF, 0xD9, - 0x91, 0xC5, 0x85, 0x39, 0x49, 0x99, 0xEE, 0xCD, - 0x2D, 0xAD, 0x4F, 0x31, 0x8F, 0x8B, 0x3B, 0x01, - 0x47, 0x18, 0x87, 0x23, 0x6D, 0xDD, 0x46, 0x1F, - 0xD6, 0x4E, 0x3E, 0x2D, 0x69, 0xF9, 0x64, 0x48, - 0x2A, 0x4F, 0xCE, 0xF2, 0xCB, 0x65, 0x2F, 0x8E, - 0xFC, 0x78, 0x97, 0x5C, 0x05, 0x58, 0x7A, 0x19, - 0xAC, 0x8D, 0x7F, 0xE5, 0xD5, 0x98, 0x1A, 0x57, - 0x4B, 0x67, 0x0E, 0x7F, 0xA7, 0x05, 0x5A, 0x64, - 0x28, 0xAF, 0x14, 0x63, 0x3F, 0xB6, 0x29, 0xFE, - 0x88, 0xF5, 0x3C, 0xB7, 0x4C, 0x3C, 0x02, 0xA5, - 0xB8, 0xCE, 0xDA, 0xE9, 0xB0, 0x68, 0x17, 0x44, - 0x55, 0xE0, 0x1F, 0x4D, 0x8A, 0x43, 0x7D, 0x69, - 0x57, 0x29, 0xC7, 0x2E, 0x8D, 0xAC, 0x74, 0x15, - 0xB7, 0x59, 0xC4, 0xA8, 0x9F, 0x0A, 0x72, 0x9E, - 0x7E, 0x6E, 0x15, 0x47, 0x22, 0xDF, 0x12, 0x34, - 0x58, 0x35, 0x07, 0x6A, 0x99, 0xCF, 0x34, 0xDC, - 0x6E, 0x22, 0x50, 0xC9, 0xDE, 0xC0, 0x68, 0x9B, - 0x65, 0x89, 0xBC, 0xD4, 0xDB, 0xED, 0xF8, 0xAB, - 0xC8, 0x12, 0xA8, 0xA2, 0x2B, 0x0D, 0x40, 0x52, - 0xDC, 0xBB, 0xFE, 0x02, 0x32, 0x2F, 0xA4, 0xA9, - 0xCA, 0xD7, 0x10, 0x61, 0x21, 0x1E, 0xF0, 0xB4, - 0xD3, 0x50, 0x5D, 0x04, 0x0F, 0xF6, 0x00, 0xC2, - 0x6F, 0x16, 0x9D, 0x25, 0x36, 0x86, 0x42, 0x56, - 0x4A, 0x55, 0x5E, 0x09, 0xC1, 0xBE, 0xE0, 0x91 -}; - -/* Macro to perform one column of the RS matrix multiplication. The - * parameters a, b, c, and d are the four bytes of output; i is the index - * of the key bytes, and w, x, y, and z, are the column of constants from - * the RS matrix, preprocessed through the poly_to_exp table. */ - -#define CALC_S(a, b, c, d, i, w, x, y, z) \ - if (key[i]) { \ - tmp = poly_to_exp[key[i] - 1]; \ - (a) ^= exp_to_poly[tmp + (w)]; \ - (b) ^= exp_to_poly[tmp + (x)]; \ - (c) ^= exp_to_poly[tmp + (y)]; \ - (d) ^= exp_to_poly[tmp + (z)]; \ - } - -/* Macros to calculate the key-dependent S-boxes for a 128-bit key using - * the S vector from CALC_S. CALC_SB_2 computes a single entry in all - * four S-boxes, where i is the index of the entry to compute, and a and b - * are the index numbers preprocessed through the q0 and q1 tables - * respectively. */ - -#define CALC_SB_2(i, a, b) \ - ctx->s[0][i] = mds[0][q0[(a) ^ sa] ^ se]; \ - ctx->s[1][i] = mds[1][q0[(b) ^ sb] ^ sf]; \ - ctx->s[2][i] = mds[2][q1[(a) ^ sc] ^ sg]; \ - ctx->s[3][i] = mds[3][q1[(b) ^ sd] ^ sh] - -/* Macro exactly like CALC_SB_2, but for 192-bit keys. */ - -#define CALC_SB192_2(i, a, b) \ - ctx->s[0][i] = mds[0][q0[q0[(b) ^ sa] ^ se] ^ si]; \ - ctx->s[1][i] = mds[1][q0[q1[(b) ^ sb] ^ sf] ^ sj]; \ - ctx->s[2][i] = mds[2][q1[q0[(a) ^ sc] ^ sg] ^ sk]; \ - ctx->s[3][i] = mds[3][q1[q1[(a) ^ sd] ^ sh] ^ sl]; - -/* Macro exactly like CALC_SB_2, but for 256-bit keys. */ - -#define CALC_SB256_2(i, a, b) \ - ctx->s[0][i] = mds[0][q0[q0[q1[(b) ^ sa] ^ se] ^ si] ^ sm]; \ - ctx->s[1][i] = mds[1][q0[q1[q1[(a) ^ sb] ^ sf] ^ sj] ^ sn]; \ - ctx->s[2][i] = mds[2][q1[q0[q0[(a) ^ sc] ^ sg] ^ sk] ^ so]; \ - ctx->s[3][i] = mds[3][q1[q1[q0[(b) ^ sd] ^ sh] ^ sl] ^ sp]; - -/* Macros to calculate the whitening and round subkeys. CALC_K_2 computes the - * last two stages of the h() function for a given index (either 2i or 2i+1). - * a, b, c, and d are the four bytes going into the last two stages. For - * 128-bit keys, this is the entire h() function and a and c are the index - * preprocessed through q0 and q1 respectively; for longer keys they are the - * output of previous stages. j is the index of the first key byte to use. - * CALC_K computes a pair of subkeys for 128-bit Twofish, by calling CALC_K_2 - * twice, doing the Pseudo-Hadamard Transform, and doing the necessary - * rotations. Its parameters are: a, the array to write the results into, - * j, the index of the first output entry, k and l, the preprocessed indices - * for index 2i, and m and n, the preprocessed indices for index 2i+1. - * CALC_K192_2 expands CALC_K_2 to handle 192-bit keys, by doing an - * additional lookup-and-XOR stage. The parameters a, b, c and d are the - * four bytes going into the last three stages. For 192-bit keys, c = d - * are the index preprocessed through q0, and a = b are the index - * preprocessed through q1; j is the index of the first key byte to use. - * CALC_K192 is identical to CALC_K but for using the CALC_K192_2 macro - * instead of CALC_K_2. - * CALC_K256_2 expands CALC_K192_2 to handle 256-bit keys, by doing an - * additional lookup-and-XOR stage. The parameters a and b are the index - * preprocessed through q0 and q1 respectively; j is the index of the first - * key byte to use. CALC_K256 is identical to CALC_K but for using the - * CALC_K256_2 macro instead of CALC_K_2. */ - -#define CALC_K_2(a, b, c, d, j) \ - mds[0][q0[a ^ key[(j) + 8]] ^ key[j]] \ - ^ mds[1][q0[b ^ key[(j) + 9]] ^ key[(j) + 1]] \ - ^ mds[2][q1[c ^ key[(j) + 10]] ^ key[(j) + 2]] \ - ^ mds[3][q1[d ^ key[(j) + 11]] ^ key[(j) + 3]] - -#define CALC_K(a, j, k, l, m, n) \ - x = CALC_K_2 (k, l, k, l, 0); \ - y = CALC_K_2 (m, n, m, n, 4); \ - y = rol32(y, 8); \ - x += y; y += x; ctx->a[j] = x; \ - ctx->a[(j) + 1] = rol32(y, 9) - -#define CALC_K192_2(a, b, c, d, j) \ - CALC_K_2 (q0[a ^ key[(j) + 16]], \ - q1[b ^ key[(j) + 17]], \ - q0[c ^ key[(j) + 18]], \ - q1[d ^ key[(j) + 19]], j) - -#define CALC_K192(a, j, k, l, m, n) \ - x = CALC_K192_2 (l, l, k, k, 0); \ - y = CALC_K192_2 (n, n, m, m, 4); \ - y = rol32(y, 8); \ - x += y; y += x; ctx->a[j] = x; \ - ctx->a[(j) + 1] = rol32(y, 9) - -#define CALC_K256_2(a, b, j) \ - CALC_K192_2 (q1[b ^ key[(j) + 24]], \ - q1[a ^ key[(j) + 25]], \ - q0[a ^ key[(j) + 26]], \ - q0[b ^ key[(j) + 27]], j) - -#define CALC_K256(a, j, k, l, m, n) \ - x = CALC_K256_2 (k, l, 0); \ - y = CALC_K256_2 (m, n, 4); \ - y = rol32(y, 8); \ - x += y; y += x; ctx->a[j] = x; \ - ctx->a[(j) + 1] = rol32(y, 9) - - /* Macros to compute the g() function in the encryption and decryption * rounds. G1 is the straight g() function; G2 includes the 8-bit * rotation for the high 32-bit word. */ @@ -630,176 +103,7 @@ static const u8 calc_sb_tbl[512] = { x ^= ctx->w[m]; \ dst[n] = cpu_to_le32(x) -#define TF_MIN_KEY_SIZE 16 -#define TF_MAX_KEY_SIZE 32 -#define TF_BLOCK_SIZE 16 -/* Structure for an expanded Twofish key. s contains the key-dependent - * S-boxes composed with the MDS matrix; w contains the eight "whitening" - * subkeys, K[0] through K[7]. k holds the remaining, "round" subkeys. Note - * that k[i] corresponds to what the Twofish paper calls K[i+8]. */ -struct twofish_ctx { - u32 s[4][256], w[8], k[32]; -}; - -/* Perform the key setup. */ -static int twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int key_len, u32 *flags) -{ - - struct twofish_ctx *ctx = crypto_tfm_ctx(tfm); - - int i, j, k; - - /* Temporaries for CALC_K. */ - u32 x, y; - - /* The S vector used to key the S-boxes, split up into individual bytes. - * 128-bit keys use only sa through sh; 256-bit use all of them. */ - u8 sa = 0, sb = 0, sc = 0, sd = 0, se = 0, sf = 0, sg = 0, sh = 0; - u8 si = 0, sj = 0, sk = 0, sl = 0, sm = 0, sn = 0, so = 0, sp = 0; - - /* Temporary for CALC_S. */ - u8 tmp; - - /* Check key length. */ - if (key_len != 16 && key_len != 24 && key_len != 32) - { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; /* unsupported key length */ - } - - /* Compute the first two words of the S vector. The magic numbers are - * the entries of the RS matrix, preprocessed through poly_to_exp. The - * numbers in the comments are the original (polynomial form) matrix - * entries. */ - CALC_S (sa, sb, sc, sd, 0, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */ - CALC_S (sa, sb, sc, sd, 1, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */ - CALC_S (sa, sb, sc, sd, 2, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */ - CALC_S (sa, sb, sc, sd, 3, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */ - CALC_S (sa, sb, sc, sd, 4, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */ - CALC_S (sa, sb, sc, sd, 5, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */ - CALC_S (sa, sb, sc, sd, 6, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */ - CALC_S (sa, sb, sc, sd, 7, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */ - CALC_S (se, sf, sg, sh, 8, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */ - CALC_S (se, sf, sg, sh, 9, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */ - CALC_S (se, sf, sg, sh, 10, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */ - CALC_S (se, sf, sg, sh, 11, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */ - CALC_S (se, sf, sg, sh, 12, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */ - CALC_S (se, sf, sg, sh, 13, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */ - CALC_S (se, sf, sg, sh, 14, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */ - CALC_S (se, sf, sg, sh, 15, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */ - - if (key_len == 24 || key_len == 32) { /* 192- or 256-bit key */ - /* Calculate the third word of the S vector */ - CALC_S (si, sj, sk, sl, 16, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */ - CALC_S (si, sj, sk, sl, 17, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */ - CALC_S (si, sj, sk, sl, 18, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */ - CALC_S (si, sj, sk, sl, 19, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */ - CALC_S (si, sj, sk, sl, 20, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */ - CALC_S (si, sj, sk, sl, 21, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */ - CALC_S (si, sj, sk, sl, 22, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */ - CALC_S (si, sj, sk, sl, 23, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */ - } - - if (key_len == 32) { /* 256-bit key */ - /* Calculate the fourth word of the S vector */ - CALC_S (sm, sn, so, sp, 24, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */ - CALC_S (sm, sn, so, sp, 25, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */ - CALC_S (sm, sn, so, sp, 26, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */ - CALC_S (sm, sn, so, sp, 27, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */ - CALC_S (sm, sn, so, sp, 28, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */ - CALC_S (sm, sn, so, sp, 29, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */ - CALC_S (sm, sn, so, sp, 30, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */ - CALC_S (sm, sn, so, sp, 31, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */ - - /* Compute the S-boxes. */ - for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) { - CALC_SB256_2( i, calc_sb_tbl[j], calc_sb_tbl[k] ); - } - - /* Calculate whitening and round subkeys. The constants are - * indices of subkeys, preprocessed through q0 and q1. */ - CALC_K256 (w, 0, 0xA9, 0x75, 0x67, 0xF3); - CALC_K256 (w, 2, 0xB3, 0xC6, 0xE8, 0xF4); - CALC_K256 (w, 4, 0x04, 0xDB, 0xFD, 0x7B); - CALC_K256 (w, 6, 0xA3, 0xFB, 0x76, 0xC8); - CALC_K256 (k, 0, 0x9A, 0x4A, 0x92, 0xD3); - CALC_K256 (k, 2, 0x80, 0xE6, 0x78, 0x6B); - CALC_K256 (k, 4, 0xE4, 0x45, 0xDD, 0x7D); - CALC_K256 (k, 6, 0xD1, 0xE8, 0x38, 0x4B); - CALC_K256 (k, 8, 0x0D, 0xD6, 0xC6, 0x32); - CALC_K256 (k, 10, 0x35, 0xD8, 0x98, 0xFD); - CALC_K256 (k, 12, 0x18, 0x37, 0xF7, 0x71); - CALC_K256 (k, 14, 0xEC, 0xF1, 0x6C, 0xE1); - CALC_K256 (k, 16, 0x43, 0x30, 0x75, 0x0F); - CALC_K256 (k, 18, 0x37, 0xF8, 0x26, 0x1B); - CALC_K256 (k, 20, 0xFA, 0x87, 0x13, 0xFA); - CALC_K256 (k, 22, 0x94, 0x06, 0x48, 0x3F); - CALC_K256 (k, 24, 0xF2, 0x5E, 0xD0, 0xBA); - CALC_K256 (k, 26, 0x8B, 0xAE, 0x30, 0x5B); - CALC_K256 (k, 28, 0x84, 0x8A, 0x54, 0x00); - CALC_K256 (k, 30, 0xDF, 0xBC, 0x23, 0x9D); - } else if (key_len == 24) { /* 192-bit key */ - /* Compute the S-boxes. */ - for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) { - CALC_SB192_2( i, calc_sb_tbl[j], calc_sb_tbl[k] ); - } - - /* Calculate whitening and round subkeys. The constants are - * indices of subkeys, preprocessed through q0 and q1. */ - CALC_K192 (w, 0, 0xA9, 0x75, 0x67, 0xF3); - CALC_K192 (w, 2, 0xB3, 0xC6, 0xE8, 0xF4); - CALC_K192 (w, 4, 0x04, 0xDB, 0xFD, 0x7B); - CALC_K192 (w, 6, 0xA3, 0xFB, 0x76, 0xC8); - CALC_K192 (k, 0, 0x9A, 0x4A, 0x92, 0xD3); - CALC_K192 (k, 2, 0x80, 0xE6, 0x78, 0x6B); - CALC_K192 (k, 4, 0xE4, 0x45, 0xDD, 0x7D); - CALC_K192 (k, 6, 0xD1, 0xE8, 0x38, 0x4B); - CALC_K192 (k, 8, 0x0D, 0xD6, 0xC6, 0x32); - CALC_K192 (k, 10, 0x35, 0xD8, 0x98, 0xFD); - CALC_K192 (k, 12, 0x18, 0x37, 0xF7, 0x71); - CALC_K192 (k, 14, 0xEC, 0xF1, 0x6C, 0xE1); - CALC_K192 (k, 16, 0x43, 0x30, 0x75, 0x0F); - CALC_K192 (k, 18, 0x37, 0xF8, 0x26, 0x1B); - CALC_K192 (k, 20, 0xFA, 0x87, 0x13, 0xFA); - CALC_K192 (k, 22, 0x94, 0x06, 0x48, 0x3F); - CALC_K192 (k, 24, 0xF2, 0x5E, 0xD0, 0xBA); - CALC_K192 (k, 26, 0x8B, 0xAE, 0x30, 0x5B); - CALC_K192 (k, 28, 0x84, 0x8A, 0x54, 0x00); - CALC_K192 (k, 30, 0xDF, 0xBC, 0x23, 0x9D); - } else { /* 128-bit key */ - /* Compute the S-boxes. */ - for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) { - CALC_SB_2( i, calc_sb_tbl[j], calc_sb_tbl[k] ); - } - - /* Calculate whitening and round subkeys. The constants are - * indices of subkeys, preprocessed through q0 and q1. */ - CALC_K (w, 0, 0xA9, 0x75, 0x67, 0xF3); - CALC_K (w, 2, 0xB3, 0xC6, 0xE8, 0xF4); - CALC_K (w, 4, 0x04, 0xDB, 0xFD, 0x7B); - CALC_K (w, 6, 0xA3, 0xFB, 0x76, 0xC8); - CALC_K (k, 0, 0x9A, 0x4A, 0x92, 0xD3); - CALC_K (k, 2, 0x80, 0xE6, 0x78, 0x6B); - CALC_K (k, 4, 0xE4, 0x45, 0xDD, 0x7D); - CALC_K (k, 6, 0xD1, 0xE8, 0x38, 0x4B); - CALC_K (k, 8, 0x0D, 0xD6, 0xC6, 0x32); - CALC_K (k, 10, 0x35, 0xD8, 0x98, 0xFD); - CALC_K (k, 12, 0x18, 0x37, 0xF7, 0x71); - CALC_K (k, 14, 0xEC, 0xF1, 0x6C, 0xE1); - CALC_K (k, 16, 0x43, 0x30, 0x75, 0x0F); - CALC_K (k, 18, 0x37, 0xF8, 0x26, 0x1B); - CALC_K (k, 20, 0xFA, 0x87, 0x13, 0xFA); - CALC_K (k, 22, 0x94, 0x06, 0x48, 0x3F); - CALC_K (k, 24, 0xF2, 0x5E, 0xD0, 0xBA); - CALC_K (k, 26, 0x8B, 0xAE, 0x30, 0x5B); - CALC_K (k, 28, 0x84, 0x8A, 0x54, 0x00); - CALC_K (k, 30, 0xDF, 0xBC, 0x23, 0x9D); - } - - return 0; -} /* Encrypt one block. in and out may be the same. */ static void twofish_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) diff --git a/crypto/twofish_common.c b/crypto/twofish_common.c new file mode 100644 index 000000000000..1ae0280c2513 --- /dev/null +++ b/crypto/twofish_common.c @@ -0,0 +1,744 @@ +/* + * Common Twofish algorithm parts shared between the c and assembler + * implementations + * + * Originally Twofish for GPG + * By Matthew Skala , July 26, 1998 + * 256-bit key length added March 20, 1999 + * Some modifications to reduce the text size by Werner Koch, April, 1998 + * Ported to the kerneli patch by Marc Mutz + * Ported to CryptoAPI by Colin Slater + * + * The original author has disclaimed all copyright interest in this + * code and thus put it in the public domain. The subsequent authors + * have put this under the GNU General Public License. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + * This code is a "clean room" implementation, written from the paper + * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, + * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available + * through http://www.counterpane.com/twofish.html + * + * For background information on multiplication in finite fields, used for + * the matrix operations in the key schedule, see the book _Contemporary + * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the + * Third Edition. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + + +/* The large precomputed tables for the Twofish cipher (twofish.c) + * Taken from the same source as twofish.c + * Marc Mutz + */ + +/* These two tables are the q0 and q1 permutations, exactly as described in + * the Twofish paper. */ + +static const u8 q0[256] = { + 0xA9, 0x67, 0xB3, 0xE8, 0x04, 0xFD, 0xA3, 0x76, 0x9A, 0x92, 0x80, 0x78, + 0xE4, 0xDD, 0xD1, 0x38, 0x0D, 0xC6, 0x35, 0x98, 0x18, 0xF7, 0xEC, 0x6C, + 0x43, 0x75, 0x37, 0x26, 0xFA, 0x13, 0x94, 0x48, 0xF2, 0xD0, 0x8B, 0x30, + 0x84, 0x54, 0xDF, 0x23, 0x19, 0x5B, 0x3D, 0x59, 0xF3, 0xAE, 0xA2, 0x82, + 0x63, 0x01, 0x83, 0x2E, 0xD9, 0x51, 0x9B, 0x7C, 0xA6, 0xEB, 0xA5, 0xBE, + 0x16, 0x0C, 0xE3, 0x61, 0xC0, 0x8C, 0x3A, 0xF5, 0x73, 0x2C, 0x25, 0x0B, + 0xBB, 0x4E, 0x89, 0x6B, 0x53, 0x6A, 0xB4, 0xF1, 0xE1, 0xE6, 0xBD, 0x45, + 0xE2, 0xF4, 0xB6, 0x66, 0xCC, 0x95, 0x03, 0x56, 0xD4, 0x1C, 0x1E, 0xD7, + 0xFB, 0xC3, 0x8E, 0xB5, 0xE9, 0xCF, 0xBF, 0xBA, 0xEA, 0x77, 0x39, 0xAF, + 0x33, 0xC9, 0x62, 0x71, 0x81, 0x79, 0x09, 0xAD, 0x24, 0xCD, 0xF9, 0xD8, + 0xE5, 0xC5, 0xB9, 0x4D, 0x44, 0x08, 0x86, 0xE7, 0xA1, 0x1D, 0xAA, 0xED, + 0x06, 0x70, 0xB2, 0xD2, 0x41, 0x7B, 0xA0, 0x11, 0x31, 0xC2, 0x27, 0x90, + 0x20, 0xF6, 0x60, 0xFF, 0x96, 0x5C, 0xB1, 0xAB, 0x9E, 0x9C, 0x52, 0x1B, + 0x5F, 0x93, 0x0A, 0xEF, 0x91, 0x85, 0x49, 0xEE, 0x2D, 0x4F, 0x8F, 0x3B, + 0x47, 0x87, 0x6D, 0x46, 0xD6, 0x3E, 0x69, 0x64, 0x2A, 0xCE, 0xCB, 0x2F, + 0xFC, 0x97, 0x05, 0x7A, 0xAC, 0x7F, 0xD5, 0x1A, 0x4B, 0x0E, 0xA7, 0x5A, + 0x28, 0x14, 0x3F, 0x29, 0x88, 0x3C, 0x4C, 0x02, 0xB8, 0xDA, 0xB0, 0x17, + 0x55, 0x1F, 0x8A, 0x7D, 0x57, 0xC7, 0x8D, 0x74, 0xB7, 0xC4, 0x9F, 0x72, + 0x7E, 0x15, 0x22, 0x12, 0x58, 0x07, 0x99, 0x34, 0x6E, 0x50, 0xDE, 0x68, + 0x65, 0xBC, 0xDB, 0xF8, 0xC8, 0xA8, 0x2B, 0x40, 0xDC, 0xFE, 0x32, 0xA4, + 0xCA, 0x10, 0x21, 0xF0, 0xD3, 0x5D, 0x0F, 0x00, 0x6F, 0x9D, 0x36, 0x42, + 0x4A, 0x5E, 0xC1, 0xE0 +}; + +static const u8 q1[256] = { + 0x75, 0xF3, 0xC6, 0xF4, 0xDB, 0x7B, 0xFB, 0xC8, 0x4A, 0xD3, 0xE6, 0x6B, + 0x45, 0x7D, 0xE8, 0x4B, 0xD6, 0x32, 0xD8, 0xFD, 0x37, 0x71, 0xF1, 0xE1, + 0x30, 0x0F, 0xF8, 0x1B, 0x87, 0xFA, 0x06, 0x3F, 0x5E, 0xBA, 0xAE, 0x5B, + 0x8A, 0x00, 0xBC, 0x9D, 0x6D, 0xC1, 0xB1, 0x0E, 0x80, 0x5D, 0xD2, 0xD5, + 0xA0, 0x84, 0x07, 0x14, 0xB5, 0x90, 0x2C, 0xA3, 0xB2, 0x73, 0x4C, 0x54, + 0x92, 0x74, 0x36, 0x51, 0x38, 0xB0, 0xBD, 0x5A, 0xFC, 0x60, 0x62, 0x96, + 0x6C, 0x42, 0xF7, 0x10, 0x7C, 0x28, 0x27, 0x8C, 0x13, 0x95, 0x9C, 0xC7, + 0x24, 0x46, 0x3B, 0x70, 0xCA, 0xE3, 0x85, 0xCB, 0x11, 0xD0, 0x93, 0xB8, + 0xA6, 0x83, 0x20, 0xFF, 0x9F, 0x77, 0xC3, 0xCC, 0x03, 0x6F, 0x08, 0xBF, + 0x40, 0xE7, 0x2B, 0xE2, 0x79, 0x0C, 0xAA, 0x82, 0x41, 0x3A, 0xEA, 0xB9, + 0xE4, 0x9A, 0xA4, 0x97, 0x7E, 0xDA, 0x7A, 0x17, 0x66, 0x94, 0xA1, 0x1D, + 0x3D, 0xF0, 0xDE, 0xB3, 0x0B, 0x72, 0xA7, 0x1C, 0xEF, 0xD1, 0x53, 0x3E, + 0x8F, 0x33, 0x26, 0x5F, 0xEC, 0x76, 0x2A, 0x49, 0x81, 0x88, 0xEE, 0x21, + 0xC4, 0x1A, 0xEB, 0xD9, 0xC5, 0x39, 0x99, 0xCD, 0xAD, 0x31, 0x8B, 0x01, + 0x18, 0x23, 0xDD, 0x1F, 0x4E, 0x2D, 0xF9, 0x48, 0x4F, 0xF2, 0x65, 0x8E, + 0x78, 0x5C, 0x58, 0x19, 0x8D, 0xE5, 0x98, 0x57, 0x67, 0x7F, 0x05, 0x64, + 0xAF, 0x63, 0xB6, 0xFE, 0xF5, 0xB7, 0x3C, 0xA5, 0xCE, 0xE9, 0x68, 0x44, + 0xE0, 0x4D, 0x43, 0x69, 0x29, 0x2E, 0xAC, 0x15, 0x59, 0xA8, 0x0A, 0x9E, + 0x6E, 0x47, 0xDF, 0x34, 0x35, 0x6A, 0xCF, 0xDC, 0x22, 0xC9, 0xC0, 0x9B, + 0x89, 0xD4, 0xED, 0xAB, 0x12, 0xA2, 0x0D, 0x52, 0xBB, 0x02, 0x2F, 0xA9, + 0xD7, 0x61, 0x1E, 0xB4, 0x50, 0x04, 0xF6, 0xC2, 0x16, 0x25, 0x86, 0x56, + 0x55, 0x09, 0xBE, 0x91 +}; + +/* These MDS tables are actually tables of MDS composed with q0 and q1, + * because it is only ever used that way and we can save some time by + * precomputing. Of course the main saving comes from precomputing the + * GF(2^8) multiplication involved in the MDS matrix multiply; by looking + * things up in these tables we reduce the matrix multiply to four lookups + * and three XORs. Semi-formally, the definition of these tables is: + * mds[0][i] = MDS (q1[i] 0 0 0)^T mds[1][i] = MDS (0 q0[i] 0 0)^T + * mds[2][i] = MDS (0 0 q1[i] 0)^T mds[3][i] = MDS (0 0 0 q0[i])^T + * where ^T means "transpose", the matrix multiply is performed in GF(2^8) + * represented as GF(2)[x]/v(x) where v(x)=x^8+x^6+x^5+x^3+1 as described + * by Schneier et al, and I'm casually glossing over the byte/word + * conversion issues. */ + +static const u32 mds[4][256] = { + { + 0xBCBC3275, 0xECEC21F3, 0x202043C6, 0xB3B3C9F4, 0xDADA03DB, 0x02028B7B, + 0xE2E22BFB, 0x9E9EFAC8, 0xC9C9EC4A, 0xD4D409D3, 0x18186BE6, 0x1E1E9F6B, + 0x98980E45, 0xB2B2387D, 0xA6A6D2E8, 0x2626B74B, 0x3C3C57D6, 0x93938A32, + 0x8282EED8, 0x525298FD, 0x7B7BD437, 0xBBBB3771, 0x5B5B97F1, 0x474783E1, + 0x24243C30, 0x5151E20F, 0xBABAC6F8, 0x4A4AF31B, 0xBFBF4887, 0x0D0D70FA, + 0xB0B0B306, 0x7575DE3F, 0xD2D2FD5E, 0x7D7D20BA, 0x666631AE, 0x3A3AA35B, + 0x59591C8A, 0x00000000, 0xCDCD93BC, 0x1A1AE09D, 0xAEAE2C6D, 0x7F7FABC1, + 0x2B2BC7B1, 0xBEBEB90E, 0xE0E0A080, 0x8A8A105D, 0x3B3B52D2, 0x6464BAD5, + 0xD8D888A0, 0xE7E7A584, 0x5F5FE807, 0x1B1B1114, 0x2C2CC2B5, 0xFCFCB490, + 0x3131272C, 0x808065A3, 0x73732AB2, 0x0C0C8173, 0x79795F4C, 0x6B6B4154, + 0x4B4B0292, 0x53536974, 0x94948F36, 0x83831F51, 0x2A2A3638, 0xC4C49CB0, + 0x2222C8BD, 0xD5D5F85A, 0xBDBDC3FC, 0x48487860, 0xFFFFCE62, 0x4C4C0796, + 0x4141776C, 0xC7C7E642, 0xEBEB24F7, 0x1C1C1410, 0x5D5D637C, 0x36362228, + 0x6767C027, 0xE9E9AF8C, 0x4444F913, 0x1414EA95, 0xF5F5BB9C, 0xCFCF18C7, + 0x3F3F2D24, 0xC0C0E346, 0x7272DB3B, 0x54546C70, 0x29294CCA, 0xF0F035E3, + 0x0808FE85, 0xC6C617CB, 0xF3F34F11, 0x8C8CE4D0, 0xA4A45993, 0xCACA96B8, + 0x68683BA6, 0xB8B84D83, 0x38382820, 0xE5E52EFF, 0xADAD569F, 0x0B0B8477, + 0xC8C81DC3, 0x9999FFCC, 0x5858ED03, 0x19199A6F, 0x0E0E0A08, 0x95957EBF, + 0x70705040, 0xF7F730E7, 0x6E6ECF2B, 0x1F1F6EE2, 0xB5B53D79, 0x09090F0C, + 0x616134AA, 0x57571682, 0x9F9F0B41, 0x9D9D803A, 0x111164EA, 0x2525CDB9, + 0xAFAFDDE4, 0x4545089A, 0xDFDF8DA4, 0xA3A35C97, 0xEAEAD57E, 0x353558DA, + 0xEDEDD07A, 0x4343FC17, 0xF8F8CB66, 0xFBFBB194, 0x3737D3A1, 0xFAFA401D, + 0xC2C2683D, 0xB4B4CCF0, 0x32325DDE, 0x9C9C71B3, 0x5656E70B, 0xE3E3DA72, + 0x878760A7, 0x15151B1C, 0xF9F93AEF, 0x6363BFD1, 0x3434A953, 0x9A9A853E, + 0xB1B1428F, 0x7C7CD133, 0x88889B26, 0x3D3DA65F, 0xA1A1D7EC, 0xE4E4DF76, + 0x8181942A, 0x91910149, 0x0F0FFB81, 0xEEEEAA88, 0x161661EE, 0xD7D77321, + 0x9797F5C4, 0xA5A5A81A, 0xFEFE3FEB, 0x6D6DB5D9, 0x7878AEC5, 0xC5C56D39, + 0x1D1DE599, 0x7676A4CD, 0x3E3EDCAD, 0xCBCB6731, 0xB6B6478B, 0xEFEF5B01, + 0x12121E18, 0x6060C523, 0x6A6AB0DD, 0x4D4DF61F, 0xCECEE94E, 0xDEDE7C2D, + 0x55559DF9, 0x7E7E5A48, 0x2121B24F, 0x03037AF2, 0xA0A02665, 0x5E5E198E, + 0x5A5A6678, 0x65654B5C, 0x62624E58, 0xFDFD4519, 0x0606F48D, 0x404086E5, + 0xF2F2BE98, 0x3333AC57, 0x17179067, 0x05058E7F, 0xE8E85E05, 0x4F4F7D64, + 0x89896AAF, 0x10109563, 0x74742FB6, 0x0A0A75FE, 0x5C5C92F5, 0x9B9B74B7, + 0x2D2D333C, 0x3030D6A5, 0x2E2E49CE, 0x494989E9, 0x46467268, 0x77775544, + 0xA8A8D8E0, 0x9696044D, 0x2828BD43, 0xA9A92969, 0xD9D97929, 0x8686912E, + 0xD1D187AC, 0xF4F44A15, 0x8D8D1559, 0xD6D682A8, 0xB9B9BC0A, 0x42420D9E, + 0xF6F6C16E, 0x2F2FB847, 0xDDDD06DF, 0x23233934, 0xCCCC6235, 0xF1F1C46A, + 0xC1C112CF, 0x8585EBDC, 0x8F8F9E22, 0x7171A1C9, 0x9090F0C0, 0xAAAA539B, + 0x0101F189, 0x8B8BE1D4, 0x4E4E8CED, 0x8E8E6FAB, 0xABABA212, 0x6F6F3EA2, + 0xE6E6540D, 0xDBDBF252, 0x92927BBB, 0xB7B7B602, 0x6969CA2F, 0x3939D9A9, + 0xD3D30CD7, 0xA7A72361, 0xA2A2AD1E, 0xC3C399B4, 0x6C6C4450, 0x07070504, + 0x04047FF6, 0x272746C2, 0xACACA716, 0xD0D07625, 0x50501386, 0xDCDCF756, + 0x84841A55, 0xE1E15109, 0x7A7A25BE, 0x1313EF91}, + + { + 0xA9D93939, 0x67901717, 0xB3719C9C, 0xE8D2A6A6, 0x04050707, 0xFD985252, + 0xA3658080, 0x76DFE4E4, 0x9A084545, 0x92024B4B, 0x80A0E0E0, 0x78665A5A, + 0xE4DDAFAF, 0xDDB06A6A, 0xD1BF6363, 0x38362A2A, 0x0D54E6E6, 0xC6432020, + 0x3562CCCC, 0x98BEF2F2, 0x181E1212, 0xF724EBEB, 0xECD7A1A1, 0x6C774141, + 0x43BD2828, 0x7532BCBC, 0x37D47B7B, 0x269B8888, 0xFA700D0D, 0x13F94444, + 0x94B1FBFB, 0x485A7E7E, 0xF27A0303, 0xD0E48C8C, 0x8B47B6B6, 0x303C2424, + 0x84A5E7E7, 0x54416B6B, 0xDF06DDDD, 0x23C56060, 0x1945FDFD, 0x5BA33A3A, + 0x3D68C2C2, 0x59158D8D, 0xF321ECEC, 0xAE316666, 0xA23E6F6F, 0x82165757, + 0x63951010, 0x015BEFEF, 0x834DB8B8, 0x2E918686, 0xD9B56D6D, 0x511F8383, + 0x9B53AAAA, 0x7C635D5D, 0xA63B6868, 0xEB3FFEFE, 0xA5D63030, 0xBE257A7A, + 0x16A7ACAC, 0x0C0F0909, 0xE335F0F0, 0x6123A7A7, 0xC0F09090, 0x8CAFE9E9, + 0x3A809D9D, 0xF5925C5C, 0x73810C0C, 0x2C273131, 0x2576D0D0, 0x0BE75656, + 0xBB7B9292, 0x4EE9CECE, 0x89F10101, 0x6B9F1E1E, 0x53A93434, 0x6AC4F1F1, + 0xB499C3C3, 0xF1975B5B, 0xE1834747, 0xE66B1818, 0xBDC82222, 0x450E9898, + 0xE26E1F1F, 0xF4C9B3B3, 0xB62F7474, 0x66CBF8F8, 0xCCFF9999, 0x95EA1414, + 0x03ED5858, 0x56F7DCDC, 0xD4E18B8B, 0x1C1B1515, 0x1EADA2A2, 0xD70CD3D3, + 0xFB2BE2E2, 0xC31DC8C8, 0x8E195E5E, 0xB5C22C2C, 0xE9894949, 0xCF12C1C1, + 0xBF7E9595, 0xBA207D7D, 0xEA641111, 0x77840B0B, 0x396DC5C5, 0xAF6A8989, + 0x33D17C7C, 0xC9A17171, 0x62CEFFFF, 0x7137BBBB, 0x81FB0F0F, 0x793DB5B5, + 0x0951E1E1, 0xADDC3E3E, 0x242D3F3F, 0xCDA47676, 0xF99D5555, 0xD8EE8282, + 0xE5864040, 0xC5AE7878, 0xB9CD2525, 0x4D049696, 0x44557777, 0x080A0E0E, + 0x86135050, 0xE730F7F7, 0xA1D33737, 0x1D40FAFA, 0xAA346161, 0xED8C4E4E, + 0x06B3B0B0, 0x706C5454, 0xB22A7373, 0xD2523B3B, 0x410B9F9F, 0x7B8B0202, + 0xA088D8D8, 0x114FF3F3, 0x3167CBCB, 0xC2462727, 0x27C06767, 0x90B4FCFC, + 0x20283838, 0xF67F0404, 0x60784848, 0xFF2EE5E5, 0x96074C4C, 0x5C4B6565, + 0xB1C72B2B, 0xAB6F8E8E, 0x9E0D4242, 0x9CBBF5F5, 0x52F2DBDB, 0x1BF34A4A, + 0x5FA63D3D, 0x9359A4A4, 0x0ABCB9B9, 0xEF3AF9F9, 0x91EF1313, 0x85FE0808, + 0x49019191, 0xEE611616, 0x2D7CDEDE, 0x4FB22121, 0x8F42B1B1, 0x3BDB7272, + 0x47B82F2F, 0x8748BFBF, 0x6D2CAEAE, 0x46E3C0C0, 0xD6573C3C, 0x3E859A9A, + 0x6929A9A9, 0x647D4F4F, 0x2A948181, 0xCE492E2E, 0xCB17C6C6, 0x2FCA6969, + 0xFCC3BDBD, 0x975CA3A3, 0x055EE8E8, 0x7AD0EDED, 0xAC87D1D1, 0x7F8E0505, + 0xD5BA6464, 0x1AA8A5A5, 0x4BB72626, 0x0EB9BEBE, 0xA7608787, 0x5AF8D5D5, + 0x28223636, 0x14111B1B, 0x3FDE7575, 0x2979D9D9, 0x88AAEEEE, 0x3C332D2D, + 0x4C5F7979, 0x02B6B7B7, 0xB896CACA, 0xDA583535, 0xB09CC4C4, 0x17FC4343, + 0x551A8484, 0x1FF64D4D, 0x8A1C5959, 0x7D38B2B2, 0x57AC3333, 0xC718CFCF, + 0x8DF40606, 0x74695353, 0xB7749B9B, 0xC4F59797, 0x9F56ADAD, 0x72DAE3E3, + 0x7ED5EAEA, 0x154AF4F4, 0x229E8F8F, 0x12A2ABAB, 0x584E6262, 0x07E85F5F, + 0x99E51D1D, 0x34392323, 0x6EC1F6F6, 0x50446C6C, 0xDE5D3232, 0x68724646, + 0x6526A0A0, 0xBC93CDCD, 0xDB03DADA, 0xF8C6BABA, 0xC8FA9E9E, 0xA882D6D6, + 0x2BCF6E6E, 0x40507070, 0xDCEB8585, 0xFE750A0A, 0x328A9393, 0xA48DDFDF, + 0xCA4C2929, 0x10141C1C, 0x2173D7D7, 0xF0CCB4B4, 0xD309D4D4, 0x5D108A8A, + 0x0FE25151, 0x00000000, 0x6F9A1919, 0x9DE01A1A, 0x368F9494, 0x42E6C7C7, + 0x4AECC9C9, 0x5EFDD2D2, 0xC1AB7F7F, 0xE0D8A8A8}, + + { + 0xBC75BC32, 0xECF3EC21, 0x20C62043, 0xB3F4B3C9, 0xDADBDA03, 0x027B028B, + 0xE2FBE22B, 0x9EC89EFA, 0xC94AC9EC, 0xD4D3D409, 0x18E6186B, 0x1E6B1E9F, + 0x9845980E, 0xB27DB238, 0xA6E8A6D2, 0x264B26B7, 0x3CD63C57, 0x9332938A, + 0x82D882EE, 0x52FD5298, 0x7B377BD4, 0xBB71BB37, 0x5BF15B97, 0x47E14783, + 0x2430243C, 0x510F51E2, 0xBAF8BAC6, 0x4A1B4AF3, 0xBF87BF48, 0x0DFA0D70, + 0xB006B0B3, 0x753F75DE, 0xD25ED2FD, 0x7DBA7D20, 0x66AE6631, 0x3A5B3AA3, + 0x598A591C, 0x00000000, 0xCDBCCD93, 0x1A9D1AE0, 0xAE6DAE2C, 0x7FC17FAB, + 0x2BB12BC7, 0xBE0EBEB9, 0xE080E0A0, 0x8A5D8A10, 0x3BD23B52, 0x64D564BA, + 0xD8A0D888, 0xE784E7A5, 0x5F075FE8, 0x1B141B11, 0x2CB52CC2, 0xFC90FCB4, + 0x312C3127, 0x80A38065, 0x73B2732A, 0x0C730C81, 0x794C795F, 0x6B546B41, + 0x4B924B02, 0x53745369, 0x9436948F, 0x8351831F, 0x2A382A36, 0xC4B0C49C, + 0x22BD22C8, 0xD55AD5F8, 0xBDFCBDC3, 0x48604878, 0xFF62FFCE, 0x4C964C07, + 0x416C4177, 0xC742C7E6, 0xEBF7EB24, 0x1C101C14, 0x5D7C5D63, 0x36283622, + 0x672767C0, 0xE98CE9AF, 0x441344F9, 0x149514EA, 0xF59CF5BB, 0xCFC7CF18, + 0x3F243F2D, 0xC046C0E3, 0x723B72DB, 0x5470546C, 0x29CA294C, 0xF0E3F035, + 0x088508FE, 0xC6CBC617, 0xF311F34F, 0x8CD08CE4, 0xA493A459, 0xCAB8CA96, + 0x68A6683B, 0xB883B84D, 0x38203828, 0xE5FFE52E, 0xAD9FAD56, 0x0B770B84, + 0xC8C3C81D, 0x99CC99FF, 0x580358ED, 0x196F199A, 0x0E080E0A, 0x95BF957E, + 0x70407050, 0xF7E7F730, 0x6E2B6ECF, 0x1FE21F6E, 0xB579B53D, 0x090C090F, + 0x61AA6134, 0x57825716, 0x9F419F0B, 0x9D3A9D80, 0x11EA1164, 0x25B925CD, + 0xAFE4AFDD, 0x459A4508, 0xDFA4DF8D, 0xA397A35C, 0xEA7EEAD5, 0x35DA3558, + 0xED7AEDD0, 0x431743FC, 0xF866F8CB, 0xFB94FBB1, 0x37A137D3, 0xFA1DFA40, + 0xC23DC268, 0xB4F0B4CC, 0x32DE325D, 0x9CB39C71, 0x560B56E7, 0xE372E3DA, + 0x87A78760, 0x151C151B, 0xF9EFF93A, 0x63D163BF, 0x345334A9, 0x9A3E9A85, + 0xB18FB142, 0x7C337CD1, 0x8826889B, 0x3D5F3DA6, 0xA1ECA1D7, 0xE476E4DF, + 0x812A8194, 0x91499101, 0x0F810FFB, 0xEE88EEAA, 0x16EE1661, 0xD721D773, + 0x97C497F5, 0xA51AA5A8, 0xFEEBFE3F, 0x6DD96DB5, 0x78C578AE, 0xC539C56D, + 0x1D991DE5, 0x76CD76A4, 0x3EAD3EDC, 0xCB31CB67, 0xB68BB647, 0xEF01EF5B, + 0x1218121E, 0x602360C5, 0x6ADD6AB0, 0x4D1F4DF6, 0xCE4ECEE9, 0xDE2DDE7C, + 0x55F9559D, 0x7E487E5A, 0x214F21B2, 0x03F2037A, 0xA065A026, 0x5E8E5E19, + 0x5A785A66, 0x655C654B, 0x6258624E, 0xFD19FD45, 0x068D06F4, 0x40E54086, + 0xF298F2BE, 0x335733AC, 0x17671790, 0x057F058E, 0xE805E85E, 0x4F644F7D, + 0x89AF896A, 0x10631095, 0x74B6742F, 0x0AFE0A75, 0x5CF55C92, 0x9BB79B74, + 0x2D3C2D33, 0x30A530D6, 0x2ECE2E49, 0x49E94989, 0x46684672, 0x77447755, + 0xA8E0A8D8, 0x964D9604, 0x284328BD, 0xA969A929, 0xD929D979, 0x862E8691, + 0xD1ACD187, 0xF415F44A, 0x8D598D15, 0xD6A8D682, 0xB90AB9BC, 0x429E420D, + 0xF66EF6C1, 0x2F472FB8, 0xDDDFDD06, 0x23342339, 0xCC35CC62, 0xF16AF1C4, + 0xC1CFC112, 0x85DC85EB, 0x8F228F9E, 0x71C971A1, 0x90C090F0, 0xAA9BAA53, + 0x018901F1, 0x8BD48BE1, 0x4EED4E8C, 0x8EAB8E6F, 0xAB12ABA2, 0x6FA26F3E, + 0xE60DE654, 0xDB52DBF2, 0x92BB927B, 0xB702B7B6, 0x692F69CA, 0x39A939D9, + 0xD3D7D30C, 0xA761A723, 0xA21EA2AD, 0xC3B4C399, 0x6C506C44, 0x07040705, + 0x04F6047F, 0x27C22746, 0xAC16ACA7, 0xD025D076, 0x50865013, 0xDC56DCF7, + 0x8455841A, 0xE109E151, 0x7ABE7A25, 0x139113EF}, + + { + 0xD939A9D9, 0x90176790, 0x719CB371, 0xD2A6E8D2, 0x05070405, 0x9852FD98, + 0x6580A365, 0xDFE476DF, 0x08459A08, 0x024B9202, 0xA0E080A0, 0x665A7866, + 0xDDAFE4DD, 0xB06ADDB0, 0xBF63D1BF, 0x362A3836, 0x54E60D54, 0x4320C643, + 0x62CC3562, 0xBEF298BE, 0x1E12181E, 0x24EBF724, 0xD7A1ECD7, 0x77416C77, + 0xBD2843BD, 0x32BC7532, 0xD47B37D4, 0x9B88269B, 0x700DFA70, 0xF94413F9, + 0xB1FB94B1, 0x5A7E485A, 0x7A03F27A, 0xE48CD0E4, 0x47B68B47, 0x3C24303C, + 0xA5E784A5, 0x416B5441, 0x06DDDF06, 0xC56023C5, 0x45FD1945, 0xA33A5BA3, + 0x68C23D68, 0x158D5915, 0x21ECF321, 0x3166AE31, 0x3E6FA23E, 0x16578216, + 0x95106395, 0x5BEF015B, 0x4DB8834D, 0x91862E91, 0xB56DD9B5, 0x1F83511F, + 0x53AA9B53, 0x635D7C63, 0x3B68A63B, 0x3FFEEB3F, 0xD630A5D6, 0x257ABE25, + 0xA7AC16A7, 0x0F090C0F, 0x35F0E335, 0x23A76123, 0xF090C0F0, 0xAFE98CAF, + 0x809D3A80, 0x925CF592, 0x810C7381, 0x27312C27, 0x76D02576, 0xE7560BE7, + 0x7B92BB7B, 0xE9CE4EE9, 0xF10189F1, 0x9F1E6B9F, 0xA93453A9, 0xC4F16AC4, + 0x99C3B499, 0x975BF197, 0x8347E183, 0x6B18E66B, 0xC822BDC8, 0x0E98450E, + 0x6E1FE26E, 0xC9B3F4C9, 0x2F74B62F, 0xCBF866CB, 0xFF99CCFF, 0xEA1495EA, + 0xED5803ED, 0xF7DC56F7, 0xE18BD4E1, 0x1B151C1B, 0xADA21EAD, 0x0CD3D70C, + 0x2BE2FB2B, 0x1DC8C31D, 0x195E8E19, 0xC22CB5C2, 0x8949E989, 0x12C1CF12, + 0x7E95BF7E, 0x207DBA20, 0x6411EA64, 0x840B7784, 0x6DC5396D, 0x6A89AF6A, + 0xD17C33D1, 0xA171C9A1, 0xCEFF62CE, 0x37BB7137, 0xFB0F81FB, 0x3DB5793D, + 0x51E10951, 0xDC3EADDC, 0x2D3F242D, 0xA476CDA4, 0x9D55F99D, 0xEE82D8EE, + 0x8640E586, 0xAE78C5AE, 0xCD25B9CD, 0x04964D04, 0x55774455, 0x0A0E080A, + 0x13508613, 0x30F7E730, 0xD337A1D3, 0x40FA1D40, 0x3461AA34, 0x8C4EED8C, + 0xB3B006B3, 0x6C54706C, 0x2A73B22A, 0x523BD252, 0x0B9F410B, 0x8B027B8B, + 0x88D8A088, 0x4FF3114F, 0x67CB3167, 0x4627C246, 0xC06727C0, 0xB4FC90B4, + 0x28382028, 0x7F04F67F, 0x78486078, 0x2EE5FF2E, 0x074C9607, 0x4B655C4B, + 0xC72BB1C7, 0x6F8EAB6F, 0x0D429E0D, 0xBBF59CBB, 0xF2DB52F2, 0xF34A1BF3, + 0xA63D5FA6, 0x59A49359, 0xBCB90ABC, 0x3AF9EF3A, 0xEF1391EF, 0xFE0885FE, + 0x01914901, 0x6116EE61, 0x7CDE2D7C, 0xB2214FB2, 0x42B18F42, 0xDB723BDB, + 0xB82F47B8, 0x48BF8748, 0x2CAE6D2C, 0xE3C046E3, 0x573CD657, 0x859A3E85, + 0x29A96929, 0x7D4F647D, 0x94812A94, 0x492ECE49, 0x17C6CB17, 0xCA692FCA, + 0xC3BDFCC3, 0x5CA3975C, 0x5EE8055E, 0xD0ED7AD0, 0x87D1AC87, 0x8E057F8E, + 0xBA64D5BA, 0xA8A51AA8, 0xB7264BB7, 0xB9BE0EB9, 0x6087A760, 0xF8D55AF8, + 0x22362822, 0x111B1411, 0xDE753FDE, 0x79D92979, 0xAAEE88AA, 0x332D3C33, + 0x5F794C5F, 0xB6B702B6, 0x96CAB896, 0x5835DA58, 0x9CC4B09C, 0xFC4317FC, + 0x1A84551A, 0xF64D1FF6, 0x1C598A1C, 0x38B27D38, 0xAC3357AC, 0x18CFC718, + 0xF4068DF4, 0x69537469, 0x749BB774, 0xF597C4F5, 0x56AD9F56, 0xDAE372DA, + 0xD5EA7ED5, 0x4AF4154A, 0x9E8F229E, 0xA2AB12A2, 0x4E62584E, 0xE85F07E8, + 0xE51D99E5, 0x39233439, 0xC1F66EC1, 0x446C5044, 0x5D32DE5D, 0x72466872, + 0x26A06526, 0x93CDBC93, 0x03DADB03, 0xC6BAF8C6, 0xFA9EC8FA, 0x82D6A882, + 0xCF6E2BCF, 0x50704050, 0xEB85DCEB, 0x750AFE75, 0x8A93328A, 0x8DDFA48D, + 0x4C29CA4C, 0x141C1014, 0x73D72173, 0xCCB4F0CC, 0x09D4D309, 0x108A5D10, + 0xE2510FE2, 0x00000000, 0x9A196F9A, 0xE01A9DE0, 0x8F94368F, 0xE6C742E6, + 0xECC94AEC, 0xFDD25EFD, 0xAB7FC1AB, 0xD8A8E0D8} +}; + +/* The exp_to_poly and poly_to_exp tables are used to perform efficient + * operations in GF(2^8) represented as GF(2)[x]/w(x) where + * w(x)=x^8+x^6+x^3+x^2+1. We care about doing that because it's part of the + * definition of the RS matrix in the key schedule. Elements of that field + * are polynomials of degree not greater than 7 and all coefficients 0 or 1, + * which can be represented naturally by bytes (just substitute x=2). In that + * form, GF(2^8) addition is the same as bitwise XOR, but GF(2^8) + * multiplication is inefficient without hardware support. To multiply + * faster, I make use of the fact x is a generator for the nonzero elements, + * so that every element p of GF(2)[x]/w(x) is either 0 or equal to (x)^n for + * some n in 0..254. Note that that caret is exponentiation in GF(2^8), + * *not* polynomial notation. So if I want to compute pq where p and q are + * in GF(2^8), I can just say: + * 1. if p=0 or q=0 then pq=0 + * 2. otherwise, find m and n such that p=x^m and q=x^n + * 3. pq=(x^m)(x^n)=x^(m+n), so add m and n and find pq + * The translations in steps 2 and 3 are looked up in the tables + * poly_to_exp (for step 2) and exp_to_poly (for step 3). To see this + * in action, look at the CALC_S macro. As additional wrinkles, note that + * one of my operands is always a constant, so the poly_to_exp lookup on it + * is done in advance; I included the original values in the comments so + * readers can have some chance of recognizing that this *is* the RS matrix + * from the Twofish paper. I've only included the table entries I actually + * need; I never do a lookup on a variable input of zero and the biggest + * exponents I'll ever see are 254 (variable) and 237 (constant), so they'll + * never sum to more than 491. I'm repeating part of the exp_to_poly table + * so that I don't have to do mod-255 reduction in the exponent arithmetic. + * Since I know my constant operands are never zero, I only have to worry + * about zero values in the variable operand, and I do it with a simple + * conditional branch. I know conditionals are expensive, but I couldn't + * see a non-horrible way of avoiding them, and I did manage to group the + * statements so that each if covers four group multiplications. */ + +static const u8 poly_to_exp[255] = { + 0x00, 0x01, 0x17, 0x02, 0x2E, 0x18, 0x53, 0x03, 0x6A, 0x2F, 0x93, 0x19, + 0x34, 0x54, 0x45, 0x04, 0x5C, 0x6B, 0xB6, 0x30, 0xA6, 0x94, 0x4B, 0x1A, + 0x8C, 0x35, 0x81, 0x55, 0xAA, 0x46, 0x0D, 0x05, 0x24, 0x5D, 0x87, 0x6C, + 0x9B, 0xB7, 0xC1, 0x31, 0x2B, 0xA7, 0xA3, 0x95, 0x98, 0x4C, 0xCA, 0x1B, + 0xE6, 0x8D, 0x73, 0x36, 0xCD, 0x82, 0x12, 0x56, 0x62, 0xAB, 0xF0, 0x47, + 0x4F, 0x0E, 0xBD, 0x06, 0xD4, 0x25, 0xD2, 0x5E, 0x27, 0x88, 0x66, 0x6D, + 0xD6, 0x9C, 0x79, 0xB8, 0x08, 0xC2, 0xDF, 0x32, 0x68, 0x2C, 0xFD, 0xA8, + 0x8A, 0xA4, 0x5A, 0x96, 0x29, 0x99, 0x22, 0x4D, 0x60, 0xCB, 0xE4, 0x1C, + 0x7B, 0xE7, 0x3B, 0x8E, 0x9E, 0x74, 0xF4, 0x37, 0xD8, 0xCE, 0xF9, 0x83, + 0x6F, 0x13, 0xB2, 0x57, 0xE1, 0x63, 0xDC, 0xAC, 0xC4, 0xF1, 0xAF, 0x48, + 0x0A, 0x50, 0x42, 0x0F, 0xBA, 0xBE, 0xC7, 0x07, 0xDE, 0xD5, 0x78, 0x26, + 0x65, 0xD3, 0xD1, 0x5F, 0xE3, 0x28, 0x21, 0x89, 0x59, 0x67, 0xFC, 0x6E, + 0xB1, 0xD7, 0xF8, 0x9D, 0xF3, 0x7A, 0x3A, 0xB9, 0xC6, 0x09, 0x41, 0xC3, + 0xAE, 0xE0, 0xDB, 0x33, 0x44, 0x69, 0x92, 0x2D, 0x52, 0xFE, 0x16, 0xA9, + 0x0C, 0x8B, 0x80, 0xA5, 0x4A, 0x5B, 0xB5, 0x97, 0xC9, 0x2A, 0xA2, 0x9A, + 0xC0, 0x23, 0x86, 0x4E, 0xBC, 0x61, 0xEF, 0xCC, 0x11, 0xE5, 0x72, 0x1D, + 0x3D, 0x7C, 0xEB, 0xE8, 0xE9, 0x3C, 0xEA, 0x8F, 0x7D, 0x9F, 0xEC, 0x75, + 0x1E, 0xF5, 0x3E, 0x38, 0xF6, 0xD9, 0x3F, 0xCF, 0x76, 0xFA, 0x1F, 0x84, + 0xA0, 0x70, 0xED, 0x14, 0x90, 0xB3, 0x7E, 0x58, 0xFB, 0xE2, 0x20, 0x64, + 0xD0, 0xDD, 0x77, 0xAD, 0xDA, 0xC5, 0x40, 0xF2, 0x39, 0xB0, 0xF7, 0x49, + 0xB4, 0x0B, 0x7F, 0x51, 0x15, 0x43, 0x91, 0x10, 0x71, 0xBB, 0xEE, 0xBF, + 0x85, 0xC8, 0xA1 +}; + +static const u8 exp_to_poly[492] = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D, 0x9A, 0x79, 0xF2, + 0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC, 0xF5, 0xA7, 0x03, + 0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3, 0x8B, 0x5B, 0xB6, + 0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52, 0xA4, 0x05, 0x0A, + 0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0, 0xED, 0x97, 0x63, + 0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1, 0x0F, 0x1E, 0x3C, + 0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A, 0xF4, 0xA5, 0x07, + 0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11, 0x22, 0x44, 0x88, + 0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51, 0xA2, 0x09, 0x12, + 0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66, 0xCC, 0xD5, 0xE7, + 0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB, 0x1B, 0x36, 0x6C, + 0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19, 0x32, 0x64, 0xC8, + 0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D, 0x5A, 0xB4, 0x25, + 0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56, 0xAC, 0x15, 0x2A, + 0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE, 0x91, 0x6F, 0xDE, + 0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9, 0x3F, 0x7E, 0xFC, + 0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE, 0xB1, 0x2F, 0x5E, + 0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41, 0x82, 0x49, 0x92, + 0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E, 0x71, 0xE2, 0x89, + 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB, 0xDB, 0xFB, 0xBB, + 0x3B, 0x76, 0xEC, 0x95, 0x67, 0xCE, 0xD1, 0xEF, 0x93, 0x6B, 0xD6, 0xE1, + 0x8F, 0x53, 0xA6, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D, + 0x9A, 0x79, 0xF2, 0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC, + 0xF5, 0xA7, 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3, + 0x8B, 0x5B, 0xB6, 0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52, + 0xA4, 0x05, 0x0A, 0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0, + 0xED, 0x97, 0x63, 0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1, + 0x0F, 0x1E, 0x3C, 0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A, + 0xF4, 0xA5, 0x07, 0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11, + 0x22, 0x44, 0x88, 0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51, + 0xA2, 0x09, 0x12, 0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66, + 0xCC, 0xD5, 0xE7, 0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB, + 0x1B, 0x36, 0x6C, 0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19, + 0x32, 0x64, 0xC8, 0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D, + 0x5A, 0xB4, 0x25, 0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56, + 0xAC, 0x15, 0x2A, 0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE, + 0x91, 0x6F, 0xDE, 0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9, + 0x3F, 0x7E, 0xFC, 0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE, + 0xB1, 0x2F, 0x5E, 0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41, + 0x82, 0x49, 0x92, 0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E, + 0x71, 0xE2, 0x89, 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB +}; + + +/* The table constants are indices of + * S-box entries, preprocessed through q0 and q1. */ +static const u8 calc_sb_tbl[512] = { + 0xA9, 0x75, 0x67, 0xF3, 0xB3, 0xC6, 0xE8, 0xF4, + 0x04, 0xDB, 0xFD, 0x7B, 0xA3, 0xFB, 0x76, 0xC8, + 0x9A, 0x4A, 0x92, 0xD3, 0x80, 0xE6, 0x78, 0x6B, + 0xE4, 0x45, 0xDD, 0x7D, 0xD1, 0xE8, 0x38, 0x4B, + 0x0D, 0xD6, 0xC6, 0x32, 0x35, 0xD8, 0x98, 0xFD, + 0x18, 0x37, 0xF7, 0x71, 0xEC, 0xF1, 0x6C, 0xE1, + 0x43, 0x30, 0x75, 0x0F, 0x37, 0xF8, 0x26, 0x1B, + 0xFA, 0x87, 0x13, 0xFA, 0x94, 0x06, 0x48, 0x3F, + 0xF2, 0x5E, 0xD0, 0xBA, 0x8B, 0xAE, 0x30, 0x5B, + 0x84, 0x8A, 0x54, 0x00, 0xDF, 0xBC, 0x23, 0x9D, + 0x19, 0x6D, 0x5B, 0xC1, 0x3D, 0xB1, 0x59, 0x0E, + 0xF3, 0x80, 0xAE, 0x5D, 0xA2, 0xD2, 0x82, 0xD5, + 0x63, 0xA0, 0x01, 0x84, 0x83, 0x07, 0x2E, 0x14, + 0xD9, 0xB5, 0x51, 0x90, 0x9B, 0x2C, 0x7C, 0xA3, + 0xA6, 0xB2, 0xEB, 0x73, 0xA5, 0x4C, 0xBE, 0x54, + 0x16, 0x92, 0x0C, 0x74, 0xE3, 0x36, 0x61, 0x51, + 0xC0, 0x38, 0x8C, 0xB0, 0x3A, 0xBD, 0xF5, 0x5A, + 0x73, 0xFC, 0x2C, 0x60, 0x25, 0x62, 0x0B, 0x96, + 0xBB, 0x6C, 0x4E, 0x42, 0x89, 0xF7, 0x6B, 0x10, + 0x53, 0x7C, 0x6A, 0x28, 0xB4, 0x27, 0xF1, 0x8C, + 0xE1, 0x13, 0xE6, 0x95, 0xBD, 0x9C, 0x45, 0xC7, + 0xE2, 0x24, 0xF4, 0x46, 0xB6, 0x3B, 0x66, 0x70, + 0xCC, 0xCA, 0x95, 0xE3, 0x03, 0x85, 0x56, 0xCB, + 0xD4, 0x11, 0x1C, 0xD0, 0x1E, 0x93, 0xD7, 0xB8, + 0xFB, 0xA6, 0xC3, 0x83, 0x8E, 0x20, 0xB5, 0xFF, + 0xE9, 0x9F, 0xCF, 0x77, 0xBF, 0xC3, 0xBA, 0xCC, + 0xEA, 0x03, 0x77, 0x6F, 0x39, 0x08, 0xAF, 0xBF, + 0x33, 0x40, 0xC9, 0xE7, 0x62, 0x2B, 0x71, 0xE2, + 0x81, 0x79, 0x79, 0x0C, 0x09, 0xAA, 0xAD, 0x82, + 0x24, 0x41, 0xCD, 0x3A, 0xF9, 0xEA, 0xD8, 0xB9, + 0xE5, 0xE4, 0xC5, 0x9A, 0xB9, 0xA4, 0x4D, 0x97, + 0x44, 0x7E, 0x08, 0xDA, 0x86, 0x7A, 0xE7, 0x17, + 0xA1, 0x66, 0x1D, 0x94, 0xAA, 0xA1, 0xED, 0x1D, + 0x06, 0x3D, 0x70, 0xF0, 0xB2, 0xDE, 0xD2, 0xB3, + 0x41, 0x0B, 0x7B, 0x72, 0xA0, 0xA7, 0x11, 0x1C, + 0x31, 0xEF, 0xC2, 0xD1, 0x27, 0x53, 0x90, 0x3E, + 0x20, 0x8F, 0xF6, 0x33, 0x60, 0x26, 0xFF, 0x5F, + 0x96, 0xEC, 0x5C, 0x76, 0xB1, 0x2A, 0xAB, 0x49, + 0x9E, 0x81, 0x9C, 0x88, 0x52, 0xEE, 0x1B, 0x21, + 0x5F, 0xC4, 0x93, 0x1A, 0x0A, 0xEB, 0xEF, 0xD9, + 0x91, 0xC5, 0x85, 0x39, 0x49, 0x99, 0xEE, 0xCD, + 0x2D, 0xAD, 0x4F, 0x31, 0x8F, 0x8B, 0x3B, 0x01, + 0x47, 0x18, 0x87, 0x23, 0x6D, 0xDD, 0x46, 0x1F, + 0xD6, 0x4E, 0x3E, 0x2D, 0x69, 0xF9, 0x64, 0x48, + 0x2A, 0x4F, 0xCE, 0xF2, 0xCB, 0x65, 0x2F, 0x8E, + 0xFC, 0x78, 0x97, 0x5C, 0x05, 0x58, 0x7A, 0x19, + 0xAC, 0x8D, 0x7F, 0xE5, 0xD5, 0x98, 0x1A, 0x57, + 0x4B, 0x67, 0x0E, 0x7F, 0xA7, 0x05, 0x5A, 0x64, + 0x28, 0xAF, 0x14, 0x63, 0x3F, 0xB6, 0x29, 0xFE, + 0x88, 0xF5, 0x3C, 0xB7, 0x4C, 0x3C, 0x02, 0xA5, + 0xB8, 0xCE, 0xDA, 0xE9, 0xB0, 0x68, 0x17, 0x44, + 0x55, 0xE0, 0x1F, 0x4D, 0x8A, 0x43, 0x7D, 0x69, + 0x57, 0x29, 0xC7, 0x2E, 0x8D, 0xAC, 0x74, 0x15, + 0xB7, 0x59, 0xC4, 0xA8, 0x9F, 0x0A, 0x72, 0x9E, + 0x7E, 0x6E, 0x15, 0x47, 0x22, 0xDF, 0x12, 0x34, + 0x58, 0x35, 0x07, 0x6A, 0x99, 0xCF, 0x34, 0xDC, + 0x6E, 0x22, 0x50, 0xC9, 0xDE, 0xC0, 0x68, 0x9B, + 0x65, 0x89, 0xBC, 0xD4, 0xDB, 0xED, 0xF8, 0xAB, + 0xC8, 0x12, 0xA8, 0xA2, 0x2B, 0x0D, 0x40, 0x52, + 0xDC, 0xBB, 0xFE, 0x02, 0x32, 0x2F, 0xA4, 0xA9, + 0xCA, 0xD7, 0x10, 0x61, 0x21, 0x1E, 0xF0, 0xB4, + 0xD3, 0x50, 0x5D, 0x04, 0x0F, 0xF6, 0x00, 0xC2, + 0x6F, 0x16, 0x9D, 0x25, 0x36, 0x86, 0x42, 0x56, + 0x4A, 0x55, 0x5E, 0x09, 0xC1, 0xBE, 0xE0, 0x91 +}; + +/* Macro to perform one column of the RS matrix multiplication. The + * parameters a, b, c, and d are the four bytes of output; i is the index + * of the key bytes, and w, x, y, and z, are the column of constants from + * the RS matrix, preprocessed through the poly_to_exp table. */ + +#define CALC_S(a, b, c, d, i, w, x, y, z) \ + if (key[i]) { \ + tmp = poly_to_exp[key[i] - 1]; \ + (a) ^= exp_to_poly[tmp + (w)]; \ + (b) ^= exp_to_poly[tmp + (x)]; \ + (c) ^= exp_to_poly[tmp + (y)]; \ + (d) ^= exp_to_poly[tmp + (z)]; \ + } + +/* Macros to calculate the key-dependent S-boxes for a 128-bit key using + * the S vector from CALC_S. CALC_SB_2 computes a single entry in all + * four S-boxes, where i is the index of the entry to compute, and a and b + * are the index numbers preprocessed through the q0 and q1 tables + * respectively. */ + +#define CALC_SB_2(i, a, b) \ + ctx->s[0][i] = mds[0][q0[(a) ^ sa] ^ se]; \ + ctx->s[1][i] = mds[1][q0[(b) ^ sb] ^ sf]; \ + ctx->s[2][i] = mds[2][q1[(a) ^ sc] ^ sg]; \ + ctx->s[3][i] = mds[3][q1[(b) ^ sd] ^ sh] + +/* Macro exactly like CALC_SB_2, but for 192-bit keys. */ + +#define CALC_SB192_2(i, a, b) \ + ctx->s[0][i] = mds[0][q0[q0[(b) ^ sa] ^ se] ^ si]; \ + ctx->s[1][i] = mds[1][q0[q1[(b) ^ sb] ^ sf] ^ sj]; \ + ctx->s[2][i] = mds[2][q1[q0[(a) ^ sc] ^ sg] ^ sk]; \ + ctx->s[3][i] = mds[3][q1[q1[(a) ^ sd] ^ sh] ^ sl]; + +/* Macro exactly like CALC_SB_2, but for 256-bit keys. */ + +#define CALC_SB256_2(i, a, b) \ + ctx->s[0][i] = mds[0][q0[q0[q1[(b) ^ sa] ^ se] ^ si] ^ sm]; \ + ctx->s[1][i] = mds[1][q0[q1[q1[(a) ^ sb] ^ sf] ^ sj] ^ sn]; \ + ctx->s[2][i] = mds[2][q1[q0[q0[(a) ^ sc] ^ sg] ^ sk] ^ so]; \ + ctx->s[3][i] = mds[3][q1[q1[q0[(b) ^ sd] ^ sh] ^ sl] ^ sp]; + +/* Macros to calculate the whitening and round subkeys. CALC_K_2 computes the + * last two stages of the h() function for a given index (either 2i or 2i+1). + * a, b, c, and d are the four bytes going into the last two stages. For + * 128-bit keys, this is the entire h() function and a and c are the index + * preprocessed through q0 and q1 respectively; for longer keys they are the + * output of previous stages. j is the index of the first key byte to use. + * CALC_K computes a pair of subkeys for 128-bit Twofish, by calling CALC_K_2 + * twice, doing the Pseudo-Hadamard Transform, and doing the necessary + * rotations. Its parameters are: a, the array to write the results into, + * j, the index of the first output entry, k and l, the preprocessed indices + * for index 2i, and m and n, the preprocessed indices for index 2i+1. + * CALC_K192_2 expands CALC_K_2 to handle 192-bit keys, by doing an + * additional lookup-and-XOR stage. The parameters a, b, c and d are the + * four bytes going into the last three stages. For 192-bit keys, c = d + * are the index preprocessed through q0, and a = b are the index + * preprocessed through q1; j is the index of the first key byte to use. + * CALC_K192 is identical to CALC_K but for using the CALC_K192_2 macro + * instead of CALC_K_2. + * CALC_K256_2 expands CALC_K192_2 to handle 256-bit keys, by doing an + * additional lookup-and-XOR stage. The parameters a and b are the index + * preprocessed through q0 and q1 respectively; j is the index of the first + * key byte to use. CALC_K256 is identical to CALC_K but for using the + * CALC_K256_2 macro instead of CALC_K_2. */ + +#define CALC_K_2(a, b, c, d, j) \ + mds[0][q0[a ^ key[(j) + 8]] ^ key[j]] \ + ^ mds[1][q0[b ^ key[(j) + 9]] ^ key[(j) + 1]] \ + ^ mds[2][q1[c ^ key[(j) + 10]] ^ key[(j) + 2]] \ + ^ mds[3][q1[d ^ key[(j) + 11]] ^ key[(j) + 3]] + +#define CALC_K(a, j, k, l, m, n) \ + x = CALC_K_2 (k, l, k, l, 0); \ + y = CALC_K_2 (m, n, m, n, 4); \ + y = rol32(y, 8); \ + x += y; y += x; ctx->a[j] = x; \ + ctx->a[(j) + 1] = rol32(y, 9) + +#define CALC_K192_2(a, b, c, d, j) \ + CALC_K_2 (q0[a ^ key[(j) + 16]], \ + q1[b ^ key[(j) + 17]], \ + q0[c ^ key[(j) + 18]], \ + q1[d ^ key[(j) + 19]], j) + +#define CALC_K192(a, j, k, l, m, n) \ + x = CALC_K192_2 (l, l, k, k, 0); \ + y = CALC_K192_2 (n, n, m, m, 4); \ + y = rol32(y, 8); \ + x += y; y += x; ctx->a[j] = x; \ + ctx->a[(j) + 1] = rol32(y, 9) + +#define CALC_K256_2(a, b, j) \ + CALC_K192_2 (q1[b ^ key[(j) + 24]], \ + q1[a ^ key[(j) + 25]], \ + q0[a ^ key[(j) + 26]], \ + q0[b ^ key[(j) + 27]], j) + +#define CALC_K256(a, j, k, l, m, n) \ + x = CALC_K256_2 (k, l, 0); \ + y = CALC_K256_2 (m, n, 4); \ + y = rol32(y, 8); \ + x += y; y += x; ctx->a[j] = x; \ + ctx->a[(j) + 1] = rol32(y, 9) + +/* Perform the key setup. */ +int twofish_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int key_len, u32 *flags) +{ + + struct twofish_ctx *ctx = crypto_tfm_ctx(tfm); + + int i, j, k; + + /* Temporaries for CALC_K. */ + u32 x, y; + + /* The S vector used to key the S-boxes, split up into individual bytes. + * 128-bit keys use only sa through sh; 256-bit use all of them. */ + u8 sa = 0, sb = 0, sc = 0, sd = 0, se = 0, sf = 0, sg = 0, sh = 0; + u8 si = 0, sj = 0, sk = 0, sl = 0, sm = 0, sn = 0, so = 0, sp = 0; + + /* Temporary for CALC_S. */ + u8 tmp; + + /* Check key length. */ + if (key_len != 16 && key_len != 24 && key_len != 32) + { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; /* unsupported key length */ + } + + /* Compute the first two words of the S vector. The magic numbers are + * the entries of the RS matrix, preprocessed through poly_to_exp. The + * numbers in the comments are the original (polynomial form) matrix + * entries. */ + CALC_S (sa, sb, sc, sd, 0, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */ + CALC_S (sa, sb, sc, sd, 1, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */ + CALC_S (sa, sb, sc, sd, 2, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */ + CALC_S (sa, sb, sc, sd, 3, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */ + CALC_S (sa, sb, sc, sd, 4, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */ + CALC_S (sa, sb, sc, sd, 5, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */ + CALC_S (sa, sb, sc, sd, 6, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */ + CALC_S (sa, sb, sc, sd, 7, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */ + CALC_S (se, sf, sg, sh, 8, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */ + CALC_S (se, sf, sg, sh, 9, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */ + CALC_S (se, sf, sg, sh, 10, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */ + CALC_S (se, sf, sg, sh, 11, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */ + CALC_S (se, sf, sg, sh, 12, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */ + CALC_S (se, sf, sg, sh, 13, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */ + CALC_S (se, sf, sg, sh, 14, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */ + CALC_S (se, sf, sg, sh, 15, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */ + + if (key_len == 24 || key_len == 32) { /* 192- or 256-bit key */ + /* Calculate the third word of the S vector */ + CALC_S (si, sj, sk, sl, 16, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */ + CALC_S (si, sj, sk, sl, 17, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */ + CALC_S (si, sj, sk, sl, 18, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */ + CALC_S (si, sj, sk, sl, 19, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */ + CALC_S (si, sj, sk, sl, 20, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */ + CALC_S (si, sj, sk, sl, 21, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */ + CALC_S (si, sj, sk, sl, 22, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */ + CALC_S (si, sj, sk, sl, 23, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */ + } + + if (key_len == 32) { /* 256-bit key */ + /* Calculate the fourth word of the S vector */ + CALC_S (sm, sn, so, sp, 24, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */ + CALC_S (sm, sn, so, sp, 25, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */ + CALC_S (sm, sn, so, sp, 26, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */ + CALC_S (sm, sn, so, sp, 27, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */ + CALC_S (sm, sn, so, sp, 28, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */ + CALC_S (sm, sn, so, sp, 29, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */ + CALC_S (sm, sn, so, sp, 30, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */ + CALC_S (sm, sn, so, sp, 31, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */ + + /* Compute the S-boxes. */ + for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) { + CALC_SB256_2( i, calc_sb_tbl[j], calc_sb_tbl[k] ); + } + + /* Calculate whitening and round subkeys. The constants are + * indices of subkeys, preprocessed through q0 and q1. */ + CALC_K256 (w, 0, 0xA9, 0x75, 0x67, 0xF3); + CALC_K256 (w, 2, 0xB3, 0xC6, 0xE8, 0xF4); + CALC_K256 (w, 4, 0x04, 0xDB, 0xFD, 0x7B); + CALC_K256 (w, 6, 0xA3, 0xFB, 0x76, 0xC8); + CALC_K256 (k, 0, 0x9A, 0x4A, 0x92, 0xD3); + CALC_K256 (k, 2, 0x80, 0xE6, 0x78, 0x6B); + CALC_K256 (k, 4, 0xE4, 0x45, 0xDD, 0x7D); + CALC_K256 (k, 6, 0xD1, 0xE8, 0x38, 0x4B); + CALC_K256 (k, 8, 0x0D, 0xD6, 0xC6, 0x32); + CALC_K256 (k, 10, 0x35, 0xD8, 0x98, 0xFD); + CALC_K256 (k, 12, 0x18, 0x37, 0xF7, 0x71); + CALC_K256 (k, 14, 0xEC, 0xF1, 0x6C, 0xE1); + CALC_K256 (k, 16, 0x43, 0x30, 0x75, 0x0F); + CALC_K256 (k, 18, 0x37, 0xF8, 0x26, 0x1B); + CALC_K256 (k, 20, 0xFA, 0x87, 0x13, 0xFA); + CALC_K256 (k, 22, 0x94, 0x06, 0x48, 0x3F); + CALC_K256 (k, 24, 0xF2, 0x5E, 0xD0, 0xBA); + CALC_K256 (k, 26, 0x8B, 0xAE, 0x30, 0x5B); + CALC_K256 (k, 28, 0x84, 0x8A, 0x54, 0x00); + CALC_K256 (k, 30, 0xDF, 0xBC, 0x23, 0x9D); + } else if (key_len == 24) { /* 192-bit key */ + /* Compute the S-boxes. */ + for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) { + CALC_SB192_2( i, calc_sb_tbl[j], calc_sb_tbl[k] ); + } + + /* Calculate whitening and round subkeys. The constants are + * indices of subkeys, preprocessed through q0 and q1. */ + CALC_K192 (w, 0, 0xA9, 0x75, 0x67, 0xF3); + CALC_K192 (w, 2, 0xB3, 0xC6, 0xE8, 0xF4); + CALC_K192 (w, 4, 0x04, 0xDB, 0xFD, 0x7B); + CALC_K192 (w, 6, 0xA3, 0xFB, 0x76, 0xC8); + CALC_K192 (k, 0, 0x9A, 0x4A, 0x92, 0xD3); + CALC_K192 (k, 2, 0x80, 0xE6, 0x78, 0x6B); + CALC_K192 (k, 4, 0xE4, 0x45, 0xDD, 0x7D); + CALC_K192 (k, 6, 0xD1, 0xE8, 0x38, 0x4B); + CALC_K192 (k, 8, 0x0D, 0xD6, 0xC6, 0x32); + CALC_K192 (k, 10, 0x35, 0xD8, 0x98, 0xFD); + CALC_K192 (k, 12, 0x18, 0x37, 0xF7, 0x71); + CALC_K192 (k, 14, 0xEC, 0xF1, 0x6C, 0xE1); + CALC_K192 (k, 16, 0x43, 0x30, 0x75, 0x0F); + CALC_K192 (k, 18, 0x37, 0xF8, 0x26, 0x1B); + CALC_K192 (k, 20, 0xFA, 0x87, 0x13, 0xFA); + CALC_K192 (k, 22, 0x94, 0x06, 0x48, 0x3F); + CALC_K192 (k, 24, 0xF2, 0x5E, 0xD0, 0xBA); + CALC_K192 (k, 26, 0x8B, 0xAE, 0x30, 0x5B); + CALC_K192 (k, 28, 0x84, 0x8A, 0x54, 0x00); + CALC_K192 (k, 30, 0xDF, 0xBC, 0x23, 0x9D); + } else { /* 128-bit key */ + /* Compute the S-boxes. */ + for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) { + CALC_SB_2( i, calc_sb_tbl[j], calc_sb_tbl[k] ); + } + + /* Calculate whitening and round subkeys. The constants are + * indices of subkeys, preprocessed through q0 and q1. */ + CALC_K (w, 0, 0xA9, 0x75, 0x67, 0xF3); + CALC_K (w, 2, 0xB3, 0xC6, 0xE8, 0xF4); + CALC_K (w, 4, 0x04, 0xDB, 0xFD, 0x7B); + CALC_K (w, 6, 0xA3, 0xFB, 0x76, 0xC8); + CALC_K (k, 0, 0x9A, 0x4A, 0x92, 0xD3); + CALC_K (k, 2, 0x80, 0xE6, 0x78, 0x6B); + CALC_K (k, 4, 0xE4, 0x45, 0xDD, 0x7D); + CALC_K (k, 6, 0xD1, 0xE8, 0x38, 0x4B); + CALC_K (k, 8, 0x0D, 0xD6, 0xC6, 0x32); + CALC_K (k, 10, 0x35, 0xD8, 0x98, 0xFD); + CALC_K (k, 12, 0x18, 0x37, 0xF7, 0x71); + CALC_K (k, 14, 0xEC, 0xF1, 0x6C, 0xE1); + CALC_K (k, 16, 0x43, 0x30, 0x75, 0x0F); + CALC_K (k, 18, 0x37, 0xF8, 0x26, 0x1B); + CALC_K (k, 20, 0xFA, 0x87, 0x13, 0xFA); + CALC_K (k, 22, 0x94, 0x06, 0x48, 0x3F); + CALC_K (k, 24, 0xF2, 0x5E, 0xD0, 0xBA); + CALC_K (k, 26, 0x8B, 0xAE, 0x30, 0x5B); + CALC_K (k, 28, 0x84, 0x8A, 0x54, 0x00); + CALC_K (k, 30, 0xDF, 0xBC, 0x23, 0x9D); + } + + return 0; +} + +EXPORT_SYMBOL_GPL(twofish_setkey); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Twofish cipher common functions"); diff --git a/include/crypto/twofish.h b/include/crypto/twofish.h new file mode 100644 index 000000000000..e4328cfaaf64 --- /dev/null +++ b/include/crypto/twofish.h @@ -0,0 +1,23 @@ +#ifndef _CRYPTO_TWOFISH_H +#define _CRYPTO_TWOFISH_H + +#include + +#define TF_MIN_KEY_SIZE 16 +#define TF_MAX_KEY_SIZE 32 +#define TF_BLOCK_SIZE 16 + +struct crypto_tfm; + +/* Structure for an expanded Twofish key. s contains the key-dependent + * S-boxes composed with the MDS matrix; w contains the eight "whitening" + * subkeys, K[0] through K[7]. k holds the remaining, "round" subkeys. Note + * that k[i] corresponds to what the Twofish paper calls K[i+8]. */ +struct twofish_ctx { + u32 s[4][256], w[8], k[32]; +}; + +int twofish_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int key_len, u32 *flags); + +#endif From 758f570ea785a5fbcdca026dfab2e9e1a3f89726 Mon Sep 17 00:00:00 2001 From: Joachim Fritschi Date: Tue, 20 Jun 2006 20:39:29 +1000 Subject: [PATCH 0277/1063] [CRYPTO] twofish: Fix the priority This patch adds a proper driver name and priority to the generic c implemtation to allow coexistance of c and assembler modules. Signed-off-by: Joachim Fritschi Signed-off-by: Herbert Xu --- crypto/twofish.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crypto/twofish.c b/crypto/twofish.c index e3b3a0a6cb4d..4979a2be48a9 100644 --- a/crypto/twofish.c +++ b/crypto/twofish.c @@ -181,6 +181,8 @@ static void twofish_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) static struct crypto_alg alg = { .cra_name = "twofish", + .cra_driver_name = "twofish-generic", + .cra_priority = 100, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = TF_BLOCK_SIZE, .cra_ctxsize = sizeof(struct twofish_ctx), From b9f535ffe38f7eb61ac2219d32d97c377b69f70d Mon Sep 17 00:00:00 2001 From: Joachim Fritschi Date: Tue, 20 Jun 2006 20:59:16 +1000 Subject: [PATCH 0278/1063] [CRYPTO] twofish: i586 assembly version The patch passed the trycpt tests and automated filesystem tests. This rewrite resulted in some nice perfomance increase over my last patch. Short summary of the tcrypt benchmarks: Twofish Assembler vs. Twofish C (256bit 8kb block CBC) encrypt: -33% Cycles decrypt: -45% Cycles Twofish Assembler vs. AES Assembler (128bit 8kb block CBC) encrypt: +3% Cycles decrypt: -22% Cycles Twofish Assembler vs. AES Assembler (256bit 8kb block CBC) encrypt: -20% Cycles decrypt: -36% Cycles Full Output: http://homepages.tu-darmstadt.de/~fritschi/twofish/tcrypt-speed-twofish-asm-i586.txt http://homepages.tu-darmstadt.de/~fritschi/twofish/tcrypt-speed-twofish-c-i586.txt http://homepages.tu-darmstadt.de/~fritschi/twofish/tcrypt-speed-aes-asm-i586.txt Here is another bonnie++ benchmark with encrypted filesystems. All runs with the twofish assembler modules max out the drivespeed. It should give some idea what the module can do for encrypted filesystem performance even though you can't see the full numbers. http://homepages.tu-darmstadt.de/~fritschi/twofish/output_20060611_205432_x86.html Signed-off-by: Joachim Fritschi Signed-off-by: Herbert Xu --- arch/i386/crypto/Makefile | 3 + arch/i386/crypto/twofish-i586-asm.S | 335 ++++++++++++++++++++++++++++ arch/i386/crypto/twofish.c | 97 ++++++++ crypto/Kconfig | 15 ++ 4 files changed, 450 insertions(+) create mode 100644 arch/i386/crypto/twofish-i586-asm.S create mode 100644 arch/i386/crypto/twofish.c diff --git a/arch/i386/crypto/Makefile b/arch/i386/crypto/Makefile index 103c353d0a63..3fd19af18e34 100644 --- a/arch/i386/crypto/Makefile +++ b/arch/i386/crypto/Makefile @@ -5,5 +5,8 @@ # obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o +obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o aes-i586-y := aes-i586-asm.o aes.o +twofish-i586-y := twofish-i586-asm.o twofish.o + diff --git a/arch/i386/crypto/twofish-i586-asm.S b/arch/i386/crypto/twofish-i586-asm.S new file mode 100644 index 000000000000..39b98ed2c1b9 --- /dev/null +++ b/arch/i386/crypto/twofish-i586-asm.S @@ -0,0 +1,335 @@ +/*************************************************************************** +* Copyright (C) 2006 by Joachim Fritschi, * +* * +* This program is free software; you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published by * +* the Free Software Foundation; either version 2 of the License, or * +* (at your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with this program; if not, write to the * +* Free Software Foundation, Inc., * +* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * +***************************************************************************/ + +.file "twofish-i586-asm.S" +.text + +#include + +/* return adress at 0 */ + +#define in_blk 12 /* input byte array address parameter*/ +#define out_blk 8 /* output byte array address parameter*/ +#define tfm 4 /* Twofish context structure */ + +#define a_offset 0 +#define b_offset 4 +#define c_offset 8 +#define d_offset 12 + +/* Structure of the crypto context struct*/ + +#define s0 0 /* S0 Array 256 Words each */ +#define s1 1024 /* S1 Array */ +#define s2 2048 /* S2 Array */ +#define s3 3072 /* S3 Array */ +#define w 4096 /* 8 whitening keys (word) */ +#define k 4128 /* key 1-32 ( word ) */ + +/* define a few register aliases to allow macro substitution */ + +#define R0D %eax +#define R0B %al +#define R0H %ah + +#define R1D %ebx +#define R1B %bl +#define R1H %bh + +#define R2D %ecx +#define R2B %cl +#define R2H %ch + +#define R3D %edx +#define R3B %dl +#define R3H %dh + + +/* performs input whitening */ +#define input_whitening(src,context,offset)\ + xor w+offset(context), src; + +/* performs input whitening */ +#define output_whitening(src,context,offset)\ + xor w+16+offset(context), src; + +/* + * a input register containing a (rotated 16) + * b input register containing b + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + */ +#define encrypt_round(a,b,c,d,round)\ + push d ## D;\ + movzx b ## B, %edi;\ + mov s1(%ebp,%edi,4),d ## D;\ + movzx a ## B, %edi;\ + mov s2(%ebp,%edi,4),%esi;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor s2(%ebp,%edi,4),d ## D;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s3(%ebp,%edi,4),%esi;\ + movzx b ## B, %edi;\ + xor s3(%ebp,%edi,4),d ## D;\ + movzx a ## B, %edi;\ + xor (%ebp,%edi,4), %esi;\ + movzx b ## H, %edi;\ + ror $15, b ## D;\ + xor (%ebp,%edi,4), d ## D;\ + movzx a ## H, %edi;\ + xor s1(%ebp,%edi,4),%esi;\ + pop %edi;\ + add d ## D, %esi;\ + add %esi, d ## D;\ + add k+round(%ebp), %esi;\ + xor %esi, c ## D;\ + rol $15, c ## D;\ + add k+4+round(%ebp),d ## D;\ + xor %edi, d ## D; + +/* + * a input register containing a (rotated 16) + * b input register containing b + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + * last round has different rotations for the output preparation + */ +#define encrypt_last_round(a,b,c,d,round)\ + push d ## D;\ + movzx b ## B, %edi;\ + mov s1(%ebp,%edi,4),d ## D;\ + movzx a ## B, %edi;\ + mov s2(%ebp,%edi,4),%esi;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor s2(%ebp,%edi,4),d ## D;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s3(%ebp,%edi,4),%esi;\ + movzx b ## B, %edi;\ + xor s3(%ebp,%edi,4),d ## D;\ + movzx a ## B, %edi;\ + xor (%ebp,%edi,4), %esi;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor (%ebp,%edi,4), d ## D;\ + movzx a ## H, %edi;\ + xor s1(%ebp,%edi,4),%esi;\ + pop %edi;\ + add d ## D, %esi;\ + add %esi, d ## D;\ + add k+round(%ebp), %esi;\ + xor %esi, c ## D;\ + ror $1, c ## D;\ + add k+4+round(%ebp),d ## D;\ + xor %edi, d ## D; + +/* + * a input register containing a + * b input register containing b (rotated 16) + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + */ +#define decrypt_round(a,b,c,d,round)\ + push c ## D;\ + movzx a ## B, %edi;\ + mov (%ebp,%edi,4), c ## D;\ + movzx b ## B, %edi;\ + mov s3(%ebp,%edi,4),%esi;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s1(%ebp,%edi,4),c ## D;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor (%ebp,%edi,4), %esi;\ + movzx a ## B, %edi;\ + xor s2(%ebp,%edi,4),c ## D;\ + movzx b ## B, %edi;\ + xor s1(%ebp,%edi,4),%esi;\ + movzx a ## H, %edi;\ + ror $15, a ## D;\ + xor s3(%ebp,%edi,4),c ## D;\ + movzx b ## H, %edi;\ + xor s2(%ebp,%edi,4),%esi;\ + pop %edi;\ + add %esi, c ## D;\ + add c ## D, %esi;\ + add k+round(%ebp), c ## D;\ + xor %edi, c ## D;\ + add k+4+round(%ebp),%esi;\ + xor %esi, d ## D;\ + rol $15, d ## D; + +/* + * a input register containing a + * b input register containing b (rotated 16) + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + * last round has different rotations for the output preparation + */ +#define decrypt_last_round(a,b,c,d,round)\ + push c ## D;\ + movzx a ## B, %edi;\ + mov (%ebp,%edi,4), c ## D;\ + movzx b ## B, %edi;\ + mov s3(%ebp,%edi,4),%esi;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s1(%ebp,%edi,4),c ## D;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor (%ebp,%edi,4), %esi;\ + movzx a ## B, %edi;\ + xor s2(%ebp,%edi,4),c ## D;\ + movzx b ## B, %edi;\ + xor s1(%ebp,%edi,4),%esi;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s3(%ebp,%edi,4),c ## D;\ + movzx b ## H, %edi;\ + xor s2(%ebp,%edi,4),%esi;\ + pop %edi;\ + add %esi, c ## D;\ + add c ## D, %esi;\ + add k+round(%ebp), c ## D;\ + xor %edi, c ## D;\ + add k+4+round(%ebp),%esi;\ + xor %esi, d ## D;\ + ror $1, d ## D; + +.align 4 +.global twofish_enc_blk +.global twofish_dec_blk + +twofish_enc_blk: + push %ebp /* save registers according to calling convention*/ + push %ebx + push %esi + push %edi + + mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base bointer to the crypto tfm */ + add $crypto_tfm_ctx_offset, %ebp /* ctx adress */ + mov in_blk+16(%esp),%edi /* input adress in edi */ + + mov (%edi), %eax + mov b_offset(%edi), %ebx + mov c_offset(%edi), %ecx + mov d_offset(%edi), %edx + input_whitening(%eax,%ebp,a_offset) + ror $16, %eax + input_whitening(%ebx,%ebp,b_offset) + input_whitening(%ecx,%ebp,c_offset) + input_whitening(%edx,%ebp,d_offset) + rol $1, %edx + + encrypt_round(R0,R1,R2,R3,0); + encrypt_round(R2,R3,R0,R1,8); + encrypt_round(R0,R1,R2,R3,2*8); + encrypt_round(R2,R3,R0,R1,3*8); + encrypt_round(R0,R1,R2,R3,4*8); + encrypt_round(R2,R3,R0,R1,5*8); + encrypt_round(R0,R1,R2,R3,6*8); + encrypt_round(R2,R3,R0,R1,7*8); + encrypt_round(R0,R1,R2,R3,8*8); + encrypt_round(R2,R3,R0,R1,9*8); + encrypt_round(R0,R1,R2,R3,10*8); + encrypt_round(R2,R3,R0,R1,11*8); + encrypt_round(R0,R1,R2,R3,12*8); + encrypt_round(R2,R3,R0,R1,13*8); + encrypt_round(R0,R1,R2,R3,14*8); + encrypt_last_round(R2,R3,R0,R1,15*8); + + output_whitening(%eax,%ebp,c_offset) + output_whitening(%ebx,%ebp,d_offset) + output_whitening(%ecx,%ebp,a_offset) + output_whitening(%edx,%ebp,b_offset) + mov out_blk+16(%esp),%edi; + mov %eax, c_offset(%edi) + mov %ebx, d_offset(%edi) + mov %ecx, (%edi) + mov %edx, b_offset(%edi) + + pop %edi + pop %esi + pop %ebx + pop %ebp + mov $1, %eax + ret + +twofish_dec_blk: + push %ebp /* save registers according to calling convention*/ + push %ebx + push %esi + push %edi + + + mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base bointer to the crypto tfm */ + add $crypto_tfm_ctx_offset, %ebp /* ctx adress */ + mov in_blk+16(%esp),%edi /* input adress in edi */ + + mov (%edi), %eax + mov b_offset(%edi), %ebx + mov c_offset(%edi), %ecx + mov d_offset(%edi), %edx + output_whitening(%eax,%ebp,a_offset) + output_whitening(%ebx,%ebp,b_offset) + ror $16, %ebx + output_whitening(%ecx,%ebp,c_offset) + output_whitening(%edx,%ebp,d_offset) + rol $1, %ecx + + decrypt_round(R0,R1,R2,R3,15*8); + decrypt_round(R2,R3,R0,R1,14*8); + decrypt_round(R0,R1,R2,R3,13*8); + decrypt_round(R2,R3,R0,R1,12*8); + decrypt_round(R0,R1,R2,R3,11*8); + decrypt_round(R2,R3,R0,R1,10*8); + decrypt_round(R0,R1,R2,R3,9*8); + decrypt_round(R2,R3,R0,R1,8*8); + decrypt_round(R0,R1,R2,R3,7*8); + decrypt_round(R2,R3,R0,R1,6*8); + decrypt_round(R0,R1,R2,R3,5*8); + decrypt_round(R2,R3,R0,R1,4*8); + decrypt_round(R0,R1,R2,R3,3*8); + decrypt_round(R2,R3,R0,R1,2*8); + decrypt_round(R0,R1,R2,R3,1*8); + decrypt_last_round(R2,R3,R0,R1,0); + + input_whitening(%eax,%ebp,c_offset) + input_whitening(%ebx,%ebp,d_offset) + input_whitening(%ecx,%ebp,a_offset) + input_whitening(%edx,%ebp,b_offset) + mov out_blk+16(%esp),%edi; + mov %eax, c_offset(%edi) + mov %ebx, d_offset(%edi) + mov %ecx, (%edi) + mov %edx, b_offset(%edi) + + pop %edi + pop %esi + pop %ebx + pop %ebp + mov $1, %eax + ret diff --git a/arch/i386/crypto/twofish.c b/arch/i386/crypto/twofish.c new file mode 100644 index 000000000000..e3004dfe9c7a --- /dev/null +++ b/arch/i386/crypto/twofish.c @@ -0,0 +1,97 @@ +/* + * Glue Code for optimized 586 assembler version of TWOFISH + * + * Originally Twofish for GPG + * By Matthew Skala , July 26, 1998 + * 256-bit key length added March 20, 1999 + * Some modifications to reduce the text size by Werner Koch, April, 1998 + * Ported to the kerneli patch by Marc Mutz + * Ported to CryptoAPI by Colin Slater + * + * The original author has disclaimed all copyright interest in this + * code and thus put it in the public domain. The subsequent authors + * have put this under the GNU General Public License. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + * This code is a "clean room" implementation, written from the paper + * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, + * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available + * through http://www.counterpane.com/twofish.html + * + * For background information on multiplication in finite fields, used for + * the matrix operations in the key schedule, see the book _Contemporary + * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the + * Third Edition. + */ + +#include +#include +#include +#include +#include + + +asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); +asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); + +static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + twofish_enc_blk(tfm, dst, src); +} + +static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + twofish_dec_blk(tfm, dst, src); +} + +static struct crypto_alg alg = { + .cra_name = "twofish", + .cra_driver_name = "twofish-i586", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = TF_MIN_KEY_SIZE, + .cia_max_keysize = TF_MAX_KEY_SIZE, + .cia_setkey = twofish_setkey, + .cia_encrypt = twofish_encrypt, + .cia_decrypt = twofish_decrypt + } + } +}; + +static int __init init(void) +{ + return crypto_register_alg(&alg); +} + +static void __exit fini(void) +{ + crypto_unregister_alg(&alg); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION ("Twofish Cipher Algorithm, i586 asm optimized"); +MODULE_ALIAS("twofish"); diff --git a/crypto/Kconfig b/crypto/Kconfig index 5472f693e6ec..306738ceecb4 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -150,6 +150,21 @@ config CRYPTO_TWOFISH_COMMON Common parts of the Twofish cipher algorithm shared by the generic c and the assembler implementations. +config CRYPTO_TWOFISH_586 + tristate "Twofish cipher algorithms (i586)" + depends on CRYPTO && ((X86 || UML_X86) && !64BIT) + select CRYPTO_TWOFISH_COMMON + help + Twofish cipher algorithm. + + Twofish was submitted as an AES (Advanced Encryption Standard) + candidate cipher by researchers at CounterPane Systems. It is a + 16 round block cipher supporting key sizes of 128, 192, and 256 + bits. + + See also: + + config CRYPTO_SERPENT tristate "Serpent cipher algorithm" depends on CRYPTO From eaf44088ff467410dd15a033fef118888002ffe6 Mon Sep 17 00:00:00 2001 From: Joachim Fritschi Date: Tue, 20 Jun 2006 21:12:02 +1000 Subject: [PATCH 0279/1063] [CRYPTO] twofish: x86-64 assembly version The patch passed the trycpt tests and automated filesystem tests. This rewrite resulted in some nice perfomance increase over my last patch. Short summary of the tcrypt benchmarks: Twofish Assembler vs. Twofish C (256bit 8kb block CBC) encrypt: -27% Cycles decrypt: -23% Cycles Twofish Assembler vs. AES Assembler (128bit 8kb block CBC) encrypt: +18% Cycles decrypt: +15% Cycles Twofish Assembler vs. AES Assembler (256bit 8kb block CBC) encrypt: -9% Cycles decrypt: -8% Cycles Full Output: http://homepages.tu-darmstadt.de/~fritschi/twofish/tcrypt-speed-twofish-c-x86_64.txt http://homepages.tu-darmstadt.de/~fritschi/twofish/tcrypt-speed-twofish-asm-x86_64.txt http://homepages.tu-darmstadt.de/~fritschi/twofish/tcrypt-speed-aes-asm-x86_64.txt Here is another bonnie++ benchmark with encrypted filesystems. Most runs maxed out the hd. It should give some idea what the module can do for encrypted filesystem performance even though you can't see the full numbers. http://homepages.tu-darmstadt.de/~fritschi/twofish/output_20060610_130806_x86_64.html Signed-off-by: Joachim Fritschi Signed-off-by: Herbert Xu --- arch/x86_64/crypto/Makefile | 3 + arch/x86_64/crypto/twofish-x86_64-asm.S | 324 ++++++++++++++++++++++++ arch/x86_64/crypto/twofish.c | 97 +++++++ crypto/Kconfig | 15 ++ 4 files changed, 439 insertions(+) create mode 100644 arch/x86_64/crypto/twofish-x86_64-asm.S create mode 100644 arch/x86_64/crypto/twofish.c diff --git a/arch/x86_64/crypto/Makefile b/arch/x86_64/crypto/Makefile index 426d20f4b72e..15b538a8b7f7 100644 --- a/arch/x86_64/crypto/Makefile +++ b/arch/x86_64/crypto/Makefile @@ -5,5 +5,8 @@ # obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o +obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o aes-x86_64-y := aes-x86_64-asm.o aes.o +twofish-x86_64-y := twofish-x86_64-asm.o twofish.o + diff --git a/arch/x86_64/crypto/twofish-x86_64-asm.S b/arch/x86_64/crypto/twofish-x86_64-asm.S new file mode 100644 index 000000000000..35974a586615 --- /dev/null +++ b/arch/x86_64/crypto/twofish-x86_64-asm.S @@ -0,0 +1,324 @@ +/*************************************************************************** +* Copyright (C) 2006 by Joachim Fritschi, * +* * +* This program is free software; you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published by * +* the Free Software Foundation; either version 2 of the License, or * +* (at your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with this program; if not, write to the * +* Free Software Foundation, Inc., * +* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * +***************************************************************************/ + +.file "twofish-x86_64-asm.S" +.text + +#include + +#define a_offset 0 +#define b_offset 4 +#define c_offset 8 +#define d_offset 12 + +/* Structure of the crypto context struct*/ + +#define s0 0 /* S0 Array 256 Words each */ +#define s1 1024 /* S1 Array */ +#define s2 2048 /* S2 Array */ +#define s3 3072 /* S3 Array */ +#define w 4096 /* 8 whitening keys (word) */ +#define k 4128 /* key 1-32 ( word ) */ + +/* define a few register aliases to allow macro substitution */ + +#define R0 %rax +#define R0D %eax +#define R0B %al +#define R0H %ah + +#define R1 %rbx +#define R1D %ebx +#define R1B %bl +#define R1H %bh + +#define R2 %rcx +#define R2D %ecx +#define R2B %cl +#define R2H %ch + +#define R3 %rdx +#define R3D %edx +#define R3B %dl +#define R3H %dh + + +/* performs input whitening */ +#define input_whitening(src,context,offset)\ + xor w+offset(context), src; + +/* performs input whitening */ +#define output_whitening(src,context,offset)\ + xor w+16+offset(context), src; + + +/* + * a input register containing a (rotated 16) + * b input register containing b + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + */ +#define encrypt_round(a,b,c,d,round)\ + movzx b ## B, %edi;\ + mov s1(%r11,%rdi,4),%r8d;\ + movzx a ## B, %edi;\ + mov s2(%r11,%rdi,4),%r9d;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor s2(%r11,%rdi,4),%r8d;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s3(%r11,%rdi,4),%r9d;\ + movzx b ## B, %edi;\ + xor s3(%r11,%rdi,4),%r8d;\ + movzx a ## B, %edi;\ + xor (%r11,%rdi,4), %r9d;\ + movzx b ## H, %edi;\ + ror $15, b ## D;\ + xor (%r11,%rdi,4), %r8d;\ + movzx a ## H, %edi;\ + xor s1(%r11,%rdi,4),%r9d;\ + add %r8d, %r9d;\ + add %r9d, %r8d;\ + add k+round(%r11), %r9d;\ + xor %r9d, c ## D;\ + rol $15, c ## D;\ + add k+4+round(%r11),%r8d;\ + xor %r8d, d ## D; + +/* + * a input register containing a(rotated 16) + * b input register containing b + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + * during the round a and b are prepared for the output whitening + */ +#define encrypt_last_round(a,b,c,d,round)\ + mov b ## D, %r10d;\ + shl $32, %r10;\ + movzx b ## B, %edi;\ + mov s1(%r11,%rdi,4),%r8d;\ + movzx a ## B, %edi;\ + mov s2(%r11,%rdi,4),%r9d;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor s2(%r11,%rdi,4),%r8d;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s3(%r11,%rdi,4),%r9d;\ + movzx b ## B, %edi;\ + xor s3(%r11,%rdi,4),%r8d;\ + movzx a ## B, %edi;\ + xor (%r11,%rdi,4), %r9d;\ + xor a, %r10;\ + movzx b ## H, %edi;\ + xor (%r11,%rdi,4), %r8d;\ + movzx a ## H, %edi;\ + xor s1(%r11,%rdi,4),%r9d;\ + add %r8d, %r9d;\ + add %r9d, %r8d;\ + add k+round(%r11), %r9d;\ + xor %r9d, c ## D;\ + ror $1, c ## D;\ + add k+4+round(%r11),%r8d;\ + xor %r8d, d ## D + +/* + * a input register containing a + * b input register containing b (rotated 16) + * c input register containing c (already rol $1) + * d input register containing d + * operations on a and b are interleaved to increase performance + */ +#define decrypt_round(a,b,c,d,round)\ + movzx a ## B, %edi;\ + mov (%r11,%rdi,4), %r9d;\ + movzx b ## B, %edi;\ + mov s3(%r11,%rdi,4),%r8d;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s1(%r11,%rdi,4),%r9d;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor (%r11,%rdi,4), %r8d;\ + movzx a ## B, %edi;\ + xor s2(%r11,%rdi,4),%r9d;\ + movzx b ## B, %edi;\ + xor s1(%r11,%rdi,4),%r8d;\ + movzx a ## H, %edi;\ + ror $15, a ## D;\ + xor s3(%r11,%rdi,4),%r9d;\ + movzx b ## H, %edi;\ + xor s2(%r11,%rdi,4),%r8d;\ + add %r8d, %r9d;\ + add %r9d, %r8d;\ + add k+round(%r11), %r9d;\ + xor %r9d, c ## D;\ + add k+4+round(%r11),%r8d;\ + xor %r8d, d ## D;\ + rol $15, d ## D; + +/* + * a input register containing a + * b input register containing b + * c input register containing c (already rol $1) + * d input register containing d + * operations on a and b are interleaved to increase performance + * during the round a and b are prepared for the output whitening + */ +#define decrypt_last_round(a,b,c,d,round)\ + movzx a ## B, %edi;\ + mov (%r11,%rdi,4), %r9d;\ + movzx b ## B, %edi;\ + mov s3(%r11,%rdi,4),%r8d;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor (%r11,%rdi,4), %r8d;\ + movzx a ## H, %edi;\ + mov b ## D, %r10d;\ + shl $32, %r10;\ + xor a, %r10;\ + ror $16, a ## D;\ + xor s1(%r11,%rdi,4),%r9d;\ + movzx b ## B, %edi;\ + xor s1(%r11,%rdi,4),%r8d;\ + movzx a ## B, %edi;\ + xor s2(%r11,%rdi,4),%r9d;\ + movzx b ## H, %edi;\ + xor s2(%r11,%rdi,4),%r8d;\ + movzx a ## H, %edi;\ + xor s3(%r11,%rdi,4),%r9d;\ + add %r8d, %r9d;\ + add %r9d, %r8d;\ + add k+round(%r11), %r9d;\ + xor %r9d, c ## D;\ + add k+4+round(%r11),%r8d;\ + xor %r8d, d ## D;\ + ror $1, d ## D; + +.align 8 +.global twofish_enc_blk +.global twofish_dec_blk + +twofish_enc_blk: + pushq R1 + + /* %rdi contains the crypto tfm adress */ + /* %rsi contains the output adress */ + /* %rdx contains the input adress */ + add $crypto_tfm_ctx_offset, %rdi /* set ctx adress */ + /* ctx adress is moved to free one non-rex register + as target for the 8bit high operations */ + mov %rdi, %r11 + + movq (R3), R1 + movq 8(R3), R3 + input_whitening(R1,%r11,a_offset) + input_whitening(R3,%r11,c_offset) + mov R1D, R0D + rol $16, R0D + shr $32, R1 + mov R3D, R2D + shr $32, R3 + rol $1, R3D + + encrypt_round(R0,R1,R2,R3,0); + encrypt_round(R2,R3,R0,R1,8); + encrypt_round(R0,R1,R2,R3,2*8); + encrypt_round(R2,R3,R0,R1,3*8); + encrypt_round(R0,R1,R2,R3,4*8); + encrypt_round(R2,R3,R0,R1,5*8); + encrypt_round(R0,R1,R2,R3,6*8); + encrypt_round(R2,R3,R0,R1,7*8); + encrypt_round(R0,R1,R2,R3,8*8); + encrypt_round(R2,R3,R0,R1,9*8); + encrypt_round(R0,R1,R2,R3,10*8); + encrypt_round(R2,R3,R0,R1,11*8); + encrypt_round(R0,R1,R2,R3,12*8); + encrypt_round(R2,R3,R0,R1,13*8); + encrypt_round(R0,R1,R2,R3,14*8); + encrypt_last_round(R2,R3,R0,R1,15*8); + + + output_whitening(%r10,%r11,a_offset) + movq %r10, (%rsi) + + shl $32, R1 + xor R0, R1 + + output_whitening(R1,%r11,c_offset) + movq R1, 8(%rsi) + + popq R1 + movq $1,%rax + ret + +twofish_dec_blk: + pushq R1 + + /* %rdi contains the crypto tfm adress */ + /* %rsi contains the output adress */ + /* %rdx contains the input adress */ + add $crypto_tfm_ctx_offset, %rdi /* set ctx adress */ + /* ctx adress is moved to free one non-rex register + as target for the 8bit high operations */ + mov %rdi, %r11 + + movq (R3), R1 + movq 8(R3), R3 + output_whitening(R1,%r11,a_offset) + output_whitening(R3,%r11,c_offset) + mov R1D, R0D + shr $32, R1 + rol $16, R1D + mov R3D, R2D + shr $32, R3 + rol $1, R2D + + decrypt_round(R0,R1,R2,R3,15*8); + decrypt_round(R2,R3,R0,R1,14*8); + decrypt_round(R0,R1,R2,R3,13*8); + decrypt_round(R2,R3,R0,R1,12*8); + decrypt_round(R0,R1,R2,R3,11*8); + decrypt_round(R2,R3,R0,R1,10*8); + decrypt_round(R0,R1,R2,R3,9*8); + decrypt_round(R2,R3,R0,R1,8*8); + decrypt_round(R0,R1,R2,R3,7*8); + decrypt_round(R2,R3,R0,R1,6*8); + decrypt_round(R0,R1,R2,R3,5*8); + decrypt_round(R2,R3,R0,R1,4*8); + decrypt_round(R0,R1,R2,R3,3*8); + decrypt_round(R2,R3,R0,R1,2*8); + decrypt_round(R0,R1,R2,R3,1*8); + decrypt_last_round(R2,R3,R0,R1,0); + + input_whitening(%r10,%r11,a_offset) + movq %r10, (%rsi) + + shl $32, R1 + xor R0, R1 + + input_whitening(R1,%r11,c_offset) + movq R1, 8(%rsi) + + popq R1 + movq $1,%rax + ret diff --git a/arch/x86_64/crypto/twofish.c b/arch/x86_64/crypto/twofish.c new file mode 100644 index 000000000000..182d91d5cfb9 --- /dev/null +++ b/arch/x86_64/crypto/twofish.c @@ -0,0 +1,97 @@ +/* + * Glue Code for optimized x86_64 assembler version of TWOFISH + * + * Originally Twofish for GPG + * By Matthew Skala , July 26, 1998 + * 256-bit key length added March 20, 1999 + * Some modifications to reduce the text size by Werner Koch, April, 1998 + * Ported to the kerneli patch by Marc Mutz + * Ported to CryptoAPI by Colin Slater + * + * The original author has disclaimed all copyright interest in this + * code and thus put it in the public domain. The subsequent authors + * have put this under the GNU General Public License. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + * This code is a "clean room" implementation, written from the paper + * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, + * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available + * through http://www.counterpane.com/twofish.html + * + * For background information on multiplication in finite fields, used for + * the matrix operations in the key schedule, see the book _Contemporary + * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the + * Third Edition. + */ + +#include +#include +#include +#include +#include +#include + +asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); +asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); + +static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + twofish_enc_blk(tfm, dst, src); +} + +static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + twofish_dec_blk(tfm, dst, src); +} + +static struct crypto_alg alg = { + .cra_name = "twofish", + .cra_driver_name = "twofish-x86_64", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = TF_MIN_KEY_SIZE, + .cia_max_keysize = TF_MAX_KEY_SIZE, + .cia_setkey = twofish_setkey, + .cia_encrypt = twofish_encrypt, + .cia_decrypt = twofish_decrypt + } + } +}; + +static int __init init(void) +{ + return crypto_register_alg(&alg); +} + +static void __exit fini(void) +{ + crypto_unregister_alg(&alg); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION ("Twofish Cipher Algorithm, x86_64 asm optimized"); +MODULE_ALIAS("twofish"); diff --git a/crypto/Kconfig b/crypto/Kconfig index 306738ceecb4..fa927a287a1d 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -165,6 +165,21 @@ config CRYPTO_TWOFISH_586 See also: +config CRYPTO_TWOFISH_X86_64 + tristate "Twofish cipher algorithm (x86_64)" + depends on CRYPTO && ((X86 || UML_X86) && 64BIT) + select CRYPTO_TWOFISH_COMMON + help + Twofish cipher algorithm (x86_64). + + Twofish was submitted as an AES (Advanced Encryption Standard) + candidate cipher by researchers at CounterPane Systems. It is a + 16 round block cipher supporting key sizes of 128, 192, and 256 + bits. + + See also: + + config CRYPTO_SERPENT tristate "Serpent cipher algorithm" depends on CRYPTO From 72fa491912689ca69dd15f4266945d2c2f2819f8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 28 May 2006 09:05:24 +1000 Subject: [PATCH 0280/1063] [CRYPTO] api: Rename crypto_alg_get to crypto_mod_get The functions crypto_alg_get and crypto_alg_put operates on the crypto modules rather than the algorithms. Therefore it makes sense to call them crypto_mod_get and crypto_alg_put respectively. This is needed because we need to have real algorithm reference counters for parameterised algorithms as they can be unregistered from below by when their parameter algorithms are themselves unregistered. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/api.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/crypto/api.c b/crypto/api.c index c11ec1fd4f18..8c2743a05f90 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -29,12 +29,12 @@ LIST_HEAD(crypto_alg_list); DECLARE_RWSEM(crypto_alg_sem); -static inline int crypto_alg_get(struct crypto_alg *alg) +static inline int crypto_mod_get(struct crypto_alg *alg) { return try_module_get(alg->cra_module); } -static inline void crypto_alg_put(struct crypto_alg *alg) +static inline void crypto_mod_put(struct crypto_alg *alg) { module_put(alg->cra_module); } @@ -57,12 +57,12 @@ static struct crypto_alg *crypto_alg_lookup(const char *name) if (!exact && !(fuzzy && q->cra_priority > best)) continue; - if (unlikely(!crypto_alg_get(q))) + if (unlikely(!crypto_mod_get(q))) continue; best = q->cra_priority; if (alg) - crypto_alg_put(alg); + crypto_mod_put(alg); alg = q; if (exact) @@ -202,7 +202,7 @@ out_free_tfm: kfree(tfm); tfm = NULL; out_put: - crypto_alg_put(alg); + crypto_mod_put(alg); out: return tfm; } @@ -221,7 +221,7 @@ void crypto_free_tfm(struct crypto_tfm *tfm) if (alg->cra_exit) alg->cra_exit(tfm); crypto_exit_ops(tfm); - crypto_alg_put(alg); + crypto_mod_put(alg); memset(tfm, 0, size); kfree(tfm); } @@ -305,7 +305,7 @@ int crypto_alg_available(const char *name, u32 flags) struct crypto_alg *alg = crypto_alg_mod_lookup(name); if (alg) { - crypto_alg_put(alg); + crypto_mod_put(alg); ret = 1; } From 6521f30273fbec65146a0f16de74b7b402b0f7b0 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 6 Aug 2006 20:28:44 +1000 Subject: [PATCH 0281/1063] [CRYPTO] api: Add crypto_alg reference counting Up until now we've relied on module reference counting to ensure that the crypto_alg structures don't disappear from under us. This was good enough as long as each crypto_alg came from exactly one module. However, with parameterised crypto algorithms a crypto_alg object may need two or more modules to operate. This means that we need to count the references to the crypto_alg object directly. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/api.c | 32 ++++++++++++++++++++++++++------ crypto/proc.c | 3 +++ include/linux/crypto.h | 3 +++ 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/crypto/api.c b/crypto/api.c index 8c2743a05f90..5994a58ef954 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -29,13 +29,26 @@ LIST_HEAD(crypto_alg_list); DECLARE_RWSEM(crypto_alg_sem); -static inline int crypto_mod_get(struct crypto_alg *alg) +static inline struct crypto_alg *crypto_alg_get(struct crypto_alg *alg) { - return try_module_get(alg->cra_module); + atomic_inc(&alg->cra_refcnt); + return alg; } -static inline void crypto_mod_put(struct crypto_alg *alg) +static inline void crypto_alg_put(struct crypto_alg *alg) { + if (atomic_dec_and_test(&alg->cra_refcnt) && alg->cra_destroy) + alg->cra_destroy(alg); +} + +static struct crypto_alg *crypto_mod_get(struct crypto_alg *alg) +{ + return try_module_get(alg->cra_module) ? crypto_alg_get(alg) : NULL; +} + +static void crypto_mod_put(struct crypto_alg *alg) +{ + crypto_alg_put(alg); module_put(alg->cra_module); } @@ -274,6 +287,7 @@ int crypto_register_alg(struct crypto_alg *alg) } list_add(&alg->cra_list, &crypto_alg_list); + atomic_set(&alg->cra_refcnt, 1); out: up_write(&crypto_alg_sem); return ret; @@ -284,8 +298,6 @@ int crypto_unregister_alg(struct crypto_alg *alg) int ret = -ENOENT; struct crypto_alg *q; - BUG_ON(!alg->cra_module); - down_write(&crypto_alg_sem); list_for_each_entry(q, &crypto_alg_list, cra_list) { if (alg == q) { @@ -296,7 +308,15 @@ int crypto_unregister_alg(struct crypto_alg *alg) } out: up_write(&crypto_alg_sem); - return ret; + + if (ret) + return ret; + + BUG_ON(atomic_read(&alg->cra_refcnt) != 1); + if (alg->cra_destroy) + alg->cra_destroy(alg); + + return 0; } int crypto_alg_available(const char *name, u32 flags) diff --git a/crypto/proc.c b/crypto/proc.c index c0a5dd7ce2cc..8543b7a157d6 100644 --- a/crypto/proc.c +++ b/crypto/proc.c @@ -12,6 +12,8 @@ * any later version. * */ + +#include #include #include #include @@ -54,6 +56,7 @@ static int c_show(struct seq_file *m, void *p) seq_printf(m, "driver : %s\n", alg->cra_driver_name); seq_printf(m, "module : %s\n", module_name(alg->cra_module)); seq_printf(m, "priority : %d\n", alg->cra_priority); + seq_printf(m, "refcnt : %d\n", atomic_read(&alg->cra_refcnt)); switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) { case CRYPTO_ALG_TYPE_CIPHER: diff --git a/include/linux/crypto.h b/include/linux/crypto.h index cb1e6631b132..7f57ff8ec975 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -17,6 +17,7 @@ #ifndef _LINUX_CRYPTO_H #define _LINUX_CRYPTO_H +#include #include #include #include @@ -148,6 +149,7 @@ struct crypto_alg { unsigned int cra_alignmask; int cra_priority; + atomic_t cra_refcnt; char cra_name[CRYPTO_MAX_ALG_NAME]; char cra_driver_name[CRYPTO_MAX_ALG_NAME]; @@ -160,6 +162,7 @@ struct crypto_alg { int (*cra_init)(struct crypto_tfm *tfm); void (*cra_exit)(struct crypto_tfm *tfm); + void (*cra_destroy)(struct crypto_alg *alg); struct module *cra_module; }; From 9409f38a0c8773c04bff8dda8c552d7ea013d956 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 6 Aug 2006 19:49:12 +1000 Subject: [PATCH 0282/1063] [IPSEC]: Move linux/crypto.h inclusion out of net/xfrm.h The header file linux/crypto.h is only needed by a few files so including it in net/xfrm.h (which is included by half of the networking stack) is a waste. This patch moves it out of net/xfrm.h and into the specific header files that actually need it. Signed-off-by: Herbert Xu --- include/net/ah.h | 1 + include/net/esp.h | 1 + include/net/ipcomp.h | 4 ++++ include/net/xfrm.h | 2 +- net/xfrm/xfrm_user.c | 1 + 5 files changed, 8 insertions(+), 1 deletion(-) diff --git a/include/net/ah.h b/include/net/ah.h index ceff00afae09..8e27c9ba8b84 100644 --- a/include/net/ah.h +++ b/include/net/ah.h @@ -1,6 +1,7 @@ #ifndef _NET_AH_H #define _NET_AH_H +#include #include /* This is the maximum truncated ICV length that we know of. */ diff --git a/include/net/esp.h b/include/net/esp.h index 90cd94fad7d9..6eb837973c84 100644 --- a/include/net/esp.h +++ b/include/net/esp.h @@ -1,6 +1,7 @@ #ifndef _NET_ESP_H #define _NET_ESP_H +#include #include #include diff --git a/include/net/ipcomp.h b/include/net/ipcomp.h index e651a57ecdd5..b94e3047b4d9 100644 --- a/include/net/ipcomp.h +++ b/include/net/ipcomp.h @@ -1,8 +1,12 @@ #ifndef _NET_IPCOMP_H #define _NET_IPCOMP_H +#include + #define IPCOMP_SCRATCH_SIZE 65400 +struct crypto_tfm; + struct ipcomp_data { u16 threshold; struct crypto_tfm **tfms; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 9c5ee9f20b65..10396b4bde14 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -985,6 +984,7 @@ extern struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe); extern struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe); struct crypto_tfm; +struct scatterlist; typedef void (icv_update_fn_t)(struct crypto_tfm *, struct scatterlist *, unsigned int); extern void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm, diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 3e6a722d072e..7d18ca03c80d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -10,6 +10,7 @@ * */ +#include #include #include #include From cce9e06d100df19a327b19f23adad76e7bf63edd Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 21 Aug 2006 21:08:13 +1000 Subject: [PATCH 0283/1063] [CRYPTO] api: Split out low-level API The crypto API is made up of the part facing users such as IPsec and the low-level part which is used by cryptographic entities such as algorithms. This patch splits out the latter so that the two APIs are more clearly delineated. As a bonus the low-level API can now be modularised if all algorithms are built as modules. Signed-off-by: Herbert Xu --- crypto/Kconfig | 84 ++++++++++++++++------------ crypto/Makefile | 7 ++- crypto/algapi.c | 118 ++++++++++++++++++++++++++++++++++++++++ crypto/api.c | 97 +-------------------------------- crypto/internal.h | 6 +- crypto/proc.c | 5 ++ drivers/crypto/Kconfig | 3 +- include/crypto/algapi.h | 18 ++++++ 8 files changed, 204 insertions(+), 134 deletions(-) create mode 100644 crypto/algapi.c create mode 100644 include/crypto/algapi.h diff --git a/crypto/Kconfig b/crypto/Kconfig index fa927a287a1d..aabc63195222 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -9,47 +9,54 @@ config CRYPTO help This option provides the core Cryptographic API. +if CRYPTO + +config CRYPTO_ALGAPI + tristate + help + This option provides the API for cryptographic algorithms. + config CRYPTO_HMAC bool "HMAC support" - depends on CRYPTO help HMAC: Keyed-Hashing for Message Authentication (RFC2104). This is required for IPSec. config CRYPTO_NULL tristate "Null algorithms" - depends on CRYPTO + select CRYPTO_ALGAPI help These are 'Null' algorithms, used by IPsec, which do nothing. config CRYPTO_MD4 tristate "MD4 digest algorithm" - depends on CRYPTO + select CRYPTO_ALGAPI help MD4 message digest algorithm (RFC1320). config CRYPTO_MD5 tristate "MD5 digest algorithm" - depends on CRYPTO + select CRYPTO_ALGAPI help MD5 message digest algorithm (RFC1321). config CRYPTO_SHA1 tristate "SHA1 digest algorithm" - depends on CRYPTO + select CRYPTO_ALGAPI help SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2). config CRYPTO_SHA1_S390 tristate "SHA1 digest algorithm (s390)" - depends on CRYPTO && S390 + depends on S390 + select CRYPTO_ALGAPI help This is the s390 hardware accelerated implementation of the SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2). config CRYPTO_SHA256 tristate "SHA256 digest algorithm" - depends on CRYPTO + select CRYPTO_ALGAPI help SHA256 secure hash standard (DFIPS 180-2). @@ -58,7 +65,8 @@ config CRYPTO_SHA256 config CRYPTO_SHA256_S390 tristate "SHA256 digest algorithm (s390)" - depends on CRYPTO && S390 + depends on S390 + select CRYPTO_ALGAPI help This is the s390 hardware accelerated implementation of the SHA256 secure hash standard (DFIPS 180-2). @@ -68,7 +76,7 @@ config CRYPTO_SHA256_S390 config CRYPTO_SHA512 tristate "SHA384 and SHA512 digest algorithms" - depends on CRYPTO + select CRYPTO_ALGAPI help SHA512 secure hash standard (DFIPS 180-2). @@ -80,7 +88,7 @@ config CRYPTO_SHA512 config CRYPTO_WP512 tristate "Whirlpool digest algorithms" - depends on CRYPTO + select CRYPTO_ALGAPI help Whirlpool hash algorithm 512, 384 and 256-bit hashes @@ -92,7 +100,7 @@ config CRYPTO_WP512 config CRYPTO_TGR192 tristate "Tiger digest algorithms" - depends on CRYPTO + select CRYPTO_ALGAPI help Tiger hash algorithm 192, 160 and 128-bit hashes @@ -105,19 +113,20 @@ config CRYPTO_TGR192 config CRYPTO_DES tristate "DES and Triple DES EDE cipher algorithms" - depends on CRYPTO + select CRYPTO_ALGAPI help DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3). config CRYPTO_DES_S390 tristate "DES and Triple DES cipher algorithms (s390)" - depends on CRYPTO && S390 + depends on S390 + select CRYPTO_ALGAPI help DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3). config CRYPTO_BLOWFISH tristate "Blowfish cipher algorithm" - depends on CRYPTO + select CRYPTO_ALGAPI help Blowfish cipher algorithm, by Bruce Schneier. @@ -130,7 +139,7 @@ config CRYPTO_BLOWFISH config CRYPTO_TWOFISH tristate "Twofish cipher algorithm" - depends on CRYPTO + select CRYPTO_ALGAPI select CRYPTO_TWOFISH_COMMON help Twofish cipher algorithm. @@ -145,14 +154,14 @@ config CRYPTO_TWOFISH config CRYPTO_TWOFISH_COMMON tristate - depends on CRYPTO help Common parts of the Twofish cipher algorithm shared by the generic c and the assembler implementations. config CRYPTO_TWOFISH_586 tristate "Twofish cipher algorithms (i586)" - depends on CRYPTO && ((X86 || UML_X86) && !64BIT) + depends on (X86 || UML_X86) && !64BIT + select CRYPTO_ALGAPI select CRYPTO_TWOFISH_COMMON help Twofish cipher algorithm. @@ -167,7 +176,8 @@ config CRYPTO_TWOFISH_586 config CRYPTO_TWOFISH_X86_64 tristate "Twofish cipher algorithm (x86_64)" - depends on CRYPTO && ((X86 || UML_X86) && 64BIT) + depends on (X86 || UML_X86) && 64BIT + select CRYPTO_ALGAPI select CRYPTO_TWOFISH_COMMON help Twofish cipher algorithm (x86_64). @@ -182,7 +192,7 @@ config CRYPTO_TWOFISH_X86_64 config CRYPTO_SERPENT tristate "Serpent cipher algorithm" - depends on CRYPTO + select CRYPTO_ALGAPI help Serpent cipher algorithm, by Anderson, Biham & Knudsen. @@ -195,7 +205,7 @@ config CRYPTO_SERPENT config CRYPTO_AES tristate "AES cipher algorithms" - depends on CRYPTO + select CRYPTO_ALGAPI help AES cipher algorithms (FIPS-197). AES uses the Rijndael algorithm. @@ -215,7 +225,8 @@ config CRYPTO_AES config CRYPTO_AES_586 tristate "AES cipher algorithms (i586)" - depends on CRYPTO && ((X86 || UML_X86) && !64BIT) + depends on (X86 || UML_X86) && !64BIT + select CRYPTO_ALGAPI help AES cipher algorithms (FIPS-197). AES uses the Rijndael algorithm. @@ -235,7 +246,8 @@ config CRYPTO_AES_586 config CRYPTO_AES_X86_64 tristate "AES cipher algorithms (x86_64)" - depends on CRYPTO && ((X86 || UML_X86) && 64BIT) + depends on (X86 || UML_X86) && 64BIT + select CRYPTO_ALGAPI help AES cipher algorithms (FIPS-197). AES uses the Rijndael algorithm. @@ -255,7 +267,8 @@ config CRYPTO_AES_X86_64 config CRYPTO_AES_S390 tristate "AES cipher algorithms (s390)" - depends on CRYPTO && S390 + depends on S390 + select CRYPTO_ALGAPI help This is the s390 hardware accelerated implementation of the AES cipher algorithms (FIPS-197). AES uses the Rijndael @@ -275,21 +288,21 @@ config CRYPTO_AES_S390 config CRYPTO_CAST5 tristate "CAST5 (CAST-128) cipher algorithm" - depends on CRYPTO + select CRYPTO_ALGAPI help The CAST5 encryption algorithm (synonymous with CAST-128) is described in RFC2144. config CRYPTO_CAST6 tristate "CAST6 (CAST-256) cipher algorithm" - depends on CRYPTO + select CRYPTO_ALGAPI help The CAST6 encryption algorithm (synonymous with CAST-256) is described in RFC2612. config CRYPTO_TEA tristate "TEA, XTEA and XETA cipher algorithms" - depends on CRYPTO + select CRYPTO_ALGAPI help TEA cipher algorithm. @@ -306,7 +319,7 @@ config CRYPTO_TEA config CRYPTO_ARC4 tristate "ARC4 cipher algorithm" - depends on CRYPTO + select CRYPTO_ALGAPI help ARC4 cipher algorithm. @@ -317,7 +330,7 @@ config CRYPTO_ARC4 config CRYPTO_KHAZAD tristate "Khazad cipher algorithm" - depends on CRYPTO + select CRYPTO_ALGAPI help Khazad cipher algorithm. @@ -330,7 +343,7 @@ config CRYPTO_KHAZAD config CRYPTO_ANUBIS tristate "Anubis cipher algorithm" - depends on CRYPTO + select CRYPTO_ALGAPI help Anubis cipher algorithm. @@ -345,7 +358,7 @@ config CRYPTO_ANUBIS config CRYPTO_DEFLATE tristate "Deflate compression algorithm" - depends on CRYPTO + select CRYPTO_ALGAPI select ZLIB_INFLATE select ZLIB_DEFLATE help @@ -356,7 +369,7 @@ config CRYPTO_DEFLATE config CRYPTO_MICHAEL_MIC tristate "Michael MIC keyed digest algorithm" - depends on CRYPTO + select CRYPTO_ALGAPI help Michael MIC is used for message integrity protection in TKIP (IEEE 802.11i). This algorithm is required for TKIP, but it @@ -365,7 +378,7 @@ config CRYPTO_MICHAEL_MIC config CRYPTO_CRC32C tristate "CRC32c CRC algorithm" - depends on CRYPTO + select CRYPTO_ALGAPI select LIBCRC32C help Castagnoli, et al Cyclic Redundancy-Check Algorithm. Used @@ -375,10 +388,13 @@ config CRYPTO_CRC32C config CRYPTO_TEST tristate "Testing module" - depends on CRYPTO && m + depends on m + select CRYPTO_ALGAPI help Quick & dirty crypto test module. source "drivers/crypto/Kconfig" -endmenu +endif # if CRYPTO + +endmenu diff --git a/crypto/Makefile b/crypto/Makefile index fe934f1001c6..6d51f80753a1 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -2,10 +2,11 @@ # Cryptographic API # -proc-crypto-$(CONFIG_PROC_FS) = proc.o +obj-$(CONFIG_CRYPTO) += api.o scatterwalk.o cipher.o digest.o compress.o -obj-$(CONFIG_CRYPTO) += api.o scatterwalk.o cipher.o digest.o compress.o \ - $(proc-crypto-y) +crypto_algapi-$(CONFIG_PROC_FS) += proc.o +crypto_algapi-objs := algapi.o $(crypto_algapi-y) +obj-$(CONFIG_CRYPTO_ALGAPI) += crypto_algapi.o obj-$(CONFIG_CRYPTO_HMAC) += hmac.o obj-$(CONFIG_CRYPTO_NULL) += crypto_null.o diff --git a/crypto/algapi.c b/crypto/algapi.c new file mode 100644 index 000000000000..a65c6ccfbe17 --- /dev/null +++ b/crypto/algapi.c @@ -0,0 +1,118 @@ +/* + * Cryptographic API for algorithms (i.e., low-level API). + * + * Copyright (c) 2006 Herbert Xu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include +#include +#include +#include +#include + +#include "internal.h" + +static inline int crypto_set_driver_name(struct crypto_alg *alg) +{ + static const char suffix[] = "-generic"; + char *driver_name = alg->cra_driver_name; + int len; + + if (*driver_name) + return 0; + + len = strlcpy(driver_name, alg->cra_name, CRYPTO_MAX_ALG_NAME); + if (len + sizeof(suffix) > CRYPTO_MAX_ALG_NAME) + return -ENAMETOOLONG; + + memcpy(driver_name + len, suffix, sizeof(suffix)); + return 0; +} + +int crypto_register_alg(struct crypto_alg *alg) +{ + int ret; + struct crypto_alg *q; + + if (alg->cra_alignmask & (alg->cra_alignmask + 1)) + return -EINVAL; + + if (alg->cra_alignmask & alg->cra_blocksize) + return -EINVAL; + + if (alg->cra_blocksize > PAGE_SIZE / 8) + return -EINVAL; + + if (alg->cra_priority < 0) + return -EINVAL; + + ret = crypto_set_driver_name(alg); + if (unlikely(ret)) + return ret; + + down_write(&crypto_alg_sem); + + list_for_each_entry(q, &crypto_alg_list, cra_list) { + if (q == alg) { + ret = -EEXIST; + goto out; + } + } + + list_add(&alg->cra_list, &crypto_alg_list); + atomic_set(&alg->cra_refcnt, 1); +out: + up_write(&crypto_alg_sem); + return ret; +} +EXPORT_SYMBOL_GPL(crypto_register_alg); + +int crypto_unregister_alg(struct crypto_alg *alg) +{ + int ret = -ENOENT; + struct crypto_alg *q; + + down_write(&crypto_alg_sem); + list_for_each_entry(q, &crypto_alg_list, cra_list) { + if (alg == q) { + list_del(&alg->cra_list); + ret = 0; + goto out; + } + } +out: + up_write(&crypto_alg_sem); + + if (ret) + return ret; + + BUG_ON(atomic_read(&alg->cra_refcnt) != 1); + if (alg->cra_destroy) + alg->cra_destroy(alg); + + return 0; +} +EXPORT_SYMBOL_GPL(crypto_unregister_alg); + +static int __init crypto_algapi_init(void) +{ + crypto_init_proc(); + return 0; +} + +static void __exit crypto_algapi_exit(void) +{ + crypto_exit_proc(); +} + +module_init(crypto_algapi_init); +module_exit(crypto_algapi_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Cryptographic algorithms API"); diff --git a/crypto/api.c b/crypto/api.c index 5994a58ef954..c922090b4842 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -15,19 +15,17 @@ * */ -#include -#include -#include #include #include #include -#include #include #include #include "internal.h" LIST_HEAD(crypto_alg_list); +EXPORT_SYMBOL_GPL(crypto_alg_list); DECLARE_RWSEM(crypto_alg_sem); +EXPORT_SYMBOL_GPL(crypto_alg_sem); static inline struct crypto_alg *crypto_alg_get(struct crypto_alg *alg) { @@ -239,86 +237,6 @@ void crypto_free_tfm(struct crypto_tfm *tfm) kfree(tfm); } -static inline int crypto_set_driver_name(struct crypto_alg *alg) -{ - static const char suffix[] = "-generic"; - char *driver_name = alg->cra_driver_name; - int len; - - if (*driver_name) - return 0; - - len = strlcpy(driver_name, alg->cra_name, CRYPTO_MAX_ALG_NAME); - if (len + sizeof(suffix) > CRYPTO_MAX_ALG_NAME) - return -ENAMETOOLONG; - - memcpy(driver_name + len, suffix, sizeof(suffix)); - return 0; -} - -int crypto_register_alg(struct crypto_alg *alg) -{ - int ret; - struct crypto_alg *q; - - if (alg->cra_alignmask & (alg->cra_alignmask + 1)) - return -EINVAL; - - if (alg->cra_alignmask & alg->cra_blocksize) - return -EINVAL; - - if (alg->cra_blocksize > PAGE_SIZE / 8) - return -EINVAL; - - if (alg->cra_priority < 0) - return -EINVAL; - - ret = crypto_set_driver_name(alg); - if (unlikely(ret)) - return ret; - - down_write(&crypto_alg_sem); - - list_for_each_entry(q, &crypto_alg_list, cra_list) { - if (q == alg) { - ret = -EEXIST; - goto out; - } - } - - list_add(&alg->cra_list, &crypto_alg_list); - atomic_set(&alg->cra_refcnt, 1); -out: - up_write(&crypto_alg_sem); - return ret; -} - -int crypto_unregister_alg(struct crypto_alg *alg) -{ - int ret = -ENOENT; - struct crypto_alg *q; - - down_write(&crypto_alg_sem); - list_for_each_entry(q, &crypto_alg_list, cra_list) { - if (alg == q) { - list_del(&alg->cra_list); - ret = 0; - goto out; - } - } -out: - up_write(&crypto_alg_sem); - - if (ret) - return ret; - - BUG_ON(atomic_read(&alg->cra_refcnt) != 1); - if (alg->cra_destroy) - alg->cra_destroy(alg); - - return 0; -} - int crypto_alg_available(const char *name, u32 flags) { int ret = 0; @@ -332,17 +250,6 @@ int crypto_alg_available(const char *name, u32 flags) return ret; } -static int __init init_crypto(void) -{ - printk(KERN_INFO "Initializing Cryptographic API\n"); - crypto_init_proc(); - return 0; -} - -__initcall(init_crypto); - -EXPORT_SYMBOL_GPL(crypto_register_alg); -EXPORT_SYMBOL_GPL(crypto_unregister_alg); EXPORT_SYMBOL_GPL(crypto_alloc_tfm); EXPORT_SYMBOL_GPL(crypto_free_tfm); EXPORT_SYMBOL_GPL(crypto_alg_available); diff --git a/crypto/internal.h b/crypto/internal.h index 959e602909a6..26f47d331551 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -12,7 +12,8 @@ */ #ifndef _CRYPTO_INTERNAL_H #define _CRYPTO_INTERNAL_H -#include + +#include #include #include #include @@ -64,9 +65,12 @@ static inline void crypto_free_hmac_block(struct crypto_tfm *tfm) #ifdef CONFIG_PROC_FS void __init crypto_init_proc(void); +void __exit crypto_exit_proc(void); #else static inline void crypto_init_proc(void) { } +static inline void crypto_exit_proc(void) +{ } #endif static inline unsigned int crypto_digest_ctxsize(struct crypto_alg *alg, diff --git a/crypto/proc.c b/crypto/proc.c index 8543b7a157d6..9e573b17e887 100644 --- a/crypto/proc.c +++ b/crypto/proc.c @@ -113,3 +113,8 @@ void __init crypto_init_proc(void) if (proc) proc->proc_fops = &proc_crypto_ops; } + +void __exit crypto_exit_proc(void) +{ + remove_proc_entry("crypto", NULL); +} diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 4263935443cc..ba23683ab8c4 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -2,7 +2,8 @@ menu "Hardware crypto devices" config CRYPTO_DEV_PADLOCK tristate "Support for VIA PadLock ACE" - depends on CRYPTO && X86_32 + depends on X86_32 + select CRYPTO_ALGAPI help Some VIA processors come with an integrated crypto engine (so called VIA PadLock ACE, Advanced Cryptography Engine) diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h new file mode 100644 index 000000000000..ed68d494b364 --- /dev/null +++ b/include/crypto/algapi.h @@ -0,0 +1,18 @@ +/* + * Cryptographic API for algorithms (i.e., low-level API). + * + * Copyright (c) 2006 Herbert Xu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ +#ifndef _CRYPTO_ALGAPI_H +#define _CRYPTO_ALGAPI_H + +#include + +#endif /* _CRYPTO_ALGAPI_H */ + From 4cc7720cd165273b08a72b4193146dffee58e34b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 6 Aug 2006 21:16:34 +1000 Subject: [PATCH 0284/1063] [CRYPTO] api: Add template registration A crypto_template generates a crypto_alg object when given a set of parameters. this patch adds the basic data structure fo templates and code to handle their registration/deregistration. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/algapi.c | 156 ++++++++++++++++++++++++++++++++++------ crypto/internal.h | 17 +++++ include/crypto/algapi.h | 31 ++++++++ 3 files changed, 182 insertions(+), 22 deletions(-) diff --git a/crypto/algapi.c b/crypto/algapi.c index a65c6ccfbe17..232b37d81613 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -13,11 +13,14 @@ #include #include #include +#include #include #include #include "internal.h" +static LIST_HEAD(crypto_template_list); + static inline int crypto_set_driver_name(struct crypto_alg *alg) { static const char suffix[] = "-generic"; @@ -35,11 +38,8 @@ static inline int crypto_set_driver_name(struct crypto_alg *alg) return 0; } -int crypto_register_alg(struct crypto_alg *alg) +static int crypto_check_alg(struct crypto_alg *alg) { - int ret; - struct crypto_alg *q; - if (alg->cra_alignmask & (alg->cra_alignmask + 1)) return -EINVAL; @@ -51,42 +51,52 @@ int crypto_register_alg(struct crypto_alg *alg) if (alg->cra_priority < 0) return -EINVAL; - - ret = crypto_set_driver_name(alg); - if (unlikely(ret)) - return ret; - down_write(&crypto_alg_sem); - + return crypto_set_driver_name(alg); +} + +static int __crypto_register_alg(struct crypto_alg *alg) +{ + struct crypto_alg *q; + int ret = -EEXIST; + list_for_each_entry(q, &crypto_alg_list, cra_list) { - if (q == alg) { - ret = -EEXIST; + if (q == alg) goto out; - } } list_add(&alg->cra_list, &crypto_alg_list); atomic_set(&alg->cra_refcnt, 1); + ret = 0; out: - up_write(&crypto_alg_sem); return ret; } + +int crypto_register_alg(struct crypto_alg *alg) +{ + int err; + + err = crypto_check_alg(alg); + if (err) + return err; + + down_write(&crypto_alg_sem); + err = __crypto_register_alg(alg); + up_write(&crypto_alg_sem); + + return err; +} EXPORT_SYMBOL_GPL(crypto_register_alg); int crypto_unregister_alg(struct crypto_alg *alg) { int ret = -ENOENT; - struct crypto_alg *q; down_write(&crypto_alg_sem); - list_for_each_entry(q, &crypto_alg_list, cra_list) { - if (alg == q) { - list_del(&alg->cra_list); - ret = 0; - goto out; - } + if (likely(!list_empty(&alg->cra_list))) { + list_del_init(&alg->cra_list); + ret = 0; } -out: up_write(&crypto_alg_sem); if (ret) @@ -100,6 +110,108 @@ out: } EXPORT_SYMBOL_GPL(crypto_unregister_alg); +int crypto_register_template(struct crypto_template *tmpl) +{ + struct crypto_template *q; + int err = -EEXIST; + + down_write(&crypto_alg_sem); + + list_for_each_entry(q, &crypto_template_list, list) { + if (q == tmpl) + goto out; + } + + list_add(&tmpl->list, &crypto_template_list); + err = 0; +out: + up_write(&crypto_alg_sem); + return err; +} +EXPORT_SYMBOL_GPL(crypto_register_template); + +void crypto_unregister_template(struct crypto_template *tmpl) +{ + struct crypto_instance *inst; + struct hlist_node *p, *n; + struct hlist_head *list; + + down_write(&crypto_alg_sem); + + BUG_ON(list_empty(&tmpl->list)); + list_del_init(&tmpl->list); + + list = &tmpl->instances; + hlist_for_each_entry(inst, p, list, list) { + BUG_ON(list_empty(&inst->alg.cra_list)); + list_del_init(&inst->alg.cra_list); + } + + up_write(&crypto_alg_sem); + + hlist_for_each_entry_safe(inst, p, n, list, list) { + BUG_ON(atomic_read(&inst->alg.cra_refcnt) != 1); + tmpl->free(inst); + } +} +EXPORT_SYMBOL_GPL(crypto_unregister_template); + +static struct crypto_template *__crypto_lookup_template(const char *name) +{ + struct crypto_template *q, *tmpl = NULL; + + down_read(&crypto_alg_sem); + list_for_each_entry(q, &crypto_template_list, list) { + if (strcmp(q->name, name)) + continue; + if (unlikely(!crypto_tmpl_get(q))) + continue; + + tmpl = q; + break; + } + up_read(&crypto_alg_sem); + + return tmpl; +} + +struct crypto_template *crypto_lookup_template(const char *name) +{ + return try_then_request_module(__crypto_lookup_template(name), name); +} +EXPORT_SYMBOL_GPL(crypto_lookup_template); + +int crypto_register_instance(struct crypto_template *tmpl, + struct crypto_instance *inst) +{ + int err = -EINVAL; + + if (inst->alg.cra_destroy) + goto err; + + err = crypto_check_alg(&inst->alg); + if (err) + goto err; + + inst->alg.cra_module = tmpl->module; + + down_write(&crypto_alg_sem); + + err = __crypto_register_alg(&inst->alg); + if (err) + goto unlock; + + hlist_add_head(&inst->list, &tmpl->instances); + inst->tmpl = tmpl; + +unlock: + up_write(&crypto_alg_sem); + +err: + return err; +} +EXPORT_SYMBOL_GPL(crypto_register_instance); + static int __init crypto_algapi_init(void) { crypto_init_proc(); diff --git a/crypto/internal.h b/crypto/internal.h index 26f47d331551..c3ab4a950f30 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -19,11 +19,15 @@ #include #include #include +#include #include #include #include #include +struct crypto_instance; +struct crypto_template; + extern struct list_head crypto_alg_list; extern struct rw_semaphore crypto_alg_sem; @@ -112,5 +116,18 @@ void crypto_exit_digest_ops(struct crypto_tfm *tfm); void crypto_exit_cipher_ops(struct crypto_tfm *tfm); void crypto_exit_compress_ops(struct crypto_tfm *tfm); +int crypto_register_instance(struct crypto_template *tmpl, + struct crypto_instance *inst); + +static inline int crypto_tmpl_get(struct crypto_template *tmpl) +{ + return try_module_get(tmpl->module); +} + +static inline void crypto_tmpl_put(struct crypto_template *tmpl) +{ + module_put(tmpl->module); +} + #endif /* _CRYPTO_INTERNAL_H */ diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index ed68d494b364..ffec530d52fb 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -14,5 +14,36 @@ #include +struct module; + +struct crypto_instance { + struct crypto_alg alg; + + struct crypto_template *tmpl; + struct hlist_node list; + + void *__ctx[] CRYPTO_MINALIGN_ATTR; +}; + +struct crypto_template { + struct list_head list; + struct hlist_head instances; + struct module *module; + + struct crypto_instance *(*alloc)(void *param, unsigned int len); + void (*free)(struct crypto_instance *inst); + + char name[CRYPTO_MAX_ALG_NAME]; +}; + +int crypto_register_template(struct crypto_template *tmpl); +void crypto_unregister_template(struct crypto_template *tmpl); +struct crypto_template *crypto_lookup_template(const char *name); + +static inline void *crypto_instance_ctx(struct crypto_instance *inst) +{ + return inst->__ctx; +} + #endif /* _CRYPTO_ALGAPI_H */ From 2825982d9d66ebba4b532a07391dfbb357f71c5f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 6 Aug 2006 21:23:26 +1000 Subject: [PATCH 0285/1063] [CRYPTO] api: Added event notification This patch adds a notifier chain for algorithm/template registration events. This will be used to register compound algorithms such as cbc(aes). In future this will also be passed onto user-space through netlink. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/algapi.c | 50 ++++++++++++++++- crypto/api.c | 122 +++++++++++++++++++++++++++++++++++++---- crypto/internal.h | 37 +++++++++++++ include/linux/crypto.h | 4 +- 4 files changed, 199 insertions(+), 14 deletions(-) diff --git a/crypto/algapi.c b/crypto/algapi.c index 232b37d81613..f0df85fc1f50 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -21,6 +21,24 @@ static LIST_HEAD(crypto_template_list); +void crypto_larval_error(const char *name) +{ + struct crypto_alg *alg; + + down_read(&crypto_alg_sem); + alg = __crypto_alg_lookup(name); + up_read(&crypto_alg_sem); + + if (alg) { + if (crypto_is_larval(alg)) { + struct crypto_larval *larval = (void *)alg; + complete(&larval->completion); + } + crypto_mod_put(alg); + } +} +EXPORT_SYMBOL_GPL(crypto_larval_error); + static inline int crypto_set_driver_name(struct crypto_alg *alg) { static const char suffix[] = "-generic"; @@ -60,14 +78,27 @@ static int __crypto_register_alg(struct crypto_alg *alg) struct crypto_alg *q; int ret = -EEXIST; + atomic_set(&alg->cra_refcnt, 1); list_for_each_entry(q, &crypto_alg_list, cra_list) { if (q == alg) goto out; + if (crypto_is_larval(q) && + (!strcmp(alg->cra_name, q->cra_name) || + !strcmp(alg->cra_driver_name, q->cra_name))) { + struct crypto_larval *larval = (void *)q; + + if (!crypto_mod_get(alg)) + continue; + larval->adult = alg; + complete(&larval->completion); + } } list_add(&alg->cra_list, &crypto_alg_list); - atomic_set(&alg->cra_refcnt, 1); + + crypto_notify(CRYPTO_MSG_ALG_REGISTER, alg); ret = 0; + out: return ret; } @@ -97,6 +128,7 @@ int crypto_unregister_alg(struct crypto_alg *alg) list_del_init(&alg->cra_list); ret = 0; } + crypto_notify(CRYPTO_MSG_ALG_UNREGISTER, alg); up_write(&crypto_alg_sem); if (ret) @@ -123,6 +155,7 @@ int crypto_register_template(struct crypto_template *tmpl) } list_add(&tmpl->list, &crypto_template_list); + crypto_notify(CRYPTO_MSG_TMPL_REGISTER, tmpl); err = 0; out: up_write(&crypto_alg_sem); @@ -145,8 +178,11 @@ void crypto_unregister_template(struct crypto_template *tmpl) hlist_for_each_entry(inst, p, list, list) { BUG_ON(list_empty(&inst->alg.cra_list)); list_del_init(&inst->alg.cra_list); + crypto_notify(CRYPTO_MSG_ALG_UNREGISTER, &inst->alg); } + crypto_notify(CRYPTO_MSG_TMPL_UNREGISTER, tmpl); + up_write(&crypto_alg_sem); hlist_for_each_entry_safe(inst, p, n, list, list) { @@ -212,6 +248,18 @@ err: } EXPORT_SYMBOL_GPL(crypto_register_instance); +int crypto_register_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&crypto_chain, nb); +} +EXPORT_SYMBOL_GPL(crypto_register_notifier); + +int crypto_unregister_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&crypto_chain, nb); +} +EXPORT_SYMBOL_GPL(crypto_unregister_notifier); + static int __init crypto_algapi_init(void) { crypto_init_proc(); diff --git a/crypto/api.c b/crypto/api.c index c922090b4842..5a0d6a17cfd7 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include "internal.h" @@ -27,6 +28,9 @@ EXPORT_SYMBOL_GPL(crypto_alg_list); DECLARE_RWSEM(crypto_alg_sem); EXPORT_SYMBOL_GPL(crypto_alg_sem); +BLOCKING_NOTIFIER_HEAD(crypto_chain); +EXPORT_SYMBOL_GPL(crypto_chain); + static inline struct crypto_alg *crypto_alg_get(struct crypto_alg *alg) { atomic_inc(&alg->cra_refcnt); @@ -39,27 +43,24 @@ static inline void crypto_alg_put(struct crypto_alg *alg) alg->cra_destroy(alg); } -static struct crypto_alg *crypto_mod_get(struct crypto_alg *alg) +struct crypto_alg *crypto_mod_get(struct crypto_alg *alg) { return try_module_get(alg->cra_module) ? crypto_alg_get(alg) : NULL; } +EXPORT_SYMBOL_GPL(crypto_mod_get); -static void crypto_mod_put(struct crypto_alg *alg) +void crypto_mod_put(struct crypto_alg *alg) { crypto_alg_put(alg); module_put(alg->cra_module); } +EXPORT_SYMBOL_GPL(crypto_mod_put); -static struct crypto_alg *crypto_alg_lookup(const char *name) +struct crypto_alg *__crypto_alg_lookup(const char *name) { struct crypto_alg *q, *alg = NULL; - int best = -1; + int best = -2; - if (!name) - return NULL; - - down_read(&crypto_alg_sem); - list_for_each_entry(q, &crypto_alg_list, cra_list) { int exact, fuzzy; @@ -79,16 +80,113 @@ static struct crypto_alg *crypto_alg_lookup(const char *name) if (exact) break; } - + + return alg; +} +EXPORT_SYMBOL_GPL(__crypto_alg_lookup); + +static void crypto_larval_destroy(struct crypto_alg *alg) +{ + struct crypto_larval *larval = (void *)alg; + + BUG_ON(!crypto_is_larval(alg)); + if (larval->adult) + crypto_mod_put(larval->adult); + kfree(larval); +} + +static struct crypto_alg *crypto_larval_alloc(const char *name) +{ + struct crypto_alg *alg; + struct crypto_larval *larval; + + larval = kzalloc(sizeof(*larval), GFP_KERNEL); + if (!larval) + return NULL; + + larval->alg.cra_flags = CRYPTO_ALG_LARVAL; + larval->alg.cra_priority = -1; + larval->alg.cra_destroy = crypto_larval_destroy; + + atomic_set(&larval->alg.cra_refcnt, 2); + strlcpy(larval->alg.cra_name, name, CRYPTO_MAX_ALG_NAME); + init_completion(&larval->completion); + + down_write(&crypto_alg_sem); + alg = __crypto_alg_lookup(name); + if (!alg) { + alg = &larval->alg; + list_add(&alg->cra_list, &crypto_alg_list); + } + up_write(&crypto_alg_sem); + + if (alg != &larval->alg) + kfree(larval); + + return alg; +} + +static void crypto_larval_kill(struct crypto_alg *alg) +{ + struct crypto_larval *larval = (void *)alg; + + down_write(&crypto_alg_sem); + list_del(&alg->cra_list); + up_write(&crypto_alg_sem); + complete(&larval->completion); + crypto_alg_put(alg); +} + +static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg) +{ + struct crypto_larval *larval = (void *)alg; + + wait_for_completion_interruptible_timeout(&larval->completion, 60 * HZ); + alg = larval->adult; + if (alg && !crypto_mod_get(alg)) + alg = NULL; + crypto_mod_put(&larval->alg); + + return alg; +} + +static struct crypto_alg *crypto_alg_lookup(const char *name) +{ + struct crypto_alg *alg; + + if (!name) + return NULL; + + down_read(&crypto_alg_sem); + alg = __crypto_alg_lookup(name); up_read(&crypto_alg_sem); + return alg; } /* A far more intelligent version of this is planned. For now, just * try an exact match on the name of the algorithm. */ -static inline struct crypto_alg *crypto_alg_mod_lookup(const char *name) +static struct crypto_alg *crypto_alg_mod_lookup(const char *name) { - return try_then_request_module(crypto_alg_lookup(name), name); + struct crypto_alg *alg; + struct crypto_alg *larval; + + alg = try_then_request_module(crypto_alg_lookup(name), name); + if (alg) + return crypto_is_larval(alg) ? crypto_larval_wait(alg) : alg; + + larval = crypto_larval_alloc(name); + if (!larval || !crypto_is_larval(larval)) + return larval; + + if (crypto_notify(CRYPTO_MSG_ALG_REQUEST, larval) == NOTIFY_STOP) + alg = crypto_larval_wait(larval); + else { + crypto_mod_put(larval); + alg = NULL; + } + crypto_larval_kill(larval); + return alg; } static int crypto_init_flags(struct crypto_tfm *tfm, u32 flags) diff --git a/crypto/internal.h b/crypto/internal.h index c3ab4a950f30..3a08d25fba45 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -14,6 +14,7 @@ #define _CRYPTO_INTERNAL_H #include +#include #include #include #include @@ -21,15 +22,32 @@ #include #include #include +#include #include #include #include +/* Crypto notification events. */ +enum { + CRYPTO_MSG_ALG_REQUEST, + CRYPTO_MSG_ALG_REGISTER, + CRYPTO_MSG_ALG_UNREGISTER, + CRYPTO_MSG_TMPL_REGISTER, + CRYPTO_MSG_TMPL_UNREGISTER, +}; + struct crypto_instance; struct crypto_template; +struct crypto_larval { + struct crypto_alg alg; + struct crypto_alg *adult; + struct completion completion; +}; + extern struct list_head crypto_alg_list; extern struct rw_semaphore crypto_alg_sem; +extern struct blocking_notifier_head crypto_chain; extern enum km_type crypto_km_types[]; @@ -104,6 +122,10 @@ static inline unsigned int crypto_compress_ctxsize(struct crypto_alg *alg, return alg->cra_ctxsize; } +struct crypto_alg *crypto_mod_get(struct crypto_alg *alg); +void crypto_mod_put(struct crypto_alg *alg); +struct crypto_alg *__crypto_alg_lookup(const char *name); + int crypto_init_digest_flags(struct crypto_tfm *tfm, u32 flags); int crypto_init_cipher_flags(struct crypto_tfm *tfm, u32 flags); int crypto_init_compress_flags(struct crypto_tfm *tfm, u32 flags); @@ -116,9 +138,14 @@ void crypto_exit_digest_ops(struct crypto_tfm *tfm); void crypto_exit_cipher_ops(struct crypto_tfm *tfm); void crypto_exit_compress_ops(struct crypto_tfm *tfm); +void crypto_larval_error(const char *name); + int crypto_register_instance(struct crypto_template *tmpl, struct crypto_instance *inst); +int crypto_register_notifier(struct notifier_block *nb); +int crypto_unregister_notifier(struct notifier_block *nb); + static inline int crypto_tmpl_get(struct crypto_template *tmpl) { return try_module_get(tmpl->module); @@ -129,5 +156,15 @@ static inline void crypto_tmpl_put(struct crypto_template *tmpl) module_put(tmpl->module); } +static inline int crypto_is_larval(struct crypto_alg *alg) +{ + return alg->cra_flags & CRYPTO_ALG_LARVAL; +} + +static inline int crypto_notify(unsigned long val, void *v) +{ + return blocking_notifier_call_chain(&crypto_chain, val, v); +} + #endif /* _CRYPTO_INTERNAL_H */ diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 7f57ff8ec975..3e3e95aff133 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -29,11 +29,13 @@ /* * Algorithm masks and types. */ -#define CRYPTO_ALG_TYPE_MASK 0x000000ff +#define CRYPTO_ALG_TYPE_MASK 0x0000000f #define CRYPTO_ALG_TYPE_CIPHER 0x00000001 #define CRYPTO_ALG_TYPE_DIGEST 0x00000002 #define CRYPTO_ALG_TYPE_COMPRESS 0x00000004 +#define CRYPTO_ALG_LARVAL 0x00000010 + /* * Transform masks and values (for crt_flags). */ From 2b8c19dbdc692e81243a328725a02efb77b144a5 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 21 Sep 2006 11:31:44 +1000 Subject: [PATCH 0286/1063] [CRYPTO] api: Add cryptomgr The cryptomgr module is a simple manager of crypto algorithm instances. It ensures that parameterised algorithms of the type tmpl(alg) (e.g., cbc(aes)) are always created. This is meant to satisfy the needs for most users. For more complex cases such as deeper combinations or multiple parameters, a netlink module will be created which allows arbitrary expressions to be parsed in user-space. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/Kconfig | 8 +++ crypto/Makefile | 1 + crypto/api.c | 10 ++- crypto/cryptomgr.c | 146 +++++++++++++++++++++++++++++++++++++++++ include/linux/crypto.h | 9 +++ 5 files changed, 173 insertions(+), 1 deletion(-) create mode 100644 crypto/cryptomgr.c diff --git a/crypto/Kconfig b/crypto/Kconfig index aabc63195222..4ce509dba329 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -16,6 +16,14 @@ config CRYPTO_ALGAPI help This option provides the API for cryptographic algorithms. +config CRYPTO_MANAGER + tristate "Cryptographic algorithm manager" + select CRYPTO_ALGAPI + default m + help + Create default cryptographic template instantiations such as + cbc(aes). + config CRYPTO_HMAC bool "HMAC support" help diff --git a/crypto/Makefile b/crypto/Makefile index 6d51f80753a1..b8745f3d3595 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -8,6 +8,7 @@ crypto_algapi-$(CONFIG_PROC_FS) += proc.o crypto_algapi-objs := algapi.o $(crypto_algapi-y) obj-$(CONFIG_CRYPTO_ALGAPI) += crypto_algapi.o +obj-$(CONFIG_CRYPTO_MANAGER) += cryptomgr.o obj-$(CONFIG_CRYPTO_HMAC) += hmac.o obj-$(CONFIG_CRYPTO_NULL) += crypto_null.o obj-$(CONFIG_CRYPTO_MD4) += md4.o diff --git a/crypto/api.c b/crypto/api.c index 5a0d6a17cfd7..67cd6f87b74a 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -170,6 +171,7 @@ static struct crypto_alg *crypto_alg_mod_lookup(const char *name) { struct crypto_alg *alg; struct crypto_alg *larval; + int ok; alg = try_then_request_module(crypto_alg_lookup(name), name); if (alg) @@ -179,7 +181,13 @@ static struct crypto_alg *crypto_alg_mod_lookup(const char *name) if (!larval || !crypto_is_larval(larval)) return larval; - if (crypto_notify(CRYPTO_MSG_ALG_REQUEST, larval) == NOTIFY_STOP) + ok = crypto_notify(CRYPTO_MSG_ALG_REQUEST, larval); + if (ok == NOTIFY_DONE) { + request_module("cryptomgr"); + ok = crypto_notify(CRYPTO_MSG_ALG_REQUEST, larval); + } + + if (ok == NOTIFY_STOP) alg = crypto_larval_wait(larval); else { crypto_mod_put(larval); diff --git a/crypto/cryptomgr.c b/crypto/cryptomgr.c new file mode 100644 index 000000000000..e0ebe1b44b99 --- /dev/null +++ b/crypto/cryptomgr.c @@ -0,0 +1,146 @@ +/* + * Create default crypto algorithm instances. + * + * Copyright (c) 2006 Herbert Xu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +struct cryptomgr_param { + struct work_struct work; + + struct { + struct rtattr attr; + struct crypto_attr_alg data; + } alg; + + struct { + char name[CRYPTO_MAX_ALG_NAME]; + } larval; + + char template[CRYPTO_MAX_ALG_NAME]; +}; + +static void cryptomgr_probe(void *data) +{ + struct cryptomgr_param *param = data; + struct crypto_template *tmpl; + struct crypto_instance *inst; + + tmpl = crypto_lookup_template(param->template); + if (!tmpl) + goto err; + + inst = tmpl->alloc(¶m->alg, sizeof(param->alg)); + if (IS_ERR(inst)) + goto err; + else if ((err = crypto_register_instance(tmpl, inst))) { + tmpl->free(inst); + goto err; + } + + crypto_tmpl_put(tmpl); + +out: + kfree(param); + return; + +err: + crypto_larval_error(param->larval.name); + goto out; +} + +static int cryptomgr_schedule_probe(struct crypto_larval *larval) +{ + struct cryptomgr_param *param; + const char *name = larval->alg.cra_name; + const char *p; + unsigned int len; + + param = kmalloc(sizeof(*param), GFP_KERNEL); + if (!param) + goto err; + + for (p = name; isalnum(*p) || *p == '-' || *p == '_'; p++) + ; + + len = p - name; + if (!len || *p != '(') + goto err_free_param; + + memcpy(param->template, name, len); + param->template[len] = 0; + + name = p + 1; + for (p = name; isalnum(*p) || *p == '-' || *p == '_'; p++) + ; + + len = p - name; + if (!len || *p != ')' || p[1]) + goto err_free_param; + + param->alg.attr.rta_len = sizeof(param->alg); + param->alg.attr.rta_type = CRYPTOA_ALG; + memcpy(param->alg.data.name, name, len); + param->alg.data.name[len] = 0; + + memcpy(param->larval.name, larval->alg.cra_name, CRYPTO_MAX_ALG_NAME); + + INIT_WORK(¶m->work, cryptomgr_probe, param); + schedule_work(¶m->work); + + return NOTIFY_STOP; + +err_free_param: + kfree(param); +err: + return NOTIFY_OK; +} + +static int cryptomgr_notify(struct notifier_block *this, unsigned long msg, + void *data) +{ + switch (msg) { + case CRYPTO_MSG_ALG_REQUEST: + return cryptomgr_schedule_probe(data); + } + + return NOTIFY_DONE; +} + +static struct notifier_block cryptomgr_notifier = { + .notifier_call = cryptomgr_notify, +}; + +static int __init cryptomgr_init(void) +{ + return crypto_register_notifier(&cryptomgr_notifier); +} + +static void __exit cryptomgr_exit(void) +{ + int err = crypto_unregister_notifier(&cryptomgr_notifier); + BUG_ON(err); +} + +module_init(cryptomgr_init); +module_exit(cryptomgr_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Crypto Algorithm Manager"); diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 3e3e95aff133..85f73c381913 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -260,6 +260,15 @@ struct crypto_tfm { void *__crt_ctx[] CRYPTO_MINALIGN_ATTR; }; +enum { + CRYPTOA_UNSPEC, + CRYPTOA_ALG, +}; + +struct crypto_attr_alg { + char name[CRYPTO_MAX_ALG_NAME]; +}; + /* * Transform user interface. */ From 492e2b63eb10c28f4f0b694264d74a8755cd1be0 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 21 Sep 2006 11:35:17 +1000 Subject: [PATCH 0287/1063] [CRYPTO] api: Allow algorithm lookup by type This patch also adds the infrastructure to pick an algorithm based on their type. For example, this allows you to select the encryption algorithm "aes", instead of any algorithm registered under the name "aes". For now this is only accessible internally. Eventually it will be made available through crypto_alloc_tfm. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/algapi.c | 6 ++++-- crypto/api.c | 39 ++++++++++++++++++++++++++------------- crypto/cryptomgr.c | 7 ++++++- crypto/internal.h | 6 ++++-- 4 files changed, 40 insertions(+), 18 deletions(-) diff --git a/crypto/algapi.c b/crypto/algapi.c index f0df85fc1f50..acea250677c0 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -21,12 +21,12 @@ static LIST_HEAD(crypto_template_list); -void crypto_larval_error(const char *name) +void crypto_larval_error(const char *name, u32 type, u32 mask) { struct crypto_alg *alg; down_read(&crypto_alg_sem); - alg = __crypto_alg_lookup(name); + alg = __crypto_alg_lookup(name, type, mask); up_read(&crypto_alg_sem); if (alg) { @@ -87,6 +87,8 @@ static int __crypto_register_alg(struct crypto_alg *alg) !strcmp(alg->cra_driver_name, q->cra_name))) { struct crypto_larval *larval = (void *)q; + if ((q->cra_flags ^ alg->cra_flags) & larval->mask) + continue; if (!crypto_mod_get(alg)) continue; larval->adult = alg; diff --git a/crypto/api.c b/crypto/api.c index 67cd6f87b74a..ddf6a767acdd 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -57,7 +57,7 @@ void crypto_mod_put(struct crypto_alg *alg) } EXPORT_SYMBOL_GPL(crypto_mod_put); -struct crypto_alg *__crypto_alg_lookup(const char *name) +struct crypto_alg *__crypto_alg_lookup(const char *name, u32 type, u32 mask) { struct crypto_alg *q, *alg = NULL; int best = -2; @@ -65,6 +65,13 @@ struct crypto_alg *__crypto_alg_lookup(const char *name) list_for_each_entry(q, &crypto_alg_list, cra_list) { int exact, fuzzy; + if ((q->cra_flags ^ type) & mask) + continue; + + if (crypto_is_larval(q) && + ((struct crypto_larval *)q)->mask != mask) + continue; + exact = !strcmp(q->cra_driver_name, name); fuzzy = !strcmp(q->cra_name, name); if (!exact && !(fuzzy && q->cra_priority > best)) @@ -96,7 +103,8 @@ static void crypto_larval_destroy(struct crypto_alg *alg) kfree(larval); } -static struct crypto_alg *crypto_larval_alloc(const char *name) +static struct crypto_alg *crypto_larval_alloc(const char *name, u32 type, + u32 mask) { struct crypto_alg *alg; struct crypto_larval *larval; @@ -105,7 +113,8 @@ static struct crypto_alg *crypto_larval_alloc(const char *name) if (!larval) return NULL; - larval->alg.cra_flags = CRYPTO_ALG_LARVAL; + larval->mask = mask; + larval->alg.cra_flags = CRYPTO_ALG_LARVAL | type; larval->alg.cra_priority = -1; larval->alg.cra_destroy = crypto_larval_destroy; @@ -114,7 +123,7 @@ static struct crypto_alg *crypto_larval_alloc(const char *name) init_completion(&larval->completion); down_write(&crypto_alg_sem); - alg = __crypto_alg_lookup(name); + alg = __crypto_alg_lookup(name, type, mask); if (!alg) { alg = &larval->alg; list_add(&alg->cra_list, &crypto_alg_list); @@ -151,7 +160,8 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg) return alg; } -static struct crypto_alg *crypto_alg_lookup(const char *name) +static struct crypto_alg *crypto_alg_lookup(const char *name, u32 type, + u32 mask) { struct crypto_alg *alg; @@ -159,25 +169,27 @@ static struct crypto_alg *crypto_alg_lookup(const char *name) return NULL; down_read(&crypto_alg_sem); - alg = __crypto_alg_lookup(name); + alg = __crypto_alg_lookup(name, type, mask); up_read(&crypto_alg_sem); return alg; } -/* A far more intelligent version of this is planned. For now, just - * try an exact match on the name of the algorithm. */ -static struct crypto_alg *crypto_alg_mod_lookup(const char *name) +struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask) { struct crypto_alg *alg; struct crypto_alg *larval; int ok; - alg = try_then_request_module(crypto_alg_lookup(name), name); + mask &= ~CRYPTO_ALG_LARVAL; + type &= mask; + + alg = try_then_request_module(crypto_alg_lookup(name, type, mask), + name); if (alg) return crypto_is_larval(alg) ? crypto_larval_wait(alg) : alg; - larval = crypto_larval_alloc(name); + larval = crypto_larval_alloc(name, type, mask); if (!larval || !crypto_is_larval(larval)) return larval; @@ -196,6 +208,7 @@ static struct crypto_alg *crypto_alg_mod_lookup(const char *name) crypto_larval_kill(larval); return alg; } +EXPORT_SYMBOL_GPL(crypto_alg_mod_lookup); static int crypto_init_flags(struct crypto_tfm *tfm, u32 flags) { @@ -291,7 +304,7 @@ struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags) struct crypto_alg *alg; unsigned int tfm_size; - alg = crypto_alg_mod_lookup(name); + alg = crypto_alg_mod_lookup(name, 0, 0); if (alg == NULL) goto out; @@ -346,7 +359,7 @@ void crypto_free_tfm(struct crypto_tfm *tfm) int crypto_alg_available(const char *name, u32 flags) { int ret = 0; - struct crypto_alg *alg = crypto_alg_mod_lookup(name); + struct crypto_alg *alg = crypto_alg_mod_lookup(name, 0, 0); if (alg) { crypto_mod_put(alg); diff --git a/crypto/cryptomgr.c b/crypto/cryptomgr.c index e0ebe1b44b99..ae54942e3b31 100644 --- a/crypto/cryptomgr.c +++ b/crypto/cryptomgr.c @@ -31,6 +31,8 @@ struct cryptomgr_param { } alg; struct { + u32 type; + u32 mask; char name[CRYPTO_MAX_ALG_NAME]; } larval; @@ -62,7 +64,8 @@ out: return; err: - crypto_larval_error(param->larval.name); + crypto_larval_error(param->larval.name, param->larval.type, + param->larval.mask); goto out; } @@ -101,6 +104,8 @@ static int cryptomgr_schedule_probe(struct crypto_larval *larval) param->alg.data.name[len] = 0; memcpy(param->larval.name, larval->alg.cra_name, CRYPTO_MAX_ALG_NAME); + param->larval.type = larval->alg.cra_flags; + param->larval.mask = larval->mask; INIT_WORK(¶m->work, cryptomgr_probe, param); schedule_work(¶m->work); diff --git a/crypto/internal.h b/crypto/internal.h index 3a08d25fba45..c08d93bdadc4 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -43,6 +43,7 @@ struct crypto_larval { struct crypto_alg alg; struct crypto_alg *adult; struct completion completion; + u32 mask; }; extern struct list_head crypto_alg_list; @@ -124,7 +125,8 @@ static inline unsigned int crypto_compress_ctxsize(struct crypto_alg *alg, struct crypto_alg *crypto_mod_get(struct crypto_alg *alg); void crypto_mod_put(struct crypto_alg *alg); -struct crypto_alg *__crypto_alg_lookup(const char *name); +struct crypto_alg *__crypto_alg_lookup(const char *name, u32 type, u32 mask); +struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask); int crypto_init_digest_flags(struct crypto_tfm *tfm, u32 flags); int crypto_init_cipher_flags(struct crypto_tfm *tfm, u32 flags); @@ -138,7 +140,7 @@ void crypto_exit_digest_ops(struct crypto_tfm *tfm); void crypto_exit_cipher_ops(struct crypto_tfm *tfm); void crypto_exit_compress_ops(struct crypto_tfm *tfm); -void crypto_larval_error(const char *name); +void crypto_larval_error(const char *name, u32 type, u32 mask); int crypto_register_instance(struct crypto_template *tmpl, struct crypto_instance *inst); From 6bfd48096ff8ecabf955958b51ddfa7988eb0a14 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 21 Sep 2006 11:39:29 +1000 Subject: [PATCH 0288/1063] [CRYPTO] api: Added spawns Spawns lock a specific crypto algorithm in place. They can then be used with crypto_spawn_tfm to allocate a tfm for that algorithm. When the base algorithm of a spawn is deregistered, all its spawns will be automatically removed. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/algapi.c | 185 ++++++++++++++++++++++++++++++++++++---- crypto/api.c | 95 ++++++++++++++------- crypto/cryptomgr.c | 19 +++-- crypto/internal.h | 19 +++++ include/crypto/algapi.h | 11 +++ include/linux/crypto.h | 4 + 6 files changed, 280 insertions(+), 53 deletions(-) diff --git a/crypto/algapi.c b/crypto/algapi.c index acea250677c0..36c4f1bdb521 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -10,6 +10,7 @@ * */ +#include #include #include #include @@ -73,27 +74,96 @@ static int crypto_check_alg(struct crypto_alg *alg) return crypto_set_driver_name(alg); } -static int __crypto_register_alg(struct crypto_alg *alg) +static void crypto_destroy_instance(struct crypto_alg *alg) +{ + struct crypto_instance *inst = (void *)alg; + struct crypto_template *tmpl = inst->tmpl; + + tmpl->free(inst); + crypto_tmpl_put(tmpl); +} + +static void crypto_remove_spawns(struct list_head *spawns, + struct list_head *list) +{ + struct crypto_spawn *spawn, *n; + + list_for_each_entry_safe(spawn, n, spawns, list) { + struct crypto_instance *inst = spawn->inst; + struct crypto_template *tmpl = inst->tmpl; + + list_del_init(&spawn->list); + spawn->alg = NULL; + + if (crypto_is_dead(&inst->alg)) + continue; + + inst->alg.cra_flags |= CRYPTO_ALG_DEAD; + if (!tmpl || !crypto_tmpl_get(tmpl)) + continue; + + crypto_notify(CRYPTO_MSG_ALG_UNREGISTER, &inst->alg); + list_move(&inst->alg.cra_list, list); + hlist_del(&inst->list); + inst->alg.cra_destroy = crypto_destroy_instance; + + if (!list_empty(&inst->alg.cra_users)) { + if (&n->list == spawns) + n = list_entry(inst->alg.cra_users.next, + typeof(*n), list); + __list_splice(&inst->alg.cra_users, spawns->prev); + } + } +} + +static int __crypto_register_alg(struct crypto_alg *alg, + struct list_head *list) { struct crypto_alg *q; - int ret = -EEXIST; + int ret = -EAGAIN; + + if (crypto_is_dead(alg)) + goto out; + + INIT_LIST_HEAD(&alg->cra_users); + + ret = -EEXIST; atomic_set(&alg->cra_refcnt, 1); list_for_each_entry(q, &crypto_alg_list, cra_list) { if (q == alg) goto out; - if (crypto_is_larval(q) && - (!strcmp(alg->cra_name, q->cra_name) || - !strcmp(alg->cra_driver_name, q->cra_name))) { + + if (crypto_is_moribund(q)) + continue; + + if (crypto_is_larval(q)) { struct crypto_larval *larval = (void *)q; + if (strcmp(alg->cra_name, q->cra_name) && + strcmp(alg->cra_driver_name, q->cra_name)) + continue; + + if (larval->adult) + continue; if ((q->cra_flags ^ alg->cra_flags) & larval->mask) continue; if (!crypto_mod_get(alg)) continue; + larval->adult = alg; complete(&larval->completion); + continue; } + + if (strcmp(alg->cra_name, q->cra_name)) + continue; + + if (strcmp(alg->cra_driver_name, q->cra_driver_name) && + q->cra_priority > alg->cra_priority) + continue; + + crypto_remove_spawns(&q->cra_users, list); } list_add(&alg->cra_list, &crypto_alg_list); @@ -105,8 +175,20 @@ out: return ret; } +static void crypto_remove_final(struct list_head *list) +{ + struct crypto_alg *alg; + struct crypto_alg *n; + + list_for_each_entry_safe(alg, n, list, cra_list) { + list_del_init(&alg->cra_list); + crypto_alg_put(alg); + } +} + int crypto_register_alg(struct crypto_alg *alg) { + LIST_HEAD(list); int err; err = crypto_check_alg(alg); @@ -114,23 +196,35 @@ int crypto_register_alg(struct crypto_alg *alg) return err; down_write(&crypto_alg_sem); - err = __crypto_register_alg(alg); + err = __crypto_register_alg(alg, &list); up_write(&crypto_alg_sem); + crypto_remove_final(&list); return err; } EXPORT_SYMBOL_GPL(crypto_register_alg); +static int crypto_remove_alg(struct crypto_alg *alg, struct list_head *list) +{ + if (unlikely(list_empty(&alg->cra_list))) + return -ENOENT; + + alg->cra_flags |= CRYPTO_ALG_DEAD; + + crypto_notify(CRYPTO_MSG_ALG_UNREGISTER, alg); + list_del_init(&alg->cra_list); + crypto_remove_spawns(&alg->cra_users, list); + + return 0; +} + int crypto_unregister_alg(struct crypto_alg *alg) { - int ret = -ENOENT; + int ret; + LIST_HEAD(list); down_write(&crypto_alg_sem); - if (likely(!list_empty(&alg->cra_list))) { - list_del_init(&alg->cra_list); - ret = 0; - } - crypto_notify(CRYPTO_MSG_ALG_UNREGISTER, alg); + ret = crypto_remove_alg(alg, &list); up_write(&crypto_alg_sem); if (ret) @@ -140,6 +234,7 @@ int crypto_unregister_alg(struct crypto_alg *alg) if (alg->cra_destroy) alg->cra_destroy(alg); + crypto_remove_final(&list); return 0; } EXPORT_SYMBOL_GPL(crypto_unregister_alg); @@ -170,6 +265,7 @@ void crypto_unregister_template(struct crypto_template *tmpl) struct crypto_instance *inst; struct hlist_node *p, *n; struct hlist_head *list; + LIST_HEAD(users); down_write(&crypto_alg_sem); @@ -178,9 +274,8 @@ void crypto_unregister_template(struct crypto_template *tmpl) list = &tmpl->instances; hlist_for_each_entry(inst, p, list, list) { - BUG_ON(list_empty(&inst->alg.cra_list)); - list_del_init(&inst->alg.cra_list); - crypto_notify(CRYPTO_MSG_ALG_UNREGISTER, &inst->alg); + int err = crypto_remove_alg(&inst->alg, &users); + BUG_ON(err); } crypto_notify(CRYPTO_MSG_TMPL_UNREGISTER, tmpl); @@ -191,6 +286,7 @@ void crypto_unregister_template(struct crypto_template *tmpl) BUG_ON(atomic_read(&inst->alg.cra_refcnt) != 1); tmpl->free(inst); } + crypto_remove_final(&users); } EXPORT_SYMBOL_GPL(crypto_unregister_template); @@ -222,6 +318,7 @@ EXPORT_SYMBOL_GPL(crypto_lookup_template); int crypto_register_instance(struct crypto_template *tmpl, struct crypto_instance *inst) { + LIST_HEAD(list); int err = -EINVAL; if (inst->alg.cra_destroy) @@ -235,7 +332,7 @@ int crypto_register_instance(struct crypto_template *tmpl, down_write(&crypto_alg_sem); - err = __crypto_register_alg(&inst->alg); + err = __crypto_register_alg(&inst->alg, &list); if (err) goto unlock; @@ -245,11 +342,67 @@ int crypto_register_instance(struct crypto_template *tmpl, unlock: up_write(&crypto_alg_sem); + crypto_remove_final(&list); + err: return err; } EXPORT_SYMBOL_GPL(crypto_register_instance); +int crypto_init_spawn(struct crypto_spawn *spawn, struct crypto_alg *alg, + struct crypto_instance *inst) +{ + int err = -EAGAIN; + + spawn->inst = inst; + + down_write(&crypto_alg_sem); + if (!crypto_is_moribund(alg)) { + list_add(&spawn->list, &alg->cra_users); + spawn->alg = alg; + err = 0; + } + up_write(&crypto_alg_sem); + + return err; +} +EXPORT_SYMBOL_GPL(crypto_init_spawn); + +void crypto_drop_spawn(struct crypto_spawn *spawn) +{ + down_write(&crypto_alg_sem); + list_del(&spawn->list); + up_write(&crypto_alg_sem); +} +EXPORT_SYMBOL_GPL(crypto_drop_spawn); + +struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn) +{ + struct crypto_alg *alg; + struct crypto_alg *alg2; + struct crypto_tfm *tfm; + + down_read(&crypto_alg_sem); + alg = spawn->alg; + alg2 = alg; + if (alg2) + alg2 = crypto_mod_get(alg2); + up_read(&crypto_alg_sem); + + if (!alg2) { + if (alg) + crypto_shoot_alg(alg); + return ERR_PTR(-EAGAIN); + } + + tfm = __crypto_alloc_tfm(alg, 0); + if (IS_ERR(tfm)) + crypto_mod_put(alg); + + return tfm; +} +EXPORT_SYMBOL_GPL(crypto_spawn_tfm); + int crypto_register_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&crypto_chain, nb); diff --git a/crypto/api.c b/crypto/api.c index ddf6a767acdd..7e5522cf856e 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -15,11 +15,13 @@ * */ +#include #include #include #include #include #include +#include #include #include #include "internal.h" @@ -38,12 +40,6 @@ static inline struct crypto_alg *crypto_alg_get(struct crypto_alg *alg) return alg; } -static inline void crypto_alg_put(struct crypto_alg *alg) -{ - if (atomic_dec_and_test(&alg->cra_refcnt) && alg->cra_destroy) - alg->cra_destroy(alg); -} - struct crypto_alg *crypto_mod_get(struct crypto_alg *alg) { return try_module_get(alg->cra_module) ? crypto_alg_get(alg) : NULL; @@ -65,6 +61,9 @@ struct crypto_alg *__crypto_alg_lookup(const char *name, u32 type, u32 mask) list_for_each_entry(q, &crypto_alg_list, cra_list) { int exact, fuzzy; + if (crypto_is_moribund(q)) + continue; + if ((q->cra_flags ^ type) & mask) continue; @@ -111,7 +110,7 @@ static struct crypto_alg *crypto_larval_alloc(const char *name, u32 type, larval = kzalloc(sizeof(*larval), GFP_KERNEL); if (!larval) - return NULL; + return ERR_PTR(-ENOMEM); larval->mask = mask; larval->alg.cra_flags = CRYPTO_ALG_LARVAL | type; @@ -153,8 +152,11 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg) wait_for_completion_interruptible_timeout(&larval->completion, 60 * HZ); alg = larval->adult; - if (alg && !crypto_mod_get(alg)) - alg = NULL; + if (alg) { + if (!crypto_mod_get(alg)) + alg = ERR_PTR(-EAGAIN); + } else + alg = ERR_PTR(-ENOENT); crypto_mod_put(&larval->alg); return alg; @@ -165,9 +167,6 @@ static struct crypto_alg *crypto_alg_lookup(const char *name, u32 type, { struct crypto_alg *alg; - if (!name) - return NULL; - down_read(&crypto_alg_sem); alg = __crypto_alg_lookup(name, type, mask); up_read(&crypto_alg_sem); @@ -181,7 +180,10 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask) struct crypto_alg *larval; int ok; - mask &= ~CRYPTO_ALG_LARVAL; + if (!name) + return ERR_PTR(-ENOENT); + + mask &= ~(CRYPTO_ALG_LARVAL | CRYPTO_ALG_DEAD); type &= mask; alg = try_then_request_module(crypto_alg_lookup(name, type, mask), @@ -190,7 +192,7 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask) return crypto_is_larval(alg) ? crypto_larval_wait(alg) : alg; larval = crypto_larval_alloc(name, type, mask); - if (!larval || !crypto_is_larval(larval)) + if (IS_ERR(larval) || !crypto_is_larval(larval)) return larval; ok = crypto_notify(CRYPTO_MSG_ALG_REQUEST, larval); @@ -203,7 +205,7 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask) alg = crypto_larval_wait(larval); else { crypto_mod_put(larval); - alg = NULL; + alg = ERR_PTR(-ENOENT); } crypto_larval_kill(larval); return alg; @@ -298,31 +300,40 @@ static unsigned int crypto_ctxsize(struct crypto_alg *alg, int flags) return len + (alg->cra_alignmask & ~(crypto_tfm_ctx_alignment() - 1)); } -struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags) +void crypto_shoot_alg(struct crypto_alg *alg) +{ + down_write(&crypto_alg_sem); + alg->cra_flags |= CRYPTO_ALG_DYING; + up_write(&crypto_alg_sem); +} +EXPORT_SYMBOL_GPL(crypto_shoot_alg); + +struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 flags) { struct crypto_tfm *tfm = NULL; - struct crypto_alg *alg; unsigned int tfm_size; - - alg = crypto_alg_mod_lookup(name, 0, 0); - if (alg == NULL) - goto out; + int err = -ENOMEM; tfm_size = sizeof(*tfm) + crypto_ctxsize(alg, flags); tfm = kzalloc(tfm_size, GFP_KERNEL); if (tfm == NULL) - goto out_put; + goto out; tfm->__crt_alg = alg; - - if (crypto_init_flags(tfm, flags)) + + err = crypto_init_flags(tfm, flags); + if (err) goto out_free_tfm; - if (crypto_init_ops(tfm)) + err = crypto_init_ops(tfm); + if (err) goto out_free_tfm; - if (alg->cra_init && alg->cra_init(tfm)) + if (alg->cra_init && (err = alg->cra_init(tfm))) { + if (err == -EAGAIN) + crypto_shoot_alg(alg); goto cra_init_failed; + } goto out; @@ -330,12 +341,36 @@ cra_init_failed: crypto_exit_ops(tfm); out_free_tfm: kfree(tfm); - tfm = NULL; -out_put: - crypto_mod_put(alg); + tfm = ERR_PTR(err); out: return tfm; } +EXPORT_SYMBOL_GPL(__crypto_alloc_tfm); + +struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags) +{ + struct crypto_tfm *tfm = NULL; + int err; + + do { + struct crypto_alg *alg; + + alg = crypto_alg_mod_lookup(name, 0, 0); + err = PTR_ERR(alg); + if (IS_ERR(alg)) + continue; + + tfm = __crypto_alloc_tfm(alg, flags); + err = 0; + if (IS_ERR(tfm)) { + crypto_mod_put(alg); + err = PTR_ERR(tfm); + tfm = NULL; + } + } while (err == -EAGAIN && !signal_pending(current)); + + return tfm; +} void crypto_free_tfm(struct crypto_tfm *tfm) { @@ -361,7 +396,7 @@ int crypto_alg_available(const char *name, u32 flags) int ret = 0; struct crypto_alg *alg = crypto_alg_mod_lookup(name, 0, 0); - if (alg) { + if (!IS_ERR(alg)) { crypto_mod_put(alg); ret = 1; } diff --git a/crypto/cryptomgr.c b/crypto/cryptomgr.c index ae54942e3b31..9b5b15601068 100644 --- a/crypto/cryptomgr.c +++ b/crypto/cryptomgr.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -44,21 +45,25 @@ static void cryptomgr_probe(void *data) struct cryptomgr_param *param = data; struct crypto_template *tmpl; struct crypto_instance *inst; + int err; tmpl = crypto_lookup_template(param->template); if (!tmpl) goto err; - inst = tmpl->alloc(¶m->alg, sizeof(param->alg)); - if (IS_ERR(inst)) - goto err; - else if ((err = crypto_register_instance(tmpl, inst))) { - tmpl->free(inst); - goto err; - } + do { + inst = tmpl->alloc(¶m->alg, sizeof(param->alg)); + if (IS_ERR(inst)) + err = PTR_ERR(inst); + else if ((err = crypto_register_instance(tmpl, inst))) + tmpl->free(inst); + } while (err == -EAGAIN && !signal_pending(current)); crypto_tmpl_put(tmpl); + if (err) + goto err; + out: kfree(param); return; diff --git a/crypto/internal.h b/crypto/internal.h index c08d93bdadc4..03c00b0e6b60 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -142,12 +142,21 @@ void crypto_exit_compress_ops(struct crypto_tfm *tfm); void crypto_larval_error(const char *name, u32 type, u32 mask); +void crypto_shoot_alg(struct crypto_alg *alg); +struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 flags); + int crypto_register_instance(struct crypto_template *tmpl, struct crypto_instance *inst); int crypto_register_notifier(struct notifier_block *nb); int crypto_unregister_notifier(struct notifier_block *nb); +static inline void crypto_alg_put(struct crypto_alg *alg) +{ + if (atomic_dec_and_test(&alg->cra_refcnt) && alg->cra_destroy) + alg->cra_destroy(alg); +} + static inline int crypto_tmpl_get(struct crypto_template *tmpl) { return try_module_get(tmpl->module); @@ -163,6 +172,16 @@ static inline int crypto_is_larval(struct crypto_alg *alg) return alg->cra_flags & CRYPTO_ALG_LARVAL; } +static inline int crypto_is_dead(struct crypto_alg *alg) +{ + return alg->cra_flags & CRYPTO_ALG_DEAD; +} + +static inline int crypto_is_moribund(struct crypto_alg *alg) +{ + return alg->cra_flags & (CRYPTO_ALG_DEAD | CRYPTO_ALG_DYING); +} + static inline int crypto_notify(unsigned long val, void *v) { return blocking_notifier_call_chain(&crypto_chain, val, v); diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index ffec530d52fb..b20f4bdb23ba 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -36,10 +36,21 @@ struct crypto_template { char name[CRYPTO_MAX_ALG_NAME]; }; +struct crypto_spawn { + struct list_head list; + struct crypto_alg *alg; + struct crypto_instance *inst; +}; + int crypto_register_template(struct crypto_template *tmpl); void crypto_unregister_template(struct crypto_template *tmpl); struct crypto_template *crypto_lookup_template(const char *name); +int crypto_init_spawn(struct crypto_spawn *spawn, struct crypto_alg *alg, + struct crypto_instance *inst); +void crypto_drop_spawn(struct crypto_spawn *spawn); +struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn); + static inline void *crypto_instance_ctx(struct crypto_instance *inst) { return inst->__ctx; diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 85f73c381913..40a6330abc8d 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -35,6 +35,8 @@ #define CRYPTO_ALG_TYPE_COMPRESS 0x00000004 #define CRYPTO_ALG_LARVAL 0x00000010 +#define CRYPTO_ALG_DEAD 0x00000020 +#define CRYPTO_ALG_DYING 0x00000040 /* * Transform masks and values (for crt_flags). @@ -145,6 +147,8 @@ struct compress_alg { struct crypto_alg { struct list_head cra_list; + struct list_head cra_users; + u32 cra_flags; unsigned int cra_blocksize; unsigned int cra_ctxsize; From b3be9a6d9a78bb820f5242f43b98f38b0ca610a6 Mon Sep 17 00:00:00 2001 From: Michal Ludvig Date: Sun, 9 Jul 2006 08:59:38 +1000 Subject: [PATCH 0289/1063] [CRYPTO] sha: Add module aliases for sha1 / sha256 Crypto modules should be loadable by their .cra_driver_name, so we should make MODULE_ALIAS()es with these names. This patch adds aliases for SHA1 and SHA256 only as that's what we need for PadLock-SHA driver. Signed-off-by: Michal Ludvig Signed-off-by: Herbert Xu --- crypto/sha1.c | 3 +++ crypto/sha256.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/crypto/sha1.c b/crypto/sha1.c index 6c77b689f87e..1bba551e5b45 100644 --- a/crypto/sha1.c +++ b/crypto/sha1.c @@ -109,6 +109,7 @@ static void sha1_final(struct crypto_tfm *tfm, u8 *out) static struct crypto_alg alg = { .cra_name = "sha1", + .cra_driver_name= "sha1-generic", .cra_flags = CRYPTO_ALG_TYPE_DIGEST, .cra_blocksize = SHA1_HMAC_BLOCK_SIZE, .cra_ctxsize = sizeof(struct sha1_ctx), @@ -137,3 +138,5 @@ module_exit(fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm"); + +MODULE_ALIAS("sha1-generic"); diff --git a/crypto/sha256.c b/crypto/sha256.c index bc71d85a7d02..716195bb54f2 100644 --- a/crypto/sha256.c +++ b/crypto/sha256.c @@ -309,6 +309,7 @@ static void sha256_final(struct crypto_tfm *tfm, u8 *out) static struct crypto_alg alg = { .cra_name = "sha256", + .cra_driver_name= "sha256-generic", .cra_flags = CRYPTO_ALG_TYPE_DIGEST, .cra_blocksize = SHA256_HMAC_BLOCK_SIZE, .cra_ctxsize = sizeof(struct sha256_ctx), @@ -337,3 +338,5 @@ module_exit(fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm"); + +MODULE_ALIAS("sha256-generic"); From b14cdd6704c96474ba5c74b5959487beaa5ee1cd Mon Sep 17 00:00:00 2001 From: Michal Ludvig Date: Sun, 9 Jul 2006 09:02:24 +1000 Subject: [PATCH 0290/1063] [CRYPTO] api: Add missing accessors for new crypto_alg fields Add missing accessors for cra_driver_name and cra_priority. Signed-off-by: Michal Ludvig Signed-off-by: Herbert Xu --- include/linux/crypto.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 40a6330abc8d..d6e184c876b5 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -297,6 +297,16 @@ static inline const char *crypto_tfm_alg_name(struct crypto_tfm *tfm) return tfm->__crt_alg->cra_name; } +static inline const char *crypto_tfm_alg_driver_name(struct crypto_tfm *tfm) +{ + return tfm->__crt_alg->cra_driver_name; +} + +static inline int crypto_tfm_alg_priority(struct crypto_tfm *tfm) +{ + return tfm->__crt_alg->cra_priority; +} + static inline const char *crypto_tfm_alg_modname(struct crypto_tfm *tfm) { return module_name(tfm->__crt_alg->cra_module); From 1191f0a49390caf16f4a2831a4fc373757471ad6 Mon Sep 17 00:00:00 2001 From: Michal Ludvig Date: Sun, 6 Aug 2006 22:46:20 +1000 Subject: [PATCH 0291/1063] [CRYPTO] padlock: Get rid of padlock-generic.c Merge padlock-generic.c into padlock-aes.c and compile AES as a standalone module. We won't make a monolithic padlock.ko with all supported algorithms, instead we'll compile each driver into its own module. Signed-off-by: Michal Ludvig Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 16 +++++--- drivers/crypto/Makefile | 8 +--- drivers/crypto/padlock-aes.c | 34 +++++++++++++++-- drivers/crypto/padlock-generic.c | 63 -------------------------------- 4 files changed, 42 insertions(+), 79 deletions(-) delete mode 100644 drivers/crypto/padlock-generic.c diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index ba23683ab8c4..d260c86218fa 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -1,24 +1,30 @@ menu "Hardware crypto devices" config CRYPTO_DEV_PADLOCK - tristate "Support for VIA PadLock ACE" + bool "Support for VIA PadLock ACE" depends on X86_32 select CRYPTO_ALGAPI + default y help Some VIA processors come with an integrated crypto engine (so called VIA PadLock ACE, Advanced Cryptography Engine) - that provides instructions for very fast {en,de}cryption - with some algorithms. + that provides instructions for very fast cryptographic + operations with supported algorithms. The instructions are used only when the CPU supports them. Otherwise software encryption is used. If you are unsure, say Y. config CRYPTO_DEV_PADLOCK_AES - bool "Support for AES in VIA PadLock" + tristate "PadLock driver for AES algorithm" depends on CRYPTO_DEV_PADLOCK - default y + default m help Use VIA PadLock for AES algorithm. + Available in VIA C3 and newer CPUs. + + If unsure say M. The compiled module will be + called padlock-aes.ko + endmenu diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 45426ca19a23..5e7d7d5e805a 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -1,7 +1 @@ - -obj-$(CONFIG_CRYPTO_DEV_PADLOCK) += padlock.o - -padlock-objs-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o - -padlock-objs := padlock-generic.o $(padlock-objs-y) - +obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index b643d71298a9..ee33bd6c1b77 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -495,15 +495,41 @@ static struct crypto_alg aes_alg = { } }; -int __init padlock_init_aes(void) +static int __init padlock_init(void) { - printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n"); + int ret; + + if (!cpu_has_xcrypt) { + printk(KERN_ERR PFX "VIA PadLock not detected.\n"); + return -ENODEV; + } + + if (!cpu_has_xcrypt_enabled) { + printk(KERN_ERR PFX "VIA PadLock detected, but not enabled. Hmm, strange...\n"); + return -ENODEV; + } gen_tabs(); - return crypto_register_alg(&aes_alg); + if ((ret = crypto_register_alg(&aes_alg))) { + printk(KERN_ERR PFX "VIA PadLock AES initialization failed.\n"); + return ret; + } + + printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n"); + + return ret; } -void __exit padlock_fini_aes(void) +static void __exit padlock_fini(void) { crypto_unregister_alg(&aes_alg); } + +module_init(padlock_init); +module_exit(padlock_fini); + +MODULE_DESCRIPTION("VIA PadLock AES algorithm support"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Michal Ludvig"); + +MODULE_ALIAS("aes-padlock"); diff --git a/drivers/crypto/padlock-generic.c b/drivers/crypto/padlock-generic.c deleted file mode 100644 index 18cf0e8274a7..000000000000 --- a/drivers/crypto/padlock-generic.c +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Cryptographic API. - * - * Support for VIA PadLock hardware crypto engine. - * - * Copyright (c) 2004 Michal Ludvig - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include "padlock.h" - -static int __init -padlock_init(void) -{ - int ret = -ENOSYS; - - if (!cpu_has_xcrypt) { - printk(KERN_ERR PFX "VIA PadLock not detected.\n"); - return -ENODEV; - } - - if (!cpu_has_xcrypt_enabled) { - printk(KERN_ERR PFX "VIA PadLock detected, but not enabled. Hmm, strange...\n"); - return -ENODEV; - } - -#ifdef CONFIG_CRYPTO_DEV_PADLOCK_AES - if ((ret = padlock_init_aes())) { - printk(KERN_ERR PFX "VIA PadLock AES initialization failed.\n"); - return ret; - } -#endif - - if (ret == -ENOSYS) - printk(KERN_ERR PFX "Hmm, VIA PadLock was compiled without any algorithm.\n"); - - return ret; -} - -static void __exit -padlock_fini(void) -{ -#ifdef CONFIG_CRYPTO_DEV_PADLOCK_AES - padlock_fini_aes(); -#endif -} - -module_init(padlock_init); -module_exit(padlock_fini); - -MODULE_DESCRIPTION("VIA PadLock crypto engine support."); -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("Michal Ludvig"); From db5e9a42373ae6d84c4b0179c2fe0aba866474e8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 9 Jul 2006 10:35:49 +1000 Subject: [PATCH 0292/1063] [CRYPTO] padlock: Add compatibility alias after rename Whenever we rename modules we should add an alias to ensure that existing users can still locate the new module. This patch also gets rid of the now unused module function prototypes from padlock.h. Signed-off-by: Herbert Xu --- drivers/crypto/padlock-aes.c | 3 +++ drivers/crypto/padlock.h | 5 ----- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index ee33bd6c1b77..241052da2787 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -533,3 +533,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Michal Ludvig"); MODULE_ALIAS("aes-padlock"); + +/* This module used to be called padlock. */ +MODULE_ALIAS("padlock"); diff --git a/drivers/crypto/padlock.h b/drivers/crypto/padlock.h index b78489bc298a..e2ee3b689dbd 100644 --- a/drivers/crypto/padlock.h +++ b/drivers/crypto/padlock.h @@ -28,9 +28,4 @@ struct cword { #define PFX "padlock: " -#ifdef CONFIG_CRYPTO_DEV_PADLOCK_AES -int padlock_init_aes(void); -void padlock_fini_aes(void); -#endif - #endif /* _CRYPTO_PADLOCK_H */ From ccc17c34d676f116bd09dd36a3b01627bc6a2f8a Mon Sep 17 00:00:00 2001 From: Michal Ludvig Date: Sat, 15 Jul 2006 10:23:49 +1000 Subject: [PATCH 0293/1063] [CRYPTO] padlock: Update private header file PADLOCK_CRA_PRIORITY is shared between padlock-aes and padlock-sha so it should be in the header. On the other hand "struct cword" is only used in padlock-aes.c so it's unnecessary to have it in padlock.h Signed-off-by: Michal Ludvig Signed-off-by: Herbert Xu --- drivers/crypto/padlock-aes.c | 13 ++++++++++++- drivers/crypto/padlock.h | 13 ++----------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 241052da2787..149e54b0ea2e 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -59,6 +59,17 @@ #define AES_EXTENDED_KEY_SIZE 64 /* in uint32_t units */ #define AES_EXTENDED_KEY_SIZE_B (AES_EXTENDED_KEY_SIZE * sizeof(uint32_t)) +/* Control word. */ +struct cword { + unsigned int __attribute__ ((__packed__)) + rounds:4, + algo:3, + keygen:1, + interm:1, + encdec:1, + ksize:2; +} __attribute__ ((__aligned__(PADLOCK_ALIGNMENT))); + /* Whenever making any changes to the following * structure *make sure* you keep E, d_data * and cword aligned on 16 Bytes boundaries!!! */ @@ -473,7 +484,7 @@ static unsigned int aes_decrypt_cbc(const struct cipher_desc *desc, u8 *out, static struct crypto_alg aes_alg = { .cra_name = "aes", .cra_driver_name = "aes-padlock", - .cra_priority = 300, + .cra_priority = PADLOCK_CRA_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct aes_ctx), diff --git a/drivers/crypto/padlock.h b/drivers/crypto/padlock.h index e2ee3b689dbd..7e3385b0904d 100644 --- a/drivers/crypto/padlock.h +++ b/drivers/crypto/padlock.h @@ -15,17 +15,8 @@ #define PADLOCK_ALIGNMENT 16 -/* Control word. */ -struct cword { - unsigned int __attribute__ ((__packed__)) - rounds:4, - algo:3, - keygen:1, - interm:1, - encdec:1, - ksize:2; -} __attribute__ ((__aligned__(PADLOCK_ALIGNMENT))); - #define PFX "padlock: " +#define PADLOCK_CRA_PRIORITY 300 + #endif /* _CRYPTO_PADLOCK_H */ From 6c833275152b454d311f0e70b5e6bf028b4a2aaf Mon Sep 17 00:00:00 2001 From: Michal Ludvig Date: Wed, 12 Jul 2006 12:29:38 +1000 Subject: [PATCH 0294/1063] [CRYPTO] padlock: Driver for SHA1 / SHA256 algorithms Support for SHA1 / SHA256 algorithms in VIA C7 processors. Signed-off-by: Michal Ludvig Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 14 ++ drivers/crypto/Makefile | 1 + drivers/crypto/padlock-sha.c | 339 +++++++++++++++++++++++++++++++++++ 3 files changed, 354 insertions(+) create mode 100644 drivers/crypto/padlock-sha.c diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index d260c86218fa..910c715325be 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -27,4 +27,18 @@ config CRYPTO_DEV_PADLOCK_AES If unsure say M. The compiled module will be called padlock-aes.ko +config CRYPTO_DEV_PADLOCK_SHA + tristate "PadLock driver for SHA1 and SHA256 algorithms" + depends on CRYPTO_DEV_PADLOCK + select CRYPTO_SHA1 + select CRYPTO_SHA256 + default m + help + Use VIA PadLock for SHA1/SHA256 algorithms. + + Available in VIA C7 and newer processors. + + If unsure say M. The compiled module will be + called padlock-sha.ko + endmenu diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 5e7d7d5e805a..df498c7d97ab 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -1 +1,2 @@ obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o +obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c new file mode 100644 index 000000000000..f7010038033b --- /dev/null +++ b/drivers/crypto/padlock-sha.c @@ -0,0 +1,339 @@ +/* + * Cryptographic API. + * + * Support for VIA PadLock hardware crypto engine. + * + * Copyright (c) 2006 Michal Ludvig + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "padlock.h" + +#define SHA1_DEFAULT_FALLBACK "sha1-generic" +#define SHA1_DIGEST_SIZE 20 +#define SHA1_HMAC_BLOCK_SIZE 64 + +#define SHA256_DEFAULT_FALLBACK "sha256-generic" +#define SHA256_DIGEST_SIZE 32 +#define SHA256_HMAC_BLOCK_SIZE 64 + +static char *sha1_fallback = SHA1_DEFAULT_FALLBACK; +static char *sha256_fallback = SHA256_DEFAULT_FALLBACK; + +module_param(sha1_fallback, charp, 0644); +module_param(sha256_fallback, charp, 0644); + +MODULE_PARM_DESC(sha1_fallback, "Fallback driver for SHA1. Default is " + SHA1_DEFAULT_FALLBACK); +MODULE_PARM_DESC(sha256_fallback, "Fallback driver for SHA256. Default is " + SHA256_DEFAULT_FALLBACK); + +struct padlock_sha_ctx { + char *data; + size_t used; + int bypass; + void (*f_sha_padlock)(const char *in, char *out, int count); + struct crypto_tfm *fallback_tfm; +}; + +static inline struct padlock_sha_ctx *ctx(struct crypto_tfm *tfm) +{ + return (struct padlock_sha_ctx *)(crypto_tfm_ctx(tfm)); +} + +/* We'll need aligned address on the stack */ +#define NEAREST_ALIGNED(ptr) \ + ((void *)ALIGN((size_t)(ptr), PADLOCK_ALIGNMENT)) + +static struct crypto_alg sha1_alg, sha256_alg; + +static void padlock_sha_bypass(struct crypto_tfm *tfm) +{ + if (ctx(tfm)->bypass) + return; + + BUG_ON(!ctx(tfm)->fallback_tfm); + + crypto_digest_init(ctx(tfm)->fallback_tfm); + if (ctx(tfm)->data && ctx(tfm)->used) { + struct scatterlist sg; + + sg_set_buf(&sg, ctx(tfm)->data, ctx(tfm)->used); + crypto_digest_update(ctx(tfm)->fallback_tfm, &sg, 1); + } + + ctx(tfm)->used = 0; + ctx(tfm)->bypass = 1; +} + +static void padlock_sha_init(struct crypto_tfm *tfm) +{ + ctx(tfm)->used = 0; + ctx(tfm)->bypass = 0; +} + +static void padlock_sha_update(struct crypto_tfm *tfm, + const uint8_t *data, unsigned int length) +{ + /* Our buffer is always one page. */ + if (unlikely(!ctx(tfm)->bypass && + (ctx(tfm)->used + length > PAGE_SIZE))) + padlock_sha_bypass(tfm); + + if (unlikely(ctx(tfm)->bypass)) { + struct scatterlist sg; + BUG_ON(!ctx(tfm)->fallback_tfm); + sg_set_buf(&sg, (uint8_t *)data, length); + crypto_digest_update(ctx(tfm)->fallback_tfm, &sg, 1); + return; + } + + memcpy(ctx(tfm)->data + ctx(tfm)->used, data, length); + ctx(tfm)->used += length; +} + +static inline void padlock_output_block(uint32_t *src, + uint32_t *dst, size_t count) +{ + while (count--) + *dst++ = swab32(*src++); +} + +void padlock_do_sha1(const char *in, char *out, int count) +{ + /* We can't store directly to *out as it may be unaligned. */ + /* BTW Don't reduce the buffer size below 128 Bytes! + * PadLock microcode needs it that big. */ + char buf[128+16]; + char *result = NEAREST_ALIGNED(buf); + + ((uint32_t *)result)[0] = 0x67452301; + ((uint32_t *)result)[1] = 0xEFCDAB89; + ((uint32_t *)result)[2] = 0x98BADCFE; + ((uint32_t *)result)[3] = 0x10325476; + ((uint32_t *)result)[4] = 0xC3D2E1F0; + + asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */ + : "+S"(in), "+D"(result) + : "c"(count), "a"(0)); + + padlock_output_block((uint32_t *)result, (uint32_t *)out, 5); +} + +void padlock_do_sha256(const char *in, char *out, int count) +{ + /* We can't store directly to *out as it may be unaligned. */ + /* BTW Don't reduce the buffer size below 128 Bytes! + * PadLock microcode needs it that big. */ + char buf[128+16]; + char *result = NEAREST_ALIGNED(buf); + + ((uint32_t *)result)[0] = 0x6A09E667; + ((uint32_t *)result)[1] = 0xBB67AE85; + ((uint32_t *)result)[2] = 0x3C6EF372; + ((uint32_t *)result)[3] = 0xA54FF53A; + ((uint32_t *)result)[4] = 0x510E527F; + ((uint32_t *)result)[5] = 0x9B05688C; + ((uint32_t *)result)[6] = 0x1F83D9AB; + ((uint32_t *)result)[7] = 0x5BE0CD19; + + asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */ + : "+S"(in), "+D"(result) + : "c"(count), "a"(0)); + + padlock_output_block((uint32_t *)result, (uint32_t *)out, 8); +} + +static void padlock_sha_final(struct crypto_tfm *tfm, uint8_t *out) +{ + if (unlikely(ctx(tfm)->bypass)) { + BUG_ON(!ctx(tfm)->fallback_tfm); + crypto_digest_final(ctx(tfm)->fallback_tfm, out); + ctx(tfm)->bypass = 0; + return; + } + + /* Pass the input buffer to PadLock microcode... */ + ctx(tfm)->f_sha_padlock(ctx(tfm)->data, out, ctx(tfm)->used); + + ctx(tfm)->used = 0; +} + +static int padlock_cra_init(struct crypto_tfm *tfm, const char *fallback_driver_name) +{ + /* For now we'll allocate one page. This + * could eventually be configurable one day. */ + ctx(tfm)->data = (char *)__get_free_page(GFP_KERNEL); + if (!ctx(tfm)->data) + return -ENOMEM; + + /* Allocate a fallback and abort if it failed. */ + ctx(tfm)->fallback_tfm = crypto_alloc_tfm(fallback_driver_name, 0); + if (!ctx(tfm)->fallback_tfm) { + printk(KERN_WARNING PFX "Fallback driver '%s' could not be loaded!\n", + fallback_driver_name); + free_page((unsigned long)(ctx(tfm)->data)); + return -ENOENT; + } + + return 0; +} + +static int padlock_sha1_cra_init(struct crypto_tfm *tfm) +{ + ctx(tfm)->f_sha_padlock = padlock_do_sha1; + + return padlock_cra_init(tfm, sha1_fallback); +} + +static int padlock_sha256_cra_init(struct crypto_tfm *tfm) +{ + ctx(tfm)->f_sha_padlock = padlock_do_sha256; + + return padlock_cra_init(tfm, sha256_fallback); +} + +static void padlock_cra_exit(struct crypto_tfm *tfm) +{ + if (ctx(tfm)->data) { + free_page((unsigned long)(ctx(tfm)->data)); + ctx(tfm)->data = NULL; + } + + BUG_ON(!ctx(tfm)->fallback_tfm); + crypto_free_tfm(ctx(tfm)->fallback_tfm); + ctx(tfm)->fallback_tfm = NULL; +} + +static struct crypto_alg sha1_alg = { + .cra_name = "sha1", + .cra_driver_name = "sha1-padlock", + .cra_priority = PADLOCK_CRA_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_DIGEST, + .cra_blocksize = SHA1_HMAC_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct padlock_sha_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(sha1_alg.cra_list), + .cra_init = padlock_sha1_cra_init, + .cra_exit = padlock_cra_exit, + .cra_u = { + .digest = { + .dia_digestsize = SHA1_DIGEST_SIZE, + .dia_init = padlock_sha_init, + .dia_update = padlock_sha_update, + .dia_final = padlock_sha_final, + } + } +}; + +static struct crypto_alg sha256_alg = { + .cra_name = "sha256", + .cra_driver_name = "sha256-padlock", + .cra_priority = PADLOCK_CRA_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_DIGEST, + .cra_blocksize = SHA256_HMAC_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct padlock_sha_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(sha256_alg.cra_list), + .cra_init = padlock_sha256_cra_init, + .cra_exit = padlock_cra_exit, + .cra_u = { + .digest = { + .dia_digestsize = SHA256_DIGEST_SIZE, + .dia_init = padlock_sha_init, + .dia_update = padlock_sha_update, + .dia_final = padlock_sha_final, + } + } +}; + +static void __init padlock_sha_check_fallbacks(void) +{ + static struct crypto_tfm *tfm_sha1, *tfm_sha256; + + /* We'll try to allocate one TFM for each fallback + * to test that the modules are available. */ + tfm_sha1 = crypto_alloc_tfm(sha1_fallback, 0); + if (!tfm_sha1) { + printk(KERN_WARNING PFX "Couldn't load fallback module for '%s'. Tried '%s'.\n", + sha1_alg.cra_name, sha1_fallback); + } else { + printk(KERN_NOTICE PFX "Fallback for '%s' is driver '%s' (prio=%d)\n", sha1_alg.cra_name, + crypto_tfm_alg_driver_name(tfm_sha1), crypto_tfm_alg_priority(tfm_sha1)); + crypto_free_tfm(tfm_sha1); + } + + tfm_sha256 = crypto_alloc_tfm(sha256_fallback, 0); + if (!tfm_sha256) { + printk(KERN_WARNING PFX "Couldn't load fallback module for '%s'. Tried '%s'.\n", + sha256_alg.cra_name, sha256_fallback); + } else { + printk(KERN_NOTICE PFX "Fallback for '%s' is driver '%s' (prio=%d)\n", sha256_alg.cra_name, + crypto_tfm_alg_driver_name(tfm_sha256), crypto_tfm_alg_priority(tfm_sha256)); + crypto_free_tfm(tfm_sha256); + } +} + +static int __init padlock_init(void) +{ + int rc = -ENODEV; + + if (!cpu_has_phe) { + printk(KERN_ERR PFX "VIA PadLock Hash Engine not detected.\n"); + return -ENODEV; + } + + if (!cpu_has_phe_enabled) { + printk(KERN_ERR PFX "VIA PadLock detected, but not enabled. Hmm, strange...\n"); + return -ENODEV; + } + + padlock_sha_check_fallbacks(); + + rc = crypto_register_alg(&sha1_alg); + if (rc) + goto out; + + rc = crypto_register_alg(&sha256_alg); + if (rc) + goto out_unreg1; + + printk(KERN_NOTICE PFX "Using VIA PadLock ACE for SHA1/SHA256 algorithms.\n"); + + return 0; + +out_unreg1: + crypto_unregister_alg(&sha1_alg); +out: + printk(KERN_ERR PFX "VIA PadLock SHA1/SHA256 initialization failed.\n"); + return rc; +} + +static void __exit padlock_fini(void) +{ + crypto_unregister_alg(&sha1_alg); + crypto_unregister_alg(&sha256_alg); +} + +module_init(padlock_init); +module_exit(padlock_fini); + +MODULE_DESCRIPTION("VIA PadLock SHA1/SHA256 algorithms support."); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Michal Ludvig"); + +MODULE_ALIAS("sha1-padlock"); +MODULE_ALIAS("sha256-padlock"); From cb17530b0a4e01bd595a7ac437467a1a9833a15c Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sat, 15 Jul 2006 11:31:25 +1000 Subject: [PATCH 0295/1063] [CRYPTO] padlock-sha: Make 2 functions static This patch makes two needlessly global functions static. Signed-off-by: Adrian Bunk Signed-off-by: Herbert Xu --- drivers/crypto/padlock-sha.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c index f7010038033b..95e9971d1a70 100644 --- a/drivers/crypto/padlock-sha.c +++ b/drivers/crypto/padlock-sha.c @@ -112,7 +112,7 @@ static inline void padlock_output_block(uint32_t *src, *dst++ = swab32(*src++); } -void padlock_do_sha1(const char *in, char *out, int count) +static void padlock_do_sha1(const char *in, char *out, int count) { /* We can't store directly to *out as it may be unaligned. */ /* BTW Don't reduce the buffer size below 128 Bytes! @@ -133,7 +133,7 @@ void padlock_do_sha1(const char *in, char *out, int count) padlock_output_block((uint32_t *)result, (uint32_t *)out, 5); } -void padlock_do_sha256(const char *in, char *out, int count) +static void padlock_do_sha256(const char *in, char *out, int count) { /* We can't store directly to *out as it may be unaligned. */ /* BTW Don't reduce the buffer size below 128 Bytes! From 5644bda5d6aa17a70b8842eb56365d501a5da159 Mon Sep 17 00:00:00 2001 From: Michal Ludvig Date: Sun, 6 Aug 2006 22:50:30 +1000 Subject: [PATCH 0296/1063] [CRYPTO] padlock: Helper module padlock.ko Compile a helper module padlock.ko that will try to autoload all configured padlock algorithms. This also provides backward compatibility with the ancient times before padlock.ko was renamed to padlock-aes.ko Signed-off-by: Michal Ludvig Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 17 ++++++++--- drivers/crypto/Makefile | 1 + drivers/crypto/padlock-aes.c | 3 -- drivers/crypto/padlock.c | 58 ++++++++++++++++++++++++++++++++++++ 4 files changed, 72 insertions(+), 7 deletions(-) create mode 100644 drivers/crypto/padlock.c diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 910c715325be..86c99cd333fa 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -1,10 +1,10 @@ menu "Hardware crypto devices" config CRYPTO_DEV_PADLOCK - bool "Support for VIA PadLock ACE" + tristate "Support for VIA PadLock ACE" depends on X86_32 select CRYPTO_ALGAPI - default y + default m help Some VIA processors come with an integrated crypto engine (so called VIA PadLock ACE, Advanced Cryptography Engine) @@ -12,8 +12,17 @@ config CRYPTO_DEV_PADLOCK operations with supported algorithms. The instructions are used only when the CPU supports them. - Otherwise software encryption is used. If you are unsure, - say Y. + Otherwise software encryption is used. + + Selecting M for this option will compile a helper module + padlock.ko that should autoload all below configured + algorithms. Don't worry if your hardware does not support + some or all of them. In such case padlock.ko will + simply write a single line into the kernel log informing + about its failure but everything will keep working fine. + + If you are unsure, say M. The compiled module will be + called padlock.ko config CRYPTO_DEV_PADLOCK_AES tristate "PadLock driver for AES algorithm" diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index df498c7d97ab..4c3d0ec1cf80 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -1,2 +1,3 @@ +obj-$(CONFIG_CRYPTO_DEV_PADLOCK) += padlock.o obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 149e54b0ea2e..3a2a71108d35 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -544,6 +544,3 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Michal Ludvig"); MODULE_ALIAS("aes-padlock"); - -/* This module used to be called padlock. */ -MODULE_ALIAS("padlock"); diff --git a/drivers/crypto/padlock.c b/drivers/crypto/padlock.c new file mode 100644 index 000000000000..ce581684f4b4 --- /dev/null +++ b/drivers/crypto/padlock.c @@ -0,0 +1,58 @@ +/* + * Cryptographic API. + * + * Support for VIA PadLock hardware crypto engine. + * + * Copyright (c) 2006 Michal Ludvig + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "padlock.h" + +static int __init padlock_init(void) +{ + int success = 0; + + if (crypto_alg_available("aes-padlock", 0)) + success++; + + if (crypto_alg_available("sha1-padlock", 0)) + success++; + + if (crypto_alg_available("sha256-padlock", 0)) + success++; + + if (!success) { + printk(KERN_WARNING PFX "No VIA PadLock drivers have been loaded.\n"); + return -ENODEV; + } + + printk(KERN_NOTICE PFX "%d drivers are available.\n", success); + + return 0; +} + +static void __exit padlock_fini(void) +{ +} + +module_init(padlock_init); +module_exit(padlock_fini); + +MODULE_DESCRIPTION("Load all configured PadLock algorithms."); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Michal Ludvig"); + From 58ec4152895b96f047dcf5e490ee49b4c574dec3 Mon Sep 17 00:00:00 2001 From: Michal Ludvig Date: Mon, 17 Jul 2006 08:14:58 +1000 Subject: [PATCH 0297/1063] [CRYPTO] padlock-sha: TFMs don't need to be static TFMs are local variables. No need to declare them static. After all one is enough. Signed-off-by: Michal Ludvig Signed-off-by: Herbert Xu --- drivers/crypto/padlock-sha.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c index 95e9971d1a70..b028db61c301 100644 --- a/drivers/crypto/padlock-sha.c +++ b/drivers/crypto/padlock-sha.c @@ -262,28 +262,28 @@ static struct crypto_alg sha256_alg = { static void __init padlock_sha_check_fallbacks(void) { - static struct crypto_tfm *tfm_sha1, *tfm_sha256; + struct crypto_tfm *tfm; /* We'll try to allocate one TFM for each fallback * to test that the modules are available. */ - tfm_sha1 = crypto_alloc_tfm(sha1_fallback, 0); - if (!tfm_sha1) { + tfm = crypto_alloc_tfm(sha1_fallback, 0); + if (!tfm) { printk(KERN_WARNING PFX "Couldn't load fallback module for '%s'. Tried '%s'.\n", sha1_alg.cra_name, sha1_fallback); } else { printk(KERN_NOTICE PFX "Fallback for '%s' is driver '%s' (prio=%d)\n", sha1_alg.cra_name, - crypto_tfm_alg_driver_name(tfm_sha1), crypto_tfm_alg_priority(tfm_sha1)); - crypto_free_tfm(tfm_sha1); + crypto_tfm_alg_driver_name(tfm), crypto_tfm_alg_priority(tfm)); + crypto_free_tfm(tfm); } - tfm_sha256 = crypto_alloc_tfm(sha256_fallback, 0); - if (!tfm_sha256) { + tfm = crypto_alloc_tfm(sha256_fallback, 0); + if (!tfm) { printk(KERN_WARNING PFX "Couldn't load fallback module for '%s'. Tried '%s'.\n", sha256_alg.cra_name, sha256_fallback); } else { printk(KERN_NOTICE PFX "Fallback for '%s' is driver '%s' (prio=%d)\n", sha256_alg.cra_name, - crypto_tfm_alg_driver_name(tfm_sha256), crypto_tfm_alg_priority(tfm_sha256)); - crypto_free_tfm(tfm_sha256); + crypto_tfm_alg_driver_name(tfm), crypto_tfm_alg_priority(tfm)); + crypto_free_tfm(tfm); } } From 25cdbcd9e5d20e431f829cafce48a418830011f4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 6 Aug 2006 23:03:08 +1000 Subject: [PATCH 0298/1063] [CRYPTO] crc32c: Fix unconventional setkey usage The convention for setkey is that once it is set it should not change, in particular, init must not wipe out the key set by it. In fact, init should always be used after setkey before any digestion is performed. The only user of crc32c that sets the key is tcrypt. This patch adds the necessary init calls there. Signed-off-by: Herbert Xu --- crypto/crc32c.c | 25 +++++++++++++++---------- crypto/tcrypt.c | 4 ++++ 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/crypto/crc32c.c b/crypto/crc32c.c index f2660123aeb4..91ecd895e957 100644 --- a/crypto/crc32c.c +++ b/crypto/crc32c.c @@ -16,14 +16,14 @@ #include #include #include -#include -#include +#include #define CHKSUM_BLOCK_SIZE 32 #define CHKSUM_DIGEST_SIZE 4 struct chksum_ctx { u32 crc; + u32 key; }; /* @@ -35,7 +35,7 @@ static void chksum_init(struct crypto_tfm *tfm) { struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); - mctx->crc = ~(u32)0; /* common usage */ + mctx->crc = mctx->key; } /* @@ -53,7 +53,7 @@ static int chksum_setkey(struct crypto_tfm *tfm, const u8 *key, *flags = CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; } - mctx->crc = __cpu_to_le32(*(u32 *)key); + mctx->key = le32_to_cpu(*(__le32 *)key); return 0; } @@ -61,19 +61,23 @@ static void chksum_update(struct crypto_tfm *tfm, const u8 *data, unsigned int length) { struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); - u32 mcrc; - mcrc = crc32c(mctx->crc, data, (size_t)length); - - mctx->crc = mcrc; + mctx->crc = crc32c(mctx->crc, data, length); } static void chksum_final(struct crypto_tfm *tfm, u8 *out) { struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); - u32 mcrc = (mctx->crc ^ ~(u32)0); - *(u32 *)out = __le32_to_cpu(mcrc); + *(__le32 *)out = ~cpu_to_le32(mctx->crc); +} + +static int crc32c_cra_init(struct crypto_tfm *tfm) +{ + struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); + + mctx->key = ~0; + return 0; } static struct crypto_alg alg = { @@ -83,6 +87,7 @@ static struct crypto_alg alg = { .cra_ctxsize = sizeof(struct chksum_ctx), .cra_module = THIS_MODULE, .cra_list = LIST_HEAD_INIT(alg.cra_list), + .cra_init = crc32c_cra_init, .cra_u = { .digest = { .dia_digestsize= CHKSUM_DIGEST_SIZE, diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index e52f56c5bd5e..bed225e83231 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -810,6 +810,7 @@ static void test_crc32c(void) seed = SEEDTESTVAL; (void)crypto_digest_setkey(tfm, (const u8*)&seed, sizeof(u32)); + crypto_digest_init(tfm); crypto_digest_final(tfm, (u8*)&crc); printk("testing crc32c setkey returns %08x : %s\n", crc, (crc == (SEEDTESTVAL ^ ~(u32)0)) ? "pass" : "ERROR"); @@ -821,6 +822,7 @@ static void test_crc32c(void) for (i = 0; i < NUMVEC; i++) { seed = ~(u32)0; (void)crypto_digest_setkey(tfm, (const u8*)&seed, sizeof(u32)); + crypto_digest_init(tfm); crypto_digest_update(tfm, &sg[i], 1); crypto_digest_final(tfm, (u8*)&crc); if (crc == vec_results[i]) { @@ -836,6 +838,7 @@ static void test_crc32c(void) for (i = 0; i < NUMVEC; i++) { seed = (crc ^ ~(u32)0); (void)crypto_digest_setkey(tfm, (const u8*)&seed, sizeof(u32)); + crypto_digest_init(tfm); crypto_digest_update(tfm, &sg[i], 1); crypto_digest_final(tfm, (u8*)&crc); } @@ -849,6 +852,7 @@ static void test_crc32c(void) printk("\ntesting crc32c using digest:\n"); seed = ~(u32)0; (void)crypto_digest_setkey(tfm, (const u8*)&seed, sizeof(u32)); + crypto_digest_init(tfm); crypto_digest_digest(tfm, sg, NUMVEC, (u8*)&crc); if (crc == tot_vec_results) { printk(" %08x:OK", crc); From 560c06ae1ab7c677002ea3b6ac83521bf12ee07d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 13 Aug 2006 14:16:39 +1000 Subject: [PATCH 0299/1063] [CRYPTO] api: Get rid of flags argument to setkey Now that the tfm is passed directly to setkey instead of the ctx, we no longer need to pass the &tfm->crt_flags pointer. This patch also gets rid of a few unnecessary checks on the key length for ciphers as the cipher layer guarantees that the key length is within the bounds specified by the algorithm. Rather than testing dia_setkey every time, this patch does it only once during crypto_alloc_tfm. The redundant check from crypto_digest_setkey is also removed. Signed-off-by: Herbert Xu --- arch/i386/crypto/aes.c | 3 ++- arch/s390/crypto/aes_s390.c | 3 ++- arch/s390/crypto/des_s390.c | 13 ++++++++----- arch/x86_64/crypto/aes.c | 5 +++-- crypto/aes.c | 5 +++-- crypto/anubis.c | 3 ++- crypto/arc4.c | 2 +- crypto/blowfish.c | 3 +-- crypto/cast5.c | 8 +------- crypto/cast6.c | 5 +++-- crypto/cipher.c | 4 ++-- crypto/crc32c.c | 5 ++--- crypto/crypto_null.c | 2 +- crypto/des.c | 6 ++++-- crypto/digest.c | 15 ++++++++++----- crypto/khazad.c | 8 +------- crypto/michael_mic.c | 5 ++--- crypto/serpent.c | 19 +++---------------- crypto/tcrypt.c | 5 +---- crypto/tea.c | 16 ++-------------- crypto/twofish_common.c | 6 +++--- drivers/crypto/padlock-aes.c | 5 +++-- include/crypto/twofish.h | 3 +-- include/linux/crypto.h | 6 ++---- 24 files changed, 63 insertions(+), 92 deletions(-) diff --git a/arch/i386/crypto/aes.c b/arch/i386/crypto/aes.c index d3806daa3de3..49aad9397f10 100644 --- a/arch/i386/crypto/aes.c +++ b/arch/i386/crypto/aes.c @@ -379,12 +379,13 @@ static void gen_tabs(void) } static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len, u32 *flags) + unsigned int key_len) { int i; u32 ss[8]; struct aes_ctx *ctx = crypto_tfm_ctx(tfm); const __le32 *key = (const __le32 *)in_key; + u32 *flags = &tfm->crt_flags; /* encryption schedule */ diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 5713c7e5bd16..c7c43c9de0d9 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -38,9 +38,10 @@ struct s390_aes_ctx { }; static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len, u32 *flags) + unsigned int key_len) { struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; switch (key_len) { case 16: diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index b3f7496a79b4..170757b3451d 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -45,9 +45,10 @@ struct crypt_s390_des3_192_ctx { }; static int des_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags) + unsigned int keylen) { struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; int ret; /* test if key is valid (not a weak key) */ @@ -167,11 +168,12 @@ static struct crypto_alg des_alg = { * */ static int des3_128_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags) + unsigned int keylen) { int i, ret; struct crypt_s390_des3_128_ctx *dctx = crypto_tfm_ctx(tfm); - const u8* temp_key = key; + const u8 *temp_key = key; + u32 *flags = &tfm->crt_flags; if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE))) { *flags |= CRYPTO_TFM_RES_BAD_KEY_SCHED; @@ -303,11 +305,12 @@ static struct crypto_alg des3_128_alg = { * */ static int des3_192_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags) + unsigned int keylen) { int i, ret; struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm); - const u8* temp_key = key; + const u8 *temp_key = key; + u32 *flags = &tfm->crt_flags; if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) && memcmp(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2], diff --git a/arch/x86_64/crypto/aes.c b/arch/x86_64/crypto/aes.c index 68866fab37aa..5cdb13ea5cc2 100644 --- a/arch/x86_64/crypto/aes.c +++ b/arch/x86_64/crypto/aes.c @@ -228,13 +228,14 @@ static void __init gen_tabs(void) } static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len, u32 *flags) + unsigned int key_len) { struct aes_ctx *ctx = crypto_tfm_ctx(tfm); const __le32 *key = (const __le32 *)in_key; + u32 *flags = &tfm->crt_flags; u32 i, j, t, u, v, w; - if (key_len != 16 && key_len != 24 && key_len != 32) { + if (key_len % 8) { *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; } diff --git a/crypto/aes.c b/crypto/aes.c index a038711831e7..e2440773878c 100644 --- a/crypto/aes.c +++ b/crypto/aes.c @@ -249,13 +249,14 @@ gen_tabs (void) } static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len, u32 *flags) + unsigned int key_len) { struct aes_ctx *ctx = crypto_tfm_ctx(tfm); const __le32 *key = (const __le32 *)in_key; + u32 *flags = &tfm->crt_flags; u32 i, t, u, v, w; - if (key_len != 16 && key_len != 24 && key_len != 32) { + if (key_len % 8) { *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; } diff --git a/crypto/anubis.c b/crypto/anubis.c index 7e2e1a29800e..1c771f7f4dc5 100644 --- a/crypto/anubis.c +++ b/crypto/anubis.c @@ -461,10 +461,11 @@ static const u32 rc[] = { }; static int anubis_setkey(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len, u32 *flags) + unsigned int key_len) { struct anubis_ctx *ctx = crypto_tfm_ctx(tfm); const __be32 *key = (const __be32 *)in_key; + u32 *flags = &tfm->crt_flags; int N, R, i, r; u32 kappa[ANUBIS_MAX_N]; u32 inter[ANUBIS_MAX_N]; diff --git a/crypto/arc4.c b/crypto/arc4.c index 5edc6a65b987..8be47e13a9e3 100644 --- a/crypto/arc4.c +++ b/crypto/arc4.c @@ -25,7 +25,7 @@ struct arc4_ctx { }; static int arc4_set_key(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len, u32 *flags) + unsigned int key_len) { struct arc4_ctx *ctx = crypto_tfm_ctx(tfm); int i, j = 0, k = 0; diff --git a/crypto/blowfish.c b/crypto/blowfish.c index 490265f42b3b..55238c4e37f0 100644 --- a/crypto/blowfish.c +++ b/crypto/blowfish.c @@ -399,8 +399,7 @@ static void bf_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) /* * Calculates the blowfish S and P boxes for encryption and decryption. */ -static int bf_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags) +static int bf_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { struct bf_ctx *ctx = crypto_tfm_ctx(tfm); u32 *P = ctx->p; diff --git a/crypto/cast5.c b/crypto/cast5.c index 08eef58c1d3d..13ea60abc19a 100644 --- a/crypto/cast5.c +++ b/crypto/cast5.c @@ -769,8 +769,7 @@ static void key_schedule(u32 * x, u32 * z, u32 * k) } -static int cast5_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned key_len, u32 *flags) +static int cast5_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned key_len) { struct cast5_ctx *c = crypto_tfm_ctx(tfm); int i; @@ -778,11 +777,6 @@ static int cast5_setkey(struct crypto_tfm *tfm, const u8 *key, u32 z[4]; u32 k[16]; __be32 p_key[4]; - - if (key_len < 5 || key_len > 16) { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } c->rr = key_len <= 10 ? 1 : 0; diff --git a/crypto/cast6.c b/crypto/cast6.c index 08e33bfc3ad1..136ab6dfe8c5 100644 --- a/crypto/cast6.c +++ b/crypto/cast6.c @@ -382,14 +382,15 @@ static inline void W(u32 *key, unsigned int i) { } static int cast6_setkey(struct crypto_tfm *tfm, const u8 *in_key, - unsigned key_len, u32 *flags) + unsigned key_len) { int i; u32 key[8]; __be32 p_key[8]; /* padded key */ struct cast6_ctx *c = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; - if (key_len < 16 || key_len > 32 || key_len % 4 != 0) { + if (key_len % 4 != 0) { *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; } diff --git a/crypto/cipher.c b/crypto/cipher.c index b899eb97abd7..56406a4a88d4 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -264,12 +264,12 @@ static int setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { struct cipher_alg *cia = &tfm->__crt_alg->cra_cipher; + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; if (keylen < cia->cia_min_keysize || keylen > cia->cia_max_keysize) { tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; } else - return cia->cia_setkey(tfm, key, keylen, - &tfm->crt_flags); + return cia->cia_setkey(tfm, key, keylen); } static int ecb_encrypt(struct crypto_tfm *tfm, diff --git a/crypto/crc32c.c b/crypto/crc32c.c index 91ecd895e957..0fa744392a4c 100644 --- a/crypto/crc32c.c +++ b/crypto/crc32c.c @@ -44,13 +44,12 @@ static void chksum_init(struct crypto_tfm *tfm) * the seed. */ static int chksum_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags) + unsigned int keylen) { struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); if (keylen != sizeof(mctx->crc)) { - if (flags) - *flags = CRYPTO_TFM_RES_BAD_KEY_LEN; + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; } mctx->key = le32_to_cpu(*(__le32 *)key); diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c index a0d956b52949..24dbb5d8617e 100644 --- a/crypto/crypto_null.c +++ b/crypto/crypto_null.c @@ -48,7 +48,7 @@ static void null_final(struct crypto_tfm *tfm, u8 *out) { } static int null_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags) + unsigned int keylen) { return 0; } static void null_crypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) diff --git a/crypto/des.c b/crypto/des.c index a9d3c235a6af..1df3a714fa47 100644 --- a/crypto/des.c +++ b/crypto/des.c @@ -784,9 +784,10 @@ static void dkey(u32 *pe, const u8 *k) } static int des_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags) + unsigned int keylen) { struct des_ctx *dctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; u32 tmp[DES_EXPKEY_WORDS]; int ret; @@ -864,11 +865,12 @@ static void des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) * */ static int des3_ede_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags) + unsigned int keylen) { const u32 *K = (const u32 *)key; struct des3_ede_ctx *dctx = crypto_tfm_ctx(tfm); u32 *expkey = dctx->expkey; + u32 *flags = &tfm->crt_flags; if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) || !((K[2] ^ K[4]) | (K[3] ^ K[5])))) diff --git a/crypto/digest.c b/crypto/digest.c index 603006a7bef2..0df7f392a56a 100644 --- a/crypto/digest.c +++ b/crypto/digest.c @@ -76,12 +76,16 @@ static void final(struct crypto_tfm *tfm, u8 *out) tfm->__crt_alg->cra_digest.dia_final(tfm, out); } +static int nosetkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) +{ + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + return -ENOSYS; +} + static int setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { - u32 flags; - if (tfm->__crt_alg->cra_digest.dia_setkey == NULL) - return -ENOSYS; - return tfm->__crt_alg->cra_digest.dia_setkey(tfm, key, keylen, &flags); + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + return tfm->__crt_alg->cra_digest.dia_setkey(tfm, key, keylen); } static void digest(struct crypto_tfm *tfm, @@ -100,12 +104,13 @@ int crypto_init_digest_flags(struct crypto_tfm *tfm, u32 flags) int crypto_init_digest_ops(struct crypto_tfm *tfm) { struct digest_tfm *ops = &tfm->crt_digest; + struct digest_alg *dalg = &tfm->__crt_alg->cra_digest; ops->dit_init = init; ops->dit_update = update; ops->dit_final = final; ops->dit_digest = digest; - ops->dit_setkey = setkey; + ops->dit_setkey = dalg->dia_setkey ? setkey : nosetkey; return crypto_alloc_hmac_block(tfm); } diff --git a/crypto/khazad.c b/crypto/khazad.c index d4c9d3657b36..9fa24a2dd6ff 100644 --- a/crypto/khazad.c +++ b/crypto/khazad.c @@ -755,19 +755,13 @@ static const u64 c[KHAZAD_ROUNDS + 1] = { }; static int khazad_setkey(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len, u32 *flags) + unsigned int key_len) { struct khazad_ctx *ctx = crypto_tfm_ctx(tfm); const __be32 *key = (const __be32 *)in_key; int r; const u64 *S = T7; u64 K2, K1; - - if (key_len != 16) - { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } /* key is supposed to be 32-bit aligned */ K2 = ((u64)be32_to_cpu(key[0]) << 32) | be32_to_cpu(key[1]); diff --git a/crypto/michael_mic.c b/crypto/michael_mic.c index d061da21cfda..094397b48849 100644 --- a/crypto/michael_mic.c +++ b/crypto/michael_mic.c @@ -123,14 +123,13 @@ static void michael_final(struct crypto_tfm *tfm, u8 *out) static int michael_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags) + unsigned int keylen) { struct michael_mic_ctx *mctx = crypto_tfm_ctx(tfm); const __le32 *data = (const __le32 *)key; if (keylen != 8) { - if (flags) - *flags = CRYPTO_TFM_RES_BAD_KEY_LEN; + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; } diff --git a/crypto/serpent.c b/crypto/serpent.c index de60cdddbf4a..465d091cd3ec 100644 --- a/crypto/serpent.c +++ b/crypto/serpent.c @@ -216,7 +216,7 @@ struct serpent_ctx { static int serpent_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags) + unsigned int keylen) { struct serpent_ctx *ctx = crypto_tfm_ctx(tfm); u32 *k = ctx->expkey; @@ -224,13 +224,6 @@ static int serpent_setkey(struct crypto_tfm *tfm, const u8 *key, u32 r0,r1,r2,r3,r4; int i; - if ((keylen < SERPENT_MIN_KEY_SIZE) - || (keylen > SERPENT_MAX_KEY_SIZE)) - { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - /* Copy key, add padding */ for (i = 0; i < keylen; ++i) @@ -497,21 +490,15 @@ static struct crypto_alg serpent_alg = { }; static int tnepres_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags) + unsigned int keylen) { u8 rev_key[SERPENT_MAX_KEY_SIZE]; int i; - if ((keylen < SERPENT_MIN_KEY_SIZE) - || (keylen > SERPENT_MAX_KEY_SIZE)) { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - for (i = 0; i < keylen; ++i) rev_key[keylen - i - 1] = key[i]; - return serpent_setkey(tfm, rev_key, keylen, flags); + return serpent_setkey(tfm, rev_key, keylen); } static void tnepres_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index bed225e83231..606777074671 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -118,10 +118,7 @@ static void test_hash(char *algo, struct hash_testvec *template, sg_set_buf(&sg[0], hash_tv[i].plaintext, hash_tv[i].psize); crypto_digest_init(tfm); - if (tfm->crt_u.digest.dit_setkey) { - crypto_digest_setkey(tfm, hash_tv[i].key, - hash_tv[i].ksize); - } + crypto_digest_setkey(tfm, hash_tv[i].key, hash_tv[i].ksize); crypto_digest_update(tfm, sg, 1); crypto_digest_final(tfm, result); diff --git a/crypto/tea.c b/crypto/tea.c index 5367adc82fc9..1c54e26fa529 100644 --- a/crypto/tea.c +++ b/crypto/tea.c @@ -46,16 +46,10 @@ struct xtea_ctx { }; static int tea_setkey(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len, u32 *flags) + unsigned int key_len) { struct tea_ctx *ctx = crypto_tfm_ctx(tfm); const __le32 *key = (const __le32 *)in_key; - - if (key_len != 16) - { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } ctx->KEY[0] = le32_to_cpu(key[0]); ctx->KEY[1] = le32_to_cpu(key[1]); @@ -125,16 +119,10 @@ static void tea_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) } static int xtea_setkey(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len, u32 *flags) + unsigned int key_len) { struct xtea_ctx *ctx = crypto_tfm_ctx(tfm); const __le32 *key = (const __le32 *)in_key; - - if (key_len != 16) - { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } ctx->KEY[0] = le32_to_cpu(key[0]); ctx->KEY[1] = le32_to_cpu(key[1]); diff --git a/crypto/twofish_common.c b/crypto/twofish_common.c index 1ae0280c2513..b4b9c0c3f4ae 100644 --- a/crypto/twofish_common.c +++ b/crypto/twofish_common.c @@ -580,11 +580,11 @@ static const u8 calc_sb_tbl[512] = { ctx->a[(j) + 1] = rol32(y, 9) /* Perform the key setup. */ -int twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int key_len, u32 *flags) +int twofish_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key_len) { struct twofish_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; int i, j, k; @@ -600,7 +600,7 @@ int twofish_setkey(struct crypto_tfm *tfm, const u8 *key, u8 tmp; /* Check key length. */ - if (key_len != 16 && key_len != 24 && key_len != 32) + if (key_len % 8) { *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; /* unsupported key length */ diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 3a2a71108d35..3e683709243e 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -308,15 +308,16 @@ static inline struct aes_ctx *aes_ctx(struct crypto_tfm *tfm) } static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len, u32 *flags) + unsigned int key_len) { struct aes_ctx *ctx = aes_ctx(tfm); const __le32 *key = (const __le32 *)in_key; + u32 *flags = &tfm->crt_flags; uint32_t i, t, u, v, w; uint32_t P[AES_EXTENDED_KEY_SIZE]; uint32_t rounds; - if (key_len != 16 && key_len != 24 && key_len != 32) { + if (key_len % 8) { *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; } diff --git a/include/crypto/twofish.h b/include/crypto/twofish.h index e4328cfaaf64..c408522595c6 100644 --- a/include/crypto/twofish.h +++ b/include/crypto/twofish.h @@ -17,7 +17,6 @@ struct twofish_ctx { u32 s[4][256], w[8], k[32]; }; -int twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int key_len, u32 *flags); +int twofish_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key_len); #endif diff --git a/include/linux/crypto.h b/include/linux/crypto.h index d6e184c876b5..053bfab43e8d 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -106,7 +106,7 @@ struct cipher_alg { unsigned int cia_min_keysize; unsigned int cia_max_keysize; int (*cia_setkey)(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags); + unsigned int keylen); void (*cia_encrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); void (*cia_decrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); @@ -131,7 +131,7 @@ struct digest_alg { unsigned int len); void (*dia_final)(struct crypto_tfm *tfm, u8 *out); int (*dia_setkey)(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen, u32 *flags); + unsigned int keylen); }; struct compress_alg { @@ -397,8 +397,6 @@ static inline int crypto_digest_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST); - if (tfm->crt_digest.dit_setkey == NULL) - return -ENOSYS; return tfm->crt_digest.dit_setkey(tfm, key, keylen); } From ee7564166da9e218c3f605ee78ff16599d4d5a05 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 9 Jul 2006 14:49:42 +1000 Subject: [PATCH 0300/1063] [CRYPTO] digest: Store temporary digest in tfm When the final result location is unaligned, we store the digest in a temporary buffer before copying it to the final location. Currently that buffer sits on the stack. This patch moves it to an area in the tfm, just like the CBC IV buffer. Signed-off-by: Herbert Xu --- crypto/digest.c | 16 ++++++++++------ crypto/internal.h | 9 ++++++++- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/crypto/digest.c b/crypto/digest.c index 0df7f392a56a..19e75563776b 100644 --- a/crypto/digest.c +++ b/crypto/digest.c @@ -66,14 +66,18 @@ static void update(struct crypto_tfm *tfm, static void final(struct crypto_tfm *tfm, u8 *out) { unsigned long alignmask = crypto_tfm_alg_alignmask(tfm); + struct digest_alg *digest = &tfm->__crt_alg->cra_digest; + if (unlikely((unsigned long)out & alignmask)) { - unsigned int size = crypto_tfm_alg_digestsize(tfm); - u8 buffer[size + alignmask]; - u8 *dst = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); - tfm->__crt_alg->cra_digest.dia_final(tfm, dst); - memcpy(out, dst, size); + unsigned long align = alignmask + 1; + unsigned long addr = (unsigned long)crypto_tfm_ctx(tfm); + u8 *dst = (u8 *)ALIGN(addr, align) + + ALIGN(tfm->__crt_alg->cra_ctxsize, align); + + digest->dia_final(tfm, dst); + memcpy(out, dst, digest->dia_digestsize); } else - tfm->__crt_alg->cra_digest.dia_final(tfm, out); + digest->dia_final(tfm, out); } static int nosetkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) diff --git a/crypto/internal.h b/crypto/internal.h index 03c00b0e6b60..b110b979b988 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -99,7 +99,14 @@ static inline void crypto_exit_proc(void) static inline unsigned int crypto_digest_ctxsize(struct crypto_alg *alg, int flags) { - return alg->cra_ctxsize; + unsigned int len = alg->cra_ctxsize; + + if (alg->cra_alignmask) { + len = ALIGN(len, (unsigned long)alg->cra_alignmask + 1); + len += alg->cra_digest.dia_digestsize; + } + + return len; } static inline unsigned int crypto_cipher_ctxsize(struct crypto_alg *alg, From c907ee76d8456fe1d98f40b5febfc7802a73b784 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 21 Aug 2006 22:04:03 +1000 Subject: [PATCH 0301/1063] [CRYPTO] tcrypt: Use test_hash for crc32c Now that crc32c has been fixed to conform with standard digest semantics, we can use test_hash for it. I've turned the last test into a chunky test. Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 106 +--------------------------- crypto/tcrypt.h | 179 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 180 insertions(+), 105 deletions(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 606777074671..56d0d8b3bcf2 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -762,108 +762,6 @@ out: crypto_free_tfm(tfm); } -static void test_crc32c(void) -{ -#define NUMVEC 6 -#define VECSIZE 40 - - int i, j, pass; - u32 crc; - u8 b, test_vec[NUMVEC][VECSIZE]; - static u32 vec_results[NUMVEC] = { - 0x0e2c157f, 0xe980ebf6, 0xde74bded, - 0xd579c862, 0xba979ad0, 0x2b29d913 - }; - static u32 tot_vec_results = 0x24c5d375; - - struct scatterlist sg[NUMVEC]; - struct crypto_tfm *tfm; - char *fmtdata = "testing crc32c initialized to %08x: %s\n"; -#define SEEDTESTVAL 0xedcba987 - u32 seed; - - printk("\ntesting crc32c\n"); - - tfm = crypto_alloc_tfm("crc32c", 0); - if (tfm == NULL) { - printk("failed to load transform for crc32c\n"); - return; - } - - crypto_digest_init(tfm); - crypto_digest_final(tfm, (u8*)&crc); - printk(fmtdata, crc, (crc == 0) ? "pass" : "ERROR"); - - /* - * stuff test_vec with known values, simple incrementing - * byte values. - */ - b = 0; - for (i = 0; i < NUMVEC; i++) { - for (j = 0; j < VECSIZE; j++) - test_vec[i][j] = ++b; - sg_set_buf(&sg[i], test_vec[i], VECSIZE); - } - - seed = SEEDTESTVAL; - (void)crypto_digest_setkey(tfm, (const u8*)&seed, sizeof(u32)); - crypto_digest_init(tfm); - crypto_digest_final(tfm, (u8*)&crc); - printk("testing crc32c setkey returns %08x : %s\n", crc, (crc == (SEEDTESTVAL ^ ~(u32)0)) ? - "pass" : "ERROR"); - - printk("testing crc32c using update/final:\n"); - - pass = 1; /* assume all is well */ - - for (i = 0; i < NUMVEC; i++) { - seed = ~(u32)0; - (void)crypto_digest_setkey(tfm, (const u8*)&seed, sizeof(u32)); - crypto_digest_init(tfm); - crypto_digest_update(tfm, &sg[i], 1); - crypto_digest_final(tfm, (u8*)&crc); - if (crc == vec_results[i]) { - printk(" %08x:OK", crc); - } else { - printk(" %08x:BAD, wanted %08x\n", crc, vec_results[i]); - pass = 0; - } - } - - printk("\ntesting crc32c using incremental accumulator:\n"); - crc = 0; - for (i = 0; i < NUMVEC; i++) { - seed = (crc ^ ~(u32)0); - (void)crypto_digest_setkey(tfm, (const u8*)&seed, sizeof(u32)); - crypto_digest_init(tfm); - crypto_digest_update(tfm, &sg[i], 1); - crypto_digest_final(tfm, (u8*)&crc); - } - if (crc == tot_vec_results) { - printk(" %08x:OK", crc); - } else { - printk(" %08x:BAD, wanted %08x\n", crc, tot_vec_results); - pass = 0; - } - - printk("\ntesting crc32c using digest:\n"); - seed = ~(u32)0; - (void)crypto_digest_setkey(tfm, (const u8*)&seed, sizeof(u32)); - crypto_digest_init(tfm); - crypto_digest_digest(tfm, sg, NUMVEC, (u8*)&crc); - if (crc == tot_vec_results) { - printk(" %08x:OK", crc); - } else { - printk(" %08x:BAD, wanted %08x\n", crc, tot_vec_results); - pass = 0; - } - - printk("\n%s\n", pass ? "pass" : "ERROR"); - - crypto_free_tfm(tfm); - printk("crc32c test complete\n"); -} - static void test_available(void) { char **name = check; @@ -969,7 +867,7 @@ static void do_test(void) test_hash("tgr160", tgr160_tv_template, TGR160_TEST_VECTORS); test_hash("tgr128", tgr128_tv_template, TGR128_TEST_VECTORS); test_deflate(); - test_crc32c(); + test_hash("crc32c", crc32c_tv_template, CRC32C_TEST_VECTORS); #ifdef CONFIG_CRYPTO_HMAC test_hmac("md5", hmac_md5_tv_template, HMAC_MD5_TEST_VECTORS); test_hmac("sha1", hmac_sha1_tv_template, HMAC_SHA1_TEST_VECTORS); @@ -1065,7 +963,7 @@ static void do_test(void) break; case 18: - test_crc32c(); + test_hash("crc32c", crc32c_tv_template, CRC32C_TEST_VECTORS); break; case 19: diff --git a/crypto/tcrypt.h b/crypto/tcrypt.h index 1fac5602f633..408d5aad5864 100644 --- a/crypto/tcrypt.h +++ b/crypto/tcrypt.h @@ -28,7 +28,7 @@ struct hash_testvec { /* only used with keyed hash algorithms */ char key[128] __attribute__ ((__aligned__(4))); - char plaintext[128]; + char plaintext[240]; char digest[MAX_DIGEST_SIZE]; unsigned char tap[MAX_TAP]; unsigned char psize; @@ -2896,6 +2896,183 @@ static struct hash_testvec michael_mic_tv_template[] = { } }; +/* + * CRC32C test vectors + */ +#define CRC32C_TEST_VECTORS 14 + +static struct hash_testvec crc32c_tv_template[] = { + { + .psize = 0, + .digest = { 0x00, 0x00, 0x00, 0x00 } + }, + { + .key = { 0x87, 0xa9, 0xcb, 0xed }, + .ksize = 4, + .psize = 0, + .digest = { 0x78, 0x56, 0x34, 0x12 }, + }, + { + .key = { 0xff, 0xff, 0xff, 0xff }, + .ksize = 4, + .plaintext = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, + 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, + 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, + 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28 }, + .psize = 40, + .digest = { 0x7f, 0x15, 0x2c, 0x0e } + }, + { + .key = { 0xff, 0xff, 0xff, 0xff }, + .ksize = 4, + .plaintext = { 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, + 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, + 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, + 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, + 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50 }, + .psize = 40, + .digest = { 0xf6, 0xeb, 0x80, 0xe9 } + }, + { + .key = { 0xff, 0xff, 0xff, 0xff }, + .ksize = 4, + .plaintext = { 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, + 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, + 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, + 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, + 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78 }, + .psize = 40, + .digest = { 0xed, 0xbd, 0x74, 0xde } + }, + { + .key = { 0xff, 0xff, 0xff, 0xff }, + .ksize = 4, + .plaintext = { 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, + 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, + 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, + 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, + 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0 }, + .psize = 40, + .digest = { 0x62, 0xc8, 0x79, 0xd5 } + }, + { + .key = { 0xff, 0xff, 0xff, 0xff }, + .ksize = 4, + .plaintext = { 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, + 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, + 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, + 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, + 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8 }, + .psize = 40, + .digest = { 0xd0, 0x9a, 0x97, 0xba } + }, + { + .key = { 0xff, 0xff, 0xff, 0xff }, + .ksize = 4, + .plaintext = { 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, + 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, + 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, + 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, + 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0 }, + .psize = 40, + .digest = { 0x13, 0xd9, 0x29, 0x2b } + }, + { + .key = { 0x80, 0xea, 0xd3, 0xf1 }, + .ksize = 4, + .plaintext = { 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, + 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, + 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, + 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, + 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50 }, + .psize = 40, + .digest = { 0x0c, 0xb5, 0xe2, 0xa2 } + }, + { + .key = { 0xf3, 0x4a, 0x1d, 0x5d }, + .ksize = 4, + .plaintext = { 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, + 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, + 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, + 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, + 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78 }, + .psize = 40, + .digest = { 0xd1, 0x7f, 0xfb, 0xa6 } + }, + { + .key = { 0x2e, 0x80, 0x04, 0x59 }, + .ksize = 4, + .plaintext = { 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, + 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, + 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, + 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, + 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0 }, + .psize = 40, + .digest = { 0x59, 0x33, 0xe6, 0x7a } + }, + { + .key = { 0xa6, 0xcc, 0x19, 0x85 }, + .ksize = 4, + .plaintext = { 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, + 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, + 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, + 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, + 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8 }, + .psize = 40, + .digest = { 0xbe, 0x03, 0x01, 0xd2 } + }, + { + .key = { 0x41, 0xfc, 0xfe, 0x2d }, + .ksize = 4, + .plaintext = { 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, + 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, + 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, + 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, + 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0 }, + .psize = 40, + .digest = { 0x75, 0xd3, 0xc5, 0x24 } + }, + { + .key = { 0xff, 0xff, 0xff, 0xff }, + .ksize = 4, + .plaintext = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, + 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, + 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, + 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, + 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, + 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, + 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, + 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, + 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, + 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, + 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, + 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, + 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, + 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, + 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, + 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, + 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, + 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, + 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, + 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, + 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, + 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, + 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, + 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, + 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, + 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, + 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, + 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, + 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0 }, + .psize = 240, + .digest = { 0x75, 0xd3, 0xc5, 0x24 }, + .np = 2, + .tap = { 31, 209 } + }, +}; + /* * Cipher speed tests */ From df89820ebd5bbf4f3c6b5f8ee7d9e983107f6a91 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 14 Jul 2006 10:42:27 +1000 Subject: [PATCH 0302/1063] [CRYPTO] cipher: Removed special IV checks for ECB This patch makes IV operations on ECB fail through nocrypt_iv rather than calling BUG(). This is needed to generalise CBC/ECB using the template mechanism. Signed-off-by: Herbert Xu --- crypto/cipher.c | 2 ++ include/linux/crypto.h | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/cipher.c b/crypto/cipher.c index 56406a4a88d4..aebc4a2adc80 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -399,6 +399,8 @@ int crypto_init_cipher_ops(struct crypto_tfm *tfm) case CRYPTO_TFM_MODE_ECB: ops->cit_encrypt = ecb_encrypt; ops->cit_decrypt = ecb_decrypt; + ops->cit_encrypt_iv = nocrypt_iv; + ops->cit_decrypt_iv = nocrypt_iv; break; case CRYPTO_TFM_MODE_CBC: diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 053bfab43e8d..dbdfc7c79367 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -422,7 +422,6 @@ static inline int crypto_cipher_encrypt_iv(struct crypto_tfm *tfm, unsigned int nbytes, u8 *iv) { BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER); - BUG_ON(tfm->crt_cipher.cit_mode == CRYPTO_TFM_MODE_ECB); return tfm->crt_cipher.cit_encrypt_iv(tfm, dst, src, nbytes, iv); } @@ -441,7 +440,6 @@ static inline int crypto_cipher_decrypt_iv(struct crypto_tfm *tfm, unsigned int nbytes, u8 *iv) { BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER); - BUG_ON(tfm->crt_cipher.cit_mode == CRYPTO_TFM_MODE_ECB); return tfm->crt_cipher.cit_decrypt_iv(tfm, dst, src, nbytes, iv); } From 7fed0bf271b374be4c98a5880faed4b1128e78e9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 6 Aug 2006 23:10:45 +1000 Subject: [PATCH 0303/1063] [CRYPTO] api: Add common instance initialisation code This patch adds the helpers crypto_get_attr_alg and crypto_alloc_instance which can be used by simple one-argument templates like hmac to process input parameters and allocate instances. Signed-off-by: Herbert Xu --- crypto/algapi.c | 53 +++++++++++++++++++++++++++++++++++++++++ include/crypto/algapi.h | 5 ++++ 2 files changed, 58 insertions(+) diff --git a/crypto/algapi.c b/crypto/algapi.c index 36c4f1bdb521..c91530021e9c 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "internal.h" @@ -415,6 +416,58 @@ int crypto_unregister_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(crypto_unregister_notifier); +struct crypto_alg *crypto_get_attr_alg(void *param, unsigned int len, + u32 type, u32 mask) +{ + struct rtattr *rta = param; + struct crypto_attr_alg *alga; + + if (!RTA_OK(rta, len)) + return ERR_PTR(-EBADR); + if (rta->rta_type != CRYPTOA_ALG || RTA_PAYLOAD(rta) < sizeof(*alga)) + return ERR_PTR(-EINVAL); + + alga = RTA_DATA(rta); + alga->name[CRYPTO_MAX_ALG_NAME - 1] = 0; + + return crypto_alg_mod_lookup(alga->name, type, mask); +} +EXPORT_SYMBOL_GPL(crypto_get_attr_alg); + +struct crypto_instance *crypto_alloc_instance(const char *name, + struct crypto_alg *alg) +{ + struct crypto_instance *inst; + struct crypto_spawn *spawn; + int err; + + inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); + if (!inst) + return ERR_PTR(-ENOMEM); + + err = -ENAMETOOLONG; + if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", name, + alg->cra_name) >= CRYPTO_MAX_ALG_NAME) + goto err_free_inst; + + if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", + name, alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) + goto err_free_inst; + + spawn = crypto_instance_ctx(inst); + err = crypto_init_spawn(spawn, alg, inst); + + if (err) + goto err_free_inst; + + return inst; + +err_free_inst: + kfree(inst); + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(crypto_alloc_instance); + static int __init crypto_algapi_init(void) { crypto_init_proc(); diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index b20f4bdb23ba..1a598f829417 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -51,6 +51,11 @@ int crypto_init_spawn(struct crypto_spawn *spawn, struct crypto_alg *alg, void crypto_drop_spawn(struct crypto_spawn *spawn); struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn); +struct crypto_alg *crypto_get_attr_alg(void *param, unsigned int len, + u32 type, u32 mask); +struct crypto_instance *crypto_alloc_instance(const char *name, + struct crypto_alg *alg); + static inline void *crypto_instance_ctx(struct crypto_instance *inst) { return inst->__ctx; From f3f632d61ae9af85d436706ee8e33af1a7fb9c28 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 6 Aug 2006 23:12:59 +1000 Subject: [PATCH 0304/1063] [CRYPTO] api: Added asynchronous flag This patch adds the asynchronous flag and changes all existing users to only look up algorithms that are synchronous. Signed-off-by: Herbert Xu --- crypto/api.c | 5 +++-- include/linux/crypto.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/crypto/api.c b/crypto/api.c index 7e5522cf856e..1e4692a13474 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -355,7 +355,7 @@ struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags) do { struct crypto_alg *alg; - alg = crypto_alg_mod_lookup(name, 0, 0); + alg = crypto_alg_mod_lookup(name, 0, CRYPTO_ALG_ASYNC); err = PTR_ERR(alg); if (IS_ERR(alg)) continue; @@ -394,7 +394,8 @@ void crypto_free_tfm(struct crypto_tfm *tfm) int crypto_alg_available(const char *name, u32 flags) { int ret = 0; - struct crypto_alg *alg = crypto_alg_mod_lookup(name, 0, 0); + struct crypto_alg *alg = crypto_alg_mod_lookup(name, 0, + CRYPTO_ALG_ASYNC); if (!IS_ERR(alg)) { crypto_mod_put(alg); diff --git a/include/linux/crypto.h b/include/linux/crypto.h index dbdfc7c79367..530dc4bf363c 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -37,6 +37,7 @@ #define CRYPTO_ALG_LARVAL 0x00000010 #define CRYPTO_ALG_DEAD 0x00000020 #define CRYPTO_ALG_DYING 0x00000040 +#define CRYPTO_ALG_ASYNC 0x00000080 /* * Transform masks and values (for crt_flags). From 65b75c36f4e8422602826c75c803136e0da94122 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 21 Aug 2006 21:18:50 +1000 Subject: [PATCH 0305/1063] [CRYPTO] s390: Added missing driver name and priority Accelerated versions of crypto algorithms must carry a distinct driver name and priority in order to distinguish themselves from their generic counter- part. Signed-off-by: Herbert Xu --- arch/s390/crypto/aes_s390.c | 2 ++ arch/s390/crypto/crypt_s390.h | 2 ++ arch/s390/crypto/des_s390.c | 6 ++++++ arch/s390/crypto/sha1_s390.c | 2 ++ arch/s390/crypto/sha256_s390.c | 2 ++ 5 files changed, 14 insertions(+) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index c7c43c9de0d9..220300e760d8 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -222,6 +222,8 @@ static unsigned int aes_decrypt_cbc(const struct cipher_desc *desc, u8 *out, static struct crypto_alg aes_alg = { .cra_name = "aes", + .cra_driver_name = "aes-s390", + .cra_priority = CRYPT_S390_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct s390_aes_ctx), diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h index d1c259a7fe33..d1d330797f75 100644 --- a/arch/s390/crypto/crypt_s390.h +++ b/arch/s390/crypto/crypt_s390.h @@ -20,6 +20,8 @@ #define CRYPT_S390_OP_MASK 0xFF00 #define CRYPT_S390_FUNC_MASK 0x00FF +#define CRYPT_S390_PRIORITY 300 + /* s930 cryptographic operations */ enum crypt_s390_operations { CRYPT_S390_KM = 0x0100, diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 170757b3451d..3fd5d37d5e05 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -135,6 +135,8 @@ static unsigned int des_decrypt_cbc(const struct cipher_desc *desc, u8 *out, static struct crypto_alg des_alg = { .cra_name = "des", + .cra_driver_name = "des-s390", + .cra_priority = CRYPT_S390_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = DES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypt_s390_des_ctx), @@ -271,6 +273,8 @@ static unsigned int des3_128_decrypt_cbc(const struct cipher_desc *desc, static struct crypto_alg des3_128_alg = { .cra_name = "des3_ede128", + .cra_driver_name = "des3_ede128-s390", + .cra_priority = CRYPT_S390_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = DES3_128_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypt_s390_des3_128_ctx), @@ -411,6 +415,8 @@ static unsigned int des3_192_decrypt_cbc(const struct cipher_desc *desc, static struct crypto_alg des3_192_alg = { .cra_name = "des3_ede", + .cra_driver_name = "des3_ede-s390", + .cra_priority = CRYPT_S390_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = DES3_192_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypt_s390_des3_192_ctx), diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index 9d34a35b1aa5..49ca8690ee39 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -126,6 +126,8 @@ static void sha1_final(struct crypto_tfm *tfm, u8 *out) static struct crypto_alg alg = { .cra_name = "sha1", + .cra_driver_name = "sha1-s390", + .cra_priority = CRYPT_S390_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_DIGEST, .cra_blocksize = SHA1_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypt_s390_sha1_ctx), diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index f573df30f31d..8e4e67503fe7 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -127,6 +127,8 @@ static void sha256_final(struct crypto_tfm *tfm, u8 *out) static struct crypto_alg alg = { .cra_name = "sha256", + .cra_driver_name = "sha256-s390", + .cra_priority = CRYPT_S390_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_DIGEST, .cra_blocksize = SHA256_BLOCK_SIZE, .cra_ctxsize = sizeof(struct s390_sha256_ctx), From 6d7d684d635ac5a345f075015f2c84169c111c6a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 30 Jul 2006 11:53:01 +1000 Subject: [PATCH 0306/1063] [CRYPTO] api: Added crypto_alloc_base Up until now all crypto transforms have been of the same type, struct crypto_tfm, regardless of whether they are ciphers, digests, or other types. As a result of that, we check the types at run-time before each crypto operation. This is rather cumbersome. We could instead use different C types for each crypto type to ensure that the correct types are used at compile time. That is, we would have crypto_cipher/crypto_digest instead of just crypto_tfm. The appropriate type would then be required for the actual operations such as crypto_digest_digest. Now that we have the type/mask fields when looking up algorithms, it is easy to request for an algorithm of the precise type that the user wants. However, crypto_alloc_tfm currently does not expose these new attributes. This patch introduces the function crypto_alloc_base which will carry these new parameters. It will be renamed to crypto_alloc_tfm once all existing users have been converted. Signed-off-by: Herbert Xu --- crypto/api.c | 60 ++++++++++++++++++++++++++++++++++++++++++ include/linux/crypto.h | 14 +++------- 2 files changed, 63 insertions(+), 11 deletions(-) diff --git a/crypto/api.c b/crypto/api.c index 1e4692a13474..bc4b7901acdf 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -372,6 +372,66 @@ struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags) return tfm; } +/* + * crypto_alloc_base - Locate algorithm and allocate transform + * @alg_name: Name of algorithm + * @type: Type of algorithm + * @mask: Mask for type comparison + * + * crypto_alloc_base() will first attempt to locate an already loaded + * algorithm. If that fails and the kernel supports dynamically loadable + * modules, it will then attempt to load a module of the same name or + * alias. If that fails it will send a query to any loaded crypto manager + * to construct an algorithm on the fly. A refcount is grabbed on the + * algorithm which is then associated with the new transform. + * + * The returned transform is of a non-determinate type. Most people + * should use one of the more specific allocation functions such as + * crypto_alloc_blkcipher. + * + * In case of error the return value is an error pointer. + */ +struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask) +{ + struct crypto_tfm *tfm; + int err; + + for (;;) { + struct crypto_alg *alg; + + alg = crypto_alg_mod_lookup(alg_name, type, mask); + err = PTR_ERR(alg); + tfm = ERR_PTR(err); + if (IS_ERR(alg)) + goto err; + + tfm = __crypto_alloc_tfm(alg, 0); + if (!IS_ERR(tfm)) + break; + + crypto_mod_put(alg); + err = PTR_ERR(tfm); + +err: + if (err != -EAGAIN) + break; + if (signal_pending(current)) { + err = -EINTR; + break; + } + }; + + return tfm; +} +EXPORT_SYMBOL_GPL(crypto_alloc_base); + +/* + * crypto_free_tfm - Free crypto transform + * @tfm: Transform to free + * + * crypto_free_tfm() frees up the transform and any associated resources, + * then drops the refcount on the associated algorithm. + */ void crypto_free_tfm(struct crypto_tfm *tfm) { struct crypto_alg *alg; diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 530dc4bf363c..6847ab0ea30e 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -194,8 +194,8 @@ static inline int crypto_alg_available(const char *name, u32 flags) /* * Transforms: user-instantiated objects which encapsulate algorithms - * and core processing logic. Managed via crypto_alloc_tfm() and - * crypto_free_tfm(), as well as the various helpers below. + * and core processing logic. Managed via crypto_alloc_*() and + * crypto_free_*(), as well as the various helpers below. */ struct cipher_tfm { @@ -278,16 +278,8 @@ struct crypto_attr_alg { * Transform user interface. */ -/* - * crypto_alloc_tfm() will first attempt to locate an already loaded algorithm. - * If that fails and the kernel supports dynamically loadable modules, it - * will then attempt to load a module of the same name or alias. A refcount - * is grabbed on the algorithm which is then associated with the new transform. - * - * crypto_free_tfm() frees up the transform and any associated resources, - * then drops the refcount on the associated algorithm. - */ struct crypto_tfm *crypto_alloc_tfm(const char *alg_name, u32 tfm_flags); +struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask); void crypto_free_tfm(struct crypto_tfm *tfm); /* From 8f21cf0d2bae04ece761595036c9da8328b279aa Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 30 Jul 2006 11:53:45 +1000 Subject: [PATCH 0307/1063] [CRYPTO] api: Feed flag directly to crypto_yield The sleeping flag used to determine whether crypto_yield can actually yield is really a per-operation flag rather than a per-tfm flag. This patch changes crypto_yield to take a flag directly so that we can start using a per-operation flag instead the tfm flag. Signed-off-by: Herbert Xu --- crypto/cipher.c | 2 +- crypto/digest.c | 2 +- crypto/internal.h | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/crypto/cipher.c b/crypto/cipher.c index aebc4a2adc80..f573c59ed9dc 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -145,7 +145,7 @@ static int crypt(const struct cipher_desc *desc, if (!nbytes) break; - crypto_yield(tfm); + crypto_yield(tfm->crt_flags); } if (buffer) diff --git a/crypto/digest.c b/crypto/digest.c index 19e75563776b..96244a528844 100644 --- a/crypto/digest.c +++ b/crypto/digest.c @@ -55,7 +55,7 @@ static void update(struct crypto_tfm *tfm, tfm->__crt_alg->cra_digest.dia_update(tfm, p, bytes_from_page); crypto_kunmap(src, 0); - crypto_yield(tfm); + crypto_yield(tfm->crt_flags); offset = 0; pg++; l -= bytes_from_page; diff --git a/crypto/internal.h b/crypto/internal.h index b110b979b988..7dc04efb55c6 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -67,9 +67,9 @@ static inline void crypto_kunmap(void *vaddr, int out) kunmap_atomic(vaddr, crypto_kmap_type(out)); } -static inline void crypto_yield(struct crypto_tfm *tfm) +static inline void crypto_yield(u32 flags) { - if (tfm->crt_flags & CRYPTO_TFM_REQ_MAY_SLEEP) + if (flags & CRYPTO_TFM_REQ_MAY_SLEEP) cond_resched(); } From e853c3cfa8cc24869ecd2526e589bcb176bc12e9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 22 Aug 2006 00:06:54 +1000 Subject: [PATCH 0308/1063] [CRYPTO] api: Added crypto_type support This patch adds the crypto_type structure which will be used for all new crypto algorithm types, beginning with block ciphers. The primary purpose of this abstraction is to allow different crypto_type objects for crypto algorithms of the same type, in particular, there will be a different crypto_type objects for asynchronous algorithms. Signed-off-by: Herbert Xu --- crypto/api.c | 32 +++++++++++++++++++++++--------- crypto/proc.c | 5 ++++- include/crypto/algapi.h | 8 ++++++++ include/linux/crypto.h | 3 +++ 4 files changed, 38 insertions(+), 10 deletions(-) diff --git a/crypto/api.c b/crypto/api.c index bc4b7901acdf..edaa843d8e83 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -226,17 +226,18 @@ static int crypto_init_flags(struct crypto_tfm *tfm, u32 flags) case CRYPTO_ALG_TYPE_COMPRESS: return crypto_init_compress_flags(tfm, flags); - - default: - break; } - BUG(); - return -EINVAL; + return 0; } static int crypto_init_ops(struct crypto_tfm *tfm) { + const struct crypto_type *type = tfm->__crt_alg->cra_type; + + if (type) + return type->init(tfm); + switch (crypto_tfm_alg_type(tfm)) { case CRYPTO_ALG_TYPE_CIPHER: return crypto_init_cipher_ops(tfm); @@ -257,6 +258,14 @@ static int crypto_init_ops(struct crypto_tfm *tfm) static void crypto_exit_ops(struct crypto_tfm *tfm) { + const struct crypto_type *type = tfm->__crt_alg->cra_type; + + if (type) { + if (type->exit) + type->exit(tfm); + return; + } + switch (crypto_tfm_alg_type(tfm)) { case CRYPTO_ALG_TYPE_CIPHER: crypto_exit_cipher_ops(tfm); @@ -278,26 +287,31 @@ static void crypto_exit_ops(struct crypto_tfm *tfm) static unsigned int crypto_ctxsize(struct crypto_alg *alg, int flags) { + const struct crypto_type *type = alg->cra_type; unsigned int len; + len = alg->cra_alignmask & ~(crypto_tfm_ctx_alignment() - 1); + if (type) + return len + type->ctxsize(alg); + switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) { default: BUG(); case CRYPTO_ALG_TYPE_CIPHER: - len = crypto_cipher_ctxsize(alg, flags); + len += crypto_cipher_ctxsize(alg, flags); break; case CRYPTO_ALG_TYPE_DIGEST: - len = crypto_digest_ctxsize(alg, flags); + len += crypto_digest_ctxsize(alg, flags); break; case CRYPTO_ALG_TYPE_COMPRESS: - len = crypto_compress_ctxsize(alg, flags); + len += crypto_compress_ctxsize(alg, flags); break; } - return len + (alg->cra_alignmask & ~(crypto_tfm_ctx_alignment() - 1)); + return len; } void crypto_shoot_alg(struct crypto_alg *alg) diff --git a/crypto/proc.c b/crypto/proc.c index 9e573b17e887..dabce0676f63 100644 --- a/crypto/proc.c +++ b/crypto/proc.c @@ -78,7 +78,10 @@ static int c_show(struct seq_file *m, void *p) seq_printf(m, "type : compression\n"); break; default: - seq_printf(m, "type : unknown\n"); + if (alg->cra_type && alg->cra_type->show) + alg->cra_type->show(m, alg); + else + seq_printf(m, "type : unknown\n"); break; } diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 1a598f829417..c533c0a291af 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -15,6 +15,14 @@ #include struct module; +struct seq_file; + +struct crypto_type { + unsigned int (*ctxsize)(struct crypto_alg *alg); + int (*init)(struct crypto_tfm *tfm); + void (*exit)(struct crypto_tfm *tfm); + void (*show)(struct seq_file *m, struct crypto_alg *alg); +}; struct crypto_instance { struct crypto_alg alg; diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 6847ab0ea30e..8e9c407b00d2 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -90,6 +90,7 @@ struct scatterlist; struct crypto_tfm; +struct crypto_type; struct cipher_desc { struct crypto_tfm *tfm; @@ -161,6 +162,8 @@ struct crypto_alg { char cra_name[CRYPTO_MAX_ALG_NAME]; char cra_driver_name[CRYPTO_MAX_ALG_NAME]; + const struct crypto_type *cra_type; + union { struct cipher_alg cipher; struct digest_alg digest; From f28776a369b12f9a03a822a8e1090ed670a41f4f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 13 Aug 2006 20:58:18 +1000 Subject: [PATCH 0309/1063] [CRYPTO] cipher: Added encrypt_one/decrypt_one This patch adds two new operations for the simple cipher that encrypts or decrypts a single block at a time. This will be the main interface after the existing block operations have moved over to the new block ciphers. It also adds the crypto_cipher type which is currently only used on the new operations but will be extended to setkey as well once existing users have been converted to use block ciphers where applicable. Signed-off-by: Herbert Xu --- crypto/cipher.c | 48 +++++++++++++++++++++ include/crypto/algapi.h | 5 +++ include/linux/crypto.h | 96 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+) diff --git a/crypto/cipher.c b/crypto/cipher.c index f573c59ed9dc..d8ca0ec8d0be 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -388,12 +388,60 @@ int crypto_init_cipher_flags(struct crypto_tfm *tfm, u32 flags) return 0; } +static void cipher_crypt_unaligned(void (*fn)(struct crypto_tfm *, u8 *, + const u8 *), + struct crypto_tfm *tfm, + u8 *dst, const u8 *src) +{ + unsigned long alignmask = crypto_tfm_alg_alignmask(tfm); + unsigned int size = crypto_tfm_alg_blocksize(tfm); + u8 buffer[size + alignmask]; + u8 *tmp = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); + + memcpy(tmp, src, size); + fn(tfm, tmp, tmp); + memcpy(dst, tmp, size); +} + +static void cipher_encrypt_unaligned(struct crypto_tfm *tfm, + u8 *dst, const u8 *src) +{ + unsigned long alignmask = crypto_tfm_alg_alignmask(tfm); + struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; + + if (unlikely(((unsigned long)dst | (unsigned long)src) & alignmask)) { + cipher_crypt_unaligned(cipher->cia_encrypt, tfm, dst, src); + return; + } + + cipher->cia_encrypt(tfm, dst, src); +} + +static void cipher_decrypt_unaligned(struct crypto_tfm *tfm, + u8 *dst, const u8 *src) +{ + unsigned long alignmask = crypto_tfm_alg_alignmask(tfm); + struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; + + if (unlikely(((unsigned long)dst | (unsigned long)src) & alignmask)) { + cipher_crypt_unaligned(cipher->cia_decrypt, tfm, dst, src); + return; + } + + cipher->cia_decrypt(tfm, dst, src); +} + int crypto_init_cipher_ops(struct crypto_tfm *tfm) { int ret = 0; struct cipher_tfm *ops = &tfm->crt_cipher; + struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; ops->cit_setkey = setkey; + ops->cit_encrypt_one = crypto_tfm_alg_alignmask(tfm) ? + cipher_encrypt_unaligned : cipher->cia_encrypt; + ops->cit_decrypt_one = crypto_tfm_alg_alignmask(tfm) ? + cipher_decrypt_unaligned : cipher->cia_decrypt; switch (tfm->crt_cipher.cit_mode) { case CRYPTO_TFM_MODE_ECB: diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index c533c0a291af..6f9fb27b2071 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -69,5 +69,10 @@ static inline void *crypto_instance_ctx(struct crypto_instance *inst) return inst->__ctx; } +static inline struct cipher_alg *crypto_cipher_alg(struct crypto_cipher *tfm) +{ + return &crypto_cipher_tfm(tfm)->__crt_alg->cra_cipher; +} + #endif /* _CRYPTO_ALGAPI_H */ diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 8e9c407b00d2..fdecee83878c 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -224,6 +224,8 @@ struct cipher_tfm { struct scatterlist *src, unsigned int nbytes, u8 *iv); void (*cit_xor_block)(u8 *dst, const u8 *src); + void (*cit_encrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); + void (*cit_decrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); }; struct digest_tfm { @@ -268,6 +270,8 @@ struct crypto_tfm { void *__crt_ctx[] CRYPTO_MINALIGN_ATTR; }; +#define crypto_cipher crypto_tfm + enum { CRYPTOA_UNSPEC, CRYPTOA_ALG, @@ -347,6 +351,21 @@ static inline unsigned int crypto_tfm_alg_alignmask(struct crypto_tfm *tfm) return tfm->__crt_alg->cra_alignmask; } +static inline u32 crypto_tfm_get_flags(struct crypto_tfm *tfm) +{ + return tfm->crt_flags; +} + +static inline void crypto_tfm_set_flags(struct crypto_tfm *tfm, u32 flags) +{ + tfm->crt_flags |= flags; +} + +static inline void crypto_tfm_clear_flags(struct crypto_tfm *tfm, u32 flags) +{ + tfm->crt_flags &= ~flags; +} + static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm) { return tfm->__crt_ctx; @@ -361,6 +380,83 @@ static inline unsigned int crypto_tfm_ctx_alignment(void) /* * API wrappers. */ +static inline struct crypto_cipher *__crypto_cipher_cast(struct crypto_tfm *tfm) +{ + return (struct crypto_cipher *)tfm; +} + +static inline struct crypto_cipher *crypto_cipher_cast(struct crypto_tfm *tfm) +{ + BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER); + return __crypto_cipher_cast(tfm); +} + +static inline struct crypto_cipher *crypto_alloc_cipher(const char *alg_name, + u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_CIPHER; + mask |= CRYPTO_ALG_TYPE_MASK; + + return __crypto_cipher_cast(crypto_alloc_base(alg_name, type, mask)); +} + +static inline struct crypto_tfm *crypto_cipher_tfm(struct crypto_cipher *tfm) +{ + return tfm; +} + +static inline void crypto_free_cipher(struct crypto_cipher *tfm) +{ + crypto_free_tfm(crypto_cipher_tfm(tfm)); +} + +static inline struct cipher_tfm *crypto_cipher_crt(struct crypto_cipher *tfm) +{ + return &crypto_cipher_tfm(tfm)->crt_cipher; +} + +static inline unsigned int crypto_cipher_blocksize(struct crypto_cipher *tfm) +{ + return crypto_tfm_alg_blocksize(crypto_cipher_tfm(tfm)); +} + +static inline unsigned int crypto_cipher_alignmask(struct crypto_cipher *tfm) +{ + return crypto_tfm_alg_alignmask(crypto_cipher_tfm(tfm)); +} + +static inline u32 crypto_cipher_get_flags(struct crypto_cipher *tfm) +{ + return crypto_tfm_get_flags(crypto_cipher_tfm(tfm)); +} + +static inline void crypto_cipher_set_flags(struct crypto_cipher *tfm, + u32 flags) +{ + crypto_tfm_set_flags(crypto_cipher_tfm(tfm), flags); +} + +static inline void crypto_cipher_clear_flags(struct crypto_cipher *tfm, + u32 flags) +{ + crypto_tfm_clear_flags(crypto_cipher_tfm(tfm), flags); +} + +static inline void crypto_cipher_encrypt_one(struct crypto_cipher *tfm, + u8 *dst, const u8 *src) +{ + crypto_cipher_crt(tfm)->cit_encrypt_one(crypto_cipher_tfm(tfm), + dst, src); +} + +static inline void crypto_cipher_decrypt_one(struct crypto_cipher *tfm, + u8 *dst, const u8 *src) +{ + crypto_cipher_crt(tfm)->cit_decrypt_one(crypto_cipher_tfm(tfm), + dst, src); +} + static inline void crypto_digest_init(struct crypto_tfm *tfm) { BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST); From 5c64097aa0f6dc4f27718ef47ca9a12538d62860 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 12 Aug 2006 21:56:17 +1000 Subject: [PATCH 0310/1063] [CRYPTO] scatterwalk: Prepare for block ciphers This patch prepares the scatterwalk code for use by the new block cipher type. Firstly it halves the size of scatter_walk on 32-bit platforms. This is important as we allocate at least two of these objects on the stack for each block cipher operation. It also exports the symbols since the block cipher code can be built as a module. Finally there is a hack in scatterwalk_unmap that relies on progress being made. Unfortunately, for hardware crypto we can't guarantee progress to be made since the hardware can fail. So this also gets rid of the hack by not advancing the address returned by scatterwalk_map. Signed-off-by: Herbert Xu --- crypto/cipher.c | 27 ++++++------- crypto/scatterwalk.c | 89 ++++++++++++++++++----------------------- crypto/scatterwalk.h | 50 ++++++++++++++--------- include/crypto/algapi.h | 5 +++ 4 files changed, 88 insertions(+), 83 deletions(-) diff --git a/crypto/cipher.c b/crypto/cipher.c index d8ca0ec8d0be..326461780673 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -45,15 +45,10 @@ static unsigned int crypt_slow(const struct cipher_desc *desc, u8 buffer[bsize * 2 + alignmask]; u8 *src = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); u8 *dst = src + bsize; - unsigned int n; - - n = scatterwalk_copychunks(src, in, bsize, 0); - scatterwalk_advance(in, n); + scatterwalk_copychunks(src, in, bsize, 0); desc->prfn(desc, dst, src, bsize); - - n = scatterwalk_copychunks(dst, out, bsize, 1); - scatterwalk_advance(out, n); + scatterwalk_copychunks(dst, out, bsize, 1); return bsize; } @@ -64,12 +59,16 @@ static inline unsigned int crypt_fast(const struct cipher_desc *desc, unsigned int nbytes, u8 *tmp) { u8 *src, *dst; + u8 *real_src, *real_dst; - src = in->data; - dst = scatterwalk_samebuf(in, out) ? src : out->data; + real_src = scatterwalk_map(in, 0); + real_dst = scatterwalk_map(out, 1); + + src = real_src; + dst = scatterwalk_samebuf(in, out) ? src : real_dst; if (tmp) { - memcpy(tmp, in->data, nbytes); + memcpy(tmp, src, nbytes); src = tmp; dst = tmp; } @@ -77,7 +76,10 @@ static inline unsigned int crypt_fast(const struct cipher_desc *desc, nbytes = desc->prfn(desc, dst, src, nbytes); if (tmp) - memcpy(out->data, tmp, nbytes); + memcpy(real_dst, tmp, nbytes); + + scatterwalk_unmap(real_src, 0); + scatterwalk_unmap(real_dst, 1); scatterwalk_advance(in, nbytes); scatterwalk_advance(out, nbytes); @@ -126,9 +128,6 @@ static int crypt(const struct cipher_desc *desc, tmp = (u8 *)buffer; } - scatterwalk_map(&walk_in, 0); - scatterwalk_map(&walk_out, 1); - n = scatterwalk_clamp(&walk_in, n); n = scatterwalk_clamp(&walk_out, n); diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c index 2953e2cc56f0..35172d3f043b 100644 --- a/crypto/scatterwalk.c +++ b/crypto/scatterwalk.c @@ -15,9 +15,11 @@ */ #include #include +#include #include #include -#include +#include + #include "internal.h" #include "scatterwalk.h" @@ -27,88 +29,77 @@ enum km_type crypto_km_types[] = { KM_SOFTIRQ0, KM_SOFTIRQ1, }; +EXPORT_SYMBOL_GPL(crypto_km_types); -static void memcpy_dir(void *buf, void *sgdata, size_t nbytes, int out) +static inline void memcpy_dir(void *buf, void *sgdata, size_t nbytes, int out) { - if (out) - memcpy(sgdata, buf, nbytes); - else - memcpy(buf, sgdata, nbytes); + void *src = out ? buf : sgdata; + void *dst = out ? sgdata : buf; + + memcpy(dst, src, nbytes); } void scatterwalk_start(struct scatter_walk *walk, struct scatterlist *sg) { - unsigned int rest_of_page; - walk->sg = sg; - walk->page = sg->page; - walk->len_this_segment = sg->length; - BUG_ON(!sg->length); - rest_of_page = PAGE_CACHE_SIZE - (sg->offset & (PAGE_CACHE_SIZE - 1)); - walk->len_this_page = min(sg->length, rest_of_page); walk->offset = sg->offset; } +EXPORT_SYMBOL_GPL(scatterwalk_start); -void scatterwalk_map(struct scatter_walk *walk, int out) +void *scatterwalk_map(struct scatter_walk *walk, int out) { - walk->data = crypto_kmap(walk->page, out) + walk->offset; -} - -static inline void scatterwalk_unmap(struct scatter_walk *walk, int out) -{ - /* walk->data may be pointing the first byte of the next page; - however, we know we transfered at least one byte. So, - walk->data - 1 will be a virtual address in the mapped page. */ - crypto_kunmap(walk->data - 1, out); + return crypto_kmap(scatterwalk_page(walk), out) + + offset_in_page(walk->offset); } +EXPORT_SYMBOL_GPL(scatterwalk_map); static void scatterwalk_pagedone(struct scatter_walk *walk, int out, unsigned int more) { if (out) - flush_dcache_page(walk->page); + flush_dcache_page(scatterwalk_page(walk)); if (more) { - walk->len_this_segment -= walk->len_this_page; - - if (walk->len_this_segment) { - walk->page++; - walk->len_this_page = min(walk->len_this_segment, - (unsigned)PAGE_CACHE_SIZE); - walk->offset = 0; - } - else + walk->offset += PAGE_SIZE - 1; + walk->offset &= PAGE_MASK; + if (walk->offset >= walk->sg->offset + walk->sg->length) scatterwalk_start(walk, sg_next(walk->sg)); } } void scatterwalk_done(struct scatter_walk *walk, int out, int more) { - scatterwalk_unmap(walk, out); - if (walk->len_this_page == 0 || !more) + if (!offset_in_page(walk->offset) || !more) scatterwalk_pagedone(walk, out, more); } +EXPORT_SYMBOL_GPL(scatterwalk_done); -/* - * Do not call this unless the total length of all of the fragments - * has been verified as multiple of the block size. - */ -int scatterwalk_copychunks(void *buf, struct scatter_walk *walk, - size_t nbytes, int out) +void scatterwalk_copychunks(void *buf, struct scatter_walk *walk, + size_t nbytes, int out) { - while (nbytes > walk->len_this_page) { - memcpy_dir(buf, walk->data, walk->len_this_page, out); - buf += walk->len_this_page; - nbytes -= walk->len_this_page; + for (;;) { + unsigned int len_this_page = scatterwalk_pagelen(walk); + u8 *vaddr; + + if (len_this_page > nbytes) + len_this_page = nbytes; + + vaddr = scatterwalk_map(walk, out); + memcpy_dir(buf, vaddr, len_this_page, out); + scatterwalk_unmap(vaddr, out); + + if (nbytes == len_this_page) + break; + + buf += len_this_page; + nbytes -= len_this_page; - scatterwalk_unmap(walk, out); scatterwalk_pagedone(walk, out, 1); - scatterwalk_map(walk, out); } - memcpy_dir(buf, walk->data, nbytes, out); - return nbytes; + scatterwalk_advance(walk, nbytes); } +EXPORT_SYMBOL_GPL(scatterwalk_copychunks); diff --git a/crypto/scatterwalk.h b/crypto/scatterwalk.h index e79925c474a3..ace595a2e119 100644 --- a/crypto/scatterwalk.h +++ b/crypto/scatterwalk.h @@ -14,17 +14,11 @@ #ifndef _CRYPTO_SCATTERWALK_H #define _CRYPTO_SCATTERWALK_H -#include -#include -struct scatter_walk { - struct scatterlist *sg; - struct page *page; - void *data; - unsigned int len_this_page; - unsigned int len_this_segment; - unsigned int offset; -}; +#include +#include + +#include "internal.h" /* Define sg_next is an inline routine now in case we want to change scatterlist to a linked list later. */ @@ -33,26 +27,31 @@ static inline struct scatterlist *sg_next(struct scatterlist *sg) return sg + 1; } -static inline int scatterwalk_samebuf(struct scatter_walk *walk_in, - struct scatter_walk *walk_out) +static inline unsigned long scatterwalk_samebuf(struct scatter_walk *walk_in, + struct scatter_walk *walk_out) { - return walk_in->page == walk_out->page && - walk_in->offset == walk_out->offset; + return !(((walk_in->sg->page - walk_out->sg->page) << PAGE_SHIFT) + + (int)(walk_in->offset - walk_out->offset)); +} + +static inline unsigned int scatterwalk_pagelen(struct scatter_walk *walk) +{ + unsigned int len = walk->sg->offset + walk->sg->length - walk->offset; + unsigned int len_this_page = offset_in_page(~walk->offset) + 1; + return len_this_page > len ? len : len_this_page; } static inline unsigned int scatterwalk_clamp(struct scatter_walk *walk, unsigned int nbytes) { - return nbytes > walk->len_this_page ? walk->len_this_page : nbytes; + unsigned int len_this_page = scatterwalk_pagelen(walk); + return nbytes > len_this_page ? len_this_page : nbytes; } static inline void scatterwalk_advance(struct scatter_walk *walk, unsigned int nbytes) { - walk->data += nbytes; walk->offset += nbytes; - walk->len_this_page -= nbytes; - walk->len_this_segment -= nbytes; } static inline unsigned int scatterwalk_aligned(struct scatter_walk *walk, @@ -61,9 +60,20 @@ static inline unsigned int scatterwalk_aligned(struct scatter_walk *walk, return !(walk->offset & alignmask); } +static inline struct page *scatterwalk_page(struct scatter_walk *walk) +{ + return walk->sg->page + (walk->offset >> PAGE_SHIFT); +} + +static inline void scatterwalk_unmap(void *vaddr, int out) +{ + crypto_kunmap(vaddr, out); +} + void scatterwalk_start(struct scatter_walk *walk, struct scatterlist *sg); -int scatterwalk_copychunks(void *buf, struct scatter_walk *walk, size_t nbytes, int out); -void scatterwalk_map(struct scatter_walk *walk, int out); +void scatterwalk_copychunks(void *buf, struct scatter_walk *walk, + size_t nbytes, int out); +void *scatterwalk_map(struct scatter_walk *walk, int out); void scatterwalk_done(struct scatter_walk *walk, int out, int more); #endif /* _CRYPTO_SCATTERWALK_H */ diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 6f9fb27b2071..f21ae672e8a8 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -50,6 +50,11 @@ struct crypto_spawn { struct crypto_instance *inst; }; +struct scatter_walk { + struct scatterlist *sg; + unsigned int offset; +}; + int crypto_register_template(struct crypto_template *tmpl); void crypto_unregister_template(struct crypto_template *tmpl); struct crypto_template *crypto_lookup_template(const char *name); From 5cde0af2a9825dd1edaca233bd9590566579ef21 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 22 Aug 2006 00:07:53 +1000 Subject: [PATCH 0311/1063] [CRYPTO] cipher: Added block cipher type This patch adds the new type of block ciphers. Unlike current cipher algorithms which operate on a single block at a time, block ciphers operate on an arbitrarily long linear area of data. As it is block-based, it will skip any data remaining at the end which cannot form a block. The block cipher has one major difference when compared to the existing block cipher implementation. The sg walking is now performed by the algorithm rather than the cipher mid-layer. This is needed for drivers that directly support sg lists. It also improves performance for all algorithms as it reduces the total number of indirect calls by one. In future the existing cipher algorithm will be converted to only have a single-block interface. This will be done after all existing users have switched over to the new block cipher type. Signed-off-by: Herbert Xu --- crypto/Kconfig | 4 + crypto/Makefile | 2 + crypto/blkcipher.c | 405 ++++++++++++++++++++++++++++++++++++++++ include/crypto/algapi.h | 65 +++++++ include/linux/crypto.h | 179 ++++++++++++++++++ 5 files changed, 655 insertions(+) create mode 100644 crypto/blkcipher.c diff --git a/crypto/Kconfig b/crypto/Kconfig index 4ce509dba329..68790ad7308d 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -16,6 +16,10 @@ config CRYPTO_ALGAPI help This option provides the API for cryptographic algorithms. +config CRYPTO_BLKCIPHER + tristate + select CRYPTO_ALGAPI + config CRYPTO_MANAGER tristate "Cryptographic algorithm manager" select CRYPTO_ALGAPI diff --git a/crypto/Makefile b/crypto/Makefile index b8745f3d3595..b5051951c636 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -8,6 +8,8 @@ crypto_algapi-$(CONFIG_PROC_FS) += proc.o crypto_algapi-objs := algapi.o $(crypto_algapi-y) obj-$(CONFIG_CRYPTO_ALGAPI) += crypto_algapi.o +obj-$(CONFIG_CRYPTO_BLKCIPHER) += blkcipher.o + obj-$(CONFIG_CRYPTO_MANAGER) += cryptomgr.o obj-$(CONFIG_CRYPTO_HMAC) += hmac.o obj-$(CONFIG_CRYPTO_NULL) += crypto_null.o diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c new file mode 100644 index 000000000000..034c939bf91a --- /dev/null +++ b/crypto/blkcipher.c @@ -0,0 +1,405 @@ +/* + * Block chaining cipher operations. + * + * Generic encrypt/decrypt wrapper for ciphers, handles operations across + * multiple page boundaries by using temporary blocks. In user context, + * the kernel is given a chance to schedule us once per page. + * + * Copyright (c) 2006 Herbert Xu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "internal.h" +#include "scatterwalk.h" + +enum { + BLKCIPHER_WALK_PHYS = 1 << 0, + BLKCIPHER_WALK_SLOW = 1 << 1, + BLKCIPHER_WALK_COPY = 1 << 2, + BLKCIPHER_WALK_DIFF = 1 << 3, +}; + +static int blkcipher_walk_next(struct blkcipher_desc *desc, + struct blkcipher_walk *walk); +static int blkcipher_walk_first(struct blkcipher_desc *desc, + struct blkcipher_walk *walk); + +static inline void blkcipher_map_src(struct blkcipher_walk *walk) +{ + walk->src.virt.addr = scatterwalk_map(&walk->in, 0); +} + +static inline void blkcipher_map_dst(struct blkcipher_walk *walk) +{ + walk->dst.virt.addr = scatterwalk_map(&walk->out, 1); +} + +static inline void blkcipher_unmap_src(struct blkcipher_walk *walk) +{ + scatterwalk_unmap(walk->src.virt.addr, 0); +} + +static inline void blkcipher_unmap_dst(struct blkcipher_walk *walk) +{ + scatterwalk_unmap(walk->dst.virt.addr, 1); +} + +static inline u8 *blkcipher_get_spot(u8 *start, unsigned int len) +{ + if (offset_in_page(start + len) < len) + return (u8 *)((unsigned long)(start + len) & PAGE_MASK); + return start; +} + +static inline unsigned int blkcipher_done_slow(struct crypto_blkcipher *tfm, + struct blkcipher_walk *walk, + unsigned int bsize) +{ + u8 *addr; + unsigned int alignmask = crypto_blkcipher_alignmask(tfm); + + addr = (u8 *)ALIGN((unsigned long)walk->buffer, alignmask + 1); + addr = blkcipher_get_spot(addr, bsize); + scatterwalk_copychunks(addr, &walk->out, bsize, 1); + return bsize; +} + +static inline unsigned int blkcipher_done_fast(struct blkcipher_walk *walk, + unsigned int n) +{ + n = walk->nbytes - n; + + if (walk->flags & BLKCIPHER_WALK_COPY) { + blkcipher_map_dst(walk); + memcpy(walk->dst.virt.addr, walk->page, n); + blkcipher_unmap_dst(walk); + } else if (!(walk->flags & BLKCIPHER_WALK_PHYS)) { + blkcipher_unmap_src(walk); + if (walk->flags & BLKCIPHER_WALK_DIFF) + blkcipher_unmap_dst(walk); + } + + scatterwalk_advance(&walk->in, n); + scatterwalk_advance(&walk->out, n); + + return n; +} + +int blkcipher_walk_done(struct blkcipher_desc *desc, + struct blkcipher_walk *walk, int err) +{ + struct crypto_blkcipher *tfm = desc->tfm; + unsigned int nbytes = 0; + + if (likely(err >= 0)) { + unsigned int bsize = crypto_blkcipher_blocksize(tfm); + unsigned int n; + + if (likely(!(walk->flags & BLKCIPHER_WALK_SLOW))) + n = blkcipher_done_fast(walk, err); + else + n = blkcipher_done_slow(tfm, walk, bsize); + + nbytes = walk->total - n; + err = 0; + } + + scatterwalk_done(&walk->in, 0, nbytes); + scatterwalk_done(&walk->out, 1, nbytes); + + walk->total = nbytes; + walk->nbytes = nbytes; + + if (nbytes) { + crypto_yield(desc->flags); + return blkcipher_walk_next(desc, walk); + } + + if (walk->iv != desc->info) + memcpy(desc->info, walk->iv, crypto_blkcipher_ivsize(tfm)); + if (walk->buffer != walk->page) + kfree(walk->buffer); + if (walk->page) + free_page((unsigned long)walk->page); + + return err; +} +EXPORT_SYMBOL_GPL(blkcipher_walk_done); + +static inline int blkcipher_next_slow(struct blkcipher_desc *desc, + struct blkcipher_walk *walk, + unsigned int bsize, + unsigned int alignmask) +{ + unsigned int n; + + if (walk->buffer) + goto ok; + + walk->buffer = walk->page; + if (walk->buffer) + goto ok; + + n = bsize * 2 + (alignmask & ~(crypto_tfm_ctx_alignment() - 1)); + walk->buffer = kmalloc(n, GFP_ATOMIC); + if (!walk->buffer) + return blkcipher_walk_done(desc, walk, -ENOMEM); + +ok: + walk->dst.virt.addr = (u8 *)ALIGN((unsigned long)walk->buffer, + alignmask + 1); + walk->dst.virt.addr = blkcipher_get_spot(walk->dst.virt.addr, bsize); + walk->src.virt.addr = blkcipher_get_spot(walk->dst.virt.addr + bsize, + bsize); + + scatterwalk_copychunks(walk->src.virt.addr, &walk->in, bsize, 0); + + walk->nbytes = bsize; + walk->flags |= BLKCIPHER_WALK_SLOW; + + return 0; +} + +static inline int blkcipher_next_copy(struct blkcipher_walk *walk) +{ + u8 *tmp = walk->page; + + blkcipher_map_src(walk); + memcpy(tmp, walk->src.virt.addr, walk->nbytes); + blkcipher_unmap_src(walk); + + walk->src.virt.addr = tmp; + walk->dst.virt.addr = tmp; + + return 0; +} + +static inline int blkcipher_next_fast(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + unsigned long diff; + + walk->src.phys.page = scatterwalk_page(&walk->in); + walk->src.phys.offset = offset_in_page(walk->in.offset); + walk->dst.phys.page = scatterwalk_page(&walk->out); + walk->dst.phys.offset = offset_in_page(walk->out.offset); + + if (walk->flags & BLKCIPHER_WALK_PHYS) + return 0; + + diff = walk->src.phys.offset - walk->dst.phys.offset; + diff |= walk->src.virt.page - walk->dst.virt.page; + + blkcipher_map_src(walk); + walk->dst.virt.addr = walk->src.virt.addr; + + if (diff) { + walk->flags |= BLKCIPHER_WALK_DIFF; + blkcipher_map_dst(walk); + } + + return 0; +} + +static int blkcipher_walk_next(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct crypto_blkcipher *tfm = desc->tfm; + unsigned int alignmask = crypto_blkcipher_alignmask(tfm); + unsigned int bsize = crypto_blkcipher_blocksize(tfm); + unsigned int n; + int err; + + n = walk->total; + if (unlikely(n < bsize)) { + desc->flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN; + return blkcipher_walk_done(desc, walk, -EINVAL); + } + + walk->flags &= ~(BLKCIPHER_WALK_SLOW | BLKCIPHER_WALK_COPY | + BLKCIPHER_WALK_DIFF); + if (!scatterwalk_aligned(&walk->in, alignmask) || + !scatterwalk_aligned(&walk->out, alignmask)) { + walk->flags |= BLKCIPHER_WALK_COPY; + if (!walk->page) { + walk->page = (void *)__get_free_page(GFP_ATOMIC); + if (!walk->page) + n = 0; + } + } + + n = scatterwalk_clamp(&walk->in, n); + n = scatterwalk_clamp(&walk->out, n); + + if (unlikely(n < bsize)) { + err = blkcipher_next_slow(desc, walk, bsize, alignmask); + goto set_phys_lowmem; + } + + walk->nbytes = n; + if (walk->flags & BLKCIPHER_WALK_COPY) { + err = blkcipher_next_copy(walk); + goto set_phys_lowmem; + } + + return blkcipher_next_fast(desc, walk); + +set_phys_lowmem: + if (walk->flags & BLKCIPHER_WALK_PHYS) { + walk->src.phys.page = virt_to_page(walk->src.virt.addr); + walk->dst.phys.page = virt_to_page(walk->dst.virt.addr); + walk->src.phys.offset &= PAGE_SIZE - 1; + walk->dst.phys.offset &= PAGE_SIZE - 1; + } + return err; +} + +static inline int blkcipher_copy_iv(struct blkcipher_walk *walk, + struct crypto_blkcipher *tfm, + unsigned int alignmask) +{ + unsigned bs = crypto_blkcipher_blocksize(tfm); + unsigned int ivsize = crypto_blkcipher_ivsize(tfm); + unsigned int size = bs * 2 + ivsize + max(bs, ivsize) - (alignmask + 1); + u8 *iv; + + size += alignmask & ~(crypto_tfm_ctx_alignment() - 1); + walk->buffer = kmalloc(size, GFP_ATOMIC); + if (!walk->buffer) + return -ENOMEM; + + iv = (u8 *)ALIGN((unsigned long)walk->buffer, alignmask + 1); + iv = blkcipher_get_spot(iv, bs) + bs; + iv = blkcipher_get_spot(iv, bs) + bs; + iv = blkcipher_get_spot(iv, ivsize); + + walk->iv = memcpy(iv, walk->iv, ivsize); + return 0; +} + +int blkcipher_walk_virt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + walk->flags &= ~BLKCIPHER_WALK_PHYS; + return blkcipher_walk_first(desc, walk); +} +EXPORT_SYMBOL_GPL(blkcipher_walk_virt); + +int blkcipher_walk_phys(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + walk->flags |= BLKCIPHER_WALK_PHYS; + return blkcipher_walk_first(desc, walk); +} +EXPORT_SYMBOL_GPL(blkcipher_walk_phys); + +static int blkcipher_walk_first(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct crypto_blkcipher *tfm = desc->tfm; + unsigned int alignmask = crypto_blkcipher_alignmask(tfm); + + walk->nbytes = walk->total; + if (unlikely(!walk->total)) + return 0; + + walk->buffer = NULL; + walk->iv = desc->info; + if (unlikely(((unsigned long)walk->iv & alignmask))) { + int err = blkcipher_copy_iv(walk, tfm, alignmask); + if (err) + return err; + } + + scatterwalk_start(&walk->in, walk->in.sg); + scatterwalk_start(&walk->out, walk->out.sg); + walk->page = NULL; + + return blkcipher_walk_next(desc, walk); +} + +static int setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct blkcipher_alg *cipher = &tfm->__crt_alg->cra_blkcipher; + + if (keylen < cipher->min_keysize || keylen > cipher->max_keysize) { + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + return cipher->setkey(tfm, key, keylen); +} + +static unsigned int crypto_blkcipher_ctxsize(struct crypto_alg *alg) +{ + struct blkcipher_alg *cipher = &alg->cra_blkcipher; + unsigned int len = alg->cra_ctxsize; + + if (cipher->ivsize) { + len = ALIGN(len, (unsigned long)alg->cra_alignmask + 1); + len += cipher->ivsize; + } + + return len; +} + +static int crypto_init_blkcipher_ops(struct crypto_tfm *tfm) +{ + struct blkcipher_tfm *crt = &tfm->crt_blkcipher; + struct blkcipher_alg *alg = &tfm->__crt_alg->cra_blkcipher; + unsigned long align = crypto_tfm_alg_alignmask(tfm) + 1; + unsigned long addr; + + if (alg->ivsize > PAGE_SIZE / 8) + return -EINVAL; + + crt->setkey = setkey; + crt->encrypt = alg->encrypt; + crt->decrypt = alg->decrypt; + + addr = (unsigned long)crypto_tfm_ctx(tfm); + addr = ALIGN(addr, align); + addr += ALIGN(tfm->__crt_alg->cra_ctxsize, align); + crt->iv = (void *)addr; + + return 0; +} + +static void crypto_blkcipher_show(struct seq_file *m, struct crypto_alg *alg) + __attribute_used__; +static void crypto_blkcipher_show(struct seq_file *m, struct crypto_alg *alg) +{ + seq_printf(m, "type : blkcipher\n"); + seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); + seq_printf(m, "min keysize : %u\n", alg->cra_blkcipher.min_keysize); + seq_printf(m, "max keysize : %u\n", alg->cra_blkcipher.max_keysize); + seq_printf(m, "ivsize : %u\n", alg->cra_blkcipher.ivsize); +} + +const struct crypto_type crypto_blkcipher_type = { + .ctxsize = crypto_blkcipher_ctxsize, + .init = crypto_init_blkcipher_ops, +#ifdef CONFIG_PROC_FS + .show = crypto_blkcipher_show, +#endif +}; +EXPORT_SYMBOL_GPL(crypto_blkcipher_type); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Generic block chaining cipher type"); diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index f21ae672e8a8..f3946baf0c07 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -55,6 +55,34 @@ struct scatter_walk { unsigned int offset; }; +struct blkcipher_walk { + union { + struct { + struct page *page; + unsigned long offset; + } phys; + + struct { + u8 *page; + u8 *addr; + } virt; + } src, dst; + + struct scatter_walk in; + unsigned int nbytes; + + struct scatter_walk out; + unsigned int total; + + void *page; + u8 *buffer; + u8 *iv; + + int flags; +}; + +extern const struct crypto_type crypto_blkcipher_type; + int crypto_register_template(struct crypto_template *tmpl); void crypto_unregister_template(struct crypto_template *tmpl); struct crypto_template *crypto_lookup_template(const char *name); @@ -69,15 +97,52 @@ struct crypto_alg *crypto_get_attr_alg(void *param, unsigned int len, struct crypto_instance *crypto_alloc_instance(const char *name, struct crypto_alg *alg); +int blkcipher_walk_done(struct blkcipher_desc *desc, + struct blkcipher_walk *walk, int err); +int blkcipher_walk_virt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk); +int blkcipher_walk_phys(struct blkcipher_desc *desc, + struct blkcipher_walk *walk); + +static inline void *crypto_tfm_ctx_aligned(struct crypto_tfm *tfm) +{ + unsigned long addr = (unsigned long)crypto_tfm_ctx(tfm); + unsigned long align = crypto_tfm_alg_alignmask(tfm); + + if (align <= crypto_tfm_ctx_alignment()) + align = 1; + return (void *)ALIGN(addr, align); +} + static inline void *crypto_instance_ctx(struct crypto_instance *inst) { return inst->__ctx; } +static inline void *crypto_blkcipher_ctx(struct crypto_blkcipher *tfm) +{ + return crypto_tfm_ctx(&tfm->base); +} + +static inline void *crypto_blkcipher_ctx_aligned(struct crypto_blkcipher *tfm) +{ + return crypto_tfm_ctx_aligned(&tfm->base); +} + static inline struct cipher_alg *crypto_cipher_alg(struct crypto_cipher *tfm) { return &crypto_cipher_tfm(tfm)->__crt_alg->cra_cipher; } +static inline void blkcipher_walk_init(struct blkcipher_walk *walk, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes) +{ + walk->in.sg = src; + walk->out.sg = dst; + walk->total = nbytes; +} + #endif /* _CRYPTO_ALGAPI_H */ diff --git a/include/linux/crypto.h b/include/linux/crypto.h index fdecee83878c..5a5466d518e8 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -32,6 +32,7 @@ #define CRYPTO_ALG_TYPE_MASK 0x0000000f #define CRYPTO_ALG_TYPE_CIPHER 0x00000001 #define CRYPTO_ALG_TYPE_DIGEST 0x00000002 +#define CRYPTO_ALG_TYPE_BLKCIPHER 0x00000003 #define CRYPTO_ALG_TYPE_COMPRESS 0x00000004 #define CRYPTO_ALG_LARVAL 0x00000010 @@ -89,9 +90,16 @@ #endif struct scatterlist; +struct crypto_blkcipher; struct crypto_tfm; struct crypto_type; +struct blkcipher_desc { + struct crypto_blkcipher *tfm; + void *info; + u32 flags; +}; + struct cipher_desc { struct crypto_tfm *tfm; void (*crfn)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); @@ -104,6 +112,21 @@ struct cipher_desc { * Algorithms: modular crypto algorithm implementations, managed * via crypto_register_alg() and crypto_unregister_alg(). */ +struct blkcipher_alg { + int (*setkey)(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen); + int (*encrypt)(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes); + int (*decrypt)(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes); + + unsigned int min_keysize; + unsigned int max_keysize; + unsigned int ivsize; +}; + struct cipher_alg { unsigned int cia_min_keysize; unsigned int cia_max_keysize; @@ -143,6 +166,7 @@ struct compress_alg { unsigned int slen, u8 *dst, unsigned int *dlen); }; +#define cra_blkcipher cra_u.blkcipher #define cra_cipher cra_u.cipher #define cra_digest cra_u.digest #define cra_compress cra_u.compress @@ -165,6 +189,7 @@ struct crypto_alg { const struct crypto_type *cra_type; union { + struct blkcipher_alg blkcipher; struct cipher_alg cipher; struct digest_alg digest; struct compress_alg compress; @@ -201,6 +226,16 @@ static inline int crypto_alg_available(const char *name, u32 flags) * crypto_free_*(), as well as the various helpers below. */ +struct blkcipher_tfm { + void *iv; + int (*setkey)(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen); + int (*encrypt)(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes); + int (*decrypt)(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes); +}; + struct cipher_tfm { void *cit_iv; unsigned int cit_ivsize; @@ -251,6 +286,7 @@ struct compress_tfm { u8 *dst, unsigned int *dlen); }; +#define crt_blkcipher crt_u.blkcipher #define crt_cipher crt_u.cipher #define crt_digest crt_u.digest #define crt_compress crt_u.compress @@ -260,6 +296,7 @@ struct crypto_tfm { u32 crt_flags; union { + struct blkcipher_tfm blkcipher; struct cipher_tfm cipher; struct digest_tfm digest; struct compress_tfm compress; @@ -272,6 +309,10 @@ struct crypto_tfm { #define crypto_cipher crypto_tfm +struct crypto_blkcipher { + struct crypto_tfm base; +}; + enum { CRYPTOA_UNSPEC, CRYPTOA_ALG, @@ -380,6 +421,144 @@ static inline unsigned int crypto_tfm_ctx_alignment(void) /* * API wrappers. */ +static inline struct crypto_blkcipher *__crypto_blkcipher_cast( + struct crypto_tfm *tfm) +{ + return (struct crypto_blkcipher *)tfm; +} + +static inline struct crypto_blkcipher *crypto_blkcipher_cast( + struct crypto_tfm *tfm) +{ + BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_BLKCIPHER); + return __crypto_blkcipher_cast(tfm); +} + +static inline struct crypto_blkcipher *crypto_alloc_blkcipher( + const char *alg_name, u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_BLKCIPHER; + mask |= CRYPTO_ALG_TYPE_MASK; + + return __crypto_blkcipher_cast(crypto_alloc_base(alg_name, type, mask)); +} + +static inline struct crypto_tfm *crypto_blkcipher_tfm( + struct crypto_blkcipher *tfm) +{ + return &tfm->base; +} + +static inline void crypto_free_blkcipher(struct crypto_blkcipher *tfm) +{ + crypto_free_tfm(crypto_blkcipher_tfm(tfm)); +} + +static inline const char *crypto_blkcipher_name(struct crypto_blkcipher *tfm) +{ + return crypto_tfm_alg_name(crypto_blkcipher_tfm(tfm)); +} + +static inline struct blkcipher_tfm *crypto_blkcipher_crt( + struct crypto_blkcipher *tfm) +{ + return &crypto_blkcipher_tfm(tfm)->crt_blkcipher; +} + +static inline struct blkcipher_alg *crypto_blkcipher_alg( + struct crypto_blkcipher *tfm) +{ + return &crypto_blkcipher_tfm(tfm)->__crt_alg->cra_blkcipher; +} + +static inline unsigned int crypto_blkcipher_ivsize(struct crypto_blkcipher *tfm) +{ + return crypto_blkcipher_alg(tfm)->ivsize; +} + +static inline unsigned int crypto_blkcipher_blocksize( + struct crypto_blkcipher *tfm) +{ + return crypto_tfm_alg_blocksize(crypto_blkcipher_tfm(tfm)); +} + +static inline unsigned int crypto_blkcipher_alignmask( + struct crypto_blkcipher *tfm) +{ + return crypto_tfm_alg_alignmask(crypto_blkcipher_tfm(tfm)); +} + +static inline u32 crypto_blkcipher_get_flags(struct crypto_blkcipher *tfm) +{ + return crypto_tfm_get_flags(crypto_blkcipher_tfm(tfm)); +} + +static inline void crypto_blkcipher_set_flags(struct crypto_blkcipher *tfm, + u32 flags) +{ + crypto_tfm_set_flags(crypto_blkcipher_tfm(tfm), flags); +} + +static inline void crypto_blkcipher_clear_flags(struct crypto_blkcipher *tfm, + u32 flags) +{ + crypto_tfm_clear_flags(crypto_blkcipher_tfm(tfm), flags); +} + +static inline int crypto_blkcipher_setkey(struct crypto_blkcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return crypto_blkcipher_crt(tfm)->setkey(crypto_blkcipher_tfm(tfm), + key, keylen); +} + +static inline int crypto_blkcipher_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes) +{ + desc->info = crypto_blkcipher_crt(desc->tfm)->iv; + return crypto_blkcipher_crt(desc->tfm)->encrypt(desc, dst, src, nbytes); +} + +static inline int crypto_blkcipher_encrypt_iv(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes) +{ + return crypto_blkcipher_crt(desc->tfm)->encrypt(desc, dst, src, nbytes); +} + +static inline int crypto_blkcipher_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes) +{ + desc->info = crypto_blkcipher_crt(desc->tfm)->iv; + return crypto_blkcipher_crt(desc->tfm)->decrypt(desc, dst, src, nbytes); +} + +static inline int crypto_blkcipher_decrypt_iv(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes) +{ + return crypto_blkcipher_crt(desc->tfm)->decrypt(desc, dst, src, nbytes); +} + +static inline void crypto_blkcipher_set_iv(struct crypto_blkcipher *tfm, + const u8 *src, unsigned int len) +{ + memcpy(crypto_blkcipher_crt(tfm)->iv, src, len); +} + +static inline void crypto_blkcipher_get_iv(struct crypto_blkcipher *tfm, + u8 *dst, unsigned int len) +{ + memcpy(dst, crypto_blkcipher_crt(tfm)->iv, len); +} + static inline struct crypto_cipher *__crypto_cipher_cast(struct crypto_tfm *tfm) { return (struct crypto_cipher *)tfm; From db131ef9084110d9e82549c0a627e157e8bb99d7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 21 Sep 2006 11:44:08 +1000 Subject: [PATCH 0312/1063] [CRYPTO] cipher: Added block ciphers for CBC/ECB This patch adds two block cipher algorithms, CBC and ECB. These are implemented as templates on top of existing single-block cipher algorithms. They invoke the single-block cipher through the new encrypt_one/decrypt_one interface. This also optimises the in-place encryption and decryption to remove the cost of an IV copy each round. Signed-off-by: Herbert Xu --- crypto/Kconfig | 17 ++ crypto/Makefile | 2 + crypto/cbc.c | 344 ++++++++++++++++++++++++++++++++++++++++ crypto/ecb.c | 181 +++++++++++++++++++++ crypto/internal.h | 1 - include/crypto/algapi.h | 2 + 6 files changed, 546 insertions(+), 1 deletion(-) create mode 100644 crypto/cbc.c create mode 100644 crypto/ecb.c diff --git a/crypto/Kconfig b/crypto/Kconfig index 68790ad7308d..90d467c99c2c 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -123,6 +123,23 @@ config CRYPTO_TGR192 See also: . +config CRYPTO_ECB + tristate "ECB support" + select CRYPTO_BLKCIPHER + default m + help + ECB: Electronic CodeBook mode + This is the simplest block cipher algorithm. It simply encrypts + the input block by block. + +config CRYPTO_CBC + tristate "CBC support" + select CRYPTO_BLKCIPHER + default m + help + CBC: Cipher Block Chaining mode + This block cipher algorithm is required for IPSec. + config CRYPTO_DES tristate "DES and Triple DES EDE cipher algorithms" select CRYPTO_ALGAPI diff --git a/crypto/Makefile b/crypto/Makefile index b5051951c636..5e1ff4e0b1fc 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -20,6 +20,8 @@ obj-$(CONFIG_CRYPTO_SHA256) += sha256.o obj-$(CONFIG_CRYPTO_SHA512) += sha512.o obj-$(CONFIG_CRYPTO_WP512) += wp512.o obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o +obj-$(CONFIG_CRYPTO_ECB) += ecb.o +obj-$(CONFIG_CRYPTO_CBC) += cbc.o obj-$(CONFIG_CRYPTO_DES) += des.o obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish.o obj-$(CONFIG_CRYPTO_TWOFISH) += twofish.o diff --git a/crypto/cbc.c b/crypto/cbc.c new file mode 100644 index 000000000000..f5542b4db387 --- /dev/null +++ b/crypto/cbc.c @@ -0,0 +1,344 @@ +/* + * CBC: Cipher Block Chaining mode + * + * Copyright (c) 2006 Herbert Xu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +struct crypto_cbc_ctx { + struct crypto_cipher *child; + void (*xor)(u8 *dst, const u8 *src, unsigned int bs); +}; + +static int crypto_cbc_setkey(struct crypto_tfm *parent, const u8 *key, + unsigned int keylen) +{ + struct crypto_cbc_ctx *ctx = crypto_tfm_ctx(parent); + struct crypto_cipher *child = ctx->child; + int err; + + crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); + err = crypto_cipher_setkey(child, key, keylen); + crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) & + CRYPTO_TFM_RES_MASK); + return err; +} + +static int crypto_cbc_encrypt_segment(struct blkcipher_desc *desc, + struct blkcipher_walk *walk, + struct crypto_cipher *tfm, + void (*xor)(u8 *, const u8 *, + unsigned int)) +{ + void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = + crypto_cipher_alg(tfm)->cia_encrypt; + int bsize = crypto_cipher_blocksize(tfm); + unsigned int nbytes = walk->nbytes; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + u8 *iv = walk->iv; + + do { + xor(iv, src, bsize); + fn(crypto_cipher_tfm(tfm), dst, iv); + memcpy(iv, dst, bsize); + + src += bsize; + dst += bsize; + } while ((nbytes -= bsize) >= bsize); + + return nbytes; +} + +static int crypto_cbc_encrypt_inplace(struct blkcipher_desc *desc, + struct blkcipher_walk *walk, + struct crypto_cipher *tfm, + void (*xor)(u8 *, const u8 *, + unsigned int)) +{ + void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = + crypto_cipher_alg(tfm)->cia_encrypt; + int bsize = crypto_cipher_blocksize(tfm); + unsigned int nbytes = walk->nbytes; + u8 *src = walk->src.virt.addr; + u8 *iv = walk->iv; + + do { + xor(src, iv, bsize); + fn(crypto_cipher_tfm(tfm), src, src); + iv = src; + + src += bsize; + } while ((nbytes -= bsize) >= bsize); + + memcpy(walk->iv, iv, bsize); + + return nbytes; +} + +static int crypto_cbc_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct blkcipher_walk walk; + struct crypto_blkcipher *tfm = desc->tfm; + struct crypto_cbc_ctx *ctx = crypto_blkcipher_ctx(tfm); + struct crypto_cipher *child = ctx->child; + void (*xor)(u8 *, const u8 *, unsigned int bs) = ctx->xor; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + if (walk.src.virt.addr == walk.dst.virt.addr) + nbytes = crypto_cbc_encrypt_inplace(desc, &walk, child, + xor); + else + nbytes = crypto_cbc_encrypt_segment(desc, &walk, child, + xor); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static int crypto_cbc_decrypt_segment(struct blkcipher_desc *desc, + struct blkcipher_walk *walk, + struct crypto_cipher *tfm, + void (*xor)(u8 *, const u8 *, + unsigned int)) +{ + void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = + crypto_cipher_alg(tfm)->cia_decrypt; + int bsize = crypto_cipher_blocksize(tfm); + unsigned int nbytes = walk->nbytes; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + u8 *iv = walk->iv; + + do { + fn(crypto_cipher_tfm(tfm), dst, src); + xor(dst, iv, bsize); + iv = src; + + src += bsize; + dst += bsize; + } while ((nbytes -= bsize) >= bsize); + + memcpy(walk->iv, iv, bsize); + + return nbytes; +} + +static int crypto_cbc_decrypt_inplace(struct blkcipher_desc *desc, + struct blkcipher_walk *walk, + struct crypto_cipher *tfm, + void (*xor)(u8 *, const u8 *, + unsigned int)) +{ + void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = + crypto_cipher_alg(tfm)->cia_decrypt; + int bsize = crypto_cipher_blocksize(tfm); + unsigned long alignmask = crypto_cipher_alignmask(tfm); + unsigned int nbytes = walk->nbytes; + u8 *src = walk->src.virt.addr; + u8 stack[bsize + alignmask]; + u8 *first_iv = (u8 *)ALIGN((unsigned long)stack, alignmask + 1); + + memcpy(first_iv, walk->iv, bsize); + + /* Start of the last block. */ + src += nbytes - nbytes % bsize - bsize; + memcpy(walk->iv, src, bsize); + + for (;;) { + fn(crypto_cipher_tfm(tfm), src, src); + if ((nbytes -= bsize) < bsize) + break; + xor(src, src - bsize, bsize); + src -= bsize; + } + + xor(src, first_iv, bsize); + + return nbytes; +} + +static int crypto_cbc_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct blkcipher_walk walk; + struct crypto_blkcipher *tfm = desc->tfm; + struct crypto_cbc_ctx *ctx = crypto_blkcipher_ctx(tfm); + struct crypto_cipher *child = ctx->child; + void (*xor)(u8 *, const u8 *, unsigned int bs) = ctx->xor; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + if (walk.src.virt.addr == walk.dst.virt.addr) + nbytes = crypto_cbc_decrypt_inplace(desc, &walk, child, + xor); + else + nbytes = crypto_cbc_decrypt_segment(desc, &walk, child, + xor); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static void xor_byte(u8 *a, const u8 *b, unsigned int bs) +{ + do { + *a++ ^= *b++; + } while (--bs); +} + +static void xor_quad(u8 *dst, const u8 *src, unsigned int bs) +{ + u32 *a = (u32 *)dst; + u32 *b = (u32 *)src; + + do { + *a++ ^= *b++; + } while ((bs -= 4)); +} + +static void xor_64(u8 *a, const u8 *b, unsigned int bs) +{ + ((u32 *)a)[0] ^= ((u32 *)b)[0]; + ((u32 *)a)[1] ^= ((u32 *)b)[1]; +} + +static void xor_128(u8 *a, const u8 *b, unsigned int bs) +{ + ((u32 *)a)[0] ^= ((u32 *)b)[0]; + ((u32 *)a)[1] ^= ((u32 *)b)[1]; + ((u32 *)a)[2] ^= ((u32 *)b)[2]; + ((u32 *)a)[3] ^= ((u32 *)b)[3]; +} + +static int crypto_cbc_init_tfm(struct crypto_tfm *tfm) +{ + struct crypto_instance *inst = (void *)tfm->__crt_alg; + struct crypto_spawn *spawn = crypto_instance_ctx(inst); + struct crypto_cbc_ctx *ctx = crypto_tfm_ctx(tfm); + + switch (crypto_tfm_alg_blocksize(tfm)) { + case 8: + ctx->xor = xor_64; + break; + + case 16: + ctx->xor = xor_128; + break; + + default: + if (crypto_tfm_alg_blocksize(tfm) % 4) + ctx->xor = xor_byte; + else + ctx->xor = xor_quad; + } + + tfm = crypto_spawn_tfm(spawn); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + ctx->child = crypto_cipher_cast(tfm); + return 0; +} + +static void crypto_cbc_exit_tfm(struct crypto_tfm *tfm) +{ + struct crypto_cbc_ctx *ctx = crypto_tfm_ctx(tfm); + crypto_free_cipher(ctx->child); +} + +static struct crypto_instance *crypto_cbc_alloc(void *param, unsigned int len) +{ + struct crypto_instance *inst; + struct crypto_alg *alg; + + alg = crypto_get_attr_alg(param, len, CRYPTO_ALG_TYPE_CIPHER, + CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_ASYNC); + if (IS_ERR(alg)) + return ERR_PTR(PTR_ERR(alg)); + + inst = crypto_alloc_instance("cbc", alg); + if (IS_ERR(inst)) + goto out_put_alg; + + inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER; + inst->alg.cra_priority = alg->cra_priority; + inst->alg.cra_blocksize = alg->cra_blocksize; + inst->alg.cra_alignmask = alg->cra_alignmask; + inst->alg.cra_type = &crypto_blkcipher_type; + + if (!(alg->cra_blocksize % 4)) + inst->alg.cra_alignmask |= 3; + inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize; + inst->alg.cra_blkcipher.min_keysize = alg->cra_cipher.cia_min_keysize; + inst->alg.cra_blkcipher.max_keysize = alg->cra_cipher.cia_max_keysize; + + inst->alg.cra_ctxsize = sizeof(struct crypto_cbc_ctx); + + inst->alg.cra_init = crypto_cbc_init_tfm; + inst->alg.cra_exit = crypto_cbc_exit_tfm; + + inst->alg.cra_blkcipher.setkey = crypto_cbc_setkey; + inst->alg.cra_blkcipher.encrypt = crypto_cbc_encrypt; + inst->alg.cra_blkcipher.decrypt = crypto_cbc_decrypt; + +out_put_alg: + crypto_mod_put(alg); + return inst; +} + +static void crypto_cbc_free(struct crypto_instance *inst) +{ + crypto_drop_spawn(crypto_instance_ctx(inst)); + kfree(inst); +} + +static struct crypto_template crypto_cbc_tmpl = { + .name = "cbc", + .alloc = crypto_cbc_alloc, + .free = crypto_cbc_free, + .module = THIS_MODULE, +}; + +static int __init crypto_cbc_module_init(void) +{ + return crypto_register_template(&crypto_cbc_tmpl); +} + +static void __exit crypto_cbc_module_exit(void) +{ + crypto_unregister_template(&crypto_cbc_tmpl); +} + +module_init(crypto_cbc_module_init); +module_exit(crypto_cbc_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("CBC block cipher algorithm"); diff --git a/crypto/ecb.c b/crypto/ecb.c new file mode 100644 index 000000000000..f239aa9c4017 --- /dev/null +++ b/crypto/ecb.c @@ -0,0 +1,181 @@ +/* + * ECB: Electronic CodeBook mode + * + * Copyright (c) 2006 Herbert Xu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +struct crypto_ecb_ctx { + struct crypto_cipher *child; +}; + +static int crypto_ecb_setkey(struct crypto_tfm *parent, const u8 *key, + unsigned int keylen) +{ + struct crypto_ecb_ctx *ctx = crypto_tfm_ctx(parent); + struct crypto_cipher *child = ctx->child; + int err; + + crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); + err = crypto_cipher_setkey(child, key, keylen); + crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) & + CRYPTO_TFM_RES_MASK); + return err; +} + +static int crypto_ecb_crypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk, + struct crypto_cipher *tfm, + void (*fn)(struct crypto_tfm *, u8 *, const u8 *)) +{ + int bsize = crypto_cipher_blocksize(tfm); + unsigned int nbytes; + int err; + + err = blkcipher_walk_virt(desc, walk); + + while ((nbytes = walk->nbytes)) { + u8 *wsrc = walk->src.virt.addr; + u8 *wdst = walk->dst.virt.addr; + + do { + fn(crypto_cipher_tfm(tfm), wdst, wsrc); + + wsrc += bsize; + wdst += bsize; + } while ((nbytes -= bsize) >= bsize); + + err = blkcipher_walk_done(desc, walk, nbytes); + } + + return err; +} + +static int crypto_ecb_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct blkcipher_walk walk; + struct crypto_blkcipher *tfm = desc->tfm; + struct crypto_ecb_ctx *ctx = crypto_blkcipher_ctx(tfm); + struct crypto_cipher *child = ctx->child; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return crypto_ecb_crypt(desc, &walk, child, + crypto_cipher_alg(child)->cia_encrypt); +} + +static int crypto_ecb_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct blkcipher_walk walk; + struct crypto_blkcipher *tfm = desc->tfm; + struct crypto_ecb_ctx *ctx = crypto_blkcipher_ctx(tfm); + struct crypto_cipher *child = ctx->child; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return crypto_ecb_crypt(desc, &walk, child, + crypto_cipher_alg(child)->cia_decrypt); +} + +static int crypto_ecb_init_tfm(struct crypto_tfm *tfm) +{ + struct crypto_instance *inst = (void *)tfm->__crt_alg; + struct crypto_spawn *spawn = crypto_instance_ctx(inst); + struct crypto_ecb_ctx *ctx = crypto_tfm_ctx(tfm); + + tfm = crypto_spawn_tfm(spawn); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + ctx->child = crypto_cipher_cast(tfm); + return 0; +} + +static void crypto_ecb_exit_tfm(struct crypto_tfm *tfm) +{ + struct crypto_ecb_ctx *ctx = crypto_tfm_ctx(tfm); + crypto_free_cipher(ctx->child); +} + +static struct crypto_instance *crypto_ecb_alloc(void *param, unsigned int len) +{ + struct crypto_instance *inst; + struct crypto_alg *alg; + + alg = crypto_get_attr_alg(param, len, CRYPTO_ALG_TYPE_CIPHER, + CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_ASYNC); + if (IS_ERR(alg)) + return ERR_PTR(PTR_ERR(alg)); + + inst = crypto_alloc_instance("ecb", alg); + if (IS_ERR(inst)) + goto out_put_alg; + + inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER; + inst->alg.cra_priority = alg->cra_priority; + inst->alg.cra_blocksize = alg->cra_blocksize; + inst->alg.cra_alignmask = alg->cra_alignmask; + inst->alg.cra_type = &crypto_blkcipher_type; + + inst->alg.cra_blkcipher.min_keysize = alg->cra_cipher.cia_min_keysize; + inst->alg.cra_blkcipher.max_keysize = alg->cra_cipher.cia_max_keysize; + + inst->alg.cra_ctxsize = sizeof(struct crypto_ecb_ctx); + + inst->alg.cra_init = crypto_ecb_init_tfm; + inst->alg.cra_exit = crypto_ecb_exit_tfm; + + inst->alg.cra_blkcipher.setkey = crypto_ecb_setkey; + inst->alg.cra_blkcipher.encrypt = crypto_ecb_encrypt; + inst->alg.cra_blkcipher.decrypt = crypto_ecb_decrypt; + +out_put_alg: + crypto_mod_put(alg); + return inst; +} + +static void crypto_ecb_free(struct crypto_instance *inst) +{ + crypto_drop_spawn(crypto_instance_ctx(inst)); + kfree(inst); +} + +static struct crypto_template crypto_ecb_tmpl = { + .name = "ecb", + .alloc = crypto_ecb_alloc, + .free = crypto_ecb_free, + .module = THIS_MODULE, +}; + +static int __init crypto_ecb_module_init(void) +{ + return crypto_register_template(&crypto_ecb_tmpl); +} + +static void __exit crypto_ecb_module_exit(void) +{ + crypto_unregister_template(&crypto_ecb_tmpl); +} + +module_init(crypto_ecb_module_init); +module_exit(crypto_ecb_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ECB block cipher algorithm"); diff --git a/crypto/internal.h b/crypto/internal.h index 7dc04efb55c6..93d9b10ff914 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -131,7 +131,6 @@ static inline unsigned int crypto_compress_ctxsize(struct crypto_alg *alg, } struct crypto_alg *crypto_mod_get(struct crypto_alg *alg); -void crypto_mod_put(struct crypto_alg *alg); struct crypto_alg *__crypto_alg_lookup(const char *name, u32 type, u32 mask); struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask); diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index f3946baf0c07..444f602724db 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -83,6 +83,8 @@ struct blkcipher_walk { extern const struct crypto_type crypto_blkcipher_type; +void crypto_mod_put(struct crypto_alg *alg); + int crypto_register_template(struct crypto_template *tmpl); void crypto_unregister_template(struct crypto_template *tmpl); struct crypto_template *crypto_lookup_template(const char *name); From 28ce728a90cce3a0c6c0ed00354299de52db94b1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 21 Aug 2006 21:38:42 +1000 Subject: [PATCH 0313/1063] [CRYPTO] padlock: Added block cipher versions of CBC/ECB This patch adds block cipher algorithms for cbc(aes) and ecb(aes) for the PadLock device. Once all users to the old cipher type have been converted the old cbc/ecb PadLock operations will be removed. Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 1 + drivers/crypto/padlock-aes.c | 174 +++++++++++++++++++++++++++++++++-- drivers/crypto/padlock.h | 1 + 3 files changed, 169 insertions(+), 7 deletions(-) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 86c99cd333fa..adb554153f67 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -27,6 +27,7 @@ config CRYPTO_DEV_PADLOCK config CRYPTO_DEV_PADLOCK_AES tristate "PadLock driver for AES algorithm" depends on CRYPTO_DEV_PADLOCK + select CRYPTO_BLKCIPHER default m help Use VIA PadLock for AES algorithm. diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 3e683709243e..f53301e836d9 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -43,11 +43,11 @@ * --------------------------------------------------------------------------- */ +#include #include #include #include #include -#include #include #include #include @@ -297,9 +297,9 @@ aes_hw_extkey_available(uint8_t key_len) return 0; } -static inline struct aes_ctx *aes_ctx(struct crypto_tfm *tfm) +static inline struct aes_ctx *aes_ctx_common(void *ctx) { - unsigned long addr = (unsigned long)crypto_tfm_ctx(tfm); + unsigned long addr = (unsigned long)ctx; unsigned long align = PADLOCK_ALIGNMENT; if (align <= crypto_tfm_ctx_alignment()) @@ -307,6 +307,16 @@ static inline struct aes_ctx *aes_ctx(struct crypto_tfm *tfm) return (struct aes_ctx *)ALIGN(addr, align); } +static inline struct aes_ctx *aes_ctx(struct crypto_tfm *tfm) +{ + return aes_ctx_common(crypto_tfm_ctx(tfm)); +} + +static inline struct aes_ctx *blk_aes_ctx(struct crypto_blkcipher *tfm) +{ + return aes_ctx_common(crypto_blkcipher_ctx(tfm)); +} + static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { @@ -507,6 +517,141 @@ static struct crypto_alg aes_alg = { } }; +static int ecb_aes_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr, + ctx->E, &ctx->cword.encrypt, + nbytes / AES_BLOCK_SIZE); + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static int ecb_aes_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr, + ctx->D, &ctx->cword.decrypt, + nbytes / AES_BLOCK_SIZE); + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static struct crypto_alg ecb_aes_alg = { + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-padlock", + .cra_priority = PADLOCK_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct aes_ctx), + .cra_alignmask = PADLOCK_ALIGNMENT - 1, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ecb_aes_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = aes_set_key, + .encrypt = ecb_aes_encrypt, + .decrypt = ecb_aes_decrypt, + } + } +}; + +static int cbc_aes_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + u8 *iv = padlock_xcrypt_cbc(walk.src.virt.addr, + walk.dst.virt.addr, ctx->E, + walk.iv, &ctx->cword.encrypt, + nbytes / AES_BLOCK_SIZE); + memcpy(walk.iv, iv, AES_BLOCK_SIZE); + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static int cbc_aes_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + padlock_xcrypt_cbc(walk.src.virt.addr, walk.dst.virt.addr, + ctx->D, walk.iv, &ctx->cword.decrypt, + nbytes / AES_BLOCK_SIZE); + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static struct crypto_alg cbc_aes_alg = { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-padlock", + .cra_priority = PADLOCK_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct aes_ctx), + .cra_alignmask = PADLOCK_ALIGNMENT - 1, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(cbc_aes_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aes_set_key, + .encrypt = cbc_aes_encrypt, + .decrypt = cbc_aes_decrypt, + } + } +}; + static int __init padlock_init(void) { int ret; @@ -522,18 +667,33 @@ static int __init padlock_init(void) } gen_tabs(); - if ((ret = crypto_register_alg(&aes_alg))) { - printk(KERN_ERR PFX "VIA PadLock AES initialization failed.\n"); - return ret; - } + if ((ret = crypto_register_alg(&aes_alg))) + goto aes_err; + + if ((ret = crypto_register_alg(&ecb_aes_alg))) + goto ecb_aes_err; + + if ((ret = crypto_register_alg(&cbc_aes_alg))) + goto cbc_aes_err; printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n"); +out: return ret; + +cbc_aes_err: + crypto_unregister_alg(&ecb_aes_alg); +ecb_aes_err: + crypto_unregister_alg(&aes_alg); +aes_err: + printk(KERN_ERR PFX "VIA PadLock AES initialization failed.\n"); + goto out; } static void __exit padlock_fini(void) { + crypto_unregister_alg(&cbc_aes_alg); + crypto_unregister_alg(&ecb_aes_alg); crypto_unregister_alg(&aes_alg); } diff --git a/drivers/crypto/padlock.h b/drivers/crypto/padlock.h index 7e3385b0904d..b728e4518bd1 100644 --- a/drivers/crypto/padlock.h +++ b/drivers/crypto/padlock.h @@ -18,5 +18,6 @@ #define PFX "padlock: " #define PADLOCK_CRA_PRIORITY 300 +#define PADLOCK_COMPOSITE_PRIORITY 400 #endif /* _CRYPTO_PADLOCK_H */ From a9e62fadf0b02ba4a1d945d1a75652507da94319 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 21 Aug 2006 21:39:24 +1000 Subject: [PATCH 0314/1063] [CRYPTO] s390: Added block cipher versions of CBC/ECB This patch adds block cipher algorithms for S390. Once all users of the old cipher type have been converted the existing CBC/ECB non-block cipher operations will be removed. Signed-off-by: Herbert Xu --- arch/s390/crypto/aes_s390.c | 218 ++++++++++++++++++- arch/s390/crypto/crypt_s390.h | 1 + arch/s390/crypto/des_s390.c | 385 +++++++++++++++++++++++++++++++++- crypto/Kconfig | 2 + 4 files changed, 592 insertions(+), 14 deletions(-) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 220300e760d8..8f04b4e41b55 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -16,9 +16,9 @@ * */ +#include #include #include -#include #include "crypt_s390.h" #define AES_MIN_KEY_SIZE 16 @@ -34,6 +34,8 @@ int has_aes_256 = 0; struct s390_aes_ctx { u8 iv[AES_BLOCK_SIZE]; u8 key[AES_MAX_KEY_SIZE]; + long enc; + long dec; int key_len; }; @@ -244,6 +246,189 @@ static struct crypto_alg aes_alg = { } }; +static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + + switch (key_len) { + case 16: + sctx->enc = KM_AES_128_ENCRYPT; + sctx->dec = KM_AES_128_DECRYPT; + break; + case 24: + sctx->enc = KM_AES_192_ENCRYPT; + sctx->dec = KM_AES_192_DECRYPT; + break; + case 32: + sctx->enc = KM_AES_256_ENCRYPT; + sctx->dec = KM_AES_256_DECRYPT; + break; + } + + return aes_set_key(tfm, in_key, key_len); +} + +static int ecb_aes_crypt(struct blkcipher_desc *desc, long func, void *param, + struct blkcipher_walk *walk) +{ + int ret = blkcipher_walk_virt(desc, walk); + unsigned int nbytes; + + while ((nbytes = walk->nbytes)) { + /* only use complete blocks */ + unsigned int n = nbytes & ~(AES_BLOCK_SIZE - 1); + u8 *out = walk->dst.virt.addr; + u8 *in = walk->src.virt.addr; + + ret = crypt_s390_km(func, param, out, in, n); + BUG_ON((ret < 0) || (ret != n)); + + nbytes &= AES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, walk, nbytes); + } + + return ret; +} + +static int ecb_aes_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_aes_crypt(desc, sctx->enc, sctx->key, &walk); +} + +static int ecb_aes_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_aes_crypt(desc, sctx->dec, sctx->key, &walk); +} + +static struct crypto_alg ecb_aes_alg = { + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_aes_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ecb_aes_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = ecb_aes_set_key, + .encrypt = ecb_aes_encrypt, + .decrypt = ecb_aes_decrypt, + } + } +}; + +static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + + switch (key_len) { + case 16: + sctx->enc = KMC_AES_128_ENCRYPT; + sctx->dec = KMC_AES_128_DECRYPT; + break; + case 24: + sctx->enc = KMC_AES_192_ENCRYPT; + sctx->dec = KMC_AES_192_DECRYPT; + break; + case 32: + sctx->enc = KMC_AES_256_ENCRYPT; + sctx->dec = KMC_AES_256_DECRYPT; + break; + } + + return aes_set_key(tfm, in_key, key_len); +} + +static int cbc_aes_crypt(struct blkcipher_desc *desc, long func, void *param, + struct blkcipher_walk *walk) +{ + int ret = blkcipher_walk_virt(desc, walk); + unsigned int nbytes = walk->nbytes; + + if (!nbytes) + goto out; + + memcpy(param, walk->iv, AES_BLOCK_SIZE); + do { + /* only use complete blocks */ + unsigned int n = nbytes & ~(AES_BLOCK_SIZE - 1); + u8 *out = walk->dst.virt.addr; + u8 *in = walk->src.virt.addr; + + ret = crypt_s390_kmc(func, param, out, in, n); + BUG_ON((ret < 0) || (ret != n)); + + nbytes &= AES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, walk, nbytes); + } while ((nbytes = walk->nbytes)); + memcpy(walk->iv, param, AES_BLOCK_SIZE); + +out: + return ret; +} + +static int cbc_aes_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return cbc_aes_crypt(desc, sctx->enc, sctx->iv, &walk); +} + +static int cbc_aes_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return cbc_aes_crypt(desc, sctx->dec, sctx->iv, &walk); +} + +static struct crypto_alg cbc_aes_alg = { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_aes_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(cbc_aes_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = cbc_aes_set_key, + .encrypt = cbc_aes_encrypt, + .decrypt = cbc_aes_decrypt, + } + } +}; + static int __init aes_init(void) { int ret; @@ -259,13 +444,40 @@ static int __init aes_init(void) return -ENOSYS; ret = crypto_register_alg(&aes_alg); - if (ret != 0) - printk(KERN_INFO "crypt_s390: aes_s390 couldn't be loaded.\n"); + if (ret != 0) { + printk(KERN_INFO "crypt_s390: aes-s390 couldn't be loaded.\n"); + goto aes_err; + } + + ret = crypto_register_alg(&ecb_aes_alg); + if (ret != 0) { + printk(KERN_INFO + "crypt_s390: ecb-aes-s390 couldn't be loaded.\n"); + goto ecb_aes_err; + } + + ret = crypto_register_alg(&cbc_aes_alg); + if (ret != 0) { + printk(KERN_INFO + "crypt_s390: cbc-aes-s390 couldn't be loaded.\n"); + goto cbc_aes_err; + } + +out: return ret; + +cbc_aes_err: + crypto_unregister_alg(&ecb_aes_alg); +ecb_aes_err: + crypto_unregister_alg(&aes_alg); +aes_err: + goto out; } static void __exit aes_fini(void) { + crypto_unregister_alg(&cbc_aes_alg); + crypto_unregister_alg(&ecb_aes_alg); crypto_unregister_alg(&aes_alg); } diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h index d1d330797f75..efd836c2e4a6 100644 --- a/arch/s390/crypto/crypt_s390.h +++ b/arch/s390/crypto/crypt_s390.h @@ -21,6 +21,7 @@ #define CRYPT_S390_FUNC_MASK 0x00FF #define CRYPT_S390_PRIORITY 300 +#define CRYPT_S390_COMPOSITE_PRIORITY 400 /* s930 cryptographic operations */ enum crypt_s390_operations { diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 3fd5d37d5e05..a6d2385ccb7a 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -13,9 +13,10 @@ * (at your option) any later version. * */ + +#include #include #include -#include #include "crypt_s390.h" #include "crypto_des.h" @@ -157,6 +158,143 @@ static struct crypto_alg des_alg = { } }; +static int ecb_desall_crypt(struct blkcipher_desc *desc, long func, + void *param, struct blkcipher_walk *walk) +{ + int ret = blkcipher_walk_virt(desc, walk); + unsigned int nbytes; + + while ((nbytes = walk->nbytes)) { + /* only use complete blocks */ + unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1); + u8 *out = walk->dst.virt.addr; + u8 *in = walk->src.virt.addr; + + ret = crypt_s390_km(func, param, out, in, n); + BUG_ON((ret < 0) || (ret != n)); + + nbytes &= DES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, walk, nbytes); + } + + return ret; +} + +static int cbc_desall_crypt(struct blkcipher_desc *desc, long func, + void *param, struct blkcipher_walk *walk) +{ + int ret = blkcipher_walk_virt(desc, walk); + unsigned int nbytes = walk->nbytes; + + if (!nbytes) + goto out; + + memcpy(param, walk->iv, DES_BLOCK_SIZE); + do { + /* only use complete blocks */ + unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1); + u8 *out = walk->dst.virt.addr; + u8 *in = walk->src.virt.addr; + + ret = crypt_s390_kmc(func, param, out, in, n); + BUG_ON((ret < 0) || (ret != n)); + + nbytes &= DES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, walk, nbytes); + } while ((nbytes = walk->nbytes)); + memcpy(walk->iv, param, DES_BLOCK_SIZE); + +out: + return ret; +} + +static int ecb_des_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_desall_crypt(desc, KM_DEA_ENCRYPT, sctx->key, &walk); +} + +static int ecb_des_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_desall_crypt(desc, KM_DEA_DECRYPT, sctx->key, &walk); +} + +static struct crypto_alg ecb_des_alg = { + .cra_name = "ecb(des)", + .cra_driver_name = "ecb-des-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypt_s390_des_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ecb_des_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .setkey = des_setkey, + .encrypt = ecb_des_encrypt, + .decrypt = ecb_des_decrypt, + } + } +}; + +static int cbc_des_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, sctx->iv, &walk); +} + +static int cbc_des_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, sctx->iv, &walk); +} + +static struct crypto_alg cbc_des_alg = { + .cra_name = "cbc(des)", + .cra_driver_name = "cbc-des-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypt_s390_des_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(cbc_des_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = des_setkey, + .encrypt = cbc_des_encrypt, + .decrypt = cbc_des_decrypt, + } + } +}; + /* * RFC2451: * @@ -295,6 +433,95 @@ static struct crypto_alg des3_128_alg = { } }; +static int ecb_des3_128_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypt_s390_des3_128_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_desall_crypt(desc, KM_TDEA_128_ENCRYPT, sctx->key, &walk); +} + +static int ecb_des3_128_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypt_s390_des3_128_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_desall_crypt(desc, KM_TDEA_128_DECRYPT, sctx->key, &walk); +} + +static struct crypto_alg ecb_des3_128_alg = { + .cra_name = "ecb(des3_ede128)", + .cra_driver_name = "ecb-des3_ede128-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES3_128_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypt_s390_des3_128_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT( + ecb_des3_128_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = DES3_128_KEY_SIZE, + .max_keysize = DES3_128_KEY_SIZE, + .setkey = des3_128_setkey, + .encrypt = ecb_des3_128_encrypt, + .decrypt = ecb_des3_128_decrypt, + } + } +}; + +static int cbc_des3_128_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypt_s390_des3_128_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return cbc_desall_crypt(desc, KMC_TDEA_128_ENCRYPT, sctx->iv, &walk); +} + +static int cbc_des3_128_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypt_s390_des3_128_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return cbc_desall_crypt(desc, KMC_TDEA_128_DECRYPT, sctx->iv, &walk); +} + +static struct crypto_alg cbc_des3_128_alg = { + .cra_name = "cbc(des3_ede128)", + .cra_driver_name = "cbc-des3_ede128-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES3_128_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypt_s390_des3_128_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT( + cbc_des3_128_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = DES3_128_KEY_SIZE, + .max_keysize = DES3_128_KEY_SIZE, + .ivsize = DES3_128_BLOCK_SIZE, + .setkey = des3_128_setkey, + .encrypt = cbc_des3_128_encrypt, + .decrypt = cbc_des3_128_decrypt, + } + } +}; + /* * RFC2451: * @@ -437,6 +664,95 @@ static struct crypto_alg des3_192_alg = { } }; +static int ecb_des3_192_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_desall_crypt(desc, KM_TDEA_192_ENCRYPT, sctx->key, &walk); +} + +static int ecb_des3_192_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_desall_crypt(desc, KM_TDEA_192_DECRYPT, sctx->key, &walk); +} + +static struct crypto_alg ecb_des3_192_alg = { + .cra_name = "ecb(des3_ede)", + .cra_driver_name = "ecb-des3_ede-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES3_192_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypt_s390_des3_192_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT( + ecb_des3_192_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = DES3_192_KEY_SIZE, + .max_keysize = DES3_192_KEY_SIZE, + .setkey = des3_192_setkey, + .encrypt = ecb_des3_192_encrypt, + .decrypt = ecb_des3_192_decrypt, + } + } +}; + +static int cbc_des3_192_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, sctx->iv, &walk); +} + +static int cbc_des3_192_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, sctx->iv, &walk); +} + +static struct crypto_alg cbc_des3_192_alg = { + .cra_name = "cbc(des3_ede)", + .cra_driver_name = "cbc-des3_ede-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES3_192_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypt_s390_des3_192_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT( + cbc_des3_192_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = DES3_192_KEY_SIZE, + .max_keysize = DES3_192_KEY_SIZE, + .ivsize = DES3_192_BLOCK_SIZE, + .setkey = des3_192_setkey, + .encrypt = cbc_des3_192_encrypt, + .decrypt = cbc_des3_192_decrypt, + } + } +}; + static int init(void) { int ret = 0; @@ -446,22 +762,69 @@ static int init(void) !crypt_s390_func_available(KM_TDEA_192_ENCRYPT)) return -ENOSYS; - ret |= (crypto_register_alg(&des_alg) == 0) ? 0:1; - ret |= (crypto_register_alg(&des3_128_alg) == 0) ? 0:2; - ret |= (crypto_register_alg(&des3_192_alg) == 0) ? 0:4; - if (ret) { - crypto_unregister_alg(&des3_192_alg); - crypto_unregister_alg(&des3_128_alg); - crypto_unregister_alg(&des_alg); - return -EEXIST; - } - return 0; + ret = crypto_register_alg(&des_alg); + if (ret) + goto des_err; + ret = crypto_register_alg(&ecb_des_alg); + if (ret) + goto ecb_des_err; + ret = crypto_register_alg(&cbc_des_alg); + if (ret) + goto cbc_des_err; + + ret = crypto_register_alg(&des3_128_alg); + if (ret) + goto des3_128_err; + ret = crypto_register_alg(&ecb_des3_128_alg); + if (ret) + goto ecb_des3_128_err; + ret = crypto_register_alg(&cbc_des3_128_alg); + if (ret) + goto cbc_des3_128_err; + + ret = crypto_register_alg(&des3_192_alg); + if (ret) + goto des3_192_err; + ret = crypto_register_alg(&ecb_des3_192_alg); + if (ret) + goto ecb_des3_192_err; + ret = crypto_register_alg(&cbc_des3_192_alg); + if (ret) + goto cbc_des3_192_err; + +out: + return ret; + +cbc_des3_192_err: + crypto_unregister_alg(&ecb_des3_192_alg); +ecb_des3_192_err: + crypto_unregister_alg(&des3_192_alg); +des3_192_err: + crypto_unregister_alg(&cbc_des3_128_alg); +cbc_des3_128_err: + crypto_unregister_alg(&ecb_des3_128_alg); +ecb_des3_128_err: + crypto_unregister_alg(&des3_128_alg); +des3_128_err: + crypto_unregister_alg(&cbc_des_alg); +cbc_des_err: + crypto_unregister_alg(&ecb_des_alg); +ecb_des_err: + crypto_unregister_alg(&des_alg); +des_err: + goto out; } static void __exit fini(void) { + crypto_unregister_alg(&cbc_des3_192_alg); + crypto_unregister_alg(&ecb_des3_192_alg); crypto_unregister_alg(&des3_192_alg); + crypto_unregister_alg(&cbc_des3_128_alg); + crypto_unregister_alg(&ecb_des3_128_alg); crypto_unregister_alg(&des3_128_alg); + crypto_unregister_alg(&cbc_des_alg); + crypto_unregister_alg(&ecb_des_alg); crypto_unregister_alg(&des_alg); } diff --git a/crypto/Kconfig b/crypto/Kconfig index 90d467c99c2c..be5eb0cb7c30 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -150,6 +150,7 @@ config CRYPTO_DES_S390 tristate "DES and Triple DES cipher algorithms (s390)" depends on S390 select CRYPTO_ALGAPI + select CRYPTO_BLKCIPHER help DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3). @@ -298,6 +299,7 @@ config CRYPTO_AES_S390 tristate "AES cipher algorithms (s390)" depends on S390 select CRYPTO_ALGAPI + select CRYPTO_BLKCIPHER help This is the s390 hardware accelerated implementation of the AES cipher algorithms (FIPS-197). AES uses the Rijndael From cba83564d112e4aec52227f68670f8dbd4d4ac89 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 13 Aug 2006 08:26:09 +1000 Subject: [PATCH 0315/1063] [CRYPTO] tcrypt: Use block ciphers where applicable This patch converts tcrypt to use the new block cipher type where applicable. Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 435 ++++++++++++++++++++++++++++-------------------- 1 file changed, 259 insertions(+), 176 deletions(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 56d0d8b3bcf2..5e2278069d22 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -17,6 +17,7 @@ * */ +#include #include #include #include @@ -54,8 +55,6 @@ */ #define ENCRYPT 1 #define DECRYPT 0 -#define MODE_ECB 1 -#define MODE_CBC 0 static unsigned int IDX[8] = { IDX1, IDX2, IDX3, IDX4, IDX5, IDX6, IDX7, IDX8 }; @@ -250,28 +249,27 @@ out: #endif /* CONFIG_CRYPTO_HMAC */ -static void test_cipher(char *algo, int mode, int enc, +static void test_cipher(char *algo, int enc, struct cipher_testvec *template, unsigned int tcount) { unsigned int ret, i, j, k, temp; unsigned int tsize; + unsigned int iv_len; + unsigned int len; char *q; - struct crypto_tfm *tfm; + struct crypto_blkcipher *tfm; char *key; struct cipher_testvec *cipher_tv; + struct blkcipher_desc desc; struct scatterlist sg[8]; - const char *e, *m; + const char *e; if (enc == ENCRYPT) e = "encryption"; else e = "decryption"; - if (mode == MODE_ECB) - m = "ECB"; - else - m = "CBC"; - printk("\ntesting %s %s %s\n", algo, m, e); + printk("\ntesting %s %s\n", algo, e); tsize = sizeof (struct cipher_testvec); tsize *= tcount; @@ -285,15 +283,15 @@ static void test_cipher(char *algo, int mode, int enc, memcpy(tvmem, template, tsize); cipher_tv = (void *)tvmem; - if (mode) - tfm = crypto_alloc_tfm(algo, 0); - else - tfm = crypto_alloc_tfm(algo, CRYPTO_TFM_MODE_CBC); + tfm = crypto_alloc_blkcipher(algo, 0, CRYPTO_ALG_ASYNC); - if (tfm == NULL) { - printk("failed to load transform for %s %s\n", algo, m); + if (IS_ERR(tfm)) { + printk("failed to load transform for %s: %ld\n", algo, + PTR_ERR(tfm)); return; } + desc.tfm = tfm; + desc.flags = 0; j = 0; for (i = 0; i < tcount; i++) { @@ -302,14 +300,17 @@ static void test_cipher(char *algo, int mode, int enc, printk("test %u (%d bit key):\n", j, cipher_tv[i].klen * 8); - tfm->crt_flags = 0; + crypto_blkcipher_clear_flags(tfm, ~0); if (cipher_tv[i].wk) - tfm->crt_flags |= CRYPTO_TFM_REQ_WEAK_KEY; + crypto_blkcipher_set_flags( + tfm, CRYPTO_TFM_REQ_WEAK_KEY); key = cipher_tv[i].key; - ret = crypto_cipher_setkey(tfm, key, cipher_tv[i].klen); + ret = crypto_blkcipher_setkey(tfm, key, + cipher_tv[i].klen); if (ret) { - printk("setkey() failed flags=%x\n", tfm->crt_flags); + printk("setkey() failed flags=%x\n", + crypto_blkcipher_get_flags(tfm)); if (!cipher_tv[i].fail) goto out; @@ -318,19 +319,19 @@ static void test_cipher(char *algo, int mode, int enc, sg_set_buf(&sg[0], cipher_tv[i].input, cipher_tv[i].ilen); - if (!mode) { - crypto_cipher_set_iv(tfm, cipher_tv[i].iv, - crypto_tfm_alg_ivsize(tfm)); - } - - if (enc) - ret = crypto_cipher_encrypt(tfm, sg, sg, cipher_tv[i].ilen); - else - ret = crypto_cipher_decrypt(tfm, sg, sg, cipher_tv[i].ilen); + iv_len = crypto_blkcipher_ivsize(tfm); + if (iv_len) + crypto_blkcipher_set_iv(tfm, cipher_tv[i].iv, + iv_len); + len = cipher_tv[i].ilen; + ret = enc ? + crypto_blkcipher_encrypt(&desc, sg, sg, len) : + crypto_blkcipher_decrypt(&desc, sg, sg, len); if (ret) { - printk("%s () failed flags=%x\n", e, tfm->crt_flags); + printk("%s () failed flags=%x\n", e, + desc.flags); goto out; } @@ -343,7 +344,7 @@ static void test_cipher(char *algo, int mode, int enc, } } - printk("\ntesting %s %s %s across pages (chunking)\n", algo, m, e); + printk("\ntesting %s %s across pages (chunking)\n", algo, e); memset(xbuf, 0, XBUFSIZE); j = 0; @@ -353,14 +354,17 @@ static void test_cipher(char *algo, int mode, int enc, printk("test %u (%d bit key):\n", j, cipher_tv[i].klen * 8); - tfm->crt_flags = 0; + crypto_blkcipher_clear_flags(tfm, ~0); if (cipher_tv[i].wk) - tfm->crt_flags |= CRYPTO_TFM_REQ_WEAK_KEY; + crypto_blkcipher_set_flags( + tfm, CRYPTO_TFM_REQ_WEAK_KEY); key = cipher_tv[i].key; - ret = crypto_cipher_setkey(tfm, key, cipher_tv[i].klen); + ret = crypto_blkcipher_setkey(tfm, key, + cipher_tv[i].klen); if (ret) { - printk("setkey() failed flags=%x\n", tfm->crt_flags); + printk("setkey() failed flags=%x\n", + crypto_blkcipher_get_flags(tfm)); if (!cipher_tv[i].fail) goto out; @@ -376,18 +380,19 @@ static void test_cipher(char *algo, int mode, int enc, cipher_tv[i].tap[k]); } - if (!mode) { - crypto_cipher_set_iv(tfm, cipher_tv[i].iv, - crypto_tfm_alg_ivsize(tfm)); - } + iv_len = crypto_blkcipher_ivsize(tfm); + if (iv_len) + crypto_blkcipher_set_iv(tfm, cipher_tv[i].iv, + iv_len); - if (enc) - ret = crypto_cipher_encrypt(tfm, sg, sg, cipher_tv[i].ilen); - else - ret = crypto_cipher_decrypt(tfm, sg, sg, cipher_tv[i].ilen); + len = cipher_tv[i].ilen; + ret = enc ? + crypto_blkcipher_encrypt(&desc, sg, sg, len) : + crypto_blkcipher_decrypt(&desc, sg, sg, len); if (ret) { - printk("%s () failed flags=%x\n", e, tfm->crt_flags); + printk("%s () failed flags=%x\n", e, + desc.flags); goto out; } @@ -406,10 +411,10 @@ static void test_cipher(char *algo, int mode, int enc, } out: - crypto_free_tfm(tfm); + crypto_free_blkcipher(tfm); } -static int test_cipher_jiffies(struct crypto_tfm *tfm, int enc, char *p, +static int test_cipher_jiffies(struct blkcipher_desc *desc, int enc, char *p, int blen, int sec) { struct scatterlist sg[1]; @@ -422,9 +427,9 @@ static int test_cipher_jiffies(struct crypto_tfm *tfm, int enc, char *p, for (start = jiffies, end = start + sec * HZ, bcount = 0; time_before(jiffies, end); bcount++) { if (enc) - ret = crypto_cipher_encrypt(tfm, sg, sg, blen); + ret = crypto_blkcipher_encrypt(desc, sg, sg, blen); else - ret = crypto_cipher_decrypt(tfm, sg, sg, blen); + ret = crypto_blkcipher_decrypt(desc, sg, sg, blen); if (ret) return ret; @@ -435,7 +440,7 @@ static int test_cipher_jiffies(struct crypto_tfm *tfm, int enc, char *p, return 0; } -static int test_cipher_cycles(struct crypto_tfm *tfm, int enc, char *p, +static int test_cipher_cycles(struct blkcipher_desc *desc, int enc, char *p, int blen) { struct scatterlist sg[1]; @@ -451,9 +456,9 @@ static int test_cipher_cycles(struct crypto_tfm *tfm, int enc, char *p, /* Warm-up run. */ for (i = 0; i < 4; i++) { if (enc) - ret = crypto_cipher_encrypt(tfm, sg, sg, blen); + ret = crypto_blkcipher_encrypt(desc, sg, sg, blen); else - ret = crypto_cipher_decrypt(tfm, sg, sg, blen); + ret = crypto_blkcipher_decrypt(desc, sg, sg, blen); if (ret) goto out; @@ -465,9 +470,9 @@ static int test_cipher_cycles(struct crypto_tfm *tfm, int enc, char *p, start = get_cycles(); if (enc) - ret = crypto_cipher_encrypt(tfm, sg, sg, blen); + ret = crypto_blkcipher_encrypt(desc, sg, sg, blen); else - ret = crypto_cipher_decrypt(tfm, sg, sg, blen); + ret = crypto_blkcipher_decrypt(desc, sg, sg, blen); end = get_cycles(); if (ret) @@ -487,35 +492,32 @@ out: return ret; } -static void test_cipher_speed(char *algo, int mode, int enc, unsigned int sec, +static void test_cipher_speed(char *algo, int enc, unsigned int sec, struct cipher_testvec *template, unsigned int tcount, struct cipher_speed *speed) { unsigned int ret, i, j, iv_len; unsigned char *key, *p, iv[128]; - struct crypto_tfm *tfm; - const char *e, *m; + struct crypto_blkcipher *tfm; + struct blkcipher_desc desc; + const char *e; if (enc == ENCRYPT) e = "encryption"; else e = "decryption"; - if (mode == MODE_ECB) - m = "ECB"; - else - m = "CBC"; - printk("\ntesting speed of %s %s %s\n", algo, m, e); + printk("\ntesting speed of %s %s\n", algo, e); - if (mode) - tfm = crypto_alloc_tfm(algo, 0); - else - tfm = crypto_alloc_tfm(algo, CRYPTO_TFM_MODE_CBC); + tfm = crypto_alloc_blkcipher(algo, 0, CRYPTO_ALG_ASYNC); - if (tfm == NULL) { - printk("failed to load transform for %s %s\n", algo, m); + if (IS_ERR(tfm)) { + printk("failed to load transform for %s: %ld\n", algo, + PTR_ERR(tfm)); return; } + desc.tfm = tfm; + desc.flags = 0; for (i = 0; speed[i].klen != 0; i++) { if ((speed[i].blen + speed[i].klen) > TVMEMSIZE) { @@ -539,32 +541,33 @@ static void test_cipher_speed(char *algo, int mode, int enc, unsigned int sec, } p = (unsigned char *)tvmem + speed[i].klen; - ret = crypto_cipher_setkey(tfm, key, speed[i].klen); + ret = crypto_blkcipher_setkey(tfm, key, speed[i].klen); if (ret) { - printk("setkey() failed flags=%x\n", tfm->crt_flags); + printk("setkey() failed flags=%x\n", + crypto_blkcipher_get_flags(tfm)); goto out; } - if (!mode) { - iv_len = crypto_tfm_alg_ivsize(tfm); + iv_len = crypto_blkcipher_ivsize(tfm); + if (iv_len) { memset(&iv, 0xff, iv_len); - crypto_cipher_set_iv(tfm, iv, iv_len); + crypto_blkcipher_set_iv(tfm, iv, iv_len); } if (sec) - ret = test_cipher_jiffies(tfm, enc, p, speed[i].blen, + ret = test_cipher_jiffies(&desc, enc, p, speed[i].blen, sec); else - ret = test_cipher_cycles(tfm, enc, p, speed[i].blen); + ret = test_cipher_cycles(&desc, enc, p, speed[i].blen); if (ret) { - printk("%s() failed flags=%x\n", e, tfm->crt_flags); + printk("%s() failed flags=%x\n", e, desc.flags); break; } } out: - crypto_free_tfm(tfm); + crypto_free_blkcipher(tfm); } static void test_digest_jiffies(struct crypto_tfm *tfm, char *p, int blen, @@ -784,79 +787,119 @@ static void do_test(void) test_hash("sha1", sha1_tv_template, SHA1_TEST_VECTORS); //DES - test_cipher ("des", MODE_ECB, ENCRYPT, des_enc_tv_template, DES_ENC_TEST_VECTORS); - test_cipher ("des", MODE_ECB, DECRYPT, des_dec_tv_template, DES_DEC_TEST_VECTORS); - test_cipher ("des", MODE_CBC, ENCRYPT, des_cbc_enc_tv_template, DES_CBC_ENC_TEST_VECTORS); - test_cipher ("des", MODE_CBC, DECRYPT, des_cbc_dec_tv_template, DES_CBC_DEC_TEST_VECTORS); + test_cipher("ecb(des)", ENCRYPT, des_enc_tv_template, + DES_ENC_TEST_VECTORS); + test_cipher("ecb(des)", DECRYPT, des_dec_tv_template, + DES_DEC_TEST_VECTORS); + test_cipher("cbc(des)", ENCRYPT, des_cbc_enc_tv_template, + DES_CBC_ENC_TEST_VECTORS); + test_cipher("cbc(des)", DECRYPT, des_cbc_dec_tv_template, + DES_CBC_DEC_TEST_VECTORS); //DES3_EDE - test_cipher ("des3_ede", MODE_ECB, ENCRYPT, des3_ede_enc_tv_template, DES3_EDE_ENC_TEST_VECTORS); - test_cipher ("des3_ede", MODE_ECB, DECRYPT, des3_ede_dec_tv_template, DES3_EDE_DEC_TEST_VECTORS); + test_cipher("ecb(des3_ede)", ENCRYPT, des3_ede_enc_tv_template, + DES3_EDE_ENC_TEST_VECTORS); + test_cipher("ecb(des3_ede)", DECRYPT, des3_ede_dec_tv_template, + DES3_EDE_DEC_TEST_VECTORS); test_hash("md4", md4_tv_template, MD4_TEST_VECTORS); test_hash("sha256", sha256_tv_template, SHA256_TEST_VECTORS); //BLOWFISH - test_cipher ("blowfish", MODE_ECB, ENCRYPT, bf_enc_tv_template, BF_ENC_TEST_VECTORS); - test_cipher ("blowfish", MODE_ECB, DECRYPT, bf_dec_tv_template, BF_DEC_TEST_VECTORS); - test_cipher ("blowfish", MODE_CBC, ENCRYPT, bf_cbc_enc_tv_template, BF_CBC_ENC_TEST_VECTORS); - test_cipher ("blowfish", MODE_CBC, DECRYPT, bf_cbc_dec_tv_template, BF_CBC_DEC_TEST_VECTORS); + test_cipher("ecb(blowfish)", ENCRYPT, bf_enc_tv_template, + BF_ENC_TEST_VECTORS); + test_cipher("ecb(blowfish)", DECRYPT, bf_dec_tv_template, + BF_DEC_TEST_VECTORS); + test_cipher("cbc(blowfish)", ENCRYPT, bf_cbc_enc_tv_template, + BF_CBC_ENC_TEST_VECTORS); + test_cipher("cbc(blowfish)", DECRYPT, bf_cbc_dec_tv_template, + BF_CBC_DEC_TEST_VECTORS); //TWOFISH - test_cipher ("twofish", MODE_ECB, ENCRYPT, tf_enc_tv_template, TF_ENC_TEST_VECTORS); - test_cipher ("twofish", MODE_ECB, DECRYPT, tf_dec_tv_template, TF_DEC_TEST_VECTORS); - test_cipher ("twofish", MODE_CBC, ENCRYPT, tf_cbc_enc_tv_template, TF_CBC_ENC_TEST_VECTORS); - test_cipher ("twofish", MODE_CBC, DECRYPT, tf_cbc_dec_tv_template, TF_CBC_DEC_TEST_VECTORS); + test_cipher("ecb(twofish)", ENCRYPT, tf_enc_tv_template, + TF_ENC_TEST_VECTORS); + test_cipher("ecb(twofish)", DECRYPT, tf_dec_tv_template, + TF_DEC_TEST_VECTORS); + test_cipher("cbc(twofish)", ENCRYPT, tf_cbc_enc_tv_template, + TF_CBC_ENC_TEST_VECTORS); + test_cipher("cbc(twofish)", DECRYPT, tf_cbc_dec_tv_template, + TF_CBC_DEC_TEST_VECTORS); //SERPENT - test_cipher ("serpent", MODE_ECB, ENCRYPT, serpent_enc_tv_template, SERPENT_ENC_TEST_VECTORS); - test_cipher ("serpent", MODE_ECB, DECRYPT, serpent_dec_tv_template, SERPENT_DEC_TEST_VECTORS); + test_cipher("ecb(serpent)", ENCRYPT, serpent_enc_tv_template, + SERPENT_ENC_TEST_VECTORS); + test_cipher("ecb(serpent)", DECRYPT, serpent_dec_tv_template, + SERPENT_DEC_TEST_VECTORS); //TNEPRES - test_cipher ("tnepres", MODE_ECB, ENCRYPT, tnepres_enc_tv_template, TNEPRES_ENC_TEST_VECTORS); - test_cipher ("tnepres", MODE_ECB, DECRYPT, tnepres_dec_tv_template, TNEPRES_DEC_TEST_VECTORS); + test_cipher("ecb(tnepres)", ENCRYPT, tnepres_enc_tv_template, + TNEPRES_ENC_TEST_VECTORS); + test_cipher("ecb(tnepres)", DECRYPT, tnepres_dec_tv_template, + TNEPRES_DEC_TEST_VECTORS); //AES - test_cipher ("aes", MODE_ECB, ENCRYPT, aes_enc_tv_template, AES_ENC_TEST_VECTORS); - test_cipher ("aes", MODE_ECB, DECRYPT, aes_dec_tv_template, AES_DEC_TEST_VECTORS); - test_cipher ("aes", MODE_CBC, ENCRYPT, aes_cbc_enc_tv_template, AES_CBC_ENC_TEST_VECTORS); - test_cipher ("aes", MODE_CBC, DECRYPT, aes_cbc_dec_tv_template, AES_CBC_DEC_TEST_VECTORS); + test_cipher("ecb(aes)", ENCRYPT, aes_enc_tv_template, + AES_ENC_TEST_VECTORS); + test_cipher("ecb(aes)", DECRYPT, aes_dec_tv_template, + AES_DEC_TEST_VECTORS); + test_cipher("cbc(aes)", ENCRYPT, aes_cbc_enc_tv_template, + AES_CBC_ENC_TEST_VECTORS); + test_cipher("cbc(aes)", DECRYPT, aes_cbc_dec_tv_template, + AES_CBC_DEC_TEST_VECTORS); //CAST5 - test_cipher ("cast5", MODE_ECB, ENCRYPT, cast5_enc_tv_template, CAST5_ENC_TEST_VECTORS); - test_cipher ("cast5", MODE_ECB, DECRYPT, cast5_dec_tv_template, CAST5_DEC_TEST_VECTORS); + test_cipher("ecb(cast5)", ENCRYPT, cast5_enc_tv_template, + CAST5_ENC_TEST_VECTORS); + test_cipher("ecb(cast5)", DECRYPT, cast5_dec_tv_template, + CAST5_DEC_TEST_VECTORS); //CAST6 - test_cipher ("cast6", MODE_ECB, ENCRYPT, cast6_enc_tv_template, CAST6_ENC_TEST_VECTORS); - test_cipher ("cast6", MODE_ECB, DECRYPT, cast6_dec_tv_template, CAST6_DEC_TEST_VECTORS); + test_cipher("ecb(cast6)", ENCRYPT, cast6_enc_tv_template, + CAST6_ENC_TEST_VECTORS); + test_cipher("ecb(cast6)", DECRYPT, cast6_dec_tv_template, + CAST6_DEC_TEST_VECTORS); //ARC4 - test_cipher ("arc4", MODE_ECB, ENCRYPT, arc4_enc_tv_template, ARC4_ENC_TEST_VECTORS); - test_cipher ("arc4", MODE_ECB, DECRYPT, arc4_dec_tv_template, ARC4_DEC_TEST_VECTORS); + test_cipher("ecb(arc4)", ENCRYPT, arc4_enc_tv_template, + ARC4_ENC_TEST_VECTORS); + test_cipher("ecb(arc4)", DECRYPT, arc4_dec_tv_template, + ARC4_DEC_TEST_VECTORS); //TEA - test_cipher ("tea", MODE_ECB, ENCRYPT, tea_enc_tv_template, TEA_ENC_TEST_VECTORS); - test_cipher ("tea", MODE_ECB, DECRYPT, tea_dec_tv_template, TEA_DEC_TEST_VECTORS); + test_cipher("ecb(tea)", ENCRYPT, tea_enc_tv_template, + TEA_ENC_TEST_VECTORS); + test_cipher("ecb(tea)", DECRYPT, tea_dec_tv_template, + TEA_DEC_TEST_VECTORS); //XTEA - test_cipher ("xtea", MODE_ECB, ENCRYPT, xtea_enc_tv_template, XTEA_ENC_TEST_VECTORS); - test_cipher ("xtea", MODE_ECB, DECRYPT, xtea_dec_tv_template, XTEA_DEC_TEST_VECTORS); + test_cipher("ecb(xtea)", ENCRYPT, xtea_enc_tv_template, + XTEA_ENC_TEST_VECTORS); + test_cipher("ecb(xtea)", DECRYPT, xtea_dec_tv_template, + XTEA_DEC_TEST_VECTORS); //KHAZAD - test_cipher ("khazad", MODE_ECB, ENCRYPT, khazad_enc_tv_template, KHAZAD_ENC_TEST_VECTORS); - test_cipher ("khazad", MODE_ECB, DECRYPT, khazad_dec_tv_template, KHAZAD_DEC_TEST_VECTORS); + test_cipher("ecb(khazad)", ENCRYPT, khazad_enc_tv_template, + KHAZAD_ENC_TEST_VECTORS); + test_cipher("ecb(khazad)", DECRYPT, khazad_dec_tv_template, + KHAZAD_DEC_TEST_VECTORS); //ANUBIS - test_cipher ("anubis", MODE_ECB, ENCRYPT, anubis_enc_tv_template, ANUBIS_ENC_TEST_VECTORS); - test_cipher ("anubis", MODE_ECB, DECRYPT, anubis_dec_tv_template, ANUBIS_DEC_TEST_VECTORS); - test_cipher ("anubis", MODE_CBC, ENCRYPT, anubis_cbc_enc_tv_template, ANUBIS_CBC_ENC_TEST_VECTORS); - test_cipher ("anubis", MODE_CBC, DECRYPT, anubis_cbc_dec_tv_template, ANUBIS_CBC_ENC_TEST_VECTORS); + test_cipher("ecb(anubis)", ENCRYPT, anubis_enc_tv_template, + ANUBIS_ENC_TEST_VECTORS); + test_cipher("ecb(anubis)", DECRYPT, anubis_dec_tv_template, + ANUBIS_DEC_TEST_VECTORS); + test_cipher("cbc(anubis)", ENCRYPT, anubis_cbc_enc_tv_template, + ANUBIS_CBC_ENC_TEST_VECTORS); + test_cipher("cbc(anubis)", DECRYPT, anubis_cbc_dec_tv_template, + ANUBIS_CBC_ENC_TEST_VECTORS); //XETA - test_cipher ("xeta", MODE_ECB, ENCRYPT, xeta_enc_tv_template, XETA_ENC_TEST_VECTORS); - test_cipher ("xeta", MODE_ECB, DECRYPT, xeta_dec_tv_template, XETA_DEC_TEST_VECTORS); + test_cipher("ecb(xeta)", ENCRYPT, xeta_enc_tv_template, + XETA_ENC_TEST_VECTORS); + test_cipher("ecb(xeta)", DECRYPT, xeta_dec_tv_template, + XETA_DEC_TEST_VECTORS); test_hash("sha384", sha384_tv_template, SHA384_TEST_VECTORS); test_hash("sha512", sha512_tv_template, SHA512_TEST_VECTORS); @@ -886,15 +929,21 @@ static void do_test(void) break; case 3: - test_cipher ("des", MODE_ECB, ENCRYPT, des_enc_tv_template, DES_ENC_TEST_VECTORS); - test_cipher ("des", MODE_ECB, DECRYPT, des_dec_tv_template, DES_DEC_TEST_VECTORS); - test_cipher ("des", MODE_CBC, ENCRYPT, des_cbc_enc_tv_template, DES_CBC_ENC_TEST_VECTORS); - test_cipher ("des", MODE_CBC, DECRYPT, des_cbc_dec_tv_template, DES_CBC_DEC_TEST_VECTORS); + test_cipher("ecb(des)", ENCRYPT, des_enc_tv_template, + DES_ENC_TEST_VECTORS); + test_cipher("ecb(des)", DECRYPT, des_dec_tv_template, + DES_DEC_TEST_VECTORS); + test_cipher("cbc(des)", ENCRYPT, des_cbc_enc_tv_template, + DES_CBC_ENC_TEST_VECTORS); + test_cipher("cbc(des)", DECRYPT, des_cbc_dec_tv_template, + DES_CBC_DEC_TEST_VECTORS); break; case 4: - test_cipher ("des3_ede", MODE_ECB, ENCRYPT, des3_ede_enc_tv_template, DES3_EDE_ENC_TEST_VECTORS); - test_cipher ("des3_ede", MODE_ECB, DECRYPT, des3_ede_dec_tv_template, DES3_EDE_DEC_TEST_VECTORS); + test_cipher("ecb(des3_ede)", ENCRYPT, des3_ede_enc_tv_template, + DES3_EDE_ENC_TEST_VECTORS); + test_cipher("ecb(des3_ede)", DECRYPT, des3_ede_dec_tv_template, + DES3_EDE_DEC_TEST_VECTORS); break; case 5: @@ -906,29 +955,43 @@ static void do_test(void) break; case 7: - test_cipher ("blowfish", MODE_ECB, ENCRYPT, bf_enc_tv_template, BF_ENC_TEST_VECTORS); - test_cipher ("blowfish", MODE_ECB, DECRYPT, bf_dec_tv_template, BF_DEC_TEST_VECTORS); - test_cipher ("blowfish", MODE_CBC, ENCRYPT, bf_cbc_enc_tv_template, BF_CBC_ENC_TEST_VECTORS); - test_cipher ("blowfish", MODE_CBC, DECRYPT, bf_cbc_dec_tv_template, BF_CBC_DEC_TEST_VECTORS); + test_cipher("ecb(blowfish)", ENCRYPT, bf_enc_tv_template, + BF_ENC_TEST_VECTORS); + test_cipher("ecb(blowfish)", DECRYPT, bf_dec_tv_template, + BF_DEC_TEST_VECTORS); + test_cipher("cbc(blowfish)", ENCRYPT, bf_cbc_enc_tv_template, + BF_CBC_ENC_TEST_VECTORS); + test_cipher("cbc(blowfish)", DECRYPT, bf_cbc_dec_tv_template, + BF_CBC_DEC_TEST_VECTORS); break; case 8: - test_cipher ("twofish", MODE_ECB, ENCRYPT, tf_enc_tv_template, TF_ENC_TEST_VECTORS); - test_cipher ("twofish", MODE_ECB, DECRYPT, tf_dec_tv_template, TF_DEC_TEST_VECTORS); - test_cipher ("twofish", MODE_CBC, ENCRYPT, tf_cbc_enc_tv_template, TF_CBC_ENC_TEST_VECTORS); - test_cipher ("twofish", MODE_CBC, DECRYPT, tf_cbc_dec_tv_template, TF_CBC_DEC_TEST_VECTORS); + test_cipher("ecb(twofish)", ENCRYPT, tf_enc_tv_template, + TF_ENC_TEST_VECTORS); + test_cipher("ecb(twofish)", DECRYPT, tf_dec_tv_template, + TF_DEC_TEST_VECTORS); + test_cipher("cbc(twofish)", ENCRYPT, tf_cbc_enc_tv_template, + TF_CBC_ENC_TEST_VECTORS); + test_cipher("cbc(twofish)", DECRYPT, tf_cbc_dec_tv_template, + TF_CBC_DEC_TEST_VECTORS); break; case 9: - test_cipher ("serpent", MODE_ECB, ENCRYPT, serpent_enc_tv_template, SERPENT_ENC_TEST_VECTORS); - test_cipher ("serpent", MODE_ECB, DECRYPT, serpent_dec_tv_template, SERPENT_DEC_TEST_VECTORS); + test_cipher("ecb(serpent)", ENCRYPT, serpent_enc_tv_template, + SERPENT_ENC_TEST_VECTORS); + test_cipher("ecb(serpent)", DECRYPT, serpent_dec_tv_template, + SERPENT_DEC_TEST_VECTORS); break; case 10: - test_cipher ("aes", MODE_ECB, ENCRYPT, aes_enc_tv_template, AES_ENC_TEST_VECTORS); - test_cipher ("aes", MODE_ECB, DECRYPT, aes_dec_tv_template, AES_DEC_TEST_VECTORS); - test_cipher ("aes", MODE_CBC, ENCRYPT, aes_cbc_enc_tv_template, AES_CBC_ENC_TEST_VECTORS); - test_cipher ("aes", MODE_CBC, DECRYPT, aes_cbc_dec_tv_template, AES_CBC_DEC_TEST_VECTORS); + test_cipher("ecb(aes)", ENCRYPT, aes_enc_tv_template, + AES_ENC_TEST_VECTORS); + test_cipher("ecb(aes)", DECRYPT, aes_dec_tv_template, + AES_DEC_TEST_VECTORS); + test_cipher("cbc(aes)", ENCRYPT, aes_cbc_enc_tv_template, + AES_CBC_ENC_TEST_VECTORS); + test_cipher("cbc(aes)", DECRYPT, aes_cbc_dec_tv_template, + AES_CBC_DEC_TEST_VECTORS); break; case 11: @@ -944,18 +1007,24 @@ static void do_test(void) break; case 14: - test_cipher ("cast5", MODE_ECB, ENCRYPT, cast5_enc_tv_template, CAST5_ENC_TEST_VECTORS); - test_cipher ("cast5", MODE_ECB, DECRYPT, cast5_dec_tv_template, CAST5_DEC_TEST_VECTORS); + test_cipher("ecb(cast5)", ENCRYPT, cast5_enc_tv_template, + CAST5_ENC_TEST_VECTORS); + test_cipher("ecb(cast5)", DECRYPT, cast5_dec_tv_template, + CAST5_DEC_TEST_VECTORS); break; case 15: - test_cipher ("cast6", MODE_ECB, ENCRYPT, cast6_enc_tv_template, CAST6_ENC_TEST_VECTORS); - test_cipher ("cast6", MODE_ECB, DECRYPT, cast6_dec_tv_template, CAST6_DEC_TEST_VECTORS); + test_cipher("ecb(cast6)", ENCRYPT, cast6_enc_tv_template, + CAST6_ENC_TEST_VECTORS); + test_cipher("ecb(cast6)", DECRYPT, cast6_dec_tv_template, + CAST6_DEC_TEST_VECTORS); break; case 16: - test_cipher ("arc4", MODE_ECB, ENCRYPT, arc4_enc_tv_template, ARC4_ENC_TEST_VECTORS); - test_cipher ("arc4", MODE_ECB, DECRYPT, arc4_dec_tv_template, ARC4_DEC_TEST_VECTORS); + test_cipher("ecb(arc4)", ENCRYPT, arc4_enc_tv_template, + ARC4_ENC_TEST_VECTORS); + test_cipher("ecb(arc4)", DECRYPT, arc4_dec_tv_template, + ARC4_DEC_TEST_VECTORS); break; case 17: @@ -967,18 +1036,24 @@ static void do_test(void) break; case 19: - test_cipher ("tea", MODE_ECB, ENCRYPT, tea_enc_tv_template, TEA_ENC_TEST_VECTORS); - test_cipher ("tea", MODE_ECB, DECRYPT, tea_dec_tv_template, TEA_DEC_TEST_VECTORS); + test_cipher("ecb(tea)", ENCRYPT, tea_enc_tv_template, + TEA_ENC_TEST_VECTORS); + test_cipher("ecb(tea)", DECRYPT, tea_dec_tv_template, + TEA_DEC_TEST_VECTORS); break; case 20: - test_cipher ("xtea", MODE_ECB, ENCRYPT, xtea_enc_tv_template, XTEA_ENC_TEST_VECTORS); - test_cipher ("xtea", MODE_ECB, DECRYPT, xtea_dec_tv_template, XTEA_DEC_TEST_VECTORS); + test_cipher("ecb(xtea)", ENCRYPT, xtea_enc_tv_template, + XTEA_ENC_TEST_VECTORS); + test_cipher("ecb(xtea)", DECRYPT, xtea_dec_tv_template, + XTEA_DEC_TEST_VECTORS); break; case 21: - test_cipher ("khazad", MODE_ECB, ENCRYPT, khazad_enc_tv_template, KHAZAD_ENC_TEST_VECTORS); - test_cipher ("khazad", MODE_ECB, DECRYPT, khazad_dec_tv_template, KHAZAD_DEC_TEST_VECTORS); + test_cipher("ecb(khazad)", ENCRYPT, khazad_enc_tv_template, + KHAZAD_ENC_TEST_VECTORS); + test_cipher("ecb(khazad)", DECRYPT, khazad_dec_tv_template, + KHAZAD_DEC_TEST_VECTORS); break; case 22: @@ -994,15 +1069,21 @@ static void do_test(void) break; case 25: - test_cipher ("tnepres", MODE_ECB, ENCRYPT, tnepres_enc_tv_template, TNEPRES_ENC_TEST_VECTORS); - test_cipher ("tnepres", MODE_ECB, DECRYPT, tnepres_dec_tv_template, TNEPRES_DEC_TEST_VECTORS); + test_cipher("ecb(tnepres)", ENCRYPT, tnepres_enc_tv_template, + TNEPRES_ENC_TEST_VECTORS); + test_cipher("ecb(tnepres)", DECRYPT, tnepres_dec_tv_template, + TNEPRES_DEC_TEST_VECTORS); break; case 26: - test_cipher ("anubis", MODE_ECB, ENCRYPT, anubis_enc_tv_template, ANUBIS_ENC_TEST_VECTORS); - test_cipher ("anubis", MODE_ECB, DECRYPT, anubis_dec_tv_template, ANUBIS_DEC_TEST_VECTORS); - test_cipher ("anubis", MODE_CBC, ENCRYPT, anubis_cbc_enc_tv_template, ANUBIS_CBC_ENC_TEST_VECTORS); - test_cipher ("anubis", MODE_CBC, DECRYPT, anubis_cbc_dec_tv_template, ANUBIS_CBC_ENC_TEST_VECTORS); + test_cipher("ecb(anubis)", ENCRYPT, anubis_enc_tv_template, + ANUBIS_ENC_TEST_VECTORS); + test_cipher("ecb(anubis)", DECRYPT, anubis_dec_tv_template, + ANUBIS_DEC_TEST_VECTORS); + test_cipher("cbc(anubis)", ENCRYPT, anubis_cbc_enc_tv_template, + ANUBIS_CBC_ENC_TEST_VECTORS); + test_cipher("cbc(anubis)", DECRYPT, anubis_cbc_dec_tv_template, + ANUBIS_CBC_ENC_TEST_VECTORS); break; case 27: @@ -1019,8 +1100,10 @@ static void do_test(void) break; case 30: - test_cipher ("xeta", MODE_ECB, ENCRYPT, xeta_enc_tv_template, XETA_ENC_TEST_VECTORS); - test_cipher ("xeta", MODE_ECB, DECRYPT, xeta_dec_tv_template, XETA_DEC_TEST_VECTORS); + test_cipher("ecb(xeta)", ENCRYPT, xeta_enc_tv_template, + XETA_ENC_TEST_VECTORS); + test_cipher("ecb(xeta)", DECRYPT, xeta_dec_tv_template, + XETA_DEC_TEST_VECTORS); break; #ifdef CONFIG_CRYPTO_HMAC @@ -1039,65 +1122,65 @@ static void do_test(void) #endif case 200: - test_cipher_speed("aes", MODE_ECB, ENCRYPT, sec, NULL, 0, + test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0, aes_speed_template); - test_cipher_speed("aes", MODE_ECB, DECRYPT, sec, NULL, 0, + test_cipher_speed("ecb(aes)", DECRYPT, sec, NULL, 0, aes_speed_template); - test_cipher_speed("aes", MODE_CBC, ENCRYPT, sec, NULL, 0, + test_cipher_speed("cbc(aes)", ENCRYPT, sec, NULL, 0, aes_speed_template); - test_cipher_speed("aes", MODE_CBC, DECRYPT, sec, NULL, 0, + test_cipher_speed("cbc(aes)", DECRYPT, sec, NULL, 0, aes_speed_template); break; case 201: - test_cipher_speed("des3_ede", MODE_ECB, ENCRYPT, sec, + test_cipher_speed("ecb(des3_ede)", ENCRYPT, sec, des3_ede_enc_tv_template, DES3_EDE_ENC_TEST_VECTORS, des3_ede_speed_template); - test_cipher_speed("des3_ede", MODE_ECB, DECRYPT, sec, + test_cipher_speed("ecb(des3_ede)", DECRYPT, sec, des3_ede_dec_tv_template, DES3_EDE_DEC_TEST_VECTORS, des3_ede_speed_template); - test_cipher_speed("des3_ede", MODE_CBC, ENCRYPT, sec, + test_cipher_speed("cbc(des3_ede)", ENCRYPT, sec, des3_ede_enc_tv_template, DES3_EDE_ENC_TEST_VECTORS, des3_ede_speed_template); - test_cipher_speed("des3_ede", MODE_CBC, DECRYPT, sec, + test_cipher_speed("cbc(des3_ede)", DECRYPT, sec, des3_ede_dec_tv_template, DES3_EDE_DEC_TEST_VECTORS, des3_ede_speed_template); break; case 202: - test_cipher_speed("twofish", MODE_ECB, ENCRYPT, sec, NULL, 0, + test_cipher_speed("ecb(twofish)", ENCRYPT, sec, NULL, 0, twofish_speed_template); - test_cipher_speed("twofish", MODE_ECB, DECRYPT, sec, NULL, 0, + test_cipher_speed("ecb(twofish)", DECRYPT, sec, NULL, 0, twofish_speed_template); - test_cipher_speed("twofish", MODE_CBC, ENCRYPT, sec, NULL, 0, + test_cipher_speed("cbc(twofish)", ENCRYPT, sec, NULL, 0, twofish_speed_template); - test_cipher_speed("twofish", MODE_CBC, DECRYPT, sec, NULL, 0, + test_cipher_speed("cbc(twofish)", DECRYPT, sec, NULL, 0, twofish_speed_template); break; case 203: - test_cipher_speed("blowfish", MODE_ECB, ENCRYPT, sec, NULL, 0, + test_cipher_speed("ecb(blowfish)", ENCRYPT, sec, NULL, 0, blowfish_speed_template); - test_cipher_speed("blowfish", MODE_ECB, DECRYPT, sec, NULL, 0, + test_cipher_speed("ecb(blowfish)", DECRYPT, sec, NULL, 0, blowfish_speed_template); - test_cipher_speed("blowfish", MODE_CBC, ENCRYPT, sec, NULL, 0, + test_cipher_speed("cbc(blowfish)", ENCRYPT, sec, NULL, 0, blowfish_speed_template); - test_cipher_speed("blowfish", MODE_CBC, DECRYPT, sec, NULL, 0, + test_cipher_speed("cbc(blowfish)", DECRYPT, sec, NULL, 0, blowfish_speed_template); break; case 204: - test_cipher_speed("des", MODE_ECB, ENCRYPT, sec, NULL, 0, + test_cipher_speed("ecb(des)", ENCRYPT, sec, NULL, 0, des_speed_template); - test_cipher_speed("des", MODE_ECB, DECRYPT, sec, NULL, 0, + test_cipher_speed("ecb(des)", DECRYPT, sec, NULL, 0, des_speed_template); - test_cipher_speed("des", MODE_CBC, ENCRYPT, sec, NULL, 0, + test_cipher_speed("cbc(des)", ENCRYPT, sec, NULL, 0, des_speed_template); - test_cipher_speed("des", MODE_CBC, DECRYPT, sec, NULL, 0, + test_cipher_speed("cbc(des)", DECRYPT, sec, NULL, 0, des_speed_template); break; From 69affe7fc52c14e4b81408a2076e9e58ba4af60a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 21 Sep 2006 11:45:53 +1000 Subject: [PATCH 0316/1063] [BLOCK] cryptoloop: Use block ciphers where applicable This patch converts cryptoloop to use the new block cipher type where applicable. As a result the ECB-specific and CBC-specific transfer functions have been merged. Signed-off-by: Herbert Xu --- drivers/block/cryptoloop.c | 158 +++++++++++++------------------------ 1 file changed, 53 insertions(+), 105 deletions(-) diff --git a/drivers/block/cryptoloop.c b/drivers/block/cryptoloop.c index 3d4261c39f16..40535036e893 100644 --- a/drivers/block/cryptoloop.c +++ b/drivers/block/cryptoloop.c @@ -40,11 +40,13 @@ static int cryptoloop_init(struct loop_device *lo, const struct loop_info64 *info) { int err = -EINVAL; + int cipher_len; + int mode_len; char cms[LO_NAME_SIZE]; /* cipher-mode string */ char *cipher; char *mode; char *cmsp = cms; /* c-m string pointer */ - struct crypto_tfm *tfm = NULL; + struct crypto_blkcipher *tfm; /* encryption breaks for non sector aligned offsets */ @@ -53,20 +55,39 @@ cryptoloop_init(struct loop_device *lo, const struct loop_info64 *info) strncpy(cms, info->lo_crypt_name, LO_NAME_SIZE); cms[LO_NAME_SIZE - 1] = 0; - cipher = strsep(&cmsp, "-"); - mode = strsep(&cmsp, "-"); - if (mode == NULL || strcmp(mode, "cbc") == 0) - tfm = crypto_alloc_tfm(cipher, CRYPTO_TFM_MODE_CBC | - CRYPTO_TFM_REQ_MAY_SLEEP); - else if (strcmp(mode, "ecb") == 0) - tfm = crypto_alloc_tfm(cipher, CRYPTO_TFM_MODE_ECB | - CRYPTO_TFM_REQ_MAY_SLEEP); - if (tfm == NULL) + cipher = cmsp; + cipher_len = strcspn(cmsp, "-"); + + mode = cmsp + cipher_len; + mode_len = 0; + if (*mode) { + mode++; + mode_len = strcspn(mode, "-"); + } + + if (!mode_len) { + mode = "cbc"; + mode_len = 3; + } + + if (cipher_len + mode_len + 3 > LO_NAME_SIZE) return -EINVAL; - err = tfm->crt_u.cipher.cit_setkey(tfm, info->lo_encrypt_key, - info->lo_encrypt_key_size); + memmove(cms, mode, mode_len); + cmsp = cms + mode_len; + *cmsp++ = '('; + memcpy(cmsp, info->lo_crypt_name, cipher_len); + cmsp += cipher_len; + *cmsp++ = ')'; + *cmsp = 0; + + tfm = crypto_alloc_blkcipher(cms, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + err = crypto_blkcipher_setkey(tfm, info->lo_encrypt_key, + info->lo_encrypt_key_size); if (err != 0) goto out_free_tfm; @@ -75,99 +96,49 @@ cryptoloop_init(struct loop_device *lo, const struct loop_info64 *info) return 0; out_free_tfm: - crypto_free_tfm(tfm); + crypto_free_blkcipher(tfm); out: return err; } -typedef int (*encdec_ecb_t)(struct crypto_tfm *tfm, +typedef int (*encdec_cbc_t)(struct blkcipher_desc *desc, struct scatterlist *sg_out, struct scatterlist *sg_in, unsigned int nsg); - static int -cryptoloop_transfer_ecb(struct loop_device *lo, int cmd, - struct page *raw_page, unsigned raw_off, - struct page *loop_page, unsigned loop_off, - int size, sector_t IV) +cryptoloop_transfer(struct loop_device *lo, int cmd, + struct page *raw_page, unsigned raw_off, + struct page *loop_page, unsigned loop_off, + int size, sector_t IV) { - struct crypto_tfm *tfm = (struct crypto_tfm *) lo->key_data; - struct scatterlist sg_out = { NULL, }; - struct scatterlist sg_in = { NULL, }; - - encdec_ecb_t encdecfunc; - struct page *in_page, *out_page; - unsigned in_offs, out_offs; - - if (cmd == READ) { - in_page = raw_page; - in_offs = raw_off; - out_page = loop_page; - out_offs = loop_off; - encdecfunc = tfm->crt_u.cipher.cit_decrypt; - } else { - in_page = loop_page; - in_offs = loop_off; - out_page = raw_page; - out_offs = raw_off; - encdecfunc = tfm->crt_u.cipher.cit_encrypt; - } - - while (size > 0) { - const int sz = min(size, LOOP_IV_SECTOR_SIZE); - - sg_in.page = in_page; - sg_in.offset = in_offs; - sg_in.length = sz; - - sg_out.page = out_page; - sg_out.offset = out_offs; - sg_out.length = sz; - - encdecfunc(tfm, &sg_out, &sg_in, sz); - - size -= sz; - in_offs += sz; - out_offs += sz; - } - - return 0; -} - -typedef int (*encdec_cbc_t)(struct crypto_tfm *tfm, - struct scatterlist *sg_out, - struct scatterlist *sg_in, - unsigned int nsg, u8 *iv); - -static int -cryptoloop_transfer_cbc(struct loop_device *lo, int cmd, - struct page *raw_page, unsigned raw_off, - struct page *loop_page, unsigned loop_off, - int size, sector_t IV) -{ - struct crypto_tfm *tfm = (struct crypto_tfm *) lo->key_data; + struct crypto_blkcipher *tfm = lo->key_data; + struct blkcipher_desc desc = { + .tfm = tfm, + .flags = CRYPTO_TFM_REQ_MAY_SLEEP, + }; struct scatterlist sg_out = { NULL, }; struct scatterlist sg_in = { NULL, }; encdec_cbc_t encdecfunc; struct page *in_page, *out_page; unsigned in_offs, out_offs; + int err; if (cmd == READ) { in_page = raw_page; in_offs = raw_off; out_page = loop_page; out_offs = loop_off; - encdecfunc = tfm->crt_u.cipher.cit_decrypt_iv; + encdecfunc = crypto_blkcipher_crt(tfm)->decrypt; } else { in_page = loop_page; in_offs = loop_off; out_page = raw_page; out_offs = raw_off; - encdecfunc = tfm->crt_u.cipher.cit_encrypt_iv; + encdecfunc = crypto_blkcipher_crt(tfm)->encrypt; } while (size > 0) { @@ -183,7 +154,10 @@ cryptoloop_transfer_cbc(struct loop_device *lo, int cmd, sg_out.offset = out_offs; sg_out.length = sz; - encdecfunc(tfm, &sg_out, &sg_in, sz, (u8 *)iv); + desc.info = iv; + err = encdecfunc(&desc, &sg_out, &sg_in, sz); + if (err) + return err; IV++; size -= sz; @@ -194,32 +168,6 @@ cryptoloop_transfer_cbc(struct loop_device *lo, int cmd, return 0; } -static int -cryptoloop_transfer(struct loop_device *lo, int cmd, - struct page *raw_page, unsigned raw_off, - struct page *loop_page, unsigned loop_off, - int size, sector_t IV) -{ - struct crypto_tfm *tfm = (struct crypto_tfm *) lo->key_data; - if(tfm->crt_cipher.cit_mode == CRYPTO_TFM_MODE_ECB) - { - lo->transfer = cryptoloop_transfer_ecb; - return cryptoloop_transfer_ecb(lo, cmd, raw_page, raw_off, - loop_page, loop_off, size, IV); - } - if(tfm->crt_cipher.cit_mode == CRYPTO_TFM_MODE_CBC) - { - lo->transfer = cryptoloop_transfer_cbc; - return cryptoloop_transfer_cbc(lo, cmd, raw_page, raw_off, - loop_page, loop_off, size, IV); - } - - /* This is not supposed to happen */ - - printk( KERN_ERR "cryptoloop: unsupported cipher mode in cryptoloop_transfer!\n"); - return -EINVAL; -} - static int cryptoloop_ioctl(struct loop_device *lo, int cmd, unsigned long arg) { @@ -229,9 +177,9 @@ cryptoloop_ioctl(struct loop_device *lo, int cmd, unsigned long arg) static int cryptoloop_release(struct loop_device *lo) { - struct crypto_tfm *tfm = (struct crypto_tfm *) lo->key_data; + struct crypto_blkcipher *tfm = lo->key_data; if (tfm != NULL) { - crypto_free_tfm(tfm); + crypto_free_blkcipher(tfm); lo->key_data = NULL; return 0; } From d1806f6a97a536b043fe50e6d8a25b061755cf50 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 22 Aug 2006 20:29:17 +1000 Subject: [PATCH 0317/1063] [BLOCK] dm-crypt: Use block ciphers where applicable This patch converts dm-crypt to use the new block cipher type where applicable. It also changes simple cipher operations to use the new encrypt_one/decrypt_one interface. Signed-off-by: Herbert Xu --- drivers/md/dm-crypt.c | 112 ++++++++++++++++++------------------------ 1 file changed, 49 insertions(+), 63 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 6022ed12a795..91d4081cb00e 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -5,6 +5,7 @@ * This file is released under the GPL. */ +#include #include #include #include @@ -78,11 +79,13 @@ struct crypt_config { */ struct crypt_iv_operations *iv_gen_ops; char *iv_mode; - void *iv_gen_private; + struct crypto_cipher *iv_gen_private; sector_t iv_offset; unsigned int iv_size; - struct crypto_tfm *tfm; + char cipher[CRYPTO_MAX_ALG_NAME]; + char chainmode[CRYPTO_MAX_ALG_NAME]; + struct crypto_blkcipher *tfm; unsigned int key_size; u8 key[0]; }; @@ -118,11 +121,12 @@ static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv, sector_t sector) static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, const char *opts) { - struct crypto_tfm *essiv_tfm; + struct crypto_cipher *essiv_tfm; struct crypto_tfm *hash_tfm; struct scatterlist sg; unsigned int saltsize; u8 *salt; + int err; if (opts == NULL) { ti->error = "Digest algorithm missing for ESSIV mode"; @@ -155,51 +159,44 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, crypto_free_tfm(hash_tfm); /* Setup the essiv_tfm with the given salt */ - essiv_tfm = crypto_alloc_tfm(crypto_tfm_alg_name(cc->tfm), - CRYPTO_TFM_MODE_ECB | - CRYPTO_TFM_REQ_MAY_SLEEP); - if (essiv_tfm == NULL) { + essiv_tfm = crypto_alloc_cipher(cc->cipher, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(essiv_tfm)) { ti->error = "Error allocating crypto tfm for ESSIV"; kfree(salt); - return -EINVAL; + return PTR_ERR(essiv_tfm); } - if (crypto_tfm_alg_blocksize(essiv_tfm) - != crypto_tfm_alg_ivsize(cc->tfm)) { + if (crypto_cipher_blocksize(essiv_tfm) != + crypto_blkcipher_ivsize(cc->tfm)) { ti->error = "Block size of ESSIV cipher does " "not match IV size of block cipher"; - crypto_free_tfm(essiv_tfm); + crypto_free_cipher(essiv_tfm); kfree(salt); return -EINVAL; } - if (crypto_cipher_setkey(essiv_tfm, salt, saltsize) < 0) { + err = crypto_cipher_setkey(essiv_tfm, salt, saltsize); + if (err) { ti->error = "Failed to set key for ESSIV cipher"; - crypto_free_tfm(essiv_tfm); + crypto_free_cipher(essiv_tfm); kfree(salt); - return -EINVAL; + return err; } kfree(salt); - cc->iv_gen_private = (void *)essiv_tfm; + cc->iv_gen_private = essiv_tfm; return 0; } static void crypt_iv_essiv_dtr(struct crypt_config *cc) { - crypto_free_tfm((struct crypto_tfm *)cc->iv_gen_private); + crypto_free_cipher(cc->iv_gen_private); cc->iv_gen_private = NULL; } static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, sector_t sector) { - struct scatterlist sg; - memset(iv, 0, cc->iv_size); *(u64 *)iv = cpu_to_le64(sector); - - sg_set_buf(&sg, iv, cc->iv_size); - crypto_cipher_encrypt((struct crypto_tfm *)cc->iv_gen_private, - &sg, &sg, cc->iv_size); - + crypto_cipher_encrypt_one(cc->iv_gen_private, iv, iv); return 0; } @@ -220,6 +217,11 @@ crypt_convert_scatterlist(struct crypt_config *cc, struct scatterlist *out, int write, sector_t sector) { u8 iv[cc->iv_size]; + struct blkcipher_desc desc = { + .tfm = cc->tfm, + .info = iv, + .flags = CRYPTO_TFM_REQ_MAY_SLEEP, + }; int r; if (cc->iv_gen_ops) { @@ -228,14 +230,14 @@ crypt_convert_scatterlist(struct crypt_config *cc, struct scatterlist *out, return r; if (write) - r = crypto_cipher_encrypt_iv(cc->tfm, out, in, length, iv); + r = crypto_blkcipher_encrypt_iv(&desc, out, in, length); else - r = crypto_cipher_decrypt_iv(cc->tfm, out, in, length, iv); + r = crypto_blkcipher_decrypt_iv(&desc, out, in, length); } else { if (write) - r = crypto_cipher_encrypt(cc->tfm, out, in, length); + r = crypto_blkcipher_encrypt(&desc, out, in, length); else - r = crypto_cipher_decrypt(cc->tfm, out, in, length); + r = crypto_blkcipher_decrypt(&desc, out, in, length); } return r; @@ -510,13 +512,12 @@ static void crypt_encode_key(char *hex, u8 *key, unsigned int size) static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct crypt_config *cc; - struct crypto_tfm *tfm; + struct crypto_blkcipher *tfm; char *tmp; char *cipher; char *chainmode; char *ivmode; char *ivopts; - unsigned int crypto_flags; unsigned int key_size; unsigned long long tmpll; @@ -556,31 +557,25 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) ivmode = "plain"; } - /* Choose crypto_flags according to chainmode */ - if (strcmp(chainmode, "cbc") == 0) - crypto_flags = CRYPTO_TFM_MODE_CBC; - else if (strcmp(chainmode, "ecb") == 0) - crypto_flags = CRYPTO_TFM_MODE_ECB; - else { - ti->error = "Unknown chaining mode"; - goto bad1; - } - - if (crypto_flags != CRYPTO_TFM_MODE_ECB && !ivmode) { + if (strcmp(chainmode, "ecb") && !ivmode) { ti->error = "This chaining mode requires an IV mechanism"; goto bad1; } - tfm = crypto_alloc_tfm(cipher, crypto_flags | CRYPTO_TFM_REQ_MAY_SLEEP); - if (!tfm) { + if (snprintf(cc->cipher, CRYPTO_MAX_ALG_NAME, "%s(%s)", chainmode, + cipher) >= CRYPTO_MAX_ALG_NAME) { + ti->error = "Chain mode + cipher name is too long"; + goto bad1; + } + + tfm = crypto_alloc_blkcipher(cc->cipher, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) { ti->error = "Error allocating crypto tfm"; goto bad1; } - if (crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER) { - ti->error = "Expected cipher algorithm"; - goto bad2; - } + strcpy(cc->cipher, cipher); + strcpy(cc->chainmode, chainmode); cc->tfm = tfm; /* @@ -603,12 +598,12 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) cc->iv_gen_ops->ctr(cc, ti, ivopts) < 0) goto bad2; - if (tfm->crt_cipher.cit_decrypt_iv && tfm->crt_cipher.cit_encrypt_iv) + cc->iv_size = crypto_blkcipher_ivsize(tfm); + if (cc->iv_size) /* at least a 64 bit sector number should fit in our buffer */ - cc->iv_size = max(crypto_tfm_alg_ivsize(tfm), + cc->iv_size = max(cc->iv_size, (unsigned int)(sizeof(u64) / sizeof(u8))); else { - cc->iv_size = 0; if (cc->iv_gen_ops) { DMWARN("Selected cipher does not support IVs"); if (cc->iv_gen_ops->dtr) @@ -629,7 +624,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad4; } - if (tfm->crt_cipher.cit_setkey(tfm, cc->key, key_size) < 0) { + if (crypto_blkcipher_setkey(tfm, cc->key, key_size) < 0) { ti->error = "Error setting key"; goto bad5; } @@ -675,7 +670,7 @@ bad3: if (cc->iv_gen_ops && cc->iv_gen_ops->dtr) cc->iv_gen_ops->dtr(cc); bad2: - crypto_free_tfm(tfm); + crypto_free_blkcipher(tfm); bad1: /* Must zero key material before freeing */ memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8)); @@ -693,7 +688,7 @@ static void crypt_dtr(struct dm_target *ti) kfree(cc->iv_mode); if (cc->iv_gen_ops && cc->iv_gen_ops->dtr) cc->iv_gen_ops->dtr(cc); - crypto_free_tfm(cc->tfm); + crypto_free_blkcipher(cc->tfm); dm_put_device(ti, cc->dev); /* Must zero key material before freeing */ @@ -858,18 +853,9 @@ static int crypt_status(struct dm_target *ti, status_type_t type, break; case STATUSTYPE_TABLE: - cipher = crypto_tfm_alg_name(cc->tfm); + cipher = crypto_blkcipher_name(cc->tfm); - switch(cc->tfm->crt_cipher.cit_mode) { - case CRYPTO_TFM_MODE_CBC: - chainmode = "cbc"; - break; - case CRYPTO_TFM_MODE_ECB: - chainmode = "ecb"; - break; - default: - BUG(); - } + chainmode = cc->chainmode; if (cc->iv_mode) DMEMIT("%s-%s-%s ", cipher, chainmode, cc->iv_mode); From 04ff12609445c7b462d7fc7f2d30dad442c922f3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 13 Aug 2006 08:50:00 +1000 Subject: [PATCH 0318/1063] [IPSEC]: Add compatibility algorithm name support This patch adds a compatibility name field for each IPsec algorithm. This is needed when parameterised algorithms are used. For example, "md5" will become "hmac(md5)", and "aes" will become "cbc(aes)". Signed-off-by: Herbert Xu --- include/net/xfrm.h | 1 + net/xfrm/xfrm_algo.c | 3 ++- net/xfrm/xfrm_user.c | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 10396b4bde14..e9114e41affc 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -854,6 +854,7 @@ struct xfrm_algo_comp_info { struct xfrm_algo_desc { char *name; + char *compat; u8 available:1; union { struct xfrm_algo_auth_info auth; diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 04e1aea58bc9..b68974b38741 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -359,7 +359,8 @@ static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list, return NULL; for (i = 0; i < entries; i++) { - if (strcmp(name, list[i].name)) + if (strcmp(name, list[i].name) && + (!list[i].compat || strcmp(name, list[i].compat))) continue; if (list[i].available) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 7d18ca03c80d..fa79ddc4239e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -213,6 +213,7 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, return -ENOMEM; memcpy(p, ualg, len); + strcpy(p->alg_name, algo->name); *algpp = p; return 0; } From 6b7326c8497f954c2cfcb4c49fe42be5b80887bc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 30 Jul 2006 15:41:01 +1000 Subject: [PATCH 0319/1063] [IPSEC] ESP: Use block ciphers where applicable This patch converts IPSec/ESP to use the new block cipher type where applicable. Similar to the HMAC conversion, existing algorithm names have been kept for compatibility. Signed-off-by: Herbert Xu --- include/net/esp.h | 2 +- net/ipv4/Kconfig | 1 + net/ipv4/esp4.c | 49 ++++++++++++++++++++++++++------------------ net/ipv6/Kconfig | 1 + net/ipv6/esp6.c | 48 +++++++++++++++++++++++++------------------ net/xfrm/xfrm_algo.c | 24 ++++++++++++++-------- 6 files changed, 76 insertions(+), 49 deletions(-) diff --git a/include/net/esp.h b/include/net/esp.h index 6eb837973c84..af2ff18700c7 100644 --- a/include/net/esp.h +++ b/include/net/esp.h @@ -22,7 +22,7 @@ struct esp_data * >= crypto_tfm_alg_ivsize(tfm). */ int ivlen; int padlen; /* 0..255 */ - struct crypto_tfm *tfm; /* crypto handle */ + struct crypto_blkcipher *tfm; /* crypto handle */ } conf; /* Integrity. It is active when icv_full_len != 0 */ diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 8514106761b0..3b5d504a74be 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -386,6 +386,7 @@ config INET_ESP select CRYPTO select CRYPTO_HMAC select CRYPTO_MD5 + select CRYPTO_CBC select CRYPTO_SHA1 select CRYPTO_DES ---help--- diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index fc2f8ce441de..7c63ae494742 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -16,7 +17,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) int err; struct iphdr *top_iph; struct ip_esp_hdr *esph; - struct crypto_tfm *tfm; + struct crypto_blkcipher *tfm; + struct blkcipher_desc desc; struct esp_data *esp; struct sk_buff *trailer; int blksize; @@ -36,7 +38,9 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) esp = x->data; alen = esp->auth.icv_trunc_len; tfm = esp->conf.tfm; - blksize = ALIGN(crypto_tfm_alg_blocksize(tfm), 4); + desc.tfm = tfm; + desc.flags = 0; + blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); clen = ALIGN(clen + 2, blksize); if (esp->conf.padlen) clen = ALIGN(clen, esp->conf.padlen); @@ -92,7 +96,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) xfrm_aevent_doreplay(x); if (esp->conf.ivlen) - crypto_cipher_set_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm)); + crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen); do { struct scatterlist *sg = &esp->sgbuf[0]; @@ -103,14 +107,17 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) goto error; } skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen); - crypto_cipher_encrypt(tfm, sg, sg, clen); + err = crypto_blkcipher_encrypt(&desc, sg, sg, clen); if (unlikely(sg != &esp->sgbuf[0])) kfree(sg); } while (0); + if (unlikely(err)) + goto error; + if (esp->conf.ivlen) { - memcpy(esph->enc_data, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm)); - crypto_cipher_get_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm)); + memcpy(esph->enc_data, esp->conf.ivec, esp->conf.ivlen); + crypto_blkcipher_get_iv(tfm, esp->conf.ivec, esp->conf.ivlen); } if (esp->auth.icv_full_len) { @@ -121,8 +128,6 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) ip_send_check(top_iph); - err = 0; - error: return err; } @@ -137,8 +142,10 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) struct iphdr *iph; struct ip_esp_hdr *esph; struct esp_data *esp = x->data; + struct crypto_blkcipher *tfm = esp->conf.tfm; + struct blkcipher_desc desc = { .tfm = tfm }; struct sk_buff *trailer; - int blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4); + int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); int alen = esp->auth.icv_trunc_len; int elen = skb->len - sizeof(struct ip_esp_hdr) - esp->conf.ivlen - alen; int nfrags; @@ -146,6 +153,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) u8 nexthdr[2]; struct scatterlist *sg; int padlen; + int err; if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr))) goto out; @@ -178,7 +186,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) /* Get ivec. This can be wrong, check against another impls. */ if (esp->conf.ivlen) - crypto_cipher_set_iv(esp->conf.tfm, esph->enc_data, crypto_tfm_alg_ivsize(esp->conf.tfm)); + crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen); sg = &esp->sgbuf[0]; @@ -188,9 +196,11 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) goto out; } skb_to_sgvec(skb, sg, sizeof(struct ip_esp_hdr) + esp->conf.ivlen, elen); - crypto_cipher_decrypt(esp->conf.tfm, sg, sg, elen); + err = crypto_blkcipher_decrypt(&desc, sg, sg, elen); if (unlikely(sg != &esp->sgbuf[0])) kfree(sg); + if (unlikely(err)) + return err; if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2)) BUG(); @@ -254,7 +264,7 @@ out: static u32 esp4_get_max_size(struct xfrm_state *x, int mtu) { struct esp_data *esp = x->data; - u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4); + u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); if (x->props.mode) { mtu = ALIGN(mtu + 2, blksize); @@ -293,7 +303,7 @@ static void esp_destroy(struct xfrm_state *x) if (!esp) return; - crypto_free_tfm(esp->conf.tfm); + crypto_free_blkcipher(esp->conf.tfm); esp->conf.tfm = NULL; kfree(esp->conf.ivec); esp->conf.ivec = NULL; @@ -307,6 +317,7 @@ static void esp_destroy(struct xfrm_state *x) static int esp_init_state(struct xfrm_state *x) { struct esp_data *esp = NULL; + struct crypto_blkcipher *tfm; /* null auth and encryption can have zero length keys */ if (x->aalg) { @@ -351,13 +362,11 @@ static int esp_init_state(struct xfrm_state *x) } esp->conf.key = x->ealg->alg_key; esp->conf.key_len = (x->ealg->alg_key_len+7)/8; - if (x->props.ealgo == SADB_EALG_NULL) - esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_ECB); - else - esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_CBC); - if (esp->conf.tfm == NULL) + tfm = crypto_alloc_blkcipher(x->ealg->alg_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) goto error; - esp->conf.ivlen = crypto_tfm_alg_ivsize(esp->conf.tfm); + esp->conf.tfm = tfm; + esp->conf.ivlen = crypto_blkcipher_ivsize(tfm); esp->conf.padlen = 0; if (esp->conf.ivlen) { esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL); @@ -365,7 +374,7 @@ static int esp_init_state(struct xfrm_state *x) goto error; get_random_bytes(esp->conf.ivec, esp->conf.ivlen); } - if (crypto_cipher_setkey(esp->conf.tfm, esp->conf.key, esp->conf.key_len)) + if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) goto error; x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; if (x->props.mode) diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index e923d4dea418..0ba06c0c5d39 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -77,6 +77,7 @@ config INET6_ESP select CRYPTO select CRYPTO_HMAC select CRYPTO_MD5 + select CRYPTO_CBC select CRYPTO_SHA1 select CRYPTO_DES ---help--- diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index a278d5e862fe..46a7e687948e 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -24,6 +24,7 @@ * This file is derived from net/ipv4/esp.c */ +#include #include #include #include @@ -44,7 +45,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) int hdr_len; struct ipv6hdr *top_iph; struct ipv6_esp_hdr *esph; - struct crypto_tfm *tfm; + struct crypto_blkcipher *tfm; + struct blkcipher_desc desc; struct esp_data *esp; struct sk_buff *trailer; int blksize; @@ -67,7 +69,9 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) alen = esp->auth.icv_trunc_len; tfm = esp->conf.tfm; - blksize = ALIGN(crypto_tfm_alg_blocksize(tfm), 4); + desc.tfm = tfm; + desc.flags = 0; + blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); clen = ALIGN(clen + 2, blksize); if (esp->conf.padlen) clen = ALIGN(clen, esp->conf.padlen); @@ -96,7 +100,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) xfrm_aevent_doreplay(x); if (esp->conf.ivlen) - crypto_cipher_set_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm)); + crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen); do { struct scatterlist *sg = &esp->sgbuf[0]; @@ -107,14 +111,17 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) goto error; } skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen); - crypto_cipher_encrypt(tfm, sg, sg, clen); + err = crypto_blkcipher_encrypt(&desc, sg, sg, clen); if (unlikely(sg != &esp->sgbuf[0])) kfree(sg); } while (0); + if (unlikely(err)) + goto error; + if (esp->conf.ivlen) { - memcpy(esph->enc_data, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm)); - crypto_cipher_get_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm)); + memcpy(esph->enc_data, esp->conf.ivec, esp->conf.ivlen); + crypto_blkcipher_get_iv(tfm, esp->conf.ivec, esp->conf.ivlen); } if (esp->auth.icv_full_len) { @@ -123,8 +130,6 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) pskb_put(skb, trailer, alen); } - err = 0; - error: return err; } @@ -134,8 +139,10 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) struct ipv6hdr *iph; struct ipv6_esp_hdr *esph; struct esp_data *esp = x->data; + struct crypto_blkcipher *tfm = esp->conf.tfm; + struct blkcipher_desc desc = { .tfm = tfm }; struct sk_buff *trailer; - int blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4); + int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); int alen = esp->auth.icv_trunc_len; int elen = skb->len - sizeof(struct ipv6_esp_hdr) - esp->conf.ivlen - alen; @@ -182,7 +189,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) /* Get ivec. This can be wrong, check against another impls. */ if (esp->conf.ivlen) - crypto_cipher_set_iv(esp->conf.tfm, esph->enc_data, crypto_tfm_alg_ivsize(esp->conf.tfm)); + crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen); { u8 nexthdr[2]; @@ -197,9 +204,11 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) } } skb_to_sgvec(skb, sg, sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen, elen); - crypto_cipher_decrypt(esp->conf.tfm, sg, sg, elen); + ret = crypto_blkcipher_decrypt(&desc, sg, sg, elen); if (unlikely(sg != &esp->sgbuf[0])) kfree(sg); + if (unlikely(ret)) + goto out; if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2)) BUG(); @@ -225,7 +234,7 @@ out: static u32 esp6_get_max_size(struct xfrm_state *x, int mtu) { struct esp_data *esp = x->data; - u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4); + u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); if (x->props.mode) { mtu = ALIGN(mtu + 2, blksize); @@ -266,7 +275,7 @@ static void esp6_destroy(struct xfrm_state *x) if (!esp) return; - crypto_free_tfm(esp->conf.tfm); + crypto_free_blkcipher(esp->conf.tfm); esp->conf.tfm = NULL; kfree(esp->conf.ivec); esp->conf.ivec = NULL; @@ -280,6 +289,7 @@ static void esp6_destroy(struct xfrm_state *x) static int esp6_init_state(struct xfrm_state *x) { struct esp_data *esp = NULL; + struct crypto_blkcipher *tfm; /* null auth and encryption can have zero length keys */ if (x->aalg) { @@ -327,13 +337,11 @@ static int esp6_init_state(struct xfrm_state *x) } esp->conf.key = x->ealg->alg_key; esp->conf.key_len = (x->ealg->alg_key_len+7)/8; - if (x->props.ealgo == SADB_EALG_NULL) - esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_ECB); - else - esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_CBC); - if (esp->conf.tfm == NULL) + tfm = crypto_alloc_blkcipher(x->ealg->alg_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) goto error; - esp->conf.ivlen = crypto_tfm_alg_ivsize(esp->conf.tfm); + esp->conf.tfm = tfm; + esp->conf.ivlen = crypto_blkcipher_ivsize(tfm); esp->conf.padlen = 0; if (esp->conf.ivlen) { esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL); @@ -341,7 +349,7 @@ static int esp6_init_state(struct xfrm_state *x) goto error; get_random_bytes(esp->conf.ivec, esp->conf.ivlen); } - if (crypto_cipher_setkey(esp->conf.tfm, esp->conf.key, esp->conf.key_len)) + if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) goto error; x->props.header_len = sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen; if (x->props.mode) diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index b68974b38741..9b03d8497fba 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -118,7 +118,8 @@ static struct xfrm_algo_desc aalg_list[] = { static struct xfrm_algo_desc ealg_list[] = { { - .name = "cipher_null", + .name = "ecb(cipher_null)", + .compat = "cipher_null", .uinfo = { .encr = { @@ -135,7 +136,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "des", + .name = "cbc(des)", + .compat = "des", .uinfo = { .encr = { @@ -152,7 +154,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "des3_ede", + .name = "cbc(des3_ede)", + .compat = "des3_ede", .uinfo = { .encr = { @@ -169,7 +172,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "cast128", + .name = "cbc(cast128)", + .compat = "cast128", .uinfo = { .encr = { @@ -186,7 +190,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "blowfish", + .name = "cbc(blowfish)", + .compat = "blowfish", .uinfo = { .encr = { @@ -203,7 +208,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "aes", + .name = "cbc(aes)", + .compat = "aes", .uinfo = { .encr = { @@ -220,7 +226,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "serpent", + .name = "cbc(serpent)", + .compat = "serpent", .uinfo = { .encr = { @@ -237,7 +244,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "twofish", + .name = "cbc(twofish)", + .compat = "twofish", .uinfo = { .encr = { From 378c6697a282c383d89428380a3405bf95189347 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 22 Aug 2006 20:33:54 +1000 Subject: [PATCH 0320/1063] [SUNRPC] GSS: Use block ciphers where applicable This patch converts SUNRPC/GSS to use the new block cipher type where applicable. Signed-off-by: Herbert Xu --- include/linux/sunrpc/gss_krb5.h | 19 ++++----- include/linux/sunrpc/gss_spkm3.h | 4 +- net/sunrpc/auth_gss/gss_krb5_crypto.c | 57 +++++++++++++++------------ net/sunrpc/auth_gss/gss_krb5_mech.c | 24 +++++------ net/sunrpc/auth_gss/gss_krb5_seqnum.c | 4 +- net/sunrpc/auth_gss/gss_krb5_wrap.c | 4 +- net/sunrpc/auth_gss/gss_spkm3_mech.c | 29 +++++++------- 7 files changed, 76 insertions(+), 65 deletions(-) diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 1279280d7196..e30ba201910a 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -46,8 +46,8 @@ struct krb5_ctx { unsigned char seed[16]; int signalg; int sealalg; - struct crypto_tfm *enc; - struct crypto_tfm *seq; + struct crypto_blkcipher *enc; + struct crypto_blkcipher *seq; s32 endtime; u32 seq_send; struct xdr_netobj mech_used; @@ -136,26 +136,27 @@ gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset, u32 -krb5_encrypt(struct crypto_tfm * key, +krb5_encrypt(struct crypto_blkcipher *key, void *iv, void *in, void *out, int length); u32 -krb5_decrypt(struct crypto_tfm * key, +krb5_decrypt(struct crypto_blkcipher *key, void *iv, void *in, void *out, int length); int -gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *outbuf, int offset, - struct page **pages); +gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *outbuf, + int offset, struct page **pages); int -gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *inbuf, int offset); +gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *inbuf, + int offset); s32 -krb5_make_seq_num(struct crypto_tfm * key, +krb5_make_seq_num(struct crypto_blkcipher *key, int direction, s32 seqnum, unsigned char *cksum, unsigned char *buf); s32 -krb5_get_seq_num(struct crypto_tfm * key, +krb5_get_seq_num(struct crypto_blkcipher *key, unsigned char *cksum, unsigned char *buf, int *direction, s32 * seqnum); diff --git a/include/linux/sunrpc/gss_spkm3.h b/include/linux/sunrpc/gss_spkm3.h index 336e218c2782..2cf3fbb40b4f 100644 --- a/include/linux/sunrpc/gss_spkm3.h +++ b/include/linux/sunrpc/gss_spkm3.h @@ -19,9 +19,9 @@ struct spkm3_ctx { unsigned int req_flags ; struct xdr_netobj share_key; int conf_alg; - struct crypto_tfm* derived_conf_key; + struct crypto_blkcipher *derived_conf_key; int intg_alg; - struct crypto_tfm* derived_integ_key; + struct crypto_blkcipher *derived_integ_key; int keyestb_alg; /* alg used to get share_key */ int owf_alg; /* one way function */ }; diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 76b969e6904f..57192dfe3065 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -49,7 +49,7 @@ u32 krb5_encrypt( - struct crypto_tfm *tfm, + struct crypto_blkcipher *tfm, void * iv, void * in, void * out, @@ -58,26 +58,27 @@ krb5_encrypt( u32 ret = -EINVAL; struct scatterlist sg[1]; u8 local_iv[16] = {0}; + struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv }; dprintk("RPC: krb5_encrypt: input data:\n"); print_hexl((u32 *)in, length, 0); - if (length % crypto_tfm_alg_blocksize(tfm) != 0) + if (length % crypto_blkcipher_blocksize(tfm) != 0) goto out; - if (crypto_tfm_alg_ivsize(tfm) > 16) { + if (crypto_blkcipher_ivsize(tfm) > 16) { dprintk("RPC: gss_k5encrypt: tfm iv size to large %d\n", - crypto_tfm_alg_ivsize(tfm)); + crypto_blkcipher_ivsize(tfm)); goto out; } if (iv) - memcpy(local_iv, iv, crypto_tfm_alg_ivsize(tfm)); + memcpy(local_iv, iv, crypto_blkcipher_ivsize(tfm)); memcpy(out, in, length); sg_set_buf(sg, out, length); - ret = crypto_cipher_encrypt_iv(tfm, sg, sg, length, local_iv); + ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, length); dprintk("RPC: krb5_encrypt: output data:\n"); print_hexl((u32 *)out, length, 0); @@ -90,7 +91,7 @@ EXPORT_SYMBOL(krb5_encrypt); u32 krb5_decrypt( - struct crypto_tfm *tfm, + struct crypto_blkcipher *tfm, void * iv, void * in, void * out, @@ -99,25 +100,26 @@ krb5_decrypt( u32 ret = -EINVAL; struct scatterlist sg[1]; u8 local_iv[16] = {0}; + struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv }; dprintk("RPC: krb5_decrypt: input data:\n"); print_hexl((u32 *)in, length, 0); - if (length % crypto_tfm_alg_blocksize(tfm) != 0) + if (length % crypto_blkcipher_blocksize(tfm) != 0) goto out; - if (crypto_tfm_alg_ivsize(tfm) > 16) { + if (crypto_blkcipher_ivsize(tfm) > 16) { dprintk("RPC: gss_k5decrypt: tfm iv size to large %d\n", - crypto_tfm_alg_ivsize(tfm)); + crypto_blkcipher_ivsize(tfm)); goto out; } if (iv) - memcpy(local_iv,iv, crypto_tfm_alg_ivsize(tfm)); + memcpy(local_iv,iv, crypto_blkcipher_ivsize(tfm)); memcpy(out, in, length); sg_set_buf(sg, out, length); - ret = crypto_cipher_decrypt_iv(tfm, sg, sg, length, local_iv); + ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, length); dprintk("RPC: krb5_decrypt: output_data:\n"); print_hexl((u32 *)out, length, 0); @@ -240,7 +242,7 @@ EXPORT_SYMBOL(make_checksum); struct encryptor_desc { u8 iv[8]; /* XXX hard-coded blocksize */ - struct crypto_tfm *tfm; + struct blkcipher_desc desc; int pos; struct xdr_buf *outbuf; struct page **pages; @@ -285,8 +287,8 @@ encryptor(struct scatterlist *sg, void *data) if (thislen == 0) return 0; - ret = crypto_cipher_encrypt_iv(desc->tfm, desc->outfrags, desc->infrags, - thislen, desc->iv); + ret = crypto_blkcipher_encrypt_iv(&desc->desc, desc->outfrags, + desc->infrags, thislen); if (ret) return ret; if (fraglen) { @@ -305,16 +307,18 @@ encryptor(struct scatterlist *sg, void *data) } int -gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset, - struct page **pages) +gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, + int offset, struct page **pages) { int ret; struct encryptor_desc desc; - BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0); + BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0); memset(desc.iv, 0, sizeof(desc.iv)); - desc.tfm = tfm; + desc.desc.tfm = tfm; + desc.desc.info = desc.iv; + desc.desc.flags = 0; desc.pos = offset; desc.outbuf = buf; desc.pages = pages; @@ -329,7 +333,7 @@ EXPORT_SYMBOL(gss_encrypt_xdr_buf); struct decryptor_desc { u8 iv[8]; /* XXX hard-coded blocksize */ - struct crypto_tfm *tfm; + struct blkcipher_desc desc; struct scatterlist frags[4]; int fragno; int fraglen; @@ -355,8 +359,8 @@ decryptor(struct scatterlist *sg, void *data) if (thislen == 0) return 0; - ret = crypto_cipher_decrypt_iv(desc->tfm, desc->frags, desc->frags, - thislen, desc->iv); + ret = crypto_blkcipher_decrypt_iv(&desc->desc, desc->frags, + desc->frags, thislen); if (ret) return ret; if (fraglen) { @@ -373,15 +377,18 @@ decryptor(struct scatterlist *sg, void *data) } int -gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset) +gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, + int offset) { struct decryptor_desc desc; /* XXXJBF: */ - BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0); + BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0); memset(desc.iv, 0, sizeof(desc.iv)); - desc.tfm = tfm; + desc.desc.tfm = tfm; + desc.desc.info = desc.iv; + desc.desc.flags = 0; desc.fragno = 0; desc.fraglen = 0; return process_xdr_buf(buf, offset, buf->len - offset, decryptor, &desc); diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 70e1e53a632b..325e72e4fd31 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -34,6 +34,7 @@ * */ +#include #include #include #include @@ -78,10 +79,10 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) } static inline const void * -get_key(const void *p, const void *end, struct crypto_tfm **res) +get_key(const void *p, const void *end, struct crypto_blkcipher **res) { struct xdr_netobj key; - int alg, alg_mode; + int alg; char *alg_name; p = simple_get_bytes(p, end, &alg, sizeof(alg)); @@ -93,18 +94,19 @@ get_key(const void *p, const void *end, struct crypto_tfm **res) switch (alg) { case ENCTYPE_DES_CBC_RAW: - alg_name = "des"; - alg_mode = CRYPTO_TFM_MODE_CBC; + alg_name = "cbc(des)"; break; default: printk("gss_kerberos_mech: unsupported algorithm %d\n", alg); goto out_err_free_key; } - if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) { + *res = crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(*res)) { printk("gss_kerberos_mech: unable to initialize crypto algorithm %s\n", alg_name); + *res = NULL; goto out_err_free_key; } - if (crypto_cipher_setkey(*res, key.data, key.len)) { + if (crypto_blkcipher_setkey(*res, key.data, key.len)) { printk("gss_kerberos_mech: error setting key for crypto algorithm %s\n", alg_name); goto out_err_free_tfm; } @@ -113,7 +115,7 @@ get_key(const void *p, const void *end, struct crypto_tfm **res) return p; out_err_free_tfm: - crypto_free_tfm(*res); + crypto_free_blkcipher(*res); out_err_free_key: kfree(key.data); p = ERR_PTR(-EINVAL); @@ -172,9 +174,9 @@ gss_import_sec_context_kerberos(const void *p, return 0; out_err_free_key2: - crypto_free_tfm(ctx->seq); + crypto_free_blkcipher(ctx->seq); out_err_free_key1: - crypto_free_tfm(ctx->enc); + crypto_free_blkcipher(ctx->enc); out_err_free_mech: kfree(ctx->mech_used.data); out_err_free_ctx: @@ -187,8 +189,8 @@ static void gss_delete_sec_context_kerberos(void *internal_ctx) { struct krb5_ctx *kctx = internal_ctx; - crypto_free_tfm(kctx->seq); - crypto_free_tfm(kctx->enc); + crypto_free_blkcipher(kctx->seq); + crypto_free_blkcipher(kctx->enc); kfree(kctx->mech_used.data); kfree(kctx); } diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c index c53ead39118d..c604baf3a5f6 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c +++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c @@ -41,7 +41,7 @@ #endif s32 -krb5_make_seq_num(struct crypto_tfm *key, +krb5_make_seq_num(struct crypto_blkcipher *key, int direction, s32 seqnum, unsigned char *cksum, unsigned char *buf) @@ -62,7 +62,7 @@ krb5_make_seq_num(struct crypto_tfm *key, } s32 -krb5_get_seq_num(struct crypto_tfm *key, +krb5_get_seq_num(struct crypto_blkcipher *key, unsigned char *cksum, unsigned char *buf, int *direction, s32 * seqnum) diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 89d1f3e14128..f179415d0c38 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -149,7 +149,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset, goto out_err; } - blocksize = crypto_tfm_alg_blocksize(kctx->enc); + blocksize = crypto_blkcipher_blocksize(kctx->enc); gss_krb5_add_padding(buf, offset, blocksize); BUG_ON((buf->len - offset) % blocksize); plainlen = blocksize + buf->len - offset; @@ -346,7 +346,7 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf) /* Copy the data back to the right position. XXX: Would probably be * better to copy and encrypt at the same time. */ - blocksize = crypto_tfm_alg_blocksize(kctx->enc); + blocksize = crypto_blkcipher_blocksize(kctx->enc); data_start = ptr + 22 + blocksize; orig_start = buf->head[0].iov_base + offset; data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start; diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 88dcb52d171b..bdedf456bc17 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -34,6 +34,7 @@ * */ +#include #include #include #include @@ -83,10 +84,11 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) } static inline const void * -get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg) +get_key(const void *p, const void *end, struct crypto_blkcipher **res, + int *resalg) { struct xdr_netobj key = { 0 }; - int alg_mode,setkey = 0; + int setkey = 0; char *alg_name; p = simple_get_bytes(p, end, resalg, sizeof(*resalg)); @@ -98,14 +100,12 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg) switch (*resalg) { case NID_des_cbc: - alg_name = "des"; - alg_mode = CRYPTO_TFM_MODE_CBC; + alg_name = "cbc(des)"; setkey = 1; break; case NID_cast5_cbc: /* XXXX here in name only, not used */ - alg_name = "cast5"; - alg_mode = CRYPTO_TFM_MODE_CBC; + alg_name = "cbc(cast5)"; setkey = 0; /* XXX will need to set to 1 */ break; case NID_md5: @@ -113,19 +113,20 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg) dprintk("RPC: SPKM3 get_key: NID_md5 zero Key length\n"); } alg_name = "md5"; - alg_mode = 0; setkey = 0; break; default: dprintk("gss_spkm3_mech: unsupported algorithm %d\n", *resalg); goto out_err_free_key; } - if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) { + *res = crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(*res)) { printk("gss_spkm3_mech: unable to initialize crypto algorthm %s\n", alg_name); + *res = NULL; goto out_err_free_key; } if (setkey) { - if (crypto_cipher_setkey(*res, key.data, key.len)) { + if (crypto_blkcipher_setkey(*res, key.data, key.len)) { printk("gss_spkm3_mech: error setting key for crypto algorthm %s\n", alg_name); goto out_err_free_tfm; } @@ -136,7 +137,7 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg) return p; out_err_free_tfm: - crypto_free_tfm(*res); + crypto_free_blkcipher(*res); out_err_free_key: if(key.len > 0) kfree(key.data); @@ -204,9 +205,9 @@ gss_import_sec_context_spkm3(const void *p, size_t len, return 0; out_err_free_key2: - crypto_free_tfm(ctx->derived_integ_key); + crypto_free_blkcipher(ctx->derived_integ_key); out_err_free_key1: - crypto_free_tfm(ctx->derived_conf_key); + crypto_free_blkcipher(ctx->derived_conf_key); out_err_free_s_key: kfree(ctx->share_key.data); out_err_free_mech: @@ -223,8 +224,8 @@ static void gss_delete_sec_context_spkm3(void *internal_ctx) { struct spkm3_ctx *sctx = internal_ctx; - crypto_free_tfm(sctx->derived_integ_key); - crypto_free_tfm(sctx->derived_conf_key); + crypto_free_blkcipher(sctx->derived_integ_key); + crypto_free_blkcipher(sctx->derived_conf_key); kfree(sctx->share_key.data); kfree(sctx->mech_used.data); kfree(sctx); From f12cc2090d721647c23dfce20834f4306db3b77d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 22 Aug 2006 20:36:13 +1000 Subject: [PATCH 0321/1063] [CRYPTO] users: Use block ciphers where applicable This patch converts all remaining users to use the new block cipher type where applicable. It also changes all simple cipher operations to use the new encrypt_one/decrypt_one interface. Signed-off-by: Herbert Xu --- drivers/net/ppp_mppe.c | 32 +++++++++++++++----------- drivers/net/wireless/airo.c | 22 ++++++++++-------- net/ieee80211/ieee80211_crypt_ccmp.c | 32 ++++++++++---------------- net/ieee80211/ieee80211_crypt_tkip.c | 34 ++++++++++++++++++---------- net/ieee80211/ieee80211_crypt_wep.c | 25 +++++++++++--------- 5 files changed, 79 insertions(+), 66 deletions(-) diff --git a/drivers/net/ppp_mppe.c b/drivers/net/ppp_mppe.c index 51ff9a9d1bb5..495d8667419a 100644 --- a/drivers/net/ppp_mppe.c +++ b/drivers/net/ppp_mppe.c @@ -43,6 +43,7 @@ * deprecated in 2.6 */ +#include #include #include #include @@ -95,7 +96,7 @@ static inline void sha_pad_init(struct sha_pad *shapad) * State for an MPPE (de)compressor. */ struct ppp_mppe_state { - struct crypto_tfm *arc4; + struct crypto_blkcipher *arc4; struct crypto_tfm *sha1; unsigned char *sha1_digest; unsigned char master_key[MPPE_MAX_KEY_LEN]; @@ -156,14 +157,15 @@ static void mppe_rekey(struct ppp_mppe_state * state, int initial_key) { unsigned char InterimKey[MPPE_MAX_KEY_LEN]; struct scatterlist sg_in[1], sg_out[1]; + struct blkcipher_desc desc = { .tfm = state->arc4 }; get_new_key_from_sha(state, InterimKey); if (!initial_key) { - crypto_cipher_setkey(state->arc4, InterimKey, state->keylen); + crypto_blkcipher_setkey(state->arc4, InterimKey, state->keylen); setup_sg(sg_in, InterimKey, state->keylen); setup_sg(sg_out, state->session_key, state->keylen); - if (crypto_cipher_encrypt(state->arc4, sg_out, sg_in, - state->keylen) != 0) { + if (crypto_blkcipher_encrypt(&desc, sg_out, sg_in, + state->keylen) != 0) { printk(KERN_WARNING "mppe_rekey: cipher_encrypt failed\n"); } } else { @@ -175,7 +177,7 @@ static void mppe_rekey(struct ppp_mppe_state * state, int initial_key) state->session_key[1] = 0x26; state->session_key[2] = 0x9e; } - crypto_cipher_setkey(state->arc4, state->session_key, state->keylen); + crypto_blkcipher_setkey(state->arc4, state->session_key, state->keylen); } /* @@ -196,9 +198,11 @@ static void *mppe_alloc(unsigned char *options, int optlen) memset(state, 0, sizeof(*state)); - state->arc4 = crypto_alloc_tfm("arc4", 0); - if (!state->arc4) + state->arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(state->arc4)) { + state->arc4 = NULL; goto out_free; + } state->sha1 = crypto_alloc_tfm("sha1", 0); if (!state->sha1) @@ -231,7 +235,7 @@ static void *mppe_alloc(unsigned char *options, int optlen) if (state->sha1) crypto_free_tfm(state->sha1); if (state->arc4) - crypto_free_tfm(state->arc4); + crypto_free_blkcipher(state->arc4); kfree(state); out: return NULL; @@ -249,7 +253,7 @@ static void mppe_free(void *arg) if (state->sha1) crypto_free_tfm(state->sha1); if (state->arc4) - crypto_free_tfm(state->arc4); + crypto_free_blkcipher(state->arc4); kfree(state); } } @@ -356,6 +360,7 @@ mppe_compress(void *arg, unsigned char *ibuf, unsigned char *obuf, int isize, int osize) { struct ppp_mppe_state *state = (struct ppp_mppe_state *) arg; + struct blkcipher_desc desc = { .tfm = state->arc4 }; int proto; struct scatterlist sg_in[1], sg_out[1]; @@ -413,7 +418,7 @@ mppe_compress(void *arg, unsigned char *ibuf, unsigned char *obuf, /* Encrypt packet */ setup_sg(sg_in, ibuf, isize); setup_sg(sg_out, obuf, osize); - if (crypto_cipher_encrypt(state->arc4, sg_out, sg_in, isize) != 0) { + if (crypto_blkcipher_encrypt(&desc, sg_out, sg_in, isize) != 0) { printk(KERN_DEBUG "crypto_cypher_encrypt failed\n"); return -1; } @@ -462,6 +467,7 @@ mppe_decompress(void *arg, unsigned char *ibuf, int isize, unsigned char *obuf, int osize) { struct ppp_mppe_state *state = (struct ppp_mppe_state *) arg; + struct blkcipher_desc desc = { .tfm = state->arc4 }; unsigned ccount; int flushed = MPPE_BITS(ibuf) & MPPE_BIT_FLUSHED; int sanity = 0; @@ -599,7 +605,7 @@ mppe_decompress(void *arg, unsigned char *ibuf, int isize, unsigned char *obuf, */ setup_sg(sg_in, ibuf, 1); setup_sg(sg_out, obuf, 1); - if (crypto_cipher_decrypt(state->arc4, sg_out, sg_in, 1) != 0) { + if (crypto_blkcipher_decrypt(&desc, sg_out, sg_in, 1) != 0) { printk(KERN_DEBUG "crypto_cypher_decrypt failed\n"); return DECOMP_ERROR; } @@ -619,7 +625,7 @@ mppe_decompress(void *arg, unsigned char *ibuf, int isize, unsigned char *obuf, /* And finally, decrypt the rest of the packet. */ setup_sg(sg_in, ibuf + 1, isize - 1); setup_sg(sg_out, obuf + 1, osize - 1); - if (crypto_cipher_decrypt(state->arc4, sg_out, sg_in, isize - 1) != 0) { + if (crypto_blkcipher_decrypt(&desc, sg_out, sg_in, isize - 1)) { printk(KERN_DEBUG "crypto_cypher_decrypt failed\n"); return DECOMP_ERROR; } @@ -694,7 +700,7 @@ static struct compressor ppp_mppe = { static int __init ppp_mppe_init(void) { int answer; - if (!(crypto_alg_available("arc4", 0) && + if (!(crypto_alg_available("ecb(arc4)", 0) && crypto_alg_available("sha1", 0))) return -ENODEV; diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index a4dd13942714..170c500169da 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c @@ -19,6 +19,7 @@ ======================================================================*/ +#include #include #include @@ -1203,7 +1204,7 @@ struct airo_info { struct iw_spy_data spy_data; struct iw_public_data wireless_data; /* MIC stuff */ - struct crypto_tfm *tfm; + struct crypto_cipher *tfm; mic_module mod[2]; mic_statistics micstats; HostRxDesc rxfids[MPI_MAX_FIDS]; // rx/tx/config MPI350 descriptors @@ -1271,7 +1272,8 @@ static int flashrestart(struct airo_info *ai,struct net_device *dev); static int RxSeqValid (struct airo_info *ai,miccntx *context,int mcast,u32 micSeq); static void MoveWindow(miccntx *context, u32 micSeq); -static void emmh32_setseed(emmh32_context *context, u8 *pkey, int keylen, struct crypto_tfm *); +static void emmh32_setseed(emmh32_context *context, u8 *pkey, int keylen, + struct crypto_cipher *tfm); static void emmh32_init(emmh32_context *context); static void emmh32_update(emmh32_context *context, u8 *pOctets, int len); static void emmh32_final(emmh32_context *context, u8 digest[4]); @@ -1339,10 +1341,11 @@ static int micsetup(struct airo_info *ai) { int i; if (ai->tfm == NULL) - ai->tfm = crypto_alloc_tfm("aes", CRYPTO_TFM_REQ_MAY_SLEEP); + ai->tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); - if (ai->tfm == NULL) { + if (IS_ERR(ai->tfm)) { airo_print_err(ai->dev->name, "failed to load transform for AES"); + ai->tfm = NULL; return ERROR; } @@ -1608,7 +1611,8 @@ static void MoveWindow(miccntx *context, u32 micSeq) static unsigned char aes_counter[16]; /* expand the key to fill the MMH coefficient array */ -static void emmh32_setseed(emmh32_context *context, u8 *pkey, int keylen, struct crypto_tfm *tfm) +static void emmh32_setseed(emmh32_context *context, u8 *pkey, int keylen, + struct crypto_cipher *tfm) { /* take the keying material, expand if necessary, truncate at 16-bytes */ /* run through AES counter mode to generate context->coeff[] */ @@ -1616,7 +1620,6 @@ static void emmh32_setseed(emmh32_context *context, u8 *pkey, int keylen, struct int i,j; u32 counter; u8 *cipher, plain[16]; - struct scatterlist sg[1]; crypto_cipher_setkey(tfm, pkey, 16); counter = 0; @@ -1627,9 +1630,8 @@ static void emmh32_setseed(emmh32_context *context, u8 *pkey, int keylen, struct aes_counter[12] = (u8)(counter >> 24); counter++; memcpy (plain, aes_counter, 16); - sg_set_buf(sg, plain, 16); - crypto_cipher_encrypt(tfm, sg, sg, 16); - cipher = kmap(sg->page) + sg->offset; + crypto_cipher_encrypt_one(tfm, plain, plain); + cipher = plain; for (j=0; (j<16) && (i< (sizeof(context->coeff)/sizeof(context->coeff[0]))); ) { context->coeff[i++] = ntohl(*(u32 *)&cipher[j]); j += 4; @@ -2432,7 +2434,7 @@ void stop_airo_card( struct net_device *dev, int freeres ) ai->shared, ai->shared_dma); } } - crypto_free_tfm(ai->tfm); + crypto_free_cipher(ai->tfm); del_airo_dev( dev ); free_netdev( dev ); } diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/ieee80211/ieee80211_crypt_ccmp.c index ed90a8af1444..fdfe7704a469 100644 --- a/net/ieee80211/ieee80211_crypt_ccmp.c +++ b/net/ieee80211/ieee80211_crypt_ccmp.c @@ -9,6 +9,7 @@ * more details. */ +#include #include #include #include @@ -48,7 +49,7 @@ struct ieee80211_ccmp_data { int key_idx; - struct crypto_tfm *tfm; + struct crypto_cipher *tfm; /* scratch buffers for virt_to_page() (crypto API) */ u8 tx_b0[AES_BLOCK_LEN], tx_b[AES_BLOCK_LEN], @@ -56,20 +57,10 @@ struct ieee80211_ccmp_data { u8 rx_b0[AES_BLOCK_LEN], rx_b[AES_BLOCK_LEN], rx_a[AES_BLOCK_LEN]; }; -static void ieee80211_ccmp_aes_encrypt(struct crypto_tfm *tfm, - const u8 pt[16], u8 ct[16]) +static inline void ieee80211_ccmp_aes_encrypt(struct crypto_cipher *tfm, + const u8 pt[16], u8 ct[16]) { - struct scatterlist src, dst; - - src.page = virt_to_page(pt); - src.offset = offset_in_page(pt); - src.length = AES_BLOCK_LEN; - - dst.page = virt_to_page(ct); - dst.offset = offset_in_page(ct); - dst.length = AES_BLOCK_LEN; - - crypto_cipher_encrypt(tfm, &dst, &src, AES_BLOCK_LEN); + crypto_cipher_encrypt_one(tfm, ct, pt); } static void *ieee80211_ccmp_init(int key_idx) @@ -81,10 +72,11 @@ static void *ieee80211_ccmp_init(int key_idx) goto fail; priv->key_idx = key_idx; - priv->tfm = crypto_alloc_tfm("aes", 0); - if (priv->tfm == NULL) { + priv->tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(priv->tfm)) { printk(KERN_DEBUG "ieee80211_crypt_ccmp: could not allocate " "crypto API aes\n"); + priv->tfm = NULL; goto fail; } @@ -93,7 +85,7 @@ static void *ieee80211_ccmp_init(int key_idx) fail: if (priv) { if (priv->tfm) - crypto_free_tfm(priv->tfm); + crypto_free_cipher(priv->tfm); kfree(priv); } @@ -104,7 +96,7 @@ static void ieee80211_ccmp_deinit(void *priv) { struct ieee80211_ccmp_data *_priv = priv; if (_priv && _priv->tfm) - crypto_free_tfm(_priv->tfm); + crypto_free_cipher(_priv->tfm); kfree(priv); } @@ -115,7 +107,7 @@ static inline void xor_block(u8 * b, u8 * a, size_t len) b[i] ^= a[i]; } -static void ccmp_init_blocks(struct crypto_tfm *tfm, +static void ccmp_init_blocks(struct crypto_cipher *tfm, struct ieee80211_hdr_4addr *hdr, u8 * pn, size_t dlen, u8 * b0, u8 * auth, u8 * s0) { @@ -377,7 +369,7 @@ static int ieee80211_ccmp_set_key(void *key, int len, u8 * seq, void *priv) { struct ieee80211_ccmp_data *data = priv; int keyidx; - struct crypto_tfm *tfm = data->tfm; + struct crypto_cipher *tfm = data->tfm; keyidx = data->key_idx; memset(data, 0, sizeof(*data)); diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c index 34dba0ba545d..d60ce9b49b4f 100644 --- a/net/ieee80211/ieee80211_crypt_tkip.c +++ b/net/ieee80211/ieee80211_crypt_tkip.c @@ -9,6 +9,7 @@ * more details. */ +#include #include #include #include @@ -52,7 +53,7 @@ struct ieee80211_tkip_data { int key_idx; - struct crypto_tfm *tfm_arc4; + struct crypto_blkcipher *tfm_arc4; struct crypto_tfm *tfm_michael; /* scratch buffers for virt_to_page() (crypto API) */ @@ -85,10 +86,12 @@ static void *ieee80211_tkip_init(int key_idx) priv->key_idx = key_idx; - priv->tfm_arc4 = crypto_alloc_tfm("arc4", 0); - if (priv->tfm_arc4 == NULL) { + priv->tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(priv->tfm_arc4)) { printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate " "crypto API arc4\n"); + priv->tfm_arc4 = NULL; goto fail; } @@ -106,7 +109,7 @@ static void *ieee80211_tkip_init(int key_idx) if (priv->tfm_michael) crypto_free_tfm(priv->tfm_michael); if (priv->tfm_arc4) - crypto_free_tfm(priv->tfm_arc4); + crypto_free_blkcipher(priv->tfm_arc4); kfree(priv); } @@ -119,7 +122,7 @@ static void ieee80211_tkip_deinit(void *priv) if (_priv && _priv->tfm_michael) crypto_free_tfm(_priv->tfm_michael); if (_priv && _priv->tfm_arc4) - crypto_free_tfm(_priv->tfm_arc4); + crypto_free_blkcipher(_priv->tfm_arc4); kfree(priv); } @@ -318,6 +321,7 @@ static int ieee80211_tkip_hdr(struct sk_buff *skb, int hdr_len, static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) { struct ieee80211_tkip_data *tkey = priv; + struct blkcipher_desc desc = { .tfm = tkey->tfm_arc4 }; int len; u8 rc4key[16], *pos, *icv; u32 crc; @@ -351,18 +355,17 @@ static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) icv[2] = crc >> 16; icv[3] = crc >> 24; - crypto_cipher_setkey(tkey->tfm_arc4, rc4key, 16); + crypto_blkcipher_setkey(tkey->tfm_arc4, rc4key, 16); sg.page = virt_to_page(pos); sg.offset = offset_in_page(pos); sg.length = len + 4; - crypto_cipher_encrypt(tkey->tfm_arc4, &sg, &sg, len + 4); - - return 0; + return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4); } static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) { struct ieee80211_tkip_data *tkey = priv; + struct blkcipher_desc desc = { .tfm = tkey->tfm_arc4 }; u8 rc4key[16]; u8 keyidx, *pos; u32 iv32; @@ -434,11 +437,18 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) plen = skb->len - hdr_len - 12; - crypto_cipher_setkey(tkey->tfm_arc4, rc4key, 16); + crypto_blkcipher_setkey(tkey->tfm_arc4, rc4key, 16); sg.page = virt_to_page(pos); sg.offset = offset_in_page(pos); sg.length = plen + 4; - crypto_cipher_decrypt(tkey->tfm_arc4, &sg, &sg, plen + 4); + if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) { + if (net_ratelimit()) { + printk(KERN_DEBUG ": TKIP: failed to decrypt " + "received packet from " MAC_FMT "\n", + MAC_ARG(hdr->addr2)); + } + return -7; + } crc = ~crc32_le(~0, pos, plen); icv[0] = crc; @@ -619,7 +629,7 @@ static int ieee80211_tkip_set_key(void *key, int len, u8 * seq, void *priv) struct ieee80211_tkip_data *tkey = priv; int keyidx; struct crypto_tfm *tfm = tkey->tfm_michael; - struct crypto_tfm *tfm2 = tkey->tfm_arc4; + struct crypto_blkcipher *tfm2 = tkey->tfm_arc4; keyidx = tkey->key_idx; memset(tkey, 0, sizeof(*tkey)); diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c index 0ebf235f6939..3d46d3efe1dd 100644 --- a/net/ieee80211/ieee80211_crypt_wep.c +++ b/net/ieee80211/ieee80211_crypt_wep.c @@ -9,6 +9,7 @@ * more details. */ +#include #include #include #include @@ -32,7 +33,7 @@ struct prism2_wep_data { u8 key[WEP_KEY_LEN + 1]; u8 key_len; u8 key_idx; - struct crypto_tfm *tfm; + struct crypto_blkcipher *tfm; }; static void *prism2_wep_init(int keyidx) @@ -44,10 +45,11 @@ static void *prism2_wep_init(int keyidx) goto fail; priv->key_idx = keyidx; - priv->tfm = crypto_alloc_tfm("arc4", 0); - if (priv->tfm == NULL) { + priv->tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(priv->tfm)) { printk(KERN_DEBUG "ieee80211_crypt_wep: could not allocate " "crypto API arc4\n"); + priv->tfm = NULL; goto fail; } @@ -59,7 +61,7 @@ static void *prism2_wep_init(int keyidx) fail: if (priv) { if (priv->tfm) - crypto_free_tfm(priv->tfm); + crypto_free_blkcipher(priv->tfm); kfree(priv); } return NULL; @@ -69,7 +71,7 @@ static void prism2_wep_deinit(void *priv) { struct prism2_wep_data *_priv = priv; if (_priv && _priv->tfm) - crypto_free_tfm(_priv->tfm); + crypto_free_blkcipher(_priv->tfm); kfree(priv); } @@ -120,6 +122,7 @@ static int prism2_wep_build_iv(struct sk_buff *skb, int hdr_len, static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) { struct prism2_wep_data *wep = priv; + struct blkcipher_desc desc = { .tfm = wep->tfm }; u32 crc, klen, len; u8 *pos, *icv; struct scatterlist sg; @@ -151,13 +154,11 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) icv[2] = crc >> 16; icv[3] = crc >> 24; - crypto_cipher_setkey(wep->tfm, key, klen); + crypto_blkcipher_setkey(wep->tfm, key, klen); sg.page = virt_to_page(pos); sg.offset = offset_in_page(pos); sg.length = len + 4; - crypto_cipher_encrypt(wep->tfm, &sg, &sg, len + 4); - - return 0; + return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4); } /* Perform WEP decryption on given buffer. Buffer includes whole WEP part of @@ -170,6 +171,7 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) static int prism2_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv) { struct prism2_wep_data *wep = priv; + struct blkcipher_desc desc = { .tfm = wep->tfm }; u32 crc, klen, plen; u8 key[WEP_KEY_LEN + 3]; u8 keyidx, *pos, icv[4]; @@ -194,11 +196,12 @@ static int prism2_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv) /* Apply RC4 to data and compute CRC32 over decrypted data */ plen = skb->len - hdr_len - 8; - crypto_cipher_setkey(wep->tfm, key, klen); + crypto_blkcipher_setkey(wep->tfm, key, klen); sg.page = virt_to_page(pos); sg.offset = offset_in_page(pos); sg.length = plen + 4; - crypto_cipher_decrypt(wep->tfm, &sg, &sg, plen + 4); + if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) + return -7; crc = ~crc32_le(~0, pos, plen); icv[0] = crc; From efcf8023e299be605f217dc2c1b2754b5534569c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 5 Aug 2006 16:28:19 +1000 Subject: [PATCH 0322/1063] [CRYPTO] drivers: Remove obsolete block cipher operations This patch removes obsolete block operations of the simple cipher type from drivers. These were preserved so that existing users can make a smooth transition. Now that the transition is complete, they are no longer needed. Signed-off-by: Herbert Xu --- arch/s390/crypto/aes_s390.c | 112 ------------------- arch/s390/crypto/des_s390.c | 203 ----------------------------------- drivers/crypto/padlock-aes.c | 44 -------- 3 files changed, 359 deletions(-) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 8f04b4e41b55..15c9eec02928 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -113,114 +113,6 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) } } -static unsigned int aes_encrypt_ecb(const struct cipher_desc *desc, u8 *out, - const u8 *in, unsigned int nbytes) -{ - struct s390_aes_ctx *sctx = crypto_tfm_ctx(desc->tfm); - int ret; - - /* only use complete blocks */ - nbytes &= ~(AES_BLOCK_SIZE - 1); - - switch (sctx->key_len) { - case 16: - ret = crypt_s390_km(KM_AES_128_ENCRYPT, &sctx->key, out, in, nbytes); - BUG_ON((ret < 0) || (ret != nbytes)); - break; - case 24: - ret = crypt_s390_km(KM_AES_192_ENCRYPT, &sctx->key, out, in, nbytes); - BUG_ON((ret < 0) || (ret != nbytes)); - break; - case 32: - ret = crypt_s390_km(KM_AES_256_ENCRYPT, &sctx->key, out, in, nbytes); - BUG_ON((ret < 0) || (ret != nbytes)); - break; - } - return nbytes; -} - -static unsigned int aes_decrypt_ecb(const struct cipher_desc *desc, u8 *out, - const u8 *in, unsigned int nbytes) -{ - struct s390_aes_ctx *sctx = crypto_tfm_ctx(desc->tfm); - int ret; - - /* only use complete blocks */ - nbytes &= ~(AES_BLOCK_SIZE - 1); - - switch (sctx->key_len) { - case 16: - ret = crypt_s390_km(KM_AES_128_DECRYPT, &sctx->key, out, in, nbytes); - BUG_ON((ret < 0) || (ret != nbytes)); - break; - case 24: - ret = crypt_s390_km(KM_AES_192_DECRYPT, &sctx->key, out, in, nbytes); - BUG_ON((ret < 0) || (ret != nbytes)); - break; - case 32: - ret = crypt_s390_km(KM_AES_256_DECRYPT, &sctx->key, out, in, nbytes); - BUG_ON((ret < 0) || (ret != nbytes)); - break; - } - return nbytes; -} - -static unsigned int aes_encrypt_cbc(const struct cipher_desc *desc, u8 *out, - const u8 *in, unsigned int nbytes) -{ - struct s390_aes_ctx *sctx = crypto_tfm_ctx(desc->tfm); - int ret; - - /* only use complete blocks */ - nbytes &= ~(AES_BLOCK_SIZE - 1); - - memcpy(&sctx->iv, desc->info, AES_BLOCK_SIZE); - switch (sctx->key_len) { - case 16: - ret = crypt_s390_kmc(KMC_AES_128_ENCRYPT, &sctx->iv, out, in, nbytes); - BUG_ON((ret < 0) || (ret != nbytes)); - break; - case 24: - ret = crypt_s390_kmc(KMC_AES_192_ENCRYPT, &sctx->iv, out, in, nbytes); - BUG_ON((ret < 0) || (ret != nbytes)); - break; - case 32: - ret = crypt_s390_kmc(KMC_AES_256_ENCRYPT, &sctx->iv, out, in, nbytes); - BUG_ON((ret < 0) || (ret != nbytes)); - break; - } - memcpy(desc->info, &sctx->iv, AES_BLOCK_SIZE); - - return nbytes; -} - -static unsigned int aes_decrypt_cbc(const struct cipher_desc *desc, u8 *out, - const u8 *in, unsigned int nbytes) -{ - struct s390_aes_ctx *sctx = crypto_tfm_ctx(desc->tfm); - int ret; - - /* only use complete blocks */ - nbytes &= ~(AES_BLOCK_SIZE - 1); - - memcpy(&sctx->iv, desc->info, AES_BLOCK_SIZE); - switch (sctx->key_len) { - case 16: - ret = crypt_s390_kmc(KMC_AES_128_DECRYPT, &sctx->iv, out, in, nbytes); - BUG_ON((ret < 0) || (ret != nbytes)); - break; - case 24: - ret = crypt_s390_kmc(KMC_AES_192_DECRYPT, &sctx->iv, out, in, nbytes); - BUG_ON((ret < 0) || (ret != nbytes)); - break; - case 32: - ret = crypt_s390_kmc(KMC_AES_256_DECRYPT, &sctx->iv, out, in, nbytes); - BUG_ON((ret < 0) || (ret != nbytes)); - break; - } - return nbytes; -} - static struct crypto_alg aes_alg = { .cra_name = "aes", @@ -238,10 +130,6 @@ static struct crypto_alg aes_alg = { .cia_setkey = aes_set_key, .cia_encrypt = aes_encrypt, .cia_decrypt = aes_decrypt, - .cia_encrypt_ecb = aes_encrypt_ecb, - .cia_decrypt_ecb = aes_decrypt_ecb, - .cia_encrypt_cbc = aes_encrypt_cbc, - .cia_decrypt_cbc = aes_decrypt_cbc, } } }; diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index a6d2385ccb7a..2aba04852fe3 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -73,67 +73,6 @@ static void des_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) crypt_s390_km(KM_DEA_DECRYPT, dctx->key, out, in, DES_BLOCK_SIZE); } -static unsigned int des_encrypt_ecb(const struct cipher_desc *desc, u8 *out, - const u8 *in, unsigned int nbytes) -{ - struct crypt_s390_des_ctx *sctx = crypto_tfm_ctx(desc->tfm); - int ret; - - /* only use complete blocks */ - nbytes &= ~(DES_BLOCK_SIZE - 1); - ret = crypt_s390_km(KM_DEA_ENCRYPT, sctx->key, out, in, nbytes); - BUG_ON((ret < 0) || (ret != nbytes)); - - return nbytes; -} - -static unsigned int des_decrypt_ecb(const struct cipher_desc *desc, u8 *out, - const u8 *in, unsigned int nbytes) -{ - struct crypt_s390_des_ctx *sctx = crypto_tfm_ctx(desc->tfm); - int ret; - - /* only use complete blocks */ - nbytes &= ~(DES_BLOCK_SIZE - 1); - ret = crypt_s390_km(KM_DEA_DECRYPT, sctx->key, out, in, nbytes); - BUG_ON((ret < 0) || (ret != nbytes)); - - return nbytes; -} - -static unsigned int des_encrypt_cbc(const struct cipher_desc *desc, u8 *out, - const u8 *in, unsigned int nbytes) -{ - struct crypt_s390_des_ctx *sctx = crypto_tfm_ctx(desc->tfm); - int ret; - - /* only use complete blocks */ - nbytes &= ~(DES_BLOCK_SIZE - 1); - - memcpy(sctx->iv, desc->info, DES_BLOCK_SIZE); - ret = crypt_s390_kmc(KMC_DEA_ENCRYPT, &sctx->iv, out, in, nbytes); - BUG_ON((ret < 0) || (ret != nbytes)); - - memcpy(desc->info, sctx->iv, DES_BLOCK_SIZE); - return nbytes; -} - -static unsigned int des_decrypt_cbc(const struct cipher_desc *desc, u8 *out, - const u8 *in, unsigned int nbytes) -{ - struct crypt_s390_des_ctx *sctx = crypto_tfm_ctx(desc->tfm); - int ret; - - /* only use complete blocks */ - nbytes &= ~(DES_BLOCK_SIZE - 1); - - memcpy(&sctx->iv, desc->info, DES_BLOCK_SIZE); - ret = crypt_s390_kmc(KMC_DEA_DECRYPT, &sctx->iv, out, in, nbytes); - BUG_ON((ret < 0) || (ret != nbytes)); - - return nbytes; -} - static struct crypto_alg des_alg = { .cra_name = "des", .cra_driver_name = "des-s390", @@ -150,10 +89,6 @@ static struct crypto_alg des_alg = { .cia_setkey = des_setkey, .cia_encrypt = des_encrypt, .cia_decrypt = des_decrypt, - .cia_encrypt_ecb = des_encrypt_ecb, - .cia_decrypt_ecb = des_decrypt_ecb, - .cia_encrypt_cbc = des_encrypt_cbc, - .cia_decrypt_cbc = des_decrypt_cbc, } } }; @@ -344,71 +279,6 @@ static void des3_128_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) DES3_128_BLOCK_SIZE); } -static unsigned int des3_128_encrypt_ecb(const struct cipher_desc *desc, - u8 *out, const u8 *in, - unsigned int nbytes) -{ - struct crypt_s390_des3_128_ctx *sctx = crypto_tfm_ctx(desc->tfm); - int ret; - - /* only use complete blocks */ - nbytes &= ~(DES3_128_BLOCK_SIZE - 1); - ret = crypt_s390_km(KM_TDEA_128_ENCRYPT, sctx->key, out, in, nbytes); - BUG_ON((ret < 0) || (ret != nbytes)); - - return nbytes; -} - -static unsigned int des3_128_decrypt_ecb(const struct cipher_desc *desc, - u8 *out, const u8 *in, - unsigned int nbytes) -{ - struct crypt_s390_des3_128_ctx *sctx = crypto_tfm_ctx(desc->tfm); - int ret; - - /* only use complete blocks */ - nbytes &= ~(DES3_128_BLOCK_SIZE - 1); - ret = crypt_s390_km(KM_TDEA_128_DECRYPT, sctx->key, out, in, nbytes); - BUG_ON((ret < 0) || (ret != nbytes)); - - return nbytes; -} - -static unsigned int des3_128_encrypt_cbc(const struct cipher_desc *desc, - u8 *out, const u8 *in, - unsigned int nbytes) -{ - struct crypt_s390_des3_128_ctx *sctx = crypto_tfm_ctx(desc->tfm); - int ret; - - /* only use complete blocks */ - nbytes &= ~(DES3_128_BLOCK_SIZE - 1); - - memcpy(sctx->iv, desc->info, DES3_128_BLOCK_SIZE); - ret = crypt_s390_kmc(KMC_TDEA_128_ENCRYPT, &sctx->iv, out, in, nbytes); - BUG_ON((ret < 0) || (ret != nbytes)); - - memcpy(desc->info, sctx->iv, DES3_128_BLOCK_SIZE); - return nbytes; -} - -static unsigned int des3_128_decrypt_cbc(const struct cipher_desc *desc, - u8 *out, const u8 *in, - unsigned int nbytes) -{ - struct crypt_s390_des3_128_ctx *sctx = crypto_tfm_ctx(desc->tfm); - int ret; - - /* only use complete blocks */ - nbytes &= ~(DES3_128_BLOCK_SIZE - 1); - - memcpy(&sctx->iv, desc->info, DES3_128_BLOCK_SIZE); - ret = crypt_s390_kmc(KMC_TDEA_128_DECRYPT, &sctx->iv, out, in, nbytes); - BUG_ON((ret < 0) || (ret != nbytes)); - - return nbytes; -} - static struct crypto_alg des3_128_alg = { .cra_name = "des3_ede128", .cra_driver_name = "des3_ede128-s390", @@ -425,10 +295,6 @@ static struct crypto_alg des3_128_alg = { .cia_setkey = des3_128_setkey, .cia_encrypt = des3_128_encrypt, .cia_decrypt = des3_128_decrypt, - .cia_encrypt_ecb = des3_128_encrypt_ecb, - .cia_decrypt_ecb = des3_128_decrypt_ecb, - .cia_encrypt_cbc = des3_128_encrypt_cbc, - .cia_decrypt_cbc = des3_128_decrypt_cbc, } } }; @@ -575,71 +441,6 @@ static void des3_192_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) DES3_192_BLOCK_SIZE); } -static unsigned int des3_192_encrypt_ecb(const struct cipher_desc *desc, - u8 *out, const u8 *in, - unsigned int nbytes) -{ - struct crypt_s390_des3_192_ctx *sctx = crypto_tfm_ctx(desc->tfm); - int ret; - - /* only use complete blocks */ - nbytes &= ~(DES3_192_BLOCK_SIZE - 1); - ret = crypt_s390_km(KM_TDEA_192_ENCRYPT, sctx->key, out, in, nbytes); - BUG_ON((ret < 0) || (ret != nbytes)); - - return nbytes; -} - -static unsigned int des3_192_decrypt_ecb(const struct cipher_desc *desc, - u8 *out, const u8 *in, - unsigned int nbytes) -{ - struct crypt_s390_des3_192_ctx *sctx = crypto_tfm_ctx(desc->tfm); - int ret; - - /* only use complete blocks */ - nbytes &= ~(DES3_192_BLOCK_SIZE - 1); - ret = crypt_s390_km(KM_TDEA_192_DECRYPT, sctx->key, out, in, nbytes); - BUG_ON((ret < 0) || (ret != nbytes)); - - return nbytes; -} - -static unsigned int des3_192_encrypt_cbc(const struct cipher_desc *desc, - u8 *out, const u8 *in, - unsigned int nbytes) -{ - struct crypt_s390_des3_192_ctx *sctx = crypto_tfm_ctx(desc->tfm); - int ret; - - /* only use complete blocks */ - nbytes &= ~(DES3_192_BLOCK_SIZE - 1); - - memcpy(sctx->iv, desc->info, DES3_192_BLOCK_SIZE); - ret = crypt_s390_kmc(KMC_TDEA_192_ENCRYPT, &sctx->iv, out, in, nbytes); - BUG_ON((ret < 0) || (ret != nbytes)); - - memcpy(desc->info, sctx->iv, DES3_192_BLOCK_SIZE); - return nbytes; -} - -static unsigned int des3_192_decrypt_cbc(const struct cipher_desc *desc, - u8 *out, const u8 *in, - unsigned int nbytes) -{ - struct crypt_s390_des3_192_ctx *sctx = crypto_tfm_ctx(desc->tfm); - int ret; - - /* only use complete blocks */ - nbytes &= ~(DES3_192_BLOCK_SIZE - 1); - - memcpy(&sctx->iv, desc->info, DES3_192_BLOCK_SIZE); - ret = crypt_s390_kmc(KMC_TDEA_192_DECRYPT, &sctx->iv, out, in, nbytes); - BUG_ON((ret < 0) || (ret != nbytes)); - - return nbytes; -} - static struct crypto_alg des3_192_alg = { .cra_name = "des3_ede", .cra_driver_name = "des3_ede-s390", @@ -656,10 +457,6 @@ static struct crypto_alg des3_192_alg = { .cia_setkey = des3_192_setkey, .cia_encrypt = des3_192_encrypt, .cia_decrypt = des3_192_decrypt, - .cia_encrypt_ecb = des3_192_encrypt_ecb, - .cia_decrypt_ecb = des3_192_decrypt_ecb, - .cia_encrypt_cbc = des3_192_encrypt_cbc, - .cia_decrypt_cbc = des3_192_decrypt_cbc, } } }; diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index f53301e836d9..d4501dc7e650 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -452,46 +452,6 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) padlock_xcrypt_ecb(in, out, ctx->D, &ctx->cword.decrypt, 1); } -static unsigned int aes_encrypt_ecb(const struct cipher_desc *desc, u8 *out, - const u8 *in, unsigned int nbytes) -{ - struct aes_ctx *ctx = aes_ctx(desc->tfm); - padlock_xcrypt_ecb(in, out, ctx->E, &ctx->cword.encrypt, - nbytes / AES_BLOCK_SIZE); - return nbytes & ~(AES_BLOCK_SIZE - 1); -} - -static unsigned int aes_decrypt_ecb(const struct cipher_desc *desc, u8 *out, - const u8 *in, unsigned int nbytes) -{ - struct aes_ctx *ctx = aes_ctx(desc->tfm); - padlock_xcrypt_ecb(in, out, ctx->D, &ctx->cword.decrypt, - nbytes / AES_BLOCK_SIZE); - return nbytes & ~(AES_BLOCK_SIZE - 1); -} - -static unsigned int aes_encrypt_cbc(const struct cipher_desc *desc, u8 *out, - const u8 *in, unsigned int nbytes) -{ - struct aes_ctx *ctx = aes_ctx(desc->tfm); - u8 *iv; - - iv = padlock_xcrypt_cbc(in, out, ctx->E, desc->info, - &ctx->cword.encrypt, nbytes / AES_BLOCK_SIZE); - memcpy(desc->info, iv, AES_BLOCK_SIZE); - - return nbytes & ~(AES_BLOCK_SIZE - 1); -} - -static unsigned int aes_decrypt_cbc(const struct cipher_desc *desc, u8 *out, - const u8 *in, unsigned int nbytes) -{ - struct aes_ctx *ctx = aes_ctx(desc->tfm); - padlock_xcrypt_cbc(in, out, ctx->D, desc->info, &ctx->cword.decrypt, - nbytes / AES_BLOCK_SIZE); - return nbytes & ~(AES_BLOCK_SIZE - 1); -} - static struct crypto_alg aes_alg = { .cra_name = "aes", .cra_driver_name = "aes-padlock", @@ -509,10 +469,6 @@ static struct crypto_alg aes_alg = { .cia_setkey = aes_set_key, .cia_encrypt = aes_encrypt, .cia_decrypt = aes_decrypt, - .cia_encrypt_ecb = aes_encrypt_ecb, - .cia_decrypt_ecb = aes_decrypt_ecb, - .cia_encrypt_cbc = aes_encrypt_cbc, - .cia_decrypt_cbc = aes_decrypt_cbc, } } }; From 03fd9cee7f46dddcd2562bc175d2c348502ce281 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 14 Aug 2006 23:11:53 +1000 Subject: [PATCH 0323/1063] [PATCH] scatterlist: Add const to sg_set_buf/sg_init_one pointer argument This patch adds a const modifier to the buf argument of sg_set_buf and sg_init_one. This lets people call it with pointers that are const. Signed-off-by: Herbert Xu --- include/linux/scatterlist.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 66ff545552f7..4efbd9c445f5 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -5,7 +5,7 @@ #include #include -static inline void sg_set_buf(struct scatterlist *sg, void *buf, +static inline void sg_set_buf(struct scatterlist *sg, const void *buf, unsigned int buflen) { sg->page = virt_to_page(buf); @@ -13,7 +13,7 @@ static inline void sg_set_buf(struct scatterlist *sg, void *buf, sg->length = buflen; } -static inline void sg_init_one(struct scatterlist *sg, void *buf, +static inline void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen) { memset(sg, 0, sizeof(*sg)); From 7226bc877a22244e8003924031435a4bffd52654 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 21 Aug 2006 21:40:49 +1000 Subject: [PATCH 0324/1063] [CRYPTO] api: Mark parts of cipher interface as deprecated Mark the parts of the cipher interface that have been replaced by block ciphers as deprecated. Thanks to Andrew Morton for suggesting doing this before removing them completely. Signed-off-by: Herbert Xu --- crypto/cipher.c | 34 ++++++++++++++++++++++++------ include/linux/crypto.h | 48 +++++++++++++++++++++++++++++++----------- 2 files changed, 64 insertions(+), 18 deletions(-) diff --git a/crypto/cipher.c b/crypto/cipher.c index 326461780673..9e03701cfdcc 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -23,6 +23,28 @@ #include "internal.h" #include "scatterwalk.h" +struct cipher_alg_compat { + unsigned int cia_min_keysize; + unsigned int cia_max_keysize; + int (*cia_setkey)(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen); + void (*cia_encrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); + void (*cia_decrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); + + unsigned int (*cia_encrypt_ecb)(const struct cipher_desc *desc, + u8 *dst, const u8 *src, + unsigned int nbytes); + unsigned int (*cia_decrypt_ecb)(const struct cipher_desc *desc, + u8 *dst, const u8 *src, + unsigned int nbytes); + unsigned int (*cia_encrypt_cbc)(const struct cipher_desc *desc, + u8 *dst, const u8 *src, + unsigned int nbytes); + unsigned int (*cia_decrypt_cbc)(const struct cipher_desc *desc, + u8 *dst, const u8 *src, + unsigned int nbytes); +}; + static inline void xor_64(u8 *a, const u8 *b) { ((u32 *)a)[0] ^= ((u32 *)b)[0]; @@ -276,7 +298,7 @@ static int ecb_encrypt(struct crypto_tfm *tfm, struct scatterlist *src, unsigned int nbytes) { struct cipher_desc desc; - struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; + struct cipher_alg_compat *cipher = (void *)&tfm->__crt_alg->cra_cipher; desc.tfm = tfm; desc.crfn = cipher->cia_encrypt; @@ -291,7 +313,7 @@ static int ecb_decrypt(struct crypto_tfm *tfm, unsigned int nbytes) { struct cipher_desc desc; - struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; + struct cipher_alg_compat *cipher = (void *)&tfm->__crt_alg->cra_cipher; desc.tfm = tfm; desc.crfn = cipher->cia_decrypt; @@ -306,7 +328,7 @@ static int cbc_encrypt(struct crypto_tfm *tfm, unsigned int nbytes) { struct cipher_desc desc; - struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; + struct cipher_alg_compat *cipher = (void *)&tfm->__crt_alg->cra_cipher; desc.tfm = tfm; desc.crfn = cipher->cia_encrypt; @@ -322,7 +344,7 @@ static int cbc_encrypt_iv(struct crypto_tfm *tfm, unsigned int nbytes, u8 *iv) { struct cipher_desc desc; - struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; + struct cipher_alg_compat *cipher = (void *)&tfm->__crt_alg->cra_cipher; desc.tfm = tfm; desc.crfn = cipher->cia_encrypt; @@ -338,7 +360,7 @@ static int cbc_decrypt(struct crypto_tfm *tfm, unsigned int nbytes) { struct cipher_desc desc; - struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; + struct cipher_alg_compat *cipher = (void *)&tfm->__crt_alg->cra_cipher; desc.tfm = tfm; desc.crfn = cipher->cia_decrypt; @@ -354,7 +376,7 @@ static int cbc_decrypt_iv(struct crypto_tfm *tfm, unsigned int nbytes, u8 *iv) { struct cipher_desc desc; - struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; + struct cipher_alg_compat *cipher = (void *)&tfm->__crt_alg->cra_cipher; desc.tfm = tfm; desc.crfn = cipher->cia_decrypt; diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 5a5466d518e8..0be666b50463 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -137,16 +136,16 @@ struct cipher_alg { unsigned int (*cia_encrypt_ecb)(const struct cipher_desc *desc, u8 *dst, const u8 *src, - unsigned int nbytes); + unsigned int nbytes) __deprecated; unsigned int (*cia_decrypt_ecb)(const struct cipher_desc *desc, u8 *dst, const u8 *src, - unsigned int nbytes); + unsigned int nbytes) __deprecated; unsigned int (*cia_encrypt_cbc)(const struct cipher_desc *desc, u8 *dst, const u8 *src, - unsigned int nbytes); + unsigned int nbytes) __deprecated; unsigned int (*cia_decrypt_cbc)(const struct cipher_desc *desc, u8 *dst, const u8 *src, - unsigned int nbytes); + unsigned int nbytes) __deprecated; }; struct digest_alg { @@ -358,18 +357,23 @@ static inline u32 crypto_tfm_alg_type(struct crypto_tfm *tfm) return tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK; } +static unsigned int crypto_tfm_alg_min_keysize(struct crypto_tfm *tfm) + __deprecated; static inline unsigned int crypto_tfm_alg_min_keysize(struct crypto_tfm *tfm) { BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER); return tfm->__crt_alg->cra_cipher.cia_min_keysize; } +static unsigned int crypto_tfm_alg_max_keysize(struct crypto_tfm *tfm) + __deprecated; static inline unsigned int crypto_tfm_alg_max_keysize(struct crypto_tfm *tfm) { BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER); return tfm->__crt_alg->cra_cipher.cia_max_keysize; } +static unsigned int crypto_tfm_alg_ivsize(struct crypto_tfm *tfm) __deprecated; static inline unsigned int crypto_tfm_alg_ivsize(struct crypto_tfm *tfm) { BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER); @@ -622,6 +626,13 @@ static inline void crypto_cipher_clear_flags(struct crypto_cipher *tfm, crypto_tfm_clear_flags(crypto_cipher_tfm(tfm), flags); } +static inline int crypto_cipher_setkey(struct crypto_cipher *tfm, + const u8 *key, unsigned int keylen) +{ + return crypto_cipher_crt(tfm)->cit_setkey(crypto_cipher_tfm(tfm), + key, keylen); +} + static inline void crypto_cipher_encrypt_one(struct crypto_cipher *tfm, u8 *dst, const u8 *src) { @@ -671,13 +682,10 @@ static inline int crypto_digest_setkey(struct crypto_tfm *tfm, return tfm->crt_digest.dit_setkey(tfm, key, keylen); } -static inline int crypto_cipher_setkey(struct crypto_tfm *tfm, - const u8 *key, unsigned int keylen) -{ - BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER); - return tfm->crt_cipher.cit_setkey(tfm, key, keylen); -} - +static int crypto_cipher_encrypt(struct crypto_tfm *tfm, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes) __deprecated; static inline int crypto_cipher_encrypt(struct crypto_tfm *tfm, struct scatterlist *dst, struct scatterlist *src, @@ -687,6 +695,10 @@ static inline int crypto_cipher_encrypt(struct crypto_tfm *tfm, return tfm->crt_cipher.cit_encrypt(tfm, dst, src, nbytes); } +static int crypto_cipher_encrypt_iv(struct crypto_tfm *tfm, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes, u8 *iv) __deprecated; static inline int crypto_cipher_encrypt_iv(struct crypto_tfm *tfm, struct scatterlist *dst, struct scatterlist *src, @@ -696,6 +708,10 @@ static inline int crypto_cipher_encrypt_iv(struct crypto_tfm *tfm, return tfm->crt_cipher.cit_encrypt_iv(tfm, dst, src, nbytes, iv); } +static int crypto_cipher_decrypt(struct crypto_tfm *tfm, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes) __deprecated; static inline int crypto_cipher_decrypt(struct crypto_tfm *tfm, struct scatterlist *dst, struct scatterlist *src, @@ -705,6 +721,10 @@ static inline int crypto_cipher_decrypt(struct crypto_tfm *tfm, return tfm->crt_cipher.cit_decrypt(tfm, dst, src, nbytes); } +static int crypto_cipher_decrypt_iv(struct crypto_tfm *tfm, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes, u8 *iv) __deprecated; static inline int crypto_cipher_decrypt_iv(struct crypto_tfm *tfm, struct scatterlist *dst, struct scatterlist *src, @@ -714,6 +734,8 @@ static inline int crypto_cipher_decrypt_iv(struct crypto_tfm *tfm, return tfm->crt_cipher.cit_decrypt_iv(tfm, dst, src, nbytes, iv); } +static void crypto_cipher_set_iv(struct crypto_tfm *tfm, + const u8 *src, unsigned int len) __deprecated; static inline void crypto_cipher_set_iv(struct crypto_tfm *tfm, const u8 *src, unsigned int len) { @@ -721,6 +743,8 @@ static inline void crypto_cipher_set_iv(struct crypto_tfm *tfm, memcpy(tfm->crt_cipher.cit_iv, src, len); } +static void crypto_cipher_get_iv(struct crypto_tfm *tfm, + u8 *dst, unsigned int len) __deprecated; static inline void crypto_cipher_get_iv(struct crypto_tfm *tfm, u8 *dst, unsigned int len) { From 055bcee3102dc35f019b69df9c2618e9d6dd1c09 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 19 Aug 2006 22:24:23 +1000 Subject: [PATCH 0325/1063] [CRYPTO] digest: Added user API for new hash type The existing digest user interface is inadequate for support asynchronous operations. For one it doesn't return a value to indicate success or failure, nor does it take a per-operation descriptor which is essential for the issuing of requests while other requests are still outstanding. This patch is the first in a series of steps to remodel the interface for asynchronous operations. For the ease of transition the new interface will be known as "hash" while the old one will remain as "digest". This patch also changes sg_next to allow chaining. Signed-off-by: Herbert Xu --- crypto/Kconfig | 4 + crypto/Makefile | 3 + crypto/digest.c | 131 ++++++++++++++++++++++------- crypto/hash.c | 61 ++++++++++++++ crypto/hmac.c | 12 +-- crypto/scatterwalk.h | 4 +- include/crypto/algapi.h | 6 ++ include/linux/crypto.h | 180 +++++++++++++++++++++++++++++++--------- 8 files changed, 326 insertions(+), 75 deletions(-) create mode 100644 crypto/hash.c diff --git a/crypto/Kconfig b/crypto/Kconfig index be5eb0cb7c30..69c5f992bcd4 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -20,6 +20,10 @@ config CRYPTO_BLKCIPHER tristate select CRYPTO_ALGAPI +config CRYPTO_HASH + tristate + select CRYPTO_ALGAPI + config CRYPTO_MANAGER tristate "Cryptographic algorithm manager" select CRYPTO_ALGAPI diff --git a/crypto/Makefile b/crypto/Makefile index 5e1ff4e0b1fc..72366208e291 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -10,6 +10,9 @@ obj-$(CONFIG_CRYPTO_ALGAPI) += crypto_algapi.o obj-$(CONFIG_CRYPTO_BLKCIPHER) += blkcipher.o +crypto_hash-objs := hash.o +obj-$(CONFIG_CRYPTO_HASH) += crypto_hash.o + obj-$(CONFIG_CRYPTO_MANAGER) += cryptomgr.o obj-$(CONFIG_CRYPTO_HMAC) += hmac.o obj-$(CONFIG_CRYPTO_NULL) += crypto_null.o diff --git a/crypto/digest.c b/crypto/digest.c index 96244a528844..5873063db840 100644 --- a/crypto/digest.c +++ b/crypto/digest.c @@ -11,29 +11,89 @@ * any later version. * */ -#include + #include #include #include -#include -#include "internal.h" +#include +#include -static void init(struct crypto_tfm *tfm) +#include "internal.h" +#include "scatterwalk.h" + +void crypto_digest_init(struct crypto_tfm *tfm) { + struct crypto_hash *hash = crypto_hash_cast(tfm); + struct hash_desc desc = { .tfm = hash, .flags = tfm->crt_flags }; + + crypto_hash_init(&desc); +} +EXPORT_SYMBOL_GPL(crypto_digest_init); + +void crypto_digest_update(struct crypto_tfm *tfm, + struct scatterlist *sg, unsigned int nsg) +{ + struct crypto_hash *hash = crypto_hash_cast(tfm); + struct hash_desc desc = { .tfm = hash, .flags = tfm->crt_flags }; + unsigned int nbytes = 0; + unsigned int i; + + for (i = 0; i < nsg; i++) + nbytes += sg[i].length; + + crypto_hash_update(&desc, sg, nbytes); +} +EXPORT_SYMBOL_GPL(crypto_digest_update); + +void crypto_digest_final(struct crypto_tfm *tfm, u8 *out) +{ + struct crypto_hash *hash = crypto_hash_cast(tfm); + struct hash_desc desc = { .tfm = hash, .flags = tfm->crt_flags }; + + crypto_hash_final(&desc, out); +} +EXPORT_SYMBOL_GPL(crypto_digest_final); + +void crypto_digest_digest(struct crypto_tfm *tfm, + struct scatterlist *sg, unsigned int nsg, u8 *out) +{ + struct crypto_hash *hash = crypto_hash_cast(tfm); + struct hash_desc desc = { .tfm = hash, .flags = tfm->crt_flags }; + unsigned int nbytes = 0; + unsigned int i; + + for (i = 0; i < nsg; i++) + nbytes += sg[i].length; + + crypto_hash_digest(&desc, sg, nbytes, out); +} +EXPORT_SYMBOL_GPL(crypto_digest_digest); + +static int init(struct hash_desc *desc) +{ + struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm); + tfm->__crt_alg->cra_digest.dia_init(tfm); + return 0; } -static void update(struct crypto_tfm *tfm, - struct scatterlist *sg, unsigned int nsg) +static int update(struct hash_desc *desc, + struct scatterlist *sg, unsigned int nbytes) { - unsigned int i; + struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm); unsigned int alignmask = crypto_tfm_alg_alignmask(tfm); - for (i = 0; i < nsg; i++) { + if (!nbytes) + return 0; - struct page *pg = sg[i].page; - unsigned int offset = sg[i].offset; - unsigned int l = sg[i].length; + for (;;) { + struct page *pg = sg->page; + unsigned int offset = sg->offset; + unsigned int l = sg->length; + + if (unlikely(l > nbytes)) + l = nbytes; + nbytes -= l; do { unsigned int bytes_from_page = min(l, ((unsigned int) @@ -55,16 +115,23 @@ static void update(struct crypto_tfm *tfm, tfm->__crt_alg->cra_digest.dia_update(tfm, p, bytes_from_page); crypto_kunmap(src, 0); - crypto_yield(tfm->crt_flags); + crypto_yield(desc->flags); offset = 0; pg++; l -= bytes_from_page; } while (l > 0); + + if (!nbytes) + break; + sg = sg_next(sg); } + + return 0; } -static void final(struct crypto_tfm *tfm, u8 *out) +static int final(struct hash_desc *desc, u8 *out) { + struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm); unsigned long alignmask = crypto_tfm_alg_alignmask(tfm); struct digest_alg *digest = &tfm->__crt_alg->cra_digest; @@ -78,26 +145,30 @@ static void final(struct crypto_tfm *tfm, u8 *out) memcpy(out, dst, digest->dia_digestsize); } else digest->dia_final(tfm, out); + + return 0; } -static int nosetkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) +static int nosetkey(struct crypto_hash *tfm, const u8 *key, unsigned int keylen) { - tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + crypto_hash_clear_flags(tfm, CRYPTO_TFM_RES_MASK); return -ENOSYS; } -static int setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) +static int setkey(struct crypto_hash *hash, const u8 *key, unsigned int keylen) { - tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + struct crypto_tfm *tfm = crypto_hash_tfm(hash); + + crypto_hash_clear_flags(hash, CRYPTO_TFM_RES_MASK); return tfm->__crt_alg->cra_digest.dia_setkey(tfm, key, keylen); } -static void digest(struct crypto_tfm *tfm, - struct scatterlist *sg, unsigned int nsg, u8 *out) +static int digest(struct hash_desc *desc, + struct scatterlist *sg, unsigned int nbytes, u8 *out) { - init(tfm); - update(tfm, sg, nsg); - final(tfm, out); + init(desc); + update(desc, sg, nbytes); + return final(desc, out); } int crypto_init_digest_flags(struct crypto_tfm *tfm, u32 flags) @@ -107,14 +178,18 @@ int crypto_init_digest_flags(struct crypto_tfm *tfm, u32 flags) int crypto_init_digest_ops(struct crypto_tfm *tfm) { - struct digest_tfm *ops = &tfm->crt_digest; + struct hash_tfm *ops = &tfm->crt_hash; struct digest_alg *dalg = &tfm->__crt_alg->cra_digest; + + if (dalg->dia_digestsize > crypto_tfm_alg_blocksize(tfm)) + return -EINVAL; - ops->dit_init = init; - ops->dit_update = update; - ops->dit_final = final; - ops->dit_digest = digest; - ops->dit_setkey = dalg->dia_setkey ? setkey : nosetkey; + ops->init = init; + ops->update = update; + ops->final = final; + ops->digest = digest; + ops->setkey = dalg->dia_setkey ? setkey : nosetkey; + ops->digestsize = dalg->dia_digestsize; return crypto_alloc_hmac_block(tfm); } diff --git a/crypto/hash.c b/crypto/hash.c new file mode 100644 index 000000000000..cdec23d885fe --- /dev/null +++ b/crypto/hash.c @@ -0,0 +1,61 @@ +/* + * Cryptographic Hash operations. + * + * Copyright (c) 2006 Herbert Xu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include +#include +#include +#include + +#include "internal.h" + +static unsigned int crypto_hash_ctxsize(struct crypto_alg *alg) +{ + return alg->cra_ctxsize; +} + +static int crypto_init_hash_ops(struct crypto_tfm *tfm) +{ + struct hash_tfm *crt = &tfm->crt_hash; + struct hash_alg *alg = &tfm->__crt_alg->cra_hash; + + if (alg->digestsize > crypto_tfm_alg_blocksize(tfm)) + return -EINVAL; + + crt->init = alg->init; + crt->update = alg->update; + crt->final = alg->final; + crt->digest = alg->digest; + crt->setkey = alg->setkey; + crt->digestsize = alg->digestsize; + + return 0; +} + +static void crypto_hash_show(struct seq_file *m, struct crypto_alg *alg) + __attribute_used__; +static void crypto_hash_show(struct seq_file *m, struct crypto_alg *alg) +{ + seq_printf(m, "type : hash\n"); + seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); + seq_printf(m, "digestsize : %u\n", alg->cra_hash.digestsize); +} + +const struct crypto_type crypto_hash_type = { + .ctxsize = crypto_hash_ctxsize, + .init = crypto_init_hash_ops, +#ifdef CONFIG_PROC_FS + .show = crypto_hash_show, +#endif +}; +EXPORT_SYMBOL_GPL(crypto_hash_type); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Generic cryptographic hash type"); diff --git a/crypto/hmac.c b/crypto/hmac.c index 46120dee5ada..ecf7b0a95b56 100644 --- a/crypto/hmac.c +++ b/crypto/hmac.c @@ -35,9 +35,9 @@ int crypto_alloc_hmac_block(struct crypto_tfm *tfm) BUG_ON(!crypto_tfm_alg_blocksize(tfm)); - tfm->crt_digest.dit_hmac_block = kmalloc(crypto_tfm_alg_blocksize(tfm), - GFP_KERNEL); - if (tfm->crt_digest.dit_hmac_block == NULL) + tfm->crt_hash.hmac_block = kmalloc(crypto_tfm_alg_blocksize(tfm), + GFP_KERNEL); + if (tfm->crt_hash.hmac_block == NULL) ret = -ENOMEM; return ret; @@ -46,14 +46,14 @@ int crypto_alloc_hmac_block(struct crypto_tfm *tfm) void crypto_free_hmac_block(struct crypto_tfm *tfm) { - kfree(tfm->crt_digest.dit_hmac_block); + kfree(tfm->crt_hash.hmac_block); } void crypto_hmac_init(struct crypto_tfm *tfm, u8 *key, unsigned int *keylen) { unsigned int i; struct scatterlist tmp; - char *ipad = tfm->crt_digest.dit_hmac_block; + char *ipad = tfm->crt_hash.hmac_block; if (*keylen > crypto_tfm_alg_blocksize(tfm)) { hash_key(tfm, key, *keylen); @@ -83,7 +83,7 @@ void crypto_hmac_final(struct crypto_tfm *tfm, u8 *key, { unsigned int i; struct scatterlist tmp; - char *opad = tfm->crt_digest.dit_hmac_block; + char *opad = tfm->crt_hash.hmac_block; if (*keylen > crypto_tfm_alg_blocksize(tfm)) { hash_key(tfm, key, *keylen); diff --git a/crypto/scatterwalk.h b/crypto/scatterwalk.h index ace595a2e119..f1592cc2d0f4 100644 --- a/crypto/scatterwalk.h +++ b/crypto/scatterwalk.h @@ -20,11 +20,9 @@ #include "internal.h" -/* Define sg_next is an inline routine now in case we want to change - scatterlist to a linked list later. */ static inline struct scatterlist *sg_next(struct scatterlist *sg) { - return sg + 1; + return (++sg)->length ? sg : (void *)sg->page; } static inline unsigned long scatterwalk_samebuf(struct scatter_walk *walk_in, diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 444f602724db..5748aecdb414 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -82,6 +82,7 @@ struct blkcipher_walk { }; extern const struct crypto_type crypto_blkcipher_type; +extern const struct crypto_type crypto_hash_type; void crypto_mod_put(struct crypto_alg *alg); @@ -136,6 +137,11 @@ static inline struct cipher_alg *crypto_cipher_alg(struct crypto_cipher *tfm) return &crypto_cipher_tfm(tfm)->__crt_alg->cra_cipher; } +static inline void *crypto_hash_ctx_aligned(struct crypto_hash *tfm) +{ + return crypto_tfm_ctx_aligned(&tfm->base); +} + static inline void blkcipher_walk_init(struct blkcipher_walk *walk, struct scatterlist *dst, struct scatterlist *src, diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 0be666b50463..40c0aab8ad4c 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -31,8 +31,11 @@ #define CRYPTO_ALG_TYPE_MASK 0x0000000f #define CRYPTO_ALG_TYPE_CIPHER 0x00000001 #define CRYPTO_ALG_TYPE_DIGEST 0x00000002 -#define CRYPTO_ALG_TYPE_BLKCIPHER 0x00000003 -#define CRYPTO_ALG_TYPE_COMPRESS 0x00000004 +#define CRYPTO_ALG_TYPE_HASH 0x00000003 +#define CRYPTO_ALG_TYPE_BLKCIPHER 0x00000004 +#define CRYPTO_ALG_TYPE_COMPRESS 0x00000005 + +#define CRYPTO_ALG_TYPE_HASH_MASK 0x0000000e #define CRYPTO_ALG_LARVAL 0x00000010 #define CRYPTO_ALG_DEAD 0x00000020 @@ -90,6 +93,7 @@ struct scatterlist; struct crypto_blkcipher; +struct crypto_hash; struct crypto_tfm; struct crypto_type; @@ -107,6 +111,11 @@ struct cipher_desc { void *info; }; +struct hash_desc { + struct crypto_hash *tfm; + u32 flags; +}; + /* * Algorithms: modular crypto algorithm implementations, managed * via crypto_register_alg() and crypto_unregister_alg(). @@ -158,6 +167,19 @@ struct digest_alg { unsigned int keylen); }; +struct hash_alg { + int (*init)(struct hash_desc *desc); + int (*update)(struct hash_desc *desc, struct scatterlist *sg, + unsigned int nbytes); + int (*final)(struct hash_desc *desc, u8 *out); + int (*digest)(struct hash_desc *desc, struct scatterlist *sg, + unsigned int nbytes, u8 *out); + int (*setkey)(struct crypto_hash *tfm, const u8 *key, + unsigned int keylen); + + unsigned int digestsize; +}; + struct compress_alg { int (*coa_compress)(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen); @@ -168,6 +190,7 @@ struct compress_alg { #define cra_blkcipher cra_u.blkcipher #define cra_cipher cra_u.cipher #define cra_digest cra_u.digest +#define cra_hash cra_u.hash #define cra_compress cra_u.compress struct crypto_alg { @@ -191,6 +214,7 @@ struct crypto_alg { struct blkcipher_alg blkcipher; struct cipher_alg cipher; struct digest_alg digest; + struct hash_alg hash; struct compress_alg compress; } cra_u; @@ -262,18 +286,19 @@ struct cipher_tfm { void (*cit_decrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); }; -struct digest_tfm { - void (*dit_init)(struct crypto_tfm *tfm); - void (*dit_update)(struct crypto_tfm *tfm, - struct scatterlist *sg, unsigned int nsg); - void (*dit_final)(struct crypto_tfm *tfm, u8 *out); - void (*dit_digest)(struct crypto_tfm *tfm, struct scatterlist *sg, - unsigned int nsg, u8 *out); - int (*dit_setkey)(struct crypto_tfm *tfm, - const u8 *key, unsigned int keylen); +struct hash_tfm { + int (*init)(struct hash_desc *desc); + int (*update)(struct hash_desc *desc, + struct scatterlist *sg, unsigned int nsg); + int (*final)(struct hash_desc *desc, u8 *out); + int (*digest)(struct hash_desc *desc, struct scatterlist *sg, + unsigned int nsg, u8 *out); + int (*setkey)(struct crypto_hash *tfm, const u8 *key, + unsigned int keylen); #ifdef CONFIG_CRYPTO_HMAC - void *dit_hmac_block; + void *hmac_block; #endif + unsigned int digestsize; }; struct compress_tfm { @@ -287,7 +312,7 @@ struct compress_tfm { #define crt_blkcipher crt_u.blkcipher #define crt_cipher crt_u.cipher -#define crt_digest crt_u.digest +#define crt_hash crt_u.hash #define crt_compress crt_u.compress struct crypto_tfm { @@ -297,7 +322,7 @@ struct crypto_tfm { union { struct blkcipher_tfm blkcipher; struct cipher_tfm cipher; - struct digest_tfm digest; + struct hash_tfm hash; struct compress_tfm compress; } crt_u; @@ -312,6 +337,10 @@ struct crypto_blkcipher { struct crypto_tfm base; }; +struct crypto_hash { + struct crypto_tfm base; +}; + enum { CRYPTOA_UNSPEC, CRYPTOA_ALG, @@ -647,39 +676,114 @@ static inline void crypto_cipher_decrypt_one(struct crypto_cipher *tfm, dst, src); } -static inline void crypto_digest_init(struct crypto_tfm *tfm) +void crypto_digest_init(struct crypto_tfm *tfm); +void crypto_digest_update(struct crypto_tfm *tfm, + struct scatterlist *sg, unsigned int nsg); +void crypto_digest_final(struct crypto_tfm *tfm, u8 *out); +void crypto_digest_digest(struct crypto_tfm *tfm, + struct scatterlist *sg, unsigned int nsg, u8 *out); + +static inline struct crypto_hash *__crypto_hash_cast(struct crypto_tfm *tfm) { - BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST); - tfm->crt_digest.dit_init(tfm); + return (struct crypto_hash *)tfm; } -static inline void crypto_digest_update(struct crypto_tfm *tfm, - struct scatterlist *sg, - unsigned int nsg) +static inline struct crypto_hash *crypto_hash_cast(struct crypto_tfm *tfm) { - BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST); - tfm->crt_digest.dit_update(tfm, sg, nsg); -} - -static inline void crypto_digest_final(struct crypto_tfm *tfm, u8 *out) -{ - BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST); - tfm->crt_digest.dit_final(tfm, out); -} - -static inline void crypto_digest_digest(struct crypto_tfm *tfm, - struct scatterlist *sg, - unsigned int nsg, u8 *out) -{ - BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST); - tfm->crt_digest.dit_digest(tfm, sg, nsg, out); + BUG_ON((crypto_tfm_alg_type(tfm) ^ CRYPTO_ALG_TYPE_HASH) & + CRYPTO_ALG_TYPE_HASH_MASK); + return __crypto_hash_cast(tfm); } static inline int crypto_digest_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { - BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST); - return tfm->crt_digest.dit_setkey(tfm, key, keylen); + return tfm->crt_hash.setkey(crypto_hash_cast(tfm), key, keylen); +} + +static inline struct crypto_hash *crypto_alloc_hash(const char *alg_name, + u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_HASH; + mask |= CRYPTO_ALG_TYPE_HASH_MASK; + + return __crypto_hash_cast(crypto_alloc_base(alg_name, type, mask)); +} + +static inline struct crypto_tfm *crypto_hash_tfm(struct crypto_hash *tfm) +{ + return &tfm->base; +} + +static inline void crypto_free_hash(struct crypto_hash *tfm) +{ + crypto_free_tfm(crypto_hash_tfm(tfm)); +} + +static inline struct hash_tfm *crypto_hash_crt(struct crypto_hash *tfm) +{ + return &crypto_hash_tfm(tfm)->crt_hash; +} + +static inline unsigned int crypto_hash_blocksize(struct crypto_hash *tfm) +{ + return crypto_tfm_alg_blocksize(crypto_hash_tfm(tfm)); +} + +static inline unsigned int crypto_hash_alignmask(struct crypto_hash *tfm) +{ + return crypto_tfm_alg_alignmask(crypto_hash_tfm(tfm)); +} + +static inline unsigned int crypto_hash_digestsize(struct crypto_hash *tfm) +{ + return crypto_hash_crt(tfm)->digestsize; +} + +static inline u32 crypto_hash_get_flags(struct crypto_hash *tfm) +{ + return crypto_tfm_get_flags(crypto_hash_tfm(tfm)); +} + +static inline void crypto_hash_set_flags(struct crypto_hash *tfm, u32 flags) +{ + crypto_tfm_set_flags(crypto_hash_tfm(tfm), flags); +} + +static inline void crypto_hash_clear_flags(struct crypto_hash *tfm, u32 flags) +{ + crypto_tfm_clear_flags(crypto_hash_tfm(tfm), flags); +} + +static inline int crypto_hash_init(struct hash_desc *desc) +{ + return crypto_hash_crt(desc->tfm)->init(desc); +} + +static inline int crypto_hash_update(struct hash_desc *desc, + struct scatterlist *sg, + unsigned int nbytes) +{ + return crypto_hash_crt(desc->tfm)->update(desc, sg, nbytes); +} + +static inline int crypto_hash_final(struct hash_desc *desc, u8 *out) +{ + return crypto_hash_crt(desc->tfm)->final(desc, out); +} + +static inline int crypto_hash_digest(struct hash_desc *desc, + struct scatterlist *sg, + unsigned int nbytes, u8 *out) +{ + return crypto_hash_crt(desc->tfm)->digest(desc, sg, nbytes, out); +} + +static inline int crypto_hash_setkey(struct crypto_hash *hash, + const u8 *key, unsigned int keylen) +{ + return crypto_hash_crt(hash)->setkey(hash, key, keylen); } static int crypto_cipher_encrypt(struct crypto_tfm *tfm, From 0796ae061e6da5de7cfc1af57dfd42a73908b1bf Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 21 Aug 2006 20:50:52 +1000 Subject: [PATCH 0326/1063] [CRYPTO] hmac: Add crypto template implementation This patch rewrites HMAC as a crypto template. This means that HMAC is no longer a hard-coded part of the API. It's now a template that generates standard digest algorithms like any other. The old HMAC is preserved until all current users are converted. The same structure can be used by other MACs such as AES-XCBC-MAC. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/Kconfig | 1 + crypto/hmac.c | 241 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 236 insertions(+), 6 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 69c5f992bcd4..f07d9237950f 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -34,6 +34,7 @@ config CRYPTO_MANAGER config CRYPTO_HMAC bool "HMAC support" + select CRYPTO_HASH help HMAC: Keyed-Hashing for Message Authentication (RFC2104). This is required for IPSec. diff --git a/crypto/hmac.c b/crypto/hmac.c index ecf7b0a95b56..eac77e294740 100644 --- a/crypto/hmac.c +++ b/crypto/hmac.c @@ -4,22 +4,30 @@ * HMAC: Keyed-Hashing for Message Authentication (RFC2104). * * Copyright (c) 2002 James Morris + * Copyright (c) 2006 Herbert Xu * * The HMAC implementation is derived from USAGI. * Copyright (c) 2002 Kazunori Miyazawa / USAGI * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) + * Software Foundation; either version 2 of the License, or (at your option) * any later version. * */ -#include -#include -#include -#include + +#include +#include +#include +#include +#include #include -#include "internal.h" +#include +#include + +struct hmac_ctx { + struct crypto_hash *child; +}; static void hash_key(struct crypto_tfm *tfm, u8 *key, unsigned int keylen) { @@ -122,3 +130,224 @@ EXPORT_SYMBOL_GPL(crypto_hmac_update); EXPORT_SYMBOL_GPL(crypto_hmac_final); EXPORT_SYMBOL_GPL(crypto_hmac); +static inline void *align_ptr(void *p, unsigned int align) +{ + return (void *)ALIGN((unsigned long)p, align); +} + +static inline struct hmac_ctx *hmac_ctx(struct crypto_hash *tfm) +{ + return align_ptr(crypto_hash_ctx_aligned(tfm) + + crypto_hash_blocksize(tfm) * 2 + + crypto_hash_digestsize(tfm), sizeof(void *)); +} + +static int hmac_setkey(struct crypto_hash *parent, + const u8 *inkey, unsigned int keylen) +{ + int bs = crypto_hash_blocksize(parent); + int ds = crypto_hash_digestsize(parent); + char *ipad = crypto_hash_ctx_aligned(parent); + char *opad = ipad + bs; + char *digest = opad + bs; + struct hmac_ctx *ctx = align_ptr(digest + ds, sizeof(void *)); + struct crypto_hash *tfm = ctx->child; + unsigned int i; + + if (keylen > bs) { + struct hash_desc desc; + struct scatterlist tmp; + int err; + + desc.tfm = tfm; + desc.flags = crypto_hash_get_flags(parent); + desc.flags &= CRYPTO_TFM_REQ_MAY_SLEEP; + sg_set_buf(&tmp, inkey, keylen); + + err = crypto_hash_digest(&desc, &tmp, keylen, digest); + if (err) + return err; + + inkey = digest; + keylen = ds; + } + + memcpy(ipad, inkey, keylen); + memset(ipad + keylen, 0, bs - keylen); + memcpy(opad, ipad, bs); + + for (i = 0; i < bs; i++) { + ipad[i] ^= 0x36; + opad[i] ^= 0x5c; + } + + return 0; +} + +static int hmac_init(struct hash_desc *pdesc) +{ + struct crypto_hash *parent = pdesc->tfm; + int bs = crypto_hash_blocksize(parent); + int ds = crypto_hash_digestsize(parent); + char *ipad = crypto_hash_ctx_aligned(parent); + struct hmac_ctx *ctx = align_ptr(ipad + bs * 2 + ds, sizeof(void *)); + struct hash_desc desc; + struct scatterlist tmp; + + desc.tfm = ctx->child; + desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP; + sg_set_buf(&tmp, ipad, bs); + + return unlikely(crypto_hash_init(&desc)) ?: + crypto_hash_update(&desc, &tmp, 1); +} + +static int hmac_update(struct hash_desc *pdesc, + struct scatterlist *sg, unsigned int nbytes) +{ + struct hmac_ctx *ctx = hmac_ctx(pdesc->tfm); + struct hash_desc desc; + + desc.tfm = ctx->child; + desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP; + + return crypto_hash_update(&desc, sg, nbytes); +} + +static int hmac_final(struct hash_desc *pdesc, u8 *out) +{ + struct crypto_hash *parent = pdesc->tfm; + int bs = crypto_hash_blocksize(parent); + int ds = crypto_hash_digestsize(parent); + char *opad = crypto_hash_ctx_aligned(parent) + bs; + char *digest = opad + bs; + struct hmac_ctx *ctx = align_ptr(digest + ds, sizeof(void *)); + struct hash_desc desc; + struct scatterlist tmp; + + desc.tfm = ctx->child; + desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP; + sg_set_buf(&tmp, opad, bs + ds); + + return unlikely(crypto_hash_final(&desc, digest)) ?: + crypto_hash_digest(&desc, &tmp, bs + ds, out); +} + +static int hmac_digest(struct hash_desc *pdesc, struct scatterlist *sg, + unsigned int nbytes, u8 *out) +{ + struct crypto_hash *parent = pdesc->tfm; + int bs = crypto_hash_blocksize(parent); + int ds = crypto_hash_digestsize(parent); + char *ipad = crypto_hash_ctx_aligned(parent); + char *opad = ipad + bs; + char *digest = opad + bs; + struct hmac_ctx *ctx = align_ptr(digest + ds, sizeof(void *)); + struct hash_desc desc; + struct scatterlist sg1[2]; + struct scatterlist sg2[1]; + + desc.tfm = ctx->child; + desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP; + + sg_set_buf(sg1, ipad, bs); + sg1[1].page = (void *)sg; + sg1[1].length = 0; + sg_set_buf(sg2, opad, bs + ds); + + return unlikely(crypto_hash_digest(&desc, sg1, nbytes + bs, digest)) ?: + crypto_hash_digest(&desc, sg2, bs + ds, out); +} + +static int hmac_init_tfm(struct crypto_tfm *tfm) +{ + struct crypto_instance *inst = (void *)tfm->__crt_alg; + struct crypto_spawn *spawn = crypto_instance_ctx(inst); + struct hmac_ctx *ctx = hmac_ctx(__crypto_hash_cast(tfm)); + + tfm = crypto_spawn_tfm(spawn); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + ctx->child = crypto_hash_cast(tfm); + return 0; +} + +static void hmac_exit_tfm(struct crypto_tfm *tfm) +{ + struct hmac_ctx *ctx = hmac_ctx(__crypto_hash_cast(tfm)); + crypto_free_hash(ctx->child); +} + +static void hmac_free(struct crypto_instance *inst) +{ + crypto_drop_spawn(crypto_instance_ctx(inst)); + kfree(inst); +} + +static struct crypto_instance *hmac_alloc(void *param, unsigned int len) +{ + struct crypto_instance *inst; + struct crypto_alg *alg; + + alg = crypto_get_attr_alg(param, len, CRYPTO_ALG_TYPE_HASH, + CRYPTO_ALG_TYPE_HASH_MASK | CRYPTO_ALG_ASYNC); + if (IS_ERR(alg)) + return ERR_PTR(PTR_ERR(alg)); + + inst = crypto_alloc_instance("hmac", alg); + if (IS_ERR(inst)) + goto out_put_alg; + + inst->alg.cra_flags = CRYPTO_ALG_TYPE_HASH; + inst->alg.cra_priority = alg->cra_priority; + inst->alg.cra_blocksize = alg->cra_blocksize; + inst->alg.cra_alignmask = alg->cra_alignmask; + inst->alg.cra_type = &crypto_hash_type; + + inst->alg.cra_hash.digestsize = + (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == + CRYPTO_ALG_TYPE_HASH ? alg->cra_hash.digestsize : + alg->cra_digest.dia_digestsize; + + inst->alg.cra_ctxsize = sizeof(struct hmac_ctx) + + ALIGN(inst->alg.cra_blocksize * 2 + + inst->alg.cra_hash.digestsize, + sizeof(void *)); + + inst->alg.cra_init = hmac_init_tfm; + inst->alg.cra_exit = hmac_exit_tfm; + + inst->alg.cra_hash.init = hmac_init; + inst->alg.cra_hash.update = hmac_update; + inst->alg.cra_hash.final = hmac_final; + inst->alg.cra_hash.digest = hmac_digest; + inst->alg.cra_hash.setkey = hmac_setkey; + +out_put_alg: + crypto_mod_put(alg); + return inst; +} + +static struct crypto_template hmac_tmpl = { + .name = "hmac", + .alloc = hmac_alloc, + .free = hmac_free, + .module = THIS_MODULE, +}; + +static int __init hmac_module_init(void) +{ + return crypto_register_template(&hmac_tmpl); +} + +static void __exit hmac_module_exit(void) +{ + crypto_unregister_template(&hmac_tmpl); +} + +module_init(hmac_module_init); +module_exit(hmac_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("HMAC hash algorithm"); From e9d41164e2fdd897fe4520c2079ea0000f6e0ec3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 19 Aug 2006 21:38:49 +1000 Subject: [PATCH 0327/1063] [CRYPTO] tcrypt: Use HMAC template and hash interface This patch converts tcrypt to use the new HMAC template rather than the hard-coded version of HMAC. It also converts all digest users to use the new cipher interface. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/tcrypt.c | 381 ++++++++++++++++++++++++++++-------------------- crypto/tcrypt.h | 23 +-- 2 files changed, 226 insertions(+), 178 deletions(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 5e2278069d22..840ab8be0b96 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -88,9 +88,11 @@ static void test_hash(char *algo, struct hash_testvec *template, unsigned int i, j, k, temp; struct scatterlist sg[8]; char result[64]; - struct crypto_tfm *tfm; + struct crypto_hash *tfm; + struct hash_desc desc; struct hash_testvec *hash_tv; unsigned int tsize; + int ret; printk("\ntesting %s\n", algo); @@ -104,27 +106,42 @@ static void test_hash(char *algo, struct hash_testvec *template, memcpy(tvmem, template, tsize); hash_tv = (void *)tvmem; - tfm = crypto_alloc_tfm(algo, 0); - if (tfm == NULL) { - printk("failed to load transform for %s\n", algo); + + tfm = crypto_alloc_hash(algo, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) { + printk("failed to load transform for %s: %ld\n", algo, + PTR_ERR(tfm)); return; } + desc.tfm = tfm; + desc.flags = 0; + for (i = 0; i < tcount; i++) { printk("test %u:\n", i + 1); memset(result, 0, 64); sg_set_buf(&sg[0], hash_tv[i].plaintext, hash_tv[i].psize); - crypto_digest_init(tfm); - crypto_digest_setkey(tfm, hash_tv[i].key, hash_tv[i].ksize); - crypto_digest_update(tfm, sg, 1); - crypto_digest_final(tfm, result); + if (hash_tv[i].ksize) { + ret = crypto_hash_setkey(tfm, hash_tv[i].key, + hash_tv[i].ksize); + if (ret) { + printk("setkey() failed ret=%d\n", ret); + goto out; + } + } - hexdump(result, crypto_tfm_alg_digestsize(tfm)); + ret = crypto_hash_digest(&desc, sg, hash_tv[i].psize, result); + if (ret) { + printk("digest () failed ret=%d\n", ret); + goto out; + } + + hexdump(result, crypto_hash_digestsize(tfm)); printk("%s\n", memcmp(result, hash_tv[i].digest, - crypto_tfm_alg_digestsize(tfm)) ? + crypto_hash_digestsize(tfm)) ? "fail" : "pass"); } @@ -150,104 +167,34 @@ static void test_hash(char *algo, struct hash_testvec *template, hash_tv[i].tap[k]); } - crypto_digest_digest(tfm, sg, hash_tv[i].np, result); + if (hash_tv[i].ksize) { + ret = crypto_hash_setkey(tfm, hash_tv[i].key, + hash_tv[i].ksize); - hexdump(result, crypto_tfm_alg_digestsize(tfm)); - printk("%s\n", - memcmp(result, hash_tv[i].digest, - crypto_tfm_alg_digestsize(tfm)) ? - "fail" : "pass"); - } - } - - crypto_free_tfm(tfm); -} - - -#ifdef CONFIG_CRYPTO_HMAC - -static void test_hmac(char *algo, struct hmac_testvec *template, - unsigned int tcount) -{ - unsigned int i, j, k, temp; - struct scatterlist sg[8]; - char result[64]; - struct crypto_tfm *tfm; - struct hmac_testvec *hmac_tv; - unsigned int tsize, klen; - - tfm = crypto_alloc_tfm(algo, 0); - if (tfm == NULL) { - printk("failed to load transform for %s\n", algo); - return; - } - - printk("\ntesting hmac_%s\n", algo); - - tsize = sizeof(struct hmac_testvec); - tsize *= tcount; - if (tsize > TVMEMSIZE) { - printk("template (%u) too big for tvmem (%u)\n", tsize, - TVMEMSIZE); - goto out; - } - - memcpy(tvmem, template, tsize); - hmac_tv = (void *)tvmem; - - for (i = 0; i < tcount; i++) { - printk("test %u:\n", i + 1); - memset(result, 0, sizeof (result)); - - klen = hmac_tv[i].ksize; - sg_set_buf(&sg[0], hmac_tv[i].plaintext, hmac_tv[i].psize); - - crypto_hmac(tfm, hmac_tv[i].key, &klen, sg, 1, result); - - hexdump(result, crypto_tfm_alg_digestsize(tfm)); - printk("%s\n", - memcmp(result, hmac_tv[i].digest, - crypto_tfm_alg_digestsize(tfm)) ? "fail" : - "pass"); - } - - printk("\ntesting hmac_%s across pages\n", algo); - - memset(xbuf, 0, XBUFSIZE); - - j = 0; - for (i = 0; i < tcount; i++) { - if (hmac_tv[i].np) { - j++; - printk("test %u:\n",j); - memset(result, 0, 64); - - temp = 0; - klen = hmac_tv[i].ksize; - for (k = 0; k < hmac_tv[i].np; k++) { - memcpy(&xbuf[IDX[k]], - hmac_tv[i].plaintext + temp, - hmac_tv[i].tap[k]); - temp += hmac_tv[i].tap[k]; - sg_set_buf(&sg[k], &xbuf[IDX[k]], - hmac_tv[i].tap[k]); + if (ret) { + printk("setkey() failed ret=%d\n", ret); + goto out; + } } - crypto_hmac(tfm, hmac_tv[i].key, &klen, sg, - hmac_tv[i].np, result); - hexdump(result, crypto_tfm_alg_digestsize(tfm)); + ret = crypto_hash_digest(&desc, sg, hash_tv[i].psize, + result); + if (ret) { + printk("digest () failed ret=%d\n", ret); + goto out; + } + hexdump(result, crypto_hash_digestsize(tfm)); printk("%s\n", - memcmp(result, hmac_tv[i].digest, - crypto_tfm_alg_digestsize(tfm)) ? + memcmp(result, hash_tv[i].digest, + crypto_hash_digestsize(tfm)) ? "fail" : "pass"); } } -out: - crypto_free_tfm(tfm); -} -#endif /* CONFIG_CRYPTO_HMAC */ +out: + crypto_free_hash(tfm); +} static void test_cipher(char *algo, int enc, struct cipher_testvec *template, unsigned int tcount) @@ -570,97 +517,202 @@ out: crypto_free_blkcipher(tfm); } -static void test_digest_jiffies(struct crypto_tfm *tfm, char *p, int blen, - int plen, char *out, int sec) +static int test_hash_jiffies_digest(struct hash_desc *desc, char *p, int blen, + char *out, int sec) { struct scatterlist sg[1]; unsigned long start, end; - int bcount, pcount; + int bcount; + int ret; for (start = jiffies, end = start + sec * HZ, bcount = 0; time_before(jiffies, end); bcount++) { - crypto_digest_init(tfm); - for (pcount = 0; pcount < blen; pcount += plen) { - sg_set_buf(sg, p + pcount, plen); - crypto_digest_update(tfm, sg, 1); - } - /* we assume there is enough space in 'out' for the result */ - crypto_digest_final(tfm, out); + sg_set_buf(sg, p, blen); + ret = crypto_hash_digest(desc, sg, blen, out); + if (ret) + return ret; } printk("%6u opers/sec, %9lu bytes/sec\n", bcount / sec, ((long)bcount * blen) / sec); - return; + return 0; } -static void test_digest_cycles(struct crypto_tfm *tfm, char *p, int blen, - int plen, char *out) +static int test_hash_jiffies(struct hash_desc *desc, char *p, int blen, + int plen, char *out, int sec) +{ + struct scatterlist sg[1]; + unsigned long start, end; + int bcount, pcount; + int ret; + + if (plen == blen) + return test_hash_jiffies_digest(desc, p, blen, out, sec); + + for (start = jiffies, end = start + sec * HZ, bcount = 0; + time_before(jiffies, end); bcount++) { + ret = crypto_hash_init(desc); + if (ret) + return ret; + for (pcount = 0; pcount < blen; pcount += plen) { + sg_set_buf(sg, p + pcount, plen); + ret = crypto_hash_update(desc, sg, plen); + if (ret) + return ret; + } + /* we assume there is enough space in 'out' for the result */ + ret = crypto_hash_final(desc, out); + if (ret) + return ret; + } + + printk("%6u opers/sec, %9lu bytes/sec\n", + bcount / sec, ((long)bcount * blen) / sec); + + return 0; +} + +static int test_hash_cycles_digest(struct hash_desc *desc, char *p, int blen, + char *out) { struct scatterlist sg[1]; unsigned long cycles = 0; - int i, pcount; + int i; + int ret; local_bh_disable(); local_irq_disable(); /* Warm-up run. */ for (i = 0; i < 4; i++) { - crypto_digest_init(tfm); - for (pcount = 0; pcount < blen; pcount += plen) { - sg_set_buf(sg, p + pcount, plen); - crypto_digest_update(tfm, sg, 1); - } - crypto_digest_final(tfm, out); + sg_set_buf(sg, p, blen); + ret = crypto_hash_digest(desc, sg, blen, out); + if (ret) + goto out; } /* The real thing. */ for (i = 0; i < 8; i++) { cycles_t start, end; - crypto_digest_init(tfm); - start = get_cycles(); - for (pcount = 0; pcount < blen; pcount += plen) { - sg_set_buf(sg, p + pcount, plen); - crypto_digest_update(tfm, sg, 1); - } - crypto_digest_final(tfm, out); + sg_set_buf(sg, p, blen); + ret = crypto_hash_digest(desc, sg, blen, out); + if (ret) + goto out; end = get_cycles(); cycles += end - start; } +out: local_irq_enable(); local_bh_enable(); + if (ret) + return ret; + printk("%6lu cycles/operation, %4lu cycles/byte\n", cycles / 8, cycles / (8 * blen)); - return; + return 0; } -static void test_digest_speed(char *algo, unsigned int sec, - struct digest_speed *speed) +static int test_hash_cycles(struct hash_desc *desc, char *p, int blen, + int plen, char *out) { - struct crypto_tfm *tfm; + struct scatterlist sg[1]; + unsigned long cycles = 0; + int i, pcount; + int ret; + + if (plen == blen) + return test_hash_cycles_digest(desc, p, blen, out); + + local_bh_disable(); + local_irq_disable(); + + /* Warm-up run. */ + for (i = 0; i < 4; i++) { + ret = crypto_hash_init(desc); + if (ret) + goto out; + for (pcount = 0; pcount < blen; pcount += plen) { + sg_set_buf(sg, p + pcount, plen); + ret = crypto_hash_update(desc, sg, plen); + if (ret) + goto out; + } + crypto_hash_final(desc, out); + if (ret) + goto out; + } + + /* The real thing. */ + for (i = 0; i < 8; i++) { + cycles_t start, end; + + start = get_cycles(); + + ret = crypto_hash_init(desc); + if (ret) + goto out; + for (pcount = 0; pcount < blen; pcount += plen) { + sg_set_buf(sg, p + pcount, plen); + ret = crypto_hash_update(desc, sg, plen); + if (ret) + goto out; + } + ret = crypto_hash_final(desc, out); + if (ret) + goto out; + + end = get_cycles(); + + cycles += end - start; + } + +out: + local_irq_enable(); + local_bh_enable(); + + if (ret) + return ret; + + printk("%6lu cycles/operation, %4lu cycles/byte\n", + cycles / 8, cycles / (8 * blen)); + + return 0; +} + +static void test_hash_speed(char *algo, unsigned int sec, + struct hash_speed *speed) +{ + struct crypto_hash *tfm; + struct hash_desc desc; char output[1024]; int i; + int ret; printk("\ntesting speed of %s\n", algo); - tfm = crypto_alloc_tfm(algo, 0); + tfm = crypto_alloc_hash(algo, 0, CRYPTO_ALG_ASYNC); - if (tfm == NULL) { - printk("failed to load transform for %s\n", algo); + if (IS_ERR(tfm)) { + printk("failed to load transform for %s: %ld\n", algo, + PTR_ERR(tfm)); return; } - if (crypto_tfm_alg_digestsize(tfm) > sizeof(output)) { + desc.tfm = tfm; + desc.flags = 0; + + if (crypto_hash_digestsize(tfm) > sizeof(output)) { printk("digestsize(%u) > outputbuffer(%zu)\n", - crypto_tfm_alg_digestsize(tfm), sizeof(output)); + crypto_hash_digestsize(tfm), sizeof(output)); goto out; } @@ -677,13 +729,20 @@ static void test_digest_speed(char *algo, unsigned int sec, memset(tvmem, 0xff, speed[i].blen); if (sec) - test_digest_jiffies(tfm, tvmem, speed[i].blen, speed[i].plen, output, sec); + ret = test_hash_jiffies(&desc, tvmem, speed[i].blen, + speed[i].plen, output, sec); else - test_digest_cycles(tfm, tvmem, speed[i].blen, speed[i].plen, output); + ret = test_hash_cycles(&desc, tvmem, speed[i].blen, + speed[i].plen, output); + + if (ret) { + printk("hashing failed ret=%d\n", ret); + break; + } } out: - crypto_free_tfm(tfm); + crypto_free_hash(tfm); } static void test_deflate(void) @@ -911,11 +970,12 @@ static void do_test(void) test_hash("tgr128", tgr128_tv_template, TGR128_TEST_VECTORS); test_deflate(); test_hash("crc32c", crc32c_tv_template, CRC32C_TEST_VECTORS); -#ifdef CONFIG_CRYPTO_HMAC - test_hmac("md5", hmac_md5_tv_template, HMAC_MD5_TEST_VECTORS); - test_hmac("sha1", hmac_sha1_tv_template, HMAC_SHA1_TEST_VECTORS); - test_hmac("sha256", hmac_sha256_tv_template, HMAC_SHA256_TEST_VECTORS); -#endif + test_hash("hmac(md5)", hmac_md5_tv_template, + HMAC_MD5_TEST_VECTORS); + test_hash("hmac(sha1)", hmac_sha1_tv_template, + HMAC_SHA1_TEST_VECTORS); + test_hash("hmac(sha256)", hmac_sha256_tv_template, + HMAC_SHA256_TEST_VECTORS); test_hash("michael_mic", michael_mic_tv_template, MICHAEL_MIC_TEST_VECTORS); break; @@ -1106,20 +1166,21 @@ static void do_test(void) XETA_DEC_TEST_VECTORS); break; -#ifdef CONFIG_CRYPTO_HMAC case 100: - test_hmac("md5", hmac_md5_tv_template, HMAC_MD5_TEST_VECTORS); + test_hash("hmac(md5)", hmac_md5_tv_template, + HMAC_MD5_TEST_VECTORS); break; case 101: - test_hmac("sha1", hmac_sha1_tv_template, HMAC_SHA1_TEST_VECTORS); + test_hash("hmac(sha1)", hmac_sha1_tv_template, + HMAC_SHA1_TEST_VECTORS); break; case 102: - test_hmac("sha256", hmac_sha256_tv_template, HMAC_SHA256_TEST_VECTORS); + test_hash("hmac(sha256)", hmac_sha256_tv_template, + HMAC_SHA256_TEST_VECTORS); break; -#endif case 200: test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0, @@ -1188,51 +1249,51 @@ static void do_test(void) /* fall through */ case 301: - test_digest_speed("md4", sec, generic_digest_speed_template); + test_hash_speed("md4", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; case 302: - test_digest_speed("md5", sec, generic_digest_speed_template); + test_hash_speed("md5", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; case 303: - test_digest_speed("sha1", sec, generic_digest_speed_template); + test_hash_speed("sha1", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; case 304: - test_digest_speed("sha256", sec, generic_digest_speed_template); + test_hash_speed("sha256", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; case 305: - test_digest_speed("sha384", sec, generic_digest_speed_template); + test_hash_speed("sha384", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; case 306: - test_digest_speed("sha512", sec, generic_digest_speed_template); + test_hash_speed("sha512", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; case 307: - test_digest_speed("wp256", sec, generic_digest_speed_template); + test_hash_speed("wp256", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; case 308: - test_digest_speed("wp384", sec, generic_digest_speed_template); + test_hash_speed("wp384", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; case 309: - test_digest_speed("wp512", sec, generic_digest_speed_template); + test_hash_speed("wp512", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; case 310: - test_digest_speed("tgr128", sec, generic_digest_speed_template); + test_hash_speed("tgr128", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; case 311: - test_digest_speed("tgr160", sec, generic_digest_speed_template); + test_hash_speed("tgr160", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; case 312: - test_digest_speed("tgr192", sec, generic_digest_speed_template); + test_hash_speed("tgr192", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; case 399: diff --git a/crypto/tcrypt.h b/crypto/tcrypt.h index 408d5aad5864..a40c4411729e 100644 --- a/crypto/tcrypt.h +++ b/crypto/tcrypt.h @@ -36,16 +36,6 @@ struct hash_testvec { unsigned char ksize; }; -struct hmac_testvec { - char key[128]; - char plaintext[128]; - char digest[MAX_DIGEST_SIZE]; - unsigned char tap[MAX_TAP]; - unsigned char ksize; - unsigned char psize; - unsigned char np; -}; - struct cipher_testvec { char key[MAX_KEYLEN] __attribute__ ((__aligned__(4))); char iv[MAX_IVLEN]; @@ -65,7 +55,7 @@ struct cipher_speed { unsigned int blen; }; -struct digest_speed { +struct hash_speed { unsigned int blen; /* buffer length */ unsigned int plen; /* per-update length */ }; @@ -697,14 +687,13 @@ static struct hash_testvec tgr128_tv_template[] = { }, }; -#ifdef CONFIG_CRYPTO_HMAC /* * HMAC-MD5 test vectors from RFC2202 * (These need to be fixed to not use strlen). */ #define HMAC_MD5_TEST_VECTORS 7 -static struct hmac_testvec hmac_md5_tv_template[] = +static struct hash_testvec hmac_md5_tv_template[] = { { .key = { [0 ... 15] = 0x0b }, @@ -768,7 +757,7 @@ static struct hmac_testvec hmac_md5_tv_template[] = */ #define HMAC_SHA1_TEST_VECTORS 7 -static struct hmac_testvec hmac_sha1_tv_template[] = { +static struct hash_testvec hmac_sha1_tv_template[] = { { .key = { [0 ... 19] = 0x0b }, .ksize = 20, @@ -833,7 +822,7 @@ static struct hmac_testvec hmac_sha1_tv_template[] = { */ #define HMAC_SHA256_TEST_VECTORS 10 -static struct hmac_testvec hmac_sha256_tv_template[] = { +static struct hash_testvec hmac_sha256_tv_template[] = { { .key = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, @@ -944,8 +933,6 @@ static struct hmac_testvec hmac_sha256_tv_template[] = { }, }; -#endif /* CONFIG_CRYPTO_HMAC */ - /* * DES test vectors. */ @@ -3160,7 +3147,7 @@ static struct cipher_speed des_speed_template[] = { /* * Digest speed tests */ -static struct digest_speed generic_digest_speed_template[] = { +static struct hash_speed generic_hash_speed_template[] = { { .blen = 16, .plen = 16, }, { .blen = 64, .plen = 16, }, { .blen = 64, .plen = 64, }, From 07d4ee583e21830ec5604d31f65cdc60a6eca19e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 20 Aug 2006 14:24:50 +1000 Subject: [PATCH 0328/1063] [IPSEC]: Use HMAC template and hash interface This patch converts IPsec to use the new HMAC template. The names of existing simple digest algorithms may still be used to refer to their HMAC composites. The same structure can be used by other MACs such as AES-XCBC-MAC. This patch also switches from the digest interface to hash. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/ah.h | 29 ++++++++++++++++++----------- include/net/esp.h | 26 +++++++++++++++----------- include/net/xfrm.h | 9 +++++---- net/ipv4/ah4.c | 36 ++++++++++++++++++++++++------------ net/ipv4/esp4.c | 36 +++++++++++++++++++++--------------- net/ipv6/ah6.c | 35 +++++++++++++++++++++++------------ net/ipv6/esp6.c | 42 ++++++++++++++++++++++++------------------ net/xfrm/xfrm_algo.c | 40 +++++++++++++++++++++++++++------------- 8 files changed, 157 insertions(+), 96 deletions(-) diff --git a/include/net/ah.h b/include/net/ah.h index 8e27c9ba8b84..8f257c159902 100644 --- a/include/net/ah.h +++ b/include/net/ah.h @@ -15,22 +15,29 @@ struct ah_data int icv_full_len; int icv_trunc_len; - void (*icv)(struct ah_data*, - struct sk_buff *skb, u8 *icv); - - struct crypto_tfm *tfm; + struct crypto_hash *tfm; }; -static inline void -ah_hmac_digest(struct ah_data *ahp, struct sk_buff *skb, u8 *auth_data) +static inline int ah_mac_digest(struct ah_data *ahp, struct sk_buff *skb, + u8 *auth_data) { - struct crypto_tfm *tfm = ahp->tfm; + struct hash_desc desc; + int err; + + desc.tfm = ahp->tfm; + desc.flags = 0; memset(auth_data, 0, ahp->icv_trunc_len); - crypto_hmac_init(tfm, ahp->key, &ahp->key_len); - skb_icv_walk(skb, tfm, 0, skb->len, crypto_hmac_update); - crypto_hmac_final(tfm, ahp->key, &ahp->key_len, ahp->work_icv); - memcpy(auth_data, ahp->work_icv, ahp->icv_trunc_len); + err = crypto_hash_init(&desc); + if (unlikely(err)) + goto out; + err = skb_icv_walk(skb, &desc, 0, skb->len, crypto_hash_update); + if (unlikely(err)) + goto out; + err = crypto_hash_final(&desc, ahp->work_icv); + +out: + return err; } #endif diff --git a/include/net/esp.h b/include/net/esp.h index af2ff18700c7..064366d66eea 100644 --- a/include/net/esp.h +++ b/include/net/esp.h @@ -35,7 +35,7 @@ struct esp_data void (*icv)(struct esp_data*, struct sk_buff *skb, int offset, int len, u8 *icv); - struct crypto_tfm *tfm; + struct crypto_hash *tfm; } auth; }; @@ -43,18 +43,22 @@ extern int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, extern int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer); extern void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len); -static inline void -esp_hmac_digest(struct esp_data *esp, struct sk_buff *skb, int offset, - int len, u8 *auth_data) +static inline int esp_mac_digest(struct esp_data *esp, struct sk_buff *skb, + int offset, int len) { - struct crypto_tfm *tfm = esp->auth.tfm; - char *icv = esp->auth.work_icv; + struct hash_desc desc; + int err; - memset(auth_data, 0, esp->auth.icv_trunc_len); - crypto_hmac_init(tfm, esp->auth.key, &esp->auth.key_len); - skb_icv_walk(skb, tfm, offset, len, crypto_hmac_update); - crypto_hmac_final(tfm, esp->auth.key, &esp->auth.key_len, icv); - memcpy(auth_data, icv, esp->auth.icv_trunc_len); + desc.tfm = esp->auth.tfm; + desc.flags = 0; + + err = crypto_hash_init(&desc); + if (unlikely(err)) + return err; + err = skb_icv_walk(skb, &desc, offset, len, crypto_hash_update); + if (unlikely(err)) + return err; + return crypto_hash_final(&desc, esp->auth.work_icv); } #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e9114e41affc..3ecd9fa1ed4b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -984,12 +984,13 @@ extern struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name, int probe); extern struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe); extern struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe); -struct crypto_tfm; +struct hash_desc; struct scatterlist; -typedef void (icv_update_fn_t)(struct crypto_tfm *, struct scatterlist *, unsigned int); +typedef int (icv_update_fn_t)(struct hash_desc *, struct scatterlist *, + unsigned int); -extern void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm, - int offset, int len, icv_update_fn_t icv_update); +extern int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *tfm, + int offset, int len, icv_update_fn_t icv_update); static inline int xfrm_addr_cmp(xfrm_address_t *a, xfrm_address_t *b, int family) diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 1366bc6ce6a5..2b98943e6b02 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -97,7 +98,10 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) ah->spi = x->id.spi; ah->seq_no = htonl(++x->replay.oseq); xfrm_aevent_doreplay(x); - ahp->icv(ahp, skb, ah->auth_data); + err = ah_mac_digest(ahp, skb, ah->auth_data); + if (err) + goto error; + memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len); top_iph->tos = iph->tos; top_iph->ttl = iph->ttl; @@ -119,6 +123,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) { int ah_hlen; int ihl; + int err = -EINVAL; struct iphdr *iph; struct ip_auth_hdr *ah; struct ah_data *ahp; @@ -166,8 +171,11 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); skb_push(skb, ihl); - ahp->icv(ahp, skb, ah->auth_data); - if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) { + err = ah_mac_digest(ahp, skb, ah->auth_data); + if (err) + goto out; + err = -EINVAL; + if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) { x->stats.integrity_failed++; goto out; } @@ -179,7 +187,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) return 0; out: - return -EINVAL; + return err; } static void ah4_err(struct sk_buff *skb, u32 info) @@ -204,6 +212,7 @@ static int ah_init_state(struct xfrm_state *x) { struct ah_data *ahp = NULL; struct xfrm_algo_desc *aalg_desc; + struct crypto_hash *tfm; if (!x->aalg) goto error; @@ -221,24 +230,27 @@ static int ah_init_state(struct xfrm_state *x) ahp->key = x->aalg->alg_key; ahp->key_len = (x->aalg->alg_key_len+7)/8; - ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0); - if (!ahp->tfm) + tfm = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + goto error; + + ahp->tfm = tfm; + if (crypto_hash_setkey(tfm, ahp->key, ahp->key_len)) goto error; - ahp->icv = ah_hmac_digest; /* * Lookup the algorithm description maintained by xfrm_algo, * verify crypto transform properties, and store information * we need for AH processing. This lookup cannot fail here - * after a successful crypto_alloc_tfm(). + * after a successful crypto_alloc_hash(). */ aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); BUG_ON(!aalg_desc); if (aalg_desc->uinfo.auth.icv_fullbits/8 != - crypto_tfm_alg_digestsize(ahp->tfm)) { + crypto_hash_digestsize(tfm)) { printk(KERN_INFO "AH: %s digestsize %u != %hu\n", - x->aalg->alg_name, crypto_tfm_alg_digestsize(ahp->tfm), + x->aalg->alg_name, crypto_hash_digestsize(tfm), aalg_desc->uinfo.auth.icv_fullbits/8); goto error; } @@ -262,7 +274,7 @@ static int ah_init_state(struct xfrm_state *x) error: if (ahp) { kfree(ahp->work_icv); - crypto_free_tfm(ahp->tfm); + crypto_free_hash(ahp->tfm); kfree(ahp); } return -EINVAL; @@ -277,7 +289,7 @@ static void ah_destroy(struct xfrm_state *x) kfree(ahp->work_icv); ahp->work_icv = NULL; - crypto_free_tfm(ahp->tfm); + crypto_free_hash(ahp->tfm); ahp->tfm = NULL; kfree(ahp); } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 7c63ae494742..b428489f6ccd 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -121,9 +121,9 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) } if (esp->auth.icv_full_len) { - esp->auth.icv(esp, skb, (u8*)esph-skb->data, - sizeof(struct ip_esp_hdr) + esp->conf.ivlen+clen, trailer->tail); - pskb_put(skb, trailer, alen); + err = esp_mac_digest(esp, skb, (u8 *)esph - skb->data, + sizeof(*esph) + esp->conf.ivlen + clen); + memcpy(pskb_put(skb, trailer, alen), esp->auth.work_icv, alen); } ip_send_check(top_iph); @@ -163,15 +163,16 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) /* If integrity check is required, do this. */ if (esp->auth.icv_full_len) { - u8 sum[esp->auth.icv_full_len]; - u8 sum1[alen]; - - esp->auth.icv(esp, skb, 0, skb->len-alen, sum); + u8 sum[alen]; - if (skb_copy_bits(skb, skb->len-alen, sum1, alen)) + err = esp_mac_digest(esp, skb, 0, skb->len - alen); + if (err) + goto out; + + if (skb_copy_bits(skb, skb->len - alen, sum, alen)) BUG(); - if (unlikely(memcmp(sum, sum1, alen))) { + if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) { x->stats.integrity_failed++; goto out; } @@ -307,7 +308,7 @@ static void esp_destroy(struct xfrm_state *x) esp->conf.tfm = NULL; kfree(esp->conf.ivec); esp->conf.ivec = NULL; - crypto_free_tfm(esp->auth.tfm); + crypto_free_hash(esp->auth.tfm); esp->auth.tfm = NULL; kfree(esp->auth.work_icv); esp->auth.work_icv = NULL; @@ -333,22 +334,27 @@ static int esp_init_state(struct xfrm_state *x) if (x->aalg) { struct xfrm_algo_desc *aalg_desc; + struct crypto_hash *hash; esp->auth.key = x->aalg->alg_key; esp->auth.key_len = (x->aalg->alg_key_len+7)/8; - esp->auth.tfm = crypto_alloc_tfm(x->aalg->alg_name, 0); - if (esp->auth.tfm == NULL) + hash = crypto_alloc_hash(x->aalg->alg_name, 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(hash)) + goto error; + + esp->auth.tfm = hash; + if (crypto_hash_setkey(hash, esp->auth.key, esp->auth.key_len)) goto error; - esp->auth.icv = esp_hmac_digest; aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); BUG_ON(!aalg_desc); if (aalg_desc->uinfo.auth.icv_fullbits/8 != - crypto_tfm_alg_digestsize(esp->auth.tfm)) { + crypto_hash_digestsize(hash)) { NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n", x->aalg->alg_name, - crypto_tfm_alg_digestsize(esp->auth.tfm), + crypto_hash_digestsize(hash), aalg_desc->uinfo.auth.icv_fullbits/8); goto error; } diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 9d4831bd4335..00ffa7bc6c9f 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -213,7 +213,10 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) ah->spi = x->id.spi; ah->seq_no = htonl(++x->replay.oseq); xfrm_aevent_doreplay(x); - ahp->icv(ahp, skb, ah->auth_data); + err = ah_mac_digest(ahp, skb, ah->auth_data); + if (err) + goto error_free_iph; + memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len); err = 0; @@ -251,6 +254,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) u16 hdr_len; u16 ah_hlen; int nexthdr; + int err = -EINVAL; if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr))) goto out; @@ -292,8 +296,11 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); memset(ah->auth_data, 0, ahp->icv_trunc_len); skb_push(skb, hdr_len); - ahp->icv(ahp, skb, ah->auth_data); - if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) { + err = ah_mac_digest(ahp, skb, ah->auth_data); + if (err) + goto free_out; + err = -EINVAL; + if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) { LIMIT_NETDEBUG(KERN_WARNING "ipsec ah authentication error\n"); x->stats.integrity_failed++; goto free_out; @@ -310,7 +317,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) free_out: kfree(tmp_hdr); out: - return -EINVAL; + return err; } static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, @@ -338,6 +345,7 @@ static int ah6_init_state(struct xfrm_state *x) { struct ah_data *ahp = NULL; struct xfrm_algo_desc *aalg_desc; + struct crypto_hash *tfm; if (!x->aalg) goto error; @@ -355,24 +363,27 @@ static int ah6_init_state(struct xfrm_state *x) ahp->key = x->aalg->alg_key; ahp->key_len = (x->aalg->alg_key_len+7)/8; - ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0); - if (!ahp->tfm) + tfm = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + goto error; + + ahp->tfm = tfm; + if (crypto_hash_setkey(tfm, ahp->key, ahp->key_len)) goto error; - ahp->icv = ah_hmac_digest; /* * Lookup the algorithm description maintained by xfrm_algo, * verify crypto transform properties, and store information * we need for AH processing. This lookup cannot fail here - * after a successful crypto_alloc_tfm(). + * after a successful crypto_alloc_hash(). */ aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); BUG_ON(!aalg_desc); if (aalg_desc->uinfo.auth.icv_fullbits/8 != - crypto_tfm_alg_digestsize(ahp->tfm)) { + crypto_hash_digestsize(tfm)) { printk(KERN_INFO "AH: %s digestsize %u != %hu\n", - x->aalg->alg_name, crypto_tfm_alg_digestsize(ahp->tfm), + x->aalg->alg_name, crypto_hash_digestsize(tfm), aalg_desc->uinfo.auth.icv_fullbits/8); goto error; } @@ -396,7 +407,7 @@ static int ah6_init_state(struct xfrm_state *x) error: if (ahp) { kfree(ahp->work_icv); - crypto_free_tfm(ahp->tfm); + crypto_free_hash(ahp->tfm); kfree(ahp); } return -EINVAL; @@ -411,7 +422,7 @@ static void ah6_destroy(struct xfrm_state *x) kfree(ahp->work_icv); ahp->work_icv = NULL; - crypto_free_tfm(ahp->tfm); + crypto_free_hash(ahp->tfm); ahp->tfm = NULL; kfree(ahp); } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 46a7e687948e..2ebfd281e721 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -125,9 +125,9 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) } if (esp->auth.icv_full_len) { - esp->auth.icv(esp, skb, (u8*)esph-skb->data, - sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen+clen, trailer->tail); - pskb_put(skb, trailer, alen); + err = esp_mac_digest(esp, skb, (u8 *)esph - skb->data, + sizeof(*esph) + esp->conf.ivlen + clen); + memcpy(pskb_put(skb, trailer, alen), esp->auth.work_icv, alen); } error: @@ -162,15 +162,16 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) /* If integrity check is required, do this. */ if (esp->auth.icv_full_len) { - u8 sum[esp->auth.icv_full_len]; - u8 sum1[alen]; + u8 sum[alen]; - esp->auth.icv(esp, skb, 0, skb->len-alen, sum); + ret = esp_mac_digest(esp, skb, 0, skb->len - alen); + if (ret) + goto out; - if (skb_copy_bits(skb, skb->len-alen, sum1, alen)) + if (skb_copy_bits(skb, skb->len - alen, sum, alen)) BUG(); - if (unlikely(memcmp(sum, sum1, alen))) { + if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) { x->stats.integrity_failed++; ret = -EINVAL; goto out; @@ -279,7 +280,7 @@ static void esp6_destroy(struct xfrm_state *x) esp->conf.tfm = NULL; kfree(esp->conf.ivec); esp->conf.ivec = NULL; - crypto_free_tfm(esp->auth.tfm); + crypto_free_hash(esp->auth.tfm); esp->auth.tfm = NULL; kfree(esp->auth.work_icv); esp->auth.work_icv = NULL; @@ -308,24 +309,29 @@ static int esp6_init_state(struct xfrm_state *x) if (x->aalg) { struct xfrm_algo_desc *aalg_desc; + struct crypto_hash *hash; esp->auth.key = x->aalg->alg_key; esp->auth.key_len = (x->aalg->alg_key_len+7)/8; - esp->auth.tfm = crypto_alloc_tfm(x->aalg->alg_name, 0); - if (esp->auth.tfm == NULL) + hash = crypto_alloc_hash(x->aalg->alg_name, 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(hash)) + goto error; + + esp->auth.tfm = hash; + if (crypto_hash_setkey(hash, esp->auth.key, esp->auth.key_len)) goto error; - esp->auth.icv = esp_hmac_digest; aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); BUG_ON(!aalg_desc); if (aalg_desc->uinfo.auth.icv_fullbits/8 != - crypto_tfm_alg_digestsize(esp->auth.tfm)) { - printk(KERN_INFO "ESP: %s digestsize %u != %hu\n", - x->aalg->alg_name, - crypto_tfm_alg_digestsize(esp->auth.tfm), - aalg_desc->uinfo.auth.icv_fullbits/8); - goto error; + crypto_hash_digestsize(hash)) { + NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n", + x->aalg->alg_name, + crypto_hash_digestsize(hash), + aalg_desc->uinfo.auth.icv_fullbits/8); + goto error; } esp->auth.icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8; diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 9b03d8497fba..87918f281bb4 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -30,7 +30,8 @@ */ static struct xfrm_algo_desc aalg_list[] = { { - .name = "digest_null", + .name = "hmac(digest_null)", + .compat = "digest_null", .uinfo = { .auth = { @@ -47,7 +48,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, { - .name = "md5", + .name = "hmac(md5)", + .compat = "md5", .uinfo = { .auth = { @@ -64,7 +66,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, { - .name = "sha1", + .name = "hmac(sha1)", + .compat = "sha1", .uinfo = { .auth = { @@ -81,7 +84,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, { - .name = "sha256", + .name = "hmac(sha256)", + .compat = "sha256", .uinfo = { .auth = { @@ -98,7 +102,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, { - .name = "ripemd160", + .name = "hmac(ripemd160)", + .compat = "ripemd160", .uinfo = { .auth = { @@ -480,11 +485,12 @@ EXPORT_SYMBOL_GPL(xfrm_count_enc_supported); /* Move to common area: it is shared with AH. */ -void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm, - int offset, int len, icv_update_fn_t icv_update) +int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc, + int offset, int len, icv_update_fn_t icv_update) { int start = skb_headlen(skb); int i, copy = start - offset; + int err; struct scatterlist sg; /* Checksum header. */ @@ -496,10 +502,12 @@ void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm, sg.offset = (unsigned long)(skb->data + offset) % PAGE_SIZE; sg.length = copy; - icv_update(tfm, &sg, 1); + err = icv_update(desc, &sg, copy); + if (unlikely(err)) + return err; if ((len -= copy) == 0) - return; + return 0; offset += copy; } @@ -519,10 +527,12 @@ void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm, sg.offset = frag->page_offset + offset-start; sg.length = copy; - icv_update(tfm, &sg, 1); + err = icv_update(desc, &sg, copy); + if (unlikely(err)) + return err; if (!(len -= copy)) - return; + return 0; offset += copy; } start = end; @@ -540,15 +550,19 @@ void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm, if ((copy = end - offset) > 0) { if (copy > len) copy = len; - skb_icv_walk(list, tfm, offset-start, copy, icv_update); + err = skb_icv_walk(list, desc, offset-start, + copy, icv_update); + if (unlikely(err)) + return err; if ((len -= copy) == 0) - return; + return 0; offset += copy; } start = end; } } BUG_ON(len); + return 0; } EXPORT_SYMBOL_GPL(skb_icv_walk); From 1b489e11d4df82514792f9f981f31976f8a94ddf Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 20 Aug 2006 15:07:14 +1000 Subject: [PATCH 0329/1063] [SCTP]: Use HMAC template and hash interface This patch converts SCTP to use the new HMAC template and hash interface. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 4 ++-- include/net/sctp/sctp.h | 11 ----------- include/net/sctp/structs.h | 3 ++- net/sctp/endpointola.c | 2 +- net/sctp/sm_make_chunk.c | 37 ++++++++++++++++++++++++++---------- net/sctp/socket.c | 6 +++--- 6 files changed, 35 insertions(+), 28 deletions(-) diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index c51541ee0247..57166bfdf8eb 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -312,9 +312,9 @@ enum { SCTP_MAX_GABS = 16 }; */ #if defined (CONFIG_SCTP_HMAC_MD5) -#define SCTP_COOKIE_HMAC_ALG "md5" +#define SCTP_COOKIE_HMAC_ALG "hmac(md5)" #elif defined (CONFIG_SCTP_HMAC_SHA1) -#define SCTP_COOKIE_HMAC_ALG "sha1" +#define SCTP_COOKIE_HMAC_ALG "hmac(sha1)" #else #define SCTP_COOKIE_HMAC_ALG NULL #endif diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 92eae0e0f3f1..1c1abce5f6b6 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -330,17 +330,6 @@ static inline void sctp_v6_exit(void) { return; } #endif /* #if defined(CONFIG_IPV6) */ -/* Some wrappers, in case crypto not available. */ -#if defined (CONFIG_CRYPTO_HMAC) -#define sctp_crypto_alloc_tfm crypto_alloc_tfm -#define sctp_crypto_free_tfm crypto_free_tfm -#define sctp_crypto_hmac crypto_hmac -#else -#define sctp_crypto_alloc_tfm(x...) NULL -#define sctp_crypto_free_tfm(x...) -#define sctp_crypto_hmac(x...) -#endif - /* Map an association to an assoc_id. */ static inline sctp_assoc_t sctp_assoc2id(const struct sctp_association *asoc) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index e5aa7ff1f5b5..0412e730c765 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -87,6 +87,7 @@ struct sctp_bind_addr; struct sctp_ulpq; struct sctp_ep_common; struct sctp_ssnmap; +struct crypto_hash; #include @@ -264,7 +265,7 @@ struct sctp_sock { struct sctp_pf *pf; /* Access to HMAC transform. */ - struct crypto_tfm *hmac; + struct crypto_hash *hmac; /* What is our base endpointer? */ struct sctp_endpoint *ep; diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index ffda1d680529..35c49ff2d062 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -173,7 +173,7 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) SCTP_ASSERT(ep->base.dead, "Endpoint is not dead", return); /* Free up the HMAC transform. */ - sctp_crypto_free_tfm(sctp_sk(ep->base.sk)->hmac); + crypto_free_hash(sctp_sk(ep->base.sk)->hmac); /* Cleanup. */ sctp_inq_free(&ep->base.inqueue); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 17b509282cf2..7745bdea7817 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1282,10 +1282,8 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, retval = kmalloc(*cookie_len, GFP_ATOMIC); - if (!retval) { - *cookie_len = 0; + if (!retval) goto nodata; - } /* Clear this memory since we are sending this data structure * out on the network. @@ -1321,19 +1319,29 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, ntohs(init_chunk->chunk_hdr->length), raw_addrs, addrs_len); if (sctp_sk(ep->base.sk)->hmac) { + struct hash_desc desc; + /* Sign the message. */ sg.page = virt_to_page(&cookie->c); sg.offset = (unsigned long)(&cookie->c) % PAGE_SIZE; sg.length = bodysize; keylen = SCTP_SECRET_SIZE; key = (char *)ep->secret_key[ep->current_key]; + desc.tfm = sctp_sk(ep->base.sk)->hmac; + desc.flags = 0; - sctp_crypto_hmac(sctp_sk(ep->base.sk)->hmac, key, &keylen, - &sg, 1, cookie->signature); + if (crypto_hash_setkey(desc.tfm, key, keylen) || + crypto_hash_digest(&desc, &sg, bodysize, cookie->signature)) + goto free_cookie; } -nodata: return retval; + +free_cookie: + kfree(retval); +nodata: + *cookie_len = 0; + return NULL; } /* Unpack the cookie from COOKIE ECHO chunk, recreating the association. */ @@ -1354,6 +1362,7 @@ struct sctp_association *sctp_unpack_cookie( sctp_scope_t scope; struct sk_buff *skb = chunk->skb; struct timeval tv; + struct hash_desc desc; /* Header size is static data prior to the actual cookie, including * any padding. @@ -1389,17 +1398,25 @@ struct sctp_association *sctp_unpack_cookie( sg.offset = (unsigned long)(bear_cookie) % PAGE_SIZE; sg.length = bodysize; key = (char *)ep->secret_key[ep->current_key]; + desc.tfm = sctp_sk(ep->base.sk)->hmac; + desc.flags = 0; memset(digest, 0x00, SCTP_SIGNATURE_SIZE); - sctp_crypto_hmac(sctp_sk(ep->base.sk)->hmac, key, &keylen, &sg, - 1, digest); + if (crypto_hash_setkey(desc.tfm, key, keylen) || + crypto_hash_digest(&desc, &sg, bodysize, digest)) { + *error = -SCTP_IERROR_NOMEM; + goto fail; + } if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) { /* Try the previous key. */ key = (char *)ep->secret_key[ep->last_key]; memset(digest, 0x00, SCTP_SIGNATURE_SIZE); - sctp_crypto_hmac(sctp_sk(ep->base.sk)->hmac, key, &keylen, - &sg, 1, digest); + if (crypto_hash_setkey(desc.tfm, key, keylen) || + crypto_hash_digest(&desc, &sg, bodysize, digest)) { + *error = -SCTP_IERROR_NOMEM; + goto fail; + } if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) { /* Yikes! Still bad signature! */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index dab15949958e..85caf7963886 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4898,7 +4898,7 @@ SCTP_STATIC int sctp_stream_listen(struct sock *sk, int backlog) int sctp_inet_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; - struct crypto_tfm *tfm=NULL; + struct crypto_hash *tfm = NULL; int err = -EINVAL; if (unlikely(backlog < 0)) @@ -4911,7 +4911,7 @@ int sctp_inet_listen(struct socket *sock, int backlog) /* Allocate HMAC for generating cookie. */ if (sctp_hmac_alg) { - tfm = sctp_crypto_alloc_tfm(sctp_hmac_alg, 0); + tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC); if (!tfm) { err = -ENOSYS; goto out; @@ -4937,7 +4937,7 @@ out: sctp_release_sock(sk); return err; cleanup: - sctp_crypto_free_tfm(tfm); + crypto_free_hash(tfm); goto out; } From 878b9014666217555d16073764f30e825cf18d2f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 20 Aug 2006 15:17:04 +1000 Subject: [PATCH 0330/1063] [CRYPTO] doc: Update documentation for hash and me This patch updates the documentation to reflect the switch from digest to hash. It also replaces notes about emailing James Morris to refer to me instead. Signed-off-by: Herbert Xu --- Documentation/crypto/api-intro.txt | 36 ++++++++++++++++++------------ 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/Documentation/crypto/api-intro.txt b/Documentation/crypto/api-intro.txt index 74dffc68ff9f..5a03a2801d67 100644 --- a/Documentation/crypto/api-intro.txt +++ b/Documentation/crypto/api-intro.txt @@ -19,15 +19,14 @@ At the lowest level are algorithms, which register dynamically with the API. 'Transforms' are user-instantiated objects, which maintain state, handle all -of the implementation logic (e.g. manipulating page vectors), provide an -abstraction to the underlying algorithms, and handle common logical -operations (e.g. cipher modes, HMAC for digests). However, at the user +of the implementation logic (e.g. manipulating page vectors) and provide an +abstraction to the underlying algorithms. However, at the user level they are very simple. Conceptually, the API layering looks like this: [transform api] (user interface) - [transform ops] (per-type logic glue e.g. cipher.c, digest.c) + [transform ops] (per-type logic glue e.g. cipher.c, compress.c) [algorithm api] (for registering algorithms) The idea is to make the user interface and algorithm registration API @@ -44,22 +43,27 @@ under development. Here's an example of how to use the API: #include + #include + #include struct scatterlist sg[2]; char result[128]; - struct crypto_tfm *tfm; + struct crypto_hash *tfm; + struct hash_desc desc; - tfm = crypto_alloc_tfm("md5", 0); - if (tfm == NULL) + tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) fail(); /* ... set up the scatterlists ... */ + + desc.tfm = tfm; + desc.flags = 0; - crypto_digest_init(tfm); - crypto_digest_update(tfm, &sg, 2); - crypto_digest_final(tfm, result); + if (crypto_hash_digest(&desc, &sg, 2, result)) + fail(); - crypto_free_tfm(tfm); + crypto_free_hash(tfm); Many real examples are available in the regression test module (tcrypt.c). @@ -126,7 +130,7 @@ might already be working on. BUGS Send bug reports to: -James Morris +Herbert Xu Cc: David S. Miller @@ -134,13 +138,14 @@ FURTHER INFORMATION For further patches and various updates, including the current TODO list, see: -http://samba.org/~jamesm/crypto/ +http://gondor.apana.org.au/~herbert/crypto/ AUTHORS James Morris David S. Miller +Herbert Xu CREDITS @@ -238,8 +243,11 @@ Anubis algorithm contributors: Tiger algorithm contributors: Aaron Grothe +VIA PadLock contributors: + Michal Ludvig + Generic scatterwalk code by Adam J. Richter Please send any credits updates or corrections to: -James Morris +Herbert Xu From 8425165dfed27945e8509c141cea245d1739e372 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 20 Aug 2006 15:25:22 +1000 Subject: [PATCH 0331/1063] [CRYPTO] digest: Remove old HMAC implementation This patch removes the old HMAC implementation now that nobody uses it anymore. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/Kconfig | 2 +- crypto/digest.c | 3 +- crypto/hmac.c | 101 ----------------------------------------- crypto/internal.h | 13 ------ include/linux/crypto.h | 16 ------- 5 files changed, 2 insertions(+), 133 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index f07d9237950f..1e2f39c21180 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -33,7 +33,7 @@ config CRYPTO_MANAGER cbc(aes). config CRYPTO_HMAC - bool "HMAC support" + tristate "HMAC support" select CRYPTO_HASH help HMAC: Keyed-Hashing for Message Authentication (RFC2104). diff --git a/crypto/digest.c b/crypto/digest.c index 5873063db840..0155a94e4b15 100644 --- a/crypto/digest.c +++ b/crypto/digest.c @@ -191,10 +191,9 @@ int crypto_init_digest_ops(struct crypto_tfm *tfm) ops->setkey = dalg->dia_setkey ? setkey : nosetkey; ops->digestsize = dalg->dia_digestsize; - return crypto_alloc_hmac_block(tfm); + return 0; } void crypto_exit_digest_ops(struct crypto_tfm *tfm) { - crypto_free_hmac_block(tfm); } diff --git a/crypto/hmac.c b/crypto/hmac.c index eac77e294740..f403b6946047 100644 --- a/crypto/hmac.c +++ b/crypto/hmac.c @@ -29,107 +29,6 @@ struct hmac_ctx { struct crypto_hash *child; }; -static void hash_key(struct crypto_tfm *tfm, u8 *key, unsigned int keylen) -{ - struct scatterlist tmp; - - sg_set_buf(&tmp, key, keylen); - crypto_digest_digest(tfm, &tmp, 1, key); -} - -int crypto_alloc_hmac_block(struct crypto_tfm *tfm) -{ - int ret = 0; - - BUG_ON(!crypto_tfm_alg_blocksize(tfm)); - - tfm->crt_hash.hmac_block = kmalloc(crypto_tfm_alg_blocksize(tfm), - GFP_KERNEL); - if (tfm->crt_hash.hmac_block == NULL) - ret = -ENOMEM; - - return ret; - -} - -void crypto_free_hmac_block(struct crypto_tfm *tfm) -{ - kfree(tfm->crt_hash.hmac_block); -} - -void crypto_hmac_init(struct crypto_tfm *tfm, u8 *key, unsigned int *keylen) -{ - unsigned int i; - struct scatterlist tmp; - char *ipad = tfm->crt_hash.hmac_block; - - if (*keylen > crypto_tfm_alg_blocksize(tfm)) { - hash_key(tfm, key, *keylen); - *keylen = crypto_tfm_alg_digestsize(tfm); - } - - memset(ipad, 0, crypto_tfm_alg_blocksize(tfm)); - memcpy(ipad, key, *keylen); - - for (i = 0; i < crypto_tfm_alg_blocksize(tfm); i++) - ipad[i] ^= 0x36; - - sg_set_buf(&tmp, ipad, crypto_tfm_alg_blocksize(tfm)); - - crypto_digest_init(tfm); - crypto_digest_update(tfm, &tmp, 1); -} - -void crypto_hmac_update(struct crypto_tfm *tfm, - struct scatterlist *sg, unsigned int nsg) -{ - crypto_digest_update(tfm, sg, nsg); -} - -void crypto_hmac_final(struct crypto_tfm *tfm, u8 *key, - unsigned int *keylen, u8 *out) -{ - unsigned int i; - struct scatterlist tmp; - char *opad = tfm->crt_hash.hmac_block; - - if (*keylen > crypto_tfm_alg_blocksize(tfm)) { - hash_key(tfm, key, *keylen); - *keylen = crypto_tfm_alg_digestsize(tfm); - } - - crypto_digest_final(tfm, out); - - memset(opad, 0, crypto_tfm_alg_blocksize(tfm)); - memcpy(opad, key, *keylen); - - for (i = 0; i < crypto_tfm_alg_blocksize(tfm); i++) - opad[i] ^= 0x5c; - - sg_set_buf(&tmp, opad, crypto_tfm_alg_blocksize(tfm)); - - crypto_digest_init(tfm); - crypto_digest_update(tfm, &tmp, 1); - - sg_set_buf(&tmp, out, crypto_tfm_alg_digestsize(tfm)); - - crypto_digest_update(tfm, &tmp, 1); - crypto_digest_final(tfm, out); -} - -void crypto_hmac(struct crypto_tfm *tfm, u8 *key, unsigned int *keylen, - struct scatterlist *sg, unsigned int nsg, u8 *out) -{ - crypto_hmac_init(tfm, key, keylen); - crypto_hmac_update(tfm, sg, nsg); - crypto_hmac_final(tfm, key, keylen, out); -} - -EXPORT_SYMBOL_GPL(crypto_hmac_init); -EXPORT_SYMBOL_GPL(crypto_hmac_update); -EXPORT_SYMBOL_GPL(crypto_hmac_final); -EXPORT_SYMBOL_GPL(crypto_hmac); - static inline void *align_ptr(void *p, unsigned int align) { return (void *)ALIGN((unsigned long)p, align); diff --git a/crypto/internal.h b/crypto/internal.h index 93d9b10ff914..2da6ad4f3593 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -73,19 +73,6 @@ static inline void crypto_yield(u32 flags) cond_resched(); } -#ifdef CONFIG_CRYPTO_HMAC -int crypto_alloc_hmac_block(struct crypto_tfm *tfm); -void crypto_free_hmac_block(struct crypto_tfm *tfm); -#else -static inline int crypto_alloc_hmac_block(struct crypto_tfm *tfm) -{ - return 0; -} - -static inline void crypto_free_hmac_block(struct crypto_tfm *tfm) -{ } -#endif - #ifdef CONFIG_PROC_FS void __init crypto_init_proc(void); void __exit crypto_exit_proc(void); diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 40c0aab8ad4c..929fb9ad1314 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -295,9 +295,6 @@ struct hash_tfm { unsigned int nsg, u8 *out); int (*setkey)(struct crypto_hash *tfm, const u8 *key, unsigned int keylen); -#ifdef CONFIG_CRYPTO_HMAC - void *hmac_block; -#endif unsigned int digestsize; }; @@ -872,18 +869,5 @@ static inline int crypto_comp_decompress(struct crypto_tfm *tfm, return tfm->crt_compress.cot_decompress(tfm, src, slen, dst, dlen); } -/* - * HMAC support. - */ -#ifdef CONFIG_CRYPTO_HMAC -void crypto_hmac_init(struct crypto_tfm *tfm, u8 *key, unsigned int *keylen); -void crypto_hmac_update(struct crypto_tfm *tfm, - struct scatterlist *sg, unsigned int nsg); -void crypto_hmac_final(struct crypto_tfm *tfm, u8 *key, - unsigned int *keylen, u8 *out); -void crypto_hmac(struct crypto_tfm *tfm, u8 *key, unsigned int *keylen, - struct scatterlist *sg, unsigned int nsg, u8 *out); -#endif /* CONFIG_CRYPTO_HMAC */ - #endif /* _LINUX_CRYPTO_H */ From dc64ddf4918f0da52df10d83c2a5941a547c2035 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 24 Aug 2006 18:45:50 +1000 Subject: [PATCH 0332/1063] [SCSI] iscsi: Use crypto_hash interface instead of crypto_digest This patch converts ISCSI to use the new crypto_hash interface instead of crypto_digest. It's a fairly straightforward substitution. Signed-off-by: Herbert Xu --- drivers/scsi/iscsi_tcp.c | 134 +++++++++++++++++++++------------------ drivers/scsi/iscsi_tcp.h | 9 +-- 2 files changed, 78 insertions(+), 65 deletions(-) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 058f094f945a..66a1ae1d6982 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -26,6 +26,7 @@ * Zhenyu Wang */ +#include #include #include #include @@ -107,8 +108,11 @@ iscsi_hdr_digest(struct iscsi_conn *conn, struct iscsi_buf *buf, u8* crc) { struct iscsi_tcp_conn *tcp_conn = conn->dd_data; + struct hash_desc desc; - crypto_digest_digest(tcp_conn->tx_tfm, &buf->sg, 1, crc); + desc.tfm = tcp_conn->tx_tfm; + desc.flags = 0; + crypto_hash_digest(&desc, &buf->sg, buf->sg.length, crc); buf->sg.length += sizeof(uint32_t); } @@ -452,11 +456,14 @@ iscsi_tcp_hdr_recv(struct iscsi_conn *conn) } if (conn->hdrdgst_en) { + struct hash_desc desc; struct scatterlist sg; sg_init_one(&sg, (u8 *)hdr, sizeof(struct iscsi_hdr) + ahslen); - crypto_digest_digest(tcp_conn->rx_tfm, &sg, 1, (u8 *)&cdgst); + desc.tfm = tcp_conn->rx_tfm; + desc.flags = 0; + crypto_hash_digest(&desc, &sg, sg.length, (u8 *)&cdgst); rdgst = *(uint32_t*)((char*)hdr + sizeof(struct iscsi_hdr) + ahslen); if (cdgst != rdgst) { @@ -673,7 +680,7 @@ partial_sg_digest_update(struct iscsi_tcp_conn *tcp_conn, memcpy(&temp, sg, sizeof(struct scatterlist)); temp.offset = offset; temp.length = length; - crypto_digest_update(tcp_conn->data_rx_tfm, &temp, 1); + crypto_hash_update(&tcp_conn->data_rx_hash, &temp, length); } static void @@ -682,7 +689,7 @@ iscsi_recv_digest_update(struct iscsi_tcp_conn *tcp_conn, char* buf, int len) struct scatterlist tmp; sg_init_one(&tmp, buf, len); - crypto_digest_update(tcp_conn->data_rx_tfm, &tmp, 1); + crypto_hash_update(&tcp_conn->data_rx_hash, &tmp, len); } static int iscsi_scsi_data_in(struct iscsi_conn *conn) @@ -736,9 +743,9 @@ static int iscsi_scsi_data_in(struct iscsi_conn *conn) if (!rc) { if (conn->datadgst_en) { if (!offset) - crypto_digest_update( - tcp_conn->data_rx_tfm, - &sg[i], 1); + crypto_hash_update( + &tcp_conn->data_rx_hash, + &sg[i], sg[i].length); else partial_sg_digest_update(tcp_conn, &sg[i], @@ -877,8 +884,7 @@ more: rc = iscsi_tcp_hdr_recv(conn); if (!rc && tcp_conn->in.datalen) { if (conn->datadgst_en) { - BUG_ON(!tcp_conn->data_rx_tfm); - crypto_digest_init(tcp_conn->data_rx_tfm); + crypto_hash_init(&tcp_conn->data_rx_hash); } tcp_conn->in_progress = IN_PROGRESS_DATA_RECV; } else if (rc) { @@ -931,11 +937,11 @@ more: tcp_conn->in.padding); memset(pad, 0, tcp_conn->in.padding); sg_init_one(&sg, pad, tcp_conn->in.padding); - crypto_digest_update(tcp_conn->data_rx_tfm, - &sg, 1); + crypto_hash_update(&tcp_conn->data_rx_hash, + &sg, sg.length); } - crypto_digest_final(tcp_conn->data_rx_tfm, - (u8 *) & tcp_conn->in.datadgst); + crypto_hash_final(&tcp_conn->data_rx_hash, + (u8 *)&tcp_conn->in.datadgst); debug_tcp("rx digest 0x%x\n", tcp_conn->in.datadgst); tcp_conn->in_progress = IN_PROGRESS_DDIGEST_RECV; } else @@ -1181,8 +1187,7 @@ iscsi_data_digest_init(struct iscsi_tcp_conn *tcp_conn, { struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data; - BUG_ON(!tcp_conn->data_tx_tfm); - crypto_digest_init(tcp_conn->data_tx_tfm); + crypto_hash_init(&tcp_conn->data_tx_hash); tcp_ctask->digest_count = 4; } @@ -1196,7 +1201,7 @@ iscsi_digest_final_send(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask, int sent = 0; if (final) - crypto_digest_final(tcp_conn->data_tx_tfm, (u8*)digest); + crypto_hash_final(&tcp_conn->data_tx_hash, (u8 *)digest); iscsi_buf_init_iov(buf, (char*)digest, 4); rc = iscsi_sendpage(conn, buf, &tcp_ctask->digest_count, &sent); @@ -1491,16 +1496,17 @@ handle_xmstate_imm_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) if (rc) { tcp_ctask->xmstate |= XMSTATE_IMM_DATA; if (conn->datadgst_en) { - crypto_digest_final(tcp_conn->data_tx_tfm, - (u8*)&tcp_ctask->immdigest); + crypto_hash_final(&tcp_conn->data_tx_hash, + (u8 *)&tcp_ctask->immdigest); debug_tcp("tx imm sendpage fail 0x%x\n", tcp_ctask->datadigest); } return rc; } if (conn->datadgst_en) - crypto_digest_update(tcp_conn->data_tx_tfm, - &tcp_ctask->sendbuf.sg, 1); + crypto_hash_update(&tcp_conn->data_tx_hash, + &tcp_ctask->sendbuf.sg, + tcp_ctask->sendbuf.sg.length); if (!ctask->imm_count) break; @@ -1577,8 +1583,8 @@ handle_xmstate_uns_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) tcp_ctask->xmstate |= XMSTATE_UNS_DATA; /* will continue with this ctask later.. */ if (conn->datadgst_en) { - crypto_digest_final(tcp_conn->data_tx_tfm, - (u8 *)&dtask->digest); + crypto_hash_final(&tcp_conn->data_tx_hash, + (u8 *)&dtask->digest); debug_tcp("tx uns data fail 0x%x\n", dtask->digest); } @@ -1593,8 +1599,9 @@ handle_xmstate_uns_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) * so pass it */ if (conn->datadgst_en && tcp_ctask->sent - start > 0) - crypto_digest_update(tcp_conn->data_tx_tfm, - &tcp_ctask->sendbuf.sg, 1); + crypto_hash_update(&tcp_conn->data_tx_hash, + &tcp_ctask->sendbuf.sg, + tcp_ctask->sendbuf.sg.length); if (!ctask->data_count) break; @@ -1668,7 +1675,7 @@ solicit_again: tcp_ctask->xmstate |= XMSTATE_SOL_DATA; /* will continue with this ctask later.. */ if (conn->datadgst_en) { - crypto_digest_final(tcp_conn->data_tx_tfm, + crypto_hash_final(&tcp_conn->data_tx_hash, (u8 *)&dtask->digest); debug_tcp("r2t data send fail 0x%x\n", dtask->digest); } @@ -1677,8 +1684,8 @@ solicit_again: BUG_ON(r2t->data_count < 0); if (conn->datadgst_en) - crypto_digest_update(tcp_conn->data_tx_tfm, &r2t->sendbuf.sg, - 1); + crypto_hash_update(&tcp_conn->data_tx_hash, &r2t->sendbuf.sg, + r2t->sendbuf.sg.length); if (r2t->data_count) { BUG_ON(ctask->sc->use_sg == 0); @@ -1766,8 +1773,9 @@ handle_xmstate_w_pad(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) } if (conn->datadgst_en) { - crypto_digest_update(tcp_conn->data_tx_tfm, - &tcp_ctask->sendbuf.sg, 1); + crypto_hash_update(&tcp_conn->data_tx_hash, + &tcp_ctask->sendbuf.sg, + tcp_ctask->sendbuf.sg.length); /* imm data? */ if (!dtask) { rc = iscsi_digest_final_send(conn, ctask, @@ -1963,13 +1971,13 @@ iscsi_tcp_conn_destroy(struct iscsi_cls_conn *cls_conn) /* now free tcp_conn */ if (digest) { if (tcp_conn->tx_tfm) - crypto_free_tfm(tcp_conn->tx_tfm); + crypto_free_hash(tcp_conn->tx_tfm); if (tcp_conn->rx_tfm) - crypto_free_tfm(tcp_conn->rx_tfm); - if (tcp_conn->data_tx_tfm) - crypto_free_tfm(tcp_conn->data_tx_tfm); - if (tcp_conn->data_rx_tfm) - crypto_free_tfm(tcp_conn->data_rx_tfm); + crypto_free_hash(tcp_conn->rx_tfm); + if (tcp_conn->data_tx_hash.tfm) + crypto_free_hash(tcp_conn->data_tx_hash.tfm); + if (tcp_conn->data_rx_hash.tfm) + crypto_free_hash(tcp_conn->data_rx_hash.tfm); } kfree(tcp_conn); @@ -2130,44 +2138,48 @@ iscsi_conn_set_param(struct iscsi_cls_conn *cls_conn, enum iscsi_param param, if (conn->hdrdgst_en) { tcp_conn->hdr_size += sizeof(__u32); if (!tcp_conn->tx_tfm) - tcp_conn->tx_tfm = crypto_alloc_tfm("crc32c", - 0); - if (!tcp_conn->tx_tfm) - return -ENOMEM; + tcp_conn->tx_tfm = + crypto_alloc_hash("crc32c", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(tcp_conn->tx_tfm)) + return PTR_ERR(tcp_conn->tx_tfm); if (!tcp_conn->rx_tfm) - tcp_conn->rx_tfm = crypto_alloc_tfm("crc32c", - 0); - if (!tcp_conn->rx_tfm) { - crypto_free_tfm(tcp_conn->tx_tfm); - return -ENOMEM; + tcp_conn->rx_tfm = + crypto_alloc_hash("crc32c", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(tcp_conn->rx_tfm)) { + crypto_free_hash(tcp_conn->tx_tfm); + return PTR_ERR(tcp_conn->rx_tfm); } } else { if (tcp_conn->tx_tfm) - crypto_free_tfm(tcp_conn->tx_tfm); + crypto_free_hash(tcp_conn->tx_tfm); if (tcp_conn->rx_tfm) - crypto_free_tfm(tcp_conn->rx_tfm); + crypto_free_hash(tcp_conn->rx_tfm); } break; case ISCSI_PARAM_DATADGST_EN: iscsi_set_param(cls_conn, param, buf, buflen); if (conn->datadgst_en) { - if (!tcp_conn->data_tx_tfm) - tcp_conn->data_tx_tfm = - crypto_alloc_tfm("crc32c", 0); - if (!tcp_conn->data_tx_tfm) - return -ENOMEM; - if (!tcp_conn->data_rx_tfm) - tcp_conn->data_rx_tfm = - crypto_alloc_tfm("crc32c", 0); - if (!tcp_conn->data_rx_tfm) { - crypto_free_tfm(tcp_conn->data_tx_tfm); - return -ENOMEM; + if (!tcp_conn->data_tx_hash.tfm) + tcp_conn->data_tx_hash.tfm = + crypto_alloc_hash("crc32c", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(tcp_conn->data_tx_hash.tfm)) + return PTR_ERR(tcp_conn->data_tx_hash.tfm); + if (!tcp_conn->data_rx_hash.tfm) + tcp_conn->data_rx_hash.tfm = + crypto_alloc_hash("crc32c", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(tcp_conn->data_rx_hash.tfm)) { + crypto_free_hash(tcp_conn->data_tx_hash.tfm); + return PTR_ERR(tcp_conn->data_rx_hash.tfm); } } else { - if (tcp_conn->data_tx_tfm) - crypto_free_tfm(tcp_conn->data_tx_tfm); - if (tcp_conn->data_rx_tfm) - crypto_free_tfm(tcp_conn->data_rx_tfm); + if (tcp_conn->data_tx_hash.tfm) + crypto_free_hash(tcp_conn->data_tx_hash.tfm); + if (tcp_conn->data_rx_hash.tfm) + crypto_free_hash(tcp_conn->data_rx_hash.tfm); } tcp_conn->sendpage = conn->datadgst_en ? sock_no_sendpage : tcp_conn->sock->ops->sendpage; diff --git a/drivers/scsi/iscsi_tcp.h b/drivers/scsi/iscsi_tcp.h index 6a4ee704e46e..e35701305fc9 100644 --- a/drivers/scsi/iscsi_tcp.h +++ b/drivers/scsi/iscsi_tcp.h @@ -51,6 +51,7 @@ #define ISCSI_SG_TABLESIZE SG_ALL #define ISCSI_TCP_MAX_CMD_LEN 16 +struct crypto_hash; struct socket; /* Socket connection recieve helper */ @@ -84,8 +85,8 @@ struct iscsi_tcp_conn { /* iSCSI connection-wide sequencing */ int hdr_size; /* PDU header size */ - struct crypto_tfm *rx_tfm; /* CRC32C (Rx) */ - struct crypto_tfm *data_rx_tfm; /* CRC32C (Rx) for data */ + struct crypto_hash *rx_tfm; /* CRC32C (Rx) */ + struct hash_desc data_rx_hash; /* CRC32C (Rx) for data */ /* control data */ struct iscsi_tcp_recv in; /* TCP receive context */ @@ -97,8 +98,8 @@ struct iscsi_tcp_conn { void (*old_write_space)(struct sock *); /* xmit */ - struct crypto_tfm *tx_tfm; /* CRC32C (Tx) */ - struct crypto_tfm *data_tx_tfm; /* CRC32C (Tx) for data */ + struct crypto_hash *tx_tfm; /* CRC32C (Tx) */ + struct hash_desc data_tx_hash; /* CRC32C (Tx) for data */ /* MIB custom statistics */ uint32_t sendpage_failures_cnt; From 35058687912aa2f0b4554383cc10be4e0683b9a4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 24 Aug 2006 19:10:20 +1000 Subject: [PATCH 0333/1063] [CRYPTO] users: Use crypto_hash interface instead of crypto_digest This patch converts all remaining crypto_digest users to use the new crypto_hash interface. Signed-off-by: Herbert Xu --- drivers/md/dm-crypt.c | 30 +++++++++++---------- drivers/net/ppp_mppe.c | 34 +++++++++++++++--------- fs/nfsd/nfs4recover.c | 21 ++++++++------- net/ieee80211/ieee80211_crypt_tkip.c | 25 ++++++++++-------- net/sunrpc/auth_gss/gss_krb5_crypto.c | 38 +++++++++++++++++---------- security/seclvl.c | 18 +++++++------ 6 files changed, 97 insertions(+), 69 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 91d4081cb00e..73f8be837a45 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -122,7 +122,8 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, const char *opts) { struct crypto_cipher *essiv_tfm; - struct crypto_tfm *hash_tfm; + struct crypto_hash *hash_tfm; + struct hash_desc desc; struct scatterlist sg; unsigned int saltsize; u8 *salt; @@ -134,29 +135,30 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, } /* Hash the cipher key with the given hash algorithm */ - hash_tfm = crypto_alloc_tfm(opts, CRYPTO_TFM_REQ_MAY_SLEEP); - if (hash_tfm == NULL) { + hash_tfm = crypto_alloc_hash(opts, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(hash_tfm)) { ti->error = "Error initializing ESSIV hash"; - return -EINVAL; + return PTR_ERR(hash_tfm); } - if (crypto_tfm_alg_type(hash_tfm) != CRYPTO_ALG_TYPE_DIGEST) { - ti->error = "Expected digest algorithm for ESSIV hash"; - crypto_free_tfm(hash_tfm); - return -EINVAL; - } - - saltsize = crypto_tfm_alg_digestsize(hash_tfm); + saltsize = crypto_hash_digestsize(hash_tfm); salt = kmalloc(saltsize, GFP_KERNEL); if (salt == NULL) { ti->error = "Error kmallocing salt storage in ESSIV"; - crypto_free_tfm(hash_tfm); + crypto_free_hash(hash_tfm); return -ENOMEM; } sg_set_buf(&sg, cc->key, cc->key_size); - crypto_digest_digest(hash_tfm, &sg, 1, salt); - crypto_free_tfm(hash_tfm); + desc.tfm = hash_tfm; + desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; + err = crypto_hash_digest(&desc, &sg, cc->key_size, salt); + crypto_free_hash(hash_tfm); + + if (err) { + ti->error = "Error calculating hash in ESSIV"; + return err; + } /* Setup the essiv_tfm with the given salt */ essiv_tfm = crypto_alloc_cipher(cc->cipher, 0, CRYPTO_ALG_ASYNC); diff --git a/drivers/net/ppp_mppe.c b/drivers/net/ppp_mppe.c index 495d8667419a..e7a0eb4fca60 100644 --- a/drivers/net/ppp_mppe.c +++ b/drivers/net/ppp_mppe.c @@ -65,12 +65,13 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS("ppp-compress-" __stringify(CI_MPPE)); MODULE_VERSION("1.0.2"); -static void +static unsigned int setup_sg(struct scatterlist *sg, const void *address, unsigned int length) { sg[0].page = virt_to_page(address); sg[0].offset = offset_in_page(address); sg[0].length = length; + return length; } #define SHA1_PAD_SIZE 40 @@ -97,7 +98,7 @@ static inline void sha_pad_init(struct sha_pad *shapad) */ struct ppp_mppe_state { struct crypto_blkcipher *arc4; - struct crypto_tfm *sha1; + struct crypto_hash *sha1; unsigned char *sha1_digest; unsigned char master_key[MPPE_MAX_KEY_LEN]; unsigned char session_key[MPPE_MAX_KEY_LEN]; @@ -137,14 +138,21 @@ struct ppp_mppe_state { */ static void get_new_key_from_sha(struct ppp_mppe_state * state, unsigned char *InterimKey) { + struct hash_desc desc; struct scatterlist sg[4]; + unsigned int nbytes; - setup_sg(&sg[0], state->master_key, state->keylen); - setup_sg(&sg[1], sha_pad->sha_pad1, sizeof(sha_pad->sha_pad1)); - setup_sg(&sg[2], state->session_key, state->keylen); - setup_sg(&sg[3], sha_pad->sha_pad2, sizeof(sha_pad->sha_pad2)); + nbytes = setup_sg(&sg[0], state->master_key, state->keylen); + nbytes += setup_sg(&sg[1], sha_pad->sha_pad1, + sizeof(sha_pad->sha_pad1)); + nbytes += setup_sg(&sg[2], state->session_key, state->keylen); + nbytes += setup_sg(&sg[3], sha_pad->sha_pad2, + sizeof(sha_pad->sha_pad2)); - crypto_digest_digest (state->sha1, sg, 4, state->sha1_digest); + desc.tfm = state->sha1; + desc.flags = 0; + + crypto_hash_digest(&desc, sg, nbytes, state->sha1_digest); memcpy(InterimKey, state->sha1_digest, state->keylen); } @@ -204,11 +212,13 @@ static void *mppe_alloc(unsigned char *options, int optlen) goto out_free; } - state->sha1 = crypto_alloc_tfm("sha1", 0); - if (!state->sha1) + state->sha1 = crypto_alloc_hash("sha1", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(state->sha1)) { + state->sha1 = NULL; goto out_free; + } - digestsize = crypto_tfm_alg_digestsize(state->sha1); + digestsize = crypto_hash_digestsize(state->sha1); if (digestsize < MPPE_MAX_KEY_LEN) goto out_free; @@ -233,7 +243,7 @@ static void *mppe_alloc(unsigned char *options, int optlen) if (state->sha1_digest) kfree(state->sha1_digest); if (state->sha1) - crypto_free_tfm(state->sha1); + crypto_free_hash(state->sha1); if (state->arc4) crypto_free_blkcipher(state->arc4); kfree(state); @@ -251,7 +261,7 @@ static void mppe_free(void *arg) if (state->sha1_digest) kfree(state->sha1_digest); if (state->sha1) - crypto_free_tfm(state->sha1); + crypto_free_hash(state->sha1); if (state->arc4) crypto_free_blkcipher(state->arc4); kfree(state); diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 06da7506363c..e35d7e52fdeb 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -33,7 +33,7 @@ * */ - +#include #include #include #include @@ -87,34 +87,35 @@ int nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname) { struct xdr_netobj cksum; - struct crypto_tfm *tfm; + struct hash_desc desc; struct scatterlist sg[1]; int status = nfserr_resource; dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n", clname->len, clname->data); - tfm = crypto_alloc_tfm("md5", CRYPTO_TFM_REQ_MAY_SLEEP); - if (tfm == NULL) - goto out; - cksum.len = crypto_tfm_alg_digestsize(tfm); + desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; + desc.tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(desc.tfm)) + goto out_no_tfm; + cksum.len = crypto_hash_digestsize(desc.tfm); cksum.data = kmalloc(cksum.len, GFP_KERNEL); if (cksum.data == NULL) goto out; - crypto_digest_init(tfm); sg[0].page = virt_to_page(clname->data); sg[0].offset = offset_in_page(clname->data); sg[0].length = clname->len; - crypto_digest_update(tfm, sg, 1); - crypto_digest_final(tfm, cksum.data); + if (crypto_hash_digest(&desc, sg, sg->length, cksum.data)) + goto out; md5_to_hex(dname, cksum.data); kfree(cksum.data); status = nfs_ok; out: - crypto_free_tfm(tfm); + crypto_free_hash(desc.tfm); +out_no_tfm: return status; } diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c index d60ce9b49b4f..407a17495b61 100644 --- a/net/ieee80211/ieee80211_crypt_tkip.c +++ b/net/ieee80211/ieee80211_crypt_tkip.c @@ -54,7 +54,7 @@ struct ieee80211_tkip_data { int key_idx; struct crypto_blkcipher *tfm_arc4; - struct crypto_tfm *tfm_michael; + struct crypto_hash *tfm_michael; /* scratch buffers for virt_to_page() (crypto API) */ u8 rx_hdr[16], tx_hdr[16]; @@ -95,10 +95,12 @@ static void *ieee80211_tkip_init(int key_idx) goto fail; } - priv->tfm_michael = crypto_alloc_tfm("michael_mic", 0); - if (priv->tfm_michael == NULL) { + priv->tfm_michael = crypto_alloc_hash("michael_mic", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(priv->tfm_michael)) { printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate " "crypto API michael_mic\n"); + priv->tfm_michael = NULL; goto fail; } @@ -107,7 +109,7 @@ static void *ieee80211_tkip_init(int key_idx) fail: if (priv) { if (priv->tfm_michael) - crypto_free_tfm(priv->tfm_michael); + crypto_free_hash(priv->tfm_michael); if (priv->tfm_arc4) crypto_free_blkcipher(priv->tfm_arc4); kfree(priv); @@ -120,7 +122,7 @@ static void ieee80211_tkip_deinit(void *priv) { struct ieee80211_tkip_data *_priv = priv; if (_priv && _priv->tfm_michael) - crypto_free_tfm(_priv->tfm_michael); + crypto_free_hash(_priv->tfm_michael); if (_priv && _priv->tfm_arc4) crypto_free_blkcipher(_priv->tfm_arc4); kfree(priv); @@ -485,6 +487,7 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) static int michael_mic(struct ieee80211_tkip_data *tkey, u8 * key, u8 * hdr, u8 * data, size_t data_len, u8 * mic) { + struct hash_desc desc; struct scatterlist sg[2]; if (tkey->tfm_michael == NULL) { @@ -499,12 +502,12 @@ static int michael_mic(struct ieee80211_tkip_data *tkey, u8 * key, u8 * hdr, sg[1].offset = offset_in_page(data); sg[1].length = data_len; - crypto_digest_init(tkey->tfm_michael); - crypto_digest_setkey(tkey->tfm_michael, key, 8); - crypto_digest_update(tkey->tfm_michael, sg, 2); - crypto_digest_final(tkey->tfm_michael, mic); + if (crypto_hash_setkey(tkey->tfm_michael, key, 8)) + return -1; - return 0; + desc.tfm = tkey->tfm_michael; + desc.flags = 0; + return crypto_hash_digest(&desc, sg, data_len + 16, mic); } static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr) @@ -628,7 +631,7 @@ static int ieee80211_tkip_set_key(void *key, int len, u8 * seq, void *priv) { struct ieee80211_tkip_data *tkey = priv; int keyidx; - struct crypto_tfm *tfm = tkey->tfm_michael; + struct crypto_hash *tfm = tkey->tfm_michael; struct crypto_blkcipher *tfm2 = tkey->tfm_arc4; keyidx = tkey->key_idx; diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 57192dfe3065..e11a40b25cce 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -34,6 +34,7 @@ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ +#include #include #include #include @@ -199,11 +200,9 @@ out: static int checksummer(struct scatterlist *sg, void *data) { - struct crypto_tfm *tfm = (struct crypto_tfm *)data; + struct hash_desc *desc = data; - crypto_digest_update(tfm, sg, 1); - - return 0; + return crypto_hash_update(desc, sg, sg->length); } /* checksum the plaintext data and hdrlen bytes of the token header */ @@ -212,8 +211,9 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, int body_offset, struct xdr_netobj *cksum) { char *cksumname; - struct crypto_tfm *tfm = NULL; /* XXX add to ctx? */ + struct hash_desc desc; /* XXX add to ctx? */ struct scatterlist sg[1]; + int err; switch (cksumtype) { case CKSUMTYPE_RSA_MD5: @@ -224,18 +224,28 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, " unsupported checksum %d", cksumtype); return GSS_S_FAILURE; } - if (!(tfm = crypto_alloc_tfm(cksumname, CRYPTO_TFM_REQ_MAY_SLEEP))) + desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(desc.tfm)) return GSS_S_FAILURE; - cksum->len = crypto_tfm_alg_digestsize(tfm); + cksum->len = crypto_hash_digestsize(desc.tfm); + desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; - crypto_digest_init(tfm); + err = crypto_hash_init(&desc); + if (err) + goto out; sg_set_buf(sg, header, hdrlen); - crypto_digest_update(tfm, sg, 1); - process_xdr_buf(body, body_offset, body->len - body_offset, - checksummer, tfm); - crypto_digest_final(tfm, cksum->data); - crypto_free_tfm(tfm); - return 0; + err = crypto_hash_update(&desc, sg, hdrlen); + if (err) + goto out; + err = process_xdr_buf(body, body_offset, body->len - body_offset, + checksummer, &desc); + if (err) + goto out; + err = crypto_hash_final(&desc, cksum->data); + +out: + crypto_free_hash(desc.tfm); + return err ? GSS_S_FAILURE : 0; } EXPORT_SYMBOL(make_checksum); diff --git a/security/seclvl.c b/security/seclvl.c index c26dd7de0471..8f6291991fbc 100644 --- a/security/seclvl.c +++ b/security/seclvl.c @@ -16,6 +16,7 @@ * (at your option) any later version. */ +#include #include #include #include @@ -197,26 +198,27 @@ static unsigned char hashedPassword[SHA1_DIGEST_SIZE]; static int plaintext_to_sha1(unsigned char *hash, const char *plaintext, unsigned int len) { - struct crypto_tfm *tfm; + struct hash_desc desc; struct scatterlist sg; + int err; + if (len > PAGE_SIZE) { seclvl_printk(0, KERN_ERR, "Plaintext password too large (%d " "characters). Largest possible is %lu " "bytes.\n", len, PAGE_SIZE); return -EINVAL; } - tfm = crypto_alloc_tfm("sha1", CRYPTO_TFM_REQ_MAY_SLEEP); - if (tfm == NULL) { + desc.tfm = crypto_alloc_hash("sha1", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(desc.tfm)) { seclvl_printk(0, KERN_ERR, "Failed to load transform for SHA1\n"); return -EINVAL; } sg_init_one(&sg, (u8 *)plaintext, len); - crypto_digest_init(tfm); - crypto_digest_update(tfm, &sg, 1); - crypto_digest_final(tfm, hash); - crypto_free_tfm(tfm); - return 0; + desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; + err = crypto_hash_digest(&desc, &sg, len, hash); + crypto_free_hash(desc.tfm); + return err; } /** From fce32d70ba834129b164c40c2d4260e5a7a7d850 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 26 Aug 2006 17:35:45 +1000 Subject: [PATCH 0334/1063] [CRYPTO] api: Add crypto_comp and crypto_has_* This patch adds the crypto_comp type to complete the compile-time checking conversion. The functions crypto_has_alg and crypto_has_cipher, etc. are also added to replace crypto_alg_available. Signed-off-by: Herbert Xu --- crypto/api.c | 14 +++++++ include/linux/crypto.h | 90 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 98 insertions(+), 6 deletions(-) diff --git a/crypto/api.c b/crypto/api.c index edaa843d8e83..2e84d4b54790 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -482,3 +482,17 @@ int crypto_alg_available(const char *name, u32 flags) EXPORT_SYMBOL_GPL(crypto_alloc_tfm); EXPORT_SYMBOL_GPL(crypto_free_tfm); EXPORT_SYMBOL_GPL(crypto_alg_available); + +int crypto_has_alg(const char *name, u32 type, u32 mask) +{ + int ret = 0; + struct crypto_alg *alg = crypto_alg_mod_lookup(name, type, mask); + + if (!IS_ERR(alg)) { + crypto_mod_put(alg); + ret = 1; + } + + return ret; +} +EXPORT_SYMBOL_GPL(crypto_has_alg); diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 929fb9ad1314..cf91c4c0638b 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -236,11 +236,17 @@ int crypto_unregister_alg(struct crypto_alg *alg); */ #ifdef CONFIG_CRYPTO int crypto_alg_available(const char *name, u32 flags); +int crypto_has_alg(const char *name, u32 type, u32 mask); #else static inline int crypto_alg_available(const char *name, u32 flags) { return 0; } + +static inline int crypto_has_alg(const char *name, u32 type, u32 mask) +{ + return 0; +} #endif /* @@ -329,6 +335,7 @@ struct crypto_tfm { }; #define crypto_cipher crypto_tfm +#define crypto_comp crypto_tfm struct crypto_blkcipher { struct crypto_tfm base; @@ -485,6 +492,15 @@ static inline void crypto_free_blkcipher(struct crypto_blkcipher *tfm) crypto_free_tfm(crypto_blkcipher_tfm(tfm)); } +static inline int crypto_has_blkcipher(const char *alg_name, u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_BLKCIPHER; + mask |= CRYPTO_ALG_TYPE_MASK; + + return crypto_has_alg(alg_name, type, mask); +} + static inline const char *crypto_blkcipher_name(struct crypto_blkcipher *tfm) { return crypto_tfm_alg_name(crypto_blkcipher_tfm(tfm)); @@ -620,6 +636,15 @@ static inline void crypto_free_cipher(struct crypto_cipher *tfm) crypto_free_tfm(crypto_cipher_tfm(tfm)); } +static inline int crypto_has_cipher(const char *alg_name, u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_CIPHER; + mask |= CRYPTO_ALG_TYPE_MASK; + + return crypto_has_alg(alg_name, type, mask); +} + static inline struct cipher_tfm *crypto_cipher_crt(struct crypto_cipher *tfm) { return &crypto_cipher_tfm(tfm)->crt_cipher; @@ -718,6 +743,15 @@ static inline void crypto_free_hash(struct crypto_hash *tfm) crypto_free_tfm(crypto_hash_tfm(tfm)); } +static inline int crypto_has_hash(const char *alg_name, u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_HASH; + mask |= CRYPTO_ALG_TYPE_HASH_MASK; + + return crypto_has_alg(alg_name, type, mask); +} + static inline struct hash_tfm *crypto_hash_crt(struct crypto_hash *tfm) { return &crypto_hash_tfm(tfm)->crt_hash; @@ -853,20 +887,64 @@ static inline void crypto_cipher_get_iv(struct crypto_tfm *tfm, memcpy(dst, tfm->crt_cipher.cit_iv, len); } -static inline int crypto_comp_compress(struct crypto_tfm *tfm, +static inline struct crypto_comp *__crypto_comp_cast(struct crypto_tfm *tfm) +{ + return (struct crypto_comp *)tfm; +} + +static inline struct crypto_comp *crypto_comp_cast(struct crypto_tfm *tfm) +{ + BUG_ON((crypto_tfm_alg_type(tfm) ^ CRYPTO_ALG_TYPE_COMPRESS) & + CRYPTO_ALG_TYPE_MASK); + return __crypto_comp_cast(tfm); +} + +static inline struct crypto_comp *crypto_alloc_comp(const char *alg_name, + u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_COMPRESS; + mask |= CRYPTO_ALG_TYPE_MASK; + + return __crypto_comp_cast(crypto_alloc_base(alg_name, type, mask)); +} + +static inline struct crypto_tfm *crypto_comp_tfm(struct crypto_comp *tfm) +{ + return tfm; +} + +static inline void crypto_free_comp(struct crypto_comp *tfm) +{ + crypto_free_tfm(crypto_comp_tfm(tfm)); +} + +static inline int crypto_has_comp(const char *alg_name, u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_COMPRESS; + mask |= CRYPTO_ALG_TYPE_MASK; + + return crypto_has_alg(alg_name, type, mask); +} + +static inline struct compress_tfm *crypto_comp_crt(struct crypto_comp *tfm) +{ + return &crypto_comp_tfm(tfm)->crt_compress; +} + +static inline int crypto_comp_compress(struct crypto_comp *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen) { - BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_COMPRESS); - return tfm->crt_compress.cot_compress(tfm, src, slen, dst, dlen); + return crypto_comp_crt(tfm)->cot_compress(tfm, src, slen, dst, dlen); } -static inline int crypto_comp_decompress(struct crypto_tfm *tfm, +static inline int crypto_comp_decompress(struct crypto_comp *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen) { - BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_COMPRESS); - return tfm->crt_compress.cot_decompress(tfm, src, slen, dst, dlen); + return crypto_comp_crt(tfm)->cot_decompress(tfm, src, slen, dst, dlen); } #endif /* _LINUX_CRYPTO_H */ From e4d5b79c661c7cfca9d8d5afd040a295f128d3cb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 26 Aug 2006 18:12:40 +1000 Subject: [PATCH 0335/1063] [CRYPTO] users: Use crypto_comp and crypto_has_* This patch converts all users to use the new crypto_comp type and the crypto_has_* functions. Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 8 ++++---- drivers/crypto/padlock.c | 6 +++--- drivers/net/ppp_mppe.c | 4 ++-- include/linux/crypto.h | 5 +++++ include/net/ipcomp.h | 5 ++--- net/ipv4/ipcomp.c | 25 +++++++++++++------------ net/ipv6/ipcomp6.c | 25 +++++++++++++------------ net/xfrm/xfrm_algo.c | 27 ++++++++++++++++++--------- 8 files changed, 60 insertions(+), 45 deletions(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 840ab8be0b96..83307420d31c 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -749,7 +749,7 @@ static void test_deflate(void) { unsigned int i; char result[COMP_BUF_SIZE]; - struct crypto_tfm *tfm; + struct crypto_comp *tfm; struct comp_testvec *tv; unsigned int tsize; @@ -821,7 +821,7 @@ static void test_deflate(void) ilen, dlen); } out: - crypto_free_tfm(tfm); + crypto_free_comp(tfm); } static void test_available(void) @@ -830,8 +830,8 @@ static void test_available(void) while (*name) { printk("alg %s ", *name); - printk((crypto_alg_available(*name, 0)) ? - "found\n" : "not found\n"); + printk(crypto_has_alg(*name, 0, CRYPTO_ALG_ASYNC) ? + "found\n" : "not found\n"); name++; } } diff --git a/drivers/crypto/padlock.c b/drivers/crypto/padlock.c index ce581684f4b4..d6d7dd5bb98c 100644 --- a/drivers/crypto/padlock.c +++ b/drivers/crypto/padlock.c @@ -26,13 +26,13 @@ static int __init padlock_init(void) { int success = 0; - if (crypto_alg_available("aes-padlock", 0)) + if (crypto_has_cipher("aes-padlock", 0, 0)) success++; - if (crypto_alg_available("sha1-padlock", 0)) + if (crypto_has_hash("sha1-padlock", 0, 0)) success++; - if (crypto_alg_available("sha256-padlock", 0)) + if (crypto_has_hash("sha256-padlock", 0, 0)) success++; if (!success) { diff --git a/drivers/net/ppp_mppe.c b/drivers/net/ppp_mppe.c index e7a0eb4fca60..f3655fd772f5 100644 --- a/drivers/net/ppp_mppe.c +++ b/drivers/net/ppp_mppe.c @@ -710,8 +710,8 @@ static struct compressor ppp_mppe = { static int __init ppp_mppe_init(void) { int answer; - if (!(crypto_alg_available("ecb(arc4)", 0) && - crypto_alg_available("sha1", 0))) + if (!(crypto_has_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC) && + crypto_has_hash("sha1", 0, CRYPTO_ALG_ASYNC))) return -ENODEV; sha_pad = kmalloc(sizeof(struct sha_pad), GFP_KERNEL); diff --git a/include/linux/crypto.h b/include/linux/crypto.h index cf91c4c0638b..d4f9948b64b1 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -928,6 +928,11 @@ static inline int crypto_has_comp(const char *alg_name, u32 type, u32 mask) return crypto_has_alg(alg_name, type, mask); } +static inline const char *crypto_comp_name(struct crypto_comp *tfm) +{ + return crypto_tfm_alg_name(crypto_comp_tfm(tfm)); +} + static inline struct compress_tfm *crypto_comp_crt(struct crypto_comp *tfm) { return &crypto_comp_tfm(tfm)->crt_compress; diff --git a/include/net/ipcomp.h b/include/net/ipcomp.h index b94e3047b4d9..87c1af3e5e82 100644 --- a/include/net/ipcomp.h +++ b/include/net/ipcomp.h @@ -1,15 +1,14 @@ #ifndef _NET_IPCOMP_H #define _NET_IPCOMP_H +#include #include #define IPCOMP_SCRATCH_SIZE 65400 -struct crypto_tfm; - struct ipcomp_data { u16 threshold; - struct crypto_tfm **tfms; + struct crypto_comp **tfms; }; #endif diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index a0c28b2b756e..5bb9c9f03fb6 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -32,7 +32,7 @@ struct ipcomp_tfms { struct list_head list; - struct crypto_tfm **tfms; + struct crypto_comp **tfms; int users; }; @@ -46,7 +46,7 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) int err, plen, dlen; struct ipcomp_data *ipcd = x->data; u8 *start, *scratch; - struct crypto_tfm *tfm; + struct crypto_comp *tfm; int cpu; plen = skb->len; @@ -107,7 +107,7 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) struct iphdr *iph = skb->nh.iph; struct ipcomp_data *ipcd = x->data; u8 *start, *scratch; - struct crypto_tfm *tfm; + struct crypto_comp *tfm; int cpu; ihlen = iph->ihl * 4; @@ -302,7 +302,7 @@ static void **ipcomp_alloc_scratches(void) return scratches; } -static void ipcomp_free_tfms(struct crypto_tfm **tfms) +static void ipcomp_free_tfms(struct crypto_comp **tfms) { struct ipcomp_tfms *pos; int cpu; @@ -324,28 +324,28 @@ static void ipcomp_free_tfms(struct crypto_tfm **tfms) return; for_each_possible_cpu(cpu) { - struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu); - crypto_free_tfm(tfm); + struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu); + crypto_free_comp(tfm); } free_percpu(tfms); } -static struct crypto_tfm **ipcomp_alloc_tfms(const char *alg_name) +static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name) { struct ipcomp_tfms *pos; - struct crypto_tfm **tfms; + struct crypto_comp **tfms; int cpu; /* This can be any valid CPU ID so we don't need locking. */ cpu = raw_smp_processor_id(); list_for_each_entry(pos, &ipcomp_tfms_list, list) { - struct crypto_tfm *tfm; + struct crypto_comp *tfm; tfms = pos->tfms; tfm = *per_cpu_ptr(tfms, cpu); - if (!strcmp(crypto_tfm_alg_name(tfm), alg_name)) { + if (!strcmp(crypto_comp_name(tfm), alg_name)) { pos->users++; return tfms; } @@ -359,12 +359,13 @@ static struct crypto_tfm **ipcomp_alloc_tfms(const char *alg_name) INIT_LIST_HEAD(&pos->list); list_add(&pos->list, &ipcomp_tfms_list); - pos->tfms = tfms = alloc_percpu(struct crypto_tfm *); + pos->tfms = tfms = alloc_percpu(struct crypto_comp *); if (!tfms) goto error; for_each_possible_cpu(cpu) { - struct crypto_tfm *tfm = crypto_alloc_tfm(alg_name, 0); + struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, + CRYPTO_ALG_ASYNC); if (!tfm) goto error; *per_cpu_ptr(tfms, cpu) = tfm; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 7e4d1c17bfbc..a81e9e9d93bd 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -53,7 +53,7 @@ struct ipcomp6_tfms { struct list_head list; - struct crypto_tfm **tfms; + struct crypto_comp **tfms; int users; }; @@ -70,7 +70,7 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb) int plen, dlen; struct ipcomp_data *ipcd = x->data; u8 *start, *scratch; - struct crypto_tfm *tfm; + struct crypto_comp *tfm; int cpu; if (skb_linearize_cow(skb)) @@ -129,7 +129,7 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb) struct ipcomp_data *ipcd = x->data; int plen, dlen; u8 *start, *scratch; - struct crypto_tfm *tfm; + struct crypto_comp *tfm; int cpu; hdr_len = skb->h.raw - skb->data; @@ -301,7 +301,7 @@ static void **ipcomp6_alloc_scratches(void) return scratches; } -static void ipcomp6_free_tfms(struct crypto_tfm **tfms) +static void ipcomp6_free_tfms(struct crypto_comp **tfms) { struct ipcomp6_tfms *pos; int cpu; @@ -323,28 +323,28 @@ static void ipcomp6_free_tfms(struct crypto_tfm **tfms) return; for_each_possible_cpu(cpu) { - struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu); - crypto_free_tfm(tfm); + struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu); + crypto_free_comp(tfm); } free_percpu(tfms); } -static struct crypto_tfm **ipcomp6_alloc_tfms(const char *alg_name) +static struct crypto_comp **ipcomp6_alloc_tfms(const char *alg_name) { struct ipcomp6_tfms *pos; - struct crypto_tfm **tfms; + struct crypto_comp **tfms; int cpu; /* This can be any valid CPU ID so we don't need locking. */ cpu = raw_smp_processor_id(); list_for_each_entry(pos, &ipcomp6_tfms_list, list) { - struct crypto_tfm *tfm; + struct crypto_comp *tfm; tfms = pos->tfms; tfm = *per_cpu_ptr(tfms, cpu); - if (!strcmp(crypto_tfm_alg_name(tfm), alg_name)) { + if (!strcmp(crypto_comp_name(tfm), alg_name)) { pos->users++; return tfms; } @@ -358,12 +358,13 @@ static struct crypto_tfm **ipcomp6_alloc_tfms(const char *alg_name) INIT_LIST_HEAD(&pos->list); list_add(&pos->list, &ipcomp6_tfms_list); - pos->tfms = tfms = alloc_percpu(struct crypto_tfm *); + pos->tfms = tfms = alloc_percpu(struct crypto_comp *); if (!tfms) goto error; for_each_possible_cpu(cpu) { - struct crypto_tfm *tfm = crypto_alloc_tfm(alg_name, 0); + struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, + CRYPTO_ALG_ASYNC); if (!tfm) goto error; *per_cpu_ptr(tfms, cpu) = tfm; diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 87918f281bb4..5a0dbeb6bbe8 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -363,8 +363,8 @@ struct xfrm_algo_desc *xfrm_calg_get_byid(int alg_id) EXPORT_SYMBOL_GPL(xfrm_calg_get_byid); static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list, - int entries, char *name, - int probe) + int entries, u32 type, u32 mask, + char *name, int probe) { int i, status; @@ -382,7 +382,7 @@ static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list, if (!probe) break; - status = crypto_alg_available(name, 0); + status = crypto_has_alg(name, type, mask | CRYPTO_ALG_ASYNC); if (!status) break; @@ -394,19 +394,25 @@ static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list, struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name, int probe) { - return xfrm_get_byname(aalg_list, aalg_entries(), name, probe); + return xfrm_get_byname(aalg_list, aalg_entries(), + CRYPTO_ALG_TYPE_HASH, CRYPTO_ALG_TYPE_HASH_MASK, + name, probe); } EXPORT_SYMBOL_GPL(xfrm_aalg_get_byname); struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe) { - return xfrm_get_byname(ealg_list, ealg_entries(), name, probe); + return xfrm_get_byname(ealg_list, ealg_entries(), + CRYPTO_ALG_TYPE_BLKCIPHER, CRYPTO_ALG_TYPE_MASK, + name, probe); } EXPORT_SYMBOL_GPL(xfrm_ealg_get_byname); struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe) { - return xfrm_get_byname(calg_list, calg_entries(), name, probe); + return xfrm_get_byname(calg_list, calg_entries(), + CRYPTO_ALG_TYPE_COMPRESS, CRYPTO_ALG_TYPE_MASK, + name, probe); } EXPORT_SYMBOL_GPL(xfrm_calg_get_byname); @@ -441,19 +447,22 @@ void xfrm_probe_algs(void) BUG_ON(in_softirq()); for (i = 0; i < aalg_entries(); i++) { - status = crypto_alg_available(aalg_list[i].name, 0); + status = crypto_has_hash(aalg_list[i].name, 0, + CRYPTO_ALG_ASYNC); if (aalg_list[i].available != status) aalg_list[i].available = status; } for (i = 0; i < ealg_entries(); i++) { - status = crypto_alg_available(ealg_list[i].name, 0); + status = crypto_has_blkcipher(ealg_list[i].name, 0, + CRYPTO_ALG_ASYNC); if (ealg_list[i].available != status) ealg_list[i].available = status; } for (i = 0; i < calg_entries(); i++) { - status = crypto_alg_available(calg_list[i].name, 0); + status = crypto_has_comp(calg_list[i].name, 0, + CRYPTO_ALG_ASYNC); if (calg_list[i].available != status) calg_list[i].available = status; } From 6010439f47e6b308c031dad7d99686030ef942dd Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 26 Aug 2006 18:34:10 +1000 Subject: [PATCH 0336/1063] [CRYPTO] padlock: Convert padlock-sha to use crypto_hash This patch converts padlock-sha to use crypto_hash for its fallback. It also changes the fallback selection to use selection by type instead of name. This is done through the new CRYPTO_ALG_NEED_FALLBACK bit, which is set if and only if an algorithm needs a fallback of the same type. Signed-off-by: Herbert Xu --- drivers/crypto/padlock-sha.c | 89 ++++++++++++++---------------------- include/linux/crypto.h | 6 +++ 2 files changed, 40 insertions(+), 55 deletions(-) diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c index b028db61c301..a781fd23b607 100644 --- a/drivers/crypto/padlock-sha.c +++ b/drivers/crypto/padlock-sha.c @@ -12,10 +12,11 @@ * */ +#include +#include #include #include #include -#include #include #include #include @@ -30,28 +31,17 @@ #define SHA256_DIGEST_SIZE 32 #define SHA256_HMAC_BLOCK_SIZE 64 -static char *sha1_fallback = SHA1_DEFAULT_FALLBACK; -static char *sha256_fallback = SHA256_DEFAULT_FALLBACK; - -module_param(sha1_fallback, charp, 0644); -module_param(sha256_fallback, charp, 0644); - -MODULE_PARM_DESC(sha1_fallback, "Fallback driver for SHA1. Default is " - SHA1_DEFAULT_FALLBACK); -MODULE_PARM_DESC(sha256_fallback, "Fallback driver for SHA256. Default is " - SHA256_DEFAULT_FALLBACK); - struct padlock_sha_ctx { char *data; size_t used; int bypass; void (*f_sha_padlock)(const char *in, char *out, int count); - struct crypto_tfm *fallback_tfm; + struct hash_desc fallback; }; static inline struct padlock_sha_ctx *ctx(struct crypto_tfm *tfm) { - return (struct padlock_sha_ctx *)(crypto_tfm_ctx(tfm)); + return crypto_tfm_ctx(tfm); } /* We'll need aligned address on the stack */ @@ -65,14 +55,12 @@ static void padlock_sha_bypass(struct crypto_tfm *tfm) if (ctx(tfm)->bypass) return; - BUG_ON(!ctx(tfm)->fallback_tfm); - - crypto_digest_init(ctx(tfm)->fallback_tfm); + crypto_hash_init(&ctx(tfm)->fallback); if (ctx(tfm)->data && ctx(tfm)->used) { struct scatterlist sg; sg_set_buf(&sg, ctx(tfm)->data, ctx(tfm)->used); - crypto_digest_update(ctx(tfm)->fallback_tfm, &sg, 1); + crypto_hash_update(&ctx(tfm)->fallback, &sg, sg.length); } ctx(tfm)->used = 0; @@ -95,9 +83,8 @@ static void padlock_sha_update(struct crypto_tfm *tfm, if (unlikely(ctx(tfm)->bypass)) { struct scatterlist sg; - BUG_ON(!ctx(tfm)->fallback_tfm); sg_set_buf(&sg, (uint8_t *)data, length); - crypto_digest_update(ctx(tfm)->fallback_tfm, &sg, 1); + crypto_hash_update(&ctx(tfm)->fallback, &sg, length); return; } @@ -160,8 +147,7 @@ static void padlock_do_sha256(const char *in, char *out, int count) static void padlock_sha_final(struct crypto_tfm *tfm, uint8_t *out) { if (unlikely(ctx(tfm)->bypass)) { - BUG_ON(!ctx(tfm)->fallback_tfm); - crypto_digest_final(ctx(tfm)->fallback_tfm, out); + crypto_hash_final(&ctx(tfm)->fallback, out); ctx(tfm)->bypass = 0; return; } @@ -172,8 +158,11 @@ static void padlock_sha_final(struct crypto_tfm *tfm, uint8_t *out) ctx(tfm)->used = 0; } -static int padlock_cra_init(struct crypto_tfm *tfm, const char *fallback_driver_name) +static int padlock_cra_init(struct crypto_tfm *tfm) { + const char *fallback_driver_name = tfm->__crt_alg->cra_name; + struct crypto_hash *fallback_tfm; + /* For now we'll allocate one page. This * could eventually be configurable one day. */ ctx(tfm)->data = (char *)__get_free_page(GFP_KERNEL); @@ -181,14 +170,17 @@ static int padlock_cra_init(struct crypto_tfm *tfm, const char *fallback_driver_ return -ENOMEM; /* Allocate a fallback and abort if it failed. */ - ctx(tfm)->fallback_tfm = crypto_alloc_tfm(fallback_driver_name, 0); - if (!ctx(tfm)->fallback_tfm) { + fallback_tfm = crypto_alloc_hash(fallback_driver_name, 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(fallback_tfm)) { printk(KERN_WARNING PFX "Fallback driver '%s' could not be loaded!\n", fallback_driver_name); free_page((unsigned long)(ctx(tfm)->data)); - return -ENOENT; + return PTR_ERR(fallback_tfm); } + ctx(tfm)->fallback.tfm = fallback_tfm; return 0; } @@ -196,14 +188,14 @@ static int padlock_sha1_cra_init(struct crypto_tfm *tfm) { ctx(tfm)->f_sha_padlock = padlock_do_sha1; - return padlock_cra_init(tfm, sha1_fallback); + return padlock_cra_init(tfm); } static int padlock_sha256_cra_init(struct crypto_tfm *tfm) { ctx(tfm)->f_sha_padlock = padlock_do_sha256; - return padlock_cra_init(tfm, sha256_fallback); + return padlock_cra_init(tfm); } static void padlock_cra_exit(struct crypto_tfm *tfm) @@ -213,16 +205,16 @@ static void padlock_cra_exit(struct crypto_tfm *tfm) ctx(tfm)->data = NULL; } - BUG_ON(!ctx(tfm)->fallback_tfm); - crypto_free_tfm(ctx(tfm)->fallback_tfm); - ctx(tfm)->fallback_tfm = NULL; + crypto_free_hash(ctx(tfm)->fallback.tfm); + ctx(tfm)->fallback.tfm = NULL; } static struct crypto_alg sha1_alg = { .cra_name = "sha1", .cra_driver_name = "sha1-padlock", .cra_priority = PADLOCK_CRA_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_DIGEST, + .cra_flags = CRYPTO_ALG_TYPE_DIGEST | + CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = SHA1_HMAC_BLOCK_SIZE, .cra_ctxsize = sizeof(struct padlock_sha_ctx), .cra_module = THIS_MODULE, @@ -243,7 +235,8 @@ static struct crypto_alg sha256_alg = { .cra_name = "sha256", .cra_driver_name = "sha256-padlock", .cra_priority = PADLOCK_CRA_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_DIGEST, + .cra_flags = CRYPTO_ALG_TYPE_DIGEST | + CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = SHA256_HMAC_BLOCK_SIZE, .cra_ctxsize = sizeof(struct padlock_sha_ctx), .cra_module = THIS_MODULE, @@ -262,29 +255,15 @@ static struct crypto_alg sha256_alg = { static void __init padlock_sha_check_fallbacks(void) { - struct crypto_tfm *tfm; + if (!crypto_has_hash("sha1", 0, CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK)) + printk(KERN_WARNING PFX + "Couldn't load fallback module for sha1.\n"); - /* We'll try to allocate one TFM for each fallback - * to test that the modules are available. */ - tfm = crypto_alloc_tfm(sha1_fallback, 0); - if (!tfm) { - printk(KERN_WARNING PFX "Couldn't load fallback module for '%s'. Tried '%s'.\n", - sha1_alg.cra_name, sha1_fallback); - } else { - printk(KERN_NOTICE PFX "Fallback for '%s' is driver '%s' (prio=%d)\n", sha1_alg.cra_name, - crypto_tfm_alg_driver_name(tfm), crypto_tfm_alg_priority(tfm)); - crypto_free_tfm(tfm); - } - - tfm = crypto_alloc_tfm(sha256_fallback, 0); - if (!tfm) { - printk(KERN_WARNING PFX "Couldn't load fallback module for '%s'. Tried '%s'.\n", - sha256_alg.cra_name, sha256_fallback); - } else { - printk(KERN_NOTICE PFX "Fallback for '%s' is driver '%s' (prio=%d)\n", sha256_alg.cra_name, - crypto_tfm_alg_driver_name(tfm), crypto_tfm_alg_priority(tfm)); - crypto_free_tfm(tfm); - } + if (!crypto_has_hash("sha256", 0, CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK)) + printk(KERN_WARNING PFX + "Couldn't load fallback module for sha256.\n"); } static int __init padlock_init(void) diff --git a/include/linux/crypto.h b/include/linux/crypto.h index d4f9948b64b1..187c6ea91959 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -42,6 +42,12 @@ #define CRYPTO_ALG_DYING 0x00000040 #define CRYPTO_ALG_ASYNC 0x00000080 +/* + * Set this bit if and only if the algorithm requires another algorithm of + * the same type to handle corner cases. + */ +#define CRYPTO_ALG_NEED_FALLBACK 0x00000100 + /* * Transform masks and values (for crt_flags). */ From 3ad819c61f5f8347f39cdcbe652b3c60ec615888 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 26 Aug 2006 18:44:31 +1000 Subject: [PATCH 0337/1063] [CRYPTO] api: Deprecate crypto_digest_* and crypto_alg_available This patch marks the crypto_digest_* functions and crypto_alg_available as deprecated. They've been replaced by crypto_hash_* and crypto_has_* respectively. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 187c6ea91959..8f2ffa4caabf 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -241,9 +241,12 @@ int crypto_unregister_alg(struct crypto_alg *alg); * Algorithm query interface. */ #ifdef CONFIG_CRYPTO -int crypto_alg_available(const char *name, u32 flags); +int crypto_alg_available(const char *name, u32 flags) + __deprecated_for_modules; int crypto_has_alg(const char *name, u32 type, u32 mask); #else +static int crypto_alg_available(const char *name, u32 flags); + __deprecated_for_modules; static inline int crypto_alg_available(const char *name, u32 flags) { return 0; @@ -704,12 +707,15 @@ static inline void crypto_cipher_decrypt_one(struct crypto_cipher *tfm, dst, src); } -void crypto_digest_init(struct crypto_tfm *tfm); +void crypto_digest_init(struct crypto_tfm *tfm) __deprecated_for_modules; void crypto_digest_update(struct crypto_tfm *tfm, - struct scatterlist *sg, unsigned int nsg); -void crypto_digest_final(struct crypto_tfm *tfm, u8 *out); + struct scatterlist *sg, unsigned int nsg) + __deprecated_for_modules; +void crypto_digest_final(struct crypto_tfm *tfm, u8 *out) + __deprecated_for_modules; void crypto_digest_digest(struct crypto_tfm *tfm, - struct scatterlist *sg, unsigned int nsg, u8 *out); + struct scatterlist *sg, unsigned int nsg, u8 *out) + __deprecated_for_modules; static inline struct crypto_hash *__crypto_hash_cast(struct crypto_tfm *tfm) { @@ -723,6 +729,8 @@ static inline struct crypto_hash *crypto_hash_cast(struct crypto_tfm *tfm) return __crypto_hash_cast(tfm); } +static int crypto_digest_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) __deprecated; static inline int crypto_digest_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { From 3c164bd8153c4644a22dc2101b003c67cd2a0d0a Mon Sep 17 00:00:00 2001 From: Rik Snel Date: Sat, 2 Sep 2006 18:17:33 +1000 Subject: [PATCH 0338/1063] [BLOCK] dm-crypt: trivial comment improvements Just some minor comment nits. - little-endian is better than low-endian - and since it is called essiv everywere it should also be essiv in the comments (and not ess_iv) Signed-off-by: Rik Snel Signed-off-by: Herbert Xu --- drivers/md/dm-crypt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 73f8be837a45..bdbd34993a80 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -99,12 +99,12 @@ static kmem_cache_t *_crypt_io_pool; /* * Different IV generation algorithms: * - * plain: the initial vector is the 32-bit low-endian version of the sector + * plain: the initial vector is the 32-bit little-endian version of the sector * number, padded with zeros if neccessary. * - * ess_iv: "encrypted sector|salt initial vector", the sector number is - * encrypted with the bulk cipher using a salt as key. The salt - * should be derived from the bulk cipher's key via hashing. + * essiv: "encrypted sector|salt initial vector", the sector number is + * encrypted with the bulk cipher using a salt as key. The salt + * should be derived from the bulk cipher's key via hashing. * * plumb: unimplemented, see: * http://article.gmane.org/gmane.linux.kernel.device-mapper.dm-crypt/454 From 8ce90907ea534f10075a9eba5f83d6dd77b39cb6 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Thu, 21 Sep 2006 00:06:21 -0400 Subject: [PATCH 0339/1063] [netdrvr] lp486e: fix typo inside #if 0'd code, but it bugged me. Really, we should probably just delete the driver. Signed-off-by: Jeff Garzik --- drivers/net/lp486e.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/lp486e.c b/drivers/net/lp486e.c index b783a6984abc..393aba95cf12 100644 --- a/drivers/net/lp486e.c +++ b/drivers/net/lp486e.c @@ -442,16 +442,16 @@ init_rx_bufs(struct net_device *dev, int num) { if (rbd) { rbd->pad = 0; rbd->count = 0; - rbd->skb = dev_alloc_skb(RX_SKB_SIZE); + rbd->skb = dev_alloc_skb(RX_SKBSIZE); if (!rbd->skb) { printk("dev_alloc_skb failed"); } rbd->next = rfd->rbd; if (i) { rfd->rbd->prev = rbd; - rbd->size = RX_SKB_SIZE; + rbd->size = RX_SKBSIZE; } else { - rbd->size = (RX_SKB_SIZE | RBD_EL); + rbd->size = (RX_SKBSIZE | RBD_EL); lp->rbd_tail = rbd; } From 54caf44da31995df1f51174468fd9e83ca5c67a2 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Thu, 21 Sep 2006 00:08:10 -0400 Subject: [PATCH 0340/1063] [netdrvr] mv643xx_eth: fix obvious typo, which caused build breakage The last minute fix submitted by the author fixed a bug, but broke the driver build. Noticed by Al Viro, since I can't build on said platform. Signed-off-by: Jeff Garzik --- drivers/net/mv643xx_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c index eeab1df5bef3..59de3e74d2d7 100644 --- a/drivers/net/mv643xx_eth.c +++ b/drivers/net/mv643xx_eth.c @@ -385,7 +385,7 @@ static int mv643xx_eth_receive_queue(struct net_device *dev, int budget) struct pkt_info pkt_info; while (budget-- > 0 && eth_port_receive(mp, &pkt_info) == ETH_OK) { - dma_unmap_single(NULL, pkt_info.buf_ptr, RX_SKB_SIZE, + dma_unmap_single(NULL, pkt_info.buf_ptr, ETH_RX_SKB_SIZE, DMA_FROM_DEVICE); mp->rx_desc_count--; received_packets++; From 2fe87f02a04ad6e7075023a87fe38eb458a4bb9d Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 21 Sep 2006 07:02:52 +0000 Subject: [PATCH 0341/1063] [CIFS] Support deep tree mounts (e.g. mounts to //server/share/path) Samba bugzilla #4040 Signed-off-by: Steve French --- fs/cifs/CHANGES | 7 ++++++- fs/cifs/cifs_fs_sb.h | 2 ++ fs/cifs/cifsfs.h | 2 +- fs/cifs/cifspdu.h | 2 ++ fs/cifs/connect.c | 47 ++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/dir.c | 23 ++++++++++++++-------- fs/cifs/xattr.c | 2 +- 7 files changed, 74 insertions(+), 11 deletions(-) diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 0feb3bd49cb8..1eb9a2ec0a3b 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -1,3 +1,7 @@ +Version 1.46 +------------ +Support deep tree mounts. Better support OS/2, Win9x (DOS) time stamps. + Version 1.45 ------------ Do not time out lockw calls when using posix extensions. Do not @@ -6,7 +10,8 @@ on requests on other threads. Improve POSIX locking emulation, (lock cancel now works, and unlock of merged range works even to Windows servers now). Fix oops on mount to lanman servers (win9x, os/2 etc.) when null password. Do not send listxattr -(SMB to query all EAs) if nouser_xattr specified. +(SMB to query all EAs) if nouser_xattr specified. Fix SE Linux +problem (instantiate inodes/dentries in right order for readdir). Version 1.44 ------------ diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index ad58eb0c4d6d..fd1e52ebcee6 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -40,5 +40,7 @@ struct cifs_sb_info { mode_t mnt_file_mode; mode_t mnt_dir_mode; int mnt_cifs_flags; + int prepathlen; + char * prepath; }; #endif /* _CIFS_FS_SB_H */ diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 39ee8ef3bdeb..bea875d9a46a 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -100,5 +100,5 @@ extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t); extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); extern int cifs_ioctl (struct inode * inode, struct file * filep, unsigned int command, unsigned long arg); -#define CIFS_VERSION "1.45" +#define CIFS_VERSION "1.46" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 86239023545b..81df2bf8e75a 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -1344,6 +1344,7 @@ struct smb_t2_rsp { #define SMB_QUERY_ATTR_FLAGS 0x206 /* append,immutable etc. */ #define SMB_QUERY_POSIX_PERMISSION 0x207 #define SMB_QUERY_POSIX_LOCK 0x208 +/* #define SMB_POSIX_OPEN 0x209 */ #define SMB_QUERY_FILE_INTERNAL_INFO 0x3ee #define SMB_QUERY_FILE_ACCESS_INFO 0x3f0 #define SMB_QUERY_FILE_NAME_INFO2 0x3f1 /* 0x30 bytes */ @@ -1363,6 +1364,7 @@ struct smb_t2_rsp { #define SMB_SET_XATTR 0x205 #define SMB_SET_ATTR_FLAGS 0x206 /* append, immutable etc. */ #define SMB_SET_POSIX_LOCK 0x208 +#define SMB_POSIX_OPEN 0x209 #define SMB_SET_FILE_BASIC_INFO2 0x3ec #define SMB_SET_FILE_RENAME_INFORMATION 0x3f2 /* BB check if qpathinfo level too */ #define SMB_FILE_ALL_INFO2 0x3fa diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 5d394c726860..0e9ba0b9d71e 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -89,6 +89,7 @@ struct smb_vol { unsigned int wsize; unsigned int sockopt; unsigned short int port; + char * prepath; }; static int ipv4_connect(struct sockaddr_in *psin_server, @@ -993,6 +994,28 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) printk(KERN_WARNING "CIFS: domain name too long\n"); return 1; } + } else if (strnicmp(data, "prefixpath", 10) == 0) { + if (!value || !*value) { + printk(KERN_WARNING + "CIFS: invalid path prefix\n"); + return 1; /* needs_arg; */ + } + if ((temp_len = strnlen(value, 1024)) < 1024) { + if(value[0] != '/') + temp_len++; /* missing leading slash */ + vol->prepath = kmalloc(temp_len+1,GFP_KERNEL); + if(vol->prepath == NULL) + return 1; + if(value[0] != '/') { + vol->prepath[0] = '/'; + strcpy(vol->prepath+1,value); + } else + strcpy(vol->prepath,value); + cFYI(1,("prefix path %s",vol->prepath)); + } else { + printk(KERN_WARNING "CIFS: prefix too long\n"); + return 1; + } } else if (strnicmp(data, "iocharset", 9) == 0) { if (!value || !*value) { printk(KERN_WARNING "CIFS: invalid iocharset specified\n"); @@ -1605,6 +1628,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, if (cifs_parse_mount_options(mount_data, devname, &volume_info)) { kfree(volume_info.UNC); kfree(volume_info.password); + kfree(volume_info.prepath); FreeXid(xid); return -EINVAL; } @@ -1619,6 +1643,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, locations such as env variables and files on disk */ kfree(volume_info.UNC); kfree(volume_info.password); + kfree(volume_info.prepath); FreeXid(xid); return -EINVAL; } @@ -1639,6 +1664,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, /* we failed translating address */ kfree(volume_info.UNC); kfree(volume_info.password); + kfree(volume_info.prepath); FreeXid(xid); return -EINVAL; } @@ -1651,6 +1677,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, cERROR(1,("Connecting to DFS root not implemented yet")); kfree(volume_info.UNC); kfree(volume_info.password); + kfree(volume_info.prepath); FreeXid(xid); return -EINVAL; } else /* which servers DFS root would we conect to */ { @@ -1658,6 +1685,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, ("CIFS mount error: No UNC path (e.g. -o unc=//192.168.1.100/public) specified")); kfree(volume_info.UNC); kfree(volume_info.password); + kfree(volume_info.prepath); FreeXid(xid); return -EINVAL; } @@ -1672,6 +1700,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, cERROR(1,("CIFS mount error: iocharset %s not found",volume_info.iocharset)); kfree(volume_info.UNC); kfree(volume_info.password); + kfree(volume_info.prepath); FreeXid(xid); return -ELIBACC; } @@ -1688,6 +1717,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, else { kfree(volume_info.UNC); kfree(volume_info.password); + kfree(volume_info.prepath); FreeXid(xid); return -EINVAL; } @@ -1710,6 +1740,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, sock_release(csocket); kfree(volume_info.UNC); kfree(volume_info.password); + kfree(volume_info.prepath); FreeXid(xid); return rc; } @@ -1720,6 +1751,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, sock_release(csocket); kfree(volume_info.UNC); kfree(volume_info.password); + kfree(volume_info.prepath); FreeXid(xid); return rc; } else { @@ -1744,6 +1776,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, sock_release(csocket); kfree(volume_info.UNC); kfree(volume_info.password); + kfree(volume_info.prepath); FreeXid(xid); return rc; } @@ -1831,6 +1864,14 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, /* Windows ME may prefer this */ cFYI(1,("readsize set to minimum 2048")); } + /* calculate prepath */ + cifs_sb->prepath = volume_info.prepath; + if(cifs_sb->prepath) { + cifs_sb->prepathlen = strlen(cifs_sb->prepath); + cifs_sb->prepath[0] = CIFS_DIR_SEP(cifs_sb); + volume_info.prepath = NULL; + } else + cifs_sb->prepathlen = 0; cifs_sb->mnt_uid = volume_info.linux_uid; cifs_sb->mnt_gid = volume_info.linux_gid; cifs_sb->mnt_file_mode = volume_info.file_mode; @@ -2008,6 +2049,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, the password ptr is put in the new session structure (in which case the password will be freed at unmount time) */ kfree(volume_info.UNC); + kfree(volume_info.prepath); FreeXid(xid); return rc; } @@ -3195,6 +3237,7 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) int xid; struct cifsSesInfo *ses = NULL; struct task_struct *cifsd_task; + char * tmp; xid = GetXid(); @@ -3228,6 +3271,10 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) } cifs_sb->tcon = NULL; + tmp = cifs_sb->prepath; + cifs_sb->prepathlen = 0; + cifs_sb->prepath = NULL; + kfree(tmp); if (ses) schedule_timeout_interruptible(msecs_to_jiffies(500)); if (ses) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 914239d53634..66b825ade3e1 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -46,7 +46,8 @@ char * build_path_from_dentry(struct dentry *direntry) { struct dentry *temp; - int namelen = 0; + int namelen; + int pplen; char *full_path; char dirsep; @@ -56,7 +57,9 @@ build_path_from_dentry(struct dentry *direntry) when the server crashed */ dirsep = CIFS_DIR_SEP(CIFS_SB(direntry->d_sb)); + pplen = CIFS_SB(direntry->d_sb)->prepathlen; cifs_bp_rename_retry: + namelen = pplen; for (temp = direntry; !IS_ROOT(temp);) { namelen += (1 + temp->d_name.len); temp = temp->d_parent; @@ -70,7 +73,6 @@ cifs_bp_rename_retry: if(full_path == NULL) return full_path; full_path[namelen] = 0; /* trailing null */ - for (temp = direntry; !IS_ROOT(temp);) { namelen -= 1 + temp->d_name.len; if (namelen < 0) { @@ -79,7 +81,7 @@ cifs_bp_rename_retry: full_path[namelen] = dirsep; strncpy(full_path + namelen + 1, temp->d_name.name, temp->d_name.len); - cFYI(0, (" name: %s ", full_path + namelen)); + cFYI(0, ("name: %s", full_path + namelen)); } temp = temp->d_parent; if(temp == NULL) { @@ -88,18 +90,23 @@ cifs_bp_rename_retry: return NULL; } } - if (namelen != 0) { + if (namelen != pplen) { cERROR(1, - ("We did not end path lookup where we expected namelen is %d", + ("did not end path lookup where expected namelen is %d", namelen)); - /* presumably this is only possible if we were racing with a rename + /* presumably this is only possible if racing with a rename of one of the parent directories (we can not lock the dentries above us to prevent this, but retrying should be harmless) */ kfree(full_path); - namelen = 0; goto cifs_bp_rename_retry; } - + /* DIR_SEP already set for byte 0 / vs \ but not for + subsequent slashes in prepath which currently must + be entered the right way - not sure if there is an alternative + since the '\' is a valid posix character so we can not switch + those safely to '/' if any are found in the middle of the prepath */ + /* BB test paths to Windows with '/' in the midst of prepath */ + strncpy(full_path,CIFS_SB(direntry->d_sb)->prepath,pplen); return full_path; } diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 067648b7179b..18fcec190f8b 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -269,7 +269,7 @@ ssize_t cifs_getxattr(struct dentry * direntry, const char * ea_name, rc = CIFSSMBGetCIFSACL(xid, pTcon, fid, ea_value, buf_size, ACL_TYPE_ACCESS); - CIFSSMBClose(xid, pTcon, fid) + CIFSSMBClose(xid, pTcon, fid); } } */ /* BB enable after fixing up return data */ From 6b70c9559bcf381a6521e38b0dd5d3d4d905868a Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 21 Sep 2006 07:35:29 +0000 Subject: [PATCH 0342/1063] [CIFS] New POSIX locking code not setting rc properly to zero on successful unlock in case where server does not support POSIX locks and nobrl is not specified. Signed-off-by: Steve French --- fs/cifs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index e9c5ba9084fc..ddb012a68023 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -752,6 +752,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) int stored_rc = 0; struct cifsLockInfo *li, *tmp; + rc = 0; down(&fid->lock_sem); list_for_each_entry_safe(li, tmp, &fid->llist, llist) { if (pfLock->fl_start <= li->offset && @@ -766,7 +767,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) kfree(li); } } - up(&fid->lock_sem); + up(&fid->lock_sem); } } From 838fdb4d2d0e4730364220b51be28a42d04c665e Mon Sep 17 00:00:00 2001 From: Peter Bergner Date: Thu, 14 Sep 2006 14:18:38 -0500 Subject: [PATCH 0343/1063] [POWERPC] Add AT_PLATFORM value for Xilinx Virtex-4 FX Jakub noticed the cputable.c entry for Xilinx Virtex-4 FX was missing a .platform value, so the AT_PLATFORM value wouldn't be set correctly. This adds it. Signed-off-by: Peter Bergner Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/cputable.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index db65c9f6559a..190a57e20765 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -949,6 +949,7 @@ struct cpu_spec cpu_specs[] = { PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, .icache_bsize = 32, .dcache_bsize = 32, + .platform = "ppc405", }, { /* 405EP */ .pvr_mask = 0xffff0000, From 4dbefe6459555d6fb9d08743615fbaa53894beb2 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Fri, 15 Sep 2006 14:53:10 -0500 Subject: [PATCH 0344/1063] [POWERPC] PPC: Fix xmon stack frame address in backtrace The stack frame address was being printed incorrectly in the backtrace option of XMON on PPC. This patch fixes it to print the actual stack address instead of the address of the local variable that contains it. Signed-off-by: Josh Boyer Signed-off-by: Paul Mackerras --- arch/ppc/xmon/xmon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ppc/xmon/xmon.c b/arch/ppc/xmon/xmon.c index 25d032b2aec7..b1a91744fd2d 100644 --- a/arch/ppc/xmon/xmon.c +++ b/arch/ppc/xmon/xmon.c @@ -806,7 +806,7 @@ backtrace(struct pt_regs *excp) for (; sp != 0; sp = stack[0]) { if (mread(sp, stack, sizeof(stack)) != sizeof(stack)) break; - printf("[%.8lx] ", stack); + printf("[%.8lx] ", stack[0]); xmon_print_symbol(stack[1], " ", "\n"); if (stack[1] == (unsigned) &ret_from_except || stack[1] == (unsigned) &ret_from_except_full From af525592187951a595c73de11b48969a13b5d5a3 Mon Sep 17 00:00:00 2001 From: Linas Vepstas Date: Fri, 15 Sep 2006 18:55:34 -0500 Subject: [PATCH 0345/1063] [POWERPC] EEH: balance pcidev_get/put calls This corrects a pci_dev get/put imbalance that can occur only in highly unlikely situations (kmalloc failures, pci devices with overlapping resource addresses). No actual failures seen, this was spotted during code review. Signed-off-by: Linas Vepstas Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/pseries/eeh_cache.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c index c37a8497c60f..b6b462d3c604 100644 --- a/arch/powerpc/platforms/pseries/eeh_cache.c +++ b/arch/powerpc/platforms/pseries/eeh_cache.c @@ -157,6 +157,7 @@ pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo, if (!piar) return NULL; + pci_dev_get(dev); piar->addr_lo = alo; piar->addr_hi = ahi; piar->pcidev = dev; @@ -178,7 +179,6 @@ static void __pci_addr_cache_insert_device(struct pci_dev *dev) struct device_node *dn; struct pci_dn *pdn; int i; - int inserted = 0; dn = pci_device_to_OF_node(dev); if (!dn) { @@ -197,9 +197,6 @@ static void __pci_addr_cache_insert_device(struct pci_dev *dev) return; } - /* The cache holds a reference to the device... */ - pci_dev_get(dev); - /* Walk resources on this device, poke them into the tree */ for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { unsigned long start = pci_resource_start(dev,i); @@ -212,12 +209,7 @@ static void __pci_addr_cache_insert_device(struct pci_dev *dev) if (start == 0 || ~start == 0 || end == 0 || ~end == 0) continue; pci_addr_cache_insert(dev, start, end, flags); - inserted = 1; } - - /* If there was nothing to add, the cache has no reference... */ - if (!inserted) - pci_dev_put(dev); } /** @@ -240,7 +232,6 @@ void pci_addr_cache_insert_device(struct pci_dev *dev) static inline void __pci_addr_cache_remove_device(struct pci_dev *dev) { struct rb_node *n; - int removed = 0; restart: n = rb_first(&pci_io_addr_cache_root.rb_root); @@ -250,16 +241,12 @@ restart: if (piar->pcidev == dev) { rb_erase(n, &pci_io_addr_cache_root.rb_root); - removed = 1; + pci_dev_put(piar->pcidev); kfree(piar); goto restart; } n = rb_next(n); } - - /* The cache no longer holds its reference to this device... */ - if (removed) - pci_dev_put(dev); } /** From cb5b562444c27cf53f5d297bd7a89807ea614cf3 Mon Sep 17 00:00:00 2001 From: Linas Vepstas Date: Fri, 15 Sep 2006 18:56:35 -0500 Subject: [PATCH 0346/1063] [POWERPC] EEH: code comment cleanup Clean up subroutine documentation; mostly formatting changes, with some new content. Signed-off-by: Linas Vepstas Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/pseries/eeh.c | 19 +++++++++++---- arch/powerpc/platforms/pseries/eeh_driver.c | 27 ++++++++++++++++----- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 5a23ce5e16ff..fb91842fc819 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -449,7 +449,11 @@ EXPORT_SYMBOL(eeh_check_failure); /* ------------------------------------------------------------- */ /* The code below deals with error recovery */ -/** Return negative value if a permanent error, else return +/** + * eeh_slot_availability - returns error status of slot + * @pdn pci device node + * + * Return negative value if a permanent error, else return * a number of milliseconds to wait until the PCI slot is * ready to be used. */ @@ -477,8 +481,10 @@ eeh_slot_availability(struct pci_dn *pdn) return -1; } -/** rtas_pci_slot_reset raises/lowers the pci #RST line - * state: 1/0 to raise/lower the #RST +/** + * rtas_pci_slot_reset - raises/lowers the pci #RST line + * @pdn pci device node + * @state: 1/0 to raise/lower the #RST * * Clear the EEH-frozen condition on a slot. This routine * asserts the PCI #RST line if the 'state' argument is '1', @@ -518,8 +524,9 @@ rtas_pci_slot_reset(struct pci_dn *pdn, int state) } } -/** rtas_set_slot_reset -- assert the pci #RST line for 1/4 second - * dn -- device node to be reset. +/** + * rtas_set_slot_reset -- assert the pci #RST line for 1/4 second + * @pdn: pci device node to be reset. * * Return 0 if success, else a non-zero value. */ @@ -582,6 +589,8 @@ rtas_set_slot_reset(struct pci_dn *pdn) /** * __restore_bars - Restore the Base Address Registers + * @pdn: pci device node + * * Loads the PCI configuration space base address registers, * the expansion ROM base address, the latency timer, and etc. * from the saved values in the device node. diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index 3269d2cd428b..045cd7a37339 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -77,8 +77,12 @@ static int irq_in_use(unsigned int irq) } /* ------------------------------------------------------- */ -/** eeh_report_error - report an EEH error to each device, - * collect up and merge the device responses. +/** + * eeh_report_error - report pci error to each device driver + * + * Report an EEH error to each device driver, collect up and + * merge the device driver responses. Cumulative response + * passed back in "userdata". */ static void eeh_report_error(struct pci_dev *dev, void *userdata) @@ -108,8 +112,8 @@ static void eeh_report_error(struct pci_dev *dev, void *userdata) rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; } -/** eeh_report_reset -- tell this device that the pci slot - * has been reset. +/** + * eeh_report_reset - tell device that slot has been reset */ static void eeh_report_reset(struct pci_dev *dev, void *userdata) @@ -132,6 +136,10 @@ static void eeh_report_reset(struct pci_dev *dev, void *userdata) driver->err_handler->slot_reset(dev); } +/** + * eeh_report_resume - tell device to resume normal operations + */ + static void eeh_report_resume(struct pci_dev *dev, void *userdata) { struct pci_driver *driver = dev->driver; @@ -148,6 +156,13 @@ static void eeh_report_resume(struct pci_dev *dev, void *userdata) driver->err_handler->resume(dev); } +/** + * eeh_report_failure - tell device driver that device is dead. + * + * This informs the device driver that the device is permanently + * dead, and that no further recovery attempts will be made on it. + */ + static void eeh_report_failure(struct pci_dev *dev, void *userdata) { struct pci_driver *driver = dev->driver; @@ -190,11 +205,11 @@ static void eeh_report_failure(struct pci_dev *dev, void *userdata) /** * eeh_reset_device() -- perform actual reset of a pci slot - * Args: bus: pointer to the pci bus structure corresponding + * @bus: pointer to the pci bus structure corresponding * to the isolated slot. A non-null value will * cause all devices under the bus to be removed * and then re-added. - * pe_dn: pointer to a "Partionable Endpoint" device node. + * @pe_dn: pointer to a "Partionable Endpoint" device node. * This is the top-level structure on which pci * bus resets can be performed. */ From 47b5c838af92d3504e99633bf568578203b7305f Mon Sep 17 00:00:00 2001 From: Linas Vepstas Date: Fri, 15 Sep 2006 18:57:42 -0500 Subject: [PATCH 0347/1063] [POWERPC] EEH: enable MMIO/DMA on frozen slot Add wrapper around the rtas call to enable MMIO or DMA on a frozen pci slot. Signed-off-by: Linas Vepstas Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/pseries/eeh.c | 29 ++++++++++++++++++++++++++++ include/asm-powerpc/ppc-pci.h | 11 +++++++++++ 2 files changed, 40 insertions(+) diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index fb91842fc819..4534886e3b4e 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -481,6 +481,35 @@ eeh_slot_availability(struct pci_dn *pdn) return -1; } +/** + * rtas_pci_enable - enable MMIO or DMA transfers for this slot + * @pdn pci device node + */ + +int +rtas_pci_enable(struct pci_dn *pdn, int function) +{ + int config_addr; + int rc; + + /* Use PE configuration address, if present */ + config_addr = pdn->eeh_config_addr; + if (pdn->eeh_pe_config_addr) + config_addr = pdn->eeh_pe_config_addr; + + rc = rtas_call(ibm_set_eeh_option, 4, 1, NULL, + config_addr, + BUID_HI(pdn->phb->buid), + BUID_LO(pdn->phb->buid), + function); + + if (rc) + printk(KERN_WARNING "EEH: Cannot enable function %d, err=%d dn=%s\n", + function, rc, pdn->node->full_name); + + return rc; +} + /** * rtas_pci_slot_reset - raises/lowers the pci #RST line * @pdn pci device node diff --git a/include/asm-powerpc/ppc-pci.h b/include/asm-powerpc/ppc-pci.h index cf79bc7ebb55..1115756c79f9 100644 --- a/include/asm-powerpc/ppc-pci.h +++ b/include/asm-powerpc/ppc-pci.h @@ -68,6 +68,17 @@ struct pci_dev *pci_get_device_by_addr(unsigned long addr); */ void eeh_slot_error_detail (struct pci_dn *pdn, int severity); +/** + * rtas_pci_enableo - enable IO transfers for this slot + * @pdn: pci device node + * @function: either EEH_THAW_MMIO or EEH_THAW_DMA + * + * Enable I/O transfers to this slot + */ +#define EEH_THAW_MMIO 2 +#define EEH_THAW_DMA 3 +int rtas_pci_enable(struct pci_dn *pdn, int function); + /** * rtas_set_slot_reset -- unfreeze a frozen slot * From 6a1ca373a16b0e170164ab8a2d6d01eab2a22f6e Mon Sep 17 00:00:00 2001 From: Linas Vepstas Date: Fri, 15 Sep 2006 18:58:59 -0500 Subject: [PATCH 0348/1063] [POWERPC] EEH: support MMIO enable recovery step Update to the PowerPC PCI error recovery code. Add code to enable MMIO if a device driver reports that it is capable of recovering on its own. One anticipated use of this having a device driver enable MMIO so that it can take a register dump, which might then be followed by the device driver requesting a full reset. Signed-off-by: Linas Vepstas Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/pseries/eeh_driver.c | 81 ++++++++++++++++----- 1 file changed, 64 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index 045cd7a37339..c2bc9904f1cb 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -100,14 +100,38 @@ static void eeh_report_error(struct pci_dev *dev, void *userdata) PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED; disable_irq_nosync(dev->irq); } - if (!driver->err_handler) - return; - if (!driver->err_handler->error_detected) + if (!driver->err_handler || + !driver->err_handler->error_detected) return; rc = driver->err_handler->error_detected (dev, pci_channel_io_frozen); if (*res == PCI_ERS_RESULT_NONE) *res = rc; - if (*res == PCI_ERS_RESULT_NEED_RESET) return; + if (*res == PCI_ERS_RESULT_DISCONNECT && + rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; +} + +/** + * eeh_report_mmio_enabled - tell drivers that MMIO has been enabled + * + * Report an EEH error to each device driver, collect up and + * merge the device driver responses. Cumulative response + * passed back in "userdata". + */ + +static void eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata) +{ + enum pci_ers_result rc, *res = userdata; + struct pci_driver *driver = dev->driver; + + // dev->error_state = pci_channel_mmio_enabled; + + if (!driver || + !driver->err_handler || + !driver->err_handler->mmio_enabled) + return; + + rc = driver->err_handler->mmio_enabled (dev); + if (*res == PCI_ERS_RESULT_NONE) *res = rc; if (*res == PCI_ERS_RESULT_DISCONNECT && rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; } @@ -118,6 +142,7 @@ static void eeh_report_error(struct pci_dev *dev, void *userdata) static void eeh_report_reset(struct pci_dev *dev, void *userdata) { + enum pci_ers_result rc, *res = userdata; struct pci_driver *driver = dev->driver; struct device_node *dn = pci_device_to_OF_node(dev); @@ -128,12 +153,14 @@ static void eeh_report_reset(struct pci_dev *dev, void *userdata) PCI_DN(dn)->eeh_mode &= ~EEH_MODE_IRQ_DISABLED; enable_irq(dev->irq); } - if (!driver->err_handler) - return; - if (!driver->err_handler->slot_reset) + if (!driver->err_handler || + !driver->err_handler->slot_reset) return; - driver->err_handler->slot_reset(dev); + rc = driver->err_handler->slot_reset(dev); + if (*res == PCI_ERS_RESULT_NONE) *res = rc; + if (*res == PCI_ERS_RESULT_DISCONNECT && + rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; } /** @@ -362,22 +389,42 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event) goto hard_fail; } + /* If all devices reported they can proceed, then re-enable MMIO */ + if (result == PCI_ERS_RESULT_CAN_RECOVER) { + rc = rtas_pci_enable(frozen_pdn, EEH_THAW_MMIO); + + if (rc) { + result = PCI_ERS_RESULT_NEED_RESET; + } else { + result = PCI_ERS_RESULT_NONE; + pci_walk_bus(frozen_bus, eeh_report_mmio_enabled, &result); + } + } + + /* If all devices reported they can proceed, then re-enable DMA */ + if (result == PCI_ERS_RESULT_CAN_RECOVER) { + rc = rtas_pci_enable(frozen_pdn, EEH_THAW_DMA); + + if (rc) + result = PCI_ERS_RESULT_NEED_RESET; + } + + /* If any device has a hard failure, then shut off everything. */ + if (result == PCI_ERS_RESULT_DISCONNECT) + goto hard_fail; + /* If any device called out for a reset, then reset the slot */ if (result == PCI_ERS_RESULT_NEED_RESET) { rc = eeh_reset_device(frozen_pdn, NULL); if (rc) goto hard_fail; - pci_walk_bus(frozen_bus, eeh_report_reset, NULL); + result = PCI_ERS_RESULT_NONE; + pci_walk_bus(frozen_bus, eeh_report_reset, &result); } - /* If all devices reported they can proceed, the re-enable PIO */ - if (result == PCI_ERS_RESULT_CAN_RECOVER) { - /* XXX Not supported; we brute-force reset the device */ - rc = eeh_reset_device(frozen_pdn, NULL); - if (rc) - goto hard_fail; - pci_walk_bus(frozen_bus, eeh_report_reset, NULL); - } + /* All devices should claim they have recovered by now. */ + if (result != PCI_ERS_RESULT_RECOVERED) + goto hard_fail; /* Tell all device drivers that they can resume operations */ pci_walk_bus(frozen_bus, eeh_report_resume, NULL); From 8b9b5a77e3aeb9650b511a8be4c61632999537db Mon Sep 17 00:00:00 2001 From: Amy Fong Date: Mon, 18 Sep 2006 23:07:24 -0400 Subject: [PATCH 0349/1063] [POWERPC] Fix compile error in sbc8560 The following fixes compile errors in sbc8560. Signed-off-by: Amy Fong Signed-off-by: Paul Mackerras --- arch/ppc/platforms/85xx/sbc8560.h | 1 + arch/ppc/platforms/85xx/sbc85xx.h | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/arch/ppc/platforms/85xx/sbc8560.h b/arch/ppc/platforms/85xx/sbc8560.h index c7d61cf3a449..e5e156f60100 100644 --- a/arch/ppc/platforms/85xx/sbc8560.h +++ b/arch/ppc/platforms/85xx/sbc8560.h @@ -14,6 +14,7 @@ #define __MACH_SBC8560_H__ #include +#include #define CPM_MAP_ADDR (CCSRBAR + MPC85xx_CPM_OFFSET) diff --git a/arch/ppc/platforms/85xx/sbc85xx.h b/arch/ppc/platforms/85xx/sbc85xx.h index 21ea7a55639b..51df4dc04e22 100644 --- a/arch/ppc/platforms/85xx/sbc85xx.h +++ b/arch/ppc/platforms/85xx/sbc85xx.h @@ -49,4 +49,22 @@ extern void sbc8560_init_IRQ(void) __init; #define MPC85XX_PCI1_IO_SIZE 0x01000000 +/* FCC1 Clock Source Configuration. These can be + * redefined in the board specific file. + * Can only choose from CLK9-12 */ +#define F1_RXCLK 12 +#define F1_TXCLK 11 + +/* FCC2 Clock Source Configuration. These can be + * redefined in the board specific file. + * Can only choose from CLK13-16 */ +#define F2_RXCLK 13 +#define F2_TXCLK 14 + +/* FCC3 Clock Source Configuration. These can be + * redefined in the board specific file. + * Can only choose from CLK13-16 */ +#define F3_RXCLK 15 +#define F3_TXCLK 16 + #endif /* __PLATFORMS_85XX_SBC85XX_H__ */ From 7da8a2e5c1fd2ee513fdeac8d13c4f3623838fd0 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Wed, 20 Sep 2006 09:11:59 -0500 Subject: [PATCH 0350/1063] [POWERPC] 40x: Fix debug status register defines This fixes some debug register defines on PPC 40x that were incorrect. Signed-off-by: Josh Boyer Signed-off-by: Paul Mackerras --- include/asm-ppc/reg_booke.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/asm-ppc/reg_booke.h b/include/asm-ppc/reg_booke.h index 4944c0fb8bea..602fbadeaf48 100644 --- a/include/asm-ppc/reg_booke.h +++ b/include/asm-ppc/reg_booke.h @@ -300,14 +300,14 @@ do { \ #define DBSR_IC 0x80000000 /* Instruction Completion */ #define DBSR_BT 0x40000000 /* Branch taken */ #define DBSR_TIE 0x10000000 /* Trap Instruction debug Event */ -#define DBSR_IAC1 0x00800000 /* Instruction Address Compare 1 Event */ -#define DBSR_IAC2 0x00400000 /* Instruction Address Compare 2 Event */ -#define DBSR_IAC3 0x00200000 /* Instruction Address Compare 3 Event */ -#define DBSR_IAC4 0x00100000 /* Instruction Address Compare 4 Event */ -#define DBSR_DAC1R 0x00080000 /* Data Address Compare 1 Read Event */ -#define DBSR_DAC1W 0x00040000 /* Data Address Compare 1 Write Event */ -#define DBSR_DAC2R 0x00020000 /* Data Address Compare 2 Read Event */ -#define DBSR_DAC2W 0x00010000 /* Data Address Compare 2 Write Event */ +#define DBSR_IAC1 0x04000000 /* Instruction Address Compare 1 Event */ +#define DBSR_IAC2 0x02000000 /* Instruction Address Compare 2 Event */ +#define DBSR_IAC3 0x00080000 /* Instruction Address Compare 3 Event */ +#define DBSR_IAC4 0x00040000 /* Instruction Address Compare 4 Event */ +#define DBSR_DAC1R 0x01000000 /* Data Address Compare 1 Read Event */ +#define DBSR_DAC1W 0x00800000 /* Data Address Compare 1 Write Event */ +#define DBSR_DAC2R 0x00400000 /* Data Address Compare 2 Read Event */ +#define DBSR_DAC2W 0x00200000 /* Data Address Compare 2 Write Event */ #endif /* Bit definitions related to the ESR. */ From b8c06a2ab68661bf841e21003f4447f8d422aed3 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 22 Sep 2006 01:14:52 +0000 Subject: [PATCH 0351/1063] [CIFS] statfs for cifs unix extensions no longer experimental Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 3cd750029be2..c3ef1c0d0e68 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -189,7 +189,6 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = 0; /* undefined */ buf->f_ffree = 0; /* unlimited */ -#ifdef CONFIG_CIFS_EXPERIMENTAL /* BB we could add a second check for a QFS Unix capability bit */ /* BB FIXME check CIFS_POSIX_EXTENSIONS Unix cap first FIXME BB */ if ((pTcon->ses->capabilities & CAP_UNIX) && (CIFS_POSIX_EXTENSIONS & @@ -199,7 +198,6 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) /* Only need to call the old QFSInfo if failed on newer one */ if(rc) -#endif /* CIFS_EXPERIMENTAL */ rc = CIFSSMBQFSInfo(xid, pTcon, buf); /* Old Windows servers do not support level 103, retry with level From caf81329c39b5c48f6cc0d78fa159b5a587e37f9 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 21 Sep 2006 18:00:00 +1000 Subject: [PATCH 0352/1063] [POWERPC] Merge iSeries i/o operations with the rest This patch changes the io operations so that they are out of line if CONFIG_PPC_ISERIES is set and includes a firmware feature check in that case. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/io.c | 14 ++ arch/powerpc/platforms/iseries/pci.c | 280 ++++++++++++++++++----- include/asm-powerpc/io.h | 146 ++++++------ include/asm-powerpc/iseries/iseries_io.h | 60 ----- 4 files changed, 318 insertions(+), 182 deletions(-) delete mode 100644 include/asm-powerpc/iseries/iseries_io.h diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c index 80a3209acef4..e98180686b35 100644 --- a/arch/powerpc/kernel/io.c +++ b/arch/powerpc/kernel/io.c @@ -22,12 +22,16 @@ #include #include +#include +#include void _insb(volatile u8 __iomem *port, void *buf, long count) { u8 *tbuf = buf; u8 tmp; + BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES)); + if (unlikely(count <= 0)) return; asm volatile("sync"); @@ -44,6 +48,8 @@ void _outsb(volatile u8 __iomem *port, const void *buf, long count) { const u8 *tbuf = buf; + BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES)); + if (unlikely(count <= 0)) return; asm volatile("sync"); @@ -59,6 +65,8 @@ void _insw_ns(volatile u16 __iomem *port, void *buf, long count) u16 *tbuf = buf; u16 tmp; + BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES)); + if (unlikely(count <= 0)) return; asm volatile("sync"); @@ -75,6 +83,8 @@ void _outsw_ns(volatile u16 __iomem *port, const void *buf, long count) { const u16 *tbuf = buf; + BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES)); + if (unlikely(count <= 0)) return; asm volatile("sync"); @@ -90,6 +100,8 @@ void _insl_ns(volatile u32 __iomem *port, void *buf, long count) u32 *tbuf = buf; u32 tmp; + BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES)); + if (unlikely(count <= 0)) return; asm volatile("sync"); @@ -106,6 +118,8 @@ void _outsl_ns(volatile u32 __iomem *port, const void *buf, long count) { const u32 *tbuf = buf; + BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES)); + if (unlikely(count <= 0)) return; asm volatile("sync"); diff --git a/arch/powerpc/platforms/iseries/pci.c b/arch/powerpc/platforms/iseries/pci.c index f4d427a7bb2d..3eb12065df23 100644 --- a/arch/powerpc/platforms/iseries/pci.c +++ b/arch/powerpc/platforms/iseries/pci.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -269,46 +270,6 @@ void pcibios_fixup_resources(struct pci_dev *pdev) { } -/* - * I/0 Memory copy MUST use mmio commands on iSeries - * To do; For performance, include the hv call directly - */ -void iSeries_memset_io(volatile void __iomem *dest, char c, size_t Count) -{ - u8 ByteValue = c; - long NumberOfBytes = Count; - - while (NumberOfBytes > 0) { - iSeries_Write_Byte(ByteValue, dest++); - -- NumberOfBytes; - } -} -EXPORT_SYMBOL(iSeries_memset_io); - -void iSeries_memcpy_toio(volatile void __iomem *dest, void *source, size_t count) -{ - char *src = source; - long NumberOfBytes = count; - - while (NumberOfBytes > 0) { - iSeries_Write_Byte(*src++, dest++); - -- NumberOfBytes; - } -} -EXPORT_SYMBOL(iSeries_memcpy_toio); - -void iSeries_memcpy_fromio(void *dest, const volatile void __iomem *src, size_t count) -{ - char *dst = dest; - long NumberOfBytes = count; - - while (NumberOfBytes > 0) { - *dst++ = iSeries_Read_Byte(src++); - -- NumberOfBytes; - } -} -EXPORT_SYMBOL(iSeries_memcpy_fromio); - /* * Look down the chain to find the matching Device Device */ @@ -491,7 +452,7 @@ static inline struct device_node *xlate_iomm_address( * iSeries_Read_Word = Read Word (16 bit) * iSeries_Read_Long = Read Long (32 bit) */ -u8 iSeries_Read_Byte(const volatile void __iomem *IoAddress) +static u8 iSeries_Read_Byte(const volatile void __iomem *IoAddress) { u64 BarOffset; u64 dsa; @@ -518,9 +479,8 @@ u8 iSeries_Read_Byte(const volatile void __iomem *IoAddress) return (u8)ret.value; } -EXPORT_SYMBOL(iSeries_Read_Byte); -u16 iSeries_Read_Word(const volatile void __iomem *IoAddress) +static u16 iSeries_Read_Word(const volatile void __iomem *IoAddress) { u64 BarOffset; u64 dsa; @@ -548,9 +508,8 @@ u16 iSeries_Read_Word(const volatile void __iomem *IoAddress) return swab16((u16)ret.value); } -EXPORT_SYMBOL(iSeries_Read_Word); -u32 iSeries_Read_Long(const volatile void __iomem *IoAddress) +static u32 iSeries_Read_Long(const volatile void __iomem *IoAddress) { u64 BarOffset; u64 dsa; @@ -578,7 +537,6 @@ u32 iSeries_Read_Long(const volatile void __iomem *IoAddress) return swab32((u32)ret.value); } -EXPORT_SYMBOL(iSeries_Read_Long); /* * Write MM I/O Instructions for the iSeries @@ -587,7 +545,7 @@ EXPORT_SYMBOL(iSeries_Read_Long); * iSeries_Write_Word = Write Word(16 bit) * iSeries_Write_Long = Write Long(32 bit) */ -void iSeries_Write_Byte(u8 data, volatile void __iomem *IoAddress) +static void iSeries_Write_Byte(u8 data, volatile void __iomem *IoAddress) { u64 BarOffset; u64 dsa; @@ -612,9 +570,8 @@ void iSeries_Write_Byte(u8 data, volatile void __iomem *IoAddress) rc = HvCall4(HvCallPciBarStore8, dsa, BarOffset, data, 0); } while (CheckReturnCode("WWB", DevNode, &retry, rc) != 0); } -EXPORT_SYMBOL(iSeries_Write_Byte); -void iSeries_Write_Word(u16 data, volatile void __iomem *IoAddress) +static void iSeries_Write_Word(u16 data, volatile void __iomem *IoAddress) { u64 BarOffset; u64 dsa; @@ -639,9 +596,8 @@ void iSeries_Write_Word(u16 data, volatile void __iomem *IoAddress) rc = HvCall4(HvCallPciBarStore16, dsa, BarOffset, swab16(data), 0); } while (CheckReturnCode("WWW", DevNode, &retry, rc) != 0); } -EXPORT_SYMBOL(iSeries_Write_Word); -void iSeries_Write_Long(u32 data, volatile void __iomem *IoAddress) +static void iSeries_Write_Long(u32 data, volatile void __iomem *IoAddress) { u64 BarOffset; u64 dsa; @@ -666,4 +622,224 @@ void iSeries_Write_Long(u32 data, volatile void __iomem *IoAddress) rc = HvCall4(HvCallPciBarStore32, dsa, BarOffset, swab32(data), 0); } while (CheckReturnCode("WWL", DevNode, &retry, rc) != 0); } -EXPORT_SYMBOL(iSeries_Write_Long); + +extern unsigned char __raw_readb(const volatile void __iomem *addr) +{ + BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES)); + + return *(volatile unsigned char __force *)addr; +} +EXPORT_SYMBOL(__raw_readb); + +extern unsigned short __raw_readw(const volatile void __iomem *addr) +{ + BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES)); + + return *(volatile unsigned short __force *)addr; +} +EXPORT_SYMBOL(__raw_readw); + +extern unsigned int __raw_readl(const volatile void __iomem *addr) +{ + BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES)); + + return *(volatile unsigned int __force *)addr; +} +EXPORT_SYMBOL(__raw_readl); + +extern unsigned long __raw_readq(const volatile void __iomem *addr) +{ + BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES)); + + return *(volatile unsigned long __force *)addr; +} +EXPORT_SYMBOL(__raw_readq); + +extern void __raw_writeb(unsigned char v, volatile void __iomem *addr) +{ + BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES)); + + *(volatile unsigned char __force *)addr = v; +} +EXPORT_SYMBOL(__raw_writeb); + +extern void __raw_writew(unsigned short v, volatile void __iomem *addr) +{ + BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES)); + + *(volatile unsigned short __force *)addr = v; +} +EXPORT_SYMBOL(__raw_writew); + +extern void __raw_writel(unsigned int v, volatile void __iomem *addr) +{ + BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES)); + + *(volatile unsigned int __force *)addr = v; +} +EXPORT_SYMBOL(__raw_writel); + +extern void __raw_writeq(unsigned long v, volatile void __iomem *addr) +{ + BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES)); + + *(volatile unsigned long __force *)addr = v; +} +EXPORT_SYMBOL(__raw_writeq); + +int in_8(const volatile unsigned char __iomem *addr) +{ + if (firmware_has_feature(FW_FEATURE_ISERIES)) + return iSeries_Read_Byte(addr); + return __in_8(addr); +} +EXPORT_SYMBOL(in_8); + +void out_8(volatile unsigned char __iomem *addr, int val) +{ + if (firmware_has_feature(FW_FEATURE_ISERIES)) + iSeries_Write_Byte(val, addr); + else + __out_8(addr, val); +} +EXPORT_SYMBOL(out_8); + +int in_le16(const volatile unsigned short __iomem *addr) +{ + if (firmware_has_feature(FW_FEATURE_ISERIES)) + return iSeries_Read_Word(addr); + return __in_le16(addr); +} +EXPORT_SYMBOL(in_le16); + +int in_be16(const volatile unsigned short __iomem *addr) +{ + BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES)); + + return __in_be16(addr); +} +EXPORT_SYMBOL(in_be16); + +void out_le16(volatile unsigned short __iomem *addr, int val) +{ + if (firmware_has_feature(FW_FEATURE_ISERIES)) + iSeries_Write_Word(val, addr); + else + __out_le16(addr, val); +} +EXPORT_SYMBOL(out_le16); + +void out_be16(volatile unsigned short __iomem *addr, int val) +{ + BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES)); + + __out_be16(addr, val); +} +EXPORT_SYMBOL(out_be16); + +unsigned in_le32(const volatile unsigned __iomem *addr) +{ + if (firmware_has_feature(FW_FEATURE_ISERIES)) + return iSeries_Read_Long(addr); + return __in_le32(addr); +} +EXPORT_SYMBOL(in_le32); + +unsigned in_be32(const volatile unsigned __iomem *addr) +{ + BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES)); + + return __in_be32(addr); +} +EXPORT_SYMBOL(in_be32); + +void out_le32(volatile unsigned __iomem *addr, int val) +{ + if (firmware_has_feature(FW_FEATURE_ISERIES)) + iSeries_Write_Long(val, addr); + else + __out_le32(addr, val); +} +EXPORT_SYMBOL(out_le32); + +void out_be32(volatile unsigned __iomem *addr, int val) +{ + BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES)); + + __out_be32(addr, val); +} +EXPORT_SYMBOL(out_be32); + +unsigned long in_le64(const volatile unsigned long __iomem *addr) +{ + BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES)); + + return __in_le64(addr); +} +EXPORT_SYMBOL(in_le64); + +unsigned long in_be64(const volatile unsigned long __iomem *addr) +{ + BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES)); + + return __in_be64(addr); +} +EXPORT_SYMBOL(in_be64); + +void out_le64(volatile unsigned long __iomem *addr, unsigned long val) +{ + BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES)); + + __out_le64(addr, val); +} +EXPORT_SYMBOL(out_le64); + +void out_be64(volatile unsigned long __iomem *addr, unsigned long val) +{ + BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES)); + + __out_be64(addr, val); +} +EXPORT_SYMBOL(out_be64); + +void memset_io(volatile void __iomem *addr, int c, unsigned long n) +{ + if (firmware_has_feature(FW_FEATURE_ISERIES)) { + volatile char __iomem *d = addr; + + while (n-- > 0) { + iSeries_Write_Byte(c, d++); + } + } else + eeh_memset_io(addr, c, n); +} +EXPORT_SYMBOL(memset_io); + +void memcpy_fromio(void *dest, const volatile void __iomem *src, + unsigned long n) +{ + if (firmware_has_feature(FW_FEATURE_ISERIES)) { + char *d = dest; + const volatile char __iomem *s = src; + + while (n-- > 0) { + *d++ = iSeries_Read_Byte(s++); + } + } else + eeh_memcpy_fromio(dest, src, n); +} +EXPORT_SYMBOL(memcpy_fromio); + +void memcpy_toio(volatile void __iomem *dest, const void *src, unsigned long n) +{ + if (firmware_has_feature(FW_FEATURE_ISERIES)) { + const char *s = src; + volatile char __iomem *d = dest; + + while (n-- > 0) { + iSeries_Write_Byte(*s++, d++); + } + } else + eeh_memcpy_toio(dest, src, n); +} +EXPORT_SYMBOL(memcpy_toio); diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h index 174fb89d5eda..46bae1cf385b 100644 --- a/include/asm-powerpc/io.h +++ b/include/asm-powerpc/io.h @@ -20,9 +20,6 @@ extern int check_legacy_ioport(unsigned long base_port); #include #include #include -#ifdef CONFIG_PPC_ISERIES -#include -#endif #include #include @@ -37,41 +34,53 @@ extern unsigned long isa_io_base; extern unsigned long pci_io_base; #ifdef CONFIG_PPC_ISERIES -/* __raw_* accessors aren't supported on iSeries */ -#define __raw_readb(addr) { BUG(); 0; } -#define __raw_readw(addr) { BUG(); 0; } -#define __raw_readl(addr) { BUG(); 0; } -#define __raw_readq(addr) { BUG(); 0; } -#define __raw_writeb(v, addr) { BUG(); 0; } -#define __raw_writew(v, addr) { BUG(); 0; } -#define __raw_writel(v, addr) { BUG(); 0; } -#define __raw_writeq(v, addr) { BUG(); 0; } -#define readb(addr) iSeries_Read_Byte(addr) -#define readw(addr) iSeries_Read_Word(addr) -#define readl(addr) iSeries_Read_Long(addr) -#define writeb(data, addr) iSeries_Write_Byte((data),(addr)) -#define writew(data, addr) iSeries_Write_Word((data),(addr)) -#define writel(data, addr) iSeries_Write_Long((data),(addr)) -#define memset_io(a,b,c) iSeries_memset_io((a),(b),(c)) -#define memcpy_fromio(a,b,c) iSeries_memcpy_fromio((a), (b), (c)) -#define memcpy_toio(a,b,c) iSeries_memcpy_toio((a), (b), (c)) -#define inb(addr) readb(((void __iomem *)(long)(addr))) -#define inw(addr) readw(((void __iomem *)(long)(addr))) -#define inl(addr) readl(((void __iomem *)(long)(addr))) -#define outb(data,addr) writeb(data,((void __iomem *)(long)(addr))) -#define outw(data,addr) writew(data,((void __iomem *)(long)(addr))) -#define outl(data,addr) writel(data,((void __iomem *)(long)(addr))) -/* - * The *_ns versions below don't do byte-swapping. - * Neither do the standard versions now, these are just here - * for older code. - */ -#define insb(port, buf, ns) _insb((u8 __iomem *)((port)+pci_io_base), (buf), (ns)) -#define insw(port, buf, ns) _insw_ns((u16 __iomem *)((port)+pci_io_base), (buf), (ns)) -#define insl(port, buf, nl) _insl_ns((u32 __iomem *)((port)+pci_io_base), (buf), (nl)) +extern int in_8(const volatile unsigned char __iomem *addr); +extern void out_8(volatile unsigned char __iomem *addr, int val); +extern int in_le16(const volatile unsigned short __iomem *addr); +extern int in_be16(const volatile unsigned short __iomem *addr); +extern void out_le16(volatile unsigned short __iomem *addr, int val); +extern void out_be16(volatile unsigned short __iomem *addr, int val); +extern unsigned in_le32(const volatile unsigned __iomem *addr); +extern unsigned in_be32(const volatile unsigned __iomem *addr); +extern void out_le32(volatile unsigned __iomem *addr, int val); +extern void out_be32(volatile unsigned __iomem *addr, int val); +extern unsigned long in_le64(const volatile unsigned long __iomem *addr); +extern unsigned long in_be64(const volatile unsigned long __iomem *addr); +extern void out_le64(volatile unsigned long __iomem *addr, unsigned long val); +extern void out_be64(volatile unsigned long __iomem *addr, unsigned long val); -#else +extern unsigned char __raw_readb(const volatile void __iomem *addr); +extern unsigned short __raw_readw(const volatile void __iomem *addr); +extern unsigned int __raw_readl(const volatile void __iomem *addr); +extern unsigned long __raw_readq(const volatile void __iomem *addr); +extern void __raw_writeb(unsigned char v, volatile void __iomem *addr); +extern void __raw_writew(unsigned short v, volatile void __iomem *addr); +extern void __raw_writel(unsigned int v, volatile void __iomem *addr); +extern void __raw_writeq(unsigned long v, volatile void __iomem *addr); + +extern void memset_io(volatile void __iomem *addr, int c, unsigned long n); +extern void memcpy_fromio(void *dest, const volatile void __iomem *src, + unsigned long n); +extern void memcpy_toio(volatile void __iomem *dest, const void *src, + unsigned long n); + +#else /* CONFIG_PPC_ISERIES */ + +#define in_8(addr) __in_8((addr)) +#define out_8(addr, val) __out_8((addr), (val)) +#define in_le16(addr) __in_le16((addr)) +#define in_be16(addr) __in_be16((addr)) +#define out_le16(addr, val) __out_le16((addr), (val)) +#define out_be16(addr, val) __out_be16((addr), (val)) +#define in_le32(addr) __in_le32((addr)) +#define in_be32(addr) __in_be32((addr)) +#define out_le32(addr, val) __out_le32((addr), (val)) +#define out_be32(addr, val) __out_be32((addr), (val)) +#define in_le64(addr) __in_le64((addr)) +#define in_be64(addr) __in_be64((addr)) +#define out_le64(addr, val) __out_le64((addr), (val)) +#define out_be64(addr, val) __out_be64((addr), (val)) static inline unsigned char __raw_readb(const volatile void __iomem *addr) { @@ -105,23 +114,11 @@ static inline void __raw_writeq(unsigned long v, volatile void __iomem *addr) { *(volatile unsigned long __force *)addr = v; } -#define readb(addr) eeh_readb(addr) -#define readw(addr) eeh_readw(addr) -#define readl(addr) eeh_readl(addr) -#define readq(addr) eeh_readq(addr) -#define writeb(data, addr) eeh_writeb((data), (addr)) -#define writew(data, addr) eeh_writew((data), (addr)) -#define writel(data, addr) eeh_writel((data), (addr)) -#define writeq(data, addr) eeh_writeq((data), (addr)) #define memset_io(a,b,c) eeh_memset_io((a),(b),(c)) #define memcpy_fromio(a,b,c) eeh_memcpy_fromio((a),(b),(c)) #define memcpy_toio(a,b,c) eeh_memcpy_toio((a),(b),(c)) -#define inb(port) eeh_inb((unsigned long)port) -#define outb(val, port) eeh_outb(val, (unsigned long)port) -#define inw(port) eeh_inw((unsigned long)port) -#define outw(val, port) eeh_outw(val, (unsigned long)port) -#define inl(port) eeh_inl((unsigned long)port) -#define outl(val, port) eeh_outl(val, (unsigned long)port) + +#endif /* CONFIG_PPC_ISERIES */ /* * The insw/outsw/insl/outsl macros don't do byte-swapping. @@ -132,12 +129,25 @@ static inline void __raw_writeq(unsigned long v, volatile void __iomem *addr) #define insw(port, buf, ns) eeh_insw_ns((port), (buf), (ns)) #define insl(port, buf, nl) eeh_insl_ns((port), (buf), (nl)) -#endif - #define outsb(port, buf, ns) _outsb((u8 __iomem *)((port)+pci_io_base), (buf), (ns)) #define outsw(port, buf, ns) _outsw_ns((u16 __iomem *)((port)+pci_io_base), (buf), (ns)) #define outsl(port, buf, nl) _outsl_ns((u32 __iomem *)((port)+pci_io_base), (buf), (nl)) +#define readb(addr) eeh_readb(addr) +#define readw(addr) eeh_readw(addr) +#define readl(addr) eeh_readl(addr) +#define readq(addr) eeh_readq(addr) +#define writeb(data, addr) eeh_writeb((data), (addr)) +#define writew(data, addr) eeh_writew((data), (addr)) +#define writel(data, addr) eeh_writel((data), (addr)) +#define writeq(data, addr) eeh_writeq((data), (addr)) +#define inb(port) eeh_inb((unsigned long)port) +#define outb(val, port) eeh_outb(val, (unsigned long)port) +#define inw(port) eeh_inw((unsigned long)port) +#define outw(val, port) eeh_outw(val, (unsigned long)port) +#define inl(port) eeh_inl((unsigned long)port) +#define outl(val, port) eeh_outl(val, (unsigned long)port) + #define readb_relaxed(addr) readb(addr) #define readw_relaxed(addr) readw(addr) #define readl_relaxed(addr) readl(addr) @@ -258,7 +268,7 @@ static inline void iosync(void) * and should not be used directly by device drivers. Use inb/readb * instead. */ -static inline int in_8(const volatile unsigned char __iomem *addr) +static inline int __in_8(const volatile unsigned char __iomem *addr) { int ret; @@ -267,14 +277,14 @@ static inline int in_8(const volatile unsigned char __iomem *addr) return ret; } -static inline void out_8(volatile unsigned char __iomem *addr, int val) +static inline void __out_8(volatile unsigned char __iomem *addr, int val) { __asm__ __volatile__("sync; stb%U0%X0 %1,%0" : "=m" (*addr) : "r" (val)); get_paca()->io_sync = 1; } -static inline int in_le16(const volatile unsigned short __iomem *addr) +static inline int __in_le16(const volatile unsigned short __iomem *addr) { int ret; @@ -283,7 +293,7 @@ static inline int in_le16(const volatile unsigned short __iomem *addr) return ret; } -static inline int in_be16(const volatile unsigned short __iomem *addr) +static inline int __in_be16(const volatile unsigned short __iomem *addr) { int ret; @@ -292,21 +302,21 @@ static inline int in_be16(const volatile unsigned short __iomem *addr) return ret; } -static inline void out_le16(volatile unsigned short __iomem *addr, int val) +static inline void __out_le16(volatile unsigned short __iomem *addr, int val) { __asm__ __volatile__("sync; sthbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr)); get_paca()->io_sync = 1; } -static inline void out_be16(volatile unsigned short __iomem *addr, int val) +static inline void __out_be16(volatile unsigned short __iomem *addr, int val) { __asm__ __volatile__("sync; sth%U0%X0 %1,%0" : "=m" (*addr) : "r" (val)); get_paca()->io_sync = 1; } -static inline unsigned in_le32(const volatile unsigned __iomem *addr) +static inline unsigned __in_le32(const volatile unsigned __iomem *addr) { unsigned ret; @@ -315,7 +325,7 @@ static inline unsigned in_le32(const volatile unsigned __iomem *addr) return ret; } -static inline unsigned in_be32(const volatile unsigned __iomem *addr) +static inline unsigned __in_be32(const volatile unsigned __iomem *addr) { unsigned ret; @@ -324,21 +334,21 @@ static inline unsigned in_be32(const volatile unsigned __iomem *addr) return ret; } -static inline void out_le32(volatile unsigned __iomem *addr, int val) +static inline void __out_le32(volatile unsigned __iomem *addr, int val) { __asm__ __volatile__("sync; stwbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr)); get_paca()->io_sync = 1; } -static inline void out_be32(volatile unsigned __iomem *addr, int val) +static inline void __out_be32(volatile unsigned __iomem *addr, int val) { __asm__ __volatile__("sync; stw%U0%X0 %1,%0" : "=m" (*addr) : "r" (val)); get_paca()->io_sync = 1; } -static inline unsigned long in_le64(const volatile unsigned long __iomem *addr) +static inline unsigned long __in_le64(const volatile unsigned long __iomem *addr) { unsigned long tmp, ret; @@ -358,7 +368,7 @@ static inline unsigned long in_le64(const volatile unsigned long __iomem *addr) return ret; } -static inline unsigned long in_be64(const volatile unsigned long __iomem *addr) +static inline unsigned long __in_be64(const volatile unsigned long __iomem *addr) { unsigned long ret; @@ -367,7 +377,7 @@ static inline unsigned long in_be64(const volatile unsigned long __iomem *addr) return ret; } -static inline void out_le64(volatile unsigned long __iomem *addr, unsigned long val) +static inline void __out_le64(volatile unsigned long __iomem *addr, unsigned long val) { unsigned long tmp; @@ -385,15 +395,13 @@ static inline void out_le64(volatile unsigned long __iomem *addr, unsigned long get_paca()->io_sync = 1; } -static inline void out_be64(volatile unsigned long __iomem *addr, unsigned long val) +static inline void __out_be64(volatile unsigned long __iomem *addr, unsigned long val) { __asm__ __volatile__("sync; std%U0%X0 %1,%0" : "=m" (*addr) : "r" (val)); get_paca()->io_sync = 1; } -#ifndef CONFIG_PPC_ISERIES #include -#endif /** * check_signature - find BIOS signatures @@ -409,7 +417,6 @@ static inline int check_signature(const volatile void __iomem * io_addr, const unsigned char *signature, int length) { int retval = 0; -#ifndef CONFIG_PPC_ISERIES do { if (readb(io_addr) != *signature) goto out; @@ -419,7 +426,6 @@ static inline int check_signature(const volatile void __iomem * io_addr, } while (length); retval = 1; out: -#endif return retval; } diff --git a/include/asm-powerpc/iseries/iseries_io.h b/include/asm-powerpc/iseries/iseries_io.h deleted file mode 100644 index f29009bd63c9..000000000000 --- a/include/asm-powerpc/iseries/iseries_io.h +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef _ASM_POWERPC_ISERIES_ISERIES_IO_H -#define _ASM_POWERPC_ISERIES_ISERIES_IO_H - - -#ifdef CONFIG_PPC_ISERIES -#include -/* - * Created by Allan Trautman on Thu Dec 28 2000. - * - * Remaps the io.h for the iSeries Io - * Copyright (C) 2000 Allan H Trautman, IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the: - * Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, - * Boston, MA 02111-1307 USA - * - * Change Activity: - * Created December 28, 2000 - * End Change Activity - */ - -#ifdef CONFIG_PCI -extern u8 iSeries_Read_Byte(const volatile void __iomem * IoAddress); -extern u16 iSeries_Read_Word(const volatile void __iomem * IoAddress); -extern u32 iSeries_Read_Long(const volatile void __iomem * IoAddress); -extern void iSeries_Write_Byte(u8 IoData, volatile void __iomem * IoAddress); -extern void iSeries_Write_Word(u16 IoData, volatile void __iomem * IoAddress); -extern void iSeries_Write_Long(u32 IoData, volatile void __iomem * IoAddress); - -extern void iSeries_memset_io(volatile void __iomem *dest, char x, size_t n); -extern void iSeries_memcpy_toio(volatile void __iomem *dest, void *source, - size_t n); -extern void iSeries_memcpy_fromio(void *dest, - const volatile void __iomem *source, size_t n); -#else -static inline u8 iSeries_Read_Byte(const volatile void __iomem *IoAddress) -{ - return 0xff; -} - -static inline void iSeries_Write_Byte(u8 IoData, - volatile void __iomem *IoAddress) -{ -} -#endif /* CONFIG_PCI */ - -#endif /* CONFIG_PPC_ISERIES */ -#endif /* _ASM_POWERPC_ISERIES_ISERIES_IO_H */ From 2954da897c40de0f3abdd6a100f2978f30d04068 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 21 Sep 2006 18:21:35 +1000 Subject: [PATCH 0353/1063] [POWERPC] Remove DISCONTIGMEM cruft from page.h This looks like cruft to me, these functions don't exist AFAICT, and I can't see that it's possible to even enable DISCONTIGMEM on powerpc anymore. CC'ing some folks who might know better, based on the who-touched-it-last principle. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- include/asm-powerpc/page.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/asm-powerpc/page.h b/include/asm-powerpc/page.h index fb597b37c2a2..b4d38b0b15f8 100644 --- a/include/asm-powerpc/page.h +++ b/include/asm-powerpc/page.h @@ -55,12 +55,6 @@ #define PAGE_OFFSET ASM_CONST(CONFIG_KERNEL_START) #define KERNELBASE (PAGE_OFFSET + PHYSICAL_START) -#ifdef CONFIG_DISCONTIGMEM -#define page_to_pfn(page) discontigmem_page_to_pfn(page) -#define pfn_to_page(pfn) discontigmem_pfn_to_page(pfn) -#define pfn_valid(pfn) discontigmem_pfn_valid(pfn) -#endif - #ifdef CONFIG_FLATMEM #define pfn_valid(pfn) ((pfn) < max_mapnr) #endif From 7d452c326c2ac879aced884411a0fe3ba75d9c87 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 21 Sep 2006 12:29:51 +0200 Subject: [PATCH 0354/1063] [POWERPC] powerpc: fix building gdb against asm/ptrace.h Ulrich Weigand found a bug with the current version of the asm-powerpc/ptrace.h that prevents building at least the SPU target version of gdb, since some ptrace opcodes are not defined. The problem seems to have originated in the merging of 32 and 64 bit versions of that file, the problem is that some opcodes are only valid on 64 bit kernels, but are also used by 32 bit programs, so they can't depends on the __powerpc64__ symbol. Signed-off-by: Arnd Bergmann Signed-off-by: Paul Mackerras --- include/asm-powerpc/ptrace.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/asm-powerpc/ptrace.h b/include/asm-powerpc/ptrace.h index dc4cb9cc73a1..4435efe85d0e 100644 --- a/include/asm-powerpc/ptrace.h +++ b/include/asm-powerpc/ptrace.h @@ -215,12 +215,10 @@ do { \ #define PTRACE_GETVRREGS 18 #define PTRACE_SETVRREGS 19 -#ifndef __powerpc64__ /* Get/set all the upper 32-bits of the SPE registers, accumulator, and * spefscr, in one go */ #define PTRACE_GETEVRREGS 20 #define PTRACE_SETEVRREGS 21 -#endif /* __powerpc64__ */ /* * Get or set a debug register. The first 16 are DABR registers and the @@ -235,7 +233,6 @@ do { \ #define PPC_PTRACE_GETFPREGS 0x97 /* Get FPRs 0 - 31 */ #define PPC_PTRACE_SETFPREGS 0x96 /* Set FPRs 0 - 31 */ -#ifdef __powerpc64__ /* Calls to trace a 64bit program from a 32bit program */ #define PPC_PTRACE_PEEKTEXT_3264 0x95 #define PPC_PTRACE_PEEKDATA_3264 0x94 @@ -243,6 +240,5 @@ do { \ #define PPC_PTRACE_POKEDATA_3264 0x92 #define PPC_PTRACE_PEEKUSR_3264 0x91 #define PPC_PTRACE_POKEUSR_3264 0x90 -#endif /* __powerpc64__ */ #endif /* _ASM_POWERPC_PTRACE_H */ From ed709d134deeaea7925a3d748b33ca7e58cc683d Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 21 Sep 2006 13:10:51 -0500 Subject: [PATCH 0355/1063] [POWERPC] Fix IPIC pending register assignments This patch fixes the assignment of pending registers to IRQ numbers for the IPIC; the code previously assigned all IRQs to the high pending word regardless of which word the interrupt belonged to. Signed-off-by: Scott Wood Signed-off-by: Paul Mackerras --- arch/powerpc/sysdev/ipic.c | 42 +++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index 70e707785d49..0251b7c68d0e 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -210,7 +210,7 @@ static struct ipic_info ipic_info[] = { .prio_mask = 4, }, [64] = { - .pend = IPIC_SIPNR_H, + .pend = IPIC_SIPNR_L, .mask = IPIC_SIMSR_L, .prio = IPIC_SMPRR_A, .force = IPIC_SIFCR_L, @@ -218,7 +218,7 @@ static struct ipic_info ipic_info[] = { .prio_mask = 0, }, [65] = { - .pend = IPIC_SIPNR_H, + .pend = IPIC_SIPNR_L, .mask = IPIC_SIMSR_L, .prio = IPIC_SMPRR_A, .force = IPIC_SIFCR_L, @@ -226,7 +226,7 @@ static struct ipic_info ipic_info[] = { .prio_mask = 1, }, [66] = { - .pend = IPIC_SIPNR_H, + .pend = IPIC_SIPNR_L, .mask = IPIC_SIMSR_L, .prio = IPIC_SMPRR_A, .force = IPIC_SIFCR_L, @@ -234,7 +234,7 @@ static struct ipic_info ipic_info[] = { .prio_mask = 2, }, [67] = { - .pend = IPIC_SIPNR_H, + .pend = IPIC_SIPNR_L, .mask = IPIC_SIMSR_L, .prio = IPIC_SMPRR_A, .force = IPIC_SIFCR_L, @@ -242,7 +242,7 @@ static struct ipic_info ipic_info[] = { .prio_mask = 3, }, [68] = { - .pend = IPIC_SIPNR_H, + .pend = IPIC_SIPNR_L, .mask = IPIC_SIMSR_L, .prio = IPIC_SMPRR_B, .force = IPIC_SIFCR_L, @@ -250,7 +250,7 @@ static struct ipic_info ipic_info[] = { .prio_mask = 0, }, [69] = { - .pend = IPIC_SIPNR_H, + .pend = IPIC_SIPNR_L, .mask = IPIC_SIMSR_L, .prio = IPIC_SMPRR_B, .force = IPIC_SIFCR_L, @@ -258,7 +258,7 @@ static struct ipic_info ipic_info[] = { .prio_mask = 1, }, [70] = { - .pend = IPIC_SIPNR_H, + .pend = IPIC_SIPNR_L, .mask = IPIC_SIMSR_L, .prio = IPIC_SMPRR_B, .force = IPIC_SIFCR_L, @@ -266,7 +266,7 @@ static struct ipic_info ipic_info[] = { .prio_mask = 2, }, [71] = { - .pend = IPIC_SIPNR_H, + .pend = IPIC_SIPNR_L, .mask = IPIC_SIMSR_L, .prio = IPIC_SMPRR_B, .force = IPIC_SIFCR_L, @@ -274,91 +274,91 @@ static struct ipic_info ipic_info[] = { .prio_mask = 3, }, [72] = { - .pend = IPIC_SIPNR_H, + .pend = IPIC_SIPNR_L, .mask = IPIC_SIMSR_L, .prio = 0, .force = IPIC_SIFCR_L, .bit = 8, }, [73] = { - .pend = IPIC_SIPNR_H, + .pend = IPIC_SIPNR_L, .mask = IPIC_SIMSR_L, .prio = 0, .force = IPIC_SIFCR_L, .bit = 9, }, [74] = { - .pend = IPIC_SIPNR_H, + .pend = IPIC_SIPNR_L, .mask = IPIC_SIMSR_L, .prio = 0, .force = IPIC_SIFCR_L, .bit = 10, }, [75] = { - .pend = IPIC_SIPNR_H, + .pend = IPIC_SIPNR_L, .mask = IPIC_SIMSR_L, .prio = 0, .force = IPIC_SIFCR_L, .bit = 11, }, [76] = { - .pend = IPIC_SIPNR_H, + .pend = IPIC_SIPNR_L, .mask = IPIC_SIMSR_L, .prio = 0, .force = IPIC_SIFCR_L, .bit = 12, }, [77] = { - .pend = IPIC_SIPNR_H, + .pend = IPIC_SIPNR_L, .mask = IPIC_SIMSR_L, .prio = 0, .force = IPIC_SIFCR_L, .bit = 13, }, [78] = { - .pend = IPIC_SIPNR_H, + .pend = IPIC_SIPNR_L, .mask = IPIC_SIMSR_L, .prio = 0, .force = IPIC_SIFCR_L, .bit = 14, }, [79] = { - .pend = IPIC_SIPNR_H, + .pend = IPIC_SIPNR_L, .mask = IPIC_SIMSR_L, .prio = 0, .force = IPIC_SIFCR_L, .bit = 15, }, [80] = { - .pend = IPIC_SIPNR_H, + .pend = IPIC_SIPNR_L, .mask = IPIC_SIMSR_L, .prio = 0, .force = IPIC_SIFCR_L, .bit = 16, }, [84] = { - .pend = IPIC_SIPNR_H, + .pend = IPIC_SIPNR_L, .mask = IPIC_SIMSR_L, .prio = 0, .force = IPIC_SIFCR_L, .bit = 20, }, [85] = { - .pend = IPIC_SIPNR_H, + .pend = IPIC_SIPNR_L, .mask = IPIC_SIMSR_L, .prio = 0, .force = IPIC_SIFCR_L, .bit = 21, }, [90] = { - .pend = IPIC_SIPNR_H, + .pend = IPIC_SIPNR_L, .mask = IPIC_SIMSR_L, .prio = 0, .force = IPIC_SIFCR_L, .bit = 26, }, [91] = { - .pend = IPIC_SIPNR_H, + .pend = IPIC_SIPNR_L, .mask = IPIC_SIMSR_L, .prio = 0, .force = IPIC_SIFCR_L, From d608df5c7da6ee968aa2ad43c596d5f8d4022299 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Thu, 21 Sep 2006 14:25:34 -0500 Subject: [PATCH 0356/1063] [POWERPC] Maple U3 HT - reject inappropriate config space access When there is a PCI-X mode 2 capable device behind the HT<->PCI-X bridge, the pci core decides that the device has the extended 4K config space, even though the bus is not operating in mode 2. This is because the u3_ht pci ops silently accept offsets greater than 255 but use only the 8 least significant bits, which means reading at offset 0x100 gets the data at offset 0x0, and causes confusion for lspci. Reject accesses to configuration space offsets greater than 255. Signed-off-by: Nathan Lynch Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/maple/pci.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index dc05af5156a9..ec5c14f5ba49 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -213,6 +213,9 @@ static int u3_ht_read_config(struct pci_bus *bus, unsigned int devfn, if (hose == NULL) return PCIBIOS_DEVICE_NOT_FOUND; + if (offset > 0xff) + return PCIBIOS_BAD_REGISTER_NUMBER; + addr = u3_ht_cfg_access(hose, bus->number, devfn, offset); if (!addr) return PCIBIOS_DEVICE_NOT_FOUND; @@ -245,6 +248,9 @@ static int u3_ht_write_config(struct pci_bus *bus, unsigned int devfn, if (hose == NULL) return PCIBIOS_DEVICE_NOT_FOUND; + if (offset > 0xff) + return PCIBIOS_BAD_REGISTER_NUMBER; + addr = u3_ht_cfg_access(hose, bus->number, devfn, offset); if (!addr) return PCIBIOS_DEVICE_NOT_FOUND; From cc9881ce371dc7ff3ef5404feda59566fabaf521 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Thu, 21 Sep 2006 14:31:13 -0500 Subject: [PATCH 0357/1063] [POWERPC] Demacrofy arch/powerpc/platforms/maple/pci.c Noticed that the U3_*CFA macros have some typos: #define U3_HT_CFA0(devfn, off) \ ((((unsigned long)devfn) << 8) | offset) (refers to offset rather than off) #define U3_AGP_CFA0(devfn, off) \ ((1 << (unsigned long)PCI_SLOT(dev_fn)) \ | (((unsigned long)PCI_FUNC(dev_fn)) << 8) \ (refers to dev_fn rather than devfn) Things happen to work, but there doesn't seem to be any reason these shouldn't be functions. Overall behavior should be unchanged. Signed-off-by: Nathan Lynch Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/maple/pci.c | 44 +++++++++++++++++------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index ec5c14f5ba49..c3aa46b8e2b9 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -81,16 +81,20 @@ static void __init fixup_bus_range(struct device_node *bridge) } -#define U3_AGP_CFA0(devfn, off) \ - ((1 << (unsigned long)PCI_SLOT(dev_fn)) \ - | (((unsigned long)PCI_FUNC(dev_fn)) << 8) \ - | (((unsigned long)(off)) & 0xFCUL)) +static unsigned long u3_agp_cfa0(u8 devfn, u8 off) +{ + return (1 << (unsigned long)PCI_SLOT(devfn)) | + ((unsigned long)PCI_FUNC(devfn) << 8) | + ((unsigned long)off & 0xFCUL); +} -#define U3_AGP_CFA1(bus, devfn, off) \ - ((((unsigned long)(bus)) << 16) \ - |(((unsigned long)(devfn)) << 8) \ - |(((unsigned long)(off)) & 0xFCUL) \ - |1UL) +static unsigned long u3_agp_cfa1(u8 bus, u8 devfn, u8 off) +{ + return ((unsigned long)bus << 16) | + ((unsigned long)devfn << 8) | + ((unsigned long)off & 0xFCUL) | + 1UL; +} static unsigned long u3_agp_cfg_access(struct pci_controller* hose, u8 bus, u8 dev_fn, u8 offset) @@ -100,9 +104,9 @@ static unsigned long u3_agp_cfg_access(struct pci_controller* hose, if (bus == hose->first_busno) { if (dev_fn < (11 << 3)) return 0; - caddr = U3_AGP_CFA0(dev_fn, offset); + caddr = u3_agp_cfa0(dev_fn, offset); } else - caddr = U3_AGP_CFA1(bus, dev_fn, offset); + caddr = u3_agp_cfa1(bus, dev_fn, offset); /* Uninorth will return garbage if we don't read back the value ! */ do { @@ -184,13 +188,15 @@ static struct pci_ops u3_agp_pci_ops = u3_agp_write_config }; +static unsigned long u3_ht_cfa0(u8 devfn, u8 off) +{ + return (devfn << 8) | off; +} -#define U3_HT_CFA0(devfn, off) \ - ((((unsigned long)devfn) << 8) | offset) -#define U3_HT_CFA1(bus, devfn, off) \ - (U3_HT_CFA0(devfn, off) \ - + (((unsigned long)bus) << 16) \ - + 0x01000000UL) +static unsigned long u3_ht_cfa1(u8 bus, u8 devfn, u8 off) +{ + return u3_ht_cfa0(devfn, off) + (bus << 16) + 0x01000000UL; +} static unsigned long u3_ht_cfg_access(struct pci_controller* hose, u8 bus, u8 devfn, u8 offset) @@ -198,9 +204,9 @@ static unsigned long u3_ht_cfg_access(struct pci_controller* hose, if (bus == hose->first_busno) { if (PCI_SLOT(devfn) == 0) return 0; - return ((unsigned long)hose->cfg_data) + U3_HT_CFA0(devfn, offset); + return ((unsigned long)hose->cfg_data) + u3_ht_cfa0(devfn, offset); } else - return ((unsigned long)hose->cfg_data) + U3_HT_CFA1(bus, devfn, offset); + return ((unsigned long)hose->cfg_data) + u3_ht_cfa1(bus, devfn, offset); } static int u3_ht_read_config(struct pci_bus *bus, unsigned int devfn, From 3d574abd59d49173ac3096a19575a2f7430505be Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 21 Sep 2006 13:11:52 -0500 Subject: [PATCH 0358/1063] [POWERPC] Include in arch/powerpc/sysdev/fsl_soc.h for phys_addr_t. This patch causes fsl_soc.h to import the definition of phys_addr_t itself, rather than relying on its includer to do so. Signed-off-by: Scott Wood Signed-off-by: Paul Mackerras --- arch/powerpc/sysdev/fsl_soc.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/sysdev/fsl_soc.h b/arch/powerpc/sysdev/fsl_soc.h index c433d3f39edd..5a3dd480d2fd 100644 --- a/arch/powerpc/sysdev/fsl_soc.h +++ b/arch/powerpc/sysdev/fsl_soc.h @@ -2,6 +2,8 @@ #define __PPC_FSL_SOC_H #ifdef __KERNEL__ +#include + extern phys_addr_t get_immrbase(void); #endif From e102926385b56e593b995ecc433f041b498a49e1 Mon Sep 17 00:00:00 2001 From: Linas Vepstas Date: Thu, 21 Sep 2006 18:25:56 -0500 Subject: [PATCH 0359/1063] [POWERPC] EEH: Power4 systems sometimes need multiple resets. On detection of an EEH error, some Power4 systems seem to occasionally want to be reset twice before they report themselves as fully recovered. This patch re-arranges the code to attempt additional resets if the first one doesn't take. Signed-off-by: Linas Vepstas Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/pseries/eeh.c | 36 ++++++++++++++++++---------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 4534886e3b4e..84bc8f7e17ef 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -478,7 +478,7 @@ eeh_slot_availability(struct pci_dn *pdn) printk (KERN_ERR "EEH: Slot unavailable: rc=%d, rets=%d %d %d\n", rc, rets[0], rets[1], rets[2]); - return -1; + return -2; } /** @@ -546,11 +546,10 @@ rtas_pci_slot_reset(struct pci_dn *pdn, int state) BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid), state); - if (rc) { - printk (KERN_WARNING "EEH: Unable to reset the failed slot, (%d) #RST=%d dn=%s\n", + if (rc) + printk (KERN_WARNING "EEH: Unable to reset the failed slot," + " (%d) #RST=%d dn=%s\n", rc, state, pdn->node->full_name); - return; - } } /** @@ -560,11 +559,8 @@ rtas_pci_slot_reset(struct pci_dn *pdn, int state) * Return 0 if success, else a non-zero value. */ -int -rtas_set_slot_reset(struct pci_dn *pdn) +static void __rtas_set_slot_reset(struct pci_dn *pdn) { - int i, rc; - rtas_pci_slot_reset (pdn, 1); /* The PCI bus requires that the reset be held high for at least @@ -585,17 +581,33 @@ rtas_set_slot_reset(struct pci_dn *pdn) * up traffic. */ #define PCI_BUS_SETTLE_TIME_MSEC 1800 msleep (PCI_BUS_SETTLE_TIME_MSEC); +} + +int rtas_set_slot_reset(struct pci_dn *pdn) +{ + int i, rc; + + __rtas_set_slot_reset(pdn); /* Now double check with the firmware to make sure the device is * ready to be used; if not, wait for recovery. */ for (i=0; i<10; i++) { rc = eeh_slot_availability (pdn); - if (rc < 0) - printk (KERN_ERR "EEH: failed (%d) to reset slot %s\n", rc, pdn->node->full_name); if (rc == 0) return 0; - if (rc < 0) + + if (rc == -2) { + printk (KERN_ERR "EEH: failed (%d) to reset slot %s\n", + i, pdn->node->full_name); + __rtas_set_slot_reset(pdn); + continue; + } + + if (rc < 0) { + printk (KERN_ERR "EEH: unrecoverable slot failure %s\n", + pdn->node->full_name); return -1; + } msleep (rc+100); } From 69917c26c840e7de94522bf90fb190de63bf92bd Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 22 Sep 2006 12:56:30 +1000 Subject: [PATCH 0360/1063] [POWERPC] Fix ohare IDE irq workaround on old powermacs Looks like a workaround for old bogus OF bitrot... This fixes it and hence fixes boot on some performa machines. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- drivers/ide/ppc/pmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c index fa46856e8068..996c694341bc 100644 --- a/drivers/ide/ppc/pmac.c +++ b/drivers/ide/ppc/pmac.c @@ -1326,7 +1326,7 @@ pmac_ide_macio_attach(struct macio_dev *mdev, const struct of_device_id *match) if (macio_irq_count(mdev) == 0) { printk(KERN_WARNING "ide%d: no intrs for device %s, using 13\n", i, mdev->ofdev.node->full_name); - irq = 13; + irq = irq_create_mapping(NULL, 13); } else irq = macio_irq(mdev, 0); From 187ef15268e638603dea55a91fdfa29feaed6d13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5vard=20Skinnemoen?= Date: Fri, 22 Sep 2006 10:07:08 +0100 Subject: [PATCH 0361/1063] [MTD] Unlock NOR flash automatically where necessary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce the MTD_STUPID_LOCK flag which indicates that the flash chip is always locked after power-up, so all sectors need to be unlocked before it is usable. If this flag is set, and the chip provides an unlock() operation, mtd_add_device will unlock the whole MTD device if it's writeable. This means that non-writeable partitions will stay locked. Set MTD_STUPID_LOCK in fixup_use_atmel_lock() so that these chips will work as expected. Signed-off-by: HÃ¥vard Skinnemoen Signed-off-by: Andrew Morton Signed-off-by: David Woodhouse --- drivers/mtd/chips/cfi_cmdset_0002.c | 1 + drivers/mtd/mtdcore.c | 10 ++++++++++ include/mtd/mtd-abi.h | 1 + 3 files changed, 12 insertions(+) diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index a482e8922de1..702ae4cd8691 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -212,6 +212,7 @@ static void fixup_use_atmel_lock(struct mtd_info *mtd, void *param) { mtd->lock = cfi_atmel_lock; mtd->unlock = cfi_atmel_unlock; + mtd->flags |= MTD_STUPID_LOCK; } static struct cfi_fixup cfi_fixup_table[] = { diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 168d3ba063c3..c4d26de74349 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -57,6 +57,16 @@ int add_mtd_device(struct mtd_info *mtd) mtd->index = i; mtd->usecount = 0; + /* Some chips always power up locked. Unlock them now */ + if ((mtd->flags & MTD_WRITEABLE) + && (mtd->flags & MTD_STUPID_LOCK) && mtd->unlock) { + if (mtd->unlock(mtd, 0, mtd->size)) + printk(KERN_WARNING + "%s: unlock failed, " + "writes may not work\n", + mtd->name); + } + DEBUG(0, "mtd: Giving out device %d to %s\n",i, mtd->name); /* No need to get a refcount on the module containing the notifier, since we hold the mtd_table_mutex */ diff --git a/include/mtd/mtd-abi.h b/include/mtd/mtd-abi.h index 1da3f7fa7993..b0a67b7ffdcd 100644 --- a/include/mtd/mtd-abi.h +++ b/include/mtd/mtd-abi.h @@ -34,6 +34,7 @@ struct mtd_oob_buf { #define MTD_WRITEABLE 0x400 /* Device is writeable */ #define MTD_BIT_WRITEABLE 0x800 /* Single bits can be flipped */ #define MTD_NO_ERASE 0x1000 /* No erase necessary */ +#define MTD_STUPID_LOCK 0x2000 /* Always locked after reset */ // Some common devices / combinations of capabilities #define MTD_CAP_ROM 0 From cbc88ba83ff772d8c47d95ba0fef38ad888d6fcf Mon Sep 17 00:00:00 2001 From: Michal Piotrowski Date: Fri, 22 Sep 2006 10:09:44 +0100 Subject: [PATCH 0362/1063] [MTD NAND] Remove old code in au1550nd.c Signed-off-by: Michal Piotrowski Signed-off-by: Andrew Morton Signed-off-by: David Woodhouse --- drivers/mtd/nand/au1550nd.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/mtd/nand/au1550nd.c b/drivers/mtd/nand/au1550nd.c index 31228334da12..09e421a96893 100644 --- a/drivers/mtd/nand/au1550nd.c +++ b/drivers/mtd/nand/au1550nd.c @@ -21,18 +21,7 @@ #include #include -/* fixme: this is ugly */ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 0) #include -#else -#include -#ifdef CONFIG_MIPS_PB1550 -#include -#endif -#ifdef CONFIG_MIPS_DB1550 -#include -#endif -#endif /* * MTD structure for NAND controller From f2dd117fa51dd8fc071b1352254c0d14d2399b0a Mon Sep 17 00:00:00 2001 From: Michal Piotrowski Date: Fri, 22 Sep 2006 10:13:46 +0100 Subject: [PATCH 0363/1063] [JFFS2] Remove unneeded ifdefs from jffs2_fs_i.h We certainly don't need the check for Linux version > 2.5.2, and in fact we can also live without the __ECOS check, since we can just add it back in the eCos git tree which is automatically derived from the Linux fs/jffs2 subdirectory in the upstream git tree. Signed-off-by: Michal Piotrowski Signed-off-by: Andrew Morton Signed-off-by: David Woodhouse --- fs/jffs2/jffs2_fs_i.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h index 2e0cc8e00b85..3a566077ac95 100644 --- a/fs/jffs2/jffs2_fs_i.h +++ b/fs/jffs2/jffs2_fs_i.h @@ -41,11 +41,7 @@ struct jffs2_inode_info { uint16_t flags; uint8_t usercompr; -#if !defined (__ECOS) -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,2) struct inode vfs_inode; -#endif -#endif #ifdef CONFIG_JFFS2_FS_POSIX_ACL struct posix_acl *i_acl_access; struct posix_acl *i_acl_default; From f40a6f1cc7fe522e51e1ac4c7ab3035a434f2cef Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Fri, 22 Sep 2006 10:16:16 +0100 Subject: [PATCH 0364/1063] [MTD] Fix ixp4xx partition parsing. If the amount of flash is not divisible by 2 then the mask in parse_mtd_partitions would fail to work as designed. Passing in the base address corrects this problem. Signed-off-by: Brian Walsh Cc: Deepak Sanexa Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: David Woodhouse --- drivers/mtd/maps/ixp4xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/maps/ixp4xx.c b/drivers/mtd/maps/ixp4xx.c index 986c58628390..7a828e3e6446 100644 --- a/drivers/mtd/maps/ixp4xx.c +++ b/drivers/mtd/maps/ixp4xx.c @@ -253,7 +253,7 @@ static int ixp4xx_flash_probe(struct platform_device *dev) /* Use the fast version */ info->map.write = ixp4xx_write16, - err = parse_mtd_partitions(info->mtd, probes, &info->partitions, 0); + err = parse_mtd_partitions(info->mtd, probes, &info->partitions, dev->resource->start); if (err > 0) { err = add_mtd_partitions(info->mtd, info->partitions, err); if(err) From 668040fcd1e06fc3e68a92708cbdfa5a52c37d3c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 22 Sep 2006 10:17:28 +0100 Subject: [PATCH 0365/1063] [MTD] blkdev helper code: fix printk format warning Fix printk format warning(s): drivers/mtd/mtd_blkdevs.c:72: warning: long int format, different type arg (arg 2) Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: David Woodhouse --- drivers/mtd/mtd_blkdevs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 458d3c8ae1ee..302bed5f0845 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -69,7 +69,7 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, return 1; default: - printk(KERN_NOTICE "Unknown request %ld\n", rq_data_dir(req)); + printk(KERN_NOTICE "Unknown request %d\n", rq_data_dir(req)); return 0; } } From dd8e9ed6ed544e2b924429d29cd2a6b55590109b Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 22 Sep 2006 10:19:20 +0100 Subject: [PATCH 0366/1063] [MTD] Switch to pci_get_device and do ref counting Signed-off-by: Alan Cox Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: David Woodhouse --- drivers/mtd/devices/pmc551.c | 11 ++++++++++- drivers/mtd/maps/amd76xrom.c | 5 +++-- drivers/mtd/maps/ichxrom.c | 3 ++- drivers/mtd/maps/l440gx.c | 12 ++++++++++-- drivers/mtd/maps/scx200_docflash.c | 9 +++++++-- 5 files changed, 32 insertions(+), 8 deletions(-) diff --git a/drivers/mtd/devices/pmc551.c b/drivers/mtd/devices/pmc551.c index 2c0149708739..a03a8a79e5c5 100644 --- a/drivers/mtd/devices/pmc551.c +++ b/drivers/mtd/devices/pmc551.c @@ -674,7 +674,7 @@ static int __init init_pmc551(void) */ for( count = 0; count < MAX_MTD_DEVICES; count++ ) { - if ((PCI_Device = pci_find_device(PCI_VENDOR_ID_V3_SEMI, + if ((PCI_Device = pci_get_device(PCI_VENDOR_ID_V3_SEMI, PCI_DEVICE_ID_V3_SEMI_V370PDC, PCI_Device ) ) == NULL) { break; @@ -783,6 +783,10 @@ static int __init init_pmc551(void) kfree(mtd); break; } + + /* Keep a reference as the add_mtd_device worked */ + pci_dev_get(PCI_Device); + printk(KERN_NOTICE "Registered pmc551 memory device.\n"); printk(KERN_NOTICE "Mapped %dM of memory from 0x%p to 0x%p\n", priv->asize>>20, @@ -797,6 +801,10 @@ static int __init init_pmc551(void) found++; } + /* Exited early, reference left over */ + if (PCI_Device) + pci_dev_put(PCI_Device); + if( !pmc551list ) { printk(KERN_NOTICE "pmc551: not detected\n"); return -ENODEV; @@ -824,6 +832,7 @@ static void __exit cleanup_pmc551(void) priv->asize>>20, priv->start); iounmap (priv->start); } + pci_dev_put(priv->dev); kfree (mtd->priv); del_mtd_device (mtd); diff --git a/drivers/mtd/maps/amd76xrom.c b/drivers/mtd/maps/amd76xrom.c index 447955be18af..797caffb20b1 100644 --- a/drivers/mtd/maps/amd76xrom.c +++ b/drivers/mtd/maps/amd76xrom.c @@ -57,6 +57,7 @@ static void amd76xrom_cleanup(struct amd76xrom_window *window) /* Disable writes through the rom window */ pci_read_config_byte(window->pdev, 0x40, &byte); pci_write_config_byte(window->pdev, 0x40, byte & ~1); + pci_dev_put(window->pdev); } /* Free all of the mtd devices */ @@ -91,7 +92,7 @@ static int __devinit amd76xrom_init_one (struct pci_dev *pdev, struct amd76xrom_map_info *map = NULL; unsigned long map_top; - /* Remember the pci dev I find the window in */ + /* Remember the pci dev I find the window in - already have a ref */ window->pdev = pdev; /* Assume the rom window is properly setup, and find it's size */ @@ -302,7 +303,7 @@ static int __init init_amd76xrom(void) struct pci_device_id *id; pdev = NULL; for(id = amd76xrom_pci_tbl; id->vendor; id++) { - pdev = pci_find_device(id->vendor, id->device, NULL); + pdev = pci_get_device(id->vendor, id->device, NULL); if (pdev) { break; } diff --git a/drivers/mtd/maps/ichxrom.c b/drivers/mtd/maps/ichxrom.c index db4b570d874a..2bb3e63606e5 100644 --- a/drivers/mtd/maps/ichxrom.c +++ b/drivers/mtd/maps/ichxrom.c @@ -61,6 +61,7 @@ static void ichxrom_cleanup(struct ichxrom_window *window) /* Disable writes through the rom window */ pci_read_config_word(window->pdev, BIOS_CNTL, &word); pci_write_config_word(window->pdev, BIOS_CNTL, word & ~1); + pci_dev_put(window->pdev); /* Free all of the mtd devices */ list_for_each_entry_safe(map, scratch, &window->maps, list) { @@ -355,7 +356,7 @@ static int __init init_ichxrom(void) pdev = NULL; for (id = ichxrom_pci_tbl; id->vendor; id++) { - pdev = pci_find_device(id->vendor, id->device, NULL); + pdev = pci_get_device(id->vendor, id->device, NULL); if (pdev) { break; } diff --git a/drivers/mtd/maps/l440gx.c b/drivers/mtd/maps/l440gx.c index 6b784ef5ee70..67620adf4811 100644 --- a/drivers/mtd/maps/l440gx.c +++ b/drivers/mtd/maps/l440gx.c @@ -61,14 +61,17 @@ static int __init init_l440gx(void) struct resource *pm_iobase; __u16 word; - dev = pci_find_device(PCI_VENDOR_ID_INTEL, + dev = pci_get_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_0, NULL); - pm_dev = pci_find_device(PCI_VENDOR_ID_INTEL, + pm_dev = pci_get_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, NULL); + pci_dev_put(dev); + if (!dev || !pm_dev) { printk(KERN_NOTICE "L440GX flash mapping: failed to find PIIX4 ISA bridge, cannot continue\n"); + pci_dev_put(pm_dev); return -ENODEV; } @@ -76,6 +79,7 @@ static int __init init_l440gx(void) if (!l440gx_map.virt) { printk(KERN_WARNING "Failed to ioremap L440GX flash region\n"); + pci_dev_put(pm_dev); return -ENOMEM; } simple_map_init(&l440gx_map); @@ -99,8 +103,12 @@ static int __init init_l440gx(void) pm_iobase->start += iobase & ~1; pm_iobase->end += iobase & ~1; + pci_dev_put(pm_dev); + /* Allocate the resource region */ if (pci_assign_resource(pm_dev, PIIXE_IOBASE_RESOURCE) != 0) { + pci_dev_put(dev); + pci_dev_put(pm_dev); printk(KERN_WARNING "Could not allocate pm iobase resource\n"); iounmap(l440gx_map.virt); return -ENXIO; diff --git a/drivers/mtd/maps/scx200_docflash.c b/drivers/mtd/maps/scx200_docflash.c index 7391fd544e86..5e2bce22f37c 100644 --- a/drivers/mtd/maps/scx200_docflash.c +++ b/drivers/mtd/maps/scx200_docflash.c @@ -87,19 +87,23 @@ static int __init init_scx200_docflash(void) printk(KERN_DEBUG NAME ": NatSemi SCx200 DOCCS Flash Driver\n"); - if ((bridge = pci_find_device(PCI_VENDOR_ID_NS, + if ((bridge = pci_get_device(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_BRIDGE, NULL)) == NULL) return -ENODEV; /* check that we have found the configuration block */ - if (!scx200_cb_present()) + if (!scx200_cb_present()) { + pci_dev_put(bridge); return -ENODEV; + } if (probe) { /* Try to use the present flash mapping if any */ pci_read_config_dword(bridge, SCx200_DOCCS_BASE, &base); pci_read_config_dword(bridge, SCx200_DOCCS_CTRL, &ctrl); + pci_dev_put(bridge); + pmr = inl(scx200_cb_base + SCx200_PMR); if (base == 0 @@ -127,6 +131,7 @@ static int __init init_scx200_docflash(void) return -ENOMEM; } } else { + pci_dev_put(bridge); for (u = size; u > 1; u >>= 1) ; if (u != 1) { From 25f0c659fe64832d8ee06aa623fffaad708dcf8b Mon Sep 17 00:00:00 2001 From: Amol Lad Date: Thu, 21 Sep 2006 18:12:43 +0530 Subject: [PATCH 0367/1063] ioremap balanced with iounmap for drivers/mtd subsystem ioremap must be balanced by an iounmap and failing to do so can result in a memory leak. Tested (compilation only) with: - allmodconfig - Modifying drivers/mtd/maps/Kconfig and drivers/mtd/nand/Kconfig to make sure that the changed file is compiling without warning Signed-off-by: Amol Lad Signed-off-by: David Woodhouse --- drivers/mtd/maps/arctic-mtd.c | 14 +++++++++++-- drivers/mtd/maps/beech-mtd.c | 14 +++++++++++-- drivers/mtd/maps/cstm_mips_ixx.c | 18 ++++++++++++++-- drivers/mtd/maps/ebony.c | 4 ++++ drivers/mtd/maps/fortunet.c | 3 +++ drivers/mtd/maps/lasat.c | 2 ++ drivers/mtd/maps/nettel.c | 34 ++++++++++++++++++++++++------- drivers/mtd/maps/ocotea.c | 4 ++++ drivers/mtd/maps/pcmciamtd.c | 4 ++++ drivers/mtd/maps/redwood.c | 11 +++++++++- drivers/mtd/maps/sbc8240.c | 11 +++++++++- drivers/mtd/maps/walnut.c | 4 ++++ drivers/mtd/nand/edb7312.c | 3 +++ drivers/mtd/nand/ppchameleonevb.c | 7 +++++++ include/linux/utsrelease.h | 1 + 15 files changed, 119 insertions(+), 15 deletions(-) create mode 100644 include/linux/utsrelease.h diff --git a/drivers/mtd/maps/arctic-mtd.c b/drivers/mtd/maps/arctic-mtd.c index d95ae582fbe9..642d96bc8919 100644 --- a/drivers/mtd/maps/arctic-mtd.c +++ b/drivers/mtd/maps/arctic-mtd.c @@ -96,6 +96,8 @@ static struct mtd_partition arctic_partitions[PARTITIONS] = { static int __init init_arctic_mtd(void) { + int err = 0; + printk("%s: 0x%08x at 0x%08x\n", NAME, SIZE, PADDR); arctic_mtd_map.virt = ioremap(PADDR, SIZE); @@ -109,12 +111,20 @@ init_arctic_mtd(void) printk("%s: probing %d-bit flash bus\n", NAME, BUSWIDTH * 8); arctic_mtd = do_map_probe("cfi_probe", &arctic_mtd_map); - if (!arctic_mtd) + if (!arctic_mtd) { + iounmap((void *) arctic_mtd_map.virt); return -ENXIO; + } arctic_mtd->owner = THIS_MODULE; - return add_mtd_partitions(arctic_mtd, arctic_partitions, PARTITIONS); + err = add_mtd_partitions(arctic_mtd, arctic_partitions, PARTITIONS); + if (err) { + printk("%s: add_mtd_partitions failed\n", NAME); + iounmap((void *) arctic_mtd_map.virt); + } + + return err; } static void __exit diff --git a/drivers/mtd/maps/beech-mtd.c b/drivers/mtd/maps/beech-mtd.c index 5df7361d1407..a64b1a5ab316 100644 --- a/drivers/mtd/maps/beech-mtd.c +++ b/drivers/mtd/maps/beech-mtd.c @@ -72,6 +72,8 @@ static struct mtd_partition beech_partitions[2] = { static int __init init_beech_mtd(void) { + int err = 0; + printk("%s: 0x%08x at 0x%08x\n", NAME, SIZE, PADDR); beech_mtd_map.virt = ioremap(PADDR, SIZE); @@ -86,12 +88,20 @@ init_beech_mtd(void) printk("%s: probing %d-bit flash bus\n", NAME, BUSWIDTH * 8); beech_mtd = do_map_probe("cfi_probe", &beech_mtd_map); - if (!beech_mtd) + if (!beech_mtd) { + iounmap((void *) beech_mtd_map.virt); return -ENXIO; + } beech_mtd->owner = THIS_MODULE; - return add_mtd_partitions(beech_mtd, beech_partitions, 2); + err = add_mtd_partitions(beech_mtd, beech_partitions, 2); + if (err) { + printk("%s: add_mtd_partitions failed\n", NAME); + iounmap((void *) beech_mtd_map.virt); + } + + return err; } static void __exit diff --git a/drivers/mtd/maps/cstm_mips_ixx.c b/drivers/mtd/maps/cstm_mips_ixx.c index aa56defb94c8..d6bef100d69a 100644 --- a/drivers/mtd/maps/cstm_mips_ixx.c +++ b/drivers/mtd/maps/cstm_mips_ixx.c @@ -171,7 +171,14 @@ int __init init_cstm_mips_ixx(void) cstm_mips_ixx_map[i].phys = cstm_mips_ixx_board_desc[i].window_addr; cstm_mips_ixx_map[i].virt = ioremap(cstm_mips_ixx_board_desc[i].window_addr, cstm_mips_ixx_board_desc[i].window_size); if (!cstm_mips_ixx_map[i].virt) { + int j = 0; printk(KERN_WARNING "Failed to ioremap\n"); + for (j = 0; j < i; j++) { + if (cstm_mips_ixx_map[j].virt) { + iounmap((void *)cstm_mips_ixx_map[j].virt); + cstm_mips_ixx_map[j].virt = 0; + } + } return -EIO; } cstm_mips_ixx_map[i].name = cstm_mips_ixx_board_desc[i].name; @@ -204,8 +211,15 @@ int __init init_cstm_mips_ixx(void) cstm_mips_ixx_map[i].map_priv_2 = (unsigned long)mymtd; add_mtd_partitions(mymtd, parts, cstm_mips_ixx_board_desc[i].num_partitions); } - else - return -ENXIO; + else { + for (i = 0; i < PHYSMAP_NUMBER; i++) { + if (cstm_mips_ixx_map[i].virt) { + iounmap((void *)cstm_mips_ixx_map[i].virt); + cstm_mips_ixx_map[i].virt = 0; + } + } + return -ENXIO; + } } return 0; } diff --git a/drivers/mtd/maps/ebony.c b/drivers/mtd/maps/ebony.c index 641e1dd8479e..1488bb92f26f 100644 --- a/drivers/mtd/maps/ebony.c +++ b/drivers/mtd/maps/ebony.c @@ -108,6 +108,7 @@ int __init init_ebony(void) ARRAY_SIZE(ebony_small_partitions)); } else { printk("map probe failed for flash\n"); + iounmap(ebony_small_map.virt); return -ENXIO; } @@ -117,6 +118,7 @@ int __init init_ebony(void) if (!ebony_large_map.virt) { printk("Failed to ioremap flash\n"); + iounmap(ebony_small_map.virt); return -EIO; } @@ -129,6 +131,8 @@ int __init init_ebony(void) ARRAY_SIZE(ebony_large_partitions)); } else { printk("map probe failed for flash\n"); + iounmap(ebony_small_map.virt); + iounmap(ebony_large_map.virt); return -ENXIO; } diff --git a/drivers/mtd/maps/fortunet.c b/drivers/mtd/maps/fortunet.c index c6bf4e1219ef..7c50c271651c 100644 --- a/drivers/mtd/maps/fortunet.c +++ b/drivers/mtd/maps/fortunet.c @@ -218,8 +218,11 @@ int __init init_fortunet(void) map_regions[ix].map_info.size); if(!map_regions[ix].map_info.virt) { + int j = 0; printk(MTD_FORTUNET_PK "%s flash failed to ioremap!\n", map_regions[ix].map_info.name); + for (j = 0 ; j < ix; j++) + iounmap(map_regions[j].map_info.virt); return -ENXIO; } simple_map_init(&map_regions[ix].map_info); diff --git a/drivers/mtd/maps/lasat.c b/drivers/mtd/maps/lasat.c index 1c13d2dc0cdf..e34376321050 100644 --- a/drivers/mtd/maps/lasat.c +++ b/drivers/mtd/maps/lasat.c @@ -79,6 +79,7 @@ static int __init init_lasat(void) return 0; } + iounmap(lasat_map.virt); return -ENXIO; } @@ -89,6 +90,7 @@ static void __exit cleanup_lasat(void) map_destroy(lasat_mtd); } if (lasat_map.virt) { + iounmap(lasat_map.virt); lasat_map.virt = 0; } } diff --git a/drivers/mtd/maps/nettel.c b/drivers/mtd/maps/nettel.c index 0994b5b2e331..198e840ff6db 100644 --- a/drivers/mtd/maps/nettel.c +++ b/drivers/mtd/maps/nettel.c @@ -277,6 +277,7 @@ int __init nettel_init(void) nettel_amd_map.virt = ioremap_nocache(amdaddr, maxsize); if (!nettel_amd_map.virt) { printk("SNAPGEAR: failed to ioremap() BOOTCS\n"); + iounmap(nettel_mmcrp); return(-EIO); } simple_map_init(&nettel_amd_map); @@ -337,7 +338,8 @@ int __init nettel_init(void) nettel_amd_map.virt = NULL; #else /* Only AMD flash supported */ - return(-ENXIO); + rc = -ENXIO; + goto out_unmap2; #endif } @@ -361,14 +363,15 @@ int __init nettel_init(void) nettel_intel_map.virt = ioremap_nocache(intel0addr, maxsize); if (!nettel_intel_map.virt) { printk("SNAPGEAR: failed to ioremap() ROMCS1\n"); - return(-EIO); + rc = -EIO; + goto out_unmap2; } simple_map_init(&nettel_intel_map); intel_mtd = do_map_probe("cfi_probe", &nettel_intel_map); if (!intel_mtd) { - iounmap(nettel_intel_map.virt); - return(-ENXIO); + rc = -ENXIO; + goto out_unmap1; } /* Set PAR to the detected size */ @@ -394,13 +397,14 @@ int __init nettel_init(void) nettel_intel_map.virt = ioremap_nocache(intel0addr, maxsize); if (!nettel_intel_map.virt) { printk("SNAPGEAR: failed to ioremap() ROMCS1/2\n"); - return(-EIO); + rc = -EIO; + goto out_unmap2; } intel_mtd = do_map_probe("cfi_probe", &nettel_intel_map); if (! intel_mtd) { - iounmap((void *) nettel_intel_map.virt); - return(-ENXIO); + rc = -ENXIO; + goto out_unmap1; } intel1size = intel_mtd->size - intel0size; @@ -456,6 +460,18 @@ int __init nettel_init(void) #endif return(rc); + +#ifdef CONFIG_MTD_CFI_INTELEXT +out_unmap1: + iounmap((void *) nettel_intel_map.virt); +#endif + +out_unmap2: + iounmap(nettel_mmcrp); + iounmap(nettel_amd_map.virt); + + return(rc); + } /****************************************************************************/ @@ -469,6 +485,10 @@ void __exit nettel_cleanup(void) del_mtd_partitions(amd_mtd); map_destroy(amd_mtd); } + if (nettel_mmcrp) { + iounmap(nettel_mmcrp); + nettel_mmcrp = NULL; + } if (nettel_amd_map.virt) { iounmap(nettel_amd_map.virt); nettel_amd_map.virt = NULL; diff --git a/drivers/mtd/maps/ocotea.c b/drivers/mtd/maps/ocotea.c index 2f07602ba940..5522eac8c980 100644 --- a/drivers/mtd/maps/ocotea.c +++ b/drivers/mtd/maps/ocotea.c @@ -97,6 +97,7 @@ int __init init_ocotea(void) ARRAY_SIZE(ocotea_small_partitions)); } else { printk("map probe failed for flash\n"); + iounmap(ocotea_small_map.virt); return -ENXIO; } @@ -106,6 +107,7 @@ int __init init_ocotea(void) if (!ocotea_large_map.virt) { printk("Failed to ioremap flash\n"); + iounmap(ocotea_small_map.virt); return -EIO; } @@ -118,6 +120,8 @@ int __init init_ocotea(void) ARRAY_SIZE(ocotea_large_partitions)); } else { printk("map probe failed for flash\n"); + iounmap(ocotea_small_map.virt); + iounmap(ocotea_large_map.virt); return -ENXIO; } diff --git a/drivers/mtd/maps/pcmciamtd.c b/drivers/mtd/maps/pcmciamtd.c index c861134cbc48..995347b1beba 100644 --- a/drivers/mtd/maps/pcmciamtd.c +++ b/drivers/mtd/maps/pcmciamtd.c @@ -602,6 +602,10 @@ static int pcmciamtd_config(struct pcmcia_device *link) ret = pcmcia_request_configuration(link, &link->conf); if(ret != CS_SUCCESS) { cs_error(link, RequestConfiguration, ret); + if (dev->win_base) { + iounmap(dev->win_base); + dev->win_base = NULL; + } return -ENODEV; } diff --git a/drivers/mtd/maps/redwood.c b/drivers/mtd/maps/redwood.c index ec8fdae1dd99..2257d2b500c0 100644 --- a/drivers/mtd/maps/redwood.c +++ b/drivers/mtd/maps/redwood.c @@ -126,6 +126,8 @@ static struct mtd_info *redwood_mtd; int __init init_redwood_flash(void) { + int err = 0; + printk(KERN_NOTICE "redwood: flash mapping: %x at %x\n", WINDOW_SIZE, WINDOW_ADDR); @@ -141,11 +143,18 @@ int __init init_redwood_flash(void) if (redwood_mtd) { redwood_mtd->owner = THIS_MODULE; - return add_mtd_partitions(redwood_mtd, + err = add_mtd_partitions(redwood_mtd, redwood_flash_partitions, NUM_REDWOOD_FLASH_PARTITIONS); + if (err) { + printk("init_redwood_flash: add_mtd_partitions failed\n"); + iounmap(redwood_flash_map.virt); + } + return err; + } + iounmap(redwood_flash_map.virt); return -ENXIO; } diff --git a/drivers/mtd/maps/sbc8240.c b/drivers/mtd/maps/sbc8240.c index 7d0fcf8f4f33..b8c1331b7a04 100644 --- a/drivers/mtd/maps/sbc8240.c +++ b/drivers/mtd/maps/sbc8240.c @@ -156,7 +156,7 @@ int __init init_sbc8240_mtd (void) }; int devicesfound = 0; - int i; + int i,j; for (i = 0; i < NUM_FLASH_BANKS; i++) { printk (KERN_NOTICE MSG_PREFIX @@ -166,6 +166,10 @@ int __init init_sbc8240_mtd (void) (unsigned long) ioremap (pt[i].addr, pt[i].size); if (!sbc8240_map[i].map_priv_1) { printk (MSG_PREFIX "failed to ioremap\n"); + for (j = 0; j < i; j++) { + iounmap((void *) sbc8240_map[j].map_priv_1); + sbc8240_map[j].map_priv_1 = 0; + } return -EIO; } simple_map_init(&sbc8240_mtd[i]); @@ -175,6 +179,11 @@ int __init init_sbc8240_mtd (void) if (sbc8240_mtd[i]) { sbc8240_mtd[i]->module = THIS_MODULE; devicesfound++; + } else { + if (sbc8240_map[i].map_priv_1) { + iounmap((void *) sbc8240_map[i].map_priv_1); + sbc8240_map[i].map_priv_1 = 0; + } } } diff --git a/drivers/mtd/maps/walnut.c b/drivers/mtd/maps/walnut.c index ec80eec376bf..ca932122fb64 100644 --- a/drivers/mtd/maps/walnut.c +++ b/drivers/mtd/maps/walnut.c @@ -68,6 +68,7 @@ int __init init_walnut(void) if (WALNUT_FLASH_ONBD_N(fpga_brds1)) { printk("The on-board flash is disabled (U79 sw 5)!"); + iounmap(fpga_status_adr); return -EIO; } if (WALNUT_FLASH_SRAM_SEL(fpga_brds1)) @@ -81,6 +82,7 @@ int __init init_walnut(void) if (!walnut_map.virt) { printk("Failed to ioremap flash.\n"); + iounmap(fpga_status_adr); return -EIO; } @@ -93,9 +95,11 @@ int __init init_walnut(void) ARRAY_SIZE(walnut_partitions)); } else { printk("map probe failed for flash\n"); + iounmap(fpga_status_adr); return -ENXIO; } + iounmap(fpga_status_adr); return 0; } diff --git a/drivers/mtd/nand/edb7312.c b/drivers/mtd/nand/edb7312.c index 516c0e5e564c..12017f3c6bd6 100644 --- a/drivers/mtd/nand/edb7312.c +++ b/drivers/mtd/nand/edb7312.c @@ -198,6 +198,9 @@ static void __exit ep7312_cleanup(void) /* Release resources, unregister device */ nand_release(ap7312_mtd); + /* Release io resource */ + iounmap((void *)this->IO_ADDR_R); + /* Free the MTD device structure */ kfree(ep7312_mtd); } diff --git a/drivers/mtd/nand/ppchameleonevb.c b/drivers/mtd/nand/ppchameleonevb.c index 22fa65c12ab9..eb7d4d443deb 100644 --- a/drivers/mtd/nand/ppchameleonevb.c +++ b/drivers/mtd/nand/ppchameleonevb.c @@ -276,6 +276,7 @@ static int __init ppchameleonevb_init(void) /* Scan to find existence of the device (it could not be mounted) */ if (nand_scan(ppchameleon_mtd, 1)) { iounmap((void *)ppchameleon_fio_base); + ppchameleon_fio_base = NULL; kfree(ppchameleon_mtd); goto nand_evb_init; } @@ -314,6 +315,8 @@ static int __init ppchameleonevb_init(void) ppchameleonevb_mtd = kmalloc(sizeof(struct mtd_info) + sizeof(struct nand_chip), GFP_KERNEL); if (!ppchameleonevb_mtd) { printk("Unable to allocate PPChameleonEVB NAND MTD device structure.\n"); + if (ppchameleon_fio_base) + iounmap(ppchameleon_fio_base); return -ENOMEM; } @@ -322,6 +325,8 @@ static int __init ppchameleonevb_init(void) if (!ppchameleonevb_fio_base) { printk("ioremap PPChameleonEVB NAND flash failed\n"); kfree(ppchameleonevb_mtd); + if (ppchameleon_fio_base) + iounmap(ppchameleon_fio_base); return -EIO; } @@ -378,6 +383,8 @@ static int __init ppchameleonevb_init(void) if (nand_scan(ppchameleonevb_mtd, 1)) { iounmap((void *)ppchameleonevb_fio_base); kfree(ppchameleonevb_mtd); + if (ppchameleon_fio_base) + iounmap(ppchameleon_fio_base); return -ENXIO; } #ifdef CONFIG_MTD_PARTITIONS diff --git a/include/linux/utsrelease.h b/include/linux/utsrelease.h new file mode 100644 index 000000000000..7fe1de0ad4c1 --- /dev/null +++ b/include/linux/utsrelease.h @@ -0,0 +1 @@ +#define UTS_RELEASE "2.6.18" From 17c2dae3aaff9b1e5d83996a5f098ad693f3aeca Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Thu, 21 Sep 2006 23:16:48 +0200 Subject: [PATCH 0368/1063] [MTD] physmap: add power management support Implement PM handling for physmap. Idea from Steven Scholz, patch by David Anders. Signed-off-by: Lennert Buytenhek Signed-off-by: David Woodhouse --- drivers/mtd/maps/physmap.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/mtd/maps/physmap.c b/drivers/mtd/maps/physmap.c index 7799a25a7f2a..bc7cc71788bc 100644 --- a/drivers/mtd/maps/physmap.c +++ b/drivers/mtd/maps/physmap.c @@ -158,9 +158,42 @@ err_out: return err; } +#ifdef CONFIG_PM +static int physmap_flash_suspend(struct platform_device *dev, pm_message_t state) +{ + struct physmap_flash_info *info = platform_get_drvdata(dev); + int ret = 0; + + if (info) + ret = info->mtd->suspend(info->mtd); + + return ret; +} + +static int physmap_flash_resume(struct platform_device *dev) +{ + struct physmap_flash_info *info = platform_get_drvdata(dev); + if (info) + info->mtd->resume(info->mtd); + return 0; +} + +static void physmap_flash_shutdown(struct platform_device *dev) +{ + struct physmap_flash_info *info = platform_get_drvdata(dev); + if (info && info->mtd->suspend(info->mtd) == 0) + info->mtd->resume(info->mtd); +} +#endif + static struct platform_driver physmap_flash_driver = { .probe = physmap_flash_probe, .remove = physmap_flash_remove, +#ifdef CONFIG_PM + .suspend = physmap_flash_suspend, + .resume = physmap_flash_resume, + .shutdown = physmap_flash_shutdown, +#endif .driver = { .name = "physmap-flash", }, From 6a545a0d6021a4d759ba6d0c1082d1abf8d64c84 Mon Sep 17 00:00:00 2001 From: Frank Haverkamp Date: Wed, 20 Sep 2006 17:24:52 +0200 Subject: [PATCH 0369/1063] [MTD NAND] Fix in typo ndfc.c causing wrong ECC layout Due to this typo, a wrong ECC layout table is chosen. Signed-off-by: Frank Haverkamp Signed-off-by: David Woodhouse --- drivers/mtd/nand/ndfc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/ndfc.c b/drivers/mtd/nand/ndfc.c index e5bd88f2d560..039c759cfbfc 100644 --- a/drivers/mtd/nand/ndfc.c +++ b/drivers/mtd/nand/ndfc.c @@ -168,7 +168,7 @@ static void ndfc_chip_init(struct ndfc_nand_mtd *mtd) chip->ecc.mode = NAND_ECC_HW; chip->ecc.size = 256; chip->ecc.bytes = 3; - chip->ecclayout = mtd->pl_chip->ecclayout; + chip->ecclayout = chip->ecc.layout = mtd->pl_chip->ecclayout; mtd->mtd.priv = chip; mtd->mtd.owner = THIS_MODULE; } From e417fcfb857b809e5dabc9b252ad70f090d553d1 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Thu, 21 Sep 2006 03:47:48 +0200 Subject: [PATCH 0370/1063] [MTD] Remove iq80310 map driver The iq80310 mtd map driver depends on ARCH_IQ80310, which isn't defined anywhere in the tree (as we don't have 80310 support), and furthermore, everything the driver does can be done with physmap instead. Signed-off-by: Lennert Buytenhek Signed-off-by: David Woodhouse --- drivers/mtd/maps/Kconfig | 8 --- drivers/mtd/maps/Makefile | 1 - drivers/mtd/maps/iq80310.c | 118 ------------------------------------- 3 files changed, 127 deletions(-) delete mode 100644 drivers/mtd/maps/iq80310.c diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig index 64d1b6a6c920..24747bdc3e19 100644 --- a/drivers/mtd/maps/Kconfig +++ b/drivers/mtd/maps/Kconfig @@ -447,14 +447,6 @@ config MTD_DC21285 21285 bridge used with Intel's StrongARM processors. More info at . -config MTD_IQ80310 - tristate "CFI Flash device mapped on the XScale IQ80310 board" - depends on MTD_CFI && ARCH_IQ80310 - help - This enables access routines for the flash chips on the Intel XScale - IQ80310 evaluation board. If you have one of these boards and would - like to use the flash chips on it, say 'Y'. - config MTD_IXP4XX tristate "CFI Flash device mapped on Intel IXP4xx based systems" depends on MTD_CFI && MTD_COMPLEX_MAPPINGS && ARCH_IXP4XX diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile index ab71f172eb77..191c1928bbec 100644 --- a/drivers/mtd/maps/Makefile +++ b/drivers/mtd/maps/Makefile @@ -15,7 +15,6 @@ obj-$(CONFIG_MTD_CFI_FLAGADM) += cfi_flagadm.o obj-$(CONFIG_MTD_CSTM_MIPS_IXX) += cstm_mips_ixx.o obj-$(CONFIG_MTD_DC21285) += dc21285.o obj-$(CONFIG_MTD_DILNETPC) += dilnetpc.o -obj-$(CONFIG_MTD_IQ80310) += iq80310.o obj-$(CONFIG_MTD_L440GX) += l440gx.o obj-$(CONFIG_MTD_AMD76XROM) += amd76xrom.o obj-$(CONFIG_MTD_ICHXROM) += ichxrom.o diff --git a/drivers/mtd/maps/iq80310.c b/drivers/mtd/maps/iq80310.c deleted file mode 100644 index 62d9e87d84e2..000000000000 --- a/drivers/mtd/maps/iq80310.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * $Id: iq80310.c,v 1.21 2005/11/07 11:14:27 gleixner Exp $ - * - * Mapping for the Intel XScale IQ80310 evaluation board - * - * Author: Nicolas Pitre - * Copyright: (C) 2001 MontaVista Software Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#define WINDOW_ADDR 0 -#define WINDOW_SIZE 8*1024*1024 -#define BUSWIDTH 1 - -static struct mtd_info *mymtd; - -static struct map_info iq80310_map = { - .name = "IQ80310 flash", - .size = WINDOW_SIZE, - .bankwidth = BUSWIDTH, - .phys = WINDOW_ADDR -}; - -static struct mtd_partition iq80310_partitions[4] = { - { - .name = "Firmware", - .size = 0x00080000, - .offset = 0, - .mask_flags = MTD_WRITEABLE /* force read-only */ - },{ - .name = "Kernel", - .size = 0x000a0000, - .offset = 0x00080000, - },{ - .name = "Filesystem", - .size = 0x00600000, - .offset = 0x00120000 - },{ - .name = "RedBoot", - .size = 0x000e0000, - .offset = 0x00720000, - .mask_flags = MTD_WRITEABLE - } -}; - -static struct mtd_info *mymtd; -static struct mtd_partition *parsed_parts; -static const char *probes[] = { "RedBoot", "cmdlinepart", NULL }; - -static int __init init_iq80310(void) -{ - struct mtd_partition *parts; - int nb_parts = 0; - int parsed_nr_parts = 0; - int ret; - - iq80310_map.virt = ioremap(WINDOW_ADDR, WINDOW_SIZE); - if (!iq80310_map.virt) { - printk("Failed to ioremap\n"); - return -EIO; - } - simple_map_init(&iq80310_map); - - mymtd = do_map_probe("cfi_probe", &iq80310_map); - if (!mymtd) { - iounmap((void *)iq80310_map.virt); - return -ENXIO; - } - mymtd->owner = THIS_MODULE; - - ret = parse_mtd_partitions(mymtd, probes, &parsed_parts, 0); - - if (ret > 0) - parsed_nr_parts = ret; - - if (parsed_nr_parts > 0) { - parts = parsed_parts; - nb_parts = parsed_nr_parts; - } else { - parts = iq80310_partitions; - nb_parts = ARRAY_SIZE(iq80310_partitions); - } - add_mtd_partitions(mymtd, parts, nb_parts); - return 0; -} - -static void __exit cleanup_iq80310(void) -{ - if (mymtd) { - del_mtd_partitions(mymtd); - map_destroy(mymtd); - kfree(parsed_parts); - } - if (iq80310_map.virt) - iounmap((void *)iq80310_map.virt); -} - -module_init(init_iq80310); -module_exit(cleanup_iq80310); - - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Nicolas Pitre "); -MODULE_DESCRIPTION("MTD map driver for Intel XScale IQ80310 evaluation board"); From cdf0a7d16980858e72f5d26bfe48abf01112fab5 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 19 Sep 2006 21:55:06 +0200 Subject: [PATCH 0371/1063] [MTD] pmc551 whitespace cleanup Spaces were used for indent, there was more than 80 columns per line. Get rid of that stuff. Signed-off-by: Jiri Slaby Signed-off-by: David Woodhouse --- drivers/mtd/devices/pmc551.c | 1095 +++++++++++++++++----------------- 1 file changed, 562 insertions(+), 533 deletions(-) diff --git a/drivers/mtd/devices/pmc551.c b/drivers/mtd/devices/pmc551.c index a03a8a79e5c5..4d4023601d22 100644 --- a/drivers/mtd/devices/pmc551.c +++ b/drivers/mtd/devices/pmc551.c @@ -4,82 +4,82 @@ * PMC551 PCI Mezzanine Ram Device * * Author: - * Mark Ferrell - * Copyright 1999,2000 Nortel Networks + * Mark Ferrell + * Copyright 1999,2000 Nortel Networks * * License: - * As part of this driver was derived from the slram.c driver it - * falls under the same license, which is GNU General Public - * License v2 + * As part of this driver was derived from the slram.c driver it + * falls under the same license, which is GNU General Public + * License v2 * * Description: - * This driver is intended to support the PMC551 PCI Ram device - * from Ramix Inc. The PMC551 is a PMC Mezzanine module for - * cPCI embedded systems. The device contains a single SROM - * that initially programs the V370PDC chipset onboard the - * device, and various banks of DRAM/SDRAM onboard. This driver - * implements this PCI Ram device as an MTD (Memory Technology - * Device) so that it can be used to hold a file system, or for - * added swap space in embedded systems. Since the memory on - * this board isn't as fast as main memory we do not try to hook - * it into main memory as that would simply reduce performance - * on the system. Using it as a block device allows us to use - * it as high speed swap or for a high speed disk device of some - * sort. Which becomes very useful on diskless systems in the - * embedded market I might add. + * This driver is intended to support the PMC551 PCI Ram device + * from Ramix Inc. The PMC551 is a PMC Mezzanine module for + * cPCI embedded systems. The device contains a single SROM + * that initially programs the V370PDC chipset onboard the + * device, and various banks of DRAM/SDRAM onboard. This driver + * implements this PCI Ram device as an MTD (Memory Technology + * Device) so that it can be used to hold a file system, or for + * added swap space in embedded systems. Since the memory on + * this board isn't as fast as main memory we do not try to hook + * it into main memory as that would simply reduce performance + * on the system. Using it as a block device allows us to use + * it as high speed swap or for a high speed disk device of some + * sort. Which becomes very useful on diskless systems in the + * embedded market I might add. * * Notes: - * Due to what I assume is more buggy SROM, the 64M PMC551 I - * have available claims that all 4 of it's DRAM banks have 64M - * of ram configured (making a grand total of 256M onboard). - * This is slightly annoying since the BAR0 size reflects the - * aperture size, not the dram size, and the V370PDC supplies no - * other method for memory size discovery. This problem is - * mostly only relevant when compiled as a module, as the - * unloading of the module with an aperture size smaller then - * the ram will cause the driver to detect the onboard memory - * size to be equal to the aperture size when the module is - * reloaded. Soooo, to help, the module supports an msize - * option to allow the specification of the onboard memory, and - * an asize option, to allow the specification of the aperture - * size. The aperture must be equal to or less then the memory - * size, the driver will correct this if you screw it up. This - * problem is not relevant for compiled in drivers as compiled - * in drivers only init once. + * Due to what I assume is more buggy SROM, the 64M PMC551 I + * have available claims that all 4 of it's DRAM banks have 64M + * of ram configured (making a grand total of 256M onboard). + * This is slightly annoying since the BAR0 size reflects the + * aperture size, not the dram size, and the V370PDC supplies no + * other method for memory size discovery. This problem is + * mostly only relevant when compiled as a module, as the + * unloading of the module with an aperture size smaller then + * the ram will cause the driver to detect the onboard memory + * size to be equal to the aperture size when the module is + * reloaded. Soooo, to help, the module supports an msize + * option to allow the specification of the onboard memory, and + * an asize option, to allow the specification of the aperture + * size. The aperture must be equal to or less then the memory + * size, the driver will correct this if you screw it up. This + * problem is not relevant for compiled in drivers as compiled + * in drivers only init once. * * Credits: - * Saeed Karamooz of Ramix INC. for the - * initial example code of how to initialize this device and for - * help with questions I had concerning operation of the device. + * Saeed Karamooz of Ramix INC. for the + * initial example code of how to initialize this device and for + * help with questions I had concerning operation of the device. * - * Most of the MTD code for this driver was originally written - * for the slram.o module in the MTD drivers package which - * allows the mapping of system memory into an MTD device. - * Since the PMC551 memory module is accessed in the same - * fashion as system memory, the slram.c code became a very nice - * fit to the needs of this driver. All we added was PCI - * detection/initialization to the driver and automatically figure - * out the size via the PCI detection.o, later changes by Corey - * Minyard set up the card to utilize a 1M sliding apature. + * Most of the MTD code for this driver was originally written + * for the slram.o module in the MTD drivers package which + * allows the mapping of system memory into an MTD device. + * Since the PMC551 memory module is accessed in the same + * fashion as system memory, the slram.c code became a very nice + * fit to the needs of this driver. All we added was PCI + * detection/initialization to the driver and automatically figure + * out the size via the PCI detection.o, later changes by Corey + * Minyard set up the card to utilize a 1M sliding apature. * - * Corey Minyard - * * Modified driver to utilize a sliding aperture instead of - * mapping all memory into kernel space which turned out to - * be very wasteful. - * * Located a bug in the SROM's initialization sequence that - * made the memory unusable, added a fix to code to touch up - * the DRAM some. + * Corey Minyard + * * Modified driver to utilize a sliding aperture instead of + * mapping all memory into kernel space which turned out to + * be very wasteful. + * * Located a bug in the SROM's initialization sequence that + * made the memory unusable, added a fix to code to touch up + * the DRAM some. * * Bugs/FIXME's: - * * MUST fix the init function to not spin on a register - * waiting for it to set .. this does not safely handle busted - * devices that never reset the register correctly which will - * cause the system to hang w/ a reboot being the only chance at - * recover. [sort of fixed, could be better] - * * Add I2C handling of the SROM so we can read the SROM's information - * about the aperture size. This should always accurately reflect the - * onboard memory size. - * * Comb the init routine. It's still a bit cludgy on a few things. + * * MUST fix the init function to not spin on a register + * waiting for it to set .. this does not safely handle busted + * devices that never reset the register correctly which will + * cause the system to hang w/ a reboot being the only chance at + * recover. [sort of fixed, could be better] + * * Add I2C handling of the SROM so we can read the SROM's information + * about the aperture size. This should always accurately reflect the + * onboard memory size. + * * Comb the init routine. It's still a bit cludgy on a few things. */ #include @@ -105,74 +105,77 @@ static struct mtd_info *pmc551list; -static int pmc551_erase (struct mtd_info *mtd, struct erase_info *instr) +static int pmc551_erase(struct mtd_info *mtd, struct erase_info *instr) { - struct mypriv *priv = mtd->priv; - u32 soff_hi, soff_lo; /* start address offset hi/lo */ - u32 eoff_hi, eoff_lo; /* end address offset hi/lo */ - unsigned long end; + struct mypriv *priv = mtd->priv; + u32 soff_hi, soff_lo; /* start address offset hi/lo */ + u32 eoff_hi, eoff_lo; /* end address offset hi/lo */ + unsigned long end; u_char *ptr; size_t retlen; #ifdef CONFIG_MTD_PMC551_DEBUG - printk(KERN_DEBUG "pmc551_erase(pos:%ld, len:%ld)\n", (long)instr->addr, (long)instr->len); + printk(KERN_DEBUG "pmc551_erase(pos:%ld, len:%ld)\n", (long)instr->addr, + (long)instr->len); #endif - end = instr->addr + instr->len - 1; + end = instr->addr + instr->len - 1; - /* Is it past the end? */ - if ( end > mtd->size ) { + /* Is it past the end? */ + if (end > mtd->size) { #ifdef CONFIG_MTD_PMC551_DEBUG - printk(KERN_DEBUG "pmc551_erase() out of bounds (%ld > %ld)\n", (long)end, (long)mtd->size); + printk(KERN_DEBUG "pmc551_erase() out of bounds (%ld > %ld)\n", + (long)end, (long)mtd->size); #endif - return -EINVAL; - } + return -EINVAL; + } - eoff_hi = end & ~(priv->asize - 1); - soff_hi = instr->addr & ~(priv->asize - 1); - eoff_lo = end & (priv->asize - 1); - soff_lo = instr->addr & (priv->asize - 1); + eoff_hi = end & ~(priv->asize - 1); + soff_hi = instr->addr & ~(priv->asize - 1); + eoff_lo = end & (priv->asize - 1); + soff_lo = instr->addr & (priv->asize - 1); - pmc551_point (mtd, instr->addr, instr->len, &retlen, &ptr); + pmc551_point(mtd, instr->addr, instr->len, &retlen, &ptr); - if ( soff_hi == eoff_hi || mtd->size == priv->asize) { - /* The whole thing fits within one access, so just one shot - will do it. */ - memset(ptr, 0xff, instr->len); - } else { - /* We have to do multiple writes to get all the data - written. */ - while (soff_hi != eoff_hi) { + if (soff_hi == eoff_hi || mtd->size == priv->asize) { + /* The whole thing fits within one access, so just one shot + will do it. */ + memset(ptr, 0xff, instr->len); + } else { + /* We have to do multiple writes to get all the data + written. */ + while (soff_hi != eoff_hi) { #ifdef CONFIG_MTD_PMC551_DEBUG - printk( KERN_DEBUG "pmc551_erase() soff_hi: %ld, eoff_hi: %ld\n", (long)soff_hi, (long)eoff_hi); + printk(KERN_DEBUG "pmc551_erase() soff_hi: %ld, " + "eoff_hi: %ld\n", (long)soff_hi, (long)eoff_hi); #endif - memset(ptr, 0xff, priv->asize); - if (soff_hi + priv->asize >= mtd->size) { - goto out; - } - soff_hi += priv->asize; - pmc551_point (mtd,(priv->base_map0|soff_hi), - priv->asize, &retlen, &ptr); - } - memset (ptr, 0xff, eoff_lo); - } + memset(ptr, 0xff, priv->asize); + if (soff_hi + priv->asize >= mtd->size) { + goto out; + } + soff_hi += priv->asize; + pmc551_point(mtd, (priv->base_map0 | soff_hi), + priv->asize, &retlen, &ptr); + } + memset(ptr, 0xff, eoff_lo); + } -out: + out: instr->state = MTD_ERASE_DONE; #ifdef CONFIG_MTD_PMC551_DEBUG printk(KERN_DEBUG "pmc551_erase() done\n"); #endif - mtd_erase_callback(instr); - return 0; + mtd_erase_callback(instr); + return 0; } - -static int pmc551_point (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char **mtdbuf) +static int pmc551_point(struct mtd_info *mtd, loff_t from, size_t len, + size_t * retlen, u_char ** mtdbuf) { - struct mypriv *priv = mtd->priv; - u32 soff_hi; - u32 soff_lo; + struct mypriv *priv = mtd->priv; + u32 soff_hi; + u32 soff_lo; #ifdef CONFIG_MTD_PMC551_DEBUG printk(KERN_DEBUG "pmc551_point(%ld, %ld)\n", (long)from, (long)len); @@ -180,18 +183,19 @@ static int pmc551_point (struct mtd_info *mtd, loff_t from, size_t len, size_t * if (from + len > mtd->size) { #ifdef CONFIG_MTD_PMC551_DEBUG - printk(KERN_DEBUG "pmc551_point() out of bounds (%ld > %ld)\n", (long)from+len, (long)mtd->size); + printk(KERN_DEBUG "pmc551_point() out of bounds (%ld > %ld)\n", + (long)from + len, (long)mtd->size); #endif return -EINVAL; } - soff_hi = from & ~(priv->asize - 1); - soff_lo = from & (priv->asize - 1); + soff_hi = from & ~(priv->asize - 1); + soff_lo = from & (priv->asize - 1); /* Cheap hack optimization */ - if( priv->curr_map0 != from ) { - pci_write_config_dword ( priv->dev, PMC551_PCI_MEM_MAP0, - (priv->base_map0 | soff_hi) ); + if (priv->curr_map0 != from) { + pci_write_config_dword(priv->dev, PMC551_PCI_MEM_MAP0, + (priv->base_map0 | soff_hi)); priv->curr_map0 = soff_hi; } @@ -200,137 +204,144 @@ static int pmc551_point (struct mtd_info *mtd, loff_t from, size_t len, size_t * return 0; } - -static void pmc551_unpoint (struct mtd_info *mtd, u_char *addr, loff_t from, size_t len) +static void pmc551_unpoint(struct mtd_info *mtd, u_char * addr, loff_t from, + size_t len) { #ifdef CONFIG_MTD_PMC551_DEBUG printk(KERN_DEBUG "pmc551_unpoint()\n"); #endif } - -static int pmc551_read (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) +static int pmc551_read(struct mtd_info *mtd, loff_t from, size_t len, + size_t * retlen, u_char * buf) { - struct mypriv *priv = mtd->priv; - u32 soff_hi, soff_lo; /* start address offset hi/lo */ - u32 eoff_hi, eoff_lo; /* end address offset hi/lo */ - unsigned long end; + struct mypriv *priv = mtd->priv; + u32 soff_hi, soff_lo; /* start address offset hi/lo */ + u32 eoff_hi, eoff_lo; /* end address offset hi/lo */ + unsigned long end; u_char *ptr; - u_char *copyto = buf; + u_char *copyto = buf; #ifdef CONFIG_MTD_PMC551_DEBUG - printk(KERN_DEBUG "pmc551_read(pos:%ld, len:%ld) asize: %ld\n", (long)from, (long)len, (long)priv->asize); + printk(KERN_DEBUG "pmc551_read(pos:%ld, len:%ld) asize: %ld\n", + (long)from, (long)len, (long)priv->asize); #endif - end = from + len - 1; + end = from + len - 1; - /* Is it past the end? */ - if (end > mtd->size) { + /* Is it past the end? */ + if (end > mtd->size) { #ifdef CONFIG_MTD_PMC551_DEBUG - printk(KERN_DEBUG "pmc551_read() out of bounds (%ld > %ld)\n", (long) end, (long)mtd->size); + printk(KERN_DEBUG "pmc551_read() out of bounds (%ld > %ld)\n", + (long)end, (long)mtd->size); #endif - return -EINVAL; - } + return -EINVAL; + } - soff_hi = from & ~(priv->asize - 1); - eoff_hi = end & ~(priv->asize - 1); - soff_lo = from & (priv->asize - 1); - eoff_lo = end & (priv->asize - 1); + soff_hi = from & ~(priv->asize - 1); + eoff_hi = end & ~(priv->asize - 1); + soff_lo = from & (priv->asize - 1); + eoff_lo = end & (priv->asize - 1); - pmc551_point (mtd, from, len, retlen, &ptr); + pmc551_point(mtd, from, len, retlen, &ptr); - if (soff_hi == eoff_hi) { - /* The whole thing fits within one access, so just one shot - will do it. */ - memcpy(copyto, ptr, len); - copyto += len; - } else { - /* We have to do multiple writes to get all the data - written. */ - while (soff_hi != eoff_hi) { + if (soff_hi == eoff_hi) { + /* The whole thing fits within one access, so just one shot + will do it. */ + memcpy(copyto, ptr, len); + copyto += len; + } else { + /* We have to do multiple writes to get all the data + written. */ + while (soff_hi != eoff_hi) { #ifdef CONFIG_MTD_PMC551_DEBUG - printk( KERN_DEBUG "pmc551_read() soff_hi: %ld, eoff_hi: %ld\n", (long)soff_hi, (long)eoff_hi); + printk(KERN_DEBUG "pmc551_read() soff_hi: %ld, " + "eoff_hi: %ld\n", (long)soff_hi, (long)eoff_hi); #endif - memcpy(copyto, ptr, priv->asize); - copyto += priv->asize; - if (soff_hi + priv->asize >= mtd->size) { - goto out; - } - soff_hi += priv->asize; - pmc551_point (mtd, soff_hi, priv->asize, retlen, &ptr); - } - memcpy(copyto, ptr, eoff_lo); - copyto += eoff_lo; - } + memcpy(copyto, ptr, priv->asize); + copyto += priv->asize; + if (soff_hi + priv->asize >= mtd->size) { + goto out; + } + soff_hi += priv->asize; + pmc551_point(mtd, soff_hi, priv->asize, retlen, &ptr); + } + memcpy(copyto, ptr, eoff_lo); + copyto += eoff_lo; + } -out: + out: #ifdef CONFIG_MTD_PMC551_DEBUG printk(KERN_DEBUG "pmc551_read() done\n"); #endif - *retlen = copyto - buf; - return 0; + *retlen = copyto - buf; + return 0; } -static int pmc551_write (struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf) +static int pmc551_write(struct mtd_info *mtd, loff_t to, size_t len, + size_t * retlen, const u_char * buf) { - struct mypriv *priv = mtd->priv; - u32 soff_hi, soff_lo; /* start address offset hi/lo */ - u32 eoff_hi, eoff_lo; /* end address offset hi/lo */ - unsigned long end; + struct mypriv *priv = mtd->priv; + u32 soff_hi, soff_lo; /* start address offset hi/lo */ + u32 eoff_hi, eoff_lo; /* end address offset hi/lo */ + unsigned long end; u_char *ptr; - const u_char *copyfrom = buf; - + const u_char *copyfrom = buf; #ifdef CONFIG_MTD_PMC551_DEBUG - printk(KERN_DEBUG "pmc551_write(pos:%ld, len:%ld) asize:%ld\n", (long)to, (long)len, (long)priv->asize); + printk(KERN_DEBUG "pmc551_write(pos:%ld, len:%ld) asize:%ld\n", + (long)to, (long)len, (long)priv->asize); #endif - end = to + len - 1; - /* Is it past the end? or did the u32 wrap? */ - if (end > mtd->size ) { + end = to + len - 1; + /* Is it past the end? or did the u32 wrap? */ + if (end > mtd->size) { #ifdef CONFIG_MTD_PMC551_DEBUG - printk(KERN_DEBUG "pmc551_write() out of bounds (end: %ld, size: %ld, to: %ld)\n", (long) end, (long)mtd->size, (long)to); + printk(KERN_DEBUG "pmc551_write() out of bounds (end: %ld, " + "size: %ld, to: %ld)\n", (long)end, (long)mtd->size, + (long)to); #endif - return -EINVAL; - } + return -EINVAL; + } - soff_hi = to & ~(priv->asize - 1); - eoff_hi = end & ~(priv->asize - 1); - soff_lo = to & (priv->asize - 1); - eoff_lo = end & (priv->asize - 1); + soff_hi = to & ~(priv->asize - 1); + eoff_hi = end & ~(priv->asize - 1); + soff_lo = to & (priv->asize - 1); + eoff_lo = end & (priv->asize - 1); - pmc551_point (mtd, to, len, retlen, &ptr); + pmc551_point(mtd, to, len, retlen, &ptr); - if (soff_hi == eoff_hi) { - /* The whole thing fits within one access, so just one shot - will do it. */ - memcpy(ptr, copyfrom, len); - copyfrom += len; - } else { - /* We have to do multiple writes to get all the data - written. */ - while (soff_hi != eoff_hi) { + if (soff_hi == eoff_hi) { + /* The whole thing fits within one access, so just one shot + will do it. */ + memcpy(ptr, copyfrom, len); + copyfrom += len; + } else { + /* We have to do multiple writes to get all the data + written. */ + while (soff_hi != eoff_hi) { #ifdef CONFIG_MTD_PMC551_DEBUG - printk( KERN_DEBUG "pmc551_write() soff_hi: %ld, eoff_hi: %ld\n", (long)soff_hi, (long)eoff_hi); + printk(KERN_DEBUG "pmc551_write() soff_hi: %ld, " + "eoff_hi: %ld\n", (long)soff_hi, (long)eoff_hi); #endif - memcpy(ptr, copyfrom, priv->asize); - copyfrom += priv->asize; - if (soff_hi >= mtd->size) { - goto out; - } - soff_hi += priv->asize; - pmc551_point (mtd, soff_hi, priv->asize, retlen, &ptr); - } - memcpy(ptr, copyfrom, eoff_lo); - copyfrom += eoff_lo; - } + memcpy(ptr, copyfrom, priv->asize); + copyfrom += priv->asize; + if (soff_hi >= mtd->size) { + goto out; + } + soff_hi += priv->asize; + pmc551_point(mtd, soff_hi, priv->asize, retlen, &ptr); + } + memcpy(ptr, copyfrom, eoff_lo); + copyfrom += eoff_lo; + } -out: + out: #ifdef CONFIG_MTD_PMC551_DEBUG printk(KERN_DEBUG "pmc551_write() done\n"); #endif - *retlen = copyfrom - buf; - return 0; + *retlen = copyfrom - buf; + return 0; } /* @@ -345,58 +356,58 @@ out: * mechanism * returns the size of the memory region found. */ -static u32 fixup_pmc551 (struct pci_dev *dev) +static u32 fixup_pmc551(struct pci_dev *dev) { #ifdef CONFIG_MTD_PMC551_BUGFIX - u32 dram_data; + u32 dram_data; #endif - u32 size, dcmd, cfg, dtmp; - u16 cmd, tmp, i; + u32 size, dcmd, cfg, dtmp; + u16 cmd, tmp, i; u8 bcmd, counter; - /* Sanity Check */ - if(!dev) { - return -ENODEV; - } + /* Sanity Check */ + if (!dev) { + return -ENODEV; + } /* * Attempt to reset the card * FIXME: Stop Spinning registers */ - counter=0; + counter = 0; /* unlock registers */ - pci_write_config_byte(dev, PMC551_SYS_CTRL_REG, 0xA5 ); + pci_write_config_byte(dev, PMC551_SYS_CTRL_REG, 0xA5); /* read in old data */ - pci_read_config_byte(dev, PMC551_SYS_CTRL_REG, &bcmd ); + pci_read_config_byte(dev, PMC551_SYS_CTRL_REG, &bcmd); /* bang the reset line up and down for a few */ - for(i=0;i<10;i++) { - counter=0; + for (i = 0; i < 10; i++) { + counter = 0; bcmd &= ~0x80; - while(counter++ < 100) { + while (counter++ < 100) { pci_write_config_byte(dev, PMC551_SYS_CTRL_REG, bcmd); } - counter=0; + counter = 0; bcmd |= 0x80; - while(counter++ < 100) { + while (counter++ < 100) { pci_write_config_byte(dev, PMC551_SYS_CTRL_REG, bcmd); } } - bcmd |= (0x40|0x20); + bcmd |= (0x40 | 0x20); pci_write_config_byte(dev, PMC551_SYS_CTRL_REG, bcmd); - /* + /* * Take care and turn off the memory on the device while we * tweak the configurations */ - pci_read_config_word(dev, PCI_COMMAND, &cmd); - tmp = cmd & ~(PCI_COMMAND_IO|PCI_COMMAND_MEMORY); - pci_write_config_word(dev, PCI_COMMAND, tmp); + pci_read_config_word(dev, PCI_COMMAND, &cmd); + tmp = cmd & ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY); + pci_write_config_word(dev, PCI_COMMAND, tmp); /* * Disable existing aperture before probing memory size */ pci_read_config_dword(dev, PMC551_PCI_MEM_MAP0, &dcmd); - dtmp=(dcmd|PMC551_PCI_MEM_MAP_ENABLE|PMC551_PCI_MEM_MAP_REG_EN); + dtmp = (dcmd | PMC551_PCI_MEM_MAP_ENABLE | PMC551_PCI_MEM_MAP_REG_EN); pci_write_config_dword(dev, PMC551_PCI_MEM_MAP0, dtmp); /* * Grab old BAR0 config so that we can figure out memory size @@ -407,220 +418,231 @@ static u32 fixup_pmc551 (struct pci_dev *dev) * then write all 1's to the memory space, read back the result into * "size", and then write back all the old config. */ - pci_read_config_dword( dev, PCI_BASE_ADDRESS_0, &cfg ); + pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, &cfg); #ifndef CONFIG_MTD_PMC551_BUGFIX - pci_write_config_dword( dev, PCI_BASE_ADDRESS_0, ~0 ); - pci_read_config_dword( dev, PCI_BASE_ADDRESS_0, &size ); - size = (size&PCI_BASE_ADDRESS_MEM_MASK); - size &= ~(size-1); - pci_write_config_dword( dev, PCI_BASE_ADDRESS_0, cfg ); + pci_write_config_dword(dev, PCI_BASE_ADDRESS_0, ~0); + pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, &size); + size = (size & PCI_BASE_ADDRESS_MEM_MASK); + size &= ~(size - 1); + pci_write_config_dword(dev, PCI_BASE_ADDRESS_0, cfg); #else - /* - * Get the size of the memory by reading all the DRAM size values - * and adding them up. - * - * KLUDGE ALERT: the boards we are using have invalid column and - * row mux values. We fix them here, but this will break other - * memory configurations. - */ - pci_read_config_dword(dev, PMC551_DRAM_BLK0, &dram_data); - size = PMC551_DRAM_BLK_GET_SIZE(dram_data); - dram_data = PMC551_DRAM_BLK_SET_COL_MUX(dram_data, 0x5); - dram_data = PMC551_DRAM_BLK_SET_ROW_MUX(dram_data, 0x9); - pci_write_config_dword(dev, PMC551_DRAM_BLK0, dram_data); + /* + * Get the size of the memory by reading all the DRAM size values + * and adding them up. + * + * KLUDGE ALERT: the boards we are using have invalid column and + * row mux values. We fix them here, but this will break other + * memory configurations. + */ + pci_read_config_dword(dev, PMC551_DRAM_BLK0, &dram_data); + size = PMC551_DRAM_BLK_GET_SIZE(dram_data); + dram_data = PMC551_DRAM_BLK_SET_COL_MUX(dram_data, 0x5); + dram_data = PMC551_DRAM_BLK_SET_ROW_MUX(dram_data, 0x9); + pci_write_config_dword(dev, PMC551_DRAM_BLK0, dram_data); - pci_read_config_dword(dev, PMC551_DRAM_BLK1, &dram_data); - size += PMC551_DRAM_BLK_GET_SIZE(dram_data); - dram_data = PMC551_DRAM_BLK_SET_COL_MUX(dram_data, 0x5); - dram_data = PMC551_DRAM_BLK_SET_ROW_MUX(dram_data, 0x9); - pci_write_config_dword(dev, PMC551_DRAM_BLK1, dram_data); + pci_read_config_dword(dev, PMC551_DRAM_BLK1, &dram_data); + size += PMC551_DRAM_BLK_GET_SIZE(dram_data); + dram_data = PMC551_DRAM_BLK_SET_COL_MUX(dram_data, 0x5); + dram_data = PMC551_DRAM_BLK_SET_ROW_MUX(dram_data, 0x9); + pci_write_config_dword(dev, PMC551_DRAM_BLK1, dram_data); - pci_read_config_dword(dev, PMC551_DRAM_BLK2, &dram_data); - size += PMC551_DRAM_BLK_GET_SIZE(dram_data); - dram_data = PMC551_DRAM_BLK_SET_COL_MUX(dram_data, 0x5); - dram_data = PMC551_DRAM_BLK_SET_ROW_MUX(dram_data, 0x9); - pci_write_config_dword(dev, PMC551_DRAM_BLK2, dram_data); + pci_read_config_dword(dev, PMC551_DRAM_BLK2, &dram_data); + size += PMC551_DRAM_BLK_GET_SIZE(dram_data); + dram_data = PMC551_DRAM_BLK_SET_COL_MUX(dram_data, 0x5); + dram_data = PMC551_DRAM_BLK_SET_ROW_MUX(dram_data, 0x9); + pci_write_config_dword(dev, PMC551_DRAM_BLK2, dram_data); - pci_read_config_dword(dev, PMC551_DRAM_BLK3, &dram_data); - size += PMC551_DRAM_BLK_GET_SIZE(dram_data); - dram_data = PMC551_DRAM_BLK_SET_COL_MUX(dram_data, 0x5); - dram_data = PMC551_DRAM_BLK_SET_ROW_MUX(dram_data, 0x9); - pci_write_config_dword(dev, PMC551_DRAM_BLK3, dram_data); + pci_read_config_dword(dev, PMC551_DRAM_BLK3, &dram_data); + size += PMC551_DRAM_BLK_GET_SIZE(dram_data); + dram_data = PMC551_DRAM_BLK_SET_COL_MUX(dram_data, 0x5); + dram_data = PMC551_DRAM_BLK_SET_ROW_MUX(dram_data, 0x9); + pci_write_config_dword(dev, PMC551_DRAM_BLK3, dram_data); - /* - * Oops .. something went wrong - */ - if( (size &= PCI_BASE_ADDRESS_MEM_MASK) == 0) { - return -ENODEV; - } -#endif /* CONFIG_MTD_PMC551_BUGFIX */ + /* + * Oops .. something went wrong + */ + if ((size &= PCI_BASE_ADDRESS_MEM_MASK) == 0) { + return -ENODEV; + } +#endif /* CONFIG_MTD_PMC551_BUGFIX */ - if ((cfg&PCI_BASE_ADDRESS_SPACE) != PCI_BASE_ADDRESS_SPACE_MEMORY) { - return -ENODEV; + if ((cfg & PCI_BASE_ADDRESS_SPACE) != PCI_BASE_ADDRESS_SPACE_MEMORY) { + return -ENODEV; } - /* - * Precharge Dram - */ - pci_write_config_word( dev, PMC551_SDRAM_MA, 0x0400 ); - pci_write_config_word( dev, PMC551_SDRAM_CMD, 0x00bf ); + /* + * Precharge Dram + */ + pci_write_config_word(dev, PMC551_SDRAM_MA, 0x0400); + pci_write_config_word(dev, PMC551_SDRAM_CMD, 0x00bf); - /* - * Wait until command has gone through - * FIXME: register spinning issue - */ - do { pci_read_config_word( dev, PMC551_SDRAM_CMD, &cmd ); - if(counter++ > 100)break; - } while ( (PCI_COMMAND_IO) & cmd ); + /* + * Wait until command has gone through + * FIXME: register spinning issue + */ + do { + pci_read_config_word(dev, PMC551_SDRAM_CMD, &cmd); + if (counter++ > 100) + break; + } while ((PCI_COMMAND_IO) & cmd); - /* + /* * Turn on auto refresh * The loop is taken directly from Ramix's example code. I assume that * this must be held high for some duration of time, but I can find no * documentation refrencing the reasons why. - */ - for ( i = 1; i<=8 ; i++) { - pci_write_config_word (dev, PMC551_SDRAM_CMD, 0x0df); + */ + for (i = 1; i <= 8; i++) { + pci_write_config_word(dev, PMC551_SDRAM_CMD, 0x0df); - /* - * Make certain command has gone through - * FIXME: register spinning issue - */ - counter=0; - do { pci_read_config_word(dev, PMC551_SDRAM_CMD, &cmd); - if(counter++ > 100)break; - } while ( (PCI_COMMAND_IO) & cmd ); - } + /* + * Make certain command has gone through + * FIXME: register spinning issue + */ + counter = 0; + do { + pci_read_config_word(dev, PMC551_SDRAM_CMD, &cmd); + if (counter++ > 100) + break; + } while ((PCI_COMMAND_IO) & cmd); + } - pci_write_config_word ( dev, PMC551_SDRAM_MA, 0x0020); - pci_write_config_word ( dev, PMC551_SDRAM_CMD, 0x0ff); + pci_write_config_word(dev, PMC551_SDRAM_MA, 0x0020); + pci_write_config_word(dev, PMC551_SDRAM_CMD, 0x0ff); - /* - * Wait until command completes - * FIXME: register spinning issue - */ - counter=0; - do { pci_read_config_word ( dev, PMC551_SDRAM_CMD, &cmd); - if(counter++ > 100)break; - } while ( (PCI_COMMAND_IO) & cmd ); + /* + * Wait until command completes + * FIXME: register spinning issue + */ + counter = 0; + do { + pci_read_config_word(dev, PMC551_SDRAM_CMD, &cmd); + if (counter++ > 100) + break; + } while ((PCI_COMMAND_IO) & cmd); - pci_read_config_dword ( dev, PMC551_DRAM_CFG, &dcmd); - dcmd |= 0x02000000; - pci_write_config_dword ( dev, PMC551_DRAM_CFG, dcmd); + pci_read_config_dword(dev, PMC551_DRAM_CFG, &dcmd); + dcmd |= 0x02000000; + pci_write_config_dword(dev, PMC551_DRAM_CFG, dcmd); - /* - * Check to make certain fast back-to-back, if not - * then set it so - */ - pci_read_config_word( dev, PCI_STATUS, &cmd); - if((cmd&PCI_COMMAND_FAST_BACK) == 0) { - cmd |= PCI_COMMAND_FAST_BACK; - pci_write_config_word( dev, PCI_STATUS, cmd); - } + /* + * Check to make certain fast back-to-back, if not + * then set it so + */ + pci_read_config_word(dev, PCI_STATUS, &cmd); + if ((cmd & PCI_COMMAND_FAST_BACK) == 0) { + cmd |= PCI_COMMAND_FAST_BACK; + pci_write_config_word(dev, PCI_STATUS, cmd); + } - /* - * Check to make certain the DEVSEL is set correctly, this device - * has a tendancy to assert DEVSEL and TRDY when a write is performed - * to the memory when memory is read-only - */ - if((cmd&PCI_STATUS_DEVSEL_MASK) != 0x0) { - cmd &= ~PCI_STATUS_DEVSEL_MASK; - pci_write_config_word( dev, PCI_STATUS, cmd ); - } - /* - * Set to be prefetchable and put everything back based on old cfg. + /* + * Check to make certain the DEVSEL is set correctly, this device + * has a tendancy to assert DEVSEL and TRDY when a write is performed + * to the memory when memory is read-only + */ + if ((cmd & PCI_STATUS_DEVSEL_MASK) != 0x0) { + cmd &= ~PCI_STATUS_DEVSEL_MASK; + pci_write_config_word(dev, PCI_STATUS, cmd); + } + /* + * Set to be prefetchable and put everything back based on old cfg. * it's possible that the reset of the V370PDC nuked the original * setup - */ + */ /* - cfg |= PCI_BASE_ADDRESS_MEM_PREFETCH; - pci_write_config_dword( dev, PCI_BASE_ADDRESS_0, cfg ); - */ + cfg |= PCI_BASE_ADDRESS_MEM_PREFETCH; + pci_write_config_dword( dev, PCI_BASE_ADDRESS_0, cfg ); + */ - /* - * Turn PCI memory and I/O bus access back on - */ - pci_write_config_word( dev, PCI_COMMAND, - PCI_COMMAND_MEMORY | PCI_COMMAND_IO ); + /* + * Turn PCI memory and I/O bus access back on + */ + pci_write_config_word(dev, PCI_COMMAND, + PCI_COMMAND_MEMORY | PCI_COMMAND_IO); #ifdef CONFIG_MTD_PMC551_DEBUG - /* - * Some screen fun - */ - printk(KERN_DEBUG "pmc551: %d%c (0x%x) of %sprefetchable memory at 0x%llx\n", - (size<1024)?size:(size<1048576)?size>>10:size>>20, - (size<1024)?'B':(size<1048576)?'K':'M', - size, ((dcmd&(0x1<<3)) == 0)?"non-":"", - (unsigned long long)((dev->resource[0].start)&PCI_BASE_ADDRESS_MEM_MASK)); + /* + * Some screen fun + */ + printk(KERN_DEBUG "pmc551: %d%c (0x%x) of %sprefetchable memory at " + "0x%llx\n", (size < 1024) ? size : (size < 1048576) ? + size >> 10 : size >> 20, + (size < 1024) ? 'B' : (size < 1048576) ? 'K' : 'M', size, + ((dcmd & (0x1 << 3)) == 0) ? "non-" : "", + (unsigned long long)((dev->resource[0].start) & + PCI_BASE_ADDRESS_MEM_MASK)); - /* - * Check to see the state of the memory - */ - pci_read_config_dword( dev, PMC551_DRAM_BLK0, &dcmd ); - printk(KERN_DEBUG "pmc551: DRAM_BLK0 Flags: %s,%s\n" - "pmc551: DRAM_BLK0 Size: %d at %d\n" - "pmc551: DRAM_BLK0 Row MUX: %d, Col MUX: %d\n", - (((0x1<<1)&dcmd) == 0)?"RW":"RO", - (((0x1<<0)&dcmd) == 0)?"Off":"On", - PMC551_DRAM_BLK_GET_SIZE(dcmd), - ((dcmd>>20)&0x7FF), ((dcmd>>13)&0x7), ((dcmd>>9)&0xF) ); + /* + * Check to see the state of the memory + */ + pci_read_config_dword(dev, PMC551_DRAM_BLK0, &dcmd); + printk(KERN_DEBUG "pmc551: DRAM_BLK0 Flags: %s,%s\n" + "pmc551: DRAM_BLK0 Size: %d at %d\n" + "pmc551: DRAM_BLK0 Row MUX: %d, Col MUX: %d\n", + (((0x1 << 1) & dcmd) == 0) ? "RW" : "RO", + (((0x1 << 0) & dcmd) == 0) ? "Off" : "On", + PMC551_DRAM_BLK_GET_SIZE(dcmd), + ((dcmd >> 20) & 0x7FF), ((dcmd >> 13) & 0x7), + ((dcmd >> 9) & 0xF)); - pci_read_config_dword( dev, PMC551_DRAM_BLK1, &dcmd ); - printk(KERN_DEBUG "pmc551: DRAM_BLK1 Flags: %s,%s\n" - "pmc551: DRAM_BLK1 Size: %d at %d\n" - "pmc551: DRAM_BLK1 Row MUX: %d, Col MUX: %d\n", - (((0x1<<1)&dcmd) == 0)?"RW":"RO", - (((0x1<<0)&dcmd) == 0)?"Off":"On", - PMC551_DRAM_BLK_GET_SIZE(dcmd), - ((dcmd>>20)&0x7FF), ((dcmd>>13)&0x7), ((dcmd>>9)&0xF) ); + pci_read_config_dword(dev, PMC551_DRAM_BLK1, &dcmd); + printk(KERN_DEBUG "pmc551: DRAM_BLK1 Flags: %s,%s\n" + "pmc551: DRAM_BLK1 Size: %d at %d\n" + "pmc551: DRAM_BLK1 Row MUX: %d, Col MUX: %d\n", + (((0x1 << 1) & dcmd) == 0) ? "RW" : "RO", + (((0x1 << 0) & dcmd) == 0) ? "Off" : "On", + PMC551_DRAM_BLK_GET_SIZE(dcmd), + ((dcmd >> 20) & 0x7FF), ((dcmd >> 13) & 0x7), + ((dcmd >> 9) & 0xF)); - pci_read_config_dword( dev, PMC551_DRAM_BLK2, &dcmd ); - printk(KERN_DEBUG "pmc551: DRAM_BLK2 Flags: %s,%s\n" - "pmc551: DRAM_BLK2 Size: %d at %d\n" - "pmc551: DRAM_BLK2 Row MUX: %d, Col MUX: %d\n", - (((0x1<<1)&dcmd) == 0)?"RW":"RO", - (((0x1<<0)&dcmd) == 0)?"Off":"On", - PMC551_DRAM_BLK_GET_SIZE(dcmd), - ((dcmd>>20)&0x7FF), ((dcmd>>13)&0x7), ((dcmd>>9)&0xF) ); + pci_read_config_dword(dev, PMC551_DRAM_BLK2, &dcmd); + printk(KERN_DEBUG "pmc551: DRAM_BLK2 Flags: %s,%s\n" + "pmc551: DRAM_BLK2 Size: %d at %d\n" + "pmc551: DRAM_BLK2 Row MUX: %d, Col MUX: %d\n", + (((0x1 << 1) & dcmd) == 0) ? "RW" : "RO", + (((0x1 << 0) & dcmd) == 0) ? "Off" : "On", + PMC551_DRAM_BLK_GET_SIZE(dcmd), + ((dcmd >> 20) & 0x7FF), ((dcmd >> 13) & 0x7), + ((dcmd >> 9) & 0xF)); - pci_read_config_dword( dev, PMC551_DRAM_BLK3, &dcmd ); - printk(KERN_DEBUG "pmc551: DRAM_BLK3 Flags: %s,%s\n" - "pmc551: DRAM_BLK3 Size: %d at %d\n" - "pmc551: DRAM_BLK3 Row MUX: %d, Col MUX: %d\n", - (((0x1<<1)&dcmd) == 0)?"RW":"RO", - (((0x1<<0)&dcmd) == 0)?"Off":"On", - PMC551_DRAM_BLK_GET_SIZE(dcmd), - ((dcmd>>20)&0x7FF), ((dcmd>>13)&0x7), ((dcmd>>9)&0xF) ); + pci_read_config_dword(dev, PMC551_DRAM_BLK3, &dcmd); + printk(KERN_DEBUG "pmc551: DRAM_BLK3 Flags: %s,%s\n" + "pmc551: DRAM_BLK3 Size: %d at %d\n" + "pmc551: DRAM_BLK3 Row MUX: %d, Col MUX: %d\n", + (((0x1 << 1) & dcmd) == 0) ? "RW" : "RO", + (((0x1 << 0) & dcmd) == 0) ? "Off" : "On", + PMC551_DRAM_BLK_GET_SIZE(dcmd), + ((dcmd >> 20) & 0x7FF), ((dcmd >> 13) & 0x7), + ((dcmd >> 9) & 0xF)); - pci_read_config_word( dev, PCI_COMMAND, &cmd ); - printk( KERN_DEBUG "pmc551: Memory Access %s\n", - (((0x1<<1)&cmd) == 0)?"off":"on" ); - printk( KERN_DEBUG "pmc551: I/O Access %s\n", - (((0x1<<0)&cmd) == 0)?"off":"on" ); + pci_read_config_word(dev, PCI_COMMAND, &cmd); + printk(KERN_DEBUG "pmc551: Memory Access %s\n", + (((0x1 << 1) & cmd) == 0) ? "off" : "on"); + printk(KERN_DEBUG "pmc551: I/O Access %s\n", + (((0x1 << 0) & cmd) == 0) ? "off" : "on"); - pci_read_config_word( dev, PCI_STATUS, &cmd ); - printk( KERN_DEBUG "pmc551: Devsel %s\n", - ((PCI_STATUS_DEVSEL_MASK&cmd)==0x000)?"Fast": - ((PCI_STATUS_DEVSEL_MASK&cmd)==0x200)?"Medium": - ((PCI_STATUS_DEVSEL_MASK&cmd)==0x400)?"Slow":"Invalid" ); + pci_read_config_word(dev, PCI_STATUS, &cmd); + printk(KERN_DEBUG "pmc551: Devsel %s\n", + ((PCI_STATUS_DEVSEL_MASK & cmd) == 0x000) ? "Fast" : + ((PCI_STATUS_DEVSEL_MASK & cmd) == 0x200) ? "Medium" : + ((PCI_STATUS_DEVSEL_MASK & cmd) == 0x400) ? "Slow" : "Invalid"); - printk( KERN_DEBUG "pmc551: %sFast Back-to-Back\n", - ((PCI_COMMAND_FAST_BACK&cmd) == 0)?"Not ":"" ); + printk(KERN_DEBUG "pmc551: %sFast Back-to-Back\n", + ((PCI_COMMAND_FAST_BACK & cmd) == 0) ? "Not " : ""); - pci_read_config_byte(dev, PMC551_SYS_CTRL_REG, &bcmd ); - printk( KERN_DEBUG "pmc551: EEPROM is under %s control\n" - "pmc551: System Control Register is %slocked to PCI access\n" - "pmc551: System Control Register is %slocked to EEPROM access\n", - (bcmd&0x1)?"software":"hardware", - (bcmd&0x20)?"":"un", (bcmd&0x40)?"":"un"); + pci_read_config_byte(dev, PMC551_SYS_CTRL_REG, &bcmd); + printk(KERN_DEBUG "pmc551: EEPROM is under %s control\n" + "pmc551: System Control Register is %slocked to PCI access\n" + "pmc551: System Control Register is %slocked to EEPROM access\n", + (bcmd & 0x1) ? "software" : "hardware", + (bcmd & 0x20) ? "" : "un", (bcmd & 0x40) ? "" : "un"); #endif - return size; + return size; } /* * Kernel version specific module stuffages */ - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Mark Ferrell "); MODULE_DESCRIPTION(PMC551_VERSION); @@ -628,11 +650,11 @@ MODULE_DESCRIPTION(PMC551_VERSION); /* * Stuff these outside the ifdef so as to not bust compiled in driver support */ -static int msize=0; +static int msize = 0; #if defined(CONFIG_MTD_PMC551_APERTURE_SIZE) -static int asize=CONFIG_MTD_PMC551_APERTURE_SIZE +static int asize = CONFIG_MTD_PMC551_APERTURE_SIZE #else -static int asize=0; +static int asize = 0; #endif module_param(msize, int, 0); @@ -645,172 +667,179 @@ MODULE_PARM_DESC(asize, "aperture size, must be <= memsize [1-1024]"); */ static int __init init_pmc551(void) { - struct pci_dev *PCI_Device = NULL; - struct mypriv *priv; - int count, found=0; - struct mtd_info *mtd; - u32 length = 0; + struct pci_dev *PCI_Device = NULL; + struct mypriv *priv; + int count, found = 0; + struct mtd_info *mtd; + u32 length = 0; - if(msize) { - msize = (1 << (ffs(msize) - 1))<<20; - if (msize > (1<<30)) { - printk(KERN_NOTICE "pmc551: Invalid memory size [%d]\n", msize); + if (msize) { + msize = (1 << (ffs(msize) - 1)) << 20; + if (msize > (1 << 30)) { + printk(KERN_NOTICE "pmc551: Invalid memory size [%d]\n", + msize); return -EINVAL; } } - if(asize) { - asize = (1 << (ffs(asize) - 1))<<20; - if (asize > (1<<30) ) { - printk(KERN_NOTICE "pmc551: Invalid aperture size [%d]\n", asize); + if (asize) { + asize = (1 << (ffs(asize) - 1)) << 20; + if (asize > (1 << 30)) { + printk(KERN_NOTICE "pmc551: Invalid aperture size " + "[%d]\n", asize); return -EINVAL; } } - printk(KERN_INFO PMC551_VERSION); + printk(KERN_INFO PMC551_VERSION); - /* - * PCU-bus chipset probe. - */ - for( count = 0; count < MAX_MTD_DEVICES; count++ ) { + /* + * PCU-bus chipset probe. + */ + for (count = 0; count < MAX_MTD_DEVICES; count++) { - if ((PCI_Device = pci_get_device(PCI_VENDOR_ID_V3_SEMI, - PCI_DEVICE_ID_V3_SEMI_V370PDC, - PCI_Device ) ) == NULL) { - break; - } + if ((PCI_Device = pci_get_device(PCI_VENDOR_ID_V3_SEMI, + PCI_DEVICE_ID_V3_SEMI_V370PDC, + PCI_Device)) == NULL) { + break; + } - printk(KERN_NOTICE "pmc551: Found PCI V370PDC at 0x%llx\n", - (unsigned long long)PCI_Device->resource[0].start); + printk(KERN_NOTICE "pmc551: Found PCI V370PDC at 0x%llx\n", + (unsigned long long)PCI_Device->resource[0].start); - /* - * The PMC551 device acts VERY weird if you don't init it - * first. i.e. it will not correctly report devsel. If for - * some reason the sdram is in a wrote-protected state the - * device will DEVSEL when it is written to causing problems - * with the oldproc.c driver in - * some kernels (2.2.*) - */ - if((length = fixup_pmc551(PCI_Device)) <= 0) { - printk(KERN_NOTICE "pmc551: Cannot init SDRAM\n"); - break; - } + /* + * The PMC551 device acts VERY weird if you don't init it + * first. i.e. it will not correctly report devsel. If for + * some reason the sdram is in a wrote-protected state the + * device will DEVSEL when it is written to causing problems + * with the oldproc.c driver in + * some kernels (2.2.*) + */ + if ((length = fixup_pmc551(PCI_Device)) <= 0) { + printk(KERN_NOTICE "pmc551: Cannot init SDRAM\n"); + break; + } /* * This is needed until the driver is capable of reading the * onboard I2C SROM to discover the "real" memory size. */ - if(msize) { + if (msize) { length = msize; - printk(KERN_NOTICE "pmc551: Using specified memory size 0x%x\n", length); + printk(KERN_NOTICE "pmc551: Using specified memory " + "size 0x%x\n", length); } else { msize = length; } - mtd = kmalloc(sizeof(struct mtd_info), GFP_KERNEL); - if (!mtd) { - printk(KERN_NOTICE "pmc551: Cannot allocate new MTD device.\n"); - break; - } - - memset(mtd, 0, sizeof(struct mtd_info)); - - priv = kmalloc (sizeof(struct mypriv), GFP_KERNEL); - if (!priv) { - printk(KERN_NOTICE "pmc551: Cannot allocate new MTD device.\n"); - kfree(mtd); - break; - } - memset(priv, 0, sizeof(*priv)); - mtd->priv = priv; - priv->dev = PCI_Device; - - if(asize > length) { - printk(KERN_NOTICE "pmc551: reducing aperture size to fit %dM\n",length>>20); - priv->asize = asize = length; - } else if (asize == 0 || asize == length) { - printk(KERN_NOTICE "pmc551: Using existing aperture size %dM\n", length>>20); - priv->asize = asize = length; - } else { - printk(KERN_NOTICE "pmc551: Using specified aperture size %dM\n", asize>>20); - priv->asize = asize; - } - priv->start = ioremap(((PCI_Device->resource[0].start) - & PCI_BASE_ADDRESS_MEM_MASK), - priv->asize); - - if (!priv->start) { - printk(KERN_NOTICE "pmc551: Unable to map IO space\n"); - kfree(mtd->priv); - kfree(mtd); + mtd = kmalloc(sizeof(struct mtd_info), GFP_KERNEL); + if (!mtd) { + printk(KERN_NOTICE "pmc551: Cannot allocate new MTD " + "device.\n"); break; } + memset(mtd, 0, sizeof(struct mtd_info)); + + priv = kmalloc(sizeof(struct mypriv), GFP_KERNEL); + if (!priv) { + printk(KERN_NOTICE "pmc551: Cannot allocate new MTD " + "device.\n"); + kfree(mtd); + break; + } + memset(priv, 0, sizeof(*priv)); + mtd->priv = priv; + priv->dev = PCI_Device; + + if (asize > length) { + printk(KERN_NOTICE "pmc551: reducing aperture size to " + "fit %dM\n", length >> 20); + priv->asize = asize = length; + } else if (asize == 0 || asize == length) { + printk(KERN_NOTICE "pmc551: Using existing aperture " + "size %dM\n", length >> 20); + priv->asize = asize = length; + } else { + printk(KERN_NOTICE "pmc551: Using specified aperture " + "size %dM\n", asize >> 20); + priv->asize = asize; + } + priv->start = ioremap(((PCI_Device->resource[0].start) + & PCI_BASE_ADDRESS_MEM_MASK), + priv->asize); + + if (!priv->start) { + printk(KERN_NOTICE "pmc551: Unable to map IO space\n"); + kfree(mtd->priv); + kfree(mtd); + break; + } #ifdef CONFIG_MTD_PMC551_DEBUG - printk( KERN_DEBUG "pmc551: setting aperture to %d\n", - ffs(priv->asize>>20)-1); + printk(KERN_DEBUG "pmc551: setting aperture to %d\n", + ffs(priv->asize >> 20) - 1); #endif - priv->base_map0 = ( PMC551_PCI_MEM_MAP_REG_EN - | PMC551_PCI_MEM_MAP_ENABLE - | (ffs(priv->asize>>20)-1)<<4 ); - priv->curr_map0 = priv->base_map0; - pci_write_config_dword ( priv->dev, PMC551_PCI_MEM_MAP0, - priv->curr_map0 ); + priv->base_map0 = (PMC551_PCI_MEM_MAP_REG_EN + | PMC551_PCI_MEM_MAP_ENABLE + | (ffs(priv->asize >> 20) - 1) << 4); + priv->curr_map0 = priv->base_map0; + pci_write_config_dword(priv->dev, PMC551_PCI_MEM_MAP0, + priv->curr_map0); #ifdef CONFIG_MTD_PMC551_DEBUG - printk( KERN_DEBUG "pmc551: aperture set to %d\n", - (priv->base_map0 & 0xF0)>>4 ); + printk(KERN_DEBUG "pmc551: aperture set to %d\n", + (priv->base_map0 & 0xF0) >> 4); #endif - mtd->size = msize; - mtd->flags = MTD_CAP_RAM; - mtd->erase = pmc551_erase; - mtd->read = pmc551_read; - mtd->write = pmc551_write; - mtd->point = pmc551_point; - mtd->unpoint = pmc551_unpoint; - mtd->type = MTD_RAM; - mtd->name = "PMC551 RAM board"; - mtd->erasesize = 0x10000; - mtd->writesize = 1; - mtd->owner = THIS_MODULE; + mtd->size = msize; + mtd->flags = MTD_CAP_RAM; + mtd->erase = pmc551_erase; + mtd->read = pmc551_read; + mtd->write = pmc551_write; + mtd->point = pmc551_point; + mtd->unpoint = pmc551_unpoint; + mtd->type = MTD_RAM; + mtd->name = "PMC551 RAM board"; + mtd->erasesize = 0x10000; + mtd->writesize = 1; + mtd->owner = THIS_MODULE; - if (add_mtd_device(mtd)) { - printk(KERN_NOTICE "pmc551: Failed to register new device\n"); + if (add_mtd_device(mtd)) { + printk(KERN_NOTICE "pmc551: Failed to register new " + "device\n"); iounmap(priv->start); - kfree(mtd->priv); - kfree(mtd); - break; - } + kfree(mtd->priv); + kfree(mtd); + break; + } - /* Keep a reference as the add_mtd_device worked */ - pci_dev_get(PCI_Device); + /* Keep a reference as the add_mtd_device worked */ + pci_dev_get(PCI_Device); - printk(KERN_NOTICE "Registered pmc551 memory device.\n"); - printk(KERN_NOTICE "Mapped %dM of memory from 0x%p to 0x%p\n", - priv->asize>>20, - priv->start, - priv->start + priv->asize); - printk(KERN_NOTICE "Total memory is %d%c\n", - (length<1024)?length: - (length<1048576)?length>>10:length>>20, - (length<1024)?'B':(length<1048576)?'K':'M'); + printk(KERN_NOTICE "Registered pmc551 memory device.\n"); + printk(KERN_NOTICE "Mapped %dM of memory from 0x%p to 0x%p\n", + priv->asize >> 20, + priv->start, priv->start + priv->asize); + printk(KERN_NOTICE "Total memory is %d%c\n", + (length < 1024) ? length : + (length < 1048576) ? length >> 10 : length >> 20, + (length < 1024) ? 'B' : (length < 1048576) ? 'K' : 'M'); priv->nextpmc551 = pmc551list; pmc551list = mtd; found++; - } + } - /* Exited early, reference left over */ - if (PCI_Device) - pci_dev_put(PCI_Device); + /* Exited early, reference left over */ + if (PCI_Device) + pci_dev_put(PCI_Device); - if( !pmc551list ) { - printk(KERN_NOTICE "pmc551: not detected\n"); - return -ENODEV; - } else { + if (!pmc551list) { + printk(KERN_NOTICE "pmc551: not detected\n"); + return -ENODEV; + } else { printk(KERN_NOTICE "pmc551: %d pmc551 devices loaded\n", found); - return 0; + return 0; } } @@ -819,24 +848,24 @@ static int __init init_pmc551(void) */ static void __exit cleanup_pmc551(void) { - int found=0; - struct mtd_info *mtd; + int found = 0; + struct mtd_info *mtd; struct mypriv *priv; - while((mtd=pmc551list)) { + while ((mtd = pmc551list)) { priv = mtd->priv; pmc551list = priv->nextpmc551; - if(priv->start) { - printk (KERN_DEBUG "pmc551: unmapping %dM starting at 0x%p\n", - priv->asize>>20, priv->start); - iounmap (priv->start); + if (priv->start) { + printk(KERN_DEBUG "pmc551: unmapping %dM starting at " + "0x%p\n", priv->asize >> 20, priv->start); + iounmap(priv->start); } pci_dev_put(priv->dev); - kfree (mtd->priv); - del_mtd_device (mtd); - kfree (mtd); + kfree(mtd->priv); + del_mtd_device(mtd); + kfree(mtd); found++; } From 7fefb924d7aed7116fe2a68cdbfc9e36318e7300 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 19 Sep 2006 21:55:18 +0200 Subject: [PATCH 0372/1063] [MTD] pmc551 use kzalloc Use kzalloc instad of kmalloc+memset(0). Signed-off-by: Jiri Slaby Signed-off-by: David Woodhouse --- drivers/mtd/devices/pmc551.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/mtd/devices/pmc551.c b/drivers/mtd/devices/pmc551.c index 4d4023601d22..62a918895df4 100644 --- a/drivers/mtd/devices/pmc551.c +++ b/drivers/mtd/devices/pmc551.c @@ -732,23 +732,20 @@ static int __init init_pmc551(void) msize = length; } - mtd = kmalloc(sizeof(struct mtd_info), GFP_KERNEL); + mtd = kzalloc(sizeof(struct mtd_info), GFP_KERNEL); if (!mtd) { printk(KERN_NOTICE "pmc551: Cannot allocate new MTD " "device.\n"); break; } - memset(mtd, 0, sizeof(struct mtd_info)); - - priv = kmalloc(sizeof(struct mypriv), GFP_KERNEL); + priv = kzalloc(sizeof(struct mypriv), GFP_KERNEL); if (!priv) { printk(KERN_NOTICE "pmc551: Cannot allocate new MTD " "device.\n"); kfree(mtd); break; } - memset(priv, 0, sizeof(*priv)); mtd->priv = priv; priv->dev = PCI_Device; From 98aacdfde05ccf512d4395eed0d4894eea2d163c Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 19 Sep 2006 21:55:28 +0200 Subject: [PATCH 0373/1063] [MTD] pmc551 pci cleanup Use pci_resource_start for getting start of regions and pci_iomap to not doing this directly by using dev->resource... (Thanks to Rolf Eike Beer) Signed-off-by: Jiri Slaby Signed-off-by: David Woodhouse --- drivers/mtd/devices/pmc551.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/mtd/devices/pmc551.c b/drivers/mtd/devices/pmc551.c index 62a918895df4..354e1657cc26 100644 --- a/drivers/mtd/devices/pmc551.c +++ b/drivers/mtd/devices/pmc551.c @@ -568,8 +568,7 @@ static u32 fixup_pmc551(struct pci_dev *dev) size >> 10 : size >> 20, (size < 1024) ? 'B' : (size < 1048576) ? 'K' : 'M', size, ((dcmd & (0x1 << 3)) == 0) ? "non-" : "", - (unsigned long long)((dev->resource[0].start) & - PCI_BASE_ADDRESS_MEM_MASK)); + (unsigned long long)pci_resource_start(dev, 0)); /* * Check to see the state of the memory @@ -705,7 +704,7 @@ static int __init init_pmc551(void) } printk(KERN_NOTICE "pmc551: Found PCI V370PDC at 0x%llx\n", - (unsigned long long)PCI_Device->resource[0].start); + (unsigned long long)pci_resource_start(PCI_Device, 0)); /* * The PMC551 device acts VERY weird if you don't init it @@ -762,9 +761,7 @@ static int __init init_pmc551(void) "size %dM\n", asize >> 20); priv->asize = asize; } - priv->start = ioremap(((PCI_Device->resource[0].start) - & PCI_BASE_ADDRESS_MEM_MASK), - priv->asize); + priv->start = pci_iomap(PCI_Device, 0, priv->asize); if (!priv->start) { printk(KERN_NOTICE "pmc551: Unable to map IO space\n"); @@ -805,7 +802,7 @@ static int __init init_pmc551(void) if (add_mtd_device(mtd)) { printk(KERN_NOTICE "pmc551: Failed to register new " "device\n"); - iounmap(priv->start); + pci_iounmap(PCI_Device, priv->start); kfree(mtd->priv); kfree(mtd); break; @@ -856,7 +853,7 @@ static void __exit cleanup_pmc551(void) if (priv->start) { printk(KERN_DEBUG "pmc551: unmapping %dM starting at " "0x%p\n", priv->asize >> 20, priv->start); - iounmap(priv->start); + pci_iounmap(priv->dev, priv->start); } pci_dev_put(priv->dev); From 51197abf29657373bcf9803d87da3c3d8fc3a37e Mon Sep 17 00:00:00 2001 From: Claudio Lanconelli Date: Fri, 22 Sep 2006 11:01:37 +0100 Subject: [PATCH 0374/1063] [MTD] Add SSFDC (SmartMedia) read-only translation layer Signed-off-by: Claudio Lanconelli Signed-off-by: David Woodhouse --- drivers/mtd/Kconfig | 8 + drivers/mtd/Makefile | 1 + drivers/mtd/ssfdc.c | 468 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 477 insertions(+) create mode 100644 drivers/mtd/ssfdc.c diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig index 1344ad7a4b14..717e90448fc6 100644 --- a/drivers/mtd/Kconfig +++ b/drivers/mtd/Kconfig @@ -263,6 +263,14 @@ config RFD_FTL http://www.gensw.com/pages/prod/bios/rfd.htm +config SSFDC + bool "NAND SSFDC (SmartMedia) read only translation layer" + depends on MTD + default n + help + This enables read only access to SmartMedia formatted NAND + flash. You can mount it with FAT file system. + source "drivers/mtd/chips/Kconfig" source "drivers/mtd/maps/Kconfig" diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile index fc9374407c2b..1e36b9aed98b 100644 --- a/drivers/mtd/Makefile +++ b/drivers/mtd/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_FTL) += ftl.o mtd_blkdevs.o obj-$(CONFIG_NFTL) += nftl.o mtd_blkdevs.o obj-$(CONFIG_INFTL) += inftl.o mtd_blkdevs.o obj-$(CONFIG_RFD_FTL) += rfd_ftl.o mtd_blkdevs.o +obj-$(CONFIG_SSFDC) += ssfdc.o mtd_blkdevs.o nftl-objs := nftlcore.o nftlmount.o inftl-objs := inftlcore.o inftlmount.o diff --git a/drivers/mtd/ssfdc.c b/drivers/mtd/ssfdc.c new file mode 100644 index 000000000000..ddbf015f4119 --- /dev/null +++ b/drivers/mtd/ssfdc.c @@ -0,0 +1,468 @@ +/* + * Linux driver for SSFDC Flash Translation Layer (Read only) + * (c) 2005 Eptar srl + * Author: Claudio Lanconelli + * + * Based on NTFL and MTDBLOCK_RO drivers + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct ssfdcr_record { + struct mtd_blktrans_dev mbd; + int usecount; + unsigned char heads; + unsigned char sectors; + unsigned short cylinders; + int cis_block; /* block n. containing CIS/IDI */ + int erase_size; /* phys_block_size */ + unsigned short *logic_block_map; /* all zones (max 8192 phys blocks on + the 128MB) */ + int map_len; /* n. phys_blocks on the card */ +}; + +#define SSFDCR_MAJOR 257 +#define SSFDCR_PARTN_BITS 3 + +#define SECTOR_SIZE 512 +#define SECTOR_SHIFT 9 +#define OOB_SIZE 16 + +#define MAX_LOGIC_BLK_PER_ZONE 1000 +#define MAX_PHYS_BLK_PER_ZONE 1024 + +#define KB(x) ( (x) * 1024L ) +#define MB(x) ( KB(x) * 1024L ) + +/** CHS Table + 1MB 2MB 4MB 8MB 16MB 32MB 64MB 128MB +NCylinder 125 125 250 250 500 500 500 500 +NHead 4 4 4 4 4 8 8 16 +NSector 4 8 8 16 16 16 32 32 +SumSector 2,000 4,000 8,000 16,000 32,000 64,000 128,000 256,000 +SectorSize 512 512 512 512 512 512 512 512 +**/ + +typedef struct { + unsigned long size; + unsigned short cyl; + unsigned char head; + unsigned char sec; +} chs_entry_t; + +/* Must be ordered by size */ +static const chs_entry_t chs_table[] = { + { MB( 1), 125, 4, 4 }, + { MB( 2), 125, 4, 8 }, + { MB( 4), 250, 4, 8 }, + { MB( 8), 250, 4, 16 }, + { MB( 16), 500, 4, 16 }, + { MB( 32), 500, 8, 16 }, + { MB( 64), 500, 8, 32 }, + { MB(128), 500, 16, 32 }, + { 0 }, +}; + +static int get_chs(unsigned long size, unsigned short *cyl, unsigned char *head, + unsigned char *sec) +{ + int k; + int found = 0; + + k = 0; + while (chs_table[k].size > 0 && size > chs_table[k].size) + k++; + + if (chs_table[k].size > 0) { + if (cyl) + *cyl = chs_table[k].cyl; + if (head) + *head = chs_table[k].head; + if (sec) + *sec = chs_table[k].sec; + found = 1; + } + + return found; +} + +/* These bytes are the signature for the CIS/IDI sector */ +static const uint8_t cis_numbers[] = { + 0x01, 0x03, 0xD9, 0x01, 0xFF, 0x18, 0x02, 0xDF, 0x01, 0x20 +}; + +/* Read and check for a valid CIS sector */ +static int get_valid_cis_sector(struct mtd_info *mtd) +{ + int ret, k, cis_sector; + size_t retlen; + loff_t offset; + uint8_t sect_buf[SECTOR_SIZE]; + + /* + * Look for CIS/IDI sector on the first GOOD block (give up after 4 bad + * blocks). If the first good block doesn't contain CIS number the flash + * is not SSFDC formatted + */ + cis_sector = -1; + for (k = 0, offset = 0; k < 4; k++, offset += mtd->erasesize) { + if (!mtd->block_isbad(mtd, offset)) { + ret = mtd->read(mtd, offset, SECTOR_SIZE, &retlen, + sect_buf); + + /* CIS pattern match on the sector buffer */ + if ( ret < 0 || retlen != SECTOR_SIZE ) { + printk(KERN_WARNING + "SSFDC_RO:can't read CIS/IDI sector\n"); + } else if ( !memcmp(sect_buf, cis_numbers, + sizeof(cis_numbers)) ) { + /* Found */ + cis_sector = (int)(offset >> SECTOR_SHIFT); + } else { + DEBUG(MTD_DEBUG_LEVEL1, + "SSFDC_RO: CIS/IDI sector not found" + " on %s (mtd%d)\n", mtd->name, + mtd->index); + } + break; + } + } + + return cis_sector; +} + +/* Read physical sector (wrapper to MTD_READ) */ +static int read_physical_sector(struct mtd_info *mtd, uint8_t *sect_buf, + int sect_no) +{ + int ret; + size_t retlen; + loff_t offset = (loff_t)sect_no << SECTOR_SHIFT; + + ret = mtd->read(mtd, offset, SECTOR_SIZE, &retlen, sect_buf); + if (ret < 0 || retlen != SECTOR_SIZE) + return -1; + + return 0; +} + +/* Read redundancy area (wrapper to MTD_READ_OOB */ +static int read_raw_oob(struct mtd_info *mtd, loff_t offs, uint8_t *buf) +{ + struct mtd_oob_ops ops; + int ret; + + ops.mode = MTD_OOB_RAW; + ops.ooboffs = 0; + ops.ooblen = mtd->oobsize; + ops.len = OOB_SIZE; + ops.oobbuf = buf; + ops.datbuf = NULL; + + ret = mtd->read_oob(mtd, offs, &ops); + if (ret < 0 || ops.retlen != OOB_SIZE) + return -1; + + return 0; +} + +/* Parity calculator on a word of n bit size */ +static int get_parity(int number, int size) +{ + int k; + int parity; + + parity = 1; + for (k = 0; k < size; k++) { + parity += (number >> k); + parity &= 1; + } + return parity; +} + +/* Read and validate the logical block address field stored in the OOB */ +static int get_logical_address(uint8_t *oob_buf) +{ + int block_address, parity; + int offset[2] = {6, 11}; /* offset of the 2 address fields within OOB */ + int j; + int ok = 0; + + /* + * Look for the first valid logical address + * Valid address has fixed pattern on most significant bits and + * parity check + */ + for (j = 0; j < ARRAY_SIZE(offset); j++) { + block_address = ((int)oob_buf[offset[j]] << 8) | + oob_buf[offset[j]+1]; + + /* Check for the signature bits in the address field (MSBits) */ + if ((block_address & ~0x7FF) == 0x1000) { + parity = block_address & 0x01; + block_address &= 0x7FF; + block_address >>= 1; + + if (get_parity(block_address, 10) != parity) { + DEBUG(MTD_DEBUG_LEVEL0, + "SSFDC_RO: logical address field%d" + "parity error(0x%04X)\n", j+1, + block_address); + } else { + ok = 1; + break; + } + } + } + + if ( !ok ) + block_address = -2; + + DEBUG(MTD_DEBUG_LEVEL3, "SSFDC_RO: get_logical_address() %d\n", + block_address); + + return block_address; +} + +/* Build the logic block map */ +static int build_logical_block_map(struct ssfdcr_record *ssfdc) +{ + unsigned long offset; + uint8_t oob_buf[OOB_SIZE]; + int ret, block_address, phys_block; + struct mtd_info *mtd = ssfdc->mbd.mtd; + + DEBUG(MTD_DEBUG_LEVEL1, "SSFDC_RO: build_block_map() nblks=%d (%luK)\n", + ssfdc->map_len, (unsigned long)ssfdc->map_len * + ssfdc->erase_size / 1024 ); + + /* Scan every physical block, skip CIS block */ + for (phys_block = ssfdc->cis_block + 1; phys_block < ssfdc->map_len; + phys_block++) { + offset = (unsigned long)phys_block * ssfdc->erase_size; + if (mtd->block_isbad(mtd, offset)) + continue; /* skip bad blocks */ + + ret = read_raw_oob(mtd, offset, oob_buf); + if (ret < 0) { + DEBUG(MTD_DEBUG_LEVEL0, + "SSFDC_RO: mtd read_oob() failed at %lu\n", + offset); + return -1; + } + block_address = get_logical_address(oob_buf); + + /* Skip invalid addresses */ + if (block_address >= 0 && + block_address < MAX_LOGIC_BLK_PER_ZONE) { + int zone_index; + + zone_index = phys_block / MAX_PHYS_BLK_PER_ZONE; + block_address += zone_index * MAX_LOGIC_BLK_PER_ZONE; + ssfdc->logic_block_map[block_address] = + (unsigned short)phys_block; + + DEBUG(MTD_DEBUG_LEVEL2, + "SSFDC_RO: build_block_map() phys_block=%d," + "logic_block_addr=%d, zone=%d\n", + phys_block, block_address, zone_index); + } + } + return 0; +} + +static void ssfdcr_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd) +{ + struct ssfdcr_record *ssfdc; + int cis_sector; + + /* Check for small page NAND flash */ + if (mtd->type != MTD_NANDFLASH || mtd->oobsize != OOB_SIZE) + return; + + /* Check for SSDFC format by reading CIS/IDI sector */ + cis_sector = get_valid_cis_sector(mtd); + if (cis_sector == -1) + return; + + ssfdc = kzalloc(sizeof(struct ssfdcr_record), GFP_KERNEL); + if (!ssfdc) { + printk(KERN_WARNING + "SSFDC_RO: out of memory for data structures\n"); + return; + } + + ssfdc->mbd.mtd = mtd; + ssfdc->mbd.devnum = -1; + ssfdc->mbd.blksize = SECTOR_SIZE; + ssfdc->mbd.tr = tr; + ssfdc->mbd.readonly = 1; + + ssfdc->cis_block = cis_sector / (mtd->erasesize >> SECTOR_SHIFT); + ssfdc->erase_size = mtd->erasesize; + ssfdc->map_len = mtd->size / mtd->erasesize; + + DEBUG(MTD_DEBUG_LEVEL1, + "SSFDC_RO: cis_block=%d,erase_size=%d,map_len=%d,n_zones=%d\n", + ssfdc->cis_block, ssfdc->erase_size, ssfdc->map_len, + (ssfdc->map_len + MAX_PHYS_BLK_PER_ZONE - 1) / + MAX_PHYS_BLK_PER_ZONE); + + /* Set geometry */ + ssfdc->heads = 16; + ssfdc->sectors = 32; + get_chs( mtd->size, NULL, &ssfdc->heads, &ssfdc->sectors); + ssfdc->cylinders = (unsigned short)((mtd->size >> SECTOR_SHIFT) / + ((long)ssfdc->sectors * (long)ssfdc->heads)); + + DEBUG(MTD_DEBUG_LEVEL1, "SSFDC_RO: using C:%d H:%d S:%d == %ld sects\n", + ssfdc->cylinders, ssfdc->heads , ssfdc->sectors, + (long)ssfdc->cylinders * (long)ssfdc->heads * + (long)ssfdc->sectors ); + + ssfdc->mbd.size = (long)ssfdc->heads * (long)ssfdc->cylinders * + (long)ssfdc->sectors; + + /* Allocate logical block map */ + ssfdc->logic_block_map = kmalloc( sizeof(ssfdc->logic_block_map[0]) * + ssfdc->map_len, GFP_KERNEL); + if (!ssfdc->logic_block_map) { + printk(KERN_WARNING + "SSFDC_RO: out of memory for data structures\n"); + goto out_err; + } + memset(ssfdc->logic_block_map, 0xff, sizeof(ssfdc->logic_block_map[0]) * + ssfdc->map_len); + + /* Build logical block map */ + if (build_logical_block_map(ssfdc) < 0) + goto out_err; + + /* Register device + partitions */ + if (add_mtd_blktrans_dev(&ssfdc->mbd)) + goto out_err; + + printk(KERN_INFO "SSFDC_RO: Found ssfdc%c on mtd%d (%s)\n", + ssfdc->mbd.devnum + 'a', mtd->index, mtd->name); + return; + +out_err: + kfree(ssfdc->logic_block_map); + kfree(ssfdc); +} + +static void ssfdcr_remove_dev(struct mtd_blktrans_dev *dev) +{ + struct ssfdcr_record *ssfdc = (struct ssfdcr_record *)dev; + + DEBUG(MTD_DEBUG_LEVEL1, "SSFDC_RO: remove_dev (i=%d)\n", dev->devnum); + + del_mtd_blktrans_dev(dev); + kfree(ssfdc->logic_block_map); + kfree(ssfdc); +} + +static int ssfdcr_readsect(struct mtd_blktrans_dev *dev, + unsigned long logic_sect_no, char *buf) +{ + struct ssfdcr_record *ssfdc = (struct ssfdcr_record *)dev; + int sectors_per_block, offset, block_address; + + sectors_per_block = ssfdc->erase_size >> SECTOR_SHIFT; + offset = (int)(logic_sect_no % sectors_per_block); + block_address = (int)(logic_sect_no / sectors_per_block); + + DEBUG(MTD_DEBUG_LEVEL3, + "SSFDC_RO: ssfdcr_readsect(%lu) sec_per_blk=%d, ofst=%d," + " block_addr=%d\n", logic_sect_no, sectors_per_block, offset, + block_address); + + if (block_address >= ssfdc->map_len) + BUG(); + + block_address = ssfdc->logic_block_map[block_address]; + + DEBUG(MTD_DEBUG_LEVEL3, + "SSFDC_RO: ssfdcr_readsect() phys_block_addr=%d\n", + block_address); + + if (block_address < 0xffff) { + unsigned long sect_no; + + sect_no = (unsigned long)block_address * sectors_per_block + + offset; + + DEBUG(MTD_DEBUG_LEVEL3, + "SSFDC_RO: ssfdcr_readsect() phys_sect_no=%lu\n", + sect_no); + + if (read_physical_sector( ssfdc->mbd.mtd, buf, sect_no ) < 0) + return -EIO; + } else { + memset(buf, 0xff, SECTOR_SIZE); + } + + return 0; +} + +static int ssfdcr_getgeo(struct mtd_blktrans_dev *dev, struct hd_geometry *geo) +{ + struct ssfdcr_record *ssfdc = (struct ssfdcr_record *)dev; + + DEBUG(MTD_DEBUG_LEVEL1, "SSFDC_RO: ssfdcr_getgeo() C=%d, H=%d, S=%d\n", + ssfdc->cylinders, ssfdc->heads, ssfdc->sectors); + + geo->heads = ssfdc->heads; + geo->sectors = ssfdc->sectors; + geo->cylinders = ssfdc->cylinders; + + return 0; +} + +/**************************************************************************** + * + * Module stuff + * + ****************************************************************************/ + +static struct mtd_blktrans_ops ssfdcr_tr = { + .name = "ssfdc", + .major = SSFDCR_MAJOR, + .part_bits = SSFDCR_PARTN_BITS, + .getgeo = ssfdcr_getgeo, + .readsect = ssfdcr_readsect, + .add_mtd = ssfdcr_add_mtd, + .remove_dev = ssfdcr_remove_dev, + .owner = THIS_MODULE, +}; + +static int __init init_ssfdcr(void) +{ + printk(KERN_INFO "SSFDC read-only Flash Translation layer\n"); + + return register_mtd_blktrans(&ssfdcr_tr); +} + +static void __exit cleanup_ssfdcr(void) +{ + deregister_mtd_blktrans(&ssfdcr_tr); +} + +module_init(init_ssfdcr); +module_exit(cleanup_ssfdcr); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Claudio Lanconelli "); +MODULE_DESCRIPTION("Flash Translation Layer for read-only SSFDC SmartMedia card"); From e4e3295f0c0e1a09a46522359e24c0569e2e7b47 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 22 Sep 2006 11:05:42 +0100 Subject: [PATCH 0375/1063] Revert "[MTD] blkdev helper code: fix printk format warning" This reverts commit 668040fcd1e06fc3e68a92708cbdfa5a52c37d3c. The 'flags' field of the struct request is 'unsigned long'. Quite how Randy came to see 'long int format, different type arg' I don't know, but it doesn't seem to be the case any more. Signed-off-by: David Woodhouse --- drivers/mtd/mtd_blkdevs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 302bed5f0845..458d3c8ae1ee 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -69,7 +69,7 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, return 1; default: - printk(KERN_NOTICE "Unknown request %d\n", rq_data_dir(req)); + printk(KERN_NOTICE "Unknown request %ld\n", rq_data_dir(req)); return 0; } } From 734a56285dbeedc6cc10aef6f700eeab7c65ea9f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 22 Sep 2006 18:41:20 +0100 Subject: [PATCH 0376/1063] Remove accidentally-added include/linux/utsrelease.h Signed-off-by: David Woodhouse --- include/linux/utsrelease.h | 1 - 1 file changed, 1 deletion(-) delete mode 100644 include/linux/utsrelease.h diff --git a/include/linux/utsrelease.h b/include/linux/utsrelease.h deleted file mode 100644 index 7fe1de0ad4c1..000000000000 --- a/include/linux/utsrelease.h +++ /dev/null @@ -1 +0,0 @@ -#define UTS_RELEASE "2.6.18" From 51bd39860ff829475aef611a3234309e37e090d9 Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Mon, 24 Jul 2006 23:26:30 -0700 Subject: [PATCH 0377/1063] [MLSXFRM]: Granular IPSec associations for use in MLS environments The current approach to labeling Security Associations for SELinux purposes uses a one-to-one mapping between xfrm policy rules and security associations. This doesn't address the needs of real world MLS (Multi-level System, traditional Bell-LaPadula) environments where a single xfrm policy rule (pertaining to a range, classified to secret for example) might need to map to multiple Security Associations (one each for classified, secret, top secret and all the compartments applicable to these security levels). This patch set addresses the above problem by allowing for the mapping of a single xfrm policy rule to multiple security associations, with each association used in the security context it is defined for. It also includes the security context to be used in IKE negotiation in the acquire messages sent to the IKE daemon so that a unique SA can be negotiated for each unique security context. A couple of bug fixes are also included; checks to make sure the SAs used by a packet match policy (security context-wise) on the inbound and also that the bundle used for the outbound matches the security context of the flow. This patch set also makes the use of the SELinux sid in flow cache lookups seemless by including the sid in the flow key itself. Also, open requests as well as connection-oriented child sockets are labeled automatically to be at the same level as the peer to allow for use of appropriately labeled IPSec associations. Description of changes: A "sid" member has been added to the flow cache key resulting in the sid being available at all needed locations and the flow cache lookups automatically using the sid. The flow sid is derived from the socket on the outbound and the SAs (unlabeled where an SA was not used) on the inbound. Outbound case: 1. Find policy for the socket. 2. OLD: Find an SA that matches the policy. NEW: Find an SA that matches BOTH the policy and the flow/socket. This is necessary since not every SA that matches the policy can be used for the flow/socket. Consider policy range Secret-TS, and SAs each for Secret and TS. We don't want a TS socket to use the Secret SA. Hence the additional check for the SA Vs. flow/socket. 3. NEW: When looking thru bundles for a policy, make sure the flow/socket can use the bundle. If a bundle is not found, create one, calling for IKE if necessary. If using IKE, include the security context in the acquire message to the IKE daemon. Inbound case: 1. OLD: Find policy for the socket. NEW: Find policy for the incoming packet based on the sid of the SA(s) it used or the unlabeled sid if no SAs were used. (Consider a case where a socket is "authorized" for two policies (unclassified-confidential, secret-top_secret). If the packet has come in using a secret SA, we really ought to be using the latter policy (secret-top_secret).) 2. OLD: BUG: No check to see if the SAs used by the packet agree with the policy sec_ctx-wise. (It was indicated in selinux_xfrm_sock_rcv_skb() that this was being accomplished by (x->id.spi == tmpl->id.spi || !tmpl->id.spi) in xfrm_state_ok, but it turns out tmpl->id.spi would normally be zero (unless xfrm policy rules specify one at the template level, which they usually don't). NEW: The socket is checked for access to the SAs used (based on the sid of the SAs) in selinux_xfrm_sock_rcv_skb(). Forward case: This would be Step 1 from the Inbound case, followed by Steps 2 and 3 from the Outbound case. Outstanding items/issues: - Timewait acknowledgements and such are generated in the current/upstream implementation using a NULL socket resulting in the any_socket sid (SYSTEM_HIGH) to be used. This problem is not addressed by this patch set. This patch: Add new flask definitions to SELinux Adds a new avperm "polmatch" to arbitrate flow/state access to a xfrm policy rule. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- security/selinux/include/av_perm_to_string.h | 1 + security/selinux/include/av_permissions.h | 1 + 2 files changed, 2 insertions(+) diff --git a/security/selinux/include/av_perm_to_string.h b/security/selinux/include/av_perm_to_string.h index 7c9b58380833..09fc8a2345eb 100644 --- a/security/selinux/include/av_perm_to_string.h +++ b/security/selinux/include/av_perm_to_string.h @@ -241,6 +241,7 @@ S_(SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO, "sendto") S_(SECCLASS_ASSOCIATION, ASSOCIATION__RECVFROM, "recvfrom") S_(SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT, "setcontext") + S_(SECCLASS_ASSOCIATION, ASSOCIATION__POLMATCH, "polmatch") S_(SECCLASS_PACKET, PACKET__SEND, "send") S_(SECCLASS_PACKET, PACKET__RECV, "recv") S_(SECCLASS_PACKET, PACKET__RELABELTO, "relabelto") diff --git a/security/selinux/include/av_permissions.h b/security/selinux/include/av_permissions.h index 69fd4b48202c..81f4f526c8b1 100644 --- a/security/selinux/include/av_permissions.h +++ b/security/selinux/include/av_permissions.h @@ -911,6 +911,7 @@ #define ASSOCIATION__SENDTO 0x00000001UL #define ASSOCIATION__RECVFROM 0x00000002UL #define ASSOCIATION__SETCONTEXT 0x00000004UL +#define ASSOCIATION__POLMATCH 0x00000008UL #define NETLINK_KOBJECT_UEVENT_SOCKET__IOCTL 0x00000001UL #define NETLINK_KOBJECT_UEVENT_SOCKET__READ 0x00000002UL From 08554d6b33e60aa8ee40bbef94505941c0eefef2 Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Mon, 24 Jul 2006 23:27:16 -0700 Subject: [PATCH 0378/1063] [MLSXFRM]: Define new SELinux service routine This defines a routine that combines the Type Enforcement portion of one sid with the MLS portion from the other sid to arrive at a new sid. This would be used to define a sid for a security association that is to be negotiated by IKE as well as for determing the sid for open requests and connection-oriented child sockets. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- security/selinux/include/security.h | 2 + security/selinux/ss/mls.c | 20 --------- security/selinux/ss/mls.h | 20 +++++++++ security/selinux/ss/services.c | 69 +++++++++++++++++++++++++++++ 4 files changed, 91 insertions(+), 20 deletions(-) diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 063af47bb231..911954a692fa 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -78,6 +78,8 @@ int security_node_sid(u16 domain, void *addr, u32 addrlen, int security_validate_transition(u32 oldsid, u32 newsid, u32 tasksid, u16 tclass); +int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid); + #define SECURITY_FS_USE_XATTR 1 /* use xattr */ #define SECURITY_FS_USE_TRANS 2 /* use transition SIDs, e.g. devpts/tmpfs */ #define SECURITY_FS_USE_TASK 3 /* use task SIDs, e.g. pipefs/sockfs */ diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index 7bc5b6440f70..e15f7e0399b8 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -211,26 +211,6 @@ int mls_context_isvalid(struct policydb *p, struct context *c) return 1; } -/* - * Copies the MLS range from `src' into `dst'. - */ -static inline int mls_copy_context(struct context *dst, - struct context *src) -{ - int l, rc = 0; - - /* Copy the MLS range from the source context */ - for (l = 0; l < 2; l++) { - dst->range.level[l].sens = src->range.level[l].sens; - rc = ebitmap_cpy(&dst->range.level[l].cat, - &src->range.level[l].cat); - if (rc) - break; - } - - return rc; -} - /* * Set the MLS fields in the security context structure * `context' based on the string representation in diff --git a/security/selinux/ss/mls.h b/security/selinux/ss/mls.h index fbb42f07dd7c..90c5e88987fa 100644 --- a/security/selinux/ss/mls.h +++ b/security/selinux/ss/mls.h @@ -17,6 +17,26 @@ #include "context.h" #include "policydb.h" +/* + * Copies the MLS range from `src' into `dst'. + */ +static inline int mls_copy_context(struct context *dst, + struct context *src) +{ + int l, rc = 0; + + /* Copy the MLS range from the source context */ + for (l = 0; l < 2; l++) { + dst->range.level[l].sens = src->range.level[l].sens; + rc = ebitmap_cpy(&dst->range.level[l].cat, + &src->range.level[l].cat); + if (rc) + break; + } + + return rc; +} + int mls_compute_context_len(struct context *context); void mls_sid_to_context(struct context *context, char **scontext); int mls_context_isvalid(struct policydb *p, struct context *c); diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 85e429884393..b00ec69f0ffd 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1817,6 +1817,75 @@ out: return rc; } +/* + * security_sid_mls_copy() - computes a new sid based on the given + * sid and the mls portion of mls_sid. + */ +int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid) +{ + struct context *context1; + struct context *context2; + struct context newcon; + char *s; + u32 len; + int rc = 0; + + if (!ss_initialized) { + *new_sid = sid; + goto out; + } + + context_init(&newcon); + + POLICY_RDLOCK; + context1 = sidtab_search(&sidtab, sid); + if (!context1) { + printk(KERN_ERR "security_sid_mls_copy: unrecognized SID " + "%d\n", sid); + rc = -EINVAL; + goto out_unlock; + } + + context2 = sidtab_search(&sidtab, mls_sid); + if (!context2) { + printk(KERN_ERR "security_sid_mls_copy: unrecognized SID " + "%d\n", mls_sid); + rc = -EINVAL; + goto out_unlock; + } + + newcon.user = context1->user; + newcon.role = context1->role; + newcon.type = context1->type; + rc = mls_copy_context(&newcon, context2); + if (rc) + goto out_unlock; + + + /* Check the validity of the new context. */ + if (!policydb_context_isvalid(&policydb, &newcon)) { + rc = convert_context_handle_invalid_context(&newcon); + if (rc) + goto bad; + } + + rc = sidtab_context_to_sid(&sidtab, &newcon, new_sid); + goto out_unlock; + +bad: + if (!context_struct_to_string(&newcon, &s, &len)) { + audit_log(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR, + "security_sid_mls_copy: invalid context %s", s); + kfree(s); + } + +out_unlock: + POLICY_RDUNLOCK; + context_destroy(&newcon); +out: + return rc; +} + struct selinux_audit_rule { u32 au_seqno; struct context au_ctxt; From 892c141e62982272b9c738b5520ad0e5e1ad7b42 Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Fri, 4 Aug 2006 23:08:56 -0700 Subject: [PATCH 0379/1063] [MLSXFRM]: Add security sid to sock This adds security for IP sockets at the sock level. Security at the sock level is needed to enforce the SELinux security policy for security associations even when a sock is orphaned (such as in the TCP LAST_ACK state). This will also be used to enforce SELinux controls over data arriving at or leaving a child socket while it's still waiting to be accepted. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- include/linux/security.h | 12 ++++++++++ include/net/sock.h | 13 +++++++++++ net/core/sock.c | 2 +- security/dummy.c | 5 ++++ security/selinux/hooks.c | 38 +++++++++++++++++-------------- security/selinux/include/objsec.h | 1 + 6 files changed, 53 insertions(+), 18 deletions(-) diff --git a/include/linux/security.h b/include/linux/security.h index 6bc2aad494ff..4d7fb59996b0 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -812,6 +812,8 @@ struct swap_info_struct; * which is used to copy security attributes between local stream sockets. * @sk_free_security: * Deallocate security structure. + * @sk_clone_security: + * Clone/copy security structure. * @sk_getsid: * Retrieve the LSM-specific sid for the sock to enable caching of network * authorizations. @@ -1332,6 +1334,7 @@ struct security_operations { int (*socket_getpeersec_dgram) (struct socket *sock, struct sk_buff *skb, u32 *secid); int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority); void (*sk_free_security) (struct sock *sk); + void (*sk_clone_security) (const struct sock *sk, struct sock *newsk); unsigned int (*sk_getsid) (struct sock *sk, struct flowi *fl, u8 dir); #endif /* CONFIG_SECURITY_NETWORK */ @@ -2885,6 +2888,11 @@ static inline void security_sk_free(struct sock *sk) return security_ops->sk_free_security(sk); } +static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) +{ + return security_ops->sk_clone_security(sk, newsk); +} + static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir) { return security_ops->sk_getsid(sk, fl, dir); @@ -3011,6 +3019,10 @@ static inline void security_sk_free(struct sock *sk) { } +static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) +{ +} + static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir) { return 0; diff --git a/include/net/sock.h b/include/net/sock.h index 324b3ea233d6..91cdceb3c028 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -972,6 +972,19 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) write_unlock_bh(&sk->sk_callback_lock); } +static inline void sock_copy(struct sock *nsk, const struct sock *osk) +{ +#ifdef CONFIG_SECURITY_NETWORK + void *sptr = nsk->sk_security; +#endif + + memcpy(nsk, osk, osk->sk_prot->obj_size); +#ifdef CONFIG_SECURITY_NETWORK + nsk->sk_security = sptr; + security_sk_clone(osk, nsk); +#endif +} + extern int sock_i_uid(struct sock *sk); extern unsigned long sock_i_ino(struct sock *sk); diff --git a/net/core/sock.c b/net/core/sock.c index 51fcfbc041a7..b67d868649cd 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -911,7 +911,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) if (newsk != NULL) { struct sk_filter *filter; - memcpy(newsk, sk, sk->sk_prot->obj_size); + sock_copy(newsk, sk); /* SANITY */ sk_node_init(&newsk->sk_node); diff --git a/security/dummy.c b/security/dummy.c index 58c6d399c844..bd3bc5faa9a8 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -805,6 +805,10 @@ static inline void dummy_sk_free_security (struct sock *sk) { } +static inline void dummy_sk_clone_security (const struct sock *sk, struct sock *newsk) +{ +} + static unsigned int dummy_sk_getsid(struct sock *sk, struct flowi *fl, u8 dir) { return 0; @@ -1060,6 +1064,7 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, socket_getpeersec_dgram); set_to_dummy_if_null(ops, sk_alloc_security); set_to_dummy_if_null(ops, sk_free_security); + set_to_dummy_if_null(ops, sk_clone_security); set_to_dummy_if_null(ops, sk_getsid); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 5d1b8c733199..d67abf77584a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -269,15 +269,13 @@ static int sk_alloc_security(struct sock *sk, int family, gfp_t priority) { struct sk_security_struct *ssec; - if (family != PF_UNIX) - return 0; - ssec = kzalloc(sizeof(*ssec), priority); if (!ssec) return -ENOMEM; ssec->sk = sk; ssec->peer_sid = SECINITSID_UNLABELED; + ssec->sid = SECINITSID_UNLABELED; sk->sk_security = ssec; return 0; @@ -287,9 +285,6 @@ static void sk_free_security(struct sock *sk) { struct sk_security_struct *ssec = sk->sk_security; - if (sk->sk_family != PF_UNIX) - return; - sk->sk_security = NULL; kfree(ssec); } @@ -3068,6 +3063,7 @@ static void selinux_socket_post_create(struct socket *sock, int family, { struct inode_security_struct *isec; struct task_security_struct *tsec; + struct sk_security_struct *sksec; u32 newsid; isec = SOCK_INODE(sock)->i_security; @@ -3078,6 +3074,11 @@ static void selinux_socket_post_create(struct socket *sock, int family, isec->sid = kern ? SECINITSID_KERNEL : newsid; isec->initialized = 1; + if (sock->sk) { + sksec = sock->sk->sk_security; + sksec->sid = isec->sid; + } + return; } @@ -3551,22 +3552,24 @@ static void selinux_sk_free_security(struct sock *sk) sk_free_security(sk); } +static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk) +{ + struct sk_security_struct *ssec = sk->sk_security; + struct sk_security_struct *newssec = newsk->sk_security; + + newssec->sid = ssec->sid; + newssec->peer_sid = ssec->peer_sid; +} + static unsigned int selinux_sk_getsid_security(struct sock *sk, struct flowi *fl, u8 dir) { - struct inode_security_struct *isec; - u32 sock_sid = SECINITSID_ANY_SOCKET; - if (!sk) return selinux_no_sk_sid(fl); + else { + struct sk_security_struct *sksec = sk->sk_security; - read_lock_bh(&sk->sk_callback_lock); - isec = get_sock_isec(sk); - - if (isec) - sock_sid = isec->sid; - - read_unlock_bh(&sk->sk_callback_lock); - return sock_sid; + return sksec->sid; + } } static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb) @@ -4618,6 +4621,7 @@ static struct security_operations selinux_ops = { .socket_getpeersec_dgram = selinux_socket_getpeersec_dgram, .sk_alloc_security = selinux_sk_alloc_security, .sk_free_security = selinux_sk_free_security, + .sk_clone_security = selinux_sk_clone_security, .sk_getsid = selinux_sk_getsid_security, #ifdef CONFIG_SECURITY_NETWORK_XFRM diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 940178865fc7..79b9e0af19a0 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -99,6 +99,7 @@ struct netif_security_struct { struct sk_security_struct { struct sock *sk; /* back pointer to sk object */ + u32 sid; /* SID of this object */ u32 peer_sid; /* SID of peer */ }; From b6340fcd761acf9249b3acbc95c4dc555d9beb07 Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Mon, 24 Jul 2006 23:28:37 -0700 Subject: [PATCH 0380/1063] [MLSXFRM]: Add security sid to flowi This adds security to flow key for labeling of flows as also to allow for making flow cache lookups based on the security label seemless. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- Documentation/networking/secid.txt | 14 ++++++++++++++ include/net/flow.h | 1 + 2 files changed, 15 insertions(+) create mode 100644 Documentation/networking/secid.txt diff --git a/Documentation/networking/secid.txt b/Documentation/networking/secid.txt new file mode 100644 index 000000000000..95ea06784333 --- /dev/null +++ b/Documentation/networking/secid.txt @@ -0,0 +1,14 @@ +flowi structure: + +The secid member in the flow structure is used in LSMs (e.g. SELinux) to indicate +the label of the flow. This label of the flow is currently used in selecting +matching labeled xfrm(s). + +If this is an outbound flow, the label is derived from the socket, if any, or +the incoming packet this flow is being generated as a response to (e.g. tcp +resets, timewait ack, etc.). It is also conceivable that the label could be +derived from other sources such as process context, device, etc., in special +cases, as may be appropriate. + +If this is an inbound flow, the label is derived from the IPSec security +associations, if any, used by the packet. diff --git a/include/net/flow.h b/include/net/flow.h index 04d89f763451..1cee5a83433a 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -78,6 +78,7 @@ struct flowi { #define fl_icmp_type uli_u.icmpt.type #define fl_icmp_code uli_u.icmpt.code #define fl_ipsec_spi uli_u.spi + __u32 secid; /* used by xfrm; see secid.txt */ } __attribute__((__aligned__(BITS_PER_LONG/8))); #define FLOW_DIR_IN 0 From e0d1caa7b0d5f02e4f34aa09c695d04251310c6c Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Mon, 24 Jul 2006 23:29:07 -0700 Subject: [PATCH 0381/1063] [MLSXFRM]: Flow based matching of xfrm policy and state This implements a seemless mechanism for xfrm policy selection and state matching based on the flow sid. This also includes the necessary SELinux enforcement pieces. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- include/linux/security.h | 106 ++++++++++++++--- include/net/flow.h | 4 +- net/core/flow.c | 7 +- net/xfrm/xfrm_policy.c | 28 ++--- net/xfrm/xfrm_state.c | 12 +- security/dummy.c | 23 +++- security/selinux/hooks.c | 7 +- security/selinux/include/xfrm.h | 23 +++- security/selinux/xfrm.c | 199 ++++++++++++++++++++++++++------ 9 files changed, 329 insertions(+), 80 deletions(-) diff --git a/include/linux/security.h b/include/linux/security.h index 4d7fb59996b0..2c4921d79d19 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -31,6 +31,7 @@ #include #include #include +#include struct ctl_table; @@ -825,9 +826,8 @@ struct swap_info_struct; * used by the XFRM system. * @sec_ctx contains the security context information being provided by * the user-level policy update program (e.g., setkey). - * Allocate a security structure to the xp->security field. - * The security field is initialized to NULL when the xfrm_policy is - * allocated. + * Allocate a security structure to the xp->security field; the security + * field is initialized to NULL when the xfrm_policy is allocated. * Return 0 if operation was successful (memory to allocate, legal context) * @xfrm_policy_clone_security: * @old contains an existing xfrm_policy in the SPD. @@ -846,9 +846,14 @@ struct swap_info_struct; * Database by the XFRM system. * @sec_ctx contains the security context information being provided by * the user-level SA generation program (e.g., setkey or racoon). - * Allocate a security structure to the x->security field. The - * security field is initialized to NULL when the xfrm_state is - * allocated. + * @polsec contains the security context information associated with a xfrm + * policy rule from which to take the base context. polsec must be NULL + * when sec_ctx is specified. + * @secid contains the secid from which to take the mls portion of the context. + * Allocate a security structure to the x->security field; the security + * field is initialized to NULL when the xfrm_state is allocated. Set the + * context to correspond to either sec_ctx or polsec, with the mls portion + * taken from secid in the latter case. * Return 0 if operation was successful (memory to allocate, legal context). * @xfrm_state_free_security: * @x contains the xfrm_state. @@ -859,13 +864,26 @@ struct swap_info_struct; * @xfrm_policy_lookup: * @xp contains the xfrm_policy for which the access control is being * checked. - * @sk_sid contains the sock security label that is used to authorize + * @fl_secid contains the flow security label that is used to authorize * access to the policy xp. * @dir contains the direction of the flow (input or output). - * Check permission when a sock selects a xfrm_policy for processing + * Check permission when a flow selects a xfrm_policy for processing * XFRMs on a packet. The hook is called when selecting either a * per-socket policy or a generic xfrm policy. * Return 0 if permission is granted. + * @xfrm_state_pol_flow_match: + * @x contains the state to match. + * @xp contains the policy to check for a match. + * @fl contains the flow to check for a match. + * Return 1 if there is a match. + * @xfrm_flow_state_match: + * @fl contains the flow key to match. + * @xfrm points to the xfrm_state to match. + * Return 1 if there is a match. + * @xfrm_decode_session: + * @skb points to skb to decode. + * @fl points to the flow key to set. + * Return 0 if successful decoding. * * Security hooks affecting all Key Management operations * @@ -1343,10 +1361,16 @@ struct security_operations { int (*xfrm_policy_clone_security) (struct xfrm_policy *old, struct xfrm_policy *new); void (*xfrm_policy_free_security) (struct xfrm_policy *xp); int (*xfrm_policy_delete_security) (struct xfrm_policy *xp); - int (*xfrm_state_alloc_security) (struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx); + int (*xfrm_state_alloc_security) (struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx, struct xfrm_sec_ctx *polsec, + u32 secid); void (*xfrm_state_free_security) (struct xfrm_state *x); int (*xfrm_state_delete_security) (struct xfrm_state *x); - int (*xfrm_policy_lookup)(struct xfrm_policy *xp, u32 sk_sid, u8 dir); + int (*xfrm_policy_lookup)(struct xfrm_policy *xp, u32 fl_secid, u8 dir); + int (*xfrm_state_pol_flow_match)(struct xfrm_state *x, + struct xfrm_policy *xp, struct flowi *fl); + int (*xfrm_flow_state_match)(struct flowi *fl, struct xfrm_state *xfrm); + int (*xfrm_decode_session)(struct sk_buff *skb, struct flowi *fl); #endif /* CONFIG_SECURITY_NETWORK_XFRM */ /* key management security hooks */ @@ -3050,9 +3074,18 @@ static inline int security_xfrm_policy_delete(struct xfrm_policy *xp) return security_ops->xfrm_policy_delete_security(xp); } -static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) +static inline int security_xfrm_state_alloc(struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx) { - return security_ops->xfrm_state_alloc_security(x, sec_ctx); + return security_ops->xfrm_state_alloc_security(x, sec_ctx, NULL, 0); +} + +static inline int security_xfrm_state_alloc_acquire(struct xfrm_state *x, + struct xfrm_sec_ctx *polsec, u32 secid) +{ + if (!polsec) + return 0; + return security_ops->xfrm_state_alloc_security(x, NULL, polsec, secid); } static inline int security_xfrm_state_delete(struct xfrm_state *x) @@ -3065,9 +3098,25 @@ static inline void security_xfrm_state_free(struct xfrm_state *x) security_ops->xfrm_state_free_security(x); } -static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) +static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir) { - return security_ops->xfrm_policy_lookup(xp, sk_sid, dir); + return security_ops->xfrm_policy_lookup(xp, fl_secid, dir); +} + +static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x, + struct xfrm_policy *xp, struct flowi *fl) +{ + return security_ops->xfrm_state_pol_flow_match(x, xp, fl); +} + +static inline int security_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) +{ + return security_ops->xfrm_flow_state_match(fl, xfrm); +} + +static inline int security_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +{ + return security_ops->xfrm_decode_session(skb, fl); } #else /* CONFIG_SECURITY_NETWORK_XFRM */ static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) @@ -3089,7 +3138,14 @@ static inline int security_xfrm_policy_delete(struct xfrm_policy *xp) return 0; } -static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) +static inline int security_xfrm_state_alloc(struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx) +{ + return 0; +} + +static inline int security_xfrm_state_alloc_acquire(struct xfrm_state *x, + struct xfrm_sec_ctx *polsec, u32 secid) { return 0; } @@ -3103,10 +3159,28 @@ static inline int security_xfrm_state_delete(struct xfrm_state *x) return 0; } -static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) +static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir) { return 0; } + +static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x, + struct xfrm_policy *xp, struct flowi *fl) +{ + return 1; +} + +static inline int security_xfrm_flow_state_match(struct flowi *fl, + struct xfrm_state *xfrm) +{ + return 1; +} + +static inline int security_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +{ + return 0; +} + #endif /* CONFIG_SECURITY_NETWORK_XFRM */ #ifdef CONFIG_KEYS diff --git a/include/net/flow.h b/include/net/flow.h index 1cee5a83433a..21d988b2058a 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -86,10 +86,10 @@ struct flowi { #define FLOW_DIR_FWD 2 struct sock; -typedef void (*flow_resolve_t)(struct flowi *key, u32 sk_sid, u16 family, u8 dir, +typedef void (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir, void **objp, atomic_t **obj_refp); -extern void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, +extern void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver); extern void flow_cache_flush(void); extern atomic_t flow_cache_genid; diff --git a/net/core/flow.c b/net/core/flow.c index 2191af5f26ac..645241165e6c 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -32,7 +32,6 @@ struct flow_cache_entry { u8 dir; struct flowi key; u32 genid; - u32 sk_sid; void *object; atomic_t *object_ref; }; @@ -165,7 +164,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) return 0; } -void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, +void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver) { struct flow_cache_entry *fle, **head; @@ -189,7 +188,6 @@ void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, for (fle = *head; fle; fle = fle->next) { if (fle->family == family && fle->dir == dir && - fle->sk_sid == sk_sid && flow_key_compare(key, &fle->key) == 0) { if (fle->genid == atomic_read(&flow_cache_genid)) { void *ret = fle->object; @@ -214,7 +212,6 @@ void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, *head = fle; fle->family = family; fle->dir = dir; - fle->sk_sid = sk_sid; memcpy(&fle->key, key, sizeof(*key)); fle->object = NULL; flow_count(cpu)++; @@ -226,7 +223,7 @@ nocache: void *obj; atomic_t *obj_ref; - resolver(key, sk_sid, family, dir, &obj, &obj_ref); + resolver(key, family, dir, &obj, &obj_ref); if (fle) { fle->genid = atomic_read(&flow_cache_genid); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 3da67ca2c3ce..79405daadc52 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -597,7 +597,7 @@ EXPORT_SYMBOL(xfrm_policy_walk); /* Find policy to apply to this flow. */ -static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir, +static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, void **objp, atomic_t **obj_refp) { struct xfrm_policy *pol; @@ -613,7 +613,7 @@ static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir, match = xfrm_selector_match(sel, fl, family); if (match) { - if (!security_xfrm_policy_lookup(pol, sk_sid, dir)) { + if (!security_xfrm_policy_lookup(pol, fl->secid, dir)) { xfrm_pol_hold(pol); break; } @@ -641,7 +641,7 @@ static inline int policy_to_flow_dir(int dir) }; } -static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl, u32 sk_sid) +static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl) { struct xfrm_policy *pol; @@ -652,7 +652,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc int err = 0; if (match) - err = security_xfrm_policy_lookup(pol, sk_sid, policy_to_flow_dir(dir)); + err = security_xfrm_policy_lookup(pol, fl->secid, policy_to_flow_dir(dir)); if (match && !err) xfrm_pol_hold(pol); @@ -862,19 +862,20 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, u32 genid; u16 family; u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); - u32 sk_sid = security_sk_sid(sk, fl, dir); + + fl->secid = security_sk_sid(sk, fl, dir); restart: genid = atomic_read(&flow_cache_genid); policy = NULL; if (sk && sk->sk_policy[1]) - policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, sk_sid); + policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); if (!policy) { /* To accelerate a bit... */ if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT]) return 0; - policy = flow_cache_lookup(fl, sk_sid, dst_orig->ops->family, + policy = flow_cache_lookup(fl, dst_orig->ops->family, dir, xfrm_policy_lookup); } @@ -1032,13 +1033,15 @@ int xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + int err; if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; afinfo->decode_session(skb, fl); + err = security_xfrm_decode_session(skb, fl); xfrm_policy_put_afinfo(afinfo); - return 0; + return err; } EXPORT_SYMBOL(xfrm_decode_session); @@ -1058,14 +1061,11 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, struct xfrm_policy *pol; struct flowi fl; u8 fl_dir = policy_to_flow_dir(dir); - u32 sk_sid; if (xfrm_decode_session(skb, &fl, family) < 0) return 0; nf_nat_decode_session(skb, &fl, family); - sk_sid = security_sk_sid(sk, &fl, fl_dir); - /* First, check used SA against their selectors. */ if (skb->sp) { int i; @@ -1079,10 +1079,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol = NULL; if (sk && sk->sk_policy[dir]) - pol = xfrm_sk_policy_lookup(sk, dir, &fl, sk_sid); + pol = xfrm_sk_policy_lookup(sk, dir, &fl); if (!pol) - pol = flow_cache_lookup(&fl, sk_sid, family, fl_dir, + pol = flow_cache_lookup(&fl, family, fl_dir, xfrm_policy_lookup); if (!pol) @@ -1298,6 +1298,8 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family)) return 0; + if (fl && !security_xfrm_flow_state_match(fl, dst->xfrm)) + return 0; if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 0021aad5db43..be02bd981d12 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -367,7 +367,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, */ if (x->km.state == XFRM_STATE_VALID) { if (!xfrm_selector_match(&x->sel, fl, family) || - !xfrm_sec_ctx_match(pol->security, x->security)) + !security_xfrm_state_pol_flow_match(x, pol, fl)) continue; if (!best || best->km.dying > x->km.dying || @@ -379,7 +379,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, } else if (x->km.state == XFRM_STATE_ERROR || x->km.state == XFRM_STATE_EXPIRED) { if (xfrm_selector_match(&x->sel, fl, family) && - xfrm_sec_ctx_match(pol->security, x->security)) + security_xfrm_state_pol_flow_match(x, pol, fl)) error = -ESRCH; } } @@ -403,6 +403,14 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, * to current session. */ xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family); + error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid); + if (error) { + x->km.state = XFRM_STATE_DEAD; + xfrm_state_put(x); + x = NULL; + goto out; + } + if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; list_add_tail(&x->bydst, xfrm_state_bydst+h); diff --git a/security/dummy.c b/security/dummy.c index bd3bc5faa9a8..c1f10654871e 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -835,7 +835,8 @@ static int dummy_xfrm_policy_delete_security(struct xfrm_policy *xp) return 0; } -static int dummy_xfrm_state_alloc_security(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) +static int dummy_xfrm_state_alloc_security(struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx, struct xfrm_sec_ctx *pol, u32 secid) { return 0; } @@ -853,6 +854,23 @@ static int dummy_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) { return 0; } + +static int dummy_xfrm_state_pol_flow_match(struct xfrm_state *x, + struct xfrm_policy *xp, struct flowi *fl) +{ + return 1; +} + +static int dummy_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) +{ + return 1; +} + +static int dummy_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +{ + return 0; +} + #endif /* CONFIG_SECURITY_NETWORK_XFRM */ static int dummy_register_security (const char *name, struct security_operations *ops) { @@ -1076,6 +1094,9 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, xfrm_state_free_security); set_to_dummy_if_null(ops, xfrm_state_delete_security); set_to_dummy_if_null(ops, xfrm_policy_lookup); + set_to_dummy_if_null(ops, xfrm_state_pol_flow_match); + set_to_dummy_if_null(ops, xfrm_flow_state_match); + set_to_dummy_if_null(ops, xfrm_decode_session); #endif /* CONFIG_SECURITY_NETWORK_XFRM */ #ifdef CONFIG_KEYS set_to_dummy_if_null(ops, key_alloc); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index d67abf77584a..5c189da07bc9 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3468,7 +3468,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) if (err) goto out; - err = selinux_xfrm_sock_rcv_skb(sock_sid, skb); + err = selinux_xfrm_sock_rcv_skb(sock_sid, skb, &ad); out: return err; } @@ -3720,7 +3720,7 @@ static unsigned int selinux_ip_postroute_last(unsigned int hooknum, if (err) goto out; - err = selinux_xfrm_postroute_last(isec->sid, skb); + err = selinux_xfrm_postroute_last(isec->sid, skb, &ad); out: return err ? NF_DROP : NF_ACCEPT; } @@ -4633,6 +4633,9 @@ static struct security_operations selinux_ops = { .xfrm_state_free_security = selinux_xfrm_state_free, .xfrm_state_delete_security = selinux_xfrm_state_delete, .xfrm_policy_lookup = selinux_xfrm_policy_lookup, + .xfrm_state_pol_flow_match = selinux_xfrm_state_pol_flow_match, + .xfrm_flow_state_match = selinux_xfrm_flow_state_match, + .xfrm_decode_session = selinux_xfrm_decode_session, #endif #ifdef CONFIG_KEYS diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index c96498a10eb8..f51a3e84bd9b 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -2,6 +2,7 @@ * SELinux support for the XFRM LSM hooks * * Author : Trent Jaeger, + * Updated : Venkat Yekkirala, */ #ifndef _SELINUX_XFRM_H_ #define _SELINUX_XFRM_H_ @@ -10,10 +11,16 @@ int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx * int selinux_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new); void selinux_xfrm_policy_free(struct xfrm_policy *xp); int selinux_xfrm_policy_delete(struct xfrm_policy *xp); -int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx); +int selinux_xfrm_state_alloc(struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx, struct xfrm_sec_ctx *pol, u32 secid); void selinux_xfrm_state_free(struct xfrm_state *x); int selinux_xfrm_state_delete(struct xfrm_state *x); -int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir); +int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir); +int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, + struct xfrm_policy *xp, struct flowi *fl); +int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm); +int selinux_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl); + /* * Extract the security blob from the sock (it's actually on the socket) @@ -39,17 +46,21 @@ static inline u32 selinux_no_sk_sid(struct flowi *fl) } #ifdef CONFIG_SECURITY_NETWORK_XFRM -int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb); -int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb); +int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb, + struct avc_audit_data *ad); +int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, + struct avc_audit_data *ad); u32 selinux_socket_getpeer_stream(struct sock *sk); u32 selinux_socket_getpeer_dgram(struct sk_buff *skb); #else -static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb) +static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, + struct avc_audit_data *ad) { return 0; } -static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb) +static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, + struct avc_audit_data *ad) { return 0; } diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 6c985ced8102..a502b0540e3d 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -6,7 +6,12 @@ * Authors: Serge Hallyn * Trent Jaeger * + * Updated: Venkat Yekkirala + * + * Granular IPSec Associations for use in MLS environments. + * * Copyright (C) 2005 International Business Machines Corporation + * Copyright (C) 2006 Trusted Computer Solutions, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, @@ -67,10 +72,10 @@ static inline int selinux_authorizable_xfrm(struct xfrm_state *x) } /* - * LSM hook implementation that authorizes that a socket can be used - * with the corresponding xfrm_sec_ctx and direction. + * LSM hook implementation that authorizes that a flow can use + * a xfrm policy rule. */ -int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) +int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir) { int rc = 0; u32 sel_sid = SECINITSID_UNLABELED; @@ -84,27 +89,129 @@ int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) sel_sid = ctx->ctx_sid; } - rc = avc_has_perm(sk_sid, sel_sid, SECCLASS_ASSOCIATION, - ((dir == FLOW_DIR_IN) ? ASSOCIATION__RECVFROM : - ((dir == FLOW_DIR_OUT) ? ASSOCIATION__SENDTO : - (ASSOCIATION__SENDTO | ASSOCIATION__RECVFROM))), + rc = avc_has_perm(fl_secid, sel_sid, SECCLASS_ASSOCIATION, + ASSOCIATION__POLMATCH, NULL); return rc; } +/* + * LSM hook implementation that authorizes that a state matches + * the given policy, flow combo. + */ + +int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, + struct flowi *fl) +{ + u32 state_sid; + u32 pol_sid; + int err; + + if (x->security) + state_sid = x->security->ctx_sid; + else + state_sid = SECINITSID_UNLABELED; + + if (xp->security) + pol_sid = xp->security->ctx_sid; + else + pol_sid = SECINITSID_UNLABELED; + + err = avc_has_perm(state_sid, pol_sid, SECCLASS_ASSOCIATION, + ASSOCIATION__POLMATCH, + NULL); + + if (err) + return 0; + + return selinux_xfrm_flow_state_match(fl, x); +} + +/* + * LSM hook implementation that authorizes that a particular outgoing flow + * can use a given security association. + */ + +int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) +{ + int rc = 0; + u32 sel_sid = SECINITSID_UNLABELED; + struct xfrm_sec_ctx *ctx; + + /* Context sid is either set to label or ANY_ASSOC */ + if ((ctx = xfrm->security)) { + if (!selinux_authorizable_ctx(ctx)) + return 0; + + sel_sid = ctx->ctx_sid; + } + + rc = avc_has_perm(fl->secid, sel_sid, SECCLASS_ASSOCIATION, + ASSOCIATION__SENDTO, + NULL)? 0:1; + + return rc; +} + +/* + * LSM hook implementation that determines the sid for the session. + */ + +int selinux_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +{ + struct sec_path *sp; + + fl->secid = SECSID_NULL; + + if (skb == NULL) + return 0; + + sp = skb->sp; + if (sp) { + int i, sid_set = 0; + + for (i = sp->len-1; i >= 0; i--) { + struct xfrm_state *x = sp->xvec[i]; + if (selinux_authorizable_xfrm(x)) { + struct xfrm_sec_ctx *ctx = x->security; + + if (!sid_set) { + fl->secid = ctx->ctx_sid; + sid_set = 1; + } + else if (fl->secid != ctx->ctx_sid) + return -EINVAL; + } + } + } + + return 0; +} + /* * Security blob allocation for xfrm_policy and xfrm_state * CTX does not have a meaningful value on input */ -static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *uctx) +static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, + struct xfrm_user_sec_ctx *uctx, struct xfrm_sec_ctx *pol, u32 sid) { int rc = 0; struct task_security_struct *tsec = current->security; - struct xfrm_sec_ctx *ctx; + struct xfrm_sec_ctx *ctx = NULL; + char *ctx_str = NULL; + u32 str_len; + u32 ctx_sid; + + BUG_ON(uctx && pol); + + if (pol) + goto from_policy; BUG_ON(!uctx); - BUG_ON(uctx->ctx_doi != XFRM_SC_ALG_SELINUX); + + if (uctx->ctx_doi != XFRM_SC_ALG_SELINUX) + return -EINVAL; if (uctx->ctx_len >= PAGE_SIZE) return -ENOMEM; @@ -141,9 +248,41 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_us return rc; +from_policy: + BUG_ON(!pol); + rc = security_sid_mls_copy(pol->ctx_sid, sid, &ctx_sid); + if (rc) + goto out; + + rc = security_sid_to_context(ctx_sid, &ctx_str, &str_len); + if (rc) + goto out; + + *ctxp = ctx = kmalloc(sizeof(*ctx) + + str_len, + GFP_ATOMIC); + + if (!ctx) { + rc = -ENOMEM; + goto out; + } + + + ctx->ctx_doi = XFRM_SC_DOI_LSM; + ctx->ctx_alg = XFRM_SC_ALG_SELINUX; + ctx->ctx_sid = ctx_sid; + ctx->ctx_len = str_len; + memcpy(ctx->ctx_str, + ctx_str, + str_len); + + goto out2; + out: *ctxp = NULL; kfree(ctx); +out2: + kfree(ctx_str); return rc; } @@ -157,7 +296,7 @@ int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx * BUG_ON(!xp); - err = selinux_xfrm_sec_ctx_alloc(&xp->security, uctx); + err = selinux_xfrm_sec_ctx_alloc(&xp->security, uctx, NULL, 0); return err; } @@ -217,13 +356,14 @@ int selinux_xfrm_policy_delete(struct xfrm_policy *xp) * LSM hook implementation that allocs and transfers sec_ctx spec to * xfrm_state. */ -int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uctx) +int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uctx, + struct xfrm_sec_ctx *pol, u32 secid) { int err; BUG_ON(!x); - err = selinux_xfrm_sec_ctx_alloc(&x->security, uctx); + err = selinux_xfrm_sec_ctx_alloc(&x->security, uctx, pol, secid); return err; } @@ -329,38 +469,30 @@ int selinux_xfrm_state_delete(struct xfrm_state *x) * we need to check for unlabelled access since this may not have * gone thru the IPSec process. */ -int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb) +int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, + struct avc_audit_data *ad) { int i, rc = 0; struct sec_path *sp; + u32 sel_sid = SECINITSID_UNLABELED; sp = skb->sp; if (sp) { - /* - * __xfrm_policy_check does not approve unless xfrm_policy_ok - * says that spi's match for policy and the socket. - * - * Only need to verify the existence of an authorizable sp. - */ for (i = 0; i < sp->len; i++) { struct xfrm_state *x = sp->xvec[i]; - if (x && selinux_authorizable_xfrm(x)) - goto accept; + if (x && selinux_authorizable_xfrm(x)) { + struct xfrm_sec_ctx *ctx = x->security; + sel_sid = ctx->ctx_sid; + break; + } } } - /* check SELinux sock for unlabelled access */ - rc = avc_has_perm(isec_sid, SECINITSID_UNLABELED, SECCLASS_ASSOCIATION, - ASSOCIATION__RECVFROM, NULL); - if (rc) - goto drop; + rc = avc_has_perm(isec_sid, sel_sid, SECCLASS_ASSOCIATION, + ASSOCIATION__RECVFROM, ad); -accept: - return 0; - -drop: return rc; } @@ -371,7 +503,8 @@ drop: * If we do have a authorizable security association, then it has already been * checked in xfrm_policy_lookup hook. */ -int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb) +int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, + struct avc_audit_data *ad) { struct dst_entry *dst; int rc = 0; @@ -391,7 +524,7 @@ int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb) } rc = avc_has_perm(isec_sid, SECINITSID_UNLABELED, SECCLASS_ASSOCIATION, - ASSOCIATION__SENDTO, NULL); + ASSOCIATION__SENDTO, ad); out: return rc; } From 0d681623d30c6565e8b62889f3aa3f4d4662c3e8 Mon Sep 17 00:00:00 2001 From: Serge Hallyn Date: Mon, 24 Jul 2006 23:30:44 -0700 Subject: [PATCH 0382/1063] [MLSXFRM]: Add security context to acquire messages using netlink This includes the security context of a security association created for use by IKE in the acquire messages sent to IKE daemons using netlink/xfrm_user. This would allow the daemons to include the security context in the negotiation, so that the resultant association is unique to that security context. Signed-off-by: Serge Hallyn Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index fa79ddc4239e..dac8db1088bc 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -911,27 +911,40 @@ rtattr_failure: return -1; } -static int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb) +static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb) { - if (xp->security) { - int ctx_size = sizeof(struct xfrm_sec_ctx) + - xp->security->ctx_len; - struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size); - struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); + int ctx_size = sizeof(struct xfrm_sec_ctx) + s->ctx_len; + struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size); + struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); - uctx->exttype = XFRMA_SEC_CTX; - uctx->len = ctx_size; - uctx->ctx_doi = xp->security->ctx_doi; - uctx->ctx_alg = xp->security->ctx_alg; - uctx->ctx_len = xp->security->ctx_len; - memcpy(uctx + 1, xp->security->ctx_str, xp->security->ctx_len); - } - return 0; + uctx->exttype = XFRMA_SEC_CTX; + uctx->len = ctx_size; + uctx->ctx_doi = s->ctx_doi; + uctx->ctx_alg = s->ctx_alg; + uctx->ctx_len = s->ctx_len; + memcpy(uctx + 1, s->ctx_str, s->ctx_len); + return 0; rtattr_failure: return -1; } +static inline int copy_to_user_state_sec_ctx(struct xfrm_state *x, struct sk_buff *skb) +{ + if (x->security) { + return copy_sec_ctx(x->security, skb); + } + return 0; +} + +static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb) +{ + if (xp->security) { + return copy_sec_ctx(xp->security, skb); + } + return 0; +} + static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr) { struct xfrm_dump_info *sp = ptr; @@ -1710,7 +1723,7 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, if (copy_to_user_tmpl(xp, skb) < 0) goto nlmsg_failure; - if (copy_to_user_sec_ctx(xp, skb)) + if (copy_to_user_state_sec_ctx(x, skb)) goto nlmsg_failure; nlh->nlmsg_len = skb->tail - b; From 4e2ba18eae7f370c7c3ed96eaca747cc9b39f917 Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Mon, 24 Jul 2006 23:31:14 -0700 Subject: [PATCH 0383/1063] [MLSXFRM]: Add security context to acquire messages using PF_KEY This includes the security context of a security association created for use by IKE in the acquire messages sent to IKE daemons using PF_KEY. This would allow the daemons to include the security context in the negotiation, so that the resultant association is unique to that security context. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- net/key/af_key.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/net/key/af_key.c b/net/key/af_key.c index 3a95b2ee4690..a065e1a67773 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2708,6 +2708,9 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct #endif int sockaddr_size; int size; + struct sadb_x_sec_ctx *sec_ctx; + struct xfrm_sec_ctx *xfrm_ctx; + int ctx_size = 0; sockaddr_size = pfkey_sockaddr_size(x->props.family); if (!sockaddr_size) @@ -2723,6 +2726,11 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct else if (x->id.proto == IPPROTO_ESP) size += count_esp_combs(t); + if ((xfrm_ctx = x->security)) { + ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len); + size += sizeof(struct sadb_x_sec_ctx) + ctx_size; + } + skb = alloc_skb(size + 16, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; @@ -2818,6 +2826,20 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct else if (x->id.proto == IPPROTO_ESP) dump_esp_combs(skb, t); + /* security context */ + if (xfrm_ctx) { + sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb, + sizeof(struct sadb_x_sec_ctx) + ctx_size); + sec_ctx->sadb_x_sec_len = + (sizeof(struct sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t); + sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX; + sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi; + sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg; + sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len; + memcpy(sec_ctx + 1, xfrm_ctx->ctx_str, + xfrm_ctx->ctx_len); + } + return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL); } From beb8d13bed80f8388f1a9a107d07ddd342e627e8 Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Fri, 4 Aug 2006 23:12:42 -0700 Subject: [PATCH 0384/1063] [MLSXFRM]: Add flow labeling This labels the flows that could utilize IPSec xfrms at the points the flows are defined so that IPSec policy and SAs at the right label can be used. The following protos are currently not handled, but they should continue to be able to use single-labeled IPSec like they currently do. ipmr ip_gre ipip igmp sit sctp ip6_tunnel (IPv6 over IPv6 tunnel device) decnet Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- include/linux/security.h | 38 +++++++++++++++++++++----------- include/net/route.h | 3 +++ net/dccp/ipv4.c | 1 + net/dccp/ipv6.c | 6 +++++ net/ipv4/af_inet.c | 1 + net/ipv4/icmp.c | 2 ++ net/ipv4/inet_connection_sock.c | 1 + net/ipv4/ip_output.c | 2 ++ net/ipv4/netfilter/ipt_REJECT.c | 1 + net/ipv4/raw.c | 1 + net/ipv4/syncookies.c | 1 + net/ipv4/udp.c | 1 + net/ipv6/af_inet6.c | 1 + net/ipv6/datagram.c | 2 ++ net/ipv6/icmp.c | 2 ++ net/ipv6/inet6_connection_sock.c | 1 + net/ipv6/ndisc.c | 1 + net/ipv6/netfilter/ip6t_REJECT.c | 1 + net/ipv6/raw.c | 1 + net/ipv6/tcp_ipv6.c | 7 ++++++ net/ipv6/udp.c | 2 ++ net/xfrm/xfrm_policy.c | 3 +-- security/dummy.c | 7 +++--- security/selinux/hooks.c | 8 +++---- security/selinux/include/xfrm.h | 14 +----------- security/selinux/xfrm.c | 11 +++++---- 26 files changed, 79 insertions(+), 40 deletions(-) diff --git a/include/linux/security.h b/include/linux/security.h index 2c4921d79d19..f3909d189fe0 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -32,6 +32,7 @@ #include #include #include +#include struct ctl_table; @@ -815,8 +816,8 @@ struct swap_info_struct; * Deallocate security structure. * @sk_clone_security: * Clone/copy security structure. - * @sk_getsid: - * Retrieve the LSM-specific sid for the sock to enable caching of network + * @sk_getsecid: + * Retrieve the LSM-specific secid for the sock to enable caching of network * authorizations. * * Security hooks for XFRM operations. @@ -882,8 +883,9 @@ struct swap_info_struct; * Return 1 if there is a match. * @xfrm_decode_session: * @skb points to skb to decode. - * @fl points to the flow key to set. - * Return 0 if successful decoding. + * @secid points to the flow key secid to set. + * @ckall says if all xfrms used should be checked for same secid. + * Return 0 if ckall is zero or all xfrms used have the same secid. * * Security hooks affecting all Key Management operations * @@ -1353,7 +1355,7 @@ struct security_operations { int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority); void (*sk_free_security) (struct sock *sk); void (*sk_clone_security) (const struct sock *sk, struct sock *newsk); - unsigned int (*sk_getsid) (struct sock *sk, struct flowi *fl, u8 dir); + void (*sk_getsecid) (struct sock *sk, u32 *secid); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -1370,7 +1372,7 @@ struct security_operations { int (*xfrm_state_pol_flow_match)(struct xfrm_state *x, struct xfrm_policy *xp, struct flowi *fl); int (*xfrm_flow_state_match)(struct flowi *fl, struct xfrm_state *xfrm); - int (*xfrm_decode_session)(struct sk_buff *skb, struct flowi *fl); + int (*xfrm_decode_session)(struct sk_buff *skb, u32 *secid, int ckall); #endif /* CONFIG_SECURITY_NETWORK_XFRM */ /* key management security hooks */ @@ -2917,9 +2919,9 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) return security_ops->sk_clone_security(sk, newsk); } -static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir) +static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) { - return security_ops->sk_getsid(sk, fl, dir); + security_ops->sk_getsecid(sk, &fl->secid); } #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct socket * sock, @@ -3047,9 +3049,8 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) { } -static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir) +static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) { - return 0; } #endif /* CONFIG_SECURITY_NETWORK */ @@ -3114,9 +3115,16 @@ static inline int security_xfrm_flow_state_match(struct flowi *fl, struct xfrm_s return security_ops->xfrm_flow_state_match(fl, xfrm); } -static inline int security_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) { - return security_ops->xfrm_decode_session(skb, fl); + return security_ops->xfrm_decode_session(skb, secid, 1); +} + +static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl) +{ + int rc = security_ops->xfrm_decode_session(skb, &fl->secid, 0); + + BUG_ON(rc); } #else /* CONFIG_SECURITY_NETWORK_XFRM */ static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) @@ -3176,11 +3184,15 @@ static inline int security_xfrm_flow_state_match(struct flowi *fl, return 1; } -static inline int security_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) { return 0; } +static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl) +{ +} + #endif /* CONFIG_SECURITY_NETWORK_XFRM */ #ifdef CONFIG_KEYS diff --git a/include/net/route.h b/include/net/route.h index c4a068692dcc..7f93ac0e0899 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -32,6 +32,7 @@ #include #include #include +#include #ifndef __KERNEL__ #warning This file is not supposed to be used outside of kernel. @@ -166,6 +167,7 @@ static inline int ip_route_connect(struct rtable **rp, u32 dst, ip_rt_put(*rp); *rp = NULL; } + security_sk_classify_flow(sk, &fl); return ip_route_output_flow(rp, &fl, sk, 0); } @@ -182,6 +184,7 @@ static inline int ip_route_newports(struct rtable **rp, u8 protocol, fl.proto = protocol; ip_rt_put(*rp); *rp = NULL; + security_sk_classify_flow(sk, &fl); return ip_route_output_flow(rp, &fl, sk, 0); } return 0; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 7f56f7e8f571..386498053b1c 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -678,6 +678,7 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk, } }; + security_skb_classify_flow(skb, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) { IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); return NULL; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 610c722ac27f..53d255c01431 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -201,6 +201,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = usin->sin6_port; fl.fl_ip_sport = inet->sport; + security_sk_classify_flow(sk, &fl); if (np->opt != NULL && np->opt->srcrt != NULL) { const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; @@ -322,6 +323,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; + security_sk_classify_flow(sk, &fl); err = ip6_dst_lookup(sk, &dst, &fl); if (err) { @@ -422,6 +424,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, fl.oif = ireq6->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; + security_sk_classify_flow(sk, &fl); if (dst == NULL) { opt = np->opt; @@ -566,6 +569,7 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb) fl.oif = inet6_iif(rxskb); fl.fl_ip_dport = dh->dccph_dport; fl.fl_ip_sport = dh->dccph_sport; + security_skb_classify_flow(rxskb, &fl); /* sk = NULL, but it is safe for now. RST socket required. */ if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { @@ -622,6 +626,7 @@ static void dccp_v6_reqsk_send_ack(struct sk_buff *rxskb, fl.oif = inet6_iif(rxskb); fl.fl_ip_dport = dh->dccph_dport; fl.fl_ip_sport = dh->dccph_sport; + security_skb_classify_flow(rxskb, &fl); if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) { @@ -842,6 +847,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; + security_sk_classify_flow(sk, &fl); if (ip6_dst_lookup(sk, &dst, &fl)) goto out; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c84a32070f8d..fc40da3b6d39 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1074,6 +1074,7 @@ int inet_sk_rebuild_header(struct sock *sk) }, }; + security_sk_classify_flow(sk, &fl); err = ip_route_output_flow(&rt, &fl, sk, 0); } if (!err) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 4c86ac3d882d..6ad797c14163 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -406,6 +406,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) .saddr = rt->rt_spec_dst, .tos = RT_TOS(skb->nh.iph->tos) } }, .proto = IPPROTO_ICMP }; + security_skb_classify_flow(skb, &fl); if (ip_route_output_key(&rt, &fl)) goto out_unlock; } @@ -560,6 +561,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) } } }; + security_skb_classify_flow(skb_in, &fl); if (ip_route_output_key(&rt, &fl)) goto out_unlock; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index e50a1bfd7ccc..772b4eac78bc 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -327,6 +327,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, { .sport = inet_sk(sk)->sport, .dport = ireq->rmt_port } } }; + security_sk_classify_flow(sk, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) { IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); return NULL; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a2ede167e045..308bdeac3455 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -328,6 +328,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) * keep trying until route appears or the connection times * itself out. */ + security_sk_classify_flow(sk, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) goto no_route; } @@ -1366,6 +1367,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar { .sport = skb->h.th->dest, .dport = skb->h.th->source } }, .proto = sk->sk_protocol }; + security_skb_classify_flow(skb, &fl); if (ip_route_output_key(&rt, &fl)) return; } diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 269bc2067cb8..7f905bf2bde5 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -90,6 +90,7 @@ static inline struct rtable *route_reverse(struct sk_buff *skb, fl.proto = IPPROTO_TCP; fl.fl_ip_sport = tcph->dest; fl.fl_ip_dport = tcph->source; + security_skb_classify_flow(skb, &fl); xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 62b2762a2420..fe44cb50a1c5 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -484,6 +484,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (!inet->hdrincl) raw_probe_proto_opt(&fl, msg); + security_sk_classify_flow(sk, &fl); err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); } if (err) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index e20be3331f67..307dc3c0d635 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -259,6 +259,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, .uli_u = { .ports = { .sport = skb->h.th->dest, .dport = skb->h.th->source } } }; + security_sk_classify_flow(sk, &fl); if (ip_route_output_key(&rt, &fl)) { reqsk_free(req); goto out; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f136cec96d95..a4d005eccc7f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -603,6 +603,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .uli_u = { .ports = { .sport = inet->sport, .dport = dport } } }; + security_sk_classify_flow(sk, &fl); err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); if (err) goto out; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index ac85e9c532c2..82a1b1a328db 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -637,6 +637,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; + security_sk_classify_flow(sk, &fl); if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 3b55b4c8e2d1..c73508e090a6 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -156,6 +156,8 @@ ipv4_connected: if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST)) fl.oif = np->mcast_oif; + security_sk_classify_flow(sk, &fl); + if (flowlabel) { if (flowlabel->opt && flowlabel->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 356a8a7ef22a..dbfce089e916 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -358,6 +358,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, fl.oif = iif; fl.fl_icmp_type = type; fl.fl_icmp_code = code; + security_skb_classify_flow(skb, &fl); if (icmpv6_xmit_lock()) return; @@ -472,6 +473,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) ipv6_addr_copy(&fl.fl6_src, saddr); fl.oif = skb->dev->ifindex; fl.fl_icmp_type = ICMPV6_ECHO_REPLY; + security_skb_classify_flow(skb, &fl); if (icmpv6_xmit_lock()) return; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index bf491077b822..7a51a258615d 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -157,6 +157,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) fl.oif = sk->sk_bound_dev_if; fl.fl_ip_sport = inet->sport; fl.fl_ip_dport = inet->dport; + security_sk_classify_flow(sk, &fl); if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index b50055b9278d..67cfc3813c32 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -419,6 +419,7 @@ static inline void ndisc_flow_init(struct flowi *fl, u8 type, fl->proto = IPPROTO_ICMPV6; fl->fl_icmp_type = type; fl->fl_icmp_code = 0; + security_sk_classify_flow(ndisc_socket->sk, fl); } static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 8629ba195d2d..c4eba1aeb323 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -96,6 +96,7 @@ static void send_reset(struct sk_buff *oldskb) ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr); fl.fl_ip_sport = otcph.dest; fl.fl_ip_dport = otcph.source; + security_skb_classify_flow(oldskb, &fl); dst = ip6_route_output(NULL, &fl); if (dst == NULL) return; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 15b862d8acab..d5040e172292 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -759,6 +759,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) fl.oif = np->mcast_oif; + security_sk_classify_flow(sk, &fl); err = ip6_dst_lookup(sk, &dst, &fl); if (err) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 802a1a6b1037..46922e57e311 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -251,6 +251,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, final_p = &final; } + security_sk_classify_flow(sk, &fl); + err = ip6_dst_lookup(sk, &dst, &fl); if (err) goto failure; @@ -374,6 +376,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; + security_skb_classify_flow(skb, &fl); if ((err = ip6_dst_lookup(sk, &dst, &fl))) { sk->sk_err_soft = -err; @@ -467,6 +470,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, fl.oif = treq->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; + security_sk_classify_flow(sk, &fl); if (dst == NULL) { opt = np->opt; @@ -625,6 +629,7 @@ static void tcp_v6_send_reset(struct sk_buff *skb) fl.oif = inet6_iif(skb); fl.fl_ip_dport = t1->dest; fl.fl_ip_sport = t1->source; + security_skb_classify_flow(skb, &fl); /* sk = NULL, but it is safe for now. RST socket required. */ if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { @@ -691,6 +696,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 fl.oif = inet6_iif(skb); fl.fl_ip_dport = t1->dest; fl.fl_ip_sport = t1->source; + security_skb_classify_flow(skb, &fl); if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { @@ -923,6 +929,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; + security_sk_classify_flow(sk, &fl); if (ip6_dst_lookup(sk, &dst, &fl)) goto out; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3d54f246411e..82c7c9cde2a8 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -782,6 +782,8 @@ do_udp_sendmsg: connected = 0; } + security_sk_classify_flow(sk, fl); + err = ip6_sk_dst_lookup(sk, &dst, fl); if (err) goto out; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 79405daadc52..32c963c90573 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -863,7 +863,6 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, u16 family; u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); - fl->secid = security_sk_sid(sk, fl, dir); restart: genid = atomic_read(&flow_cache_genid); policy = NULL; @@ -1039,7 +1038,7 @@ xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family return -EAFNOSUPPORT; afinfo->decode_session(skb, fl); - err = security_xfrm_decode_session(skb, fl); + err = security_xfrm_decode_session(skb, &fl->secid); xfrm_policy_put_afinfo(afinfo); return err; } diff --git a/security/dummy.c b/security/dummy.c index c1f10654871e..c0ff6b9bfd7d 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -809,9 +809,8 @@ static inline void dummy_sk_clone_security (const struct sock *sk, struct sock * { } -static unsigned int dummy_sk_getsid(struct sock *sk, struct flowi *fl, u8 dir) +static inline void dummy_sk_getsecid(struct sock *sk, u32 *secid) { - return 0; } #endif /* CONFIG_SECURITY_NETWORK */ @@ -866,7 +865,7 @@ static int dummy_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm return 1; } -static int dummy_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +static int dummy_xfrm_decode_session(struct sk_buff *skb, u32 *fl, int ckall) { return 0; } @@ -1083,7 +1082,7 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, sk_alloc_security); set_to_dummy_if_null(ops, sk_free_security); set_to_dummy_if_null(ops, sk_clone_security); - set_to_dummy_if_null(ops, sk_getsid); + set_to_dummy_if_null(ops, sk_getsecid); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM set_to_dummy_if_null(ops, xfrm_policy_alloc_security); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 5c189da07bc9..4e5989d584ce 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3561,14 +3561,14 @@ static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk) newssec->peer_sid = ssec->peer_sid; } -static unsigned int selinux_sk_getsid_security(struct sock *sk, struct flowi *fl, u8 dir) +static void selinux_sk_getsecid(struct sock *sk, u32 *secid) { if (!sk) - return selinux_no_sk_sid(fl); + *secid = SECINITSID_ANY_SOCKET; else { struct sk_security_struct *sksec = sk->sk_security; - return sksec->sid; + *secid = sksec->sid; } } @@ -4622,7 +4622,7 @@ static struct security_operations selinux_ops = { .sk_alloc_security = selinux_sk_alloc_security, .sk_free_security = selinux_sk_free_security, .sk_clone_security = selinux_sk_clone_security, - .sk_getsid = selinux_sk_getsid_security, + .sk_getsecid = selinux_sk_getsecid, #ifdef CONFIG_SECURITY_NETWORK_XFRM .xfrm_policy_alloc_security = selinux_xfrm_policy_alloc, diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index f51a3e84bd9b..8e45c1d588a8 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -19,7 +19,7 @@ int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir); int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, struct flowi *fl); int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm); -int selinux_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl); +int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *fl, int ckall); /* @@ -33,18 +33,6 @@ static inline struct inode_security_struct *get_sock_isec(struct sock *sk) return SOCK_INODE(sk->sk_socket)->i_security; } - -static inline u32 selinux_no_sk_sid(struct flowi *fl) -{ - /* NOTE: no sock occurs on ICMP reply, forwards, ... */ - /* icmp_reply: authorize as kernel packet */ - if (fl && fl->proto == IPPROTO_ICMP) { - return SECINITSID_KERNEL; - } - - return SECINITSID_ANY_SOCKET; -} - #ifdef CONFIG_SECURITY_NETWORK_XFRM int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb, struct avc_audit_data *ad); diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index a502b0540e3d..c750ef7af66f 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -158,11 +158,11 @@ int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) * LSM hook implementation that determines the sid for the session. */ -int selinux_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall) { struct sec_path *sp; - fl->secid = SECSID_NULL; + *sid = SECSID_NULL; if (skb == NULL) return 0; @@ -177,10 +177,13 @@ int selinux_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) struct xfrm_sec_ctx *ctx = x->security; if (!sid_set) { - fl->secid = ctx->ctx_sid; + *sid = ctx->ctx_sid; sid_set = 1; + + if (!ckall) + break; } - else if (fl->secid != ctx->ctx_sid) + else if (*sid != ctx->ctx_sid) return -EINVAL; } } From cb969f072b6d67770b559617f14e767f47e77ece Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Mon, 24 Jul 2006 23:32:20 -0700 Subject: [PATCH 0385/1063] [MLSXFRM]: Default labeling of socket specific IPSec policies This defaults the label of socket-specific IPSec policies to be the same as the socket they are set on. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- include/linux/security.h | 19 ++++++++++++++++--- include/net/xfrm.h | 2 +- net/key/af_key.c | 15 +++++++++++---- net/xfrm/xfrm_state.c | 2 +- net/xfrm/xfrm_user.c | 13 +++++++++++-- security/dummy.c | 3 ++- security/selinux/include/xfrm.h | 3 ++- security/selinux/xfrm.c | 33 ++++++++++++++++++++++----------- 8 files changed, 66 insertions(+), 24 deletions(-) diff --git a/include/linux/security.h b/include/linux/security.h index f3909d189fe0..8e3dc6c51a6d 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -827,8 +827,10 @@ struct swap_info_struct; * used by the XFRM system. * @sec_ctx contains the security context information being provided by * the user-level policy update program (e.g., setkey). + * @sk refers to the sock from which to derive the security context. * Allocate a security structure to the xp->security field; the security - * field is initialized to NULL when the xfrm_policy is allocated. + * field is initialized to NULL when the xfrm_policy is allocated. Only + * one of sec_ctx or sock can be specified. * Return 0 if operation was successful (memory to allocate, legal context) * @xfrm_policy_clone_security: * @old contains an existing xfrm_policy in the SPD. @@ -1359,7 +1361,8 @@ struct security_operations { #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM - int (*xfrm_policy_alloc_security) (struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx); + int (*xfrm_policy_alloc_security) (struct xfrm_policy *xp, + struct xfrm_user_sec_ctx *sec_ctx, struct sock *sk); int (*xfrm_policy_clone_security) (struct xfrm_policy *old, struct xfrm_policy *new); void (*xfrm_policy_free_security) (struct xfrm_policy *xp); int (*xfrm_policy_delete_security) (struct xfrm_policy *xp); @@ -3057,7 +3060,12 @@ static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) #ifdef CONFIG_SECURITY_NETWORK_XFRM static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) { - return security_ops->xfrm_policy_alloc_security(xp, sec_ctx); + return security_ops->xfrm_policy_alloc_security(xp, sec_ctx, NULL); +} + +static inline int security_xfrm_sock_policy_alloc(struct xfrm_policy *xp, struct sock *sk) +{ + return security_ops->xfrm_policy_alloc_security(xp, NULL, sk); } static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new) @@ -3132,6 +3140,11 @@ static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm return 0; } +static inline int security_xfrm_sock_policy_alloc(struct xfrm_policy *xp, struct sock *sk) +{ + return 0; +} + static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new) { return 0; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 3ecd9fa1ed4b..00bf86e6e82b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -362,7 +362,7 @@ struct xfrm_mgr char *id; int (*notify)(struct xfrm_state *x, struct km_event *c); int (*acquire)(struct xfrm_state *x, struct xfrm_tmpl *, struct xfrm_policy *xp, int dir); - struct xfrm_policy *(*compile_policy)(u16 family, int opt, u8 *data, int len, int *dir); + struct xfrm_policy *(*compile_policy)(struct sock *sk, int opt, u8 *data, int len, int *dir); int (*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport); int (*notify_policy)(struct xfrm_policy *x, int dir, struct km_event *c); }; diff --git a/net/key/af_key.c b/net/key/af_key.c index a065e1a67773..797c744a8438 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2843,14 +2843,14 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL); } -static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, +static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, u8 *data, int len, int *dir) { struct xfrm_policy *xp; struct sadb_x_policy *pol = (struct sadb_x_policy*)data; struct sadb_x_sec_ctx *sec_ctx; - switch (family) { + switch (sk->sk_family) { case AF_INET: if (opt != IP_IPSEC_POLICY) { *dir = -EOPNOTSUPP; @@ -2891,7 +2891,7 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, xp->lft.hard_byte_limit = XFRM_INF; xp->lft.soft_packet_limit = XFRM_INF; xp->lft.hard_packet_limit = XFRM_INF; - xp->family = family; + xp->family = sk->sk_family; xp->xfrm_nr = 0; if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC && @@ -2907,8 +2907,10 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, p += pol->sadb_x_policy_len*8; sec_ctx = (struct sadb_x_sec_ctx *)p; if (len < pol->sadb_x_policy_len*8 + - sec_ctx->sadb_x_sec_len) + sec_ctx->sadb_x_sec_len) { + *dir = -EINVAL; goto out; + } if ((*dir = verify_sec_ctx_len(p))) goto out; uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); @@ -2918,6 +2920,11 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, if (*dir) goto out; } + else { + *dir = security_xfrm_sock_policy_alloc(xp, sk); + if (*dir) + goto out; + } *dir = pol->sadb_x_policy_dir-1; return xp; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index be02bd981d12..1c796087ee78 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1026,7 +1026,7 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen err = -EINVAL; read_lock(&xfrm_km_lock); list_for_each_entry(km, &xfrm_km_list, list) { - pol = km->compile_policy(sk->sk_family, optname, data, + pol = km->compile_policy(sk, optname, data, optlen, &err); if (err >= 0) break; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index dac8db1088bc..f70e158874d2 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1757,7 +1757,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, /* User gives us xfrm_user_policy_info followed by an array of 0 * or more templates. */ -static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, +static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, u8 *data, int len, int *dir) { struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data; @@ -1765,7 +1765,7 @@ static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, struct xfrm_policy *xp; int nr; - switch (family) { + switch (sk->sk_family) { case AF_INET: if (opt != IP_XFRM_POLICY) { *dir = -EOPNOTSUPP; @@ -1807,6 +1807,15 @@ static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, copy_from_user_policy(xp, p); copy_templates(xp, ut, nr); + if (!xp->security) { + int err = security_xfrm_sock_policy_alloc(xp, sk); + if (err) { + kfree(xp); + *dir = err; + return NULL; + } + } + *dir = p->dir; return xp; diff --git a/security/dummy.c b/security/dummy.c index c0ff6b9bfd7d..66cc06404930 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -815,7 +815,8 @@ static inline void dummy_sk_getsecid(struct sock *sk, u32 *secid) #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM -static int dummy_xfrm_policy_alloc_security(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) +static int dummy_xfrm_policy_alloc_security(struct xfrm_policy *xp, + struct xfrm_user_sec_ctx *sec_ctx, struct sock *sk) { return 0; } diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index 8e45c1d588a8..1822c73e5085 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -7,7 +7,8 @@ #ifndef _SELINUX_XFRM_H_ #define _SELINUX_XFRM_H_ -int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx); +int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, + struct xfrm_user_sec_ctx *sec_ctx, struct sock *sk); int selinux_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new); void selinux_xfrm_policy_free(struct xfrm_policy *xp); int selinux_xfrm_policy_delete(struct xfrm_policy *xp); diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index c750ef7af66f..d3690f985135 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -208,10 +208,8 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, BUG_ON(uctx && pol); - if (pol) - goto from_policy; - - BUG_ON(!uctx); + if (!uctx) + goto not_from_user; if (uctx->ctx_doi != XFRM_SC_ALG_SELINUX) return -EINVAL; @@ -251,11 +249,14 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, return rc; -from_policy: - BUG_ON(!pol); - rc = security_sid_mls_copy(pol->ctx_sid, sid, &ctx_sid); - if (rc) - goto out; +not_from_user: + if (pol) { + rc = security_sid_mls_copy(pol->ctx_sid, sid, &ctx_sid); + if (rc) + goto out; + } + else + ctx_sid = sid; rc = security_sid_to_context(ctx_sid, &ctx_str, &str_len); if (rc) @@ -293,13 +294,23 @@ out2: * LSM hook implementation that allocs and transfers uctx spec to * xfrm_policy. */ -int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *uctx) +int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, + struct xfrm_user_sec_ctx *uctx, struct sock *sk) { int err; + u32 sid; BUG_ON(!xp); + BUG_ON(uctx && sk); - err = selinux_xfrm_sec_ctx_alloc(&xp->security, uctx, NULL, 0); + if (sk) { + struct sk_security_struct *ssec = sk->sk_security; + sid = ssec->sid; + } + else + sid = SECSID_NULL; + + err = selinux_xfrm_sec_ctx_alloc(&xp->security, uctx, NULL, sid); return err; } From 4237c75c0a35535d7f9f2bfeeb4b4df1e068a0bf Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Mon, 24 Jul 2006 23:32:50 -0700 Subject: [PATCH 0386/1063] [MLSXFRM]: Auto-labeling of child sockets This automatically labels the TCP, Unix stream, and dccp child sockets as well as openreqs to be at the same MLS level as the peer. This will result in the selection of appropriately labeled IPSec Security Associations. This also uses the sock's sid (as opposed to the isec sid) in SELinux enforcement of secmark in rcv_skb and postroute_last hooks. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- include/linux/security.h | 55 +++++++++++++ include/net/request_sock.h | 1 + include/net/sock.h | 1 + net/dccp/ipv4.c | 3 + net/dccp/ipv6.c | 7 +- net/ipv4/inet_connection_sock.c | 4 +- net/ipv4/syncookies.c | 6 +- net/ipv4/tcp_ipv4.c | 3 + net/ipv6/tcp_ipv6.c | 6 +- security/dummy.c | 24 ++++++ security/selinux/hooks.c | 137 ++++++++++++++++++++++---------- security/selinux/xfrm.c | 1 - 12 files changed, 197 insertions(+), 51 deletions(-) diff --git a/include/linux/security.h b/include/linux/security.h index 8e3dc6c51a6d..bb4c80fdfe7a 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -90,6 +90,7 @@ extern int cap_netlink_recv(struct sk_buff *skb, int cap); struct nfsctl_arg; struct sched_param; struct swap_info_struct; +struct request_sock; /* bprm_apply_creds unsafe reasons */ #define LSM_UNSAFE_SHARE 1 @@ -819,6 +820,14 @@ struct swap_info_struct; * @sk_getsecid: * Retrieve the LSM-specific secid for the sock to enable caching of network * authorizations. + * @sock_graft: + * Sets the socket's isec sid to the sock's sid. + * @inet_conn_request: + * Sets the openreq's sid to socket's sid with MLS portion taken from peer sid. + * @inet_csk_clone: + * Sets the new child socket's sid to the openreq sid. + * @req_classify_flow: + * Sets the flow's sid to the openreq sid. * * Security hooks for XFRM operations. * @@ -1358,6 +1367,11 @@ struct security_operations { void (*sk_free_security) (struct sock *sk); void (*sk_clone_security) (const struct sock *sk, struct sock *newsk); void (*sk_getsecid) (struct sock *sk, u32 *secid); + void (*sock_graft)(struct sock* sk, struct socket *parent); + int (*inet_conn_request)(struct sock *sk, struct sk_buff *skb, + struct request_sock *req); + void (*inet_csk_clone)(struct sock *newsk, const struct request_sock *req); + void (*req_classify_flow)(const struct request_sock *req, struct flowi *fl); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -2926,6 +2940,28 @@ static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) { security_ops->sk_getsecid(sk, &fl->secid); } + +static inline void security_req_classify_flow(const struct request_sock *req, struct flowi *fl) +{ + security_ops->req_classify_flow(req, fl); +} + +static inline void security_sock_graft(struct sock* sk, struct socket *parent) +{ + security_ops->sock_graft(sk, parent); +} + +static inline int security_inet_conn_request(struct sock *sk, + struct sk_buff *skb, struct request_sock *req) +{ + return security_ops->inet_conn_request(sk, skb, req); +} + +static inline void security_inet_csk_clone(struct sock *newsk, + const struct request_sock *req) +{ + security_ops->inet_csk_clone(newsk, req); +} #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct socket * sock, struct socket * other, @@ -3055,6 +3091,25 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) { } + +static inline void security_req_classify_flow(const struct request_sock *req, struct flowi *fl) +{ +} + +static inline void security_sock_graft(struct sock* sk, struct socket *parent) +{ +} + +static inline int security_inet_conn_request(struct sock *sk, + struct sk_buff *skb, struct request_sock *req) +{ + return 0; +} + +static inline void security_inet_csk_clone(struct sock *newsk, + const struct request_sock *req) +{ +} #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM diff --git a/include/net/request_sock.h b/include/net/request_sock.h index c5d7f920c352..8e165ca16bd8 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -53,6 +53,7 @@ struct request_sock { unsigned long expires; struct request_sock_ops *rsk_ops; struct sock *sk; + u32 secid; }; static inline struct request_sock *reqsk_alloc(struct request_sock_ops *ops) diff --git a/include/net/sock.h b/include/net/sock.h index 91cdceb3c028..337ebec84c70 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -969,6 +969,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) sk->sk_sleep = &parent->wait; parent->sk = sk; sk->sk_socket = parent; + security_sock_graft(sk, parent); write_unlock_bh(&sk->sk_callback_lock); } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 386498053b1c..171d363876ee 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -501,6 +501,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) dccp_openreq_init(req, &dp, skb); + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + ireq = inet_rsk(req); ireq->loc_addr = daddr; ireq->rmt_addr = saddr; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 53d255c01431..231bc7c7e749 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -424,7 +424,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, fl.oif = ireq6->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; - security_sk_classify_flow(sk, &fl); + security_req_classify_flow(req, &fl); if (dst == NULL) { opt = np->opt; @@ -626,7 +626,7 @@ static void dccp_v6_reqsk_send_ack(struct sk_buff *rxskb, fl.oif = inet6_iif(rxskb); fl.fl_ip_dport = dh->dccph_dport; fl.fl_ip_sport = dh->dccph_sport; - security_skb_classify_flow(rxskb, &fl); + security_req_classify_flow(req, &fl); if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) { @@ -709,6 +709,9 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) dccp_openreq_init(req, &dp, skb); + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + ireq6 = inet6_rsk(req); ireq = inet_rsk(req); ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 772b4eac78bc..07204391d083 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -327,7 +327,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, { .sport = inet_sk(sk)->sport, .dport = ireq->rmt_port } } }; - security_sk_classify_flow(sk, &fl); + security_req_classify_flow(req, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) { IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); return NULL; @@ -510,6 +510,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, /* Deinitialize accept_queue to trap illegal accesses. */ memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); + + security_inet_csk_clone(newsk, req); } return newsk; } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 307dc3c0d635..661e0a4bca72 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -214,6 +214,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, if (!req) goto out; + if (security_inet_conn_request(sk, skb, req)) { + reqsk_free(req); + goto out; + } ireq = inet_rsk(req); treq = tcp_rsk(req); treq->rcv_isn = htonl(skb->h.th->seq) - 1; @@ -259,7 +263,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, .uli_u = { .ports = { .sport = skb->h.th->dest, .dport = skb->h.th->source } } }; - security_sk_classify_flow(sk, &fl); + security_req_classify_flow(req, &fl); if (ip_route_output_key(&rt, &fl)) { reqsk_free(req); goto out; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4b04c3edd4a9..43f6740244f8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -798,6 +798,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_openreq_init(req, &tmp_opt, skb); + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + ireq = inet_rsk(req); ireq->loc_addr = daddr; ireq->rmt_addr = saddr; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 46922e57e311..302786a11cd6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -470,7 +470,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, fl.oif = treq->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; - security_sk_classify_flow(sk, &fl); + security_req_classify_flow(req, &fl); if (dst == NULL) { opt = np->opt; @@ -826,6 +826,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_rsk(req)->snt_isn = isn; + security_inet_conn_request(sk, skb, req); + if (tcp_v6_send_synack(sk, req, NULL)) goto drop; @@ -929,7 +931,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; - security_sk_classify_flow(sk, &fl); + security_req_classify_flow(req, &fl); if (ip6_dst_lookup(sk, &dst, &fl)) goto out; diff --git a/security/dummy.c b/security/dummy.c index 66cc06404930..1c45f8e4aad1 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -812,6 +812,26 @@ static inline void dummy_sk_clone_security (const struct sock *sk, struct sock * static inline void dummy_sk_getsecid(struct sock *sk, u32 *secid) { } + +static inline void dummy_sock_graft(struct sock* sk, struct socket *parent) +{ +} + +static inline int dummy_inet_conn_request(struct sock *sk, + struct sk_buff *skb, struct request_sock *req) +{ + return 0; +} + +static inline void dummy_inet_csk_clone(struct sock *newsk, + const struct request_sock *req) +{ +} + +static inline void dummy_req_classify_flow(const struct request_sock *req, + struct flowi *fl) +{ +} #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -1084,6 +1104,10 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, sk_free_security); set_to_dummy_if_null(ops, sk_clone_security); set_to_dummy_if_null(ops, sk_getsecid); + set_to_dummy_if_null(ops, sock_graft); + set_to_dummy_if_null(ops, inet_conn_request); + set_to_dummy_if_null(ops, inet_csk_clone); + set_to_dummy_if_null(ops, req_classify_flow); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM set_to_dummy_if_null(ops, xfrm_policy_alloc_security); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 4e5989d584ce..1dc935f7b919 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3328,8 +3328,9 @@ static int selinux_socket_unix_stream_connect(struct socket *sock, /* server child socket */ ssec = newsk->sk_security; ssec->peer_sid = isec->sid; - - return 0; + err = security_sid_mls_copy(other_isec->sid, ssec->peer_sid, &ssec->sid); + + return err; } static int selinux_socket_unix_may_send(struct socket *sock, @@ -3355,11 +3356,29 @@ static int selinux_socket_unix_may_send(struct socket *sock, } static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb, - struct avc_audit_data *ad, u32 sock_sid, u16 sock_class, - u16 family, char *addrp, int len) + struct avc_audit_data *ad, u16 family, char *addrp, int len) { int err = 0; u32 netif_perm, node_perm, node_sid, if_sid, recv_perm = 0; + struct socket *sock; + u16 sock_class = 0; + u32 sock_sid = 0; + + read_lock_bh(&sk->sk_callback_lock); + sock = sk->sk_socket; + if (sock) { + struct inode *inode; + inode = SOCK_INODE(sock); + if (inode) { + struct inode_security_struct *isec; + isec = inode->i_security; + sock_sid = isec->sid; + sock_class = isec->sclass; + } + } + read_unlock_bh(&sk->sk_callback_lock); + if (!sock_sid) + goto out; if (!skb->dev) goto out; @@ -3419,12 +3438,10 @@ out: static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) { u16 family; - u16 sock_class = 0; char *addrp; int len, err = 0; - u32 sock_sid = 0; - struct socket *sock; struct avc_audit_data ad; + struct sk_security_struct *sksec = sk->sk_security; family = sk->sk_family; if (family != PF_INET && family != PF_INET6) @@ -3434,22 +3451,6 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) if (family == PF_INET6 && skb->protocol == ntohs(ETH_P_IP)) family = PF_INET; - read_lock_bh(&sk->sk_callback_lock); - sock = sk->sk_socket; - if (sock) { - struct inode *inode; - inode = SOCK_INODE(sock); - if (inode) { - struct inode_security_struct *isec; - isec = inode->i_security; - sock_sid = isec->sid; - sock_class = isec->sclass; - } - } - read_unlock_bh(&sk->sk_callback_lock); - if (!sock_sid) - goto out; - AVC_AUDIT_DATA_INIT(&ad, NET); ad.u.net.netif = skb->dev ? skb->dev->name : "[unknown]"; ad.u.net.family = family; @@ -3459,16 +3460,15 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) goto out; if (selinux_compat_net) - err = selinux_sock_rcv_skb_compat(sk, skb, &ad, sock_sid, - sock_class, family, + err = selinux_sock_rcv_skb_compat(sk, skb, &ad, family, addrp, len); else - err = avc_has_perm(sock_sid, skb->secmark, SECCLASS_PACKET, + err = avc_has_perm(sksec->sid, skb->secmark, SECCLASS_PACKET, PACKET__RECV, &ad); if (err) goto out; - err = selinux_xfrm_sock_rcv_skb(sock_sid, skb, &ad); + err = selinux_xfrm_sock_rcv_skb(sksec->sid, skb, &ad); out: return err; } @@ -3572,6 +3572,49 @@ static void selinux_sk_getsecid(struct sock *sk, u32 *secid) } } +void selinux_sock_graft(struct sock* sk, struct socket *parent) +{ + struct inode_security_struct *isec = SOCK_INODE(parent)->i_security; + struct sk_security_struct *sksec = sk->sk_security; + + isec->sid = sksec->sid; +} + +int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb, + struct request_sock *req) +{ + struct sk_security_struct *sksec = sk->sk_security; + int err; + u32 newsid = 0; + u32 peersid; + + err = selinux_xfrm_decode_session(skb, &peersid, 0); + BUG_ON(err); + + err = security_sid_mls_copy(sksec->sid, peersid, &newsid); + if (err) + return err; + + req->secid = newsid; + return 0; +} + +void selinux_inet_csk_clone(struct sock *newsk, const struct request_sock *req) +{ + struct sk_security_struct *newsksec = newsk->sk_security; + + newsksec->sid = req->secid; + /* NOTE: Ideally, we should also get the isec->sid for the + new socket in sync, but we don't have the isec available yet. + So we will wait until sock_graft to do it, by which + time it will have been created and available. */ +} + +void selinux_req_classify_flow(const struct request_sock *req, struct flowi *fl) +{ + fl->secid = req->secid; +} + static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb) { int err = 0; @@ -3611,12 +3654,24 @@ out: #ifdef CONFIG_NETFILTER static int selinux_ip_postroute_last_compat(struct sock *sk, struct net_device *dev, - struct inode_security_struct *isec, struct avc_audit_data *ad, u16 family, char *addrp, int len) { - int err; + int err = 0; u32 netif_perm, node_perm, node_sid, if_sid, send_perm = 0; + struct socket *sock; + struct inode *inode; + struct inode_security_struct *isec; + + sock = sk->sk_socket; + if (!sock) + goto out; + + inode = SOCK_INODE(sock); + if (!inode) + goto out; + + isec = inode->i_security; err = sel_netif_sids(dev, &if_sid, NULL); if (err) @@ -3681,26 +3736,16 @@ static unsigned int selinux_ip_postroute_last(unsigned int hooknum, char *addrp; int len, err = 0; struct sock *sk; - struct socket *sock; - struct inode *inode; struct sk_buff *skb = *pskb; - struct inode_security_struct *isec; struct avc_audit_data ad; struct net_device *dev = (struct net_device *)out; + struct sk_security_struct *sksec; sk = skb->sk; if (!sk) goto out; - sock = sk->sk_socket; - if (!sock) - goto out; - - inode = SOCK_INODE(sock); - if (!inode) - goto out; - - isec = inode->i_security; + sksec = sk->sk_security; AVC_AUDIT_DATA_INIT(&ad, NET); ad.u.net.netif = dev->name; @@ -3711,16 +3756,16 @@ static unsigned int selinux_ip_postroute_last(unsigned int hooknum, goto out; if (selinux_compat_net) - err = selinux_ip_postroute_last_compat(sk, dev, isec, &ad, + err = selinux_ip_postroute_last_compat(sk, dev, &ad, family, addrp, len); else - err = avc_has_perm(isec->sid, skb->secmark, SECCLASS_PACKET, + err = avc_has_perm(sksec->sid, skb->secmark, SECCLASS_PACKET, PACKET__SEND, &ad); if (err) goto out; - err = selinux_xfrm_postroute_last(isec->sid, skb, &ad); + err = selinux_xfrm_postroute_last(sksec->sid, skb, &ad); out: return err ? NF_DROP : NF_ACCEPT; } @@ -4623,6 +4668,10 @@ static struct security_operations selinux_ops = { .sk_free_security = selinux_sk_free_security, .sk_clone_security = selinux_sk_clone_security, .sk_getsecid = selinux_sk_getsecid, + .sock_graft = selinux_sock_graft, + .inet_conn_request = selinux_inet_conn_request, + .inet_csk_clone = selinux_inet_csk_clone, + .req_classify_flow = selinux_req_classify_flow, #ifdef CONFIG_SECURITY_NETWORK_XFRM .xfrm_policy_alloc_security = selinux_xfrm_policy_alloc, diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index d3690f985135..3e742b850af6 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -271,7 +271,6 @@ not_from_user: goto out; } - ctx->ctx_doi = XFRM_SC_DOI_LSM; ctx->ctx_alg = XFRM_SC_ALG_SELINUX; ctx->ctx_sid = ctx_sid; From a51c64f1e5c2876eab2a32955acd9e8015c91c15 Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Thu, 27 Jul 2006 22:01:34 -0700 Subject: [PATCH 0387/1063] [MLSXFRM]: Fix build with SECURITY_NETWORK_XFRM disabled. The following patch will fix the build problem (encountered by Andrew Morton) when SECURITY_NETWORK_XFRM is not enabled. As compared to git-net-selinux_xfrm_decode_session-build-fix.patch in -mm, this patch sets the return parameter sid to SECSID_NULL in selinux_xfrm_decode_session() and handles this value in the caller selinux_inet_conn_request() appropriately. Signed-off-by: Venkat Yekkirala Acked-by: James Morris Signed-off-by: David S. Miller --- security/selinux/hooks.c | 5 +++++ security/selinux/include/xfrm.h | 7 ++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 1dc935f7b919..33028b3b19ce 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3591,6 +3591,11 @@ int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb, err = selinux_xfrm_decode_session(skb, &peersid, 0); BUG_ON(err); + if (peersid == SECSID_NULL) { + req->secid = sksec->sid; + return 0; + } + err = security_sid_mls_copy(sksec->sid, peersid, &newsid); if (err) return err; diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index 1822c73e5085..81eb59890162 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -20,7 +20,6 @@ int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir); int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, struct flowi *fl); int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm); -int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *fl, int ckall); /* @@ -41,6 +40,7 @@ int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, struct avc_audit_data *ad); u32 selinux_socket_getpeer_stream(struct sock *sk); u32 selinux_socket_getpeer_dgram(struct sk_buff *skb); +int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall); #else static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, struct avc_audit_data *ad) @@ -63,6 +63,11 @@ static inline int selinux_socket_getpeer_dgram(struct sk_buff *skb) { return SECSID_NULL; } +static inline int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall) +{ + *sid = SECSID_NULL; + return 0; +} #endif #endif /* _SELINUX_XFRM_H_ */ From 8802f616f6de8576805f32e47602816f141118f2 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 3 Aug 2006 16:45:49 -0700 Subject: [PATCH 0388/1063] [NetLabel]: documentation Documentation for the NetLabel system, this includes a basic overview of how NetLabel works, how LSM developers can integrate it into their favorite LSM, as well as documentation on the CIPSO related sysctl variables. Also, due to the difficulty of finding expired IETF drafts, I am including the IETF CIPSO draft that is the basis of the NetLabel CIPSO implementation. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- CREDITS | 7 + Documentation/00-INDEX | 2 + Documentation/netlabel/00-INDEX | 10 + Documentation/netlabel/cipso_ipv4.txt | 48 ++ .../draft-ietf-cipso-ipsecurity-01.txt | 791 ++++++++++++++++++ Documentation/netlabel/introduction.txt | 46 + Documentation/netlabel/lsm_interface.txt | 47 ++ Documentation/networking/ip-sysctl.txt | 35 + 8 files changed, 986 insertions(+) create mode 100644 Documentation/netlabel/00-INDEX create mode 100644 Documentation/netlabel/cipso_ipv4.txt create mode 100644 Documentation/netlabel/draft-ietf-cipso-ipsecurity-01.txt create mode 100644 Documentation/netlabel/introduction.txt create mode 100644 Documentation/netlabel/lsm_interface.txt diff --git a/CREDITS b/CREDITS index 0fe904ebb7c7..cc3453a55fb9 100644 --- a/CREDITS +++ b/CREDITS @@ -2384,6 +2384,13 @@ N: Thomas Molina E: tmolina@cablespeed.com D: bug fixes, documentation, minor hackery +N: Paul Moore +E: paul.moore@hp.com +D: NetLabel author +S: Hewlett-Packard +S: 110 Spit Brook Road +S: Nashua, NH 03062 + N: James Morris E: jmorris@namei.org W: http://namei.org/ diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index 5f7f7d7f77d2..02457ec9c94f 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -184,6 +184,8 @@ mtrr.txt - how to use PPro Memory Type Range Registers to increase performance. nbd.txt - info on a TCP implementation of a network block device. +netlabel/ + - directory with information on the NetLabel subsystem. networking/ - directory with info on various aspects of networking with Linux. nfsroot.txt diff --git a/Documentation/netlabel/00-INDEX b/Documentation/netlabel/00-INDEX new file mode 100644 index 000000000000..837bf35990e2 --- /dev/null +++ b/Documentation/netlabel/00-INDEX @@ -0,0 +1,10 @@ +00-INDEX + - this file. +cipso_ipv4.txt + - documentation on the IPv4 CIPSO protocol engine. +draft-ietf-cipso-ipsecurity-01.txt + - IETF draft of the CIPSO protocol, dated 16 July 1992. +introduction.txt + - NetLabel introduction, READ THIS FIRST. +lsm_interface.txt + - documentation on the NetLabel kernel security module API. diff --git a/Documentation/netlabel/cipso_ipv4.txt b/Documentation/netlabel/cipso_ipv4.txt new file mode 100644 index 000000000000..93dacb132c3c --- /dev/null +++ b/Documentation/netlabel/cipso_ipv4.txt @@ -0,0 +1,48 @@ +NetLabel CIPSO/IPv4 Protocol Engine +============================================================================== +Paul Moore, paul.moore@hp.com + +May 17, 2006 + + * Overview + +The NetLabel CIPSO/IPv4 protocol engine is based on the IETF Commercial IP +Security Option (CIPSO) draft from July 16, 1992. A copy of this draft can be +found in this directory, consult '00-INDEX' for the filename. While the IETF +draft never made it to an RFC standard it has become a de-facto standard for +labeled networking and is used in many trusted operating systems. + + * Outbound Packet Processing + +The CIPSO/IPv4 protocol engine applies the CIPSO IP option to packets by +adding the CIPSO label to the socket. This causes all packets leaving the +system through the socket to have the CIPSO IP option applied. The socket's +CIPSO label can be changed at any point in time, however, it is recommended +that it is set upon the socket's creation. The LSM can set the socket's CIPSO +label by using the NetLabel security module API; if the NetLabel "domain" is +configured to use CIPSO for packet labeling then a CIPSO IP option will be +generated and attached to the socket. + + * Inbound Packet Processing + +The CIPSO/IPv4 protocol engine validates every CIPSO IP option it finds at the +IP layer without any special handling required by the LSM. However, in order +to decode and translate the CIPSO label on the packet the LSM must use the +NetLabel security module API to extract the security attributes of the packet. +This is typically done at the socket layer using the 'socket_sock_rcv_skb()' +LSM hook. + + * Label Translation + +The CIPSO/IPv4 protocol engine contains a mechanism to translate CIPSO security +attributes such as sensitivity level and category to values which are +appropriate for the host. These mappings are defined as part of a CIPSO +Domain Of Interpretation (DOI) definition and are configured through the +NetLabel user space communication layer. Each DOI definition can have a +different security attribute mapping table. + + * Label Translation Cache + +The NetLabel system provides a framework for caching security attribute +mappings from the network labels to the corresponding LSM identifiers. The +CIPSO/IPv4 protocol engine supports this caching mechanism. diff --git a/Documentation/netlabel/draft-ietf-cipso-ipsecurity-01.txt b/Documentation/netlabel/draft-ietf-cipso-ipsecurity-01.txt new file mode 100644 index 000000000000..256c2c9d4f50 --- /dev/null +++ b/Documentation/netlabel/draft-ietf-cipso-ipsecurity-01.txt @@ -0,0 +1,791 @@ +IETF CIPSO Working Group +16 July, 1992 + + + + COMMERCIAL IP SECURITY OPTION (CIPSO 2.2) + + + +1. Status + +This Internet Draft provides the high level specification for a Commercial +IP Security Option (CIPSO). This draft reflects the version as approved by +the CIPSO IETF Working Group. Distribution of this memo is unlimited. + +This document is an Internet Draft. Internet Drafts are working documents +of the Internet Engineering Task Force (IETF), its Areas, and its Working +Groups. Note that other groups may also distribute working documents as +Internet Drafts. + +Internet Drafts are draft documents valid for a maximum of six months. +Internet Drafts may be updated, replaced, or obsoleted by other documents +at any time. It is not appropriate to use Internet Drafts as reference +material or to cite them other than as a "working draft" or "work in +progress." + +Please check the I-D abstract listing contained in each Internet Draft +directory to learn the current status of this or any other Internet Draft. + + + + +2. Background + +Currently the Internet Protocol includes two security options. One of +these options is the DoD Basic Security Option (BSO) (Type 130) which allows +IP datagrams to be labeled with security classifications. This option +provides sixteen security classifications and a variable number of handling +restrictions. To handle additional security information, such as security +categories or compartments, another security option (Type 133) exists and +is referred to as the DoD Extended Security Option (ESO). The values for +the fixed fields within these two options are administered by the Defense +Information Systems Agency (DISA). + +Computer vendors are now building commercial operating systems with +mandatory access controls and multi-level security. These systems are +no longer built specifically for a particular group in the defense or +intelligence communities. They are generally available commercial systems +for use in a variety of government and civil sector environments. + +The small number of ESO format codes can not support all the possible +applications of a commercial security option. The BSO and ESO were +designed to only support the United States DoD. CIPSO has been designed +to support multiple security policies. This Internet Draft provides the +format and procedures required to support a Mandatory Access Control +security policy. Support for additional security policies shall be +defined in future RFCs. + + + + +Internet Draft, Expires 15 Jan 93 [PAGE 1] + + + +CIPSO INTERNET DRAFT 16 July, 1992 + + + + +3. CIPSO Format + +Option type: 134 (Class 0, Number 6, Copy on Fragmentation) +Option length: Variable + +This option permits security related information to be passed between +systems within a single Domain of Interpretation (DOI). A DOI is a +collection of systems which agree on the meaning of particular values +in the security option. An authority that has been assigned a DOI +identifier will define a mapping between appropriate CIPSO field values +and their human readable equivalent. This authority will distribute that +mapping to hosts within the authority's domain. These mappings may be +sensitive, therefore a DOI authority is not required to make these +mappings available to anyone other than the systems that are included in +the DOI. + +This option MUST be copied on fragmentation. This option appears at most +once in a datagram. All multi-octet fields in the option are defined to be +transmitted in network byte order. The format of this option is as follows: + ++----------+----------+------//------+-----------//---------+ +| 10000110 | LLLLLLLL | DDDDDDDDDDDD | TTTTTTTTTTTTTTTTTTTT | ++----------+----------+------//------+-----------//---------+ + + TYPE=134 OPTION DOMAIN OF TAGS + LENGTH INTERPRETATION + + + Figure 1. CIPSO Format + + +3.1 Type + +This field is 1 octet in length. Its value is 134. + + +3.2 Length + +This field is 1 octet in length. It is the total length of the option +including the type and length fields. With the current IP header length +restriction of 40 octets the value of this field MUST not exceed 40. + + +3.3 Domain of Interpretation Identifier + +This field is an unsigned 32 bit integer. The value 0 is reserved and MUST +not appear as the DOI identifier in any CIPSO option. Implementations +should assume that the DOI identifier field is not aligned on any particular +byte boundary. + +To conserve space in the protocol, security levels and categories are +represented by numbers rather than their ASCII equivalent. This requires +a mapping table within CIPSO hosts to map these numbers to their +corresponding ASCII representations. Non-related groups of systems may + + + +Internet Draft, Expires 15 Jan 93 [PAGE 2] + + + +CIPSO INTERNET DRAFT 16 July, 1992 + + + +have their own unique mappings. For example, one group of systems may +use the number 5 to represent Unclassified while another group may use the +number 1 to represent that same security level. The DOI identifier is used +to identify which mapping was used for the values within the option. + + +3.4 Tag Types + +A common format for passing security related information is necessary +for interoperability. CIPSO uses sets of "tags" to contain the security +information relevant to the data in the IP packet. Each tag begins with +a tag type identifier followed by the length of the tag and ends with the +actual security information to be passed. All multi-octet fields in a tag +are defined to be transmitted in network byte order. Like the DOI +identifier field in the CIPSO header, implementations should assume that +all tags, as well as fields within a tag, are not aligned on any particular +octet boundary. The tag types defined in this document contain alignment +bytes to assist alignment of some information, however alignment can not +be guaranteed if CIPSO is not the first IP option. + +CIPSO tag types 0 through 127 are reserved for defining standard tag +formats. Their definitions will be published in RFCs. Tag types whose +identifiers are greater than 127 are defined by the DOI authority and may +only be meaningful in certain Domains of Interpretation. For these tag +types, implementations will require the DOI identifier as well as the tag +number to determine the security policy and the format associated with the +tag. Use of tag types above 127 are restricted to closed networks where +interoperability with other networks will not be an issue. Implementations +that support a tag type greater than 127 MUST support at least one DOI that +requires only tag types 1 to 127. + +Tag type 0 is reserved. Tag types 1, 2, and 5 are defined in this +Internet Draft. Types 3 and 4 are reserved for work in progress. +The standard format for all current and future CIPSO tags is shown below: + ++----------+----------+--------//--------+ +| TTTTTTTT | LLLLLLLL | IIIIIIIIIIIIIIII | ++----------+----------+--------//--------+ + TAG TAG TAG + TYPE LENGTH INFORMATION + + Figure 2: Standard Tag Format + +In the three tag types described in this document, the length and count +restrictions are based on the current IP limitation of 40 octets for all +IP options. If the IP header is later expanded, then the length and count +restrictions specified in this document may increase to use the full area +provided for IP options. + + +3.4.1 Tag Type Classes + +Tag classes consist of tag types that have common processing requirements +and support the same security policy. The three tags defined in this +Internet Draft belong to the Mandatory Access Control (MAC) Sensitivity + + + +Internet Draft, Expires 15 Jan 93 [PAGE 3] + + + +CIPSO INTERNET DRAFT 16 July, 1992 + + + +class and support the MAC Sensitivity security policy. + + +3.4.2 Tag Type 1 + +This is referred to as the "bit-mapped" tag type. Tag type 1 is included +in the MAC Sensitivity tag type class. The format of this tag type is as +follows: + ++----------+----------+----------+----------+--------//---------+ +| 00000001 | LLLLLLLL | 00000000 | LLLLLLLL | CCCCCCCCCCCCCCCCC | ++----------+----------+----------+----------+--------//---------+ + + TAG TAG ALIGNMENT SENSITIVITY BIT MAP OF + TYPE LENGTH OCTET LEVEL CATEGORIES + + Figure 3. Tag Type 1 Format + + +3.4.2.1 Tag Type + +This field is 1 octet in length and has a value of 1. + + +3.4.2.2 Tag Length + +This field is 1 octet in length. It is the total length of the tag type +including the type and length fields. With the current IP header length +restriction of 40 bytes the value within this field is between 4 and 34. + + +3.4.2.3 Alignment Octet + +This field is 1 octet in length and always has the value of 0. Its purpose +is to align the category bitmap field on an even octet boundary. This will +speed many implementations including router implementations. + + +3.4.2.4 Sensitivity Level + +This field is 1 octet in length. Its value is from 0 to 255. The values +are ordered with 0 being the minimum value and 255 representing the maximum +value. + + +3.4.2.5 Bit Map of Categories + +The length of this field is variable and ranges from 0 to 30 octets. This +provides representation of categories 0 to 239. The ordering of the bits +is left to right or MSB to LSB. For example category 0 is represented by +the most significant bit of the first byte and category 15 is represented +by the least significant bit of the second byte. Figure 4 graphically +shows this ordering. Bit N is binary 1 if category N is part of the label +for the datagram, and bit N is binary 0 if category N is not part of the +label. Except for the optimized tag 1 format described in the next section, + + + +Internet Draft, Expires 15 Jan 93 [PAGE 4] + + + +CIPSO INTERNET DRAFT 16 July, 1992 + + + +minimal encoding SHOULD be used resulting in no trailing zero octets in the +category bitmap. + + octet 0 octet 1 octet 2 octet 3 octet 4 octet 5 + XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX . . . +bit 01234567 89111111 11112222 22222233 33333333 44444444 +number 012345 67890123 45678901 23456789 01234567 + + Figure 4. Ordering of Bits in Tag 1 Bit Map + + +3.4.2.6 Optimized Tag 1 Format + +Routers work most efficiently when processing fixed length fields. To +support these routers there is an optimized form of tag type 1. The format +does not change. The only change is to the category bitmap which is set to +a constant length of 10 octets. Trailing octets required to fill out the 10 +octets are zero filled. Ten octets, allowing for 80 categories, was chosen +because it makes the total length of the CIPSO option 20 octets. If CIPSO +is the only option then the option will be full word aligned and additional +filler octets will not be required. + + +3.4.3 Tag Type 2 + +This is referred to as the "enumerated" tag type. It is used to describe +large but sparsely populated sets of categories. Tag type 2 is in the MAC +Sensitivity tag type class. The format of this tag type is as follows: + ++----------+----------+----------+----------+-------------//-------------+ +| 00000010 | LLLLLLLL | 00000000 | LLLLLLLL | CCCCCCCCCCCCCCCCCCCCCCCCCC | ++----------+----------+----------+----------+-------------//-------------+ + + TAG TAG ALIGNMENT SENSITIVITY ENUMERATED + TYPE LENGTH OCTET LEVEL CATEGORIES + + Figure 5. Tag Type 2 Format + + +3.4.3.1 Tag Type + +This field is one octet in length and has a value of 2. + + +3.4.3.2 Tag Length + +This field is 1 octet in length. It is the total length of the tag type +including the type and length fields. With the current IP header length +restriction of 40 bytes the value within this field is between 4 and 34. + + +3.4.3.3 Alignment Octet + +This field is 1 octet in length and always has the value of 0. Its purpose +is to align the category field on an even octet boundary. This will + + + +Internet Draft, Expires 15 Jan 93 [PAGE 5] + + + +CIPSO INTERNET DRAFT 16 July, 1992 + + + +speed many implementations including router implementations. + + +3.4.3.4 Sensitivity Level + +This field is 1 octet in length. Its value is from 0 to 255. The values +are ordered with 0 being the minimum value and 255 representing the +maximum value. + + +3.4.3.5 Enumerated Categories + +In this tag, categories are represented by their actual value rather than +by their position within a bit field. The length of each category is 2 +octets. Up to 15 categories may be represented by this tag. Valid values +for categories are 0 to 65534. Category 65535 is not a valid category +value. The categories MUST be listed in ascending order within the tag. + + +3.4.4 Tag Type 5 + +This is referred to as the "range" tag type. It is used to represent +labels where all categories in a range, or set of ranges, are included +in the sensitivity label. Tag type 5 is in the MAC Sensitivity tag type +class. The format of this tag type is as follows: + ++----------+----------+----------+----------+------------//-------------+ +| 00000101 | LLLLLLLL | 00000000 | LLLLLLLL | Top/Bottom | Top/Bottom | ++----------+----------+----------+----------+------------//-------------+ + + TAG TAG ALIGNMENT SENSITIVITY CATEGORY RANGES + TYPE LENGTH OCTET LEVEL + + Figure 6. Tag Type 5 Format + + +3.4.4.1 Tag Type + +This field is one octet in length and has a value of 5. + + +3.4.4.2 Tag Length + +This field is 1 octet in length. It is the total length of the tag type +including the type and length fields. With the current IP header length +restriction of 40 bytes the value within this field is between 4 and 34. + + +3.4.4.3 Alignment Octet + +This field is 1 octet in length and always has the value of 0. Its purpose +is to align the category range field on an even octet boundary. This will +speed many implementations including router implementations. + + + + + +Internet Draft, Expires 15 Jan 93 [PAGE 6] + + + +CIPSO INTERNET DRAFT 16 July, 1992 + + + +3.4.4.4 Sensitivity Level + +This field is 1 octet in length. Its value is from 0 to 255. The values +are ordered with 0 being the minimum value and 255 representing the maximum +value. + + +3.4.4.5 Category Ranges + +A category range is a 4 octet field comprised of the 2 octet index of the +highest numbered category followed by the 2 octet index of the lowest +numbered category. These range endpoints are inclusive within the range of +categories. All categories within a range are included in the sensitivity +label. This tag may contain a maximum of 7 category pairs. The bottom +category endpoint for the last pair in the tag MAY be omitted and SHOULD be +assumed to be 0. The ranges MUST be non-overlapping and be listed in +descending order. Valid values for categories are 0 to 65534. Category +65535 is not a valid category value. + + +3.4.5 Minimum Requirements + +A CIPSO implementation MUST be capable of generating at least tag type 1 in +the non-optimized form. In addition, a CIPSO implementation MUST be able +to receive any valid tag type 1 even those using the optimized tag type 1 +format. + + +4. Configuration Parameters + +The configuration parameters defined below are required for all CIPSO hosts, +gateways, and routers that support multiple sensitivity labels. A CIPSO +host is defined to be the origination or destination system for an IP +datagram. A CIPSO gateway provides IP routing services between two or more +IP networks and may be required to perform label translations between +networks. A CIPSO gateway may be an enhanced CIPSO host or it may just +provide gateway services with no end system CIPSO capabilities. A CIPSO +router is a dedicated IP router that routes IP datagrams between two or more +IP networks. + +An implementation of CIPSO on a host MUST have the capability to reject a +datagram for reasons that the information contained can not be adequately +protected by the receiving host or if acceptance may result in violation of +the host or network security policy. In addition, a CIPSO gateway or router +MUST be able to reject datagrams going to networks that can not provide +adequate protection or may violate the network's security policy. To +provide this capability the following minimal set of configuration +parameters are required for CIPSO implementations: + +HOST_LABEL_MAX - This parameter contains the maximum sensitivity label that +a CIPSO host is authorized to handle. All datagrams that have a label +greater than this maximum MUST be rejected by the CIPSO host. This +parameter does not apply to CIPSO gateways or routers. This parameter need +not be defined explicitly as it can be implicitly derived from the +PORT_LABEL_MAX parameters for the associated interfaces. + + + +Internet Draft, Expires 15 Jan 93 [PAGE 7] + + + +CIPSO INTERNET DRAFT 16 July, 1992 + + + + +HOST_LABEL_MIN - This parameter contains the minimum sensitivity label that +a CIPSO host is authorized to handle. All datagrams that have a label less +than this minimum MUST be rejected by the CIPSO host. This parameter does +not apply to CIPSO gateways or routers. This parameter need not be defined +explicitly as it can be implicitly derived from the PORT_LABEL_MIN +parameters for the associated interfaces. + +PORT_LABEL_MAX - This parameter contains the maximum sensitivity label for +all datagrams that may exit a particular network interface port. All +outgoing datagrams that have a label greater than this maximum MUST be +rejected by the CIPSO system. The label within this parameter MUST be +less than or equal to the label within the HOST_LABEL_MAX parameter. This +parameter does not apply to CIPSO hosts that support only one network port. + +PORT_LABEL_MIN - This parameter contains the minimum sensitivity label for +all datagrams that may exit a particular network interface port. All +outgoing datagrams that have a label less than this minimum MUST be +rejected by the CIPSO system. The label within this parameter MUST be +greater than or equal to the label within the HOST_LABEL_MIN parameter. +This parameter does not apply to CIPSO hosts that support only one network +port. + +PORT_DOI - This parameter is used to assign a DOI identifier value to a +particular network interface port. All CIPSO labels within datagrams +going out this port MUST use the specified DOI identifier. All CIPSO +hosts and gateways MUST support either this parameter, the NET_DOI +parameter, or the HOST_DOI parameter. + +NET_DOI - This parameter is used to assign a DOI identifier value to a +particular IP network address. All CIPSO labels within datagrams destined +for the particular IP network MUST use the specified DOI identifier. All +CIPSO hosts and gateways MUST support either this parameter, the PORT_DOI +parameter, or the HOST_DOI parameter. + +HOST_DOI - This parameter is used to assign a DOI identifier value to a +particular IP host address. All CIPSO labels within datagrams destined for +the particular IP host will use the specified DOI identifier. All CIPSO +hosts and gateways MUST support either this parameter, the PORT_DOI +parameter, or the NET_DOI parameter. + +This list represents the minimal set of configuration parameters required +to be compliant. Implementors are encouraged to add to this list to +provide enhanced functionality and control. For example, many security +policies may require both incoming and outgoing datagrams be checked against +the port and host label ranges. + + +4.1 Port Range Parameters + +The labels represented by the PORT_LABEL_MAX and PORT_LABEL_MIN parameters +MAY be in CIPSO or local format. Some CIPSO systems, such as routers, may +want to have the range parameters expressed in CIPSO format so that incoming +labels do not have to be converted to a local format before being compared +against the range. If multiple DOIs are supported by one of these CIPSO + + + +Internet Draft, Expires 15 Jan 93 [PAGE 8] + + + +CIPSO INTERNET DRAFT 16 July, 1992 + + + +systems then multiple port range parameters would be needed, one set for +each DOI supported on a particular port. + +The port range will usually represent the total set of labels that may +exist on the logical network accessed through the corresponding network +interface. It may, however, represent a subset of these labels that are +allowed to enter the CIPSO system. + + +4.2 Single Label CIPSO Hosts + +CIPSO implementations that support only one label are not required to +support the parameters described above. These limited implementations are +only required to support a NET_LABEL parameter. This parameter contains +the CIPSO label that may be inserted in datagrams that exit the host. In +addition, the host MUST reject any incoming datagram that has a label which +is not equivalent to the NET_LABEL parameter. + + +5. Handling Procedures + +This section describes the processing requirements for incoming and +outgoing IP datagrams. Just providing the correct CIPSO label format +is not enough. Assumptions will be made by one system on how a +receiving system will handle the CIPSO label. Wrong assumptions may +lead to non-interoperability or even a security incident. The +requirements described below represent the minimal set needed for +interoperability and that provide users some level of confidence. +Many other requirements could be added to increase user confidence, +however at the risk of restricting creativity and limiting vendor +participation. + + +5.1 Input Procedures + +All datagrams received through a network port MUST have a security label +associated with them, either contained in the datagram or assigned to the +receiving port. Without this label the host, gateway, or router will not +have the information it needs to make security decisions. This security +label will be obtained from the CIPSO if the option is present in the +datagram. See section 4.1.2 for handling procedures for unlabeled +datagrams. This label will be compared against the PORT (if appropriate) +and HOST configuration parameters defined in section 3. + +If any field within the CIPSO option, such as the DOI identifier, is not +recognized the IP datagram is discarded and an ICMP "parameter problem" +(type 12) is generated and returned. The ICMP code field is set to "bad +parameter" (code 0) and the pointer is set to the start of the CIPSO field +that is unrecognized. + +If the contents of the CIPSO are valid but the security label is +outside of the configured host or port label range, the datagram is +discarded and an ICMP "destination unreachable" (type 3) is generated +and returned. The code field of the ICMP is set to "communication with +destination network administratively prohibited" (code 9) or to + + + +Internet Draft, Expires 15 Jan 93 [PAGE 9] + + + +CIPSO INTERNET DRAFT 16 July, 1992 + + + +"communication with destination host administratively prohibited" +(code 10). The value of the code field used is dependent upon whether +the originator of the ICMP message is acting as a CIPSO host or a CIPSO +gateway. The recipient of the ICMP message MUST be able to handle either +value. The same procedure is performed if a CIPSO can not be added to an +IP packet because it is too large to fit in the IP options area. + +If the error is triggered by receipt of an ICMP message, the message +is discarded and no response is permitted (consistent with general ICMP +processing rules). + + +5.1.1 Unrecognized tag types + +The default condition for any CIPSO implementation is that an +unrecognized tag type MUST be treated as a "parameter problem" and +handled as described in section 4.1. A CIPSO implementation MAY allow +the system administrator to identify tag types that may safely be +ignored. This capability is an allowable enhancement, not a +requirement. + + +5.1.2 Unlabeled Packets + +A network port may be configured to not require a CIPSO label for all +incoming datagrams. For this configuration a CIPSO label must be +assigned to that network port and associated with all unlabeled IP +datagrams. This capability might be used for single level networks or +networks that have CIPSO and non-CIPSO hosts and the non-CIPSO hosts +all operate at the same label. + +If a CIPSO option is required and none is found, the datagram is +discarded and an ICMP "parameter problem" (type 12) is generated and +returned to the originator of the datagram. The code field of the ICMP +is set to "option missing" (code 1) and the ICMP pointer is set to 134 +(the value of the option type for the missing CIPSO option). + + +5.2 Output Procedures + +A CIPSO option MUST appear only once in a datagram. Only one tag type +from the MAC Sensitivity class MAY be included in a CIPSO option. Given +the current set of defined tag types, this means that CIPSO labels at +first will contain only one tag. + +All datagrams leaving a CIPSO system MUST meet the following condition: + + PORT_LABEL_MIN <= CIPSO label <= PORT_LABEL_MAX + +If this condition is not satisfied the datagram MUST be discarded. +If the CIPSO system only supports one port, the HOST_LABEL_MIN and the +HOST_LABEL_MAX parameters MAY be substituted for the PORT parameters in +the above condition. + +The DOI identifier to be used for all outgoing datagrams is configured by + + + +Internet Draft, Expires 15 Jan 93 [PAGE 10] + + + +CIPSO INTERNET DRAFT 16 July, 1992 + + + +the administrator. If port level DOI identifier assignment is used, then +the PORT_DOI configuration parameter MUST contain the DOI identifier to +use. If network level DOI assignment is used, then the NET_DOI parameter +MUST contain the DOI identifier to use. And if host level DOI assignment +is employed, then the HOST_DOI parameter MUST contain the DOI identifier +to use. A CIPSO implementation need only support one level of DOI +assignment. + + +5.3 DOI Processing Requirements + +A CIPSO implementation MUST support at least one DOI and SHOULD support +multiple DOIs. System and network administrators are cautioned to +ensure that at least one DOI is common within an IP network to allow for +broadcasting of IP datagrams. + +CIPSO gateways MUST be capable of translating a CIPSO option from one +DOI to another when forwarding datagrams between networks. For +efficiency purposes this capability is only a desired feature for CIPSO +routers. + + +5.4 Label of ICMP Messages + +The CIPSO label to be used on all outgoing ICMP messages MUST be equivalent +to the label of the datagram that caused the ICMP message. If the ICMP was +generated due to a problem associated with the original CIPSO label then the +following responses are allowed: + + a. Use the CIPSO label of the original IP datagram + b. Drop the original datagram with no return message generated + +In most cases these options will have the same effect. If you can not +interpret the label or if it is outside the label range of your host or +interface then an ICMP message with the same label will probably not be +able to exit the system. + + +6. Assignment of DOI Identifier Numbers = + +Requests for assignment of a DOI identifier number should be addressed to +the Internet Assigned Numbers Authority (IANA). + + +7. Acknowledgements + +Much of the material in this RFC is based on (and copied from) work +done by Gary Winiger of Sun Microsystems and published as Commercial +IP Security Option at the INTEROP 89, Commercial IPSO Workshop. + + +8. Author's Address + +To submit mail for distribution to members of the IETF CIPSO Working +Group, send mail to: cipso@wdl1.wdl.loral.com. + + + +Internet Draft, Expires 15 Jan 93 [PAGE 11] + + + +CIPSO INTERNET DRAFT 16 July, 1992 + + + + +To be added to or deleted from this distribution, send mail to: +cipso-request@wdl1.wdl.loral.com. + + +9. References + +RFC 1038, "Draft Revised IP Security Option", M. St. Johns, IETF, January +1988. + +RFC 1108, "U.S. Department of Defense Security Options +for the Internet Protocol", Stephen Kent, IAB, 1 March, 1991. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Internet Draft, Expires 15 Jan 93 [PAGE 12] + + + diff --git a/Documentation/netlabel/introduction.txt b/Documentation/netlabel/introduction.txt new file mode 100644 index 000000000000..a4ffba1694c8 --- /dev/null +++ b/Documentation/netlabel/introduction.txt @@ -0,0 +1,46 @@ +NetLabel Introduction +============================================================================== +Paul Moore, paul.moore@hp.com + +August 2, 2006 + + * Overview + +NetLabel is a mechanism which can be used by kernel security modules to attach +security attributes to outgoing network packets generated from user space +applications and read security attributes from incoming network packets. It +is composed of three main components, the protocol engines, the communication +layer, and the kernel security module API. + + * Protocol Engines + +The protocol engines are responsible for both applying and retrieving the +network packet's security attributes. If any translation between the network +security attributes and those on the host are required then the protocol +engine will handle those tasks as well. Other kernel subsystems should +refrain from calling the protocol engines directly, instead they should use +the NetLabel kernel security module API described below. + +Detailed information about each NetLabel protocol engine can be found in this +directory, consult '00-INDEX' for filenames. + + * Communication Layer + +The communication layer exists to allow NetLabel configuration and monitoring +from user space. The NetLabel communication layer uses a message based +protocol built on top of the Generic NETLINK transport mechanism. The exact +formatting of these NetLabel messages as well as the Generic NETLINK family +names can be found in the the 'net/netlabel/' directory as comments in the +header files as well as in 'include/net/netlabel.h'. + + * Security Module API + +The purpose of the NetLabel security module API is to provide a protocol +independent interface to the underlying NetLabel protocol engines. In addition +to protocol independence, the security module API is designed to be completely +LSM independent which should allow multiple LSMs to leverage the same code +base. + +Detailed information about the NetLabel security module API can be found in the +'include/net/netlabel.h' header file as well as the 'lsm_interface.txt' file +found in this directory. diff --git a/Documentation/netlabel/lsm_interface.txt b/Documentation/netlabel/lsm_interface.txt new file mode 100644 index 000000000000..98dd9f7430f2 --- /dev/null +++ b/Documentation/netlabel/lsm_interface.txt @@ -0,0 +1,47 @@ +NetLabel Linux Security Module Interface +============================================================================== +Paul Moore, paul.moore@hp.com + +May 17, 2006 + + * Overview + +NetLabel is a mechanism which can set and retrieve security attributes from +network packets. It is intended to be used by LSM developers who want to make +use of a common code base for several different packet labeling protocols. +The NetLabel security module API is defined in 'include/net/netlabel.h' but a +brief overview is given below. + + * NetLabel Security Attributes + +Since NetLabel supports multiple different packet labeling protocols and LSMs +it uses the concept of security attributes to refer to the packet's security +labels. The NetLabel security attributes are defined by the +'netlbl_lsm_secattr' structure in the NetLabel header file. Internally the +NetLabel subsystem converts the security attributes to and from the correct +low-level packet label depending on the NetLabel build time and run time +configuration. It is up to the LSM developer to translate the NetLabel +security attributes into whatever security identifiers are in use for their +particular LSM. + + * NetLabel LSM Protocol Operations + +These are the functions which allow the LSM developer to manipulate the labels +on outgoing packets as well as read the labels on incoming packets. Functions +exist to operate both on sockets as well as the sk_buffs directly. These high +level functions are translated into low level protocol operations based on how +the administrator has configured the NetLabel subsystem. + + * NetLabel Label Mapping Cache Operations + +Depending on the exact configuration, translation between the network packet +label and the internal LSM security identifier can be time consuming. The +NetLabel label mapping cache is a caching mechanism which can be used to +sidestep much of this overhead once a mapping has been established. Once the +LSM has received a packet, used NetLabel to decode it's security attributes, +and translated the security attributes into a LSM internal identifier the LSM +can use the NetLabel caching functions to associate the LSM internal +identifier with the network packet's label. This means that in the future +when a incoming packet matches a cached value not only are the internal +NetLabel translation mechanisms bypassed but the LSM translation mechanisms are +bypassed as well which should result in a significant reduction in overhead. diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 90ed78110fd4..307cd4ec8edd 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -375,6 +375,41 @@ tcp_slow_start_after_idle - BOOLEAN be timed out after an idle period. Default: 1 +CIPSOv4 Variables: + +cipso_cache_enable - BOOLEAN + If set, enable additions to and lookups from the CIPSO label mapping + cache. If unset, additions are ignored and lookups always result in a + miss. However, regardless of the setting the cache is still + invalidated when required when means you can safely toggle this on and + off and the cache will always be "safe". + Default: 1 + +cipso_cache_bucket_size - INTEGER + The CIPSO label cache consists of a fixed size hash table with each + hash bucket containing a number of cache entries. This variable limits + the number of entries in each hash bucket; the larger the value the + more CIPSO label mappings that can be cached. When the number of + entries in a given hash bucket reaches this limit adding new entries + causes the oldest entry in the bucket to be removed to make room. + Default: 10 + +cipso_rbm_optfmt - BOOLEAN + Enable the "Optimized Tag 1 Format" as defined in section 3.4.2.6 of + the CIPSO draft specification (see Documentation/netlabel for details). + This means that when set the CIPSO tag will be padded with empty + categories in order to make the packet data 32-bit aligned. + Default: 0 + +cipso_rbm_structvalid - BOOLEAN + If set, do a very strict check of the CIPSO option when + ip_options_compile() is called. If unset, relax the checks done during + ip_options_compile(). Either way is "safe" as errors are caught else + where in the CIPSO processing code but setting this to 0 (False) should + result in less work (i.e. it should be faster) but could cause problems + with other implementations that require strict checking. + Default: 0 + IP Variables: ip_local_port_range - 2 INTEGERS From 11a03f78fbf15a866ba3bf6359a75cdfd1ced703 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 3 Aug 2006 16:46:20 -0700 Subject: [PATCH 0389/1063] [NetLabel]: core network changes Changes to the core network stack to support the NetLabel subsystem. This includes changes to the IPv4 option handling to support CIPSO labels. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- include/linux/ip.h | 1 + include/net/cipso_ipv4.h | 250 +++++++++++++++++++++++++++++++++ include/net/inet_sock.h | 2 +- include/net/netlabel.h | 291 +++++++++++++++++++++++++++++++++++++++ net/ipv4/ah4.c | 2 +- net/ipv4/ip_options.c | 19 +++ 6 files changed, 563 insertions(+), 2 deletions(-) create mode 100644 include/net/cipso_ipv4.h create mode 100644 include/net/netlabel.h diff --git a/include/linux/ip.h b/include/linux/ip.h index 4b55cf1df732..2f4600146f83 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -57,6 +57,7 @@ #define IPOPT_SEC (2 |IPOPT_CONTROL|IPOPT_COPY) #define IPOPT_LSRR (3 |IPOPT_CONTROL|IPOPT_COPY) #define IPOPT_TIMESTAMP (4 |IPOPT_MEASUREMENT) +#define IPOPT_CIPSO (6 |IPOPT_CONTROL|IPOPT_COPY) #define IPOPT_RR (7 |IPOPT_CONTROL) #define IPOPT_SID (8 |IPOPT_CONTROL|IPOPT_COPY) #define IPOPT_SSRR (9 |IPOPT_CONTROL|IPOPT_COPY) diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h new file mode 100644 index 000000000000..c7175e725804 --- /dev/null +++ b/include/net/cipso_ipv4.h @@ -0,0 +1,250 @@ +/* + * CIPSO - Commercial IP Security Option + * + * This is an implementation of the CIPSO 2.2 protocol as specified in + * draft-ietf-cipso-ipsecurity-01.txt with additional tag types as found in + * FIPS-188, copies of both documents can be found in the Documentation + * directory. While CIPSO never became a full IETF RFC standard many vendors + * have chosen to adopt the protocol and over the years it has become a + * de-facto standard for labeled networking. + * + * Author: Paul Moore + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _CIPSO_IPV4_H +#define _CIPSO_IPV4_H + +#include +#include +#include +#include + +/* known doi values */ +#define CIPSO_V4_DOI_UNKNOWN 0x00000000 + +/* tag types */ +#define CIPSO_V4_TAG_INVALID 0 +#define CIPSO_V4_TAG_RBITMAP 1 +#define CIPSO_V4_TAG_ENUM 2 +#define CIPSO_V4_TAG_RANGE 5 +#define CIPSO_V4_TAG_PBITMAP 6 +#define CIPSO_V4_TAG_FREEFORM 7 + +/* doi mapping types */ +#define CIPSO_V4_MAP_UNKNOWN 0 +#define CIPSO_V4_MAP_STD 1 +#define CIPSO_V4_MAP_PASS 2 + +/* limits */ +#define CIPSO_V4_MAX_REM_LVLS 256 +#define CIPSO_V4_INV_LVL 0x80000000 +#define CIPSO_V4_MAX_LOC_LVLS (CIPSO_V4_INV_LVL - 1) +#define CIPSO_V4_MAX_REM_CATS 65536 +#define CIPSO_V4_INV_CAT 0x80000000 +#define CIPSO_V4_MAX_LOC_CATS (CIPSO_V4_INV_CAT - 1) + +/* + * CIPSO DOI definitions + */ + +/* DOI definition struct */ +#define CIPSO_V4_TAG_MAXCNT 5 +struct cipso_v4_doi { + u32 doi; + u32 type; + union { + struct cipso_v4_std_map_tbl *std; + } map; + u8 tags[CIPSO_V4_TAG_MAXCNT]; + + u32 valid; + struct list_head list; + struct rcu_head rcu; + struct list_head dom_list; +}; + +/* Standard CIPSO mapping table */ +/* NOTE: the highest order bit (i.e. 0x80000000) is an 'invalid' flag, if the + * bit is set then consider that value as unspecified, meaning the + * mapping for that particular level/category is invalid */ +struct cipso_v4_std_map_tbl { + struct { + u32 *cipso; + u32 *local; + u32 cipso_size; + u32 local_size; + } lvl; + struct { + u32 *cipso; + u32 *local; + u32 cipso_size; + u32 local_size; + } cat; +}; + +/* + * Sysctl Variables + */ + +#ifdef CONFIG_NETLABEL +extern int cipso_v4_cache_enabled; +extern int cipso_v4_cache_bucketsize; +extern int cipso_v4_rbm_optfmt; +extern int cipso_v4_rbm_strictvalid; +#endif + +/* + * Helper Functions + */ + +#define CIPSO_V4_OPTEXIST(x) (IPCB(x)->opt.cipso != 0) +#define CIPSO_V4_OPTPTR(x) ((x)->nh.raw + IPCB(x)->opt.cipso) + +/* + * DOI List Functions + */ + +#ifdef CONFIG_NETLABEL +int cipso_v4_doi_add(struct cipso_v4_doi *doi_def); +int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head)); +struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi); +struct sk_buff *cipso_v4_doi_dump_all(size_t headroom); +struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom); +int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain); +int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, + const char *domain); +#else +static inline int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) +{ + return -ENOSYS; +} + +static inline int cipso_v4_doi_remove(u32 doi, + void (*callback) (struct rcu_head * head)) +{ + return 0; +} + +static inline struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) +{ + return NULL; +} + +static inline struct sk_buff *cipso_v4_doi_dump_all(size_t headroom) +{ + return NULL; +} + +static inline struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom) +{ + return NULL; +} + +static inline int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, + const char *domain) +{ + return -ENOSYS; +} + +static inline int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, + const char *domain) +{ + return 0; +} +#endif /* CONFIG_NETLABEL */ + +/* + * Label Mapping Cache Functions + */ + +#ifdef CONFIG_NETLABEL +void cipso_v4_cache_invalidate(void); +int cipso_v4_cache_add(const struct sk_buff *skb, + const struct netlbl_lsm_secattr *secattr); +#else +static inline void cipso_v4_cache_invalidate(void) +{ + return; +} + +static inline int cipso_v4_cache_add(const struct sk_buff *skb, + const struct netlbl_lsm_secattr *secattr) +{ + return 0; +} +#endif /* CONFIG_NETLABEL */ + +/* + * Protocol Handling Functions + */ + +#ifdef CONFIG_NETLABEL +void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway); +int cipso_v4_socket_setopt(struct socket *sock, + unsigned char *opt, + u32 opt_len); +int cipso_v4_socket_setattr(const struct socket *sock, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr); +int cipso_v4_socket_getopt(const struct socket *sock, + unsigned char **opt, + u32 *opt_len); +int cipso_v4_socket_getattr(const struct socket *sock, + struct netlbl_lsm_secattr *secattr); +int cipso_v4_skbuff_getattr(const struct sk_buff *skb, + struct netlbl_lsm_secattr *secattr); +int cipso_v4_validate(unsigned char **option); +#else +static inline void cipso_v4_error(struct sk_buff *skb, + int error, + u32 gateway) +{ + return; +} + +static inline int cipso_v4_socket_setattr(const struct socket *sock, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + +static inline int cipso_v4_socket_getattr(const struct socket *sock, + struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + +static inline int cipso_v4_skbuff_getattr(const struct sk_buff *skb, + struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + +static inline int cipso_v4_validate(unsigned char **option) +{ + return -ENOSYS; +} +#endif /* CONFIG_NETLABEL */ + +#endif /* _CIPSO_IPV4_H */ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 1f4a9a60d4cc..f4caad56cd03 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -51,7 +51,7 @@ struct ip_options { ts_needtime:1, ts_needaddr:1; unsigned char router_alert; - unsigned char __pad1; + unsigned char cipso; unsigned char __pad2; unsigned char __data[0]; }; diff --git a/include/net/netlabel.h b/include/net/netlabel.h new file mode 100644 index 000000000000..7cae730832c7 --- /dev/null +++ b/include/net/netlabel.h @@ -0,0 +1,291 @@ +/* + * NetLabel System + * + * The NetLabel system manages static and dynamic label mappings for network + * protocols such as CIPSO and RIPSO. + * + * Author: Paul Moore + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _NETLABEL_H +#define _NETLABEL_H + +#include +#include +#include + +/* + * NetLabel - A management interface for maintaining network packet label + * mapping tables for explicit packet labling protocols. + * + * Network protocols such as CIPSO and RIPSO require a label translation layer + * to convert the label on the packet into something meaningful on the host + * machine. In the current Linux implementation these mapping tables live + * inside the kernel; NetLabel provides a mechanism for user space applications + * to manage these mapping tables. + * + * NetLabel makes use of the Generic NETLINK mechanism as a transport layer to + * send messages between kernel and user space. The general format of a + * NetLabel message is shown below: + * + * +-----------------+-------------------+--------- --- -- - + * | struct nlmsghdr | struct genlmsghdr | payload + * +-----------------+-------------------+--------- --- -- - + * + * The 'nlmsghdr' and 'genlmsghdr' structs should be dealt with like normal. + * The payload is dependent on the subsystem specified in the + * 'nlmsghdr->nlmsg_type' and should be defined below, supporting functions + * should be defined in the corresponding net/netlabel/netlabel_.h|c + * file. All of the fields in the NetLabel payload are NETLINK attributes, the + * length of each field is the length of the NETLINK attribute payload, see + * include/net/netlink.h for more information on NETLINK attributes. + * + */ + +/* + * NetLabel NETLINK protocol + */ + +#define NETLBL_PROTO_VERSION 1 + +/* NetLabel NETLINK types/families */ +#define NETLBL_NLTYPE_NONE 0 +#define NETLBL_NLTYPE_MGMT 1 +#define NETLBL_NLTYPE_MGMT_NAME "NLBL_MGMT" +#define NETLBL_NLTYPE_RIPSO 2 +#define NETLBL_NLTYPE_RIPSO_NAME "NLBL_RIPSO" +#define NETLBL_NLTYPE_CIPSOV4 3 +#define NETLBL_NLTYPE_CIPSOV4_NAME "NLBL_CIPSOv4" +#define NETLBL_NLTYPE_CIPSOV6 4 +#define NETLBL_NLTYPE_CIPSOV6_NAME "NLBL_CIPSOv6" +#define NETLBL_NLTYPE_UNLABELED 5 +#define NETLBL_NLTYPE_UNLABELED_NAME "NLBL_UNLBL" + +/* NetLabel return codes */ +#define NETLBL_E_OK 0 + +/* + * Helper functions + */ + +#define NETLBL_LEN_U8 nla_total_size(sizeof(u8)) +#define NETLBL_LEN_U16 nla_total_size(sizeof(u16)) +#define NETLBL_LEN_U32 nla_total_size(sizeof(u32)) + +/** + * netlbl_netlink_alloc_skb - Allocate a NETLINK message buffer + * @head: the amount of headroom in bytes + * @body: the desired size (minus headroom) in bytes + * @gfp_flags: the alloc flags to pass to alloc_skb() + * + * Description: + * Allocate a NETLINK message buffer based on the sizes given in @head and + * @body. If @head is greater than zero skb_reserve() is called to reserve + * @head bytes at the start of the buffer. Returns a valid sk_buff pointer on + * success, NULL on failure. + * + */ +static inline struct sk_buff *netlbl_netlink_alloc_skb(size_t head, + size_t body, + int gfp_flags) +{ + struct sk_buff *skb; + + skb = alloc_skb(NLMSG_ALIGN(head + body), gfp_flags); + if (skb == NULL) + return NULL; + if (head > 0) { + skb_reserve(skb, head); + if (skb_tailroom(skb) < body) { + kfree_skb(skb); + return NULL; + } + } + + return skb; +} + +/* + * NetLabel - Kernel API for accessing the network packet label mappings. + * + * The following functions are provided for use by other kernel modules, + * specifically kernel LSM modules, to provide a consistent, transparent API + * for dealing with explicit packet labeling protocols such as CIPSO and + * RIPSO. The functions defined here are implemented in the + * net/netlabel/netlabel_kapi.c file. + * + */ + +/* Domain mapping definition struct */ +struct netlbl_dom_map; + +/* Domain mapping operations */ +int netlbl_domhsh_remove(const char *domain); + +/* LSM security attributes */ +struct netlbl_lsm_cache { + void (*free) (const void *data); + void *data; +}; +struct netlbl_lsm_secattr { + char *domain; + + u32 mls_lvl; + u32 mls_lvl_vld; + unsigned char *mls_cat; + size_t mls_cat_len; + + struct netlbl_lsm_cache cache; +}; + +/* + * LSM security attribute operations + */ + + +/** + * netlbl_secattr_init - Initialize a netlbl_lsm_secattr struct + * @secattr: the struct to initialize + * + * Description: + * Initialize an already allocated netlbl_lsm_secattr struct. Returns zero on + * success, negative values on error. + * + */ +static inline int netlbl_secattr_init(struct netlbl_lsm_secattr *secattr) +{ + memset(secattr, 0, sizeof(*secattr)); + return 0; +} + +/** + * netlbl_secattr_destroy - Clears a netlbl_lsm_secattr struct + * @secattr: the struct to clear + * @clear_cache: cache clear flag + * + * Description: + * Destroys the @secattr struct, including freeing all of the internal buffers. + * If @clear_cache is true then free the cache fields, otherwise leave them + * intact. The struct must be reset with a call to netlbl_secattr_init() + * before reuse. + * + */ +static inline void netlbl_secattr_destroy(struct netlbl_lsm_secattr *secattr, + u32 clear_cache) +{ + if (clear_cache && secattr->cache.data != NULL && secattr->cache.free) + secattr->cache.free(secattr->cache.data); + kfree(secattr->domain); + kfree(secattr->mls_cat); +} + +/** + * netlbl_secattr_alloc - Allocate and initialize a netlbl_lsm_secattr struct + * @flags: the memory allocation flags + * + * Description: + * Allocate and initialize a netlbl_lsm_secattr struct. Returns a valid + * pointer on success, or NULL on failure. + * + */ +static inline struct netlbl_lsm_secattr *netlbl_secattr_alloc(int flags) +{ + return kzalloc(sizeof(struct netlbl_lsm_secattr), flags); +} + +/** + * netlbl_secattr_free - Frees a netlbl_lsm_secattr struct + * @secattr: the struct to free + * @clear_cache: cache clear flag + * + * Description: + * Frees @secattr including all of the internal buffers. If @clear_cache is + * true then free the cache fields, otherwise leave them intact. + * + */ +static inline void netlbl_secattr_free(struct netlbl_lsm_secattr *secattr, + u32 clear_cache) +{ + netlbl_secattr_destroy(secattr, clear_cache); + kfree(secattr); +} + +/* + * LSM protocol operations + */ + +#ifdef CONFIG_NETLABEL +int netlbl_socket_setattr(const struct socket *sock, + const struct netlbl_lsm_secattr *secattr); +int netlbl_socket_getattr(const struct socket *sock, + struct netlbl_lsm_secattr *secattr); +int netlbl_skbuff_getattr(const struct sk_buff *skb, + struct netlbl_lsm_secattr *secattr); +void netlbl_skbuff_err(struct sk_buff *skb, int error); +#else +static inline int netlbl_socket_setattr(const struct socket *sock, + const struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + +static inline int netlbl_socket_getattr(const struct socket *sock, + struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + +static inline int netlbl_skbuff_getattr(const struct sk_buff *skb, + struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + +static inline void netlbl_skbuff_err(struct sk_buff *skb, int error) +{ + return; +} +#endif /* CONFIG_NETLABEL */ + +/* + * LSM label mapping cache operations + */ + +#ifdef CONFIG_NETLABEL +void netlbl_cache_invalidate(void); +int netlbl_cache_add(const struct sk_buff *skb, + const struct netlbl_lsm_secattr *secattr); +#else +static inline void netlbl_cache_invalidate(void) +{ + return; +} + +static inline int netlbl_cache_add(const struct sk_buff *skb, + const struct netlbl_lsm_secattr *secattr) +{ + return 0; +} +#endif /* CONFIG_NETLABEL */ + +#endif /* _NETLABEL_H */ diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 2b98943e6b02..008e69d2e423 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -35,7 +35,7 @@ static int ip_clear_mutable_options(struct iphdr *iph, u32 *daddr) switch (*optptr) { case IPOPT_SEC: case 0x85: /* Some "Extended Security" crap. */ - case 0x86: /* Another "Commercial Security" crap. */ + case IPOPT_CIPSO: case IPOPT_RA: case 0x80|21: /* RFC1770 */ break; diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 406056edc02b..e0a93b4fa8cc 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -24,6 +24,7 @@ #include #include #include +#include /* * Write options to IP header, record destination address to @@ -194,6 +195,13 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) dopt->is_strictroute = sopt->is_strictroute; } } + if (sopt->cipso) { + optlen = sptr[sopt->cipso+1]; + dopt->cipso = dopt->optlen+sizeof(struct iphdr); + memcpy(dptr, sptr+sopt->cipso, optlen); + dptr += optlen; + dopt->optlen += optlen; + } while (dopt->optlen & 3) { *dptr++ = IPOPT_END; dopt->optlen++; @@ -434,6 +442,17 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) if (optptr[2] == 0 && optptr[3] == 0) opt->router_alert = optptr - iph; break; + case IPOPT_CIPSO: + if (opt->cipso) { + pp_ptr = optptr; + goto error; + } + opt->cipso = optptr - iph; + if (cipso_v4_validate(&optptr)) { + pp_ptr = optptr; + goto error; + } + break; case IPOPT_SEC: case IPOPT_SID: default: From 446fda4f26822b2d42ab3396aafcedf38a9ff2b6 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 3 Aug 2006 16:48:06 -0700 Subject: [PATCH 0390/1063] [NetLabel]: CIPSOv4 engine Add support for the Commercial IP Security Option (CIPSO) to the IPv4 network stack. CIPSO has become a de-facto standard for trusted/labeled networking amongst existing Trusted Operating Systems such as Trusted Solaris, HP-UX CMW, etc. This implementation is designed to be used with the NetLabel subsystem to provide explicit packet labeling to LSM developers. The CIPSO/IPv4 packet labeling works by the LSM calling a NetLabel API function which attaches a CIPSO label (IPv4 option) to a given socket; this in turn attaches the CIPSO label to every packet leaving the socket without any extra processing on the outbound side. On the inbound side the individual packet's sk_buff is examined through a call to a NetLabel API function to determine if a CIPSO/IPv4 label is present and if so the security attributes of the CIPSO label are returned to the caller of the NetLabel API function. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- include/linux/sysctl.h | 4 + net/ipv4/Makefile | 1 + net/ipv4/cipso_ipv4.c | 1607 ++++++++++++++++++++++++++++++++++++ net/ipv4/sysctl_net_ipv4.c | 35 + 4 files changed, 1647 insertions(+) create mode 100644 net/ipv4/cipso_ipv4.c diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index e4b1a4d4dcf3..af61d9235409 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -411,6 +411,10 @@ enum NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS=115, NET_TCP_DMA_COPYBREAK=116, NET_TCP_SLOW_START_AFTER_IDLE=117, + NET_CIPSOV4_CACHE_ENABLE=118, + NET_CIPSOV4_CACHE_BUCKET_SIZE=119, + NET_CIPSOV4_RBM_OPTFMT=120, + NET_CIPSOV4_RBM_STRICTVALID=121, }; enum { diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 4878fc5be85f..f66049e28aeb 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -47,6 +47,7 @@ obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o +obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ xfrm4_output.o diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c new file mode 100644 index 000000000000..b82a101c95c5 --- /dev/null +++ b/net/ipv4/cipso_ipv4.c @@ -0,0 +1,1607 @@ +/* + * CIPSO - Commercial IP Security Option + * + * This is an implementation of the CIPSO 2.2 protocol as specified in + * draft-ietf-cipso-ipsecurity-01.txt with additional tag types as found in + * FIPS-188, copies of both documents can be found in the Documentation + * directory. While CIPSO never became a full IETF RFC standard many vendors + * have chosen to adopt the protocol and over the years it has become a + * de-facto standard for labeled networking. + * + * Author: Paul Moore + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct cipso_v4_domhsh_entry { + char *domain; + u32 valid; + struct list_head list; + struct rcu_head rcu; +}; + +/* List of available DOI definitions */ +/* XXX - Updates should be minimal so having a single lock for the + * cipso_v4_doi_list and the cipso_v4_doi_list->dom_list should be + * okay. */ +/* XXX - This currently assumes a minimal number of different DOIs in use, + * if in practice there are a lot of different DOIs this list should + * probably be turned into a hash table or something similar so we + * can do quick lookups. */ +DEFINE_SPINLOCK(cipso_v4_doi_list_lock); +static struct list_head cipso_v4_doi_list = LIST_HEAD_INIT(cipso_v4_doi_list); + +/* Label mapping cache */ +int cipso_v4_cache_enabled = 1; +int cipso_v4_cache_bucketsize = 10; +#define CIPSO_V4_CACHE_BUCKETBITS 7 +#define CIPSO_V4_CACHE_BUCKETS (1 << CIPSO_V4_CACHE_BUCKETBITS) +#define CIPSO_V4_CACHE_REORDERLIMIT 10 +struct cipso_v4_map_cache_bkt { + spinlock_t lock; + u32 size; + struct list_head list; +}; +struct cipso_v4_map_cache_entry { + u32 hash; + unsigned char *key; + size_t key_len; + + struct netlbl_lsm_cache lsm_data; + + u32 activity; + struct list_head list; +}; +static struct cipso_v4_map_cache_bkt *cipso_v4_cache = NULL; + +/* Restricted bitmap (tag #1) flags */ +int cipso_v4_rbm_optfmt = 0; +int cipso_v4_rbm_strictvalid = 1; + +/* + * Helper Functions + */ + +/** + * cipso_v4_bitmap_walk - Walk a bitmap looking for a bit + * @bitmap: the bitmap + * @bitmap_len: length in bits + * @offset: starting offset + * @state: if non-zero, look for a set (1) bit else look for a cleared (0) bit + * + * Description: + * Starting at @offset, walk the bitmap from left to right until either the + * desired bit is found or we reach the end. Return the bit offset, -1 if + * not found, or -2 if error. + */ +static int cipso_v4_bitmap_walk(const unsigned char *bitmap, + u32 bitmap_len, + u32 offset, + u8 state) +{ + u32 bit_spot; + u32 byte_offset; + unsigned char bitmask; + unsigned char byte; + + /* gcc always rounds to zero when doing integer division */ + byte_offset = offset / 8; + byte = bitmap[byte_offset]; + bit_spot = offset; + bitmask = 0x80 >> (offset % 8); + + while (bit_spot < bitmap_len) { + if ((state && (byte & bitmask) == bitmask) || + (state == 0 && (byte & bitmask) == 0)) + return bit_spot; + + bit_spot++; + bitmask >>= 1; + if (bitmask == 0) { + byte = bitmap[++byte_offset]; + bitmask = 0x80; + } + } + + return -1; +} + +/** + * cipso_v4_bitmap_setbit - Sets a single bit in a bitmap + * @bitmap: the bitmap + * @bit: the bit + * @state: if non-zero, set the bit (1) else clear the bit (0) + * + * Description: + * Set a single bit in the bitmask. Returns zero on success, negative values + * on error. + */ +static void cipso_v4_bitmap_setbit(unsigned char *bitmap, + u32 bit, + u8 state) +{ + u32 byte_spot; + u8 bitmask; + + /* gcc always rounds to zero when doing integer division */ + byte_spot = bit / 8; + bitmask = 0x80 >> (bit % 8); + if (state) + bitmap[byte_spot] |= bitmask; + else + bitmap[byte_spot] &= ~bitmask; +} + +/** + * cipso_v4_doi_domhsh_free - Frees a domain list entry + * @entry: the entry's RCU field + * + * Description: + * This function is designed to be used as a callback to the call_rcu() + * function so that the memory allocated to a domain list entry can be released + * safely. + * + */ +static void cipso_v4_doi_domhsh_free(struct rcu_head *entry) +{ + struct cipso_v4_domhsh_entry *ptr; + + ptr = container_of(entry, struct cipso_v4_domhsh_entry, rcu); + kfree(ptr->domain); + kfree(ptr); +} + +/** + * cipso_v4_cache_entry_free - Frees a cache entry + * @entry: the entry to free + * + * Description: + * This function frees the memory associated with a cache entry. + * + */ +static void cipso_v4_cache_entry_free(struct cipso_v4_map_cache_entry *entry) +{ + if (entry->lsm_data.free) + entry->lsm_data.free(entry->lsm_data.data); + kfree(entry->key); + kfree(entry); +} + +/** + * cipso_v4_map_cache_hash - Hashing function for the CIPSO cache + * @key: the hash key + * @key_len: the length of the key in bytes + * + * Description: + * The CIPSO tag hashing function. Returns a 32-bit hash value. + * + */ +static u32 cipso_v4_map_cache_hash(const unsigned char *key, u32 key_len) +{ + return jhash(key, key_len, 0); +} + +/* + * Label Mapping Cache Functions + */ + +/** + * cipso_v4_cache_init - Initialize the CIPSO cache + * + * Description: + * Initializes the CIPSO label mapping cache, this function should be called + * before any of the other functions defined in this file. Returns zero on + * success, negative values on error. + * + */ +static int cipso_v4_cache_init(void) +{ + u32 iter; + + cipso_v4_cache = kcalloc(CIPSO_V4_CACHE_BUCKETS, + sizeof(struct cipso_v4_map_cache_bkt), + GFP_KERNEL); + if (cipso_v4_cache == NULL) + return -ENOMEM; + + for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) { + spin_lock_init(&cipso_v4_cache[iter].lock); + cipso_v4_cache[iter].size = 0; + INIT_LIST_HEAD(&cipso_v4_cache[iter].list); + } + + return 0; +} + +/** + * cipso_v4_cache_invalidate - Invalidates the current CIPSO cache + * + * Description: + * Invalidates and frees any entries in the CIPSO cache. Returns zero on + * success and negative values on failure. + * + */ +void cipso_v4_cache_invalidate(void) +{ + struct cipso_v4_map_cache_entry *entry, *tmp_entry; + u32 iter; + + for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) { + spin_lock(&cipso_v4_cache[iter].lock); + list_for_each_entry_safe(entry, + tmp_entry, + &cipso_v4_cache[iter].list, list) { + list_del(&entry->list); + cipso_v4_cache_entry_free(entry); + } + cipso_v4_cache[iter].size = 0; + spin_unlock(&cipso_v4_cache[iter].lock); + } + + return; +} + +/** + * cipso_v4_cache_check - Check the CIPSO cache for a label mapping + * @key: the buffer to check + * @key_len: buffer length in bytes + * @secattr: the security attribute struct to use + * + * Description: + * This function checks the cache to see if a label mapping already exists for + * the given key. If there is a match then the cache is adjusted and the + * @secattr struct is populated with the correct LSM security attributes. The + * cache is adjusted in the following manner if the entry is not already the + * first in the cache bucket: + * + * 1. The cache entry's activity counter is incremented + * 2. The previous (higher ranking) entry's activity counter is decremented + * 3. If the difference between the two activity counters is geater than + * CIPSO_V4_CACHE_REORDERLIMIT the two entries are swapped + * + * Returns zero on success, -ENOENT for a cache miss, and other negative values + * on error. + * + */ +static int cipso_v4_cache_check(const unsigned char *key, + u32 key_len, + struct netlbl_lsm_secattr *secattr) +{ + u32 bkt; + struct cipso_v4_map_cache_entry *entry; + struct cipso_v4_map_cache_entry *prev_entry = NULL; + u32 hash; + + if (!cipso_v4_cache_enabled) + return -ENOENT; + + hash = cipso_v4_map_cache_hash(key, key_len); + bkt = hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); + spin_lock(&cipso_v4_cache[bkt].lock); + list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) { + if (entry->hash == hash && + entry->key_len == key_len && + memcmp(entry->key, key, key_len) == 0) { + entry->activity += 1; + secattr->cache.free = entry->lsm_data.free; + secattr->cache.data = entry->lsm_data.data; + if (prev_entry == NULL) { + spin_unlock(&cipso_v4_cache[bkt].lock); + return 0; + } + + if (prev_entry->activity > 0) + prev_entry->activity -= 1; + if (entry->activity > prev_entry->activity && + entry->activity - prev_entry->activity > + CIPSO_V4_CACHE_REORDERLIMIT) { + __list_del(entry->list.prev, entry->list.next); + __list_add(&entry->list, + prev_entry->list.prev, + &prev_entry->list); + } + + spin_unlock(&cipso_v4_cache[bkt].lock); + return 0; + } + prev_entry = entry; + } + spin_unlock(&cipso_v4_cache[bkt].lock); + + return -ENOENT; +} + +/** + * cipso_v4_cache_add - Add an entry to the CIPSO cache + * @skb: the packet + * @secattr: the packet's security attributes + * + * Description: + * Add a new entry into the CIPSO label mapping cache. Add the new entry to + * head of the cache bucket's list, if the cache bucket is out of room remove + * the last entry in the list first. It is important to note that there is + * currently no checking for duplicate keys. Returns zero on success, + * negative values on failure. + * + */ +int cipso_v4_cache_add(const struct sk_buff *skb, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -EPERM; + u32 bkt; + struct cipso_v4_map_cache_entry *entry = NULL; + struct cipso_v4_map_cache_entry *old_entry = NULL; + unsigned char *cipso_ptr; + u32 cipso_ptr_len; + + if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0) + return 0; + + cipso_ptr = CIPSO_V4_OPTPTR(skb); + cipso_ptr_len = cipso_ptr[1]; + + entry = kzalloc(sizeof(*entry), GFP_ATOMIC); + if (entry == NULL) + return -ENOMEM; + entry->key = kmalloc(cipso_ptr_len, GFP_ATOMIC); + if (entry->key == NULL) { + ret_val = -ENOMEM; + goto cache_add_failure; + } + memcpy(entry->key, cipso_ptr, cipso_ptr_len); + entry->key_len = cipso_ptr_len; + entry->hash = cipso_v4_map_cache_hash(cipso_ptr, cipso_ptr_len); + entry->lsm_data.free = secattr->cache.free; + entry->lsm_data.data = secattr->cache.data; + + bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); + spin_lock(&cipso_v4_cache[bkt].lock); + if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) { + list_add(&entry->list, &cipso_v4_cache[bkt].list); + cipso_v4_cache[bkt].size += 1; + } else { + old_entry = list_entry(cipso_v4_cache[bkt].list.prev, + struct cipso_v4_map_cache_entry, list); + list_del(&old_entry->list); + list_add(&entry->list, &cipso_v4_cache[bkt].list); + cipso_v4_cache_entry_free(old_entry); + } + spin_unlock(&cipso_v4_cache[bkt].lock); + + return 0; + +cache_add_failure: + if (entry) + cipso_v4_cache_entry_free(entry); + return ret_val; +} + +/* + * DOI List Functions + */ + +/** + * cipso_v4_doi_search - Searches for a DOI definition + * @doi: the DOI to search for + * + * Description: + * Search the DOI definition list for a DOI definition with a DOI value that + * matches @doi. The caller is responsibile for calling rcu_read_[un]lock(). + * Returns a pointer to the DOI definition on success and NULL on failure. + */ +static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi) +{ + struct cipso_v4_doi *iter; + + list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) + if (iter->doi == doi && iter->valid) + return iter; + return NULL; +} + +/** + * cipso_v4_doi_add - Add a new DOI to the CIPSO protocol engine + * @doi_def: the DOI structure + * + * Description: + * The caller defines a new DOI for use by the CIPSO engine and calls this + * function to add it to the list of acceptable domains. The caller must + * ensure that the mapping table specified in @doi_def->map meets all of the + * requirements of the mapping type (see cipso_ipv4.h for details). Returns + * zero on success and non-zero on failure. + * + */ +int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) +{ + if (doi_def == NULL || doi_def->doi == CIPSO_V4_DOI_UNKNOWN) + return -EINVAL; + + doi_def->valid = 1; + INIT_RCU_HEAD(&doi_def->rcu); + INIT_LIST_HEAD(&doi_def->dom_list); + + rcu_read_lock(); + if (cipso_v4_doi_search(doi_def->doi) != NULL) + goto doi_add_failure_rlock; + spin_lock(&cipso_v4_doi_list_lock); + if (cipso_v4_doi_search(doi_def->doi) != NULL) + goto doi_add_failure_slock; + list_add_tail_rcu(&doi_def->list, &cipso_v4_doi_list); + spin_unlock(&cipso_v4_doi_list_lock); + rcu_read_unlock(); + + return 0; + +doi_add_failure_slock: + spin_unlock(&cipso_v4_doi_list_lock); +doi_add_failure_rlock: + rcu_read_unlock(); + return -EEXIST; +} + +/** + * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine + * @doi: the DOI value + * @callback: the DOI cleanup/free callback + * + * Description: + * Removes a DOI definition from the CIPSO engine, @callback is called to + * free any memory. The NetLabel routines will be called to release their own + * LSM domain mappings as well as our own domain list. Returns zero on + * success and negative values on failure. + * + */ +int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head)) +{ + struct cipso_v4_doi *doi_def; + struct cipso_v4_domhsh_entry *dom_iter; + + rcu_read_lock(); + if (cipso_v4_doi_search(doi) != NULL) { + spin_lock(&cipso_v4_doi_list_lock); + doi_def = cipso_v4_doi_search(doi); + if (doi_def == NULL) { + spin_unlock(&cipso_v4_doi_list_lock); + rcu_read_unlock(); + return -ENOENT; + } + doi_def->valid = 0; + list_del_rcu(&doi_def->list); + spin_unlock(&cipso_v4_doi_list_lock); + list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list) + if (dom_iter->valid) + netlbl_domhsh_remove(dom_iter->domain); + cipso_v4_cache_invalidate(); + rcu_read_unlock(); + + call_rcu(&doi_def->rcu, callback); + return 0; + } + rcu_read_unlock(); + + return -ENOENT; +} + +/** + * cipso_v4_doi_getdef - Returns a pointer to a valid DOI definition + * @doi: the DOI value + * + * Description: + * Searches for a valid DOI definition and if one is found it is returned to + * the caller. Otherwise NULL is returned. The caller must ensure that + * rcu_read_lock() is held while accessing the returned definition. + * + */ +struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) +{ + return cipso_v4_doi_search(doi); +} + +/** + * cipso_v4_doi_dump_all - Dump all the CIPSO DOI definitions into a sk_buff + * @headroom: the amount of headroom to allocate for the sk_buff + * + * Description: + * Dump a list of all the configured DOI values into a sk_buff. The returned + * sk_buff has room at the front of the sk_buff for @headroom bytes. See + * net/netlabel/netlabel_cipso_v4.h for the LISTALL message format. This + * function may fail if another process is changing the DOI list at the same + * time. Returns a pointer to a sk_buff on success, NULL on error. + * + */ +struct sk_buff *cipso_v4_doi_dump_all(size_t headroom) +{ + struct sk_buff *skb = NULL; + struct cipso_v4_doi *iter; + u32 doi_cnt = 0; + ssize_t buf_len; + + buf_len = NETLBL_LEN_U32; + rcu_read_lock(); + list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) + if (iter->valid) { + doi_cnt += 1; + buf_len += 2 * NETLBL_LEN_U32; + } + + skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); + if (skb == NULL) + goto doi_dump_all_failure; + + if (nla_put_u32(skb, NLA_U32, doi_cnt) != 0) + goto doi_dump_all_failure; + buf_len -= NETLBL_LEN_U32; + list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) + if (iter->valid) { + if (buf_len < 2 * NETLBL_LEN_U32) + goto doi_dump_all_failure; + if (nla_put_u32(skb, NLA_U32, iter->doi) != 0) + goto doi_dump_all_failure; + if (nla_put_u32(skb, NLA_U32, iter->type) != 0) + goto doi_dump_all_failure; + buf_len -= 2 * NETLBL_LEN_U32; + } + rcu_read_unlock(); + + return skb; + +doi_dump_all_failure: + rcu_read_unlock(); + kfree(skb); + return NULL; +} + +/** + * cipso_v4_doi_dump - Dump a CIPSO DOI definition into a sk_buff + * @doi: the DOI value + * @headroom: the amount of headroom to allocate for the sk_buff + * + * Description: + * Lookup the DOI definition matching @doi and dump it's contents into a + * sk_buff. The returned sk_buff has room at the front of the sk_buff for + * @headroom bytes. See net/netlabel/netlabel_cipso_v4.h for the LIST message + * format. This function may fail if another process is changing the DOI list + * at the same time. Returns a pointer to a sk_buff on success, NULL on error. + * + */ +struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom) +{ + struct sk_buff *skb = NULL; + struct cipso_v4_doi *iter; + u32 tag_cnt = 0; + u32 lvl_cnt = 0; + u32 cat_cnt = 0; + ssize_t buf_len; + ssize_t tmp; + + rcu_read_lock(); + iter = cipso_v4_doi_getdef(doi); + if (iter == NULL) + goto doi_dump_failure; + buf_len = NETLBL_LEN_U32; + switch (iter->type) { + case CIPSO_V4_MAP_PASS: + buf_len += NETLBL_LEN_U32; + while(tag_cnt < CIPSO_V4_TAG_MAXCNT && + iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) { + tag_cnt += 1; + buf_len += NETLBL_LEN_U8; + } + break; + case CIPSO_V4_MAP_STD: + buf_len += 3 * NETLBL_LEN_U32; + while (tag_cnt < CIPSO_V4_TAG_MAXCNT && + iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) { + tag_cnt += 1; + buf_len += NETLBL_LEN_U8; + } + for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++) + if (iter->map.std->lvl.local[tmp] != + CIPSO_V4_INV_LVL) { + lvl_cnt += 1; + buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U8; + } + for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++) + if (iter->map.std->cat.local[tmp] != + CIPSO_V4_INV_CAT) { + cat_cnt += 1; + buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U16; + } + break; + } + + skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); + if (skb == NULL) + goto doi_dump_failure; + + if (nla_put_u32(skb, NLA_U32, iter->type) != 0) + goto doi_dump_failure; + buf_len -= NETLBL_LEN_U32; + if (iter != cipso_v4_doi_getdef(doi)) + goto doi_dump_failure; + switch (iter->type) { + case CIPSO_V4_MAP_PASS: + if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0) + goto doi_dump_failure; + buf_len -= NETLBL_LEN_U32; + for (tmp = 0; + tmp < CIPSO_V4_TAG_MAXCNT && + iter->tags[tmp] != CIPSO_V4_TAG_INVALID; + tmp++) { + if (buf_len < NETLBL_LEN_U8) + goto doi_dump_failure; + if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0) + goto doi_dump_failure; + buf_len -= NETLBL_LEN_U8; + } + break; + case CIPSO_V4_MAP_STD: + if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0) + goto doi_dump_failure; + if (nla_put_u32(skb, NLA_U32, lvl_cnt) != 0) + goto doi_dump_failure; + if (nla_put_u32(skb, NLA_U32, cat_cnt) != 0) + goto doi_dump_failure; + buf_len -= 3 * NETLBL_LEN_U32; + for (tmp = 0; + tmp < CIPSO_V4_TAG_MAXCNT && + iter->tags[tmp] != CIPSO_V4_TAG_INVALID; + tmp++) { + if (buf_len < NETLBL_LEN_U8) + goto doi_dump_failure; + if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0) + goto doi_dump_failure; + buf_len -= NETLBL_LEN_U8; + } + for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++) + if (iter->map.std->lvl.local[tmp] != + CIPSO_V4_INV_LVL) { + if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U8) + goto doi_dump_failure; + if (nla_put_u32(skb, NLA_U32, tmp) != 0) + goto doi_dump_failure; + if (nla_put_u8(skb, + NLA_U8, + iter->map.std->lvl.local[tmp]) != 0) + goto doi_dump_failure; + buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U8; + } + for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++) + if (iter->map.std->cat.local[tmp] != + CIPSO_V4_INV_CAT) { + if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U16) + goto doi_dump_failure; + if (nla_put_u32(skb, NLA_U32, tmp) != 0) + goto doi_dump_failure; + if (nla_put_u16(skb, + NLA_U16, + iter->map.std->cat.local[tmp]) != 0) + goto doi_dump_failure; + buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U16; + } + break; + } + rcu_read_unlock(); + + return skb; + +doi_dump_failure: + rcu_read_unlock(); + kfree(skb); + return NULL; +} + +/** + * cipso_v4_doi_domhsh_add - Adds a domain entry to a DOI definition + * @doi_def: the DOI definition + * @domain: the domain to add + * + * Description: + * Adds the @domain to the the DOI specified by @doi_def, this function + * should only be called by external functions (i.e. NetLabel). This function + * does allocate memory. Returns zero on success, negative values on failure. + * + */ +int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain) +{ + struct cipso_v4_domhsh_entry *iter; + struct cipso_v4_domhsh_entry *new_dom; + + new_dom = kzalloc(sizeof(*new_dom), GFP_KERNEL); + if (new_dom == NULL) + return -ENOMEM; + if (domain) { + new_dom->domain = kstrdup(domain, GFP_KERNEL); + if (new_dom->domain == NULL) { + kfree(new_dom); + return -ENOMEM; + } + } + new_dom->valid = 1; + INIT_RCU_HEAD(&new_dom->rcu); + + rcu_read_lock(); + spin_lock(&cipso_v4_doi_list_lock); + list_for_each_entry_rcu(iter, &doi_def->dom_list, list) + if (iter->valid && + ((domain != NULL && iter->domain != NULL && + strcmp(iter->domain, domain) == 0) || + (domain == NULL && iter->domain == NULL))) { + spin_unlock(&cipso_v4_doi_list_lock); + rcu_read_unlock(); + kfree(new_dom->domain); + kfree(new_dom); + return -EEXIST; + } + list_add_tail_rcu(&new_dom->list, &doi_def->dom_list); + spin_unlock(&cipso_v4_doi_list_lock); + rcu_read_unlock(); + + return 0; +} + +/** + * cipso_v4_doi_domhsh_remove - Removes a domain entry from a DOI definition + * @doi_def: the DOI definition + * @domain: the domain to remove + * + * Description: + * Removes the @domain from the DOI specified by @doi_def, this function + * should only be called by external functions (i.e. NetLabel). Returns zero + * on success and negative values on error. + * + */ +int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, + const char *domain) +{ + struct cipso_v4_domhsh_entry *iter; + + rcu_read_lock(); + spin_lock(&cipso_v4_doi_list_lock); + list_for_each_entry_rcu(iter, &doi_def->dom_list, list) + if (iter->valid && + ((domain != NULL && iter->domain != NULL && + strcmp(iter->domain, domain) == 0) || + (domain == NULL && iter->domain == NULL))) { + iter->valid = 0; + list_del_rcu(&iter->list); + spin_unlock(&cipso_v4_doi_list_lock); + rcu_read_unlock(); + call_rcu(&iter->rcu, cipso_v4_doi_domhsh_free); + + return 0; + } + spin_unlock(&cipso_v4_doi_list_lock); + rcu_read_unlock(); + + return -ENOENT; +} + +/* + * Label Mapping Functions + */ + +/** + * cipso_v4_map_lvl_valid - Checks to see if the given level is understood + * @doi_def: the DOI definition + * @level: the level to check + * + * Description: + * Checks the given level against the given DOI definition and returns a + * negative value if the level does not have a valid mapping and a zero value + * if the level is defined by the DOI. + * + */ +static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level) +{ + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + return 0; + case CIPSO_V4_MAP_STD: + if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL) + return 0; + break; + } + + return -EFAULT; +} + +/** + * cipso_v4_map_lvl_hton - Perform a level mapping from the host to the network + * @doi_def: the DOI definition + * @host_lvl: the host MLS level + * @net_lvl: the network/CIPSO MLS level + * + * Description: + * Perform a label mapping to translate a local MLS level to the correct + * CIPSO level using the given DOI definition. Returns zero on success, + * negative values otherwise. + * + */ +static int cipso_v4_map_lvl_hton(const struct cipso_v4_doi *doi_def, + u32 host_lvl, + u32 *net_lvl) +{ + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + *net_lvl = host_lvl; + return 0; + case CIPSO_V4_MAP_STD: + if (host_lvl < doi_def->map.std->lvl.local_size) { + *net_lvl = doi_def->map.std->lvl.local[host_lvl]; + return 0; + } + break; + } + + return -EINVAL; +} + +/** + * cipso_v4_map_lvl_ntoh - Perform a level mapping from the network to the host + * @doi_def: the DOI definition + * @net_lvl: the network/CIPSO MLS level + * @host_lvl: the host MLS level + * + * Description: + * Perform a label mapping to translate a CIPSO level to the correct local MLS + * level using the given DOI definition. Returns zero on success, negative + * values otherwise. + * + */ +static int cipso_v4_map_lvl_ntoh(const struct cipso_v4_doi *doi_def, + u32 net_lvl, + u32 *host_lvl) +{ + struct cipso_v4_std_map_tbl *map_tbl; + + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + *host_lvl = net_lvl; + return 0; + case CIPSO_V4_MAP_STD: + map_tbl = doi_def->map.std; + if (net_lvl < map_tbl->lvl.cipso_size && + map_tbl->lvl.cipso[net_lvl] < CIPSO_V4_INV_LVL) { + *host_lvl = doi_def->map.std->lvl.cipso[net_lvl]; + return 0; + } + break; + } + + return -EINVAL; +} + +/** + * cipso_v4_map_cat_rbm_valid - Checks to see if the category bitmap is valid + * @doi_def: the DOI definition + * @bitmap: category bitmap + * @bitmap_len: bitmap length in bytes + * + * Description: + * Checks the given category bitmap against the given DOI definition and + * returns a negative value if any of the categories in the bitmap do not have + * a valid mapping and a zero value if all of the categories are valid. + * + */ +static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def, + const unsigned char *bitmap, + u32 bitmap_len) +{ + int cat = -1; + u32 bitmap_len_bits = bitmap_len * 8; + u32 cipso_cat_size = doi_def->map.std->cat.cipso_size; + u32 *cipso_array = doi_def->map.std->cat.cipso; + + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + return 0; + case CIPSO_V4_MAP_STD: + for (;;) { + cat = cipso_v4_bitmap_walk(bitmap, + bitmap_len_bits, + cat + 1, + 1); + if (cat < 0) + break; + if (cat >= cipso_cat_size || + cipso_array[cat] >= CIPSO_V4_INV_CAT) + return -EFAULT; + } + + if (cat == -1) + return 0; + break; + } + + return -EFAULT; +} + +/** + * cipso_v4_map_cat_rbm_hton - Perform a category mapping from host to network + * @doi_def: the DOI definition + * @host_cat: the category bitmap in host format + * @host_cat_len: the length of the host's category bitmap in bytes + * @net_cat: the zero'd out category bitmap in network/CIPSO format + * @net_cat_len: the length of the CIPSO bitmap in bytes + * + * Description: + * Perform a label mapping to translate a local MLS category bitmap to the + * correct CIPSO bitmap using the given DOI definition. Returns the minimum + * size in bytes of the network bitmap on success, negative values otherwise. + * + */ +static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def, + const unsigned char *host_cat, + u32 host_cat_len, + unsigned char *net_cat, + u32 net_cat_len) +{ + int host_spot = -1; + u32 net_spot; + u32 net_spot_max = 0; + u32 host_clen_bits = host_cat_len * 8; + u32 net_clen_bits = net_cat_len * 8; + u32 host_cat_size = doi_def->map.std->cat.local_size; + u32 *host_cat_array = doi_def->map.std->cat.local; + + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + net_spot_max = host_cat_len - 1; + while (net_spot_max > 0 && host_cat[net_spot_max] == 0) + net_spot_max--; + if (net_spot_max > net_cat_len) + return -EINVAL; + memcpy(net_cat, host_cat, net_spot_max); + return net_spot_max; + case CIPSO_V4_MAP_STD: + for (;;) { + host_spot = cipso_v4_bitmap_walk(host_cat, + host_clen_bits, + host_spot + 1, + 1); + if (host_spot < 0) + break; + if (host_spot >= host_cat_size) + return -EPERM; + + net_spot = host_cat_array[host_spot]; + if (net_spot >= net_clen_bits) + return -ENOSPC; + cipso_v4_bitmap_setbit(net_cat, net_spot, 1); + + if (net_spot > net_spot_max) + net_spot_max = net_spot; + } + + if (host_spot == -2) + return -EFAULT; + + if (++net_spot_max % 8) + return net_spot_max / 8 + 1; + return net_spot_max / 8; + } + + return -EINVAL; +} + +/** + * cipso_v4_map_cat_rbm_ntoh - Perform a category mapping from network to host + * @doi_def: the DOI definition + * @net_cat: the category bitmap in network/CIPSO format + * @net_cat_len: the length of the CIPSO bitmap in bytes + * @host_cat: the zero'd out category bitmap in host format + * @host_cat_len: the length of the host's category bitmap in bytes + * + * Description: + * Perform a label mapping to translate a CIPSO bitmap to the correct local + * MLS category bitmap using the given DOI definition. Returns the minimum + * size in bytes of the host bitmap on success, negative values otherwise. + * + */ +static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def, + const unsigned char *net_cat, + u32 net_cat_len, + unsigned char *host_cat, + u32 host_cat_len) +{ + u32 host_spot; + u32 host_spot_max = 0; + int net_spot = -1; + u32 net_clen_bits = net_cat_len * 8; + u32 host_clen_bits = host_cat_len * 8; + u32 net_cat_size = doi_def->map.std->cat.cipso_size; + u32 *net_cat_array = doi_def->map.std->cat.cipso; + + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + if (net_cat_len > host_cat_len) + return -EINVAL; + memcpy(host_cat, net_cat, net_cat_len); + return net_cat_len; + case CIPSO_V4_MAP_STD: + for (;;) { + net_spot = cipso_v4_bitmap_walk(net_cat, + net_clen_bits, + net_spot + 1, + 1); + if (net_spot < 0) + break; + if (net_spot >= net_cat_size || + net_cat_array[net_spot] >= CIPSO_V4_INV_CAT) + return -EPERM; + + host_spot = net_cat_array[net_spot]; + if (host_spot >= host_clen_bits) + return -ENOSPC; + cipso_v4_bitmap_setbit(host_cat, host_spot, 1); + + if (host_spot > host_spot_max) + host_spot_max = host_spot; + } + + if (net_spot == -2) + return -EFAULT; + + if (++host_spot_max % 8) + return host_spot_max / 8 + 1; + return host_spot_max / 8; + } + + return -EINVAL; +} + +/* + * Protocol Handling Functions + */ + +#define CIPSO_V4_HDR_LEN 6 + +/** + * cipso_v4_gentag_hdr - Generate a CIPSO option header + * @doi_def: the DOI definition + * @len: the total tag length in bytes + * @buf: the CIPSO option buffer + * + * Description: + * Write a CIPSO header into the beginning of @buffer. Return zero on success, + * negative values on failure. + * + */ +static int cipso_v4_gentag_hdr(const struct cipso_v4_doi *doi_def, + u32 len, + unsigned char *buf) +{ + if (CIPSO_V4_HDR_LEN + len > 40) + return -ENOSPC; + + buf[0] = IPOPT_CIPSO; + buf[1] = CIPSO_V4_HDR_LEN + len; + *(u32 *)&buf[2] = htonl(doi_def->doi); + + return 0; +} + +#define CIPSO_V4_TAG1_CAT_LEN 30 + +/** + * cipso_v4_gentag_rbm - Generate a CIPSO restricted bitmap tag (type #1) + * @doi_def: the DOI definition + * @secattr: the security attributes + * @buffer: the option buffer + * @buffer_len: length of buffer in bytes + * + * Description: + * Generate a CIPSO option using the restricted bitmap tag, tag type #1. The + * actual buffer length may be larger than the indicated size due to + * translation between host and network category bitmaps. Returns zero on + * success, negative values on failure. + * + */ +static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr, + unsigned char **buffer, + u32 *buffer_len) +{ + int ret_val = -EPERM; + unsigned char *buf = NULL; + u32 buf_len; + u32 level; + + if (secattr->mls_cat) { + buf = kzalloc(CIPSO_V4_HDR_LEN + 4 + CIPSO_V4_TAG1_CAT_LEN, + GFP_ATOMIC); + if (buf == NULL) + return -ENOMEM; + + ret_val = cipso_v4_map_cat_rbm_hton(doi_def, + secattr->mls_cat, + secattr->mls_cat_len, + &buf[CIPSO_V4_HDR_LEN + 4], + CIPSO_V4_TAG1_CAT_LEN); + if (ret_val < 0) + goto gentag_failure; + + /* This will send packets using the "optimized" format when + * possibile as specified in section 3.4.2.6 of the + * CIPSO draft. */ + if (cipso_v4_rbm_optfmt && (ret_val > 0 && ret_val < 10)) + ret_val = 10; + + buf_len = 4 + ret_val; + } else { + buf = kzalloc(CIPSO_V4_HDR_LEN + 4, GFP_ATOMIC); + if (buf == NULL) + return -ENOMEM; + buf_len = 4; + } + + ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->mls_lvl, &level); + if (ret_val != 0) + goto gentag_failure; + + ret_val = cipso_v4_gentag_hdr(doi_def, buf_len, buf); + if (ret_val != 0) + goto gentag_failure; + + buf[CIPSO_V4_HDR_LEN] = 0x01; + buf[CIPSO_V4_HDR_LEN + 1] = buf_len; + buf[CIPSO_V4_HDR_LEN + 3] = level; + + *buffer = buf; + *buffer_len = CIPSO_V4_HDR_LEN + buf_len; + + return 0; + +gentag_failure: + kfree(buf); + return ret_val; +} + +/** + * cipso_v4_parsetag_rbm - Parse a CIPSO restricted bitmap tag + * @doi_def: the DOI definition + * @tag: the CIPSO tag + * @secattr: the security attributes + * + * Description: + * Parse a CIPSO restricted bitmap tag (tag type #1) and return the security + * attributes in @secattr. Return zero on success, negatives values on + * failure. + * + */ +static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def, + const unsigned char *tag, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + u8 tag_len = tag[1]; + u32 level; + + ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level); + if (ret_val != 0) + return ret_val; + secattr->mls_lvl = level; + secattr->mls_lvl_vld = 1; + + if (tag_len > 4) { + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + secattr->mls_cat_len = tag_len - 4; + break; + case CIPSO_V4_MAP_STD: + secattr->mls_cat_len = + doi_def->map.std->cat.local_size; + break; + } + secattr->mls_cat = kzalloc(secattr->mls_cat_len, GFP_ATOMIC); + if (secattr->mls_cat == NULL) + return -ENOMEM; + + ret_val = cipso_v4_map_cat_rbm_ntoh(doi_def, + &tag[4], + tag_len - 4, + secattr->mls_cat, + secattr->mls_cat_len); + if (ret_val < 0) { + kfree(secattr->mls_cat); + return ret_val; + } + secattr->mls_cat_len = ret_val; + } + + return 0; +} + +/** + * cipso_v4_validate - Validate a CIPSO option + * @option: the start of the option, on error it is set to point to the error + * + * Description: + * This routine is called to validate a CIPSO option, it checks all of the + * fields to ensure that they are at least valid, see the draft snippet below + * for details. If the option is valid then a zero value is returned and + * the value of @option is unchanged. If the option is invalid then a + * non-zero value is returned and @option is adjusted to point to the + * offending portion of the option. From the IETF draft ... + * + * "If any field within the CIPSO options, such as the DOI identifier, is not + * recognized the IP datagram is discarded and an ICMP 'parameter problem' + * (type 12) is generated and returned. The ICMP code field is set to 'bad + * parameter' (code 0) and the pointer is set to the start of the CIPSO field + * that is unrecognized." + * + */ +int cipso_v4_validate(unsigned char **option) +{ + unsigned char *opt = *option; + unsigned char *tag; + unsigned char opt_iter; + unsigned char err_offset = 0; + u8 opt_len; + u8 tag_len; + struct cipso_v4_doi *doi_def = NULL; + u32 tag_iter; + + /* caller already checks for length values that are too large */ + opt_len = opt[1]; + if (opt_len < 8) { + err_offset = 1; + goto validate_return; + } + + rcu_read_lock(); + doi_def = cipso_v4_doi_getdef(ntohl(*((u32 *)&opt[2]))); + if (doi_def == NULL) { + err_offset = 2; + goto validate_return_locked; + } + + opt_iter = 6; + tag = opt + opt_iter; + while (opt_iter < opt_len) { + for (tag_iter = 0; doi_def->tags[tag_iter] != tag[0];) + if (doi_def->tags[tag_iter] == CIPSO_V4_TAG_INVALID || + ++tag_iter == CIPSO_V4_TAG_MAXCNT) { + err_offset = opt_iter; + goto validate_return_locked; + } + + tag_len = tag[1]; + if (tag_len > (opt_len - opt_iter)) { + err_offset = opt_iter + 1; + goto validate_return_locked; + } + + switch (tag[0]) { + case CIPSO_V4_TAG_RBITMAP: + if (tag_len < 4) { + err_offset = opt_iter + 1; + goto validate_return_locked; + } + + /* We are already going to do all the verification + * necessary at the socket layer so from our point of + * view it is safe to turn these checks off (and less + * work), however, the CIPSO draft says we should do + * all the CIPSO validations here but it doesn't + * really specify _exactly_ what we need to validate + * ... so, just make it a sysctl tunable. */ + if (cipso_v4_rbm_strictvalid) { + if (cipso_v4_map_lvl_valid(doi_def, + tag[3]) < 0) { + err_offset = opt_iter + 3; + goto validate_return_locked; + } + if (tag_len > 4 && + cipso_v4_map_cat_rbm_valid(doi_def, + &tag[4], + tag_len - 4) < 0) { + err_offset = opt_iter + 4; + goto validate_return_locked; + } + } + break; + default: + err_offset = opt_iter; + goto validate_return_locked; + } + + tag += tag_len; + opt_iter += tag_len; + } + +validate_return_locked: + rcu_read_unlock(); +validate_return: + *option = opt + err_offset; + return err_offset; +} + +/** + * cipso_v4_error - Send the correct reponse for a bad packet + * @skb: the packet + * @error: the error code + * @gateway: CIPSO gateway flag + * + * Description: + * Based on the error code given in @error, send an ICMP error message back to + * the originating host. From the IETF draft ... + * + * "If the contents of the CIPSO [option] are valid but the security label is + * outside of the configured host or port label range, the datagram is + * discarded and an ICMP 'destination unreachable' (type 3) is generated and + * returned. The code field of the ICMP is set to 'communication with + * destination network administratively prohibited' (code 9) or to + * 'communication with destination host administratively prohibited' + * (code 10). The value of the code is dependent on whether the originator + * of the ICMP message is acting as a CIPSO host or a CIPSO gateway. The + * recipient of the ICMP message MUST be able to handle either value. The + * same procedure is performed if a CIPSO [option] can not be added to an + * IP packet because it is too large to fit in the IP options area." + * + * "If the error is triggered by receipt of an ICMP message, the message is + * discarded and no response is permitted (consistent with general ICMP + * processing rules)." + * + */ +void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) +{ + if (skb->nh.iph->protocol == IPPROTO_ICMP || error != -EACCES) + return; + + if (gateway) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0); + else + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0); +} + +/** + * cipso_v4_socket_setattr - Add a CIPSO option to a socket + * @sock: the socket + * @doi_def: the CIPSO DOI to use + * @secattr: the specific security attributes of the socket + * + * Description: + * Set the CIPSO option on the given socket using the DOI definition and + * security attributes passed to the function. This function requires + * exclusive access to @sock->sk, which means it either needs to be in the + * process of being created or locked via lock_sock(sock->sk). Returns zero on + * success and negative values on failure. + * + */ +int cipso_v4_socket_setattr(const struct socket *sock, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -EPERM; + u32 iter; + unsigned char *buf = NULL; + u32 buf_len = 0; + u32 opt_len; + struct ip_options *opt = NULL; + struct sock *sk; + struct inet_sock *sk_inet; + struct inet_connection_sock *sk_conn; + + /* In the case of sock_create_lite(), the sock->sk field is not + * defined yet but it is not a problem as the only users of these + * "lite" PF_INET sockets are functions which do an accept() call + * afterwards so we will label the socket as part of the accept(). */ + sk = sock->sk; + if (sk == NULL) + return 0; + + /* XXX - This code assumes only one tag per CIPSO option which isn't + * really a good assumption to make but since we only support the MAC + * tags right now it is a safe assumption. */ + iter = 0; + do { + switch (doi_def->tags[iter]) { + case CIPSO_V4_TAG_RBITMAP: + ret_val = cipso_v4_gentag_rbm(doi_def, + secattr, + &buf, + &buf_len); + break; + default: + ret_val = -EPERM; + goto socket_setattr_failure; + } + + iter++; + } while (ret_val != 0 && + iter < CIPSO_V4_TAG_MAXCNT && + doi_def->tags[iter] != CIPSO_V4_TAG_INVALID); + if (ret_val != 0) + goto socket_setattr_failure; + + /* We can't use ip_options_get() directly because it makes a call to + * ip_options_get_alloc() which allocates memory with GFP_KERNEL and + * we can't block here. */ + opt_len = (buf_len + 3) & ~3; + opt = kzalloc(sizeof(*opt) + opt_len, GFP_ATOMIC); + if (opt == NULL) { + ret_val = -ENOMEM; + goto socket_setattr_failure; + } + memcpy(opt->__data, buf, buf_len); + opt->optlen = opt_len; + opt->is_data = 1; + kfree(buf); + buf = NULL; + ret_val = ip_options_compile(opt, NULL); + if (ret_val != 0) + goto socket_setattr_failure; + + sk_inet = inet_sk(sk); + if (sk_inet->is_icsk) { + sk_conn = inet_csk(sk); + if (sk_inet->opt) + sk_conn->icsk_ext_hdr_len -= sk_inet->opt->optlen; + sk_conn->icsk_ext_hdr_len += opt->optlen; + sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); + } + opt = xchg(&sk_inet->opt, opt); + kfree(opt); + + return 0; + +socket_setattr_failure: + kfree(buf); + kfree(opt); + return ret_val; +} + +/** + * cipso_v4_socket_getattr - Get the security attributes from a socket + * @sock: the socket + * @secattr: the security attributes + * + * Description: + * Query @sock to see if there is a CIPSO option attached to the socket and if + * there is return the CIPSO security attributes in @secattr. Returns zero on + * success and negative values on failure. + * + */ +int cipso_v4_socket_getattr(const struct socket *sock, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -ENOMSG; + struct sock *sk; + struct inet_sock *sk_inet; + unsigned char *cipso_ptr; + u32 doi; + struct cipso_v4_doi *doi_def; + + sk = sock->sk; + lock_sock(sk); + sk_inet = inet_sk(sk); + if (sk_inet->opt == NULL || sk_inet->opt->cipso == 0) + goto socket_getattr_return; + cipso_ptr = sk_inet->opt->__data + sk_inet->opt->cipso - + sizeof(struct iphdr); + ret_val = cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr); + if (ret_val == 0) + goto socket_getattr_return; + + doi = ntohl(*(u32 *)&cipso_ptr[2]); + rcu_read_lock(); + doi_def = cipso_v4_doi_getdef(doi); + if (doi_def == NULL) { + rcu_read_unlock(); + goto socket_getattr_return; + } + switch (cipso_ptr[6]) { + case CIPSO_V4_TAG_RBITMAP: + ret_val = cipso_v4_parsetag_rbm(doi_def, + &cipso_ptr[6], + secattr); + break; + } + rcu_read_unlock(); + +socket_getattr_return: + release_sock(sk); + return ret_val; +} + +/** + * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option + * @skb: the packet + * @secattr: the security attributes + * + * Description: + * Parse the given packet's CIPSO option and return the security attributes. + * Returns zero on success and negative values on failure. + * + */ +int cipso_v4_skbuff_getattr(const struct sk_buff *skb, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -ENOMSG; + unsigned char *cipso_ptr; + u32 doi; + struct cipso_v4_doi *doi_def; + + if (!CIPSO_V4_OPTEXIST(skb)) + return -ENOMSG; + cipso_ptr = CIPSO_V4_OPTPTR(skb); + if (cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr) == 0) + return 0; + + doi = ntohl(*(u32 *)&cipso_ptr[2]); + rcu_read_lock(); + doi_def = cipso_v4_doi_getdef(doi); + if (doi_def == NULL) + goto skbuff_getattr_return; + switch (cipso_ptr[6]) { + case CIPSO_V4_TAG_RBITMAP: + ret_val = cipso_v4_parsetag_rbm(doi_def, + &cipso_ptr[6], + secattr); + break; + } + +skbuff_getattr_return: + rcu_read_unlock(); + return ret_val; +} + +/* + * Setup Functions + */ + +/** + * cipso_v4_init - Initialize the CIPSO module + * + * Description: + * Initialize the CIPSO module and prepare it for use. Returns zero on success + * and negative values on failure. + * + */ +static int __init cipso_v4_init(void) +{ + int ret_val; + + ret_val = cipso_v4_cache_init(); + if (ret_val != 0) + panic("Failed to initialize the CIPSO/IPv4 cache (%d)\n", + ret_val); + + return 0; +} + +subsys_initcall(cipso_v4_init); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 70cea9d08a38..19b2071ff319 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -17,6 +17,7 @@ #include #include #include +#include /* From af_inet.c */ extern int sysctl_ip_nonlocal_bind; @@ -697,6 +698,40 @@ ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = &proc_dointvec }, +#ifdef CONFIG_NETLABEL + { + .ctl_name = NET_CIPSOV4_CACHE_ENABLE, + .procname = "cipso_cache_enable", + .data = &cipso_v4_cache_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_CIPSOV4_CACHE_BUCKET_SIZE, + .procname = "cipso_cache_bucket_size", + .data = &cipso_v4_cache_bucketsize, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_CIPSOV4_RBM_OPTFMT, + .procname = "cipso_rbm_optfmt", + .data = &cipso_v4_rbm_optfmt, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_CIPSOV4_RBM_STRICTVALID, + .procname = "cipso_rbm_strictvalid", + .data = &cipso_v4_rbm_strictvalid, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif /* CONFIG_NETLABEL */ { .ctl_name = 0 } }; From d15c345fe3b8dfda0fa5a1d2143a35fffa746a43 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 3 Aug 2006 16:48:37 -0700 Subject: [PATCH 0391/1063] [NetLabel]: core NetLabel subsystem Add a new kernel subsystem, NetLabel, to provide explicit packet labeling services (CIPSO, RIPSO, etc.) to LSM developers. NetLabel is designed to work in conjunction with a LSM to intercept and decode security labels on incoming network packets as well as ensure that outgoing network packets are labeled according to the security mechanism employed by the LSM. The NetLabel subsystem is configured through a Generic NETLINK interface described in the header files included in this patch. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- net/Makefile | 1 + net/netlabel/Kconfig | 14 + net/netlabel/Makefile | 16 + net/netlabel/netlabel_cipso_v4.h | 217 ++++++++++ net/netlabel/netlabel_domainhash.c | 513 ++++++++++++++++++++++++ net/netlabel/netlabel_domainhash.h | 63 +++ net/netlabel/netlabel_kapi.c | 231 +++++++++++ net/netlabel/netlabel_mgmt.c | 624 +++++++++++++++++++++++++++++ net/netlabel/netlabel_mgmt.h | 246 ++++++++++++ net/netlabel/netlabel_unlabeled.h | 98 +++++ net/netlabel/netlabel_user.c | 158 ++++++++ net/netlabel/netlabel_user.h | 214 ++++++++++ 12 files changed, 2395 insertions(+) create mode 100644 net/netlabel/Kconfig create mode 100644 net/netlabel/Makefile create mode 100644 net/netlabel/netlabel_cipso_v4.h create mode 100644 net/netlabel/netlabel_domainhash.c create mode 100644 net/netlabel/netlabel_domainhash.h create mode 100644 net/netlabel/netlabel_kapi.c create mode 100644 net/netlabel/netlabel_mgmt.c create mode 100644 net/netlabel/netlabel_mgmt.h create mode 100644 net/netlabel/netlabel_unlabeled.h create mode 100644 net/netlabel/netlabel_user.c create mode 100644 net/netlabel/netlabel_user.h diff --git a/net/Makefile b/net/Makefile index 065796f5fb17..ad4d14f4bb29 100644 --- a/net/Makefile +++ b/net/Makefile @@ -46,6 +46,7 @@ obj-$(CONFIG_IP_DCCP) += dccp/ obj-$(CONFIG_IP_SCTP) += sctp/ obj-$(CONFIG_IEEE80211) += ieee80211/ obj-$(CONFIG_TIPC) += tipc/ +obj-$(CONFIG_NETLABEL) += netlabel/ ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o diff --git a/net/netlabel/Kconfig b/net/netlabel/Kconfig new file mode 100644 index 000000000000..fe23cb7f1e87 --- /dev/null +++ b/net/netlabel/Kconfig @@ -0,0 +1,14 @@ +# +# NetLabel configuration +# + +config NETLABEL + bool "NetLabel subsystem support" + depends on NET && SECURITY + default n + ---help--- + NetLabel provides support for explicit network packet labeling + protocols such as CIPSO and RIPSO. For more information see + Documentation/netlabel. + + If you are unsure, say N. diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile new file mode 100644 index 000000000000..8af18c0a47d9 --- /dev/null +++ b/net/netlabel/Makefile @@ -0,0 +1,16 @@ +# +# Makefile for the NetLabel subsystem. +# +# Feb 9, 2006, Paul Moore +# + +# base objects +obj-y := netlabel_user.o netlabel_kapi.o netlabel_domainhash.o + +# management objects +obj-y += netlabel_mgmt.o + +# protocol modules +obj-y += netlabel_unlabeled.o +obj-y += netlabel_cipso_v4.o + diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h new file mode 100644 index 000000000000..4c6ff4b93004 --- /dev/null +++ b/net/netlabel/netlabel_cipso_v4.h @@ -0,0 +1,217 @@ +/* + * NetLabel CIPSO/IPv4 Support + * + * This file defines the CIPSO/IPv4 functions for the NetLabel system. The + * NetLabel system manages static and dynamic label mappings for network + * protocols such as CIPSO and RIPSO. + * + * Author: Paul Moore + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _NETLABEL_CIPSO_V4 +#define _NETLABEL_CIPSO_V4 + +#include + +/* + * The following NetLabel payloads are supported by the CIPSO subsystem, all + * of which are preceeded by the nlmsghdr struct. + * + * o ACK: + * Sent by the kernel in response to an applications message, applications + * should never send this message. + * + * +----------------------+-----------------------+ + * | seq number (32 bits) | return code (32 bits) | + * +----------------------+-----------------------+ + * + * seq number: the sequence number of the original message, taken from the + * nlmsghdr structure + * return code: return value, based on errno values + * + * o ADD: + * Sent by an application to add a new DOI mapping table, after completion + * of the task the kernel should ACK this message. + * + * +---------------+--------------------+---------------------+ + * | DOI (32 bits) | map type (32 bits) | tag count (32 bits) | ... + * +---------------+--------------------+---------------------+ + * + * +-----------------+ + * | tag #X (8 bits) | ... repeated + * +-----------------+ + * + * +-------------- ---- --- -- - + * | mapping data + * +-------------- ---- --- -- - + * + * DOI: the DOI value + * map type: the mapping table type (defined in the cipso_ipv4.h header + * as CIPSO_V4_MAP_*) + * tag count: the number of tags, must be greater than zero + * tag: the CIPSO tag for the DOI, tags listed first are given + * higher priorirty when sending packets + * mapping data: specific to the map type (see below) + * + * CIPSO_V4_MAP_STD + * + * +------------------+-----------------------+----------------------+ + * | levels (32 bits) | max l level (32 bits) | max r level (8 bits) | ... + * +------------------+-----------------------+----------------------+ + * + * +----------------------+---------------------+---------------------+ + * | categories (32 bits) | max l cat (32 bits) | max r cat (16 bits) | ... + * +----------------------+---------------------+---------------------+ + * + * +--------------------------+-------------------------+ + * | local level #X (32 bits) | CIPSO level #X (8 bits) | ... repeated + * +--------------------------+-------------------------+ + * + * +-----------------------------+-----------------------------+ + * | local category #X (32 bits) | CIPSO category #X (16 bits) | ... repeated + * +-----------------------------+-----------------------------+ + * + * levels: the number of level mappings + * max l level: the highest local level + * max r level: the highest remote/CIPSO level + * categories: the number of category mappings + * max l cat: the highest local category + * max r cat: the highest remote/CIPSO category + * local level: the local part of a level mapping + * CIPSO level: the remote/CIPSO part of a level mapping + * local category: the local part of a category mapping + * CIPSO category: the remote/CIPSO part of a category mapping + * + * CIPSO_V4_MAP_PASS + * + * No mapping data is needed for this map type. + * + * o REMOVE: + * Sent by an application to remove a specific DOI mapping table from the + * CIPSO V4 system. The kernel should ACK this message. + * + * +---------------+ + * | DOI (32 bits) | + * +---------------+ + * + * DOI: the DOI value + * + * o LIST: + * Sent by an application to list the details of a DOI definition. The + * kernel should send an ACK on error or a response as indicated below. The + * application generated message format is shown below. + * + * +---------------+ + * | DOI (32 bits) | + * +---------------+ + * + * DOI: the DOI value + * + * The valid response message format depends on the type of the DOI mapping, + * the known formats are shown below. + * + * +--------------------+ + * | map type (32 bits) | ... + * +--------------------+ + * + * map type: the DOI mapping table type (defined in the cipso_ipv4.h + * header as CIPSO_V4_MAP_*) + * + * (map type == CIPSO_V4_MAP_STD) + * + * +----------------+------------------+----------------------+ + * | tags (32 bits) | levels (32 bits) | categories (32 bits) | ... + * +----------------+------------------+----------------------+ + * + * +-----------------+ + * | tag #X (8 bits) | ... repeated + * +-----------------+ + * + * +--------------------------+-------------------------+ + * | local level #X (32 bits) | CIPSO level #X (8 bits) | ... repeated + * +--------------------------+-------------------------+ + * + * +-----------------------------+-----------------------------+ + * | local category #X (32 bits) | CIPSO category #X (16 bits) | ... repeated + * +-----------------------------+-----------------------------+ + * + * tags: the number of CIPSO tag types + * levels: the number of level mappings + * categories: the number of category mappings + * tag: the tag number, tags listed first are given higher + * priority when sending packets + * local level: the local part of a level mapping + * CIPSO level: the remote/CIPSO part of a level mapping + * local category: the local part of a category mapping + * CIPSO category: the remote/CIPSO part of a category mapping + * + * (map type == CIPSO_V4_MAP_PASS) + * + * +----------------+ + * | tags (32 bits) | ... + * +----------------+ + * + * +-----------------+ + * | tag #X (8 bits) | ... repeated + * +-----------------+ + * + * tags: the number of CIPSO tag types + * tag: the tag number, tags listed first are given higher + * priority when sending packets + * + * o LISTALL: + * This message is sent by an application to list the valid DOIs on the + * system. There is no payload and the kernel should respond with an ACK + * or the following message. + * + * +---------------------+------------------+-----------------------+ + * | DOI count (32 bits) | DOI #X (32 bits) | map type #X (32 bits) | + * +---------------------+------------------+-----------------------+ + * + * +-----------------------+ + * | map type #X (32 bits) | ... + * +-----------------------+ + * + * DOI count: the number of DOIs + * DOI: the DOI value + * map type: the DOI mapping table type (defined in the cipso_ipv4.h + * header as CIPSO_V4_MAP_*) + * + */ + +/* NetLabel CIPSOv4 commands */ +enum { + NLBL_CIPSOV4_C_UNSPEC, + NLBL_CIPSOV4_C_ACK, + NLBL_CIPSOV4_C_ADD, + NLBL_CIPSOV4_C_REMOVE, + NLBL_CIPSOV4_C_LIST, + NLBL_CIPSOV4_C_LISTALL, + __NLBL_CIPSOV4_C_MAX, +}; +#define NLBL_CIPSOV4_C_MAX (__NLBL_CIPSOV4_C_MAX - 1) + +/* NetLabel protocol functions */ +int netlbl_cipsov4_genl_init(void); + +#endif diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c new file mode 100644 index 000000000000..5bb3fad4a115 --- /dev/null +++ b/net/netlabel/netlabel_domainhash.c @@ -0,0 +1,513 @@ +/* + * NetLabel Domain Hash Table + * + * This file manages the domain hash table that NetLabel uses to determine + * which network labeling protocol to use for a given domain. The NetLabel + * system manages static and dynamic label mappings for network protocols such + * as CIPSO and RIPSO. + * + * Author: Paul Moore + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "netlabel_mgmt.h" +#include "netlabel_domainhash.h" + +struct netlbl_domhsh_tbl { + struct list_head *tbl; + u32 size; +}; + +/* Domain hash table */ +/* XXX - updates should be so rare that having one spinlock for the entire + * hash table should be okay */ +DEFINE_SPINLOCK(netlbl_domhsh_lock); +static struct netlbl_domhsh_tbl *netlbl_domhsh = NULL; + +/* Default domain mapping */ +DEFINE_SPINLOCK(netlbl_domhsh_def_lock); +static struct netlbl_dom_map *netlbl_domhsh_def = NULL; + +/* + * Domain Hash Table Helper Functions + */ + +/** + * netlbl_domhsh_free_entry - Frees a domain hash table entry + * @entry: the entry's RCU field + * + * Description: + * This function is designed to be used as a callback to the call_rcu() + * function so that the memory allocated to a hash table entry can be released + * safely. + * + */ +static void netlbl_domhsh_free_entry(struct rcu_head *entry) +{ + struct netlbl_dom_map *ptr; + + ptr = container_of(entry, struct netlbl_dom_map, rcu); + kfree(ptr->domain); + kfree(ptr); +} + +/** + * netlbl_domhsh_hash - Hashing function for the domain hash table + * @domain: the domain name to hash + * + * Description: + * This is the hashing function for the domain hash table, it returns the + * correct bucket number for the domain. The caller is responsibile for + * calling the rcu_read_[un]lock() functions. + * + */ +static u32 netlbl_domhsh_hash(const char *key) +{ + u32 iter; + u32 val; + u32 len; + + /* This is taken (with slight modification) from + * security/selinux/ss/symtab.c:symhash() */ + + for (iter = 0, val = 0, len = strlen(key); iter < len; iter++) + val = (val << 4 | (val >> (8 * sizeof(u32) - 4))) ^ key[iter]; + return val & (rcu_dereference(netlbl_domhsh)->size - 1); +} + +/** + * netlbl_domhsh_search - Search for a domain entry + * @domain: the domain + * @def: return default if no match is found + * + * Description: + * Searches the domain hash table and returns a pointer to the hash table + * entry if found, otherwise NULL is returned. If @def is non-zero and a + * match is not found in the domain hash table the default mapping is returned + * if it exists. The caller is responsibile for the rcu hash table locks + * (i.e. the caller much call rcu_read_[un]lock()). + * + */ +static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain, u32 def) +{ + u32 bkt; + struct netlbl_dom_map *iter; + + if (domain != NULL) { + bkt = netlbl_domhsh_hash(domain); + list_for_each_entry_rcu(iter, &netlbl_domhsh->tbl[bkt], list) + if (iter->valid && strcmp(iter->domain, domain) == 0) + return iter; + } + + if (def != 0) { + iter = rcu_dereference(netlbl_domhsh_def); + if (iter != NULL && iter->valid) + return iter; + } + + return NULL; +} + +/* + * Domain Hash Table Functions + */ + +/** + * netlbl_domhsh_init - Init for the domain hash + * @size: the number of bits to use for the hash buckets + * + * Description: + * Initializes the domain hash table, should be called only by + * netlbl_user_init() during initialization. Returns zero on success, non-zero + * values on error. + * + */ +int netlbl_domhsh_init(u32 size) +{ + u32 iter; + struct netlbl_domhsh_tbl *hsh_tbl; + + if (size == 0) + return -EINVAL; + + hsh_tbl = kmalloc(sizeof(*hsh_tbl), GFP_KERNEL); + if (hsh_tbl == NULL) + return -ENOMEM; + hsh_tbl->size = 1 << size; + hsh_tbl->tbl = kcalloc(hsh_tbl->size, + sizeof(struct list_head), + GFP_KERNEL); + if (hsh_tbl->tbl == NULL) { + kfree(hsh_tbl); + return -ENOMEM; + } + for (iter = 0; iter < hsh_tbl->size; iter++) + INIT_LIST_HEAD(&hsh_tbl->tbl[iter]); + + rcu_read_lock(); + spin_lock(&netlbl_domhsh_lock); + rcu_assign_pointer(netlbl_domhsh, hsh_tbl); + spin_unlock(&netlbl_domhsh_lock); + rcu_read_unlock(); + + return 0; +} + +/** + * netlbl_domhsh_add - Adds a entry to the domain hash table + * @entry: the entry to add + * + * Description: + * Adds a new entry to the domain hash table and handles any updates to the + * lower level protocol handler (i.e. CIPSO). Returns zero on success, + * negative on failure. + * + */ +int netlbl_domhsh_add(struct netlbl_dom_map *entry) +{ + int ret_val; + u32 bkt; + + switch (entry->type) { + case NETLBL_NLTYPE_UNLABELED: + ret_val = 0; + break; + case NETLBL_NLTYPE_CIPSOV4: + ret_val = cipso_v4_doi_domhsh_add(entry->type_def.cipsov4, + entry->domain); + break; + default: + return -EINVAL; + } + if (ret_val != 0) + return ret_val; + + entry->valid = 1; + INIT_RCU_HEAD(&entry->rcu); + + ret_val = 0; + rcu_read_lock(); + if (entry->domain != NULL) { + bkt = netlbl_domhsh_hash(entry->domain); + spin_lock(&netlbl_domhsh_lock); + if (netlbl_domhsh_search(entry->domain, 0) == NULL) + list_add_tail_rcu(&entry->list, + &netlbl_domhsh->tbl[bkt]); + else + ret_val = -EEXIST; + spin_unlock(&netlbl_domhsh_lock); + } else if (entry->domain == NULL) { + INIT_LIST_HEAD(&entry->list); + spin_lock(&netlbl_domhsh_def_lock); + if (rcu_dereference(netlbl_domhsh_def) == NULL) + rcu_assign_pointer(netlbl_domhsh_def, entry); + else + ret_val = -EEXIST; + spin_unlock(&netlbl_domhsh_def_lock); + } else + ret_val = -EINVAL; + rcu_read_unlock(); + + if (ret_val != 0) { + switch (entry->type) { + case NETLBL_NLTYPE_CIPSOV4: + if (cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4, + entry->domain) != 0) + BUG(); + break; + } + } + + return ret_val; +} + +/** + * netlbl_domhsh_add_default - Adds the default entry to the domain hash table + * @entry: the entry to add + * + * Description: + * Adds a new default entry to the domain hash table and handles any updates + * to the lower level protocol handler (i.e. CIPSO). Returns zero on success, + * negative on failure. + * + */ +int netlbl_domhsh_add_default(struct netlbl_dom_map *entry) +{ + return netlbl_domhsh_add(entry); +} + +/** + * netlbl_domhsh_remove - Removes an entry from the domain hash table + * @domain: the domain to remove + * + * Description: + * Removes an entry from the domain hash table and handles any updates to the + * lower level protocol handler (i.e. CIPSO). Returns zero on success, + * negative on failure. + * + */ +int netlbl_domhsh_remove(const char *domain) +{ + int ret_val = -ENOENT; + struct netlbl_dom_map *entry; + + rcu_read_lock(); + if (domain != NULL) + entry = netlbl_domhsh_search(domain, 0); + else + entry = netlbl_domhsh_search(domain, 1); + if (entry == NULL) + goto remove_return; + switch (entry->type) { + case NETLBL_NLTYPE_UNLABELED: + break; + case NETLBL_NLTYPE_CIPSOV4: + ret_val = cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4, + entry->domain); + if (ret_val != 0) + goto remove_return; + break; + } + ret_val = 0; + if (entry != rcu_dereference(netlbl_domhsh_def)) { + spin_lock(&netlbl_domhsh_lock); + if (entry->valid) { + entry->valid = 0; + list_del_rcu(&entry->list); + } else + ret_val = -ENOENT; + spin_unlock(&netlbl_domhsh_lock); + } else { + spin_lock(&netlbl_domhsh_def_lock); + if (entry->valid) { + entry->valid = 0; + rcu_assign_pointer(netlbl_domhsh_def, NULL); + } else + ret_val = -ENOENT; + spin_unlock(&netlbl_domhsh_def_lock); + } + if (ret_val == 0) + call_rcu(&entry->rcu, netlbl_domhsh_free_entry); + +remove_return: + rcu_read_unlock(); + return ret_val; +} + +/** + * netlbl_domhsh_remove_default - Removes the default entry from the table + * + * Description: + * Removes/resets the default entry for the domain hash table and handles any + * updates to the lower level protocol handler (i.e. CIPSO). Returns zero on + * success, non-zero on failure. + * + */ +int netlbl_domhsh_remove_default(void) +{ + return netlbl_domhsh_remove(NULL); +} + +/** + * netlbl_domhsh_getentry - Get an entry from the domain hash table + * @domain: the domain name to search for + * + * Description: + * Look through the domain hash table searching for an entry to match @domain, + * return a pointer to a copy of the entry or NULL. The caller is responsibile + * for ensuring that rcu_read_[un]lock() is called. + * + */ +struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain) +{ + return netlbl_domhsh_search(domain, 1); +} + +/** + * netlbl_domhsh_dump - Dump the domain hash table into a sk_buff + * + * Description: + * Dump the domain hash table into a buffer suitable for returning to an + * application in response to a NetLabel management DOMAIN message. This + * function may fail if another process is growing the hash table at the same + * time. The returned sk_buff has room at the front of the sk_buff for + * @headroom bytes. See netlabel.h for the DOMAIN message format. Returns a + * pointer to a sk_buff on success, NULL on error. + * + */ +struct sk_buff *netlbl_domhsh_dump(size_t headroom) +{ + struct sk_buff *skb = NULL; + ssize_t buf_len; + u32 bkt_iter; + u32 dom_cnt = 0; + struct netlbl_domhsh_tbl *hsh_tbl; + struct netlbl_dom_map *list_iter; + ssize_t tmp_len; + + buf_len = NETLBL_LEN_U32; + rcu_read_lock(); + hsh_tbl = rcu_dereference(netlbl_domhsh); + for (bkt_iter = 0; bkt_iter < hsh_tbl->size; bkt_iter++) + list_for_each_entry_rcu(list_iter, + &hsh_tbl->tbl[bkt_iter], list) { + buf_len += NETLBL_LEN_U32 + + nla_total_size(strlen(list_iter->domain) + 1); + switch (list_iter->type) { + case NETLBL_NLTYPE_UNLABELED: + break; + case NETLBL_NLTYPE_CIPSOV4: + buf_len += 2 * NETLBL_LEN_U32; + break; + } + dom_cnt++; + } + + skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); + if (skb == NULL) + goto dump_failure; + + if (nla_put_u32(skb, NLA_U32, dom_cnt) != 0) + goto dump_failure; + buf_len -= NETLBL_LEN_U32; + hsh_tbl = rcu_dereference(netlbl_domhsh); + for (bkt_iter = 0; bkt_iter < hsh_tbl->size; bkt_iter++) + list_for_each_entry_rcu(list_iter, + &hsh_tbl->tbl[bkt_iter], list) { + tmp_len = nla_total_size(strlen(list_iter->domain) + + 1); + if (buf_len < NETLBL_LEN_U32 + tmp_len) + goto dump_failure; + if (nla_put_string(skb, + NLA_STRING, + list_iter->domain) != 0) + goto dump_failure; + if (nla_put_u32(skb, NLA_U32, list_iter->type) != 0) + goto dump_failure; + buf_len -= NETLBL_LEN_U32 + tmp_len; + switch (list_iter->type) { + case NETLBL_NLTYPE_UNLABELED: + break; + case NETLBL_NLTYPE_CIPSOV4: + if (buf_len < 2 * NETLBL_LEN_U32) + goto dump_failure; + if (nla_put_u32(skb, + NLA_U32, + list_iter->type_def.cipsov4->type) != 0) + goto dump_failure; + if (nla_put_u32(skb, + NLA_U32, + list_iter->type_def.cipsov4->doi) != 0) + goto dump_failure; + buf_len -= 2 * NETLBL_LEN_U32; + break; + } + } + rcu_read_unlock(); + + return skb; + +dump_failure: + rcu_read_unlock(); + kfree_skb(skb); + return NULL; +} + +/** + * netlbl_domhsh_dump_default - Dump the default domain mapping into a sk_buff + * + * Description: + * Dump the default domain mapping into a buffer suitable for returning to an + * application in response to a NetLabel management DEFDOMAIN message. This + * function may fail if another process is changing the default domain mapping + * at the same time. The returned sk_buff has room at the front of the + * skb_buff for @headroom bytes. See netlabel.h for the DEFDOMAIN message + * format. Returns a pointer to a sk_buff on success, NULL on error. + * + */ +struct sk_buff *netlbl_domhsh_dump_default(size_t headroom) +{ + struct sk_buff *skb; + ssize_t buf_len; + struct netlbl_dom_map *entry; + + buf_len = NETLBL_LEN_U32; + rcu_read_lock(); + entry = rcu_dereference(netlbl_domhsh_def); + if (entry != NULL) + switch (entry->type) { + case NETLBL_NLTYPE_UNLABELED: + break; + case NETLBL_NLTYPE_CIPSOV4: + buf_len += 2 * NETLBL_LEN_U32; + break; + } + + skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); + if (skb == NULL) + goto dump_default_failure; + + if (entry != rcu_dereference(netlbl_domhsh_def)) + goto dump_default_failure; + if (entry != NULL) { + if (nla_put_u32(skb, NLA_U32, entry->type) != 0) + goto dump_default_failure; + buf_len -= NETLBL_LEN_U32; + switch (entry->type) { + case NETLBL_NLTYPE_UNLABELED: + break; + case NETLBL_NLTYPE_CIPSOV4: + if (buf_len < 2 * NETLBL_LEN_U32) + goto dump_default_failure; + if (nla_put_u32(skb, + NLA_U32, + entry->type_def.cipsov4->type) != 0) + goto dump_default_failure; + if (nla_put_u32(skb, + NLA_U32, + entry->type_def.cipsov4->doi) != 0) + goto dump_default_failure; + buf_len -= 2 * NETLBL_LEN_U32; + break; + } + } else + nla_put_u32(skb, NLA_U32, NETLBL_NLTYPE_NONE); + rcu_read_unlock(); + + return skb; + +dump_default_failure: + rcu_read_unlock(); + kfree_skb(skb); + return NULL; +} diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h new file mode 100644 index 000000000000..9217863ce0d3 --- /dev/null +++ b/net/netlabel/netlabel_domainhash.h @@ -0,0 +1,63 @@ +/* + * NetLabel Domain Hash Table + * + * This file manages the domain hash table that NetLabel uses to determine + * which network labeling protocol to use for a given domain. The NetLabel + * system manages static and dynamic label mappings for network protocols such + * as CIPSO and RIPSO. + * + * Author: Paul Moore + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _NETLABEL_DOMAINHASH_H +#define _NETLABEL_DOMAINHASH_H + +/* Domain hash table size */ +/* XXX - currently this number is an uneducated guess */ +#define NETLBL_DOMHSH_BITSIZE 7 + +/* Domain mapping definition struct */ +struct netlbl_dom_map { + char *domain; + u32 type; + union { + struct cipso_v4_doi *cipsov4; + } type_def; + + u32 valid; + struct list_head list; + struct rcu_head rcu; +}; + +/* init function */ +int netlbl_domhsh_init(u32 size); + +/* Manipulate the domain hash table */ +int netlbl_domhsh_add(struct netlbl_dom_map *entry); +int netlbl_domhsh_add_default(struct netlbl_dom_map *entry); +int netlbl_domhsh_remove_default(void); +struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); +struct sk_buff *netlbl_domhsh_dump(size_t headroom); +struct sk_buff *netlbl_domhsh_dump_default(size_t headroom); + +#endif diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c new file mode 100644 index 000000000000..0fd8aaafe23f --- /dev/null +++ b/net/netlabel/netlabel_kapi.c @@ -0,0 +1,231 @@ +/* + * NetLabel Kernel API + * + * This file defines the kernel API for the NetLabel system. The NetLabel + * system manages static and dynamic label mappings for network protocols such + * as CIPSO and RIPSO. + * + * Author: Paul Moore + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include +#include + +#include "netlabel_domainhash.h" +#include "netlabel_unlabeled.h" +#include "netlabel_user.h" + +/* + * LSM Functions + */ + +/** + * netlbl_socket_setattr - Label a socket using the correct protocol + * @sock: the socket to label + * @secattr: the security attributes + * + * Description: + * Attach the correct label to the given socket using the security attributes + * specified in @secattr. This function requires exclusive access to + * @sock->sk, which means it either needs to be in the process of being + * created or locked via lock_sock(sock->sk). Returns zero on success, + * negative values on failure. + * + */ +int netlbl_socket_setattr(const struct socket *sock, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -ENOENT; + struct netlbl_dom_map *dom_entry; + + rcu_read_lock(); + dom_entry = netlbl_domhsh_getentry(secattr->domain); + if (dom_entry == NULL) + goto socket_setattr_return; + switch (dom_entry->type) { + case NETLBL_NLTYPE_CIPSOV4: + ret_val = cipso_v4_socket_setattr(sock, + dom_entry->type_def.cipsov4, + secattr); + break; + case NETLBL_NLTYPE_UNLABELED: + ret_val = 0; + break; + default: + ret_val = -ENOENT; + } + +socket_setattr_return: + rcu_read_unlock(); + return ret_val; +} + +/** + * netlbl_socket_getattr - Determine the security attributes of a socket + * @sock: the socket + * @secattr: the security attributes + * + * Description: + * Examines the given socket to see any NetLabel style labeling has been + * applied to the socket, if so it parses the socket label and returns the + * security attributes in @secattr. Returns zero on success, negative values + * on failure. + * + */ +int netlbl_socket_getattr(const struct socket *sock, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + + ret_val = cipso_v4_socket_getattr(sock, secattr); + if (ret_val == 0) + return 0; + + return netlbl_unlabel_getattr(secattr); +} + +/** + * netlbl_skbuff_getattr - Determine the security attributes of a packet + * @skb: the packet + * @secattr: the security attributes + * + * Description: + * Examines the given packet to see if a recognized form of packet labeling + * is present, if so it parses the packet label and returns the security + * attributes in @secattr. Returns zero on success, negative values on + * failure. + * + */ +int netlbl_skbuff_getattr(const struct sk_buff *skb, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + + ret_val = cipso_v4_skbuff_getattr(skb, secattr); + if (ret_val == 0) + return 0; + + return netlbl_unlabel_getattr(secattr); +} + +/** + * netlbl_skbuff_err - Handle a LSM error on a sk_buff + * @skb: the packet + * @error: the error code + * + * Description: + * Deal with a LSM problem when handling the packet in @skb, typically this is + * a permission denied problem (-EACCES). The correct action is determined + * according to the packet's labeling protocol. + * + */ +void netlbl_skbuff_err(struct sk_buff *skb, int error) +{ + if (CIPSO_V4_OPTEXIST(skb)) + cipso_v4_error(skb, error, 0); +} + +/** + * netlbl_cache_invalidate - Invalidate all of the NetLabel protocol caches + * + * Description: + * For all of the NetLabel protocols that support some form of label mapping + * cache, invalidate the cache. Returns zero on success, negative values on + * error. + * + */ +void netlbl_cache_invalidate(void) +{ + cipso_v4_cache_invalidate(); +} + +/** + * netlbl_cache_add - Add an entry to a NetLabel protocol cache + * @skb: the packet + * @secattr: the packet's security attributes + * + * Description: + * Add the LSM security attributes for the given packet to the underlying + * NetLabel protocol's label mapping cache. Returns zero on success, negative + * values on error. + * + */ +int netlbl_cache_add(const struct sk_buff *skb, + const struct netlbl_lsm_secattr *secattr) +{ + if (secattr->cache.data == NULL) + return -ENOMSG; + + if (CIPSO_V4_OPTEXIST(skb)) + return cipso_v4_cache_add(skb, secattr); + + return -ENOMSG; +} + +/* + * Setup Functions + */ + +/** + * netlbl_init - Initialize NetLabel + * + * Description: + * Perform the required NetLabel initialization before first use. + * + */ +static int __init netlbl_init(void) +{ + int ret_val; + + printk(KERN_INFO "NetLabel: Initializing\n"); + printk(KERN_INFO "NetLabel: domain hash size = %u\n", + (1 << NETLBL_DOMHSH_BITSIZE)); + printk(KERN_INFO "NetLabel: protocols =" + " UNLABELED" + " CIPSOv4" + "\n"); + + ret_val = netlbl_domhsh_init(NETLBL_DOMHSH_BITSIZE); + if (ret_val != 0) + goto init_failure; + + ret_val = netlbl_netlink_init(); + if (ret_val != 0) + goto init_failure; + + ret_val = netlbl_unlabel_defconf(); + if (ret_val != 0) + goto init_failure; + printk(KERN_INFO "NetLabel: unlabeled traffic allowed by default\n"); + + return 0; + +init_failure: + panic("NetLabel: failed to initialize properly (%d)\n", ret_val); +} + +subsys_initcall(netlbl_init); diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c new file mode 100644 index 000000000000..85bc11a1fc46 --- /dev/null +++ b/net/netlabel/netlabel_mgmt.c @@ -0,0 +1,624 @@ +/* + * NetLabel Management Support + * + * This file defines the management functions for the NetLabel system. The + * NetLabel system manages static and dynamic label mappings for network + * protocols such as CIPSO and RIPSO. + * + * Author: Paul Moore + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "netlabel_domainhash.h" +#include "netlabel_user.h" +#include "netlabel_mgmt.h" + +/* NetLabel Generic NETLINK CIPSOv4 family */ +static struct genl_family netlbl_mgmt_gnl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = NETLBL_NLTYPE_MGMT_NAME, + .version = NETLBL_PROTO_VERSION, + .maxattr = 0, +}; + + +/* + * NetLabel Command Handlers + */ + +/** + * netlbl_mgmt_add - Handle an ADD message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated ADD message and add the domains from the message + * to the hash table. See netlabel.h for a description of the message format. + * Returns zero on success, negative values on failure. + * + */ +static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) +{ + int ret_val = -EINVAL; + struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb); + int msg_len = netlbl_netlink_payload_len(skb); + u32 count; + struct netlbl_dom_map *entry = NULL; + u32 iter; + u32 tmp_val; + int tmp_size; + + ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); + if (ret_val != 0) + goto add_failure; + + if (msg_len < NETLBL_LEN_U32) + goto add_failure; + count = netlbl_getinc_u32(&msg_ptr, &msg_len); + + for (iter = 0; iter < count && msg_len > 0; iter++, entry = NULL) { + if (msg_len <= 0) { + ret_val = -EINVAL; + goto add_failure; + } + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (entry == NULL) { + ret_val = -ENOMEM; + goto add_failure; + } + tmp_size = nla_len(msg_ptr); + if (tmp_size <= 0 || tmp_size > msg_len) { + ret_val = -EINVAL; + goto add_failure; + } + entry->domain = kmalloc(tmp_size, GFP_KERNEL); + if (entry->domain == NULL) { + ret_val = -ENOMEM; + goto add_failure; + } + nla_strlcpy(entry->domain, msg_ptr, tmp_size); + entry->domain[tmp_size - 1] = '\0'; + msg_ptr = nla_next(msg_ptr, &msg_len); + + if (msg_len < NETLBL_LEN_U32) { + ret_val = -EINVAL; + goto add_failure; + } + tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len); + entry->type = tmp_val; + switch (tmp_val) { + case NETLBL_NLTYPE_UNLABELED: + ret_val = netlbl_domhsh_add(entry); + break; + case NETLBL_NLTYPE_CIPSOV4: + if (msg_len < NETLBL_LEN_U32) { + ret_val = -EINVAL; + goto add_failure; + } + tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len); + /* We should be holding a rcu_read_lock() here + * while we hold the result but since the entry + * will always be deleted when the CIPSO DOI + * is deleted we aren't going to keep the lock. */ + rcu_read_lock(); + entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val); + if (entry->type_def.cipsov4 == NULL) { + rcu_read_unlock(); + ret_val = -EINVAL; + goto add_failure; + } + ret_val = netlbl_domhsh_add(entry); + rcu_read_unlock(); + break; + default: + ret_val = -EINVAL; + } + if (ret_val != 0) + goto add_failure; + } + + netlbl_netlink_send_ack(info, + netlbl_mgmt_gnl_family.id, + NLBL_MGMT_C_ACK, + NETLBL_E_OK); + return 0; + +add_failure: + if (entry) + kfree(entry->domain); + kfree(entry); + netlbl_netlink_send_ack(info, + netlbl_mgmt_gnl_family.id, + NLBL_MGMT_C_ACK, + -ret_val); + return ret_val; +} + +/** + * netlbl_mgmt_remove - Handle a REMOVE message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated REMOVE message and remove the specified domain + * mappings. Returns zero on success, negative values on failure. + * + */ +static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info) +{ + int ret_val = -EINVAL; + struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb); + int msg_len = netlbl_netlink_payload_len(skb); + u32 count; + u32 iter; + int tmp_size; + unsigned char *domain; + + ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); + if (ret_val != 0) + goto remove_return; + + if (msg_len < NETLBL_LEN_U32) + goto remove_return; + count = netlbl_getinc_u32(&msg_ptr, &msg_len); + + for (iter = 0; iter < count && msg_len > 0; iter++) { + if (msg_len <= 0) { + ret_val = -EINVAL; + goto remove_return; + } + tmp_size = nla_len(msg_ptr); + domain = nla_data(msg_ptr); + if (tmp_size <= 0 || tmp_size > msg_len || + domain[tmp_size - 1] != '\0') { + ret_val = -EINVAL; + goto remove_return; + } + ret_val = netlbl_domhsh_remove(domain); + if (ret_val != 0) + goto remove_return; + msg_ptr = nla_next(msg_ptr, &msg_len); + } + + ret_val = 0; + +remove_return: + netlbl_netlink_send_ack(info, + netlbl_mgmt_gnl_family.id, + NLBL_MGMT_C_ACK, + -ret_val); + return ret_val; +} + +/** + * netlbl_mgmt_list - Handle a LIST message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated LIST message and dumps the domain hash table in a + * form suitable for use in a kernel generated LIST message. Returns zero on + * success, negative values on failure. + * + */ +static int netlbl_mgmt_list(struct sk_buff *skb, struct genl_info *info) +{ + int ret_val = -ENOMEM; + struct sk_buff *ans_skb; + + ans_skb = netlbl_domhsh_dump(NLMSG_SPACE(GENL_HDRLEN)); + if (ans_skb == NULL) + goto list_failure; + netlbl_netlink_hdr_push(ans_skb, + info->snd_pid, + 0, + netlbl_mgmt_gnl_family.id, + NLBL_MGMT_C_LIST); + + ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); + if (ret_val != 0) + goto list_failure; + + return 0; + +list_failure: + netlbl_netlink_send_ack(info, + netlbl_mgmt_gnl_family.id, + NLBL_MGMT_C_ACK, + -ret_val); + return ret_val; +} + +/** + * netlbl_mgmt_adddef - Handle an ADDDEF message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated ADDDEF message and respond accordingly. Returns + * zero on success, negative values on failure. + * + */ +static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info) +{ + int ret_val = -EINVAL; + struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb); + int msg_len = netlbl_netlink_payload_len(skb); + struct netlbl_dom_map *entry = NULL; + u32 tmp_val; + + ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); + if (ret_val != 0) + goto adddef_failure; + + if (msg_len < NETLBL_LEN_U32) + goto adddef_failure; + tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len); + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (entry == NULL) { + ret_val = -ENOMEM; + goto adddef_failure; + } + + entry->type = tmp_val; + switch (entry->type) { + case NETLBL_NLTYPE_UNLABELED: + ret_val = netlbl_domhsh_add_default(entry); + break; + case NETLBL_NLTYPE_CIPSOV4: + if (msg_len < NETLBL_LEN_U32) { + ret_val = -EINVAL; + goto adddef_failure; + } + tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len); + /* We should be holding a rcu_read_lock here while we + * hold the result but since the entry will always be + * deleted when the CIPSO DOI is deleted we are going + * to skip the lock. */ + rcu_read_lock(); + entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val); + if (entry->type_def.cipsov4 == NULL) { + rcu_read_unlock(); + ret_val = -EINVAL; + goto adddef_failure; + } + ret_val = netlbl_domhsh_add_default(entry); + rcu_read_unlock(); + break; + default: + ret_val = -EINVAL; + } + if (ret_val != 0) + goto adddef_failure; + + netlbl_netlink_send_ack(info, + netlbl_mgmt_gnl_family.id, + NLBL_MGMT_C_ACK, + NETLBL_E_OK); + return 0; + +adddef_failure: + kfree(entry); + netlbl_netlink_send_ack(info, + netlbl_mgmt_gnl_family.id, + NLBL_MGMT_C_ACK, + -ret_val); + return ret_val; +} + +/** + * netlbl_mgmt_removedef - Handle a REMOVEDEF message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated REMOVEDEF message and remove the default domain + * mapping. Returns zero on success, negative values on failure. + * + */ +static int netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info) +{ + int ret_val; + + ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); + if (ret_val != 0) + goto removedef_return; + + ret_val = netlbl_domhsh_remove_default(); + +removedef_return: + netlbl_netlink_send_ack(info, + netlbl_mgmt_gnl_family.id, + NLBL_MGMT_C_ACK, + -ret_val); + return ret_val; +} + +/** + * netlbl_mgmt_listdef - Handle a LISTDEF message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated LISTDEF message and dumps the default domain + * mapping in a form suitable for use in a kernel generated LISTDEF message. + * Returns zero on success, negative values on failure. + * + */ +static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info) +{ + int ret_val = -ENOMEM; + struct sk_buff *ans_skb; + + ans_skb = netlbl_domhsh_dump_default(NLMSG_SPACE(GENL_HDRLEN)); + if (ans_skb == NULL) + goto listdef_failure; + netlbl_netlink_hdr_push(ans_skb, + info->snd_pid, + 0, + netlbl_mgmt_gnl_family.id, + NLBL_MGMT_C_LISTDEF); + + ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); + if (ret_val != 0) + goto listdef_failure; + + return 0; + +listdef_failure: + netlbl_netlink_send_ack(info, + netlbl_mgmt_gnl_family.id, + NLBL_MGMT_C_ACK, + -ret_val); + return ret_val; +} + +/** + * netlbl_mgmt_modules - Handle a MODULES message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated MODULES message and respond accordingly. + * + */ +static int netlbl_mgmt_modules(struct sk_buff *skb, struct genl_info *info) +{ + int ret_val = -ENOMEM; + size_t data_size; + u32 mod_count; + struct sk_buff *ans_skb = NULL; + + /* unlabeled + cipsov4 */ + mod_count = 2; + + data_size = GENL_HDRLEN + NETLBL_LEN_U32 + mod_count * NETLBL_LEN_U32; + ans_skb = netlbl_netlink_alloc_skb(0, data_size, GFP_KERNEL); + if (ans_skb == NULL) + goto modules_failure; + + if (netlbl_netlink_hdr_put(ans_skb, + info->snd_pid, + 0, + netlbl_mgmt_gnl_family.id, + NLBL_MGMT_C_MODULES) == NULL) + goto modules_failure; + + ret_val = nla_put_u32(ans_skb, NLA_U32, mod_count); + if (ret_val != 0) + goto modules_failure; + ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_NLTYPE_UNLABELED); + if (ret_val != 0) + goto modules_failure; + ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_NLTYPE_CIPSOV4); + if (ret_val != 0) + goto modules_failure; + + ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); + if (ret_val != 0) + goto modules_failure; + + return 0; + +modules_failure: + kfree_skb(ans_skb); + netlbl_netlink_send_ack(info, + netlbl_mgmt_gnl_family.id, + NLBL_MGMT_C_ACK, + -ret_val); + return ret_val; +} + +/** + * netlbl_mgmt_version - Handle a VERSION message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated VERSION message and respond accordingly. Returns + * zero on success, negative values on failure. + * + */ +static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info) +{ + int ret_val = -ENOMEM; + struct sk_buff *ans_skb = NULL; + + ans_skb = netlbl_netlink_alloc_skb(0, + GENL_HDRLEN + NETLBL_LEN_U32, + GFP_KERNEL); + if (ans_skb == NULL) + goto version_failure; + if (netlbl_netlink_hdr_put(ans_skb, + info->snd_pid, + 0, + netlbl_mgmt_gnl_family.id, + NLBL_MGMT_C_VERSION) == NULL) + goto version_failure; + + ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_PROTO_VERSION); + if (ret_val != 0) + goto version_failure; + + ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); + if (ret_val != 0) + goto version_failure; + + return 0; + +version_failure: + kfree_skb(ans_skb); + netlbl_netlink_send_ack(info, + netlbl_mgmt_gnl_family.id, + NLBL_MGMT_C_ACK, + -ret_val); + return ret_val; +} + + +/* + * NetLabel Generic NETLINK Command Definitions + */ + +static struct genl_ops netlbl_mgmt_genl_c_add = { + .cmd = NLBL_MGMT_C_ADD, + .flags = 0, + .doit = netlbl_mgmt_add, + .dumpit = NULL, +}; + +static struct genl_ops netlbl_mgmt_genl_c_remove = { + .cmd = NLBL_MGMT_C_REMOVE, + .flags = 0, + .doit = netlbl_mgmt_remove, + .dumpit = NULL, +}; + +static struct genl_ops netlbl_mgmt_genl_c_list = { + .cmd = NLBL_MGMT_C_LIST, + .flags = 0, + .doit = netlbl_mgmt_list, + .dumpit = NULL, +}; + +static struct genl_ops netlbl_mgmt_genl_c_adddef = { + .cmd = NLBL_MGMT_C_ADDDEF, + .flags = 0, + .doit = netlbl_mgmt_adddef, + .dumpit = NULL, +}; + +static struct genl_ops netlbl_mgmt_genl_c_removedef = { + .cmd = NLBL_MGMT_C_REMOVEDEF, + .flags = 0, + .doit = netlbl_mgmt_removedef, + .dumpit = NULL, +}; + +static struct genl_ops netlbl_mgmt_genl_c_listdef = { + .cmd = NLBL_MGMT_C_LISTDEF, + .flags = 0, + .doit = netlbl_mgmt_listdef, + .dumpit = NULL, +}; + +static struct genl_ops netlbl_mgmt_genl_c_modules = { + .cmd = NLBL_MGMT_C_MODULES, + .flags = 0, + .doit = netlbl_mgmt_modules, + .dumpit = NULL, +}; + +static struct genl_ops netlbl_mgmt_genl_c_version = { + .cmd = NLBL_MGMT_C_VERSION, + .flags = 0, + .doit = netlbl_mgmt_version, + .dumpit = NULL, +}; + +/* + * NetLabel Generic NETLINK Protocol Functions + */ + +/** + * netlbl_mgmt_genl_init - Register the NetLabel management component + * + * Description: + * Register the NetLabel management component with the Generic NETLINK + * mechanism. Returns zero on success, negative values on failure. + * + */ +int netlbl_mgmt_genl_init(void) +{ + int ret_val; + + ret_val = genl_register_family(&netlbl_mgmt_gnl_family); + if (ret_val != 0) + return ret_val; + + ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, + &netlbl_mgmt_genl_c_add); + if (ret_val != 0) + return ret_val; + ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, + &netlbl_mgmt_genl_c_remove); + if (ret_val != 0) + return ret_val; + ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, + &netlbl_mgmt_genl_c_list); + if (ret_val != 0) + return ret_val; + ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, + &netlbl_mgmt_genl_c_adddef); + if (ret_val != 0) + return ret_val; + ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, + &netlbl_mgmt_genl_c_removedef); + if (ret_val != 0) + return ret_val; + ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, + &netlbl_mgmt_genl_c_listdef); + if (ret_val != 0) + return ret_val; + ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, + &netlbl_mgmt_genl_c_modules); + if (ret_val != 0) + return ret_val; + ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, + &netlbl_mgmt_genl_c_version); + if (ret_val != 0) + return ret_val; + + return 0; +} diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h new file mode 100644 index 000000000000..fd6c6acbfa08 --- /dev/null +++ b/net/netlabel/netlabel_mgmt.h @@ -0,0 +1,246 @@ +/* + * NetLabel Management Support + * + * This file defines the management functions for the NetLabel system. The + * NetLabel system manages static and dynamic label mappings for network + * protocols such as CIPSO and RIPSO. + * + * Author: Paul Moore + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _NETLABEL_MGMT_H +#define _NETLABEL_MGMT_H + +#include + +/* + * The following NetLabel payloads are supported by the management interface, + * all of which are preceeded by the nlmsghdr struct. + * + * o ACK: + * Sent by the kernel in response to an applications message, applications + * should never send this message. + * + * +----------------------+-----------------------+ + * | seq number (32 bits) | return code (32 bits) | + * +----------------------+-----------------------+ + * + * seq number: the sequence number of the original message, taken from the + * nlmsghdr structure + * return code: return value, based on errno values + * + * o ADD: + * Sent by an application to add a domain mapping to the NetLabel system. + * The kernel should respond with an ACK. + * + * +-------------------+ + * | domains (32 bits) | ... + * +-------------------+ + * + * domains: the number of domains in the message + * + * +--------------------------+-------------------------+ + * | domain string (variable) | protocol type (32 bits) | ... + * +--------------------------+-------------------------+ + * + * +-------------- ---- --- -- - + * | mapping data ... repeated + * +-------------- ---- --- -- - + * + * domain string: the domain string, NULL terminated + * protocol type: the protocol type (defined by NETLBL_NLTYPE_*) + * mapping data: specific to the map type (see below) + * + * NETLBL_NLTYPE_UNLABELED + * + * No mapping data for this protocol type. + * + * NETLBL_NLTYPE_CIPSOV4 + * + * +---------------+ + * | doi (32 bits) | + * +---------------+ + * + * doi: the CIPSO DOI value + * + * o REMOVE: + * Sent by an application to remove a domain mapping from the NetLabel + * system. The kernel should ACK this message. + * + * +-------------------+ + * | domains (32 bits) | ... + * +-------------------+ + * + * domains: the number of domains in the message + * + * +--------------------------+ + * | domain string (variable) | ... + * +--------------------------+ + * + * domain string: the domain string, NULL terminated + * + * o LIST: + * This message can be sent either from an application or by the kernel in + * response to an application generated LIST message. When sent by an + * application there is no payload. The kernel should respond to a LIST + * message either with a LIST message on success or an ACK message on + * failure. + * + * +-------------------+ + * | domains (32 bits) | ... + * +-------------------+ + * + * domains: the number of domains in the message + * + * +--------------------------+ + * | domain string (variable) | ... + * +--------------------------+ + * + * +-------------------------+-------------- ---- --- -- - + * | protocol type (32 bits) | mapping data ... repeated + * +-------------------------+-------------- ---- --- -- - + * + * domain string: the domain string, NULL terminated + * protocol type: the protocol type (defined by NETLBL_NLTYPE_*) + * mapping data: specific to the map type (see below) + * + * NETLBL_NLTYPE_UNLABELED + * + * No mapping data for this protocol type. + * + * NETLBL_NLTYPE_CIPSOV4 + * + * +----------------+---------------+ + * | type (32 bits) | doi (32 bits) | + * +----------------+---------------+ + * + * type: the CIPSO mapping table type (defined in the cipso_ipv4.h header + * as CIPSO_V4_MAP_*) + * doi: the CIPSO DOI value + * + * o ADDDEF: + * Sent by an application to set the default domain mapping for the NetLabel + * system. The kernel should respond with an ACK. + * + * +-------------------------+-------------- ---- --- -- - + * | protocol type (32 bits) | mapping data ... repeated + * +-------------------------+-------------- ---- --- -- - + * + * protocol type: the protocol type (defined by NETLBL_NLTYPE_*) + * mapping data: specific to the map type (see below) + * + * NETLBL_NLTYPE_UNLABELED + * + * No mapping data for this protocol type. + * + * NETLBL_NLTYPE_CIPSOV4 + * + * +---------------+ + * | doi (32 bits) | + * +---------------+ + * + * doi: the CIPSO DOI value + * + * o REMOVEDEF: + * Sent by an application to remove the default domain mapping from the + * NetLabel system, there is no payload. The kernel should ACK this message. + * + * o LISTDEF: + * This message can be sent either from an application or by the kernel in + * response to an application generated LISTDEF message. When sent by an + * application there is no payload. The kernel should respond to a + * LISTDEF message either with a LISTDEF message on success or an ACK message + * on failure. + * + * +-------------------------+-------------- ---- --- -- - + * | protocol type (32 bits) | mapping data ... repeated + * +-------------------------+-------------- ---- --- -- - + * + * protocol type: the protocol type (defined by NETLBL_NLTYPE_*) + * mapping data: specific to the map type (see below) + * + * NETLBL_NLTYPE_UNLABELED + * + * No mapping data for this protocol type. + * + * NETLBL_NLTYPE_CIPSOV4 + * + * +----------------+---------------+ + * | type (32 bits) | doi (32 bits) | + * +----------------+---------------+ + * + * type: the CIPSO mapping table type (defined in the cipso_ipv4.h header + * as CIPSO_V4_MAP_*) + * doi: the CIPSO DOI value + * + * o MODULES: + * Sent by an application to request a list of configured NetLabel modules + * in the kernel. When sent by an application there is no payload. + * + * +-------------------+ + * | modules (32 bits) | ... + * +-------------------+ + * + * modules: the number of modules in the message, if this is an application + * generated message and the value is zero then return a list of + * the configured modules + * + * +------------------+ + * | module (32 bits) | ... repeated + * +------------------+ + * + * module: the module number as defined by NETLBL_NLTYPE_* + * + * o VERSION: + * Sent by an application to request the NetLabel version string. When sent + * by an application there is no payload. This message type is also used by + * the kernel to respond to an VERSION request. + * + * +-------------------+ + * | version (32 bits) | + * +-------------------+ + * + * version: the protocol version number + * + */ + +/* NetLabel Management commands */ +enum { + NLBL_MGMT_C_UNSPEC, + NLBL_MGMT_C_ACK, + NLBL_MGMT_C_ADD, + NLBL_MGMT_C_REMOVE, + NLBL_MGMT_C_LIST, + NLBL_MGMT_C_ADDDEF, + NLBL_MGMT_C_REMOVEDEF, + NLBL_MGMT_C_LISTDEF, + NLBL_MGMT_C_MODULES, + NLBL_MGMT_C_VERSION, + __NLBL_MGMT_C_MAX, +}; +#define NLBL_MGMT_C_MAX (__NLBL_MGMT_C_MAX - 1) + +/* NetLabel protocol functions */ +int netlbl_mgmt_genl_init(void); + +#endif diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h new file mode 100644 index 000000000000..f300e54e14b6 --- /dev/null +++ b/net/netlabel/netlabel_unlabeled.h @@ -0,0 +1,98 @@ +/* + * NetLabel Unlabeled Support + * + * This file defines functions for dealing with unlabeled packets for the + * NetLabel system. The NetLabel system manages static and dynamic label + * mappings for network protocols such as CIPSO and RIPSO. + * + * Author: Paul Moore + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _NETLABEL_UNLABELED_H +#define _NETLABEL_UNLABELED_H + +#include + +/* + * The following NetLabel payloads are supported by the Unlabeled subsystem. + * + * o ACK: + * Sent by the kernel in response to an applications message, applications + * should never send this message. + * + * +----------------------+-----------------------+ + * | seq number (32 bits) | return code (32 bits) | + * +----------------------+-----------------------+ + * + * seq number: the sequence number of the original message, taken from the + * nlmsghdr structure + * return code: return value, based on errno values + * + * o ACCEPT + * This message is sent from an application to specify if the kernel should + * allow unlabled packets to pass if they do not match any of the static + * mappings defined in the unlabeled module. + * + * +-----------------+ + * | allow (32 bits) | + * +-----------------+ + * + * allow: if true (1) then allow the packets to pass, if false (0) then + * reject the packets + * + * o LIST + * This message can be sent either from an application or by the kernel in + * response to an application generated LIST message. When sent by an + * application there is no payload. The kernel should respond to a LIST + * message either with a LIST message on success or an ACK message on + * failure. + * + * +-----------------------+ + * | accept flag (32 bits) | + * +-----------------------+ + * + * accept flag: if true (1) then unlabeled packets are allowed to pass, + * if false (0) then unlabeled packets are rejected + * + */ + +/* NetLabel Unlabeled commands */ +enum { + NLBL_UNLABEL_C_UNSPEC, + NLBL_UNLABEL_C_ACK, + NLBL_UNLABEL_C_ACCEPT, + NLBL_UNLABEL_C_LIST, + __NLBL_UNLABEL_C_MAX, +}; +#define NLBL_UNLABEL_C_MAX (__NLBL_UNLABEL_C_MAX - 1) + +/* NetLabel protocol functions */ +int netlbl_unlabel_genl_init(void); + +/* Process Unlabeled incoming network packets */ +int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr); + +/* Set the default configuration to allow Unlabeled packets */ +int netlbl_unlabel_defconf(void); + +#endif diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c new file mode 100644 index 000000000000..80022221b0a7 --- /dev/null +++ b/net/netlabel/netlabel_user.c @@ -0,0 +1,158 @@ +/* + * NetLabel NETLINK Interface + * + * This file defines the NETLINK interface for the NetLabel system. The + * NetLabel system manages static and dynamic label mappings for network + * protocols such as CIPSO and RIPSO. + * + * Author: Paul Moore + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "netlabel_mgmt.h" +#include "netlabel_unlabeled.h" +#include "netlabel_cipso_v4.h" +#include "netlabel_user.h" + +/* + * NetLabel NETLINK Setup Functions + */ + +/** + * netlbl_netlink_init - Initialize the NETLINK communication channel + * + * Description: + * Call out to the NetLabel components so they can register their families and + * commands with the Generic NETLINK mechanism. Returns zero on success and + * non-zero on failure. + * + */ +int netlbl_netlink_init(void) +{ + int ret_val; + + ret_val = netlbl_mgmt_genl_init(); + if (ret_val != 0) + return ret_val; + + ret_val = netlbl_cipsov4_genl_init(); + if (ret_val != 0) + return ret_val; + + ret_val = netlbl_unlabel_genl_init(); + if (ret_val != 0) + return ret_val; + + return 0; +} + +/* + * NetLabel Common Protocol Functions + */ + +/** + * netlbl_netlink_send_ack - Send an ACK message + * @info: the generic NETLINK information + * @genl_family: the generic NETLINK family ID value + * @ack_cmd: the generic NETLINK family ACK command value + * @ret_code: return code to use + * + * Description: + * This function sends an ACK message to the sender of the NETLINK message + * specified by @info. + * + */ +void netlbl_netlink_send_ack(const struct genl_info *info, + u32 genl_family, + u8 ack_cmd, + u32 ret_code) +{ + size_t data_size; + struct sk_buff *skb; + + data_size = GENL_HDRLEN + 2 * NETLBL_LEN_U32; + skb = netlbl_netlink_alloc_skb(0, data_size, GFP_KERNEL); + if (skb == NULL) + return; + + if (netlbl_netlink_hdr_put(skb, + info->snd_pid, + 0, + genl_family, + ack_cmd) == NULL) + goto send_ack_failure; + + if (nla_put_u32(skb, NLA_U32, info->snd_seq) != 0) + goto send_ack_failure; + if (nla_put_u32(skb, NLA_U32, ret_code) != 0) + goto send_ack_failure; + + netlbl_netlink_snd(skb, info->snd_pid); + return; + +send_ack_failure: + kfree_skb(skb); +} + +/* + * NETLINK I/O Functions + */ + +/** + * netlbl_netlink_snd - Send a NetLabel message + * @skb: NetLabel message + * @pid: destination PID + * + * Description: + * Sends a unicast NetLabel message over the NETLINK socket. + * + */ +int netlbl_netlink_snd(struct sk_buff *skb, u32 pid) +{ + return genlmsg_unicast(skb, pid); +} + +/** + * netlbl_netlink_snd - Send a NetLabel message + * @skb: NetLabel message + * @pid: sending PID + * @group: multicast group id + * + * Description: + * Sends a multicast NetLabel message over the NETLINK socket to all members + * of @group except @pid. + * + */ +int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group) +{ + return genlmsg_multicast(skb, pid, group); +} diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h new file mode 100644 index 000000000000..ccf237b3a128 --- /dev/null +++ b/net/netlabel/netlabel_user.h @@ -0,0 +1,214 @@ +/* + * NetLabel NETLINK Interface + * + * This file defines the NETLINK interface for the NetLabel system. The + * NetLabel system manages static and dynamic label mappings for network + * protocols such as CIPSO and RIPSO. + * + * Author: Paul Moore + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _NETLABEL_USER_H +#define _NETLABEL_USER_H + +#include +#include +#include +#include +#include + +/* NetLabel NETLINK helper functions */ + +/** + * netlbl_netlink_cap_check - Check the NETLINK msg capabilities + * @skb: the NETLINK buffer + * @req_cap: the required capability + * + * Description: + * Check the NETLINK buffer's capabilities against the required capabilities. + * Returns zero on success, negative values on failure. + * + */ +static inline int netlbl_netlink_cap_check(const struct sk_buff *skb, + kernel_cap_t req_cap) +{ + if (cap_raised(NETLINK_CB(skb).eff_cap, req_cap)) + return 0; + return -EPERM; +} + +/** + * netlbl_getinc_u8 - Read a u8 value from a nlattr stream and move on + * @nla: the attribute + * @rem_len: remaining length + * + * Description: + * Return a u8 value pointed to by @nla and advance it to the next attribute. + * + */ +static inline u8 netlbl_getinc_u8(struct nlattr **nla, int *rem_len) +{ + u8 val = nla_get_u8(*nla); + *nla = nla_next(*nla, rem_len); + return val; +} + +/** + * netlbl_getinc_u16 - Read a u16 value from a nlattr stream and move on + * @nla: the attribute + * @rem_len: remaining length + * + * Description: + * Return a u16 value pointed to by @nla and advance it to the next attribute. + * + */ +static inline u16 netlbl_getinc_u16(struct nlattr **nla, int *rem_len) +{ + u16 val = nla_get_u16(*nla); + *nla = nla_next(*nla, rem_len); + return val; +} + +/** + * netlbl_getinc_u32 - Read a u32 value from a nlattr stream and move on + * @nla: the attribute + * @rem_len: remaining length + * + * Description: + * Return a u32 value pointed to by @nla and advance it to the next attribute. + * + */ +static inline u32 netlbl_getinc_u32(struct nlattr **nla, int *rem_len) +{ + u32 val = nla_get_u32(*nla); + *nla = nla_next(*nla, rem_len); + return val; +} + +/** + * netlbl_netlink_hdr_put - Write the NETLINK buffers into a sk_buff + * @skb: the packet + * @pid: the PID of the receipient + * @seq: the sequence number + * @type: the generic NETLINK message family type + * @cmd: command + * + * Description: + * Write both a NETLINK nlmsghdr structure and a Generic NETLINK genlmsghdr + * struct to the packet. Returns a pointer to the start of the payload buffer + * on success or NULL on failure. + * + */ +static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb, + u32 pid, + u32 seq, + int type, + u8 cmd) +{ + return genlmsg_put(skb, + pid, + seq, + type, + 0, + 0, + cmd, + NETLBL_PROTO_VERSION); +} + +/** + * netlbl_netlink_hdr_push - Write the NETLINK buffers into a sk_buff + * @skb: the packet + * @pid: the PID of the receipient + * @seq: the sequence number + * @type: the generic NETLINK message family type + * @cmd: command + * + * Description: + * Write both a NETLINK nlmsghdr structure and a Generic NETLINK genlmsghdr + * struct to the packet. + * + */ +static inline void netlbl_netlink_hdr_push(struct sk_buff *skb, + u32 pid, + u32 seq, + int type, + u8 cmd) + +{ + struct nlmsghdr *nlh; + struct genlmsghdr *hdr; + + nlh = (struct nlmsghdr *)skb_push(skb, NLMSG_SPACE(GENL_HDRLEN)); + nlh->nlmsg_type = type; + nlh->nlmsg_len = skb->len; + nlh->nlmsg_flags = 0; + nlh->nlmsg_pid = pid; + nlh->nlmsg_seq = seq; + + hdr = nlmsg_data(nlh); + hdr->cmd = cmd; + hdr->version = NETLBL_PROTO_VERSION; + hdr->reserved = 0; +} + +/** + * netlbl_netlink_payload_len - Return the length of the payload + * @skb: the NETLINK buffer + * + * Description: + * This function returns the length of the NetLabel payload. + * + */ +static inline u32 netlbl_netlink_payload_len(const struct sk_buff *skb) +{ + return nlmsg_len((struct nlmsghdr *)skb->data) - GENL_HDRLEN; +} + +/** + * netlbl_netlink_payload_data - Returns a pointer to the start of the payload + * @skb: the NETLINK buffer + * + * Description: + * This function returns a pointer to the start of the NetLabel payload. + * + */ +static inline void *netlbl_netlink_payload_data(const struct sk_buff *skb) +{ + return (unsigned char *)nlmsg_data((struct nlmsghdr *)skb->data) + + GENL_HDRLEN; +} + +/* NetLabel common protocol functions */ + +void netlbl_netlink_send_ack(const struct genl_info *info, + u32 genl_family, + u8 ack_cmd, + u32 ret_code); + +/* NetLabel NETLINK I/O functions */ + +int netlbl_netlink_init(void); +int netlbl_netlink_snd(struct sk_buff *skb, u32 pid); +int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group); + +#endif From 96cb8e3313c7a12e026c1ed510522ae6f6023875 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 3 Aug 2006 16:48:59 -0700 Subject: [PATCH 0392/1063] [NetLabel]: CIPSOv4 and Unlabeled packet integration Add CIPSO/IPv4 and unlabeled packet management to the NetLabel subsystem. The CIPSO/IPv4 changes allow the configuration of CIPSO/IPv4 within the overall NetLabel framework. The unlabeled packet changes allows NetLabel to pass unlabeled packets without error. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- net/netlabel/netlabel_cipso_v4.c | 542 ++++++++++++++++++++++++++++++ net/netlabel/netlabel_unlabeled.c | 253 ++++++++++++++ 2 files changed, 795 insertions(+) create mode 100644 net/netlabel/netlabel_cipso_v4.c create mode 100644 net/netlabel/netlabel_unlabeled.c diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c new file mode 100644 index 000000000000..a4f40adc447b --- /dev/null +++ b/net/netlabel/netlabel_cipso_v4.c @@ -0,0 +1,542 @@ +/* + * NetLabel CIPSO/IPv4 Support + * + * This file defines the CIPSO/IPv4 functions for the NetLabel system. The + * NetLabel system manages static and dynamic label mappings for network + * protocols such as CIPSO and RIPSO. + * + * Author: Paul Moore + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "netlabel_user.h" +#include "netlabel_cipso_v4.h" + +/* NetLabel Generic NETLINK CIPSOv4 family */ +static struct genl_family netlbl_cipsov4_gnl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = NETLBL_NLTYPE_CIPSOV4_NAME, + .version = NETLBL_PROTO_VERSION, + .maxattr = 0, +}; + + +/* + * Helper Functions + */ + +/** + * netlbl_cipsov4_doi_free - Frees a CIPSO V4 DOI definition + * @entry: the entry's RCU field + * + * Description: + * This function is designed to be used as a callback to the call_rcu() + * function so that the memory allocated to the DOI definition can be released + * safely. + * + */ +static void netlbl_cipsov4_doi_free(struct rcu_head *entry) +{ + struct cipso_v4_doi *ptr; + + ptr = container_of(entry, struct cipso_v4_doi, rcu); + switch (ptr->type) { + case CIPSO_V4_MAP_STD: + kfree(ptr->map.std->lvl.cipso); + kfree(ptr->map.std->lvl.local); + kfree(ptr->map.std->cat.cipso); + kfree(ptr->map.std->cat.local); + break; + } + kfree(ptr); +} + + +/* + * NetLabel Command Handlers + */ + +/** + * netlbl_cipsov4_add_std - Adds a CIPSO V4 DOI definition + * @doi: the DOI value + * @msg: the ADD message data + * @msg_size: the size of the ADD message buffer + * + * Description: + * Create a new CIPSO_V4_MAP_STD DOI definition based on the given ADD message + * and add it to the CIPSO V4 engine. Return zero on success and non-zero on + * error. + * + */ +static int netlbl_cipsov4_add_std(u32 doi, struct nlattr *msg, size_t msg_size) +{ + int ret_val = -EINVAL; + int msg_len = msg_size; + u32 num_tags; + u32 num_lvls; + u32 num_cats; + struct cipso_v4_doi *doi_def = NULL; + u32 iter; + u32 tmp_val_a; + u32 tmp_val_b; + + if (msg_len < NETLBL_LEN_U32) + goto add_std_failure; + num_tags = netlbl_getinc_u32(&msg, &msg_len); + if (num_tags == 0 || num_tags > CIPSO_V4_TAG_MAXCNT) + goto add_std_failure; + + doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); + if (doi_def == NULL) { + ret_val = -ENOMEM; + goto add_std_failure; + } + doi_def->map.std = kzalloc(sizeof(*doi_def->map.std), GFP_KERNEL); + if (doi_def->map.std == NULL) { + ret_val = -ENOMEM; + goto add_std_failure; + } + doi_def->type = CIPSO_V4_MAP_STD; + + for (iter = 0; iter < num_tags; iter++) { + if (msg_len < NETLBL_LEN_U8) + goto add_std_failure; + doi_def->tags[iter] = netlbl_getinc_u8(&msg, &msg_len); + switch (doi_def->tags[iter]) { + case CIPSO_V4_TAG_RBITMAP: + break; + default: + goto add_std_failure; + } + } + if (iter < CIPSO_V4_TAG_MAXCNT) + doi_def->tags[iter] = CIPSO_V4_TAG_INVALID; + + if (msg_len < 6 * NETLBL_LEN_U32) + goto add_std_failure; + + num_lvls = netlbl_getinc_u32(&msg, &msg_len); + if (num_lvls == 0) + goto add_std_failure; + doi_def->map.std->lvl.local_size = netlbl_getinc_u32(&msg, &msg_len); + if (doi_def->map.std->lvl.local_size > CIPSO_V4_MAX_LOC_LVLS) + goto add_std_failure; + doi_def->map.std->lvl.local = kcalloc(doi_def->map.std->lvl.local_size, + sizeof(u32), + GFP_KERNEL); + if (doi_def->map.std->lvl.local == NULL) { + ret_val = -ENOMEM; + goto add_std_failure; + } + doi_def->map.std->lvl.cipso_size = netlbl_getinc_u8(&msg, &msg_len); + if (doi_def->map.std->lvl.cipso_size > CIPSO_V4_MAX_REM_LVLS) + goto add_std_failure; + doi_def->map.std->lvl.cipso = kcalloc(doi_def->map.std->lvl.cipso_size, + sizeof(u32), + GFP_KERNEL); + if (doi_def->map.std->lvl.cipso == NULL) { + ret_val = -ENOMEM; + goto add_std_failure; + } + + num_cats = netlbl_getinc_u32(&msg, &msg_len); + doi_def->map.std->cat.local_size = netlbl_getinc_u32(&msg, &msg_len); + if (doi_def->map.std->cat.local_size > CIPSO_V4_MAX_LOC_CATS) + goto add_std_failure; + doi_def->map.std->cat.local = kcalloc(doi_def->map.std->cat.local_size, + sizeof(u32), + GFP_KERNEL); + if (doi_def->map.std->cat.local == NULL) { + ret_val = -ENOMEM; + goto add_std_failure; + } + doi_def->map.std->cat.cipso_size = netlbl_getinc_u16(&msg, &msg_len); + if (doi_def->map.std->cat.cipso_size > CIPSO_V4_MAX_REM_CATS) + goto add_std_failure; + doi_def->map.std->cat.cipso = kcalloc(doi_def->map.std->cat.cipso_size, + sizeof(u32), + GFP_KERNEL); + if (doi_def->map.std->cat.cipso == NULL) { + ret_val = -ENOMEM; + goto add_std_failure; + } + + if (msg_len < + num_lvls * (NETLBL_LEN_U32 + NETLBL_LEN_U8) + + num_cats * (NETLBL_LEN_U32 + NETLBL_LEN_U16)) + goto add_std_failure; + + for (iter = 0; iter < doi_def->map.std->lvl.cipso_size; iter++) + doi_def->map.std->lvl.cipso[iter] = CIPSO_V4_INV_LVL; + for (iter = 0; iter < doi_def->map.std->lvl.local_size; iter++) + doi_def->map.std->lvl.local[iter] = CIPSO_V4_INV_LVL; + for (iter = 0; iter < doi_def->map.std->cat.cipso_size; iter++) + doi_def->map.std->cat.cipso[iter] = CIPSO_V4_INV_CAT; + for (iter = 0; iter < doi_def->map.std->cat.local_size; iter++) + doi_def->map.std->cat.local[iter] = CIPSO_V4_INV_CAT; + + for (iter = 0; iter < num_lvls; iter++) { + tmp_val_a = netlbl_getinc_u32(&msg, &msg_len); + tmp_val_b = netlbl_getinc_u8(&msg, &msg_len); + + if (tmp_val_a >= doi_def->map.std->lvl.local_size || + tmp_val_b >= doi_def->map.std->lvl.cipso_size) + goto add_std_failure; + + doi_def->map.std->lvl.cipso[tmp_val_b] = tmp_val_a; + doi_def->map.std->lvl.local[tmp_val_a] = tmp_val_b; + } + + for (iter = 0; iter < num_cats; iter++) { + tmp_val_a = netlbl_getinc_u32(&msg, &msg_len); + tmp_val_b = netlbl_getinc_u16(&msg, &msg_len); + + if (tmp_val_a >= doi_def->map.std->cat.local_size || + tmp_val_b >= doi_def->map.std->cat.cipso_size) + goto add_std_failure; + + doi_def->map.std->cat.cipso[tmp_val_b] = tmp_val_a; + doi_def->map.std->cat.local[tmp_val_a] = tmp_val_b; + } + + doi_def->doi = doi; + ret_val = cipso_v4_doi_add(doi_def); + if (ret_val != 0) + goto add_std_failure; + return 0; + +add_std_failure: + if (doi_def) + netlbl_cipsov4_doi_free(&doi_def->rcu); + return ret_val; +} + +/** + * netlbl_cipsov4_add_pass - Adds a CIPSO V4 DOI definition + * @doi: the DOI value + * @msg: the ADD message data + * @msg_size: the size of the ADD message buffer + * + * Description: + * Create a new CIPSO_V4_MAP_PASS DOI definition based on the given ADD message + * and add it to the CIPSO V4 engine. Return zero on success and non-zero on + * error. + * + */ +static int netlbl_cipsov4_add_pass(u32 doi, + struct nlattr *msg, + size_t msg_size) +{ + int ret_val = -EINVAL; + int msg_len = msg_size; + u32 num_tags; + struct cipso_v4_doi *doi_def = NULL; + u32 iter; + + if (msg_len < NETLBL_LEN_U32) + goto add_pass_failure; + num_tags = netlbl_getinc_u32(&msg, &msg_len); + if (num_tags == 0 || num_tags > CIPSO_V4_TAG_MAXCNT) + goto add_pass_failure; + + doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); + if (doi_def == NULL) { + ret_val = -ENOMEM; + goto add_pass_failure; + } + doi_def->type = CIPSO_V4_MAP_PASS; + + for (iter = 0; iter < num_tags; iter++) { + if (msg_len < NETLBL_LEN_U8) + goto add_pass_failure; + doi_def->tags[iter] = netlbl_getinc_u8(&msg, &msg_len); + switch (doi_def->tags[iter]) { + case CIPSO_V4_TAG_RBITMAP: + break; + default: + goto add_pass_failure; + } + } + if (iter < CIPSO_V4_TAG_MAXCNT) + doi_def->tags[iter] = CIPSO_V4_TAG_INVALID; + + doi_def->doi = doi; + ret_val = cipso_v4_doi_add(doi_def); + if (ret_val != 0) + goto add_pass_failure; + return 0; + +add_pass_failure: + if (doi_def) + netlbl_cipsov4_doi_free(&doi_def->rcu); + return ret_val; +} + +/** + * netlbl_cipsov4_add - Handle an ADD message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Create a new DOI definition based on the given ADD message and add it to the + * CIPSO V4 engine. Returns zero on success, negative values on failure. + * + */ +static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info) + +{ + int ret_val = -EINVAL; + u32 doi; + u32 map_type; + int msg_len = netlbl_netlink_payload_len(skb); + struct nlattr *msg = netlbl_netlink_payload_data(skb); + + ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); + if (ret_val != 0) + goto add_return; + + if (msg_len < 2 * NETLBL_LEN_U32) + goto add_return; + + doi = netlbl_getinc_u32(&msg, &msg_len); + map_type = netlbl_getinc_u32(&msg, &msg_len); + switch (map_type) { + case CIPSO_V4_MAP_STD: + ret_val = netlbl_cipsov4_add_std(doi, msg, msg_len); + break; + case CIPSO_V4_MAP_PASS: + ret_val = netlbl_cipsov4_add_pass(doi, msg, msg_len); + break; + } + +add_return: + netlbl_netlink_send_ack(info, + netlbl_cipsov4_gnl_family.id, + NLBL_CIPSOV4_C_ACK, + -ret_val); + return ret_val; +} + +/** + * netlbl_cipsov4_list - Handle a LIST message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated LIST message and respond accordingly. Returns + * zero on success and negative values on error. + * + */ +static int netlbl_cipsov4_list(struct sk_buff *skb, struct genl_info *info) +{ + int ret_val = -EINVAL; + u32 doi; + struct nlattr *msg = netlbl_netlink_payload_data(skb); + struct sk_buff *ans_skb; + + if (netlbl_netlink_payload_len(skb) != NETLBL_LEN_U32) + goto list_failure; + + doi = nla_get_u32(msg); + ans_skb = cipso_v4_doi_dump(doi, NLMSG_SPACE(GENL_HDRLEN)); + if (ans_skb == NULL) { + ret_val = -ENOMEM; + goto list_failure; + } + netlbl_netlink_hdr_push(ans_skb, + info->snd_pid, + 0, + netlbl_cipsov4_gnl_family.id, + NLBL_CIPSOV4_C_LIST); + + ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); + if (ret_val != 0) + goto list_failure; + + return 0; + +list_failure: + netlbl_netlink_send_ack(info, + netlbl_cipsov4_gnl_family.id, + NLBL_CIPSOV4_C_ACK, + -ret_val); + return ret_val; +} + +/** + * netlbl_cipsov4_listall - Handle a LISTALL message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated LISTALL message and respond accordingly. Returns + * zero on success and negative values on error. + * + */ +static int netlbl_cipsov4_listall(struct sk_buff *skb, struct genl_info *info) +{ + int ret_val = -EINVAL; + struct sk_buff *ans_skb; + + ans_skb = cipso_v4_doi_dump_all(NLMSG_SPACE(GENL_HDRLEN)); + if (ans_skb == NULL) { + ret_val = -ENOMEM; + goto listall_failure; + } + netlbl_netlink_hdr_push(ans_skb, + info->snd_pid, + 0, + netlbl_cipsov4_gnl_family.id, + NLBL_CIPSOV4_C_LISTALL); + + ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); + if (ret_val != 0) + goto listall_failure; + + return 0; + +listall_failure: + netlbl_netlink_send_ack(info, + netlbl_cipsov4_gnl_family.id, + NLBL_CIPSOV4_C_ACK, + -ret_val); + return ret_val; +} + +/** + * netlbl_cipsov4_remove - Handle a REMOVE message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated REMOVE message and respond accordingly. Returns + * zero on success, negative values on failure. + * + */ +static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) +{ + int ret_val; + u32 doi; + struct nlattr *msg = netlbl_netlink_payload_data(skb); + + ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); + if (ret_val != 0) + goto remove_return; + + if (netlbl_netlink_payload_len(skb) != NETLBL_LEN_U32) { + ret_val = -EINVAL; + goto remove_return; + } + + doi = nla_get_u32(msg); + ret_val = cipso_v4_doi_remove(doi, netlbl_cipsov4_doi_free); + +remove_return: + netlbl_netlink_send_ack(info, + netlbl_cipsov4_gnl_family.id, + NLBL_CIPSOV4_C_ACK, + -ret_val); + return ret_val; +} + +/* + * NetLabel Generic NETLINK Command Definitions + */ + +static struct genl_ops netlbl_cipsov4_genl_c_add = { + .cmd = NLBL_CIPSOV4_C_ADD, + .flags = 0, + .doit = netlbl_cipsov4_add, + .dumpit = NULL, +}; + +static struct genl_ops netlbl_cipsov4_genl_c_remove = { + .cmd = NLBL_CIPSOV4_C_REMOVE, + .flags = 0, + .doit = netlbl_cipsov4_remove, + .dumpit = NULL, +}; + +static struct genl_ops netlbl_cipsov4_genl_c_list = { + .cmd = NLBL_CIPSOV4_C_LIST, + .flags = 0, + .doit = netlbl_cipsov4_list, + .dumpit = NULL, +}; + +static struct genl_ops netlbl_cipsov4_genl_c_listall = { + .cmd = NLBL_CIPSOV4_C_LISTALL, + .flags = 0, + .doit = netlbl_cipsov4_listall, + .dumpit = NULL, +}; + +/* + * NetLabel Generic NETLINK Protocol Functions + */ + +/** + * netlbl_cipsov4_genl_init - Register the CIPSOv4 NetLabel component + * + * Description: + * Register the CIPSOv4 packet NetLabel component with the Generic NETLINK + * mechanism. Returns zero on success, negative values on failure. + * + */ +int netlbl_cipsov4_genl_init(void) +{ + int ret_val; + + ret_val = genl_register_family(&netlbl_cipsov4_gnl_family); + if (ret_val != 0) + return ret_val; + + ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family, + &netlbl_cipsov4_genl_c_add); + if (ret_val != 0) + return ret_val; + ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family, + &netlbl_cipsov4_genl_c_remove); + if (ret_val != 0) + return ret_val; + ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family, + &netlbl_cipsov4_genl_c_list); + if (ret_val != 0) + return ret_val; + ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family, + &netlbl_cipsov4_genl_c_listall); + if (ret_val != 0) + return ret_val; + + return 0; +} diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c new file mode 100644 index 000000000000..785f4960e0d3 --- /dev/null +++ b/net/netlabel/netlabel_unlabeled.c @@ -0,0 +1,253 @@ +/* + * NetLabel Unlabeled Support + * + * This file defines functions for dealing with unlabeled packets for the + * NetLabel system. The NetLabel system manages static and dynamic label + * mappings for network protocols such as CIPSO and RIPSO. + * + * Author: Paul Moore + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "netlabel_user.h" +#include "netlabel_domainhash.h" +#include "netlabel_unlabeled.h" + +/* Accept unlabeled packets flag */ +static atomic_t netlabel_unlabel_accept_flg = ATOMIC_INIT(0); + +/* NetLabel Generic NETLINK CIPSOv4 family */ +static struct genl_family netlbl_unlabel_gnl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = NETLBL_NLTYPE_UNLABELED_NAME, + .version = NETLBL_PROTO_VERSION, + .maxattr = 0, +}; + + +/* + * NetLabel Command Handlers + */ + +/** + * netlbl_unlabel_accept - Handle an ACCEPT message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated ACCEPT message and set the accept flag accordingly. + * Returns zero on success, negative values on failure. + * + */ +static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info) +{ + int ret_val; + struct nlattr *data = netlbl_netlink_payload_data(skb); + u32 value; + + ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); + if (ret_val != 0) + return ret_val; + + if (netlbl_netlink_payload_len(skb) == NETLBL_LEN_U32) { + value = nla_get_u32(data); + if (value == 1 || value == 0) { + atomic_set(&netlabel_unlabel_accept_flg, value); + netlbl_netlink_send_ack(info, + netlbl_unlabel_gnl_family.id, + NLBL_UNLABEL_C_ACK, + NETLBL_E_OK); + return 0; + } + } + + netlbl_netlink_send_ack(info, + netlbl_unlabel_gnl_family.id, + NLBL_UNLABEL_C_ACK, + EINVAL); + return -EINVAL; +} + +/** + * netlbl_unlabel_list - Handle a LIST message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated LIST message and respond with the current status. + * Returns zero on success, negative values on failure. + * + */ +static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info) +{ + int ret_val = -ENOMEM; + struct sk_buff *ans_skb; + + ans_skb = netlbl_netlink_alloc_skb(0, + GENL_HDRLEN + NETLBL_LEN_U32, + GFP_KERNEL); + if (ans_skb == NULL) + goto list_failure; + + if (netlbl_netlink_hdr_put(ans_skb, + info->snd_pid, + 0, + netlbl_unlabel_gnl_family.id, + NLBL_UNLABEL_C_LIST) == NULL) + goto list_failure; + + ret_val = nla_put_u32(ans_skb, + NLA_U32, + atomic_read(&netlabel_unlabel_accept_flg)); + if (ret_val != 0) + goto list_failure; + + ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); + if (ret_val != 0) + goto list_failure; + + return 0; + +list_failure: + netlbl_netlink_send_ack(info, + netlbl_unlabel_gnl_family.id, + NLBL_UNLABEL_C_ACK, + -ret_val); + return ret_val; +} + + +/* + * NetLabel Generic NETLINK Command Definitions + */ + +static struct genl_ops netlbl_unlabel_genl_c_accept = { + .cmd = NLBL_UNLABEL_C_ACCEPT, + .flags = 0, + .doit = netlbl_unlabel_accept, + .dumpit = NULL, +}; + +static struct genl_ops netlbl_unlabel_genl_c_list = { + .cmd = NLBL_UNLABEL_C_LIST, + .flags = 0, + .doit = netlbl_unlabel_list, + .dumpit = NULL, +}; + + +/* + * NetLabel Generic NETLINK Protocol Functions + */ + +/** + * netlbl_unlabel_genl_init - Register the Unlabeled NetLabel component + * + * Description: + * Register the unlabeled packet NetLabel component with the Generic NETLINK + * mechanism. Returns zero on success, negative values on failure. + * + */ +int netlbl_unlabel_genl_init(void) +{ + int ret_val; + + ret_val = genl_register_family(&netlbl_unlabel_gnl_family); + if (ret_val != 0) + return ret_val; + + ret_val = genl_register_ops(&netlbl_unlabel_gnl_family, + &netlbl_unlabel_genl_c_accept); + if (ret_val != 0) + return ret_val; + + ret_val = genl_register_ops(&netlbl_unlabel_gnl_family, + &netlbl_unlabel_genl_c_list); + if (ret_val != 0) + return ret_val; + + return 0; +} + +/* + * NetLabel KAPI Hooks + */ + +/** + * netlbl_unlabel_getattr - Get the security attributes for an unlabled packet + * @secattr: the security attributes + * + * Description: + * Determine the security attributes, if any, for an unlabled packet and return + * them in @secattr. Returns zero on success and negative values on failure. + * + */ +int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr) +{ + if (atomic_read(&netlabel_unlabel_accept_flg) == 1) { + memset(secattr, 0, sizeof(*secattr)); + return 0; + } + + return -ENOMSG; +} + +/** + * netlbl_unlabel_defconf - Set the default config to allow unlabeled packets + * + * Description: + * Set the default NetLabel configuration to allow incoming unlabeled packets + * and to send unlabeled network traffic by default. + * + */ +int netlbl_unlabel_defconf(void) +{ + int ret_val; + struct netlbl_dom_map *entry; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (entry == NULL) + return -ENOMEM; + entry->type = NETLBL_NLTYPE_UNLABELED; + ret_val = netlbl_domhsh_add_default(entry); + if (ret_val != 0) + return ret_val; + + atomic_set(&netlabel_unlabel_accept_flg, 1); + + return 0; +} From 7420ed23a4f77480b5b7b3245e5da30dd24b7575 Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Fri, 4 Aug 2006 23:17:57 -0700 Subject: [PATCH 0393/1063] [NetLabel]: SELinux support Add NetLabel support to the SELinux LSM and modify the socket_post_create() LSM hook to return an error code. The most significant part of this patch is the addition of NetLabel hooks into the following SELinux LSM hooks: * selinux_file_permission() * selinux_socket_sendmsg() * selinux_socket_post_create() * selinux_socket_sock_rcv_skb() * selinux_socket_getpeersec_stream() * selinux_socket_getpeersec_dgram() * selinux_sock_graft() * selinux_inet_conn_request() The basic reasoning behind this patch is that outgoing packets are "NetLabel'd" by labeling their socket and the NetLabel security attributes are checked via the additional hook in selinux_socket_sock_rcv_skb(). NetLabel itself is only a labeling mechanism, similar to filesystem extended attributes, it is up to the SELinux enforcement mechanism to perform the actual access checks. In addition to the changes outlined above this patch also includes some changes to the extended bitmap (ebitmap) and multi-level security (mls) code to import and export SELinux TE/MLS attributes into and out of NetLabel. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- include/linux/security.h | 25 +- net/socket.c | 13 +- security/dummy.c | 6 +- security/selinux/hooks.c | 56 ++- security/selinux/include/objsec.h | 8 + security/selinux/include/selinux_netlabel.h | 125 +++++ security/selinux/ss/ebitmap.c | 144 ++++++ security/selinux/ss/ebitmap.h | 6 + security/selinux/ss/mls.c | 156 +++++++ security/selinux/ss/mls.h | 21 + security/selinux/ss/services.c | 488 ++++++++++++++++++++ 11 files changed, 1020 insertions(+), 28 deletions(-) create mode 100644 security/selinux/include/selinux_netlabel.h diff --git a/include/linux/security.h b/include/linux/security.h index bb4c80fdfe7a..9f56fb8a4a6c 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1341,8 +1341,8 @@ struct security_operations { int (*unix_may_send) (struct socket * sock, struct socket * other); int (*socket_create) (int family, int type, int protocol, int kern); - void (*socket_post_create) (struct socket * sock, int family, - int type, int protocol, int kern); + int (*socket_post_create) (struct socket * sock, int family, + int type, int protocol, int kern); int (*socket_bind) (struct socket * sock, struct sockaddr * address, int addrlen); int (*socket_connect) (struct socket * sock, @@ -2824,13 +2824,13 @@ static inline int security_socket_create (int family, int type, return security_ops->socket_create(family, type, protocol, kern); } -static inline void security_socket_post_create(struct socket * sock, - int family, - int type, - int protocol, int kern) +static inline int security_socket_post_create(struct socket * sock, + int family, + int type, + int protocol, int kern) { - security_ops->socket_post_create(sock, family, type, - protocol, kern); + return security_ops->socket_post_create(sock, family, type, + protocol, kern); } static inline int security_socket_bind(struct socket * sock, @@ -2982,11 +2982,12 @@ static inline int security_socket_create (int family, int type, return 0; } -static inline void security_socket_post_create(struct socket * sock, - int family, - int type, - int protocol, int kern) +static inline int security_socket_post_create(struct socket * sock, + int family, + int type, + int protocol, int kern) { + return 0; } static inline int security_socket_bind(struct socket * sock, diff --git a/net/socket.c b/net/socket.c index 6d261bf206fc..6756e57e1ff0 100644 --- a/net/socket.c +++ b/net/socket.c @@ -973,11 +973,18 @@ int sock_create_lite(int family, int type, int protocol, struct socket **res) goto out; } - security_socket_post_create(sock, family, type, protocol, 1); sock->type = type; + err = security_socket_post_create(sock, family, type, protocol, 1); + if (err) + goto out_release; + out: *res = sock; return err; +out_release: + sock_release(sock); + sock = NULL; + goto out; } /* No kernel lock held - perfect */ @@ -1214,7 +1221,9 @@ static int __sock_create(int family, int type, int protocol, struct socket **res */ module_put(net_families[family]->owner); *res = sock; - security_socket_post_create(sock, family, type, protocol, kern); + err = security_socket_post_create(sock, family, type, protocol, kern); + if (err) + goto out_release; out: net_family_read_unlock(); diff --git a/security/dummy.c b/security/dummy.c index 1c45f8e4aad1..aeee70565509 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -709,10 +709,10 @@ static int dummy_socket_create (int family, int type, return 0; } -static void dummy_socket_post_create (struct socket *sock, int family, int type, - int protocol, int kern) +static int dummy_socket_post_create (struct socket *sock, int family, int type, + int protocol, int kern) { - return; + return 0; } static int dummy_socket_bind (struct socket *sock, struct sockaddr *address, diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 33028b3b19ce..2a6bbb921e1e 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -12,6 +12,8 @@ * Copyright (C) 2003 Red Hat, Inc., James Morris * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc. * + * Copyright (C) 2006 Hewlett-Packard Development Company, L.P. + * Paul Moore, * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, @@ -74,6 +76,7 @@ #include "objsec.h" #include "netif.h" #include "xfrm.h" +#include "selinux_netlabel.h" #define XATTR_SELINUX_SUFFIX "selinux" #define XATTR_NAME_SELINUX XATTR_SECURITY_PREFIX XATTR_SELINUX_SUFFIX @@ -2395,6 +2398,7 @@ static int selinux_inode_listsecurity(struct inode *inode, char *buffer, size_t static int selinux_file_permission(struct file *file, int mask) { + int rc; struct inode *inode = file->f_dentry->d_inode; if (!mask) { @@ -2406,8 +2410,12 @@ static int selinux_file_permission(struct file *file, int mask) if ((file->f_flags & O_APPEND) && (mask & MAY_WRITE)) mask |= MAY_APPEND; - return file_has_perm(current, file, - file_mask_to_av(inode->i_mode, mask)); + rc = file_has_perm(current, file, + file_mask_to_av(inode->i_mode, mask)); + if (rc) + return rc; + + return selinux_netlbl_inode_permission(inode, mask); } static int selinux_file_alloc_security(struct file *file) @@ -3058,9 +3066,10 @@ out: return err; } -static void selinux_socket_post_create(struct socket *sock, int family, - int type, int protocol, int kern) +static int selinux_socket_post_create(struct socket *sock, int family, + int type, int protocol, int kern) { + int err = 0; struct inode_security_struct *isec; struct task_security_struct *tsec; struct sk_security_struct *sksec; @@ -3077,9 +3086,12 @@ static void selinux_socket_post_create(struct socket *sock, int family, if (sock->sk) { sksec = sock->sk->sk_security; sksec->sid = isec->sid; + err = selinux_netlbl_socket_post_create(sock, + family, + isec->sid); } - return; + return err; } /* Range of port numbers used to automatically bind. @@ -3260,7 +3272,13 @@ static int selinux_socket_accept(struct socket *sock, struct socket *newsock) static int selinux_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size) { - return socket_has_perm(current, sock, SOCKET__WRITE); + int rc; + + rc = socket_has_perm(current, sock, SOCKET__WRITE); + if (rc) + return rc; + + return selinux_netlbl_inode_permission(SOCK_INODE(sock), MAY_WRITE); } static int selinux_socket_recvmsg(struct socket *sock, struct msghdr *msg, @@ -3468,6 +3486,10 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) if (err) goto out; + err = selinux_netlbl_sock_rcv_skb(sksec, skb, &ad); + if (err) + goto out; + err = selinux_xfrm_sock_rcv_skb(sksec->sid, skb, &ad); out: return err; @@ -3491,8 +3513,9 @@ static int selinux_socket_getpeersec_stream(struct socket *sock, char __user *op peer_sid = ssec->peer_sid; } else if (isec->sclass == SECCLASS_TCP_SOCKET) { - peer_sid = selinux_socket_getpeer_stream(sock->sk); - + peer_sid = selinux_netlbl_socket_getpeersec_stream(sock); + if (peer_sid == SECSID_NULL) + peer_sid = selinux_socket_getpeer_stream(sock->sk); if (peer_sid == SECSID_NULL) { err = -ENOPROTOOPT; goto out; @@ -3532,8 +3555,11 @@ static int selinux_socket_getpeersec_dgram(struct socket *sock, struct sk_buff * if (sock && (sock->sk->sk_family == PF_UNIX)) selinux_get_inode_sid(SOCK_INODE(sock), &peer_secid); - else if (skb) - peer_secid = selinux_socket_getpeer_dgram(skb); + else if (skb) { + peer_secid = selinux_netlbl_socket_getpeersec_dgram(skb); + if (peer_secid == SECSID_NULL) + peer_secid = selinux_socket_getpeer_dgram(skb); + } if (peer_secid == SECSID_NULL) err = -EINVAL; @@ -3578,6 +3604,8 @@ void selinux_sock_graft(struct sock* sk, struct socket *parent) struct sk_security_struct *sksec = sk->sk_security; isec->sid = sksec->sid; + + selinux_netlbl_sock_graft(sk, parent); } int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb, @@ -3585,9 +3613,15 @@ int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb, { struct sk_security_struct *sksec = sk->sk_security; int err; - u32 newsid = 0; + u32 newsid; u32 peersid; + newsid = selinux_netlbl_inet_conn_request(skb, sksec->sid); + if (newsid != SECSID_NULL) { + req->secid = newsid; + return 0; + } + err = selinux_xfrm_decode_session(skb, &peersid, 0); BUG_ON(err); diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 79b9e0af19a0..0a39bfd1319f 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -101,6 +101,14 @@ struct sk_security_struct { struct sock *sk; /* back pointer to sk object */ u32 sid; /* SID of this object */ u32 peer_sid; /* SID of peer */ +#ifdef CONFIG_NETLABEL + u16 sclass; /* sock security class */ + enum { /* NetLabel state */ + NLBL_UNSET = 0, + NLBL_REQUIRE, + NLBL_LABELED, + } nlbl_state; +#endif }; struct key_security_struct { diff --git a/security/selinux/include/selinux_netlabel.h b/security/selinux/include/selinux_netlabel.h new file mode 100644 index 000000000000..88c463eef1e1 --- /dev/null +++ b/security/selinux/include/selinux_netlabel.h @@ -0,0 +1,125 @@ +/* + * SELinux interface to the NetLabel subsystem + * + * Author : Paul Moore + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _SELINUX_NETLABEL_H_ +#define _SELINUX_NETLABEL_H_ + +#ifdef CONFIG_NETLABEL +void selinux_netlbl_cache_invalidate(void); +int selinux_netlbl_socket_post_create(struct socket *sock, + int sock_family, + u32 sid); +void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock); +u32 selinux_netlbl_inet_conn_request(struct sk_buff *skb, u32 sock_sid); +int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, + struct sk_buff *skb, + struct avc_audit_data *ad); +u32 selinux_netlbl_socket_getpeersec_stream(struct socket *sock); +u32 selinux_netlbl_socket_getpeersec_dgram(struct sk_buff *skb); + +int __selinux_netlbl_inode_permission(struct inode *inode, int mask); +/** + * selinux_netlbl_inode_permission - Verify the socket is NetLabel labeled + * @inode: the file descriptor's inode + * @mask: the permission mask + * + * Description: + * Looks at a file's inode and if it is marked as a socket protected by + * NetLabel then verify that the socket has been labeled, if not try to label + * the socket now with the inode's SID. Returns zero on success, negative + * values on failure. + * + */ +static inline int selinux_netlbl_inode_permission(struct inode *inode, + int mask) +{ + int rc = 0; + struct inode_security_struct *isec; + struct sk_security_struct *sksec; + + if (!S_ISSOCK(inode->i_mode)) + return 0; + + isec = inode->i_security; + sksec = SOCKET_I(inode)->sk->sk_security; + down(&isec->sem); + if (unlikely(sksec->nlbl_state == NLBL_REQUIRE && + (mask & (MAY_WRITE | MAY_APPEND)))) + rc = __selinux_netlbl_inode_permission(inode, mask); + up(&isec->sem); + + return rc; +} +#else +static inline void selinux_netlbl_cache_invalidate(void) +{ + return; +} + +static inline int selinux_netlbl_socket_post_create(struct socket *sock, + int sock_family, + u32 sid) +{ + return 0; +} + +static inline void selinux_netlbl_sock_graft(struct sock *sk, + struct socket *sock) +{ + return; +} + +static inline u32 selinux_netlbl_inet_conn_request(struct sk_buff *skb, + u32 sock_sid) +{ + return SECSID_NULL; +} + +static inline int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, + struct sk_buff *skb, + struct avc_audit_data *ad) +{ + return 0; +} + +static inline u32 selinux_netlbl_socket_getpeersec_stream(struct socket *sock) +{ + return SECSID_NULL; +} + +static inline u32 selinux_netlbl_socket_getpeersec_dgram(struct sk_buff *skb) +{ + return SECSID_NULL; +} + +static inline int selinux_netlbl_inode_permission(struct inode *inode, + int mask) +{ + return 0; +} +#endif /* CONFIG_NETLABEL */ + +#endif diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c index 47024a6e1844..4b915eb60c45 100644 --- a/security/selinux/ss/ebitmap.c +++ b/security/selinux/ss/ebitmap.c @@ -3,6 +3,14 @@ * * Author : Stephen Smalley, */ +/* + * Updated: Hewlett-Packard + * + * Added ebitmap_export() and ebitmap_import() + * + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + */ + #include #include #include @@ -59,6 +67,142 @@ int ebitmap_cpy(struct ebitmap *dst, struct ebitmap *src) return 0; } +/** + * ebitmap_export - Export an ebitmap to a unsigned char bitmap string + * @src: the ebitmap to export + * @dst: the resulting bitmap string + * @dst_len: length of dst in bytes + * + * Description: + * Allocate a buffer at least src->highbit bits long and export the extensible + * bitmap into the buffer. The bitmap string will be in little endian format, + * i.e. LSB first. The value returned in dst_len may not the true size of the + * buffer as the length of the buffer is rounded up to a multiple of MAPTYPE. + * The caller must free the buffer when finished. Returns zero on success, + * negative values on failure. + * + */ +int ebitmap_export(const struct ebitmap *src, + unsigned char **dst, + size_t *dst_len) +{ + size_t bitmap_len; + unsigned char *bitmap; + struct ebitmap_node *iter_node; + MAPTYPE node_val; + size_t bitmap_byte; + unsigned char bitmask; + + bitmap_len = src->highbit / 8; + if (src->highbit % 7) + bitmap_len += 1; + if (bitmap_len == 0) + return -EINVAL; + + bitmap = kzalloc((bitmap_len & ~(sizeof(MAPTYPE) - 1)) + + sizeof(MAPTYPE), + GFP_ATOMIC); + if (bitmap == NULL) + return -ENOMEM; + + iter_node = src->node; + do { + bitmap_byte = iter_node->startbit / 8; + bitmask = 0x80; + node_val = iter_node->map; + do { + if (bitmask == 0) { + bitmap_byte++; + bitmask = 0x80; + } + if (node_val & (MAPTYPE)0x01) + bitmap[bitmap_byte] |= bitmask; + node_val >>= 1; + bitmask >>= 1; + } while (node_val > 0); + iter_node = iter_node->next; + } while (iter_node); + + *dst = bitmap; + *dst_len = bitmap_len; + return 0; +} + +/** + * ebitmap_import - Import an unsigned char bitmap string into an ebitmap + * @src: the bitmap string + * @src_len: the bitmap length in bytes + * @dst: the empty ebitmap + * + * Description: + * This function takes a little endian bitmap string in src and imports it into + * the ebitmap pointed to by dst. Returns zero on success, negative values on + * failure. + * + */ +int ebitmap_import(const unsigned char *src, + size_t src_len, + struct ebitmap *dst) +{ + size_t src_off = 0; + struct ebitmap_node *node_new; + struct ebitmap_node *node_last = NULL; + size_t iter; + size_t iter_bit; + size_t iter_limit; + unsigned char src_byte; + + do { + iter_limit = src_len - src_off; + if (iter_limit >= sizeof(MAPTYPE)) { + if (*(MAPTYPE *)&src[src_off] == 0) { + src_off += sizeof(MAPTYPE); + continue; + } + iter_limit = sizeof(MAPTYPE); + } else { + iter = src_off; + src_byte = 0; + do { + src_byte |= src[iter++]; + } while (iter < src_len && src_byte == 0); + if (src_byte == 0) + break; + } + + node_new = kzalloc(sizeof(*node_new), GFP_ATOMIC); + if (unlikely(node_new == NULL)) { + ebitmap_destroy(dst); + return -ENOMEM; + } + node_new->startbit = src_off * 8; + iter = 0; + do { + src_byte = src[src_off++]; + iter_bit = iter++ * 8; + while (src_byte != 0) { + if (src_byte & 0x80) + node_new->map |= MAPBIT << iter_bit; + iter_bit++; + src_byte <<= 1; + } + } while (iter < iter_limit); + + if (node_last != NULL) + node_last->next = node_new; + else + dst->node = node_new; + node_last = node_new; + } while (src_off < src_len); + + if (likely(node_last != NULL)) + dst->highbit = node_last->startbit + MAPSIZE; + else + ebitmap_init(dst); + + return 0; +} + int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2) { struct ebitmap_node *n1, *n2; diff --git a/security/selinux/ss/ebitmap.h b/security/selinux/ss/ebitmap.h index 8bf41055a6cb..da2d4651b10d 100644 --- a/security/selinux/ss/ebitmap.h +++ b/security/selinux/ss/ebitmap.h @@ -69,6 +69,12 @@ static inline int ebitmap_node_get_bit(struct ebitmap_node * n, int ebitmap_cmp(struct ebitmap *e1, struct ebitmap *e2); int ebitmap_cpy(struct ebitmap *dst, struct ebitmap *src); +int ebitmap_export(const struct ebitmap *src, + unsigned char **dst, + size_t *dst_len); +int ebitmap_import(const unsigned char *src, + size_t src_len, + struct ebitmap *dst); int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2); int ebitmap_get_bit(struct ebitmap *e, unsigned long bit); int ebitmap_set_bit(struct ebitmap *e, unsigned long bit, int value); diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index e15f7e0399b8..119bd6078ba1 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -10,6 +10,13 @@ * * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc. */ +/* + * Updated: Hewlett-Packard + * + * Added support to import/export the MLS label + * + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + */ #include #include @@ -565,3 +572,152 @@ int mls_compute_sid(struct context *scontext, return -EINVAL; } +/** + * mls_export_lvl - Export the MLS sensitivity levels + * @context: the security context + * @low: the low sensitivity level + * @high: the high sensitivity level + * + * Description: + * Given the security context copy the low MLS sensitivity level into lvl_low + * and the high sensitivity level in lvl_high. The MLS levels are only + * exported if the pointers are not NULL, if they are NULL then that level is + * not exported. + * + */ +void mls_export_lvl(const struct context *context, u32 *low, u32 *high) +{ + if (!selinux_mls_enabled) + return; + + if (low != NULL) + *low = context->range.level[0].sens - 1; + if (high != NULL) + *high = context->range.level[1].sens - 1; +} + +/** + * mls_import_lvl - Import the MLS sensitivity levels + * @context: the security context + * @low: the low sensitivity level + * @high: the high sensitivity level + * + * Description: + * Given the security context and the two sensitivty levels, set the MLS levels + * in the context according the two given as parameters. Returns zero on + * success, negative values on failure. + * + */ +void mls_import_lvl(struct context *context, u32 low, u32 high) +{ + if (!selinux_mls_enabled) + return; + + context->range.level[0].sens = low + 1; + context->range.level[1].sens = high + 1; +} + +/** + * mls_export_cat - Export the MLS categories + * @context: the security context + * @low: the low category + * @low_len: length of the cat_low bitmap in bytes + * @high: the high category + * @high_len: length of the cat_high bitmap in bytes + * + * Description: + * Given the security context export the low MLS category bitmap into cat_low + * and the high category bitmap into cat_high. The MLS categories are only + * exported if the pointers are not NULL, if they are NULL then that level is + * not exported. The caller is responsibile for freeing the memory when + * finished. Returns zero on success, negative values on failure. + * + */ +int mls_export_cat(const struct context *context, + unsigned char **low, + size_t *low_len, + unsigned char **high, + size_t *high_len) +{ + int rc = -EPERM; + + if (!selinux_mls_enabled) + return 0; + + if (low != NULL) { + rc = ebitmap_export(&context->range.level[0].cat, + low, + low_len); + if (rc != 0) + goto export_cat_failure; + } + if (high != NULL) { + rc = ebitmap_export(&context->range.level[1].cat, + high, + high_len); + if (rc != 0) + goto export_cat_failure; + } + + return 0; + +export_cat_failure: + if (low != NULL) + kfree(*low); + if (high != NULL) + kfree(*high); + return rc; +} + +/** + * mls_import_cat - Import the MLS categories + * @context: the security context + * @low: the low category + * @low_len: length of the cat_low bitmap in bytes + * @high: the high category + * @high_len: length of the cat_high bitmap in bytes + * + * Description: + * Given the security context and the two category bitmap strings import the + * categories into the security context. The MLS categories are only imported + * if the pointers are not NULL, if they are NULL they are skipped. Returns + * zero on success, negative values on failure. + * + */ +int mls_import_cat(struct context *context, + const unsigned char *low, + size_t low_len, + const unsigned char *high, + size_t high_len) +{ + int rc = -EPERM; + + if (!selinux_mls_enabled) + return 0; + + if (low != NULL) { + rc = ebitmap_import(low, + low_len, + &context->range.level[0].cat); + if (rc != 0) + goto import_cat_failure; + } + if (high != NULL) { + if (high == low) + rc = ebitmap_cpy(&context->range.level[1].cat, + &context->range.level[0].cat); + else + rc = ebitmap_import(high, + high_len, + &context->range.level[1].cat); + if (rc != 0) + goto import_cat_failure; + } + + return 0; + +import_cat_failure: + ebitmap_destroy(&context->range.level[0].cat); + ebitmap_destroy(&context->range.level[1].cat); + return rc; +} diff --git a/security/selinux/ss/mls.h b/security/selinux/ss/mls.h index 90c5e88987fa..df6032c6d492 100644 --- a/security/selinux/ss/mls.h +++ b/security/selinux/ss/mls.h @@ -10,6 +10,13 @@ * * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc. */ +/* + * Updated: Hewlett-Packard + * + * Added support to import/export the MLS label + * + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + */ #ifndef _SS_MLS_H_ #define _SS_MLS_H_ @@ -62,5 +69,19 @@ int mls_compute_sid(struct context *scontext, int mls_setup_user_range(struct context *fromcon, struct user_datum *user, struct context *usercon); +void mls_export_lvl(const struct context *context, u32 *low, u32 *high); +void mls_import_lvl(struct context *context, u32 low, u32 high); + +int mls_export_cat(const struct context *context, + unsigned char **low, + size_t *low_len, + unsigned char **high, + size_t *high_len); +int mls_import_cat(struct context *context, + const unsigned char *low, + size_t low_len, + const unsigned char *high, + size_t high_len); + #endif /* _SS_MLS_H */ diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index b00ec69f0ffd..910afa1ffc31 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -13,6 +13,11 @@ * * Added conditional policy language extensions * + * Updated: Hewlett-Packard + * + * Added support for NetLabel + * + * Copyright (C) 2006 Hewlett-Packard Development Company, L.P. * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc. * Copyright (C) 2003 - 2004 Tresys Technology, LLC * Copyright (C) 2003 Red Hat, Inc., James Morris @@ -29,6 +34,8 @@ #include #include #include +#include +#include #include "flask.h" #include "avc.h" @@ -40,6 +47,8 @@ #include "services.h" #include "conditional.h" #include "mls.h" +#include "objsec.h" +#include "selinux_netlabel.h" extern void selnl_notify_policyload(u32 seqno); unsigned int policydb_loaded_version; @@ -1241,6 +1250,7 @@ int security_load_policy(void *data, size_t len) selinux_complete_init(); avc_ss_reset(seqno); selnl_notify_policyload(seqno); + selinux_netlbl_cache_invalidate(); return 0; } @@ -1295,6 +1305,7 @@ int security_load_policy(void *data, size_t len) avc_ss_reset(seqno); selnl_notify_policyload(seqno); + selinux_netlbl_cache_invalidate(); return 0; @@ -2133,3 +2144,480 @@ void selinux_audit_set_callback(int (*callback)(void)) { aurule_callback = callback; } + +#ifdef CONFIG_NETLABEL +/* + * This is the structure we store inside the NetLabel cache block. + */ +#define NETLBL_CACHE(x) ((struct netlbl_cache *)(x)) +#define NETLBL_CACHE_T_NONE 0 +#define NETLBL_CACHE_T_SID 1 +#define NETLBL_CACHE_T_MLS 2 +struct netlbl_cache { + u32 type; + union { + u32 sid; + struct mls_range mls_label; + } data; +}; + +/** + * selinux_netlbl_cache_free - Free the NetLabel cached data + * @data: the data to free + * + * Description: + * This function is intended to be used as the free() callback inside the + * netlbl_lsm_cache structure. + * + */ +static void selinux_netlbl_cache_free(const void *data) +{ + struct netlbl_cache *cache = NETLBL_CACHE(data); + switch (cache->type) { + case NETLBL_CACHE_T_MLS: + ebitmap_destroy(&cache->data.mls_label.level[0].cat); + break; + } + kfree(data); +} + +/** + * selinux_netlbl_cache_add - Add an entry to the NetLabel cache + * @skb: the packet + * @ctx: the SELinux context + * + * Description: + * Attempt to cache the context in @ctx, which was derived from the packet in + * @skb, in the NetLabel subsystem cache. + * + */ +static void selinux_netlbl_cache_add(struct sk_buff *skb, struct context *ctx) +{ + struct netlbl_cache *cache = NULL; + struct netlbl_lsm_secattr secattr; + + netlbl_secattr_init(&secattr); + + cache = kzalloc(sizeof(*cache), GFP_ATOMIC); + if (cache == NULL) + goto netlbl_cache_add_failure; + secattr.cache.free = selinux_netlbl_cache_free; + secattr.cache.data = (void *)cache; + + cache->type = NETLBL_CACHE_T_MLS; + if (ebitmap_cpy(&cache->data.mls_label.level[0].cat, + &ctx->range.level[0].cat) != 0) + goto netlbl_cache_add_failure; + cache->data.mls_label.level[1].cat.highbit = + cache->data.mls_label.level[0].cat.highbit; + cache->data.mls_label.level[1].cat.node = + cache->data.mls_label.level[0].cat.node; + cache->data.mls_label.level[0].sens = ctx->range.level[0].sens; + cache->data.mls_label.level[1].sens = ctx->range.level[0].sens; + + if (netlbl_cache_add(skb, &secattr) != 0) + goto netlbl_cache_add_failure; + + return; + +netlbl_cache_add_failure: + netlbl_secattr_destroy(&secattr, 1); +} + +/** + * selinux_netlbl_cache_invalidate - Invalidate the NetLabel cache + * + * Description: + * Invalidate the NetLabel security attribute mapping cache. + * + */ +void selinux_netlbl_cache_invalidate(void) +{ + netlbl_cache_invalidate(); +} + +/** + * selinux_netlbl_secattr_to_sid - Convert a NetLabel secattr to a SELinux SID + * @skb: the network packet + * @secattr: the NetLabel packet security attributes + * @base_sid: the SELinux SID to use as a context for MLS only attributes + * @sid: the SELinux SID + * + * Description: + * Convert the given NetLabel packet security attributes in @secattr into a + * SELinux SID. If the @secattr field does not contain a full SELinux + * SID/context then use the context in @base_sid as the foundation. If @skb + * is not NULL attempt to cache as much data as possibile. Returns zero on + * success, negative values on failure. + * + */ +static int selinux_netlbl_secattr_to_sid(struct sk_buff *skb, + struct netlbl_lsm_secattr *secattr, + u32 base_sid, + u32 *sid) +{ + int rc = -EIDRM; + struct context *ctx; + struct context ctx_new; + struct netlbl_cache *cache; + + POLICY_RDLOCK; + + if (secattr->cache.data) { + cache = NETLBL_CACHE(secattr->cache.data); + switch (cache->type) { + case NETLBL_CACHE_T_SID: + *sid = cache->data.sid; + rc = 0; + break; + case NETLBL_CACHE_T_MLS: + ctx = sidtab_search(&sidtab, base_sid); + if (ctx == NULL) + goto netlbl_secattr_to_sid_return; + + ctx_new.user = ctx->user; + ctx_new.role = ctx->role; + ctx_new.type = ctx->type; + ctx_new.range.level[0].sens = + cache->data.mls_label.level[0].sens; + ctx_new.range.level[0].cat.highbit = + cache->data.mls_label.level[0].cat.highbit; + ctx_new.range.level[0].cat.node = + cache->data.mls_label.level[0].cat.node; + ctx_new.range.level[1].sens = + cache->data.mls_label.level[1].sens; + ctx_new.range.level[1].cat.highbit = + cache->data.mls_label.level[1].cat.highbit; + ctx_new.range.level[1].cat.node = + cache->data.mls_label.level[1].cat.node; + + rc = sidtab_context_to_sid(&sidtab, &ctx_new, sid); + break; + default: + goto netlbl_secattr_to_sid_return; + } + } else if (secattr->mls_lvl_vld) { + ctx = sidtab_search(&sidtab, base_sid); + if (ctx == NULL) + goto netlbl_secattr_to_sid_return; + + ctx_new.user = ctx->user; + ctx_new.role = ctx->role; + ctx_new.type = ctx->type; + mls_import_lvl(&ctx_new, secattr->mls_lvl, secattr->mls_lvl); + if (secattr->mls_cat) { + if (mls_import_cat(&ctx_new, + secattr->mls_cat, + secattr->mls_cat_len, + NULL, + 0) != 0) + goto netlbl_secattr_to_sid_return; + ctx_new.range.level[1].cat.highbit = + ctx_new.range.level[0].cat.highbit; + ctx_new.range.level[1].cat.node = + ctx_new.range.level[0].cat.node; + } else { + ebitmap_init(&ctx_new.range.level[0].cat); + ebitmap_init(&ctx_new.range.level[1].cat); + } + if (mls_context_isvalid(&policydb, &ctx_new) != 1) + goto netlbl_secattr_to_sid_return_cleanup; + + rc = sidtab_context_to_sid(&sidtab, &ctx_new, sid); + if (rc != 0) + goto netlbl_secattr_to_sid_return_cleanup; + + if (skb != NULL) + selinux_netlbl_cache_add(skb, &ctx_new); + ebitmap_destroy(&ctx_new.range.level[0].cat); + } else { + *sid = SECINITSID_UNLABELED; + rc = 0; + } + +netlbl_secattr_to_sid_return: + POLICY_RDUNLOCK; + return rc; +netlbl_secattr_to_sid_return_cleanup: + ebitmap_destroy(&ctx_new.range.level[0].cat); + goto netlbl_secattr_to_sid_return; +} + +/** + * selinux_netlbl_skbuff_getsid - Get the sid of a packet using NetLabel + * @skb: the packet + * @base_sid: the SELinux SID to use as a context for MLS only attributes + * @sid: the SID + * + * Description: + * Call the NetLabel mechanism to get the security attributes of the given + * packet and use those attributes to determine the correct context/SID to + * assign to the packet. Returns zero on success, negative values on failure. + * + */ +static int selinux_netlbl_skbuff_getsid(struct sk_buff *skb, + u32 base_sid, + u32 *sid) +{ + int rc; + struct netlbl_lsm_secattr secattr; + + netlbl_secattr_init(&secattr); + rc = netlbl_skbuff_getattr(skb, &secattr); + if (rc == 0) + rc = selinux_netlbl_secattr_to_sid(skb, + &secattr, + base_sid, + sid); + netlbl_secattr_destroy(&secattr, 0); + + return rc; +} + +/** + * selinux_netlbl_socket_setsid - Label a socket using the NetLabel mechanism + * @sock: the socket to label + * @sid: the SID to use + * + * Description: + * Attempt to label a socket using the NetLabel mechanism using the given + * SID. Returns zero values on success, negative values on failure. + * + */ +static int selinux_netlbl_socket_setsid(struct socket *sock, u32 sid) +{ + int rc = -ENOENT; + struct sk_security_struct *sksec = sock->sk->sk_security; + struct netlbl_lsm_secattr secattr; + struct context *ctx; + + if (!ss_initialized) + return 0; + + POLICY_RDLOCK; + + ctx = sidtab_search(&sidtab, sid); + if (ctx == NULL) + goto netlbl_socket_setsid_return; + + netlbl_secattr_init(&secattr); + secattr.domain = kstrdup(policydb.p_type_val_to_name[ctx->type - 1], + GFP_ATOMIC); + mls_export_lvl(ctx, &secattr.mls_lvl, NULL); + secattr.mls_lvl_vld = 1; + mls_export_cat(ctx, + &secattr.mls_cat, + &secattr.mls_cat_len, + NULL, + NULL); + + rc = netlbl_socket_setattr(sock, &secattr); + if (rc == 0) + sksec->nlbl_state = NLBL_LABELED; + + netlbl_secattr_destroy(&secattr, 0); + +netlbl_socket_setsid_return: + POLICY_RDUNLOCK; + return rc; +} + +/** + * selinux_netlbl_socket_post_create - Label a socket using NetLabel + * @sock: the socket to label + * @sock_family: the socket family + * @sid: the SID to use + * + * Description: + * Attempt to label a socket using the NetLabel mechanism using the given + * SID. Returns zero values on success, negative values on failure. + * + */ +int selinux_netlbl_socket_post_create(struct socket *sock, + int sock_family, + u32 sid) +{ + struct inode_security_struct *isec = SOCK_INODE(sock)->i_security; + struct sk_security_struct *sksec = sock->sk->sk_security; + + if (sock_family != PF_INET) + return 0; + + sksec->sclass = isec->sclass; + sksec->nlbl_state = NLBL_REQUIRE; + return selinux_netlbl_socket_setsid(sock, sid); +} + +/** + * selinux_netlbl_sock_graft - Netlabel the new socket + * @sk: the new connection + * @sock: the new socket + * + * Description: + * The connection represented by @sk is being grafted onto @sock so set the + * socket's NetLabel to match the SID of @sk. + * + */ +void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock) +{ + struct inode_security_struct *isec = SOCK_INODE(sock)->i_security; + struct sk_security_struct *sksec = sk->sk_security; + + if (sk->sk_family != PF_INET) + return; + + sksec->nlbl_state = NLBL_REQUIRE; + sksec->peer_sid = sksec->sid; + sksec->sclass = isec->sclass; + + /* Try to set the NetLabel on the socket to save time later, if we fail + * here we will pick up the pieces in later calls to + * selinux_netlbl_inode_permission(). */ + selinux_netlbl_socket_setsid(sock, sksec->sid); +} + +/** + * selinux_netlbl_inet_conn_request - Handle a new connection request + * @skb: the packet + * @sock_sid: the SID of the parent socket + * + * Description: + * If present, use the security attributes of the packet in @skb and the + * parent sock's SID to arrive at a SID for the new child sock. Returns the + * SID of the connection or SECSID_NULL on failure. + * + */ +u32 selinux_netlbl_inet_conn_request(struct sk_buff *skb, u32 sock_sid) +{ + int rc; + u32 peer_sid; + + rc = selinux_netlbl_skbuff_getsid(skb, sock_sid, &peer_sid); + if (rc != 0) + return SECSID_NULL; + + if (peer_sid == SECINITSID_UNLABELED) + return SECSID_NULL; + + return peer_sid; +} + +/** + * __selinux_netlbl_inode_permission - Label a socket using NetLabel + * @inode: the file descriptor's inode + * @mask: the permission mask + * + * Description: + * Try to label a socket with the inode's SID using NetLabel. Returns zero on + * success, negative values on failure. + * + */ +int __selinux_netlbl_inode_permission(struct inode *inode, int mask) +{ + int rc; + struct socket *sock = SOCKET_I(inode); + struct sk_security_struct *sksec = sock->sk->sk_security; + + lock_sock(sock->sk); + rc = selinux_netlbl_socket_setsid(sock, sksec->sid); + release_sock(sock->sk); + + return rc; +} + +/** + * selinux_netlbl_sock_rcv_skb - Do an inbound access check using NetLabel + * @sksec: the sock's sk_security_struct + * @skb: the packet + * @ad: the audit data + * + * Description: + * Fetch the NetLabel security attributes from @skb and perform an access check + * against the receiving socket. Returns zero on success, negative values on + * error. + * + */ +int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, + struct sk_buff *skb, + struct avc_audit_data *ad) +{ + int rc; + u32 netlbl_sid; + u32 recv_perm; + + rc = selinux_netlbl_skbuff_getsid(skb, sksec->sid, &netlbl_sid); + if (rc != 0) + return rc; + + if (netlbl_sid == SECINITSID_UNLABELED) + return 0; + + switch (sksec->sclass) { + case SECCLASS_UDP_SOCKET: + recv_perm = UDP_SOCKET__RECV_MSG; + break; + case SECCLASS_TCP_SOCKET: + recv_perm = TCP_SOCKET__RECV_MSG; + break; + default: + recv_perm = RAWIP_SOCKET__RECV_MSG; + } + + rc = avc_has_perm(sksec->sid, + netlbl_sid, + sksec->sclass, + recv_perm, + ad); + if (rc == 0) + return 0; + + netlbl_skbuff_err(skb, rc); + return rc; +} + +/** + * selinux_netlbl_socket_peersid - Return the peer SID of a connected socket + * @sock: the socket + * + * Description: + * Examine @sock to find the connected peer's SID. Returns the SID on success + * or SECSID_NULL on error. + * + */ +u32 selinux_netlbl_socket_getpeersec_stream(struct socket *sock) +{ + struct sk_security_struct *sksec = sock->sk->sk_security; + + if (sksec->peer_sid == SECINITSID_UNLABELED) + return SECSID_NULL; + + return sksec->peer_sid; +} + +/** + * selinux_netlbl_socket_getpeersec_dgram - Return the SID of a NetLabel packet + * @skb: the packet + * + * Description: + * Examine @skb to find the SID assigned to it by NetLabel. Returns the SID on + * success, SECSID_NULL on error. + * + */ +u32 selinux_netlbl_socket_getpeersec_dgram(struct sk_buff *skb) +{ + int peer_sid; + struct sock *sk = skb->sk; + struct inode_security_struct *isec; + + if (sk == NULL || sk->sk_socket == NULL) + return SECSID_NULL; + + isec = SOCK_INODE(sk->sk_socket)->i_security; + if (selinux_netlbl_skbuff_getsid(skb, isec->sid, &peer_sid) != 0) + return SECSID_NULL; + if (peer_sid == SECINITSID_UNLABELED) + return SECSID_NULL; + + return peer_sid; +} +#endif /* CONFIG_NETLABEL */ From 8161327311fe4da1684ed08015e141feb9a0a737 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 3 Aug 2006 16:50:39 -0700 Subject: [PATCH 0394/1063] [NetLabel]: tie NetLabel into the Kconfig system Modify the net/Kconfig file to enable selecting the NetLabel Kconfig options. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- net/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/Kconfig b/net/Kconfig index 4959a4e1e0fe..eb855b7fa642 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -249,6 +249,8 @@ source "net/ieee80211/Kconfig" config WIRELESS_EXT bool +source "net/netlabel/Kconfig" + endif # if NET endmenu # Networking From 5d0bbeeb144f631150881712607345c532e38e7e Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 4 Aug 2006 03:37:36 -0700 Subject: [PATCH 0395/1063] [IPV6]: Remove ndiscs rt6_lock dependency (Ab)using rt6_lock wouldn't work anymore if rt6_lock is converted into a per table lock. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv6/route.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d9baca062d24..ce1f49b595b0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -747,8 +747,6 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) } } -/* Protected by rt6_lock. */ -static struct dst_entry *ndisc_dst_gc_list; static int ipv6_get_mtu(struct net_device *dev); static inline unsigned int ipv6_advmss(unsigned int mtu) @@ -769,6 +767,9 @@ static inline unsigned int ipv6_advmss(unsigned int mtu) return mtu; } +static struct dst_entry *ndisc_dst_gc_list; +DEFINE_SPINLOCK(ndisc_lock); + struct dst_entry *ndisc_dst_alloc(struct net_device *dev, struct neighbour *neigh, struct in6_addr *addr, @@ -809,10 +810,10 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev, rt->rt6i_dst.plen = 128; #endif - write_lock_bh(&rt6_lock); + spin_lock_bh(&ndisc_lock); rt->u.dst.next = ndisc_dst_gc_list; ndisc_dst_gc_list = &rt->u.dst; - write_unlock_bh(&rt6_lock); + spin_unlock_bh(&ndisc_lock); fib6_force_start_gc(); @@ -826,8 +827,11 @@ int ndisc_dst_gc(int *more) int freed; next = NULL; + freed = 0; + + spin_lock_bh(&ndisc_lock); pprev = &ndisc_dst_gc_list; - freed = 0; + while ((dst = *pprev) != NULL) { if (!atomic_read(&dst->__refcnt)) { *pprev = dst->next; @@ -839,6 +843,8 @@ int ndisc_dst_gc(int *more) } } + spin_unlock_bh(&ndisc_lock); + return freed; } From c71099acce933455123ee505cc75964610a209ad Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 4 Aug 2006 23:20:06 -0700 Subject: [PATCH 0396/1063] [IPV6]: Multiple Routing Tables Adds the framework to support multiple IPv6 routing tables. Currently all automatically generated routes are put into the same table. This could be changed at a later point after considering the produced locking overhead. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 39 +++- include/net/ip6_route.h | 3 +- net/ipv6/Kconfig | 6 + net/ipv6/addrconf.c | 6 +- net/ipv6/ip6_fib.c | 144 +++++++++++- net/ipv6/route.c | 496 +++++++++++++++++++++++++--------------- 6 files changed, 499 insertions(+), 195 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index a66e9de16a6c..818411519c89 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -51,6 +51,8 @@ struct rt6key int plen; }; +struct fib6_table; + struct rt6_info { union { @@ -71,6 +73,7 @@ struct rt6_info u32 rt6i_flags; u32 rt6i_metric; atomic_t rt6i_ref; + struct fib6_table *rt6i_table; struct rt6key rt6i_dst; struct rt6key rt6i_src; @@ -143,12 +146,43 @@ struct rt6_statistics { typedef void (*f_pnode)(struct fib6_node *fn, void *); -extern struct fib6_node ip6_routing_table; +struct fib6_table { + struct hlist_node tb6_hlist; + u32 tb6_id; + rwlock_t tb6_lock; + struct fib6_node tb6_root; +}; + +#define RT6_TABLE_UNSPEC RT_TABLE_UNSPEC +#define RT6_TABLE_MAIN RT_TABLE_MAIN +#define RT6_TABLE_LOCAL RT6_TABLE_MAIN +#define RT6_TABLE_DFLT RT6_TABLE_MAIN +#define RT6_TABLE_INFO RT6_TABLE_MAIN +#define RT6_TABLE_PREFIX RT6_TABLE_MAIN + +#ifdef CONFIG_IPV6_MULTIPLE_TABLES +#define FIB6_TABLE_MIN 1 +#define FIB6_TABLE_MAX RT_TABLE_MAX +#else +#define FIB6_TABLE_MIN RT_TABLE_MAIN +#define FIB6_TABLE_MAX FIB6_TABLE_MIN +#endif + +#define RT6_F_STRICT 1 +#define RT6_F_HAS_SADDR 2 + +typedef struct rt6_info *(*pol_lookup_t)(struct fib6_table *, + struct flowi *, int); /* * exported functions */ +extern struct fib6_table * fib6_get_table(u32 id); +extern struct fib6_table * fib6_new_table(u32 id); +extern struct dst_entry * fib6_rule_lookup(struct flowi *fl, int flags, + pol_lookup_t lookup); + extern struct fib6_node *fib6_lookup(struct fib6_node *root, struct in6_addr *daddr, struct in6_addr *saddr); @@ -161,6 +195,9 @@ extern void fib6_clean_tree(struct fib6_node *root, int (*func)(struct rt6_info *, void *arg), int prune, void *arg); +extern void fib6_clean_all(int (*func)(struct rt6_info *, void *arg), + int prune, void *arg); + extern int fib6_walk(struct fib6_walker_t *w); extern int fib6_walk_continue(struct fib6_walker_t *w); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 96b0e66406ec..d49c8c90eb68 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -58,7 +58,8 @@ extern int ipv6_route_ioctl(unsigned int cmd, void __user *arg); extern int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *, void *rtattr, - struct netlink_skb_parms *req); + struct netlink_skb_parms *req, + u32 table_id); extern int ip6_ins_rt(struct rt6_info *, struct nlmsghdr *, void *rtattr, diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 0ba06c0c5d39..159c63d99c81 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -136,3 +136,9 @@ config IPV6_TUNNEL If unsure, say N. +config IPV6_MULTIPLE_TABLES + bool "IPv6: Multiple Routing Tables" + depends on IPV6 && EXPERIMENTAL + ---help--- + Support multiple routing tables. + diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c7852b38e03e..318767fcefdc 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1525,7 +1525,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT)) rtmsg.rtmsg_flags |= RTF_NONEXTHOP; - ip6_route_add(&rtmsg, NULL, NULL, NULL); + ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_PREFIX); } /* Create "default" multicast route to the interface */ @@ -1542,7 +1542,7 @@ static void addrconf_add_mroute(struct net_device *dev) rtmsg.rtmsg_ifindex = dev->ifindex; rtmsg.rtmsg_flags = RTF_UP; rtmsg.rtmsg_type = RTMSG_NEWROUTE; - ip6_route_add(&rtmsg, NULL, NULL, NULL); + ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_LOCAL); } static void sit_route_add(struct net_device *dev) @@ -1559,7 +1559,7 @@ static void sit_route_add(struct net_device *dev) rtmsg.rtmsg_flags = RTF_UP|RTF_NONEXTHOP; rtmsg.rtmsg_ifindex = dev->ifindex; - ip6_route_add(&rtmsg, NULL, NULL, NULL); + ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_MAIN); } static void addrconf_add_lroute(struct net_device *dev) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 764221220afd..fcd7da830aca 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -26,6 +26,7 @@ #include #include #include +#include #ifdef CONFIG_PROC_FS #include @@ -147,6 +148,126 @@ static __inline__ void rt6_release(struct rt6_info *rt) dst_free(&rt->u.dst); } +static struct fib6_table fib6_main_tbl = { + .tb6_id = RT6_TABLE_MAIN, + .tb6_lock = RW_LOCK_UNLOCKED, + .tb6_root = { + .leaf = &ip6_null_entry, + .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO, + }, +}; + +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + +#define FIB_TABLE_HASHSZ 256 +static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; + +static struct fib6_table *fib6_alloc_table(u32 id) +{ + struct fib6_table *table; + + table = kzalloc(sizeof(*table), GFP_ATOMIC); + if (table != NULL) { + table->tb6_id = id; + table->tb6_lock = RW_LOCK_UNLOCKED; + table->tb6_root.leaf = &ip6_null_entry; + table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; + } + + return table; +} + +static void fib6_link_table(struct fib6_table *tb) +{ + unsigned int h; + + h = tb->tb6_id & (FIB_TABLE_HASHSZ - 1); + + /* + * No protection necessary, this is the only list mutatation + * operation, tables never disappear once they exist. + */ + hlist_add_head_rcu(&tb->tb6_hlist, &fib_table_hash[h]); +} + +struct fib6_table *fib6_new_table(u32 id) +{ + struct fib6_table *tb; + + if (id == 0) + id = RT6_TABLE_MAIN; + tb = fib6_get_table(id); + if (tb) + return tb; + + tb = fib6_alloc_table(id); + if (tb != NULL) + fib6_link_table(tb); + + return tb; +} + +struct fib6_table *fib6_get_table(u32 id) +{ + struct fib6_table *tb; + struct hlist_node *node; + unsigned int h; + + if (id == 0) + id = RT6_TABLE_MAIN; + h = id & (FIB_TABLE_HASHSZ - 1); + rcu_read_lock(); + hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb6_hlist) { + if (tb->tb6_id == id) { + rcu_read_unlock(); + return tb; + } + } + rcu_read_unlock(); + + return NULL; +} + +struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, + pol_lookup_t lookup) +{ + /* + * TODO: Add rule lookup + */ + struct fib6_table *table = fib6_get_table(RT6_TABLE_MAIN); + + return (struct dst_entry *) lookup(table, fl, flags); +} + +static void __init fib6_tables_init(void) +{ + fib6_link_table(&fib6_main_tbl); +} + +#else + +struct fib6_table *fib6_new_table(u32 id) +{ + return fib6_get_table(id); +} + +struct fib6_table *fib6_get_table(u32 id) +{ + return &fib6_main_tbl; +} + +struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, + pol_lookup_t lookup) +{ + return (struct dst_entry *) lookup(&fib6_main_tbl, fl, flags); +} + +static void __init fib6_tables_init(void) +{ +} + +#endif + /* * Routing Table @@ -1064,6 +1185,22 @@ void fib6_clean_tree(struct fib6_node *root, fib6_walk(&c.w); } +void fib6_clean_all(int (*func)(struct rt6_info *, void *arg), + int prune, void *arg) +{ + int i; + struct fib6_table *table; + + for (i = FIB6_TABLE_MIN; i <= FIB6_TABLE_MAX; i++) { + table = fib6_get_table(i); + if (table != NULL) { + write_lock_bh(&table->tb6_lock); + fib6_clean_tree(&table->tb6_root, func, prune, arg); + write_unlock_bh(&table->tb6_lock); + } + } +} + static int fib6_prune_clone(struct rt6_info *rt, void *arg) { if (rt->rt6i_flags & RTF_CACHE) { @@ -1142,11 +1279,8 @@ void fib6_run_gc(unsigned long dummy) } gc_args.more = 0; - - write_lock_bh(&rt6_lock); ndisc_dst_gc(&gc_args.more); - fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL); - write_unlock_bh(&rt6_lock); + fib6_clean_all(fib6_age, 0, NULL); if (gc_args.more) mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval); @@ -1165,6 +1299,8 @@ void __init fib6_init(void) NULL, NULL); if (!fib6_node_kmem) panic("cannot create fib6_nodes cache"); + + fib6_tables_init(); } void fib6_gc_cleanup(void) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ce1f49b595b0..73efdadb9ab8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -140,16 +140,6 @@ struct rt6_info ip6_null_entry = { .rt6i_ref = ATOMIC_INIT(1), }; -struct fib6_node ip6_routing_table = { - .leaf = &ip6_null_entry, - .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO, -}; - -/* Protects all the ip6 fib */ - -DEFINE_RWLOCK(rt6_lock); - - /* allocate dst with ip6_dst_ops */ static __inline__ struct rt6_info *ip6_dst_alloc(void) { @@ -188,8 +178,14 @@ static __inline__ int rt6_check_expired(const struct rt6_info *rt) time_after(jiffies, rt->rt6i_expires)); } +static inline int rt6_need_strict(struct in6_addr *daddr) +{ + return (ipv6_addr_type(daddr) & + (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); +} + /* - * Route lookup. Any rt6_lock is implied. + * Route lookup. Any table->tb6_lock is implied. */ static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt, @@ -441,27 +437,66 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } #endif -struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, - int oif, int strict) +#define BACKTRACK() \ +if (rt == &ip6_null_entry && flags & RT6_F_STRICT) { \ + while ((fn = fn->parent) != NULL) { \ + if (fn->fn_flags & RTN_TL_ROOT) { \ + dst_hold(&rt->u.dst); \ + goto out; \ + } \ + if (fn->fn_flags & RTN_RTINFO) \ + goto restart; \ + } \ +} + +static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table, + struct flowi *fl, int flags) { struct fib6_node *fn; struct rt6_info *rt; - read_lock_bh(&rt6_lock); - fn = fib6_lookup(&ip6_routing_table, daddr, saddr); - rt = rt6_device_match(fn->leaf, oif, strict); + read_lock_bh(&table->tb6_lock); + fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); +restart: + rt = fn->leaf; + rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT); + BACKTRACK(); dst_hold(&rt->u.dst); - rt->u.dst.__use++; - read_unlock_bh(&rt6_lock); +out: + read_unlock_bh(&table->tb6_lock); rt->u.dst.lastuse = jiffies; - if (rt->u.dst.error == 0) - return rt; - dst_release(&rt->u.dst); + rt->u.dst.__use++; + + return rt; + +} + +struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, + int oif, int strict) +{ + struct flowi fl = { + .oif = oif, + .nl_u = { + .ip6_u = { + .daddr = *daddr, + /* TODO: saddr */ + }, + }, + }; + struct dst_entry *dst; + int flags = strict ? RT6_F_STRICT : 0; + + dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup); + if (dst->error == 0) + return (struct rt6_info *) dst; + + dst_release(dst); + return NULL; } -/* ip6_ins_rt is called with FREE rt6_lock. +/* ip6_ins_rt is called with FREE table->tb6_lock. It takes new route entry, the addition fails by any reason the route is freed. In any case, if caller does not hold it, it may be destroyed. @@ -471,10 +506,12 @@ int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) { int err; + struct fib6_table *table; - write_lock_bh(&rt6_lock); - err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req); - write_unlock_bh(&rt6_lock); + table = rt->rt6i_table; + write_lock_bh(&table->tb6_lock); + err = fib6_add(&table->tb6_root, rt, nlh, _rtattr, req); + write_unlock_bh(&table->tb6_lock); return err; } @@ -532,116 +569,34 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d return rt; } -#define BACKTRACK() \ -if (rt == &ip6_null_entry) { \ - while ((fn = fn->parent) != NULL) { \ - if (fn->fn_flags & RTN_ROOT) { \ - goto out; \ - } \ - if (fn->fn_flags & RTN_RTINFO) \ - goto restart; \ - } \ -} - - -void ip6_route_input(struct sk_buff *skb) +struct rt6_info *ip6_pol_route_input(struct fib6_table *table, struct flowi *fl, + int flags) { struct fib6_node *fn; struct rt6_info *rt, *nrt; - int strict; + int strict = 0; int attempts = 3; int err; int reachable = RT6_SELECT_F_REACHABLE; - strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0; + if (flags & RT6_F_STRICT) + strict = RT6_SELECT_F_IFACE; relookup: - read_lock_bh(&rt6_lock); + read_lock_bh(&table->tb6_lock); restart_2: - fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr, - &skb->nh.ipv6h->saddr); + fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: - rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable); + rt = rt6_select(&fn->leaf, fl->iif, strict | reachable); BACKTRACK(); if (rt == &ip6_null_entry || rt->rt6i_flags & RTF_CACHE) goto out; dst_hold(&rt->u.dst); - read_unlock_bh(&rt6_lock); - - if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) - nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr); - else { -#if CLONE_OFFLINK_ROUTE - nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr); -#else - goto out2; -#endif - } - - dst_release(&rt->u.dst); - rt = nrt ? : &ip6_null_entry; - - dst_hold(&rt->u.dst); - if (nrt) { - err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb)); - if (!err) - goto out2; - } - - if (--attempts <= 0) - goto out2; - - /* - * Race condition! In the gap, when rt6_lock was - * released someone could insert this route. Relookup. - */ - dst_release(&rt->u.dst); - goto relookup; - -out: - if (reachable) { - reachable = 0; - goto restart_2; - } - dst_hold(&rt->u.dst); - read_unlock_bh(&rt6_lock); -out2: - rt->u.dst.lastuse = jiffies; - rt->u.dst.__use++; - skb->dst = (struct dst_entry *) rt; - return; -} - -struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) -{ - struct fib6_node *fn; - struct rt6_info *rt, *nrt; - int strict; - int attempts = 3; - int err; - int reachable = RT6_SELECT_F_REACHABLE; - - strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0; - -relookup: - read_lock_bh(&rt6_lock); - -restart_2: - fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src); - -restart: - rt = rt6_select(&fn->leaf, fl->oif, strict | reachable); - BACKTRACK(); - if (rt == &ip6_null_entry || - rt->rt6i_flags & RTF_CACHE) - goto out; - - dst_hold(&rt->u.dst); - read_unlock_bh(&rt6_lock); + read_unlock_bh(&table->tb6_lock); if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); @@ -667,7 +622,7 @@ restart: goto out2; /* - * Race condition! In the gap, when rt6_lock was + * Race condition! In the gap, when table->tb6_lock was * released someone could insert this route. Relookup. */ dst_release(&rt->u.dst); @@ -679,11 +634,116 @@ out: goto restart_2; } dst_hold(&rt->u.dst); - read_unlock_bh(&rt6_lock); + read_unlock_bh(&table->tb6_lock); out2: rt->u.dst.lastuse = jiffies; rt->u.dst.__use++; - return &rt->u.dst; + + return rt; +} + +void ip6_route_input(struct sk_buff *skb) +{ + struct ipv6hdr *iph = skb->nh.ipv6h; + struct flowi fl = { + .iif = skb->dev->ifindex, + .nl_u = { + .ip6_u = { + .daddr = iph->daddr, + .saddr = iph->saddr, + .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK, + }, + }, + .proto = iph->nexthdr, + }; + int flags = 0; + + if (rt6_need_strict(&iph->daddr)) + flags |= RT6_F_STRICT; + + skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input); +} + +static struct rt6_info *ip6_pol_route_output(struct fib6_table *table, + struct flowi *fl, int flags) +{ + struct fib6_node *fn; + struct rt6_info *rt, *nrt; + int strict = 0; + int attempts = 3; + int err; + int reachable = RT6_SELECT_F_REACHABLE; + + if (flags & RT6_F_STRICT) + strict = RT6_SELECT_F_IFACE; + +relookup: + read_lock_bh(&table->tb6_lock); + +restart_2: + fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); + +restart: + rt = rt6_select(&fn->leaf, fl->oif, strict | reachable); + BACKTRACK(); + if (rt == &ip6_null_entry || + rt->rt6i_flags & RTF_CACHE) + goto out; + + dst_hold(&rt->u.dst); + read_unlock_bh(&table->tb6_lock); + + if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) + nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); + else { +#if CLONE_OFFLINK_ROUTE + nrt = rt6_alloc_clone(rt, &fl->fl6_dst); +#else + goto out2; +#endif + } + + dst_release(&rt->u.dst); + rt = nrt ? : &ip6_null_entry; + + dst_hold(&rt->u.dst); + if (nrt) { + err = ip6_ins_rt(nrt, NULL, NULL, NULL); + if (!err) + goto out2; + } + + if (--attempts <= 0) + goto out2; + + /* + * Race condition! In the gap, when table->tb6_lock was + * released someone could insert this route. Relookup. + */ + dst_release(&rt->u.dst); + goto relookup; + +out: + if (reachable) { + reachable = 0; + goto restart_2; + } + dst_hold(&rt->u.dst); + read_unlock_bh(&table->tb6_lock); +out2: + rt->u.dst.lastuse = jiffies; + rt->u.dst.__use++; + return rt; +} + +struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) +{ + int flags = 0; + + if (rt6_need_strict(&fl->fl6_dst)) + flags |= RT6_F_STRICT; + + return fib6_rule_lookup(fl, flags, ip6_pol_route_output); } @@ -906,7 +966,8 @@ int ipv6_get_hoplimit(struct net_device *dev) */ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, - void *_rtattr, struct netlink_skb_parms *req) + void *_rtattr, struct netlink_skb_parms *req, + u32 table_id) { int err; struct rtmsg *r; @@ -914,6 +975,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, struct rt6_info *rt = NULL; struct net_device *dev = NULL; struct inet6_dev *idev = NULL; + struct fib6_table *table; int addr_type; rta = (struct rtattr **) _rtattr; @@ -937,6 +999,12 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, if (rtmsg->rtmsg_metric == 0) rtmsg->rtmsg_metric = IP6_RT_PRIO_USER; + table = fib6_new_table(table_id); + if (table == NULL) { + err = -ENOBUFS; + goto out; + } + rt = ip6_dst_alloc(); if (rt == NULL) { @@ -1093,6 +1161,7 @@ install_route: rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); rt->u.dst.dev = dev; rt->rt6i_idev = idev; + rt->rt6i_table = table; return ip6_ins_rt(rt, nlh, _rtattr, req); out: @@ -1108,26 +1177,35 @@ out: int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) { int err; + struct fib6_table *table; - write_lock_bh(&rt6_lock); + table = rt->rt6i_table; + write_lock_bh(&table->tb6_lock); err = fib6_del(rt, nlh, _rtattr, req); dst_release(&rt->u.dst); - write_unlock_bh(&rt6_lock); + write_unlock_bh(&table->tb6_lock); return err; } -static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) +static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, + void *_rtattr, struct netlink_skb_parms *req, + u32 table_id) { + struct fib6_table *table; struct fib6_node *fn; struct rt6_info *rt; int err = -ESRCH; - read_lock_bh(&rt6_lock); + table = fib6_get_table(table_id); + if (table == NULL) + return err; - fn = fib6_locate(&ip6_routing_table, + read_lock_bh(&table->tb6_lock); + + fn = fib6_locate(&table->tb6_root, &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len, &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len); @@ -1144,12 +1222,12 @@ static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_r rtmsg->rtmsg_metric != rt->rt6i_metric) continue; dst_hold(&rt->u.dst); - read_unlock_bh(&rt6_lock); + read_unlock_bh(&table->tb6_lock); return ip6_del_rt(rt, nlh, _rtattr, req); } } - read_unlock_bh(&rt6_lock); + read_unlock_bh(&table->tb6_lock); return err; } @@ -1161,10 +1239,15 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr, struct neighbour *neigh, u8 *lladdr, int on_link) { struct rt6_info *rt, *nrt = NULL; - int strict; struct fib6_node *fn; + struct fib6_table *table; struct netevent_redirect netevent; + /* TODO: Very lazy, might need to check all tables */ + table = fib6_get_table(RT6_TABLE_MAIN); + if (table == NULL) + return; + /* * Get the "current" route for this destination and * check if the redirect has come from approriate router. @@ -1175,10 +1258,9 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr, * is a bit fuzzy and one might need to check all possible * routes. */ - strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL); - read_lock_bh(&rt6_lock); - fn = fib6_lookup(&ip6_routing_table, dest, NULL); + read_lock_bh(&table->tb6_lock); + fn = fib6_lookup(&table->tb6_root, dest, NULL); restart: for (rt = fn->leaf; rt; rt = rt->u.next) { /* @@ -1201,7 +1283,7 @@ restart: } if (rt) dst_hold(&rt->u.dst); - else if (strict) { + else if (rt6_need_strict(dest)) { while ((fn = fn->parent) != NULL) { if (fn->fn_flags & RTN_ROOT) break; @@ -1209,7 +1291,7 @@ restart: goto restart; } } - read_unlock_bh(&rt6_lock); + read_unlock_bh(&table->tb6_lock); if (!rt) { if (net_ratelimit()) @@ -1384,6 +1466,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) #ifdef CONFIG_IPV6_SUBTREES memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); #endif + rt->rt6i_table = ort->rt6i_table; } return rt; } @@ -1394,9 +1477,14 @@ static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixle { struct fib6_node *fn; struct rt6_info *rt = NULL; + struct fib6_table *table; - write_lock_bh(&rt6_lock); - fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0); + table = fib6_get_table(RT6_TABLE_INFO); + if (table == NULL) + return NULL; + + write_lock_bh(&table->tb6_lock); + fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0); if (!fn) goto out; @@ -1411,7 +1499,7 @@ static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixle break; } out: - write_unlock_bh(&rt6_lock); + write_unlock_bh(&table->tb6_lock); return rt; } @@ -1433,7 +1521,7 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle rtmsg.rtmsg_flags |= RTF_DEFAULT; rtmsg.rtmsg_ifindex = ifindex; - ip6_route_add(&rtmsg, NULL, NULL, NULL); + ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_INFO); return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex); } @@ -1442,12 +1530,14 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev) { struct rt6_info *rt; - struct fib6_node *fn; + struct fib6_table *table; - fn = &ip6_routing_table; + table = fib6_get_table(RT6_TABLE_DFLT); + if (table == NULL) + return NULL; - write_lock_bh(&rt6_lock); - for (rt = fn->leaf; rt; rt=rt->u.next) { + write_lock_bh(&table->tb6_lock); + for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) { if (dev == rt->rt6i_dev && ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && ipv6_addr_equal(&rt->rt6i_gateway, addr)) @@ -1455,7 +1545,7 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d } if (rt) dst_hold(&rt->u.dst); - write_unlock_bh(&rt6_lock); + write_unlock_bh(&table->tb6_lock); return rt; } @@ -1474,28 +1564,31 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, rtmsg.rtmsg_ifindex = dev->ifindex; - ip6_route_add(&rtmsg, NULL, NULL, NULL); + ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_DFLT); return rt6_get_dflt_router(gwaddr, dev); } void rt6_purge_dflt_routers(void) { struct rt6_info *rt; + struct fib6_table *table; + + /* NOTE: Keep consistent with rt6_get_dflt_router */ + table = fib6_get_table(RT6_TABLE_DFLT); + if (table == NULL) + return; restart: - read_lock_bh(&rt6_lock); - for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) { + read_lock_bh(&table->tb6_lock); + for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) { if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { dst_hold(&rt->u.dst); - - read_unlock_bh(&rt6_lock); - + read_unlock_bh(&table->tb6_lock); ip6_del_rt(rt, NULL, NULL, NULL); - goto restart; } } - read_unlock_bh(&rt6_lock); + read_unlock_bh(&table->tb6_lock); } int ipv6_route_ioctl(unsigned int cmd, void __user *arg) @@ -1516,10 +1609,12 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg) rtnl_lock(); switch (cmd) { case SIOCADDRT: - err = ip6_route_add(&rtmsg, NULL, NULL, NULL); + err = ip6_route_add(&rtmsg, NULL, NULL, NULL, + RT6_TABLE_MAIN); break; case SIOCDELRT: - err = ip6_route_del(&rtmsg, NULL, NULL, NULL); + err = ip6_route_del(&rtmsg, NULL, NULL, NULL, + RT6_TABLE_MAIN); break; default: err = -EINVAL; @@ -1593,6 +1688,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, ipv6_addr_copy(&rt->rt6i_dst.addr, addr); rt->rt6i_dst.plen = 128; + rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL); atomic_set(&rt->u.dst.__refcnt, 1); @@ -1611,9 +1707,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *arg) void rt6_ifdown(struct net_device *dev) { - write_lock_bh(&rt6_lock); - fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev); - write_unlock_bh(&rt6_lock); + fib6_clean_all(fib6_ifdown, 0, dev); } struct rt6_mtu_change_arg @@ -1663,13 +1757,12 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) void rt6_mtu_change(struct net_device *dev, unsigned mtu) { - struct rt6_mtu_change_arg arg; + struct rt6_mtu_change_arg arg = { + .dev = dev, + .mtu = mtu, + }; - arg.dev = dev; - arg.mtu = mtu; - read_lock_bh(&rt6_lock); - fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg); - read_unlock_bh(&rt6_lock); + fib6_clean_all(rt6_mtu_change_route, 0, &arg); } static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta, @@ -1719,7 +1812,7 @@ int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) return -EINVAL; - return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb)); + return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table); } int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) @@ -1729,7 +1822,7 @@ int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) return -EINVAL; - return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb)); + return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table); } struct rt6_rtnl_dump_arg @@ -1761,6 +1854,10 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, rtm->rtm_dst_len = rt->rt6i_dst.plen; rtm->rtm_src_len = rt->rt6i_src.plen; rtm->rtm_tos = 0; + if (rt->rt6i_table) + rtm->rtm_table = rt->rt6i_table->tb6_id; + else + rtm->rtm_table = RT6_TABLE_UNSPEC; rtm->rtm_table = RT_TABLE_MAIN; if (rt->rt6i_flags&RTF_REJECT) rtm->rtm_type = RTN_UNREACHABLE; @@ -1868,7 +1965,6 @@ static void fib6_dump_end(struct netlink_callback *cb) if (w) { cb->args[0] = 0; - fib6_walker_unlink(w); kfree(w); } cb->done = (void*)cb->args[1]; @@ -1883,13 +1979,20 @@ static int fib6_dump_done(struct netlink_callback *cb) int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { + struct fib6_table *table; struct rt6_rtnl_dump_arg arg; struct fib6_walker_t *w; - int res; + int i, res = 0; arg.skb = skb; arg.cb = cb; + /* + * cb->args[0] = pointer to walker structure + * cb->args[1] = saved cb->done() pointer + * cb->args[2] = current table being dumped + */ + w = (void*)cb->args[0]; if (w == NULL) { /* New dump: @@ -1905,24 +2008,48 @@ int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) w = kzalloc(sizeof(*w), GFP_ATOMIC); if (w == NULL) return -ENOMEM; - RT6_TRACE("dump<%p", w); - w->root = &ip6_routing_table; w->func = fib6_dump_node; w->args = &arg; cb->args[0] = (long)w; - read_lock_bh(&rt6_lock); - res = fib6_walk(w); - read_unlock_bh(&rt6_lock); + cb->args[2] = FIB6_TABLE_MIN; } else { w->args = &arg; - read_lock_bh(&rt6_lock); - res = fib6_walk_continue(w); - read_unlock_bh(&rt6_lock); + i = cb->args[2]; + if (i > FIB6_TABLE_MAX) + goto end; + + table = fib6_get_table(i); + if (table != NULL) { + read_lock_bh(&table->tb6_lock); + w->root = &table->tb6_root; + res = fib6_walk_continue(w); + read_unlock_bh(&table->tb6_lock); + if (res != 0) { + if (res < 0) + fib6_walker_unlink(w); + goto end; + } + } + + fib6_walker_unlink(w); + cb->args[2] = ++i; } -#if RT6_DEBUG >= 3 - if (res <= 0 && skb->len == 0) - RT6_TRACE("%p>dump end\n", w); -#endif + + for (i = cb->args[2]; i <= FIB6_TABLE_MAX; i++) { + table = fib6_get_table(i); + if (table == NULL) + continue; + + read_lock_bh(&table->tb6_lock); + w->root = &table->tb6_root; + res = fib6_walk(w); + read_unlock_bh(&table->tb6_lock); + if (res) + break; + } +end: + cb->args[2] = i; + res = res < 0 ? res : skb->len; /* res < 0 is an error. (really, impossible) res == 0 means that dump is complete, but skb still can contain data. @@ -2102,16 +2229,13 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) static int rt6_proc_info(char *buffer, char **start, off_t offset, int length) { - struct rt6_proc_arg arg; - arg.buffer = buffer; - arg.offset = offset; - arg.length = length; - arg.skip = 0; - arg.len = 0; + struct rt6_proc_arg arg = { + .buffer = buffer, + .offset = offset, + .length = length, + }; - read_lock_bh(&rt6_lock); - fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg); - read_unlock_bh(&rt6_lock); + fib6_clean_all(rt6_info_route, 0, &arg); *start = buffer; if (offset) From 14c0b97ddfc2944982d078b8e33b088840068976 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 4 Aug 2006 03:38:38 -0700 Subject: [PATCH 0397/1063] [NET]: Protocol Independant Policy Routing Rules Framework Derived from net/ipv/fib_rules.c Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/fib_rules.h | 60 ++++++ include/net/fib_rules.h | 90 +++++++++ net/Kconfig | 3 + net/core/Makefile | 1 + net/core/fib_rules.c | 416 ++++++++++++++++++++++++++++++++++++++ net/core/rtnetlink.c | 9 +- 6 files changed, 577 insertions(+), 2 deletions(-) create mode 100644 include/linux/fib_rules.h create mode 100644 include/net/fib_rules.h create mode 100644 net/core/fib_rules.c diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h new file mode 100644 index 000000000000..5e503f0ca6e4 --- /dev/null +++ b/include/linux/fib_rules.h @@ -0,0 +1,60 @@ +#ifndef __LINUX_FIB_RULES_H +#define __LINUX_FIB_RULES_H + +#include +#include + +/* rule is permanent, and cannot be deleted */ +#define FIB_RULE_PERMANENT 1 + +struct fib_rule_hdr +{ + __u8 family; + __u8 dst_len; + __u8 src_len; + __u8 tos; + + __u8 table; + __u8 res1; /* reserved */ + __u8 res2; /* reserved */ + __u8 action; + + __u32 flags; +}; + +enum +{ + FRA_UNSPEC, + FRA_DST, /* destination address */ + FRA_SRC, /* source address */ + FRA_IFNAME, /* interface name */ + FRA_UNUSED1, + FRA_UNUSED2, + FRA_PRIORITY, /* priority/preference */ + FRA_UNUSED3, + FRA_UNUSED4, + FRA_UNUSED5, + FRA_FWMARK, /* netfilter mark (IPv4) */ + FRA_FLOW, /* flow/class id */ + __FRA_MAX +}; + +#define FRA_MAX (__FRA_MAX - 1) + +enum +{ + FR_ACT_UNSPEC, + FR_ACT_TO_TBL, /* Pass to fixed table */ + FR_ACT_RES1, + FR_ACT_RES2, + FR_ACT_RES3, + FR_ACT_RES4, + FR_ACT_BLACKHOLE, /* Drop without notification */ + FR_ACT_UNREACHABLE, /* Drop with ENETUNREACH */ + FR_ACT_PROHIBIT, /* Drop with EACCES */ + __FR_ACT_MAX, +}; + +#define FR_ACT_MAX (__FR_ACT_MAX - 1) + +#endif diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h new file mode 100644 index 000000000000..61375d9e53f8 --- /dev/null +++ b/include/net/fib_rules.h @@ -0,0 +1,90 @@ +#ifndef __NET_FIB_RULES_H +#define __NET_FIB_RULES_H + +#include +#include +#include +#include +#include + +struct fib_rule +{ + struct list_head list; + atomic_t refcnt; + int ifindex; + char ifname[IFNAMSIZ]; + u32 pref; + u32 flags; + u32 table; + u8 action; + struct rcu_head rcu; +}; + +struct fib_lookup_arg +{ + void *lookup_ptr; + void *result; + struct fib_rule *rule; +}; + +struct fib_rules_ops +{ + int family; + struct list_head list; + int rule_size; + + int (*action)(struct fib_rule *, + struct flowi *, int, + struct fib_lookup_arg *); + int (*match)(struct fib_rule *, + struct flowi *, int); + int (*configure)(struct fib_rule *, + struct sk_buff *, + struct nlmsghdr *, + struct fib_rule_hdr *, + struct nlattr **); + int (*compare)(struct fib_rule *, + struct fib_rule_hdr *, + struct nlattr **); + int (*fill)(struct fib_rule *, struct sk_buff *, + struct nlmsghdr *, + struct fib_rule_hdr *); + u32 (*default_pref)(void); + + int nlgroup; + struct nla_policy *policy; + struct list_head *rules_list; + struct module *owner; +}; + +static inline void fib_rule_get(struct fib_rule *rule) +{ + atomic_inc(&rule->refcnt); +} + +static inline void fib_rule_put_rcu(struct rcu_head *head) +{ + struct fib_rule *rule = container_of(head, struct fib_rule, rcu); + kfree(rule); +} + +static inline void fib_rule_put(struct fib_rule *rule) +{ + if (atomic_dec_and_test(&rule->refcnt)) + call_rcu(&rule->rcu, fib_rule_put_rcu); +} + +extern int fib_rules_register(struct fib_rules_ops *); +extern int fib_rules_unregister(struct fib_rules_ops *); + +extern int fib_rules_lookup(struct fib_rules_ops *, + struct flowi *, int flags, + struct fib_lookup_arg *); + +extern int fib_nl_newrule(struct sk_buff *, + struct nlmsghdr *, void *); +extern int fib_nl_delrule(struct sk_buff *, + struct nlmsghdr *, void *); +extern int fib_rules_dump(struct sk_buff *, + struct netlink_callback *, int); +#endif diff --git a/net/Kconfig b/net/Kconfig index eb855b7fa642..6528a935622c 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -251,6 +251,9 @@ config WIRELESS_EXT source "net/netlabel/Kconfig" +config FIB_RULES + bool + endif # if NET endmenu # Networking diff --git a/net/core/Makefile b/net/core/Makefile index 2645ba428d48..119568077dab 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -17,3 +17,4 @@ obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_WIRELESS_EXT) += wireless.o obj-$(CONFIG_NETPOLL) += netpoll.o obj-$(CONFIG_NET_DMA) += user_dma.o +obj-$(CONFIG_FIB_RULES) += fib_rules.o diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c new file mode 100644 index 000000000000..6cdad24038e2 --- /dev/null +++ b/net/core/fib_rules.c @@ -0,0 +1,416 @@ +/* + * net/core/fib_rules.c Generic Routing Rules + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2. + * + * Authors: Thomas Graf + */ + +#include +#include +#include +#include +#include + +static LIST_HEAD(rules_ops); +static DEFINE_SPINLOCK(rules_mod_lock); + +static void notify_rule_change(int event, struct fib_rule *rule, + struct fib_rules_ops *ops); + +static struct fib_rules_ops *lookup_rules_ops(int family) +{ + struct fib_rules_ops *ops; + + rcu_read_lock(); + list_for_each_entry_rcu(ops, &rules_ops, list) { + if (ops->family == family) { + if (!try_module_get(ops->owner)) + ops = NULL; + rcu_read_unlock(); + return ops; + } + } + rcu_read_unlock(); + + return NULL; +} + +static void rules_ops_put(struct fib_rules_ops *ops) +{ + if (ops) + module_put(ops->owner); +} + +int fib_rules_register(struct fib_rules_ops *ops) +{ + int err = -EEXIST; + struct fib_rules_ops *o; + + if (ops->rule_size < sizeof(struct fib_rule)) + return -EINVAL; + + if (ops->match == NULL || ops->configure == NULL || + ops->compare == NULL || ops->fill == NULL || + ops->action == NULL) + return -EINVAL; + + spin_lock(&rules_mod_lock); + list_for_each_entry(o, &rules_ops, list) + if (ops->family == o->family) + goto errout; + + list_add_tail_rcu(&ops->list, &rules_ops); + err = 0; +errout: + spin_unlock(&rules_mod_lock); + + return err; +} + +EXPORT_SYMBOL_GPL(fib_rules_register); + +static void cleanup_ops(struct fib_rules_ops *ops) +{ + struct fib_rule *rule, *tmp; + + list_for_each_entry_safe(rule, tmp, ops->rules_list, list) { + list_del_rcu(&rule->list); + fib_rule_put(rule); + } +} + +int fib_rules_unregister(struct fib_rules_ops *ops) +{ + int err = 0; + struct fib_rules_ops *o; + + spin_lock(&rules_mod_lock); + list_for_each_entry(o, &rules_ops, list) { + if (o == ops) { + list_del_rcu(&o->list); + cleanup_ops(ops); + goto out; + } + } + + err = -ENOENT; +out: + spin_unlock(&rules_mod_lock); + + synchronize_rcu(); + + return err; +} + +EXPORT_SYMBOL_GPL(fib_rules_unregister); + +int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl, + int flags, struct fib_lookup_arg *arg) +{ + struct fib_rule *rule; + int err; + + rcu_read_lock(); + + list_for_each_entry_rcu(rule, ops->rules_list, list) { + if (rule->ifindex && (rule->ifindex != fl->iif)) + continue; + + if (!ops->match(rule, fl, flags)) + continue; + + err = ops->action(rule, fl, flags, arg); + if (err != -EAGAIN) { + fib_rule_get(rule); + arg->rule = rule; + goto out; + } + } + + err = -ENETUNREACH; +out: + rcu_read_unlock(); + + return err; +} + +EXPORT_SYMBOL_GPL(fib_rules_lookup); + +int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +{ + struct fib_rule_hdr *frh = nlmsg_data(nlh); + struct fib_rules_ops *ops = NULL; + struct fib_rule *rule, *r, *last = NULL; + struct nlattr *tb[FRA_MAX+1]; + int err = -EINVAL; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) + goto errout; + + ops = lookup_rules_ops(frh->family); + if (ops == NULL) { + err = EAFNOSUPPORT; + goto errout; + } + + err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy); + if (err < 0) + goto errout; + + if (tb[FRA_IFNAME] && nla_len(tb[FRA_IFNAME]) > IFNAMSIZ) + goto errout; + + rule = kzalloc(ops->rule_size, GFP_KERNEL); + if (rule == NULL) { + err = -ENOMEM; + goto errout; + } + + if (tb[FRA_PRIORITY]) + rule->pref = nla_get_u32(tb[FRA_PRIORITY]); + + if (tb[FRA_IFNAME]) { + struct net_device *dev; + + rule->ifindex = -1; + if (nla_strlcpy(rule->ifname, tb[FRA_IFNAME], + IFNAMSIZ) >= IFNAMSIZ) + goto errout_free; + + dev = __dev_get_by_name(rule->ifname); + if (dev) + rule->ifindex = dev->ifindex; + } + + rule->action = frh->action; + rule->flags = frh->flags; + rule->table = frh->table; + + if (!rule->pref && ops->default_pref) + rule->pref = ops->default_pref(); + + err = ops->configure(rule, skb, nlh, frh, tb); + if (err < 0) + goto errout_free; + + list_for_each_entry(r, ops->rules_list, list) { + if (r->pref > rule->pref) + break; + last = r; + } + + fib_rule_get(rule); + + if (last) + list_add_rcu(&rule->list, &last->list); + else + list_add_rcu(&rule->list, ops->rules_list); + + notify_rule_change(RTM_NEWRULE, rule, ops); + rules_ops_put(ops); + return 0; + +errout_free: + kfree(rule); +errout: + rules_ops_put(ops); + return err; +} + +int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +{ + struct fib_rule_hdr *frh = nlmsg_data(nlh); + struct fib_rules_ops *ops = NULL; + struct fib_rule *rule; + struct nlattr *tb[FRA_MAX+1]; + int err = -EINVAL; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) + goto errout; + + ops = lookup_rules_ops(frh->family); + if (ops == NULL) { + err = EAFNOSUPPORT; + goto errout; + } + + err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy); + if (err < 0) + goto errout; + + list_for_each_entry(rule, ops->rules_list, list) { + if (frh->action && (frh->action != rule->action)) + continue; + + if (frh->table && (frh->table != rule->table)) + continue; + + if (tb[FRA_PRIORITY] && + (rule->pref != nla_get_u32(tb[FRA_PRIORITY]))) + continue; + + if (tb[FRA_IFNAME] && + nla_strcmp(tb[FRA_IFNAME], rule->ifname)) + continue; + + if (!ops->compare(rule, frh, tb)) + continue; + + if (rule->flags & FIB_RULE_PERMANENT) { + err = -EPERM; + goto errout; + } + + list_del_rcu(&rule->list); + synchronize_rcu(); + notify_rule_change(RTM_DELRULE, rule, ops); + fib_rule_put(rule); + rules_ops_put(ops); + return 0; + } + + err = -ENOENT; +errout: + rules_ops_put(ops); + return err; +} + +static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, + u32 pid, u32 seq, int type, int flags, + struct fib_rules_ops *ops) +{ + struct nlmsghdr *nlh; + struct fib_rule_hdr *frh; + + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags); + if (nlh == NULL) + return -1; + + frh = nlmsg_data(nlh); + frh->table = rule->table; + frh->res1 = 0; + frh->res2 = 0; + frh->action = rule->action; + frh->flags = rule->flags; + + if (rule->ifname[0]) + NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname); + + if (rule->pref) + NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref); + + if (ops->fill(rule, skb, nlh, frh) < 0) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + return nlmsg_cancel(skb, nlh); +} + +int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family) +{ + int idx = 0; + struct fib_rule *rule; + struct fib_rules_ops *ops; + + ops = lookup_rules_ops(family); + if (ops == NULL) + return -EAFNOSUPPORT; + + rcu_read_lock(); + list_for_each_entry(rule, ops->rules_list, list) { + if (idx < cb->args[0]) + goto skip; + + if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, RTM_NEWRULE, + NLM_F_MULTI, ops) < 0) + break; +skip: + idx++; + } + rcu_read_unlock(); + cb->args[0] = idx; + rules_ops_put(ops); + + return skb->len; +} + +EXPORT_SYMBOL_GPL(fib_rules_dump); + +static void notify_rule_change(int event, struct fib_rule *rule, + struct fib_rules_ops *ops) +{ + int size = nlmsg_total_size(sizeof(struct fib_rule_hdr) + 128); + struct sk_buff *skb = alloc_skb(size, GFP_KERNEL); + + if (skb == NULL) + netlink_set_err(rtnl, 0, ops->nlgroup, ENOBUFS); + else if (fib_nl_fill_rule(skb, rule, 0, 0, event, 0, ops) < 0) { + kfree_skb(skb); + netlink_set_err(rtnl, 0, ops->nlgroup, EINVAL); + } else + netlink_broadcast(rtnl, skb, 0, ops->nlgroup, GFP_KERNEL); +} + +static void attach_rules(struct list_head *rules, struct net_device *dev) +{ + struct fib_rule *rule; + + list_for_each_entry(rule, rules, list) { + if (rule->ifindex == -1 && + strcmp(dev->name, rule->ifname) == 0) + rule->ifindex = dev->ifindex; + } +} + +static void detach_rules(struct list_head *rules, struct net_device *dev) +{ + struct fib_rule *rule; + + list_for_each_entry(rule, rules, list) + if (rule->ifindex == dev->ifindex) + rule->ifindex = -1; +} + + +static int fib_rules_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = ptr; + struct fib_rules_ops *ops; + + ASSERT_RTNL(); + rcu_read_lock(); + + switch (event) { + case NETDEV_REGISTER: + list_for_each_entry(ops, &rules_ops, list) + attach_rules(ops->rules_list, dev); + break; + + case NETDEV_UNREGISTER: + list_for_each_entry(ops, &rules_ops, list) + detach_rules(ops->rules_list, dev); + break; + } + + rcu_read_unlock(); + + return NOTIFY_DONE; +} + +static struct notifier_block fib_rules_notifier = { + .notifier_call = fib_rules_event, +}; + +static int __init fib_rules_init(void) +{ + return register_netdevice_notifier(&fib_rules_notifier); +} + +subsys_initcall(fib_rules_init); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 30cc1ba6ed5c..aa7cff2257b1 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #ifdef CONFIG_NET_WIRELESS_RTNETLINK #include @@ -103,7 +104,7 @@ static const int rtm_min[RTM_NR_FAMILIES] = [RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)), [RTM_FAM(RTM_NEWROUTE)] = NLMSG_LENGTH(sizeof(struct rtmsg)), [RTM_FAM(RTM_NEWNEIGH)] = NLMSG_LENGTH(sizeof(struct ndmsg)), - [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct rtmsg)), + [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)), [RTM_FAM(RTM_NEWQDISC)] = NLMSG_LENGTH(sizeof(struct tcmsg)), [RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)), [RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)), @@ -120,7 +121,7 @@ static const int rta_max[RTM_NR_FAMILIES] = [RTM_FAM(RTM_NEWADDR)] = IFA_MAX, [RTM_FAM(RTM_NEWROUTE)] = RTA_MAX, [RTM_FAM(RTM_NEWNEIGH)] = NDA_MAX, - [RTM_FAM(RTM_NEWRULE)] = RTA_MAX, + [RTM_FAM(RTM_NEWRULE)] = FRA_MAX, [RTM_FAM(RTM_NEWQDISC)] = TCA_MAX, [RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX, [RTM_FAM(RTM_NEWTFILTER)] = TCA_MAX, @@ -757,6 +758,10 @@ static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] = [RTM_NEWNEIGH - RTM_BASE] = { .doit = neigh_add }, [RTM_DELNEIGH - RTM_BASE] = { .doit = neigh_delete }, [RTM_GETNEIGH - RTM_BASE] = { .dumpit = neigh_dump_info }, +#ifdef CONFIG_FIB_RULES + [RTM_NEWRULE - RTM_BASE] = { .doit = fib_nl_newrule }, + [RTM_DELRULE - RTM_BASE] = { .doit = fib_nl_delrule }, +#endif [RTM_GETRULE - RTM_BASE] = { .dumpit = rtnetlink_dump_all }, [RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info }, [RTM_SETNEIGHTBL - RTM_BASE] = { .doit = neightbl_set }, From 101367c2f8c464ea96643192673aa18d88e6336d Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 4 Aug 2006 03:39:02 -0700 Subject: [PATCH 0398/1063] [IPV6]: Policy Routing Rules Adds support for policy routing rules including a new local table for routes with a local destination. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 2 + include/net/ip6_fib.h | 9 +- include/net/ip6_route.h | 5 + net/ipv6/Kconfig | 1 + net/ipv6/Makefile | 1 + net/ipv6/addrconf.c | 1 + net/ipv6/fib6_rules.c | 251 ++++++++++++++++++++++++++++++++++++++ net/ipv6/ip6_fib.c | 21 ++-- net/ipv6/route.c | 50 ++++++++ 9 files changed, 329 insertions(+), 12 deletions(-) create mode 100644 net/ipv6/fib6_rules.c diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index facd9ee37b76..bf353538ae93 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -889,6 +889,8 @@ enum rtnetlink_groups { RTNLGRP_NOP4, RTNLGRP_IPV6_PREFIX, #define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX + RTNLGRP_IPV6_RULE, +#define RTNLGRP_IPV6_RULE RTNLGRP_IPV6_RULE __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 818411519c89..7b47e8d5a765 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -155,7 +155,6 @@ struct fib6_table { #define RT6_TABLE_UNSPEC RT_TABLE_UNSPEC #define RT6_TABLE_MAIN RT_TABLE_MAIN -#define RT6_TABLE_LOCAL RT6_TABLE_MAIN #define RT6_TABLE_DFLT RT6_TABLE_MAIN #define RT6_TABLE_INFO RT6_TABLE_MAIN #define RT6_TABLE_PREFIX RT6_TABLE_MAIN @@ -163,9 +162,11 @@ struct fib6_table { #ifdef CONFIG_IPV6_MULTIPLE_TABLES #define FIB6_TABLE_MIN 1 #define FIB6_TABLE_MAX RT_TABLE_MAX +#define RT6_TABLE_LOCAL RT_TABLE_LOCAL #else #define FIB6_TABLE_MIN RT_TABLE_MAIN #define FIB6_TABLE_MAX FIB6_TABLE_MIN +#define RT6_TABLE_LOCAL RT6_TABLE_MAIN #endif #define RT6_F_STRICT 1 @@ -221,5 +222,11 @@ extern void fib6_run_gc(unsigned long dummy); extern void fib6_gc_cleanup(void); extern void fib6_init(void); + +extern void fib6_rules_init(void); +extern void fib6_rules_cleanup(void); +extern int fib6_rules_dump(struct sk_buff *, + struct netlink_callback *); + #endif #endif diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index d49c8c90eb68..9bfa3cc6cedb 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -41,6 +41,11 @@ struct pol_chain { extern struct rt6_info ip6_null_entry; +#ifdef CONFIG_IPV6_MULTIPLE_TABLES +extern struct rt6_info ip6_prohibit_entry; +extern struct rt6_info ip6_blk_hole_entry; +#endif + extern int ip6_rt_gc_interval; extern void ip6_route_input(struct sk_buff *skb); diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 159c63d99c81..36a6c2b79889 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -139,6 +139,7 @@ config IPV6_TUNNEL config IPV6_MULTIPLE_TABLES bool "IPv6: Multiple Routing Tables" depends on IPV6 && EXPERIMENTAL + select FIB_RULES ---help--- Support multiple routing tables. diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 386e0a626948..9eebf6091279 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -13,6 +13,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \ ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ xfrm6_output.o ipv6-$(CONFIG_NETFILTER) += netfilter.o +ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o ipv6-objs += $(ipv6-y) obj-$(CONFIG_INET6_AH) += ah6.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 318767fcefdc..ed766eebc022 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3528,6 +3528,7 @@ static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { [RTM_DELROUTE - RTM_BASE] = { .doit = inet6_rtm_delroute, }, [RTM_GETROUTE - RTM_BASE] = { .doit = inet6_rtm_getroute, .dumpit = inet6_dump_fib, }, + [RTM_GETRULE - RTM_BASE] = { .dumpit = fib6_rules_dump, }, }; static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c new file mode 100644 index 000000000000..c3c8195744ee --- /dev/null +++ b/net/ipv6/fib6_rules.c @@ -0,0 +1,251 @@ +/* + * net/ipv6/fib6_rules.c IPv6 Routing Policy Rules + * + * Copyright (C)2003-2006 Helsinki University of Technology + * Copyright (C)2003-2006 USAGI/WIDE Project + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2. + * + * Authors + * Thomas Graf + * Ville Nuorvala + */ + +#include +#include + +#include +#include +#include +#include + +struct fib6_rule +{ + struct fib_rule common; + struct rt6key src; + struct rt6key dst; + u8 tclass; +}; + +static struct fib_rules_ops fib6_rules_ops; + +static struct fib6_rule main_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .pref = 0x7FFE, + .action = FR_ACT_TO_TBL, + .table = RT6_TABLE_MAIN, + }, +}; + +static struct fib6_rule local_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .pref = 0, + .action = FR_ACT_TO_TBL, + .table = RT6_TABLE_LOCAL, + .flags = FIB_RULE_PERMANENT, + }, +}; + +static LIST_HEAD(fib6_rules); + +struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, + pol_lookup_t lookup) +{ + struct fib_lookup_arg arg = { + .lookup_ptr = lookup, + }; + + fib_rules_lookup(&fib6_rules_ops, fl, flags, &arg); + if (arg.rule) + fib_rule_put(arg.rule); + + return (struct dst_entry *) arg.result; +} + +int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, + int flags, struct fib_lookup_arg *arg) +{ + struct rt6_info *rt = NULL; + struct fib6_table *table; + pol_lookup_t lookup = arg->lookup_ptr; + + switch (rule->action) { + case FR_ACT_TO_TBL: + break; + case FR_ACT_UNREACHABLE: + rt = &ip6_null_entry; + goto discard_pkt; + default: + case FR_ACT_BLACKHOLE: + rt = &ip6_blk_hole_entry; + goto discard_pkt; + case FR_ACT_PROHIBIT: + rt = &ip6_prohibit_entry; + goto discard_pkt; + } + + table = fib6_get_table(rule->table); + if (table) + rt = lookup(table, flp, flags); + + if (rt != &ip6_null_entry) + goto out; + + dst_release(&rt->u.dst); +discard_pkt: + dst_hold(&rt->u.dst); +out: + arg->result = rt; + return rt == NULL ? -EAGAIN : 0; +} + + +static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) +{ + struct fib6_rule *r = (struct fib6_rule *) rule; + + if (!ipv6_prefix_equal(&fl->fl6_dst, &r->dst.addr, r->dst.plen)) + return 0; + + if ((flags & RT6_F_HAS_SADDR) && + !ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, r->src.plen)) + return 0; + + return 1; +} + +static struct nla_policy fib6_rule_policy[RTA_MAX+1] __read_mostly = { + [FRA_IFNAME] = { .type = NLA_STRING }, + [FRA_PRIORITY] = { .type = NLA_U32 }, + [FRA_SRC] = { .minlen = sizeof(struct in6_addr) }, + [FRA_DST] = { .minlen = sizeof(struct in6_addr) }, +}; + +static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, + struct nlmsghdr *nlh, struct fib_rule_hdr *frh, + struct nlattr **tb) +{ + int err = -EINVAL; + struct fib6_rule *rule6 = (struct fib6_rule *) rule; + + if (frh->src_len > 128 || frh->dst_len > 128 || + (frh->tos & ~IPV6_FLOWINFO_MASK)) + goto errout; + + if (rule->action == FR_ACT_TO_TBL) { + if (rule->table == RT6_TABLE_UNSPEC) + goto errout; + + if (fib6_new_table(rule->table) == NULL) { + err = -ENOBUFS; + goto errout; + } + } + + if (tb[FRA_SRC]) + nla_memcpy(&rule6->src.addr, tb[FRA_SRC], + sizeof(struct in6_addr)); + + if (tb[FRA_DST]) + nla_memcpy(&rule6->dst.addr, tb[FRA_DST], + sizeof(struct in6_addr)); + + rule6->src.plen = frh->src_len; + rule6->dst.plen = frh->dst_len; + rule6->tclass = frh->tos; + + err = 0; +errout: + return err; +} + +static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, + struct nlattr **tb) +{ + struct fib6_rule *rule6 = (struct fib6_rule *) rule; + + if (frh->src_len && (rule6->src.plen != frh->src_len)) + return 0; + + if (frh->dst_len && (rule6->dst.plen != frh->dst_len)) + return 0; + + if (frh->tos && (rule6->tclass != frh->tos)) + return 0; + + if (tb[FRA_SRC] && + nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr))) + return 0; + + if (tb[FRA_DST] && + nla_memcmp(tb[FRA_DST], &rule6->dst.addr, sizeof(struct in6_addr))) + return 0; + + return 1; +} + +static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, + struct nlmsghdr *nlh, struct fib_rule_hdr *frh) +{ + struct fib6_rule *rule6 = (struct fib6_rule *) rule; + + frh->family = AF_INET6; + frh->dst_len = rule6->dst.plen; + frh->src_len = rule6->src.plen; + frh->tos = rule6->tclass; + + if (rule6->dst.plen) + NLA_PUT(skb, FRA_DST, sizeof(struct in6_addr), + &rule6->dst.addr); + + if (rule6->src.plen) + NLA_PUT(skb, FRA_SRC, sizeof(struct in6_addr), + &rule6->src.addr); + + return 0; + +nla_put_failure: + return -ENOBUFS; +} + +int fib6_rules_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + return fib_rules_dump(skb, cb, AF_INET6); +} + +static u32 fib6_rule_default_pref(void) +{ + return 0x3FFF; +} + +static struct fib_rules_ops fib6_rules_ops = { + .family = AF_INET6, + .rule_size = sizeof(struct fib6_rule), + .action = fib6_rule_action, + .match = fib6_rule_match, + .configure = fib6_rule_configure, + .compare = fib6_rule_compare, + .fill = fib6_rule_fill, + .default_pref = fib6_rule_default_pref, + .nlgroup = RTNLGRP_IPV6_RULE, + .policy = fib6_rule_policy, + .rules_list = &fib6_rules, + .owner = THIS_MODULE, +}; + +void __init fib6_rules_init(void) +{ + list_add_tail(&local_rule.common.list, &fib6_rules); + list_add_tail(&main_rule.common.list, &fib6_rules); + + fib_rules_register(&fib6_rules_ops); +} + +void fib6_rules_cleanup(void) +{ + fib_rules_unregister(&fib6_rules_ops); +} diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index fcd7da830aca..ce226c14bef5 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -159,6 +159,15 @@ static struct fib6_table fib6_main_tbl = { #ifdef CONFIG_IPV6_MULTIPLE_TABLES +static struct fib6_table fib6_local_tbl = { + .tb6_id = RT6_TABLE_LOCAL, + .tb6_lock = RW_LOCK_UNLOCKED, + .tb6_root = { + .leaf = &ip6_null_entry, + .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO, + }, +}; + #define FIB_TABLE_HASHSZ 256 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; @@ -228,20 +237,10 @@ struct fib6_table *fib6_get_table(u32 id) return NULL; } -struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, - pol_lookup_t lookup) -{ - /* - * TODO: Add rule lookup - */ - struct fib6_table *table = fib6_get_table(RT6_TABLE_MAIN); - - return (struct dst_entry *) lookup(table, fl, flags); -} - static void __init fib6_tables_init(void) { fib6_link_table(&fib6_main_tbl); + fib6_link_table(&fib6_local_tbl); } #else diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 73efdadb9ab8..438977e2085d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -140,6 +140,50 @@ struct rt6_info ip6_null_entry = { .rt6i_ref = ATOMIC_INIT(1), }; +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + +struct rt6_info ip6_prohibit_entry = { + .u = { + .dst = { + .__refcnt = ATOMIC_INIT(1), + .__use = 1, + .dev = &loopback_dev, + .obsolete = -1, + .error = -EACCES, + .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, + .input = ip6_pkt_discard, + .output = ip6_pkt_discard_out, + .ops = &ip6_dst_ops, + .path = (struct dst_entry*)&ip6_prohibit_entry, + } + }, + .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_metric = ~(u32) 0, + .rt6i_ref = ATOMIC_INIT(1), +}; + +struct rt6_info ip6_blk_hole_entry = { + .u = { + .dst = { + .__refcnt = ATOMIC_INIT(1), + .__use = 1, + .dev = &loopback_dev, + .obsolete = -1, + .error = -EINVAL, + .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, + .input = ip6_pkt_discard, + .output = ip6_pkt_discard_out, + .ops = &ip6_dst_ops, + .path = (struct dst_entry*)&ip6_blk_hole_entry, + } + }, + .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_metric = ~(u32) 0, + .rt6i_ref = ATOMIC_INIT(1), +}; + +#endif + /* allocate dst with ip6_dst_ops */ static __inline__ struct rt6_info *ip6_dst_alloc(void) { @@ -2408,10 +2452,16 @@ void __init ip6_route_init(void) #ifdef CONFIG_XFRM xfrm6_init(); #endif +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + fib6_rules_init(); +#endif } void ip6_route_cleanup(void) { +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + fib6_rules_cleanup(); +#endif #ifdef CONFIG_PROC_FS proc_net_remove("ipv6_route"); proc_net_remove("rt6_stats"); From e1ef4bf23b1ced0bf78a1c98289f746486e5c912 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 4 Aug 2006 03:39:22 -0700 Subject: [PATCH 0399/1063] [IPV4]: Use Protocol Independant Policy Routing Rules Framework Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/ip_fib.h | 14 +- net/ipv4/Kconfig | 1 + net/ipv4/devinet.c | 4 +- net/ipv4/fib_frontend.c | 2 +- net/ipv4/fib_rules.c | 663 ++++++++++++++++------------------------ 5 files changed, 278 insertions(+), 406 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index a095d1dec7a4..14c82e611c95 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -18,6 +18,7 @@ #include #include +#include /* WARNING: The ordering of these elements must match ordering * of RTA_* rtnetlink attribute numbers. @@ -203,9 +204,8 @@ static inline void fib_select_default(const struct flowi *flp, struct fib_result #define ip_fib_main_table (fib_tables[RT_TABLE_MAIN]) extern struct fib_table * fib_tables[RT_TABLE_MAX+1]; -extern int fib_lookup(const struct flowi *flp, struct fib_result *res); +extern int fib_lookup(struct flowi *flp, struct fib_result *res); extern struct fib_table *__fib_new_table(int id); -extern void fib_rule_put(struct fib_rule *r); static inline struct fib_table *fib_get_table(int id) { @@ -251,15 +251,15 @@ extern u32 __fib_res_prefsrc(struct fib_result *res); extern struct fib_table *fib_hash_init(int id); #ifdef CONFIG_IP_MULTIPLE_TABLES -/* Exported by fib_rules.c */ +extern int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb); + +extern void __init fib4_rules_init(void); +extern void __exit fib4_rules_cleanup(void); -extern int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); -extern int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); -extern int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb); #ifdef CONFIG_NET_CLS_ROUTE extern u32 fib_rules_tclass(struct fib_result *res); #endif -extern void fib_rules_init(void); + #endif static inline void fib_combine_itag(u32 *itag, struct fib_result *res) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 3b5d504a74be..1650b64415aa 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -88,6 +88,7 @@ config IP_FIB_HASH config IP_MULTIPLE_TABLES bool "IP: policy routing" depends on IP_ADVANCED_ROUTER + select FIB_RULES ---help--- Normally, a router decides what to do with a received packet based solely on the packet's final destination address. If you say Y here, diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index a6cc31d911eb..9f3ffbec3296 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1151,9 +1151,7 @@ static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = { [RTM_GETROUTE - RTM_BASE] = { .doit = inet_rtm_getroute, .dumpit = inet_dump_fib, }, #ifdef CONFIG_IP_MULTIPLE_TABLES - [RTM_NEWRULE - RTM_BASE] = { .doit = inet_rtm_newrule, }, - [RTM_DELRULE - RTM_BASE] = { .doit = inet_rtm_delrule, }, - [RTM_GETRULE - RTM_BASE] = { .dumpit = inet_dump_rules, }, + [RTM_GETRULE - RTM_BASE] = { .dumpit = fib4_rules_dump, }, #endif }; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index ba2a70745a63..fe4a53d4d10d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -656,7 +656,7 @@ void __init ip_fib_init(void) ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); #else - fib_rules_init(); + fib4_rules_init(); #endif register_netdevice_notifier(&fib_netdev_notifier); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 79b04718bdfd..23ec6ae1a0f6 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -5,9 +5,8 @@ * * IPv4 Forwarding Information Base: policy rules. * - * Version: $Id: fib_rules.c,v 1.17 2001/10/31 21:55:54 davem Exp $ - * * Authors: Alexey Kuznetsov, + * Thomas Graf * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -19,129 +18,154 @@ * Marc Boucher : routing by fwmark */ -#include -#include -#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include #include +#include #include #include #include - #include -#include #include #include -#include #include +#include -#define FRprintk(a...) +static struct fib_rules_ops fib4_rules_ops; -struct fib_rule +struct fib4_rule { - struct hlist_node hlist; - atomic_t r_clntref; - u32 r_preference; - unsigned char r_table; - unsigned char r_action; - unsigned char r_dst_len; - unsigned char r_src_len; - u32 r_src; - u32 r_srcmask; - u32 r_dst; - u32 r_dstmask; - u32 r_srcmap; - u8 r_flags; - u8 r_tos; + struct fib_rule common; + u8 dst_len; + u8 src_len; + u8 tos; + u32 src; + u32 srcmask; + u32 dst; + u32 dstmask; #ifdef CONFIG_IP_ROUTE_FWMARK - u32 r_fwmark; + u32 fwmark; #endif - int r_ifindex; #ifdef CONFIG_NET_CLS_ROUTE - __u32 r_tclassid; + u32 tclassid; #endif - char r_ifname[IFNAMSIZ]; - int r_dead; - struct rcu_head rcu; }; -static struct fib_rule default_rule = { - .r_clntref = ATOMIC_INIT(2), - .r_preference = 0x7FFF, - .r_table = RT_TABLE_DEFAULT, - .r_action = RTN_UNICAST, +static struct fib4_rule default_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .pref = 0x7FFF, + .table = RT_TABLE_DEFAULT, + .action = FR_ACT_TO_TBL, + }, }; -static struct fib_rule main_rule = { - .r_clntref = ATOMIC_INIT(2), - .r_preference = 0x7FFE, - .r_table = RT_TABLE_MAIN, - .r_action = RTN_UNICAST, +static struct fib4_rule main_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .pref = 0x7FFE, + .table = RT_TABLE_MAIN, + .action = FR_ACT_TO_TBL, + }, }; -static struct fib_rule local_rule = { - .r_clntref = ATOMIC_INIT(2), - .r_table = RT_TABLE_LOCAL, - .r_action = RTN_UNICAST, +static struct fib4_rule local_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .table = RT_TABLE_LOCAL, + .action = FR_ACT_TO_TBL, + .flags = FIB_RULE_PERMANENT, + }, }; -static struct hlist_head fib_rules; +static LIST_HEAD(fib4_rules); -/* writer func called from netlink -- rtnl_sem hold*/ - -static void rtmsg_rule(int, struct fib_rule *); - -int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +#ifdef CONFIG_NET_CLS_ROUTE +u32 fib_rules_tclass(struct fib_result *res) { - struct rtattr **rta = arg; - struct rtmsg *rtm = NLMSG_DATA(nlh); - struct fib_rule *r; - struct hlist_node *node; - int err = -ESRCH; - - hlist_for_each_entry(r, node, &fib_rules, hlist) { - if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 4) == 0) && - rtm->rtm_src_len == r->r_src_len && - rtm->rtm_dst_len == r->r_dst_len && - (!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 4) == 0) && - rtm->rtm_tos == r->r_tos && -#ifdef CONFIG_IP_ROUTE_FWMARK - (!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) && + return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0; +} #endif - (!rtm->rtm_type || rtm->rtm_type == r->r_action) && - (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) && - (!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) && - (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) { - err = -EPERM; - if (r == &local_rule) - break; - hlist_del_rcu(&r->hlist); - r->r_dead = 1; - rtmsg_rule(RTM_DELRULE, r); - fib_rule_put(r); - err = 0; - break; - } - } +int fib_lookup(struct flowi *flp, struct fib_result *res) +{ + struct fib_lookup_arg arg = { + .result = res, + }; + int err; + + err = fib_rules_lookup(&fib4_rules_ops, flp, 0, &arg); + res->r = arg.rule; + return err; } -/* Allocate new unique table id */ +int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, + struct fib_lookup_arg *arg) +{ + int err = -EAGAIN; + struct fib_table *tbl; + + switch (rule->action) { + case FR_ACT_TO_TBL: + break; + + case FR_ACT_UNREACHABLE: + err = -ENETUNREACH; + goto errout; + + case FR_ACT_PROHIBIT: + err = -EACCES; + goto errout; + + case FR_ACT_BLACKHOLE: + default: + err = -EINVAL; + goto errout; + } + + if ((tbl = fib_get_table(rule->table)) == NULL) + goto errout; + + err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result); + if (err > 0) + err = -EAGAIN; +errout: + return err; +} + + +void fib_select_default(const struct flowi *flp, struct fib_result *res) +{ + if (res->r && res->r->action == FR_ACT_TO_TBL && + FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) { + struct fib_table *tb; + if ((tb = fib_get_table(res->r->table)) != NULL) + tb->tb_select_default(tb, flp, res); + } +} + +static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) +{ + struct fib4_rule *r = (struct fib4_rule *) rule; + u32 daddr = fl->fl4_dst; + u32 saddr = fl->fl4_src; + + if (((saddr ^ r->src) & r->srcmask) || + ((daddr ^ r->dst) & r->dstmask)) + return 0; + + if (r->tos && (r->tos != fl->fl4_tos)) + return 0; + +#ifdef CONFIG_IP_ROUTE_FWMARK + if (r->fwmark && (r->fwmark != fl->fl4_fwmark)) + return 0; +#endif + + return 1; +} static struct fib_table *fib_empty_table(void) { @@ -153,329 +177,178 @@ static struct fib_table *fib_empty_table(void) return NULL; } -static inline void fib_rule_put_rcu(struct rcu_head *head) -{ - struct fib_rule *r = container_of(head, struct fib_rule, rcu); - kfree(r); -} - -void fib_rule_put(struct fib_rule *r) -{ - if (atomic_dec_and_test(&r->r_clntref)) { - if (r->r_dead) - call_rcu(&r->rcu, fib_rule_put_rcu); - else - printk("Freeing alive rule %p\n", r); - } -} - -/* writer func called from netlink -- rtnl_sem hold*/ - -int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) -{ - struct rtattr **rta = arg; - struct rtmsg *rtm = NLMSG_DATA(nlh); - struct fib_rule *r, *new_r, *last = NULL; - struct hlist_node *node = NULL; - unsigned char table_id; - - if (rtm->rtm_src_len > 32 || rtm->rtm_dst_len > 32 || - (rtm->rtm_tos & ~IPTOS_TOS_MASK)) - return -EINVAL; - - if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ) - return -EINVAL; - - table_id = rtm->rtm_table; - if (table_id == RT_TABLE_UNSPEC) { - struct fib_table *table; - if (rtm->rtm_type == RTN_UNICAST) { - if ((table = fib_empty_table()) == NULL) - return -ENOBUFS; - table_id = table->tb_id; - } - } - - new_r = kzalloc(sizeof(*new_r), GFP_KERNEL); - if (!new_r) - return -ENOMEM; - - if (rta[RTA_SRC-1]) - memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 4); - if (rta[RTA_DST-1]) - memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 4); - if (rta[RTA_GATEWAY-1]) - memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 4); - new_r->r_src_len = rtm->rtm_src_len; - new_r->r_dst_len = rtm->rtm_dst_len; - new_r->r_srcmask = inet_make_mask(rtm->rtm_src_len); - new_r->r_dstmask = inet_make_mask(rtm->rtm_dst_len); - new_r->r_tos = rtm->rtm_tos; -#ifdef CONFIG_IP_ROUTE_FWMARK - if (rta[RTA_PROTOINFO-1]) - memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4); -#endif - new_r->r_action = rtm->rtm_type; - new_r->r_flags = rtm->rtm_flags; - if (rta[RTA_PRIORITY-1]) - memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4); - new_r->r_table = table_id; - if (rta[RTA_IIF-1]) { - struct net_device *dev; - rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ); - new_r->r_ifindex = -1; - dev = __dev_get_by_name(new_r->r_ifname); - if (dev) - new_r->r_ifindex = dev->ifindex; - } -#ifdef CONFIG_NET_CLS_ROUTE - if (rta[RTA_FLOW-1]) - memcpy(&new_r->r_tclassid, RTA_DATA(rta[RTA_FLOW-1]), 4); -#endif - r = container_of(fib_rules.first, struct fib_rule, hlist); - - if (!new_r->r_preference) { - if (r && r->hlist.next != NULL) { - r = container_of(r->hlist.next, struct fib_rule, hlist); - if (r->r_preference) - new_r->r_preference = r->r_preference - 1; - } - } - - hlist_for_each_entry(r, node, &fib_rules, hlist) { - if (r->r_preference > new_r->r_preference) - break; - last = r; - } - atomic_inc(&new_r->r_clntref); - - if (last) - hlist_add_after_rcu(&last->hlist, &new_r->hlist); - else - hlist_add_before_rcu(&new_r->hlist, &r->hlist); - - rtmsg_rule(RTM_NEWRULE, new_r); - return 0; -} - -#ifdef CONFIG_NET_CLS_ROUTE -u32 fib_rules_tclass(struct fib_result *res) -{ - if (res->r) - return res->r->r_tclassid; - return 0; -} -#endif - -/* callers should hold rtnl semaphore */ - -static void fib_rules_detach(struct net_device *dev) -{ - struct hlist_node *node; - struct fib_rule *r; - - hlist_for_each_entry(r, node, &fib_rules, hlist) { - if (r->r_ifindex == dev->ifindex) - r->r_ifindex = -1; - - } -} - -/* callers should hold rtnl semaphore */ - -static void fib_rules_attach(struct net_device *dev) -{ - struct hlist_node *node; - struct fib_rule *r; - - hlist_for_each_entry(r, node, &fib_rules, hlist) { - if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0) - r->r_ifindex = dev->ifindex; - } -} - -int fib_lookup(const struct flowi *flp, struct fib_result *res) -{ - int err; - struct fib_rule *r, *policy; - struct fib_table *tb; - struct hlist_node *node; - - u32 daddr = flp->fl4_dst; - u32 saddr = flp->fl4_src; - -FRprintk("Lookup: %u.%u.%u.%u <- %u.%u.%u.%u ", - NIPQUAD(flp->fl4_dst), NIPQUAD(flp->fl4_src)); - - rcu_read_lock(); - - hlist_for_each_entry_rcu(r, node, &fib_rules, hlist) { - if (((saddr^r->r_src) & r->r_srcmask) || - ((daddr^r->r_dst) & r->r_dstmask) || - (r->r_tos && r->r_tos != flp->fl4_tos) || -#ifdef CONFIG_IP_ROUTE_FWMARK - (r->r_fwmark && r->r_fwmark != flp->fl4_fwmark) || -#endif - (r->r_ifindex && r->r_ifindex != flp->iif)) - continue; - -FRprintk("tb %d r %d ", r->r_table, r->r_action); - switch (r->r_action) { - case RTN_UNICAST: - policy = r; - break; - case RTN_UNREACHABLE: - rcu_read_unlock(); - return -ENETUNREACH; - default: - case RTN_BLACKHOLE: - rcu_read_unlock(); - return -EINVAL; - case RTN_PROHIBIT: - rcu_read_unlock(); - return -EACCES; - } - - if ((tb = fib_get_table(r->r_table)) == NULL) - continue; - err = tb->tb_lookup(tb, flp, res); - if (err == 0) { - res->r = policy; - if (policy) - atomic_inc(&policy->r_clntref); - rcu_read_unlock(); - return 0; - } - if (err < 0 && err != -EAGAIN) { - rcu_read_unlock(); - return err; - } - } -FRprintk("FAILURE\n"); - rcu_read_unlock(); - return -ENETUNREACH; -} - -void fib_select_default(const struct flowi *flp, struct fib_result *res) -{ - if (res->r && res->r->r_action == RTN_UNICAST && - FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) { - struct fib_table *tb; - if ((tb = fib_get_table(res->r->r_table)) != NULL) - tb->tb_select_default(tb, flp, res); - } -} - -static int fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct net_device *dev = ptr; - - if (event == NETDEV_UNREGISTER) - fib_rules_detach(dev); - else if (event == NETDEV_REGISTER) - fib_rules_attach(dev); - return NOTIFY_DONE; -} - - -static struct notifier_block fib_rules_notifier = { - .notifier_call =fib_rules_event, +static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = { + [FRA_IFNAME] = { .type = NLA_STRING }, + [FRA_PRIORITY] = { .type = NLA_U32 }, + [FRA_SRC] = { .type = NLA_U32 }, + [FRA_DST] = { .type = NLA_U32 }, + [FRA_FWMARK] = { .type = NLA_U32 }, + [FRA_FLOW] = { .type = NLA_U32 }, }; -static __inline__ int inet_fill_rule(struct sk_buff *skb, - struct fib_rule *r, - u32 pid, u32 seq, int event, - unsigned int flags) +static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, + struct nlmsghdr *nlh, struct fib_rule_hdr *frh, + struct nlattr **tb) { - struct rtmsg *rtm; - struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + int err = -EINVAL; + struct fib4_rule *rule4 = (struct fib4_rule *) rule; + + if (frh->src_len > 32 || frh->dst_len > 32 || + (frh->tos & ~IPTOS_TOS_MASK)) + goto errout; + + if (rule->table == RT_TABLE_UNSPEC) { + if (rule->action == FR_ACT_TO_TBL) { + struct fib_table *table; + + table = fib_empty_table(); + if (table == NULL) { + err = -ENOBUFS; + goto errout; + } + + rule->table = table->tb_id; + } + } + + if (tb[FRA_SRC]) + rule4->src = nla_get_u32(tb[FRA_SRC]); + + if (tb[FRA_DST]) + rule4->dst = nla_get_u32(tb[FRA_DST]); - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags); - rtm = NLMSG_DATA(nlh); - rtm->rtm_family = AF_INET; - rtm->rtm_dst_len = r->r_dst_len; - rtm->rtm_src_len = r->r_src_len; - rtm->rtm_tos = r->r_tos; #ifdef CONFIG_IP_ROUTE_FWMARK - if (r->r_fwmark) - RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark); + if (tb[FRA_FWMARK]) + rule4->fwmark = nla_get_u32(tb[FRA_FWMARK]); #endif - rtm->rtm_table = r->r_table; - rtm->rtm_protocol = 0; - rtm->rtm_scope = 0; - rtm->rtm_type = r->r_action; - rtm->rtm_flags = r->r_flags; - if (r->r_dst_len) - RTA_PUT(skb, RTA_DST, 4, &r->r_dst); - if (r->r_src_len) - RTA_PUT(skb, RTA_SRC, 4, &r->r_src); - if (r->r_ifname[0]) - RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname); - if (r->r_preference) - RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference); - if (r->r_srcmap) - RTA_PUT(skb, RTA_GATEWAY, 4, &r->r_srcmap); #ifdef CONFIG_NET_CLS_ROUTE - if (r->r_tclassid) - RTA_PUT(skb, RTA_FLOW, 4, &r->r_tclassid); + if (tb[FRA_FLOW]) + rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); #endif - nlh->nlmsg_len = skb->tail - b; - return skb->len; -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; + rule4->src_len = frh->src_len; + rule4->srcmask = inet_make_mask(rule4->src_len); + rule4->dst_len = frh->dst_len; + rule4->dstmask = inet_make_mask(rule4->dst_len); + rule4->tos = frh->tos; + + err = 0; +errout: + return err; } -/* callers should hold rtnl semaphore */ - -static void rtmsg_rule(int event, struct fib_rule *r) +static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, + struct nlattr **tb) { - int size = NLMSG_SPACE(sizeof(struct rtmsg) + 128); - struct sk_buff *skb = alloc_skb(size, GFP_KERNEL); + struct fib4_rule *rule4 = (struct fib4_rule *) rule; - if (!skb) - netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, ENOBUFS); - else if (inet_fill_rule(skb, r, 0, 0, event, 0) < 0) { - kfree_skb(skb); - netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, EINVAL); - } else { - netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_RULE, GFP_KERNEL); + if (frh->src_len && (rule4->src_len != frh->src_len)) + return 0; + + if (frh->dst_len && (rule4->dst_len != frh->dst_len)) + return 0; + + if (frh->tos && (rule4->tos != frh->tos)) + return 0; + +#ifdef CONFIG_IP_ROUTE_FWMARK + if (tb[FRA_FWMARK] && (rule4->fwmark != nla_get_u32(tb[FRA_FWMARK]))) + return 0; +#endif + +#ifdef CONFIG_NET_CLS_ROUTE + if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW]))) + return 0; +#endif + + if (tb[FRA_SRC] && (rule4->src != nla_get_u32(tb[FRA_SRC]))) + return 0; + + if (tb[FRA_DST] && (rule4->dst != nla_get_u32(tb[FRA_DST]))) + return 0; + + return 1; +} + +static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, + struct nlmsghdr *nlh, struct fib_rule_hdr *frh) +{ + struct fib4_rule *rule4 = (struct fib4_rule *) rule; + + frh->family = AF_INET; + frh->dst_len = rule4->dst_len; + frh->src_len = rule4->src_len; + frh->tos = rule4->tos; + +#ifdef CONFIG_IP_ROUTE_FWMARK + if (rule4->fwmark) + NLA_PUT_U32(skb, FRA_FWMARK, rule4->fwmark); +#endif + + if (rule4->dst_len) + NLA_PUT_U32(skb, FRA_DST, rule4->dst); + + if (rule4->src_len) + NLA_PUT_U32(skb, FRA_SRC, rule4->src); + +#ifdef CONFIG_NET_CLS_ROUTE + if (rule4->tclassid) + NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid); +#endif + return 0; + +nla_put_failure: + return -ENOBUFS; +} + +int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + return fib_rules_dump(skb, cb, AF_INET); +} + +static u32 fib4_rule_default_pref(void) +{ + struct list_head *pos; + struct fib_rule *rule; + + if (!list_empty(&fib4_rules)) { + pos = fib4_rules.next; + if (pos->next != &fib4_rules) { + rule = list_entry(pos->next, struct fib_rule, list); + if (rule->pref) + return rule->pref - 1; + } } + + return 0; } -int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) +static struct fib_rules_ops fib4_rules_ops = { + .family = AF_INET, + .rule_size = sizeof(struct fib4_rule), + .action = fib4_rule_action, + .match = fib4_rule_match, + .configure = fib4_rule_configure, + .compare = fib4_rule_compare, + .fill = fib4_rule_fill, + .default_pref = fib4_rule_default_pref, + .nlgroup = RTNLGRP_IPV4_RULE, + .policy = fib4_rule_policy, + .rules_list = &fib4_rules, + .owner = THIS_MODULE, +}; + +void __init fib4_rules_init(void) { - int idx = 0; - int s_idx = cb->args[0]; - struct fib_rule *r; - struct hlist_node *node; + list_add_tail(&local_rule.common.list, &fib4_rules); + list_add_tail(&main_rule.common.list, &fib4_rules); + list_add_tail(&default_rule.common.list, &fib4_rules); - rcu_read_lock(); - hlist_for_each_entry(r, node, &fib_rules, hlist) { - if (idx < s_idx) - goto next; - if (inet_fill_rule(skb, r, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - RTM_NEWRULE, NLM_F_MULTI) < 0) - break; -next: - idx++; - } - rcu_read_unlock(); - cb->args[0] = idx; - - return skb->len; + fib_rules_register(&fib4_rules_ops); } -void __init fib_rules_init(void) +void __exit fib4_rules_cleanup(void) { - INIT_HLIST_HEAD(&fib_rules); - hlist_add_head(&local_rule.hlist, &fib_rules); - hlist_add_after(&local_rule.hlist, &main_rule.hlist); - hlist_add_after(&main_rule.hlist, &default_rule.hlist); - register_netdevice_notifier(&fib_rules_notifier); + fib_rules_unregister(&fib4_rules_ops); } From fe4944e59c357f945f81bc67edb7ed1392e875ad Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 4 Aug 2006 23:03:05 -0700 Subject: [PATCH 0400/1063] [NETLINK]: Extend netlink messaging interface Adds: nlmsg_get_pos() return current position in message nlmsg_trim() trim part of message nla_reserve_nohdr(skb, len) reserve room for an attribute w/o hdr nla_put_nohdr(skb, len, data) add attribute w/o hdr nla_find_nested() find attribute in nested attributes Fixes nlmsg_new() to take allocation flags and consider size. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/netlink.h | 74 ++++++++++++++++++++++++++++++++++------ kernel/taskstats.c | 2 +- net/netlink/attr.c | 75 +++++++++++++++++++++++++++++++++++++++++ net/netlink/genetlink.c | 2 +- 4 files changed, 141 insertions(+), 12 deletions(-) diff --git a/include/net/netlink.h b/include/net/netlink.h index 640c26a90cf1..3a5e40b1e045 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -35,6 +35,8 @@ * nlmsg_put() add a netlink message to an skb * nlmsg_put_answer() callback based nlmsg_put() * nlmsg_end() finanlize netlink message + * nlmsg_get_pos() return current position in message + * nlmsg_trim() trim part of message * nlmsg_cancel() cancel message construction * nlmsg_free() free a netlink message * @@ -80,8 +82,10 @@ * struct nlattr netlink attribtue header * * Attribute Construction: - * nla_reserve(skb, type, len) reserve skb tailroom for an attribute + * nla_reserve(skb, type, len) reserve room for an attribute + * nla_reserve_nohdr(skb, len) reserve room for an attribute w/o hdr * nla_put(skb, type, len, data) add attribute to skb + * nla_put_nohdr(skb, len, data) add attribute w/o hdr * * Attribute Construction for Basic Types: * nla_put_u8(skb, type, value) add u8 attribute to skb @@ -139,6 +143,7 @@ * nla_next(nla, remaining) get next netlink attribute * nla_validate() validate a stream of attributes * nla_find() find attribute in stream of attributes + * nla_find_nested() find attribute in nested attributes * nla_parse() parse and validate stream of attrs * nla_parse_nested() parse nested attribuets * nla_for_each_attr() loop over all attributes @@ -203,12 +208,18 @@ extern int nla_memcmp(const struct nlattr *nla, const void *data, extern int nla_strcmp(const struct nlattr *nla, const char *str); extern struct nlattr * __nla_reserve(struct sk_buff *skb, int attrtype, int attrlen); +extern void * __nla_reserve_nohdr(struct sk_buff *skb, int attrlen); extern struct nlattr * nla_reserve(struct sk_buff *skb, int attrtype, int attrlen); +extern void * nla_reserve_nohdr(struct sk_buff *skb, int attrlen); extern void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data); +extern void __nla_put_nohdr(struct sk_buff *skb, int attrlen, + const void *data); extern int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data); +extern int nla_put_nohdr(struct sk_buff *skb, int attrlen, + const void *data); /************************************************************************** * Netlink Messages @@ -453,12 +464,13 @@ static inline struct nlmsghdr *nlmsg_put_answer(struct sk_buff *skb, /** * nlmsg_new - Allocate a new netlink message * @size: maximum size of message + * @flags: the type of memory to allocate. * * Use NLMSG_GOODSIZE if size isn't know and you need a good default size. */ -static inline struct sk_buff *nlmsg_new(int size) +static inline struct sk_buff *nlmsg_new(int size, gfp_t flags) { - return alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + return alloc_skb(size, flags); } /** @@ -479,6 +491,32 @@ static inline int nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh) return skb->len; } +/** + * nlmsg_get_pos - return current position in netlink message + * @skb: socket buffer the message is stored in + * + * Returns a pointer to the current tail of the message. + */ +static inline void *nlmsg_get_pos(struct sk_buff *skb) +{ + return skb->tail; +} + +/** + * nlmsg_trim - Trim message to a mark + * @skb: socket buffer the message is stored in + * @mark: mark to trim to + * + * Trims the message to the provided mark. Returns -1. + */ +static inline int nlmsg_trim(struct sk_buff *skb, void *mark) +{ + if (mark) + skb_trim(skb, (unsigned char *) mark - skb->data); + + return -1; +} + /** * nlmsg_cancel - Cancel construction of a netlink message * @skb: socket buffer the message is stored in @@ -489,9 +527,7 @@ static inline int nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh) */ static inline int nlmsg_cancel(struct sk_buff *skb, struct nlmsghdr *nlh) { - skb_trim(skb, (unsigned char *) nlh - skb->data); - - return -1; + return nlmsg_trim(skb, nlh); } /** @@ -630,6 +666,18 @@ static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining) return (struct nlattr *) ((char *) nla + totlen); } +/** + * nla_find_nested - find attribute in a set of nested attributes + * @nla: attribute containing the nested attributes + * @attrtype: type of attribute to look for + * + * Returns the first attribute which matches the specified type. + */ +static inline struct nlattr *nla_find_nested(struct nlattr *nla, int attrtype) +{ + return nla_find(nla_data(nla), nla_len(nla), attrtype); +} + /** * nla_parse_nested - parse nested attributes * @tb: destination array with maxtype+1 elements @@ -862,10 +910,7 @@ static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start) */ static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) { - if (start) - skb_trim(skb, (unsigned char *) start - skb->data); - - return -1; + return nlmsg_trim(skb, start); } /** @@ -880,4 +925,13 @@ static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) nla_ok(pos, rem); \ pos = nla_next(pos, &(rem))) +/** + * nla_for_each_nested - iterate over nested attributes + * @pos: loop counter, set to current attribute + * @nla: attribute containing the nested attributes + * @rem: initialized to len, holds bytes currently remaining in stream + */ +#define nla_for_each_nested(pos, nla, rem) \ + nla_for_each_attr(pos, nla_data(nla), nla_len(nla), rem) + #endif diff --git a/kernel/taskstats.c b/kernel/taskstats.c index e78187657330..2ed4040d0dc5 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -75,7 +75,7 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, /* * If new attributes are added, please revisit this allocation */ - skb = nlmsg_new(size); + skb = nlmsg_new(size, GFP_KERNEL); if (!skb) return -ENOMEM; diff --git a/net/netlink/attr.c b/net/netlink/attr.c index dddbd15135a8..136e529e5780 100644 --- a/net/netlink/attr.c +++ b/net/netlink/attr.c @@ -254,6 +254,26 @@ struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) return nla; } +/** + * __nla_reserve_nohdr - reserve room for attribute without header + * @skb: socket buffer to reserve room on + * @attrlen: length of attribute payload + * + * Reserves room for attribute payload without a header. + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the payload. + */ +void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen) +{ + void *start; + + start = skb_put(skb, NLA_ALIGN(attrlen)); + memset(start, 0, NLA_ALIGN(attrlen)); + + return start; +} + /** * nla_reserve - reserve room for attribute on the skb * @skb: socket buffer to reserve room on @@ -274,6 +294,24 @@ struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) return __nla_reserve(skb, attrtype, attrlen); } +/** + * nla_reserve - reserve room for attribute without header + * @skb: socket buffer to reserve room on + * @len: length of attribute payload + * + * Reserves room for attribute payload without a header. + * + * Returns NULL if the tailroom of the skb is insufficient to store + * the attribute payload. + */ +void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen) +{ + if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) + return NULL; + + return __nla_reserve_nohdr(skb, attrlen); +} + /** * __nla_put - Add a netlink attribute to a socket buffer * @skb: socket buffer to add attribute to @@ -293,6 +331,22 @@ void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, memcpy(nla_data(nla), data, attrlen); } +/** + * __nla_put_nohdr - Add a netlink attribute without header + * @skb: socket buffer to add attribute to + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the attribute payload. + */ +void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) +{ + void *start; + + start = __nla_reserve_nohdr(skb, attrlen); + memcpy(start, data, attrlen); +} /** * nla_put - Add a netlink attribute to a socket buffer @@ -313,15 +367,36 @@ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) return 0; } +/** + * nla_put_nohdr - Add a netlink attribute without header + * @skb: socket buffer to add attribute to + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * Returns -1 if the tailroom of the skb is insufficient to store + * the attribute payload. + */ +int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) +{ + if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) + return -1; + + __nla_put_nohdr(skb, attrlen, data); + return 0; +} EXPORT_SYMBOL(nla_validate); EXPORT_SYMBOL(nla_parse); EXPORT_SYMBOL(nla_find); EXPORT_SYMBOL(nla_strlcpy); EXPORT_SYMBOL(__nla_reserve); +EXPORT_SYMBOL(__nla_reserve_nohdr); EXPORT_SYMBOL(nla_reserve); +EXPORT_SYMBOL(nla_reserve_nohdr); EXPORT_SYMBOL(__nla_put); +EXPORT_SYMBOL(__nla_put_nohdr); EXPORT_SYMBOL(nla_put); +EXPORT_SYMBOL(nla_put_nohdr); EXPORT_SYMBOL(nla_memcpy); EXPORT_SYMBOL(nla_memcmp); EXPORT_SYMBOL(nla_strcmp); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index a298f77cc3e3..75bb47a898dd 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -440,7 +440,7 @@ static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid, struct sk_buff *skb; int err; - skb = nlmsg_new(NLMSG_GOODSIZE); + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) return ERR_PTR(-ENOBUFS); From bf8b79e444a748963c71d2a58709e1ce5597e1b5 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 4 Aug 2006 23:03:29 -0700 Subject: [PATCH 0401/1063] [NETLINK]: Convert core netlink handling to new netlink api Fixes a theoretical memory and locking leak when the size of the netlink header would exceed the skb tailroom. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 41 +++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 8b85036ba8e3..0f36ddc0b72d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1147,7 +1147,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (len > sk->sk_sndbuf - 32) goto out; err = -ENOBUFS; - skb = alloc_skb(len, GFP_KERNEL); + skb = nlmsg_new(len, GFP_KERNEL); if (skb==NULL) goto out; @@ -1341,19 +1341,18 @@ static int netlink_dump(struct sock *sk) struct netlink_callback *cb; struct sk_buff *skb; struct nlmsghdr *nlh; - int len; + int len, err = -ENOBUFS; skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL); if (!skb) - return -ENOBUFS; + goto errout; spin_lock(&nlk->cb_lock); cb = nlk->cb; if (cb == NULL) { - spin_unlock(&nlk->cb_lock); - kfree_skb(skb); - return -EINVAL; + err = -EINVAL; + goto errout_skb; } len = cb->dump(skb, cb); @@ -1365,8 +1364,12 @@ static int netlink_dump(struct sock *sk) return 0; } - nlh = NLMSG_NEW_ANSWER(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI); - memcpy(NLMSG_DATA(nlh), &len, sizeof(len)); + nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI); + if (!nlh) + goto errout_skb; + + memcpy(nlmsg_data(nlh), &len, sizeof(len)); + skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk, skb->len); @@ -1378,8 +1381,11 @@ static int netlink_dump(struct sock *sk) netlink_destroy_callback(cb); return 0; -nlmsg_failure: - return -ENOBUFS; +errout_skb: + spin_unlock(&nlk->cb_lock); + kfree_skb(skb); +errout: + return err; } int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, @@ -1431,11 +1437,11 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) int size; if (err == 0) - size = NLMSG_SPACE(sizeof(struct nlmsgerr)); + size = nlmsg_total_size(sizeof(*errmsg)); else - size = NLMSG_SPACE(4 + NLMSG_ALIGN(nlh->nlmsg_len)); + size = nlmsg_total_size(sizeof(*errmsg) + nlmsg_len(nlh)); - skb = alloc_skb(size, GFP_KERNEL); + skb = nlmsg_new(size, GFP_KERNEL); if (!skb) { struct sock *sk; @@ -1451,16 +1457,15 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, NLMSG_ERROR, sizeof(struct nlmsgerr), 0); - errmsg = NLMSG_DATA(rep); + errmsg = nlmsg_data(rep); errmsg->error = err; - memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(struct nlmsghdr)); + memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh)); netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); } static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, struct nlmsghdr *, int *)) { - unsigned int total_len; struct nlmsghdr *nlh; int err; @@ -1470,8 +1475,6 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) return 0; - total_len = min(NLMSG_ALIGN(nlh->nlmsg_len), skb->len); - if (cb(skb, nlh, &err) < 0) { /* Not an error, but we have to interrupt processing * here. Note: that in this case we do not pull @@ -1483,7 +1486,7 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, } else if (nlh->nlmsg_flags & NLM_F_ACK) netlink_ack(skb, nlh, 0); - skb_pull(skb, total_len); + netlink_queue_skip(nlh, skb); } return 0; From 5c7539781d392629fb40b04aad9a1f197b66cd01 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 4 Aug 2006 23:03:53 -0700 Subject: [PATCH 0402/1063] [IPV4]: Convert address addition to new netlink api Adds rtm_to_ifaddr() transforming a netlink message to a struct in_ifaddr. Fixes various unvalidated netlink attributes causing memory corruptions when left empty by userspace applications. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 112 +++++++++++++++++++++++++++++++-------------- 1 file changed, 77 insertions(+), 35 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 9f3ffbec3296..6b297c8697e6 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -62,6 +62,7 @@ #include #include #include +#include struct ipv4_devconf ipv4_devconf = { .accept_redirects = 1, @@ -78,6 +79,14 @@ static struct ipv4_devconf ipv4_devconf_dflt = { .accept_source_route = 1, }; +static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = { + [IFA_LOCAL] = { .type = NLA_U32 }, + [IFA_ADDRESS] = { .type = NLA_U32 }, + [IFA_BROADCAST] = { .type = NLA_U32 }, + [IFA_ANYCAST] = { .type = NLA_U32 }, + [IFA_LABEL] = { .type = NLA_STRING }, +}; + static void rtmsg_ifa(int event, struct in_ifaddr *); static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); @@ -451,57 +460,90 @@ out: return -EADDRNOTAVAIL; } -static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh) { - struct rtattr **rta = arg; + struct nlattr *tb[IFA_MAX+1]; + struct in_ifaddr *ifa; + struct ifaddrmsg *ifm; struct net_device *dev; struct in_device *in_dev; - struct ifaddrmsg *ifm = NLMSG_DATA(nlh); - struct in_ifaddr *ifa; - int rc = -EINVAL; + int err = -EINVAL; - ASSERT_RTNL(); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); + if (err < 0) + goto errout; - if (ifm->ifa_prefixlen > 32 || !rta[IFA_LOCAL - 1]) - goto out; + ifm = nlmsg_data(nlh); + if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) + goto errout; - rc = -ENODEV; - if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL) - goto out; - - rc = -ENOBUFS; - if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) { - in_dev = inetdev_init(dev); - if (!in_dev) - goto out; + dev = __dev_get_by_index(ifm->ifa_index); + if (dev == NULL) { + err = -ENODEV; + goto errout; } - if ((ifa = inet_alloc_ifa()) == NULL) - goto out; + in_dev = __in_dev_get_rtnl(dev); + if (in_dev == NULL) { + in_dev = inetdev_init(dev); + if (in_dev == NULL) { + err = -ENOBUFS; + goto errout; + } + } + + ifa = inet_alloc_ifa(); + if (ifa == NULL) { + /* + * A potential indev allocation can be left alive, it stays + * assigned to its device and is destroy with it. + */ + err = -ENOBUFS; + goto errout; + } + + in_dev_hold(in_dev); + + if (tb[IFA_ADDRESS] == NULL) + tb[IFA_ADDRESS] = tb[IFA_LOCAL]; - if (!rta[IFA_ADDRESS - 1]) - rta[IFA_ADDRESS - 1] = rta[IFA_LOCAL - 1]; - memcpy(&ifa->ifa_local, RTA_DATA(rta[IFA_LOCAL - 1]), 4); - memcpy(&ifa->ifa_address, RTA_DATA(rta[IFA_ADDRESS - 1]), 4); ifa->ifa_prefixlen = ifm->ifa_prefixlen; ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); - if (rta[IFA_BROADCAST - 1]) - memcpy(&ifa->ifa_broadcast, - RTA_DATA(rta[IFA_BROADCAST - 1]), 4); - if (rta[IFA_ANYCAST - 1]) - memcpy(&ifa->ifa_anycast, RTA_DATA(rta[IFA_ANYCAST - 1]), 4); ifa->ifa_flags = ifm->ifa_flags; ifa->ifa_scope = ifm->ifa_scope; - in_dev_hold(in_dev); - ifa->ifa_dev = in_dev; - if (rta[IFA_LABEL - 1]) - rtattr_strlcpy(ifa->ifa_label, rta[IFA_LABEL - 1], IFNAMSIZ); + ifa->ifa_dev = in_dev; + + ifa->ifa_local = nla_get_u32(tb[IFA_LOCAL]); + ifa->ifa_address = nla_get_u32(tb[IFA_ADDRESS]); + + if (tb[IFA_BROADCAST]) + ifa->ifa_broadcast = nla_get_u32(tb[IFA_BROADCAST]); + + if (tb[IFA_ANYCAST]) + ifa->ifa_anycast = nla_get_u32(tb[IFA_ANYCAST]); + + if (tb[IFA_LABEL]) + nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); else memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); - rc = inet_insert_ifa(ifa); -out: - return rc; + return ifa; + +errout: + return ERR_PTR(err); +} + +static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct in_ifaddr *ifa; + + ASSERT_RTNL(); + + ifa = rtm_to_ifaddr(nlh); + if (IS_ERR(ifa)) + return PTR_ERR(ifa); + + return inet_insert_ifa(ifa); } /* From dfdd5fd4e93d98e06be9ac9db84e3b98c6c26706 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 4 Aug 2006 23:04:17 -0700 Subject: [PATCH 0403/1063] [IPV4]: Convert address deletion to new netlink api Fixes various unvalidated netlink attributes causing memory corruptions when left empty by userspace. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 44 +++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 6b297c8697e6..309640e9ede1 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -430,34 +430,48 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct rtattr **rta = arg; + struct nlattr *tb[IFA_MAX+1]; struct in_device *in_dev; - struct ifaddrmsg *ifm = NLMSG_DATA(nlh); + struct ifaddrmsg *ifm; struct in_ifaddr *ifa, **ifap; + int err = -EINVAL; ASSERT_RTNL(); - if ((in_dev = inetdev_by_index(ifm->ifa_index)) == NULL) - goto out; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); + if (err < 0) + goto errout; + + ifm = nlmsg_data(nlh); + in_dev = inetdev_by_index(ifm->ifa_index); + if (in_dev == NULL) { + err = -ENODEV; + goto errout; + } + __in_dev_put(in_dev); for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; ifap = &ifa->ifa_next) { - if ((rta[IFA_LOCAL - 1] && - memcmp(RTA_DATA(rta[IFA_LOCAL - 1]), - &ifa->ifa_local, 4)) || - (rta[IFA_LABEL - 1] && - rtattr_strcmp(rta[IFA_LABEL - 1], ifa->ifa_label)) || - (rta[IFA_ADDRESS - 1] && - (ifm->ifa_prefixlen != ifa->ifa_prefixlen || - !inet_ifa_match(*(u32*)RTA_DATA(rta[IFA_ADDRESS - 1]), - ifa)))) + if (tb[IFA_LOCAL] && + ifa->ifa_local != nla_get_u32(tb[IFA_LOCAL])) continue; + + if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label)) + continue; + + if (tb[IFA_ADDRESS] && + (ifm->ifa_prefixlen != ifa->ifa_prefixlen || + !inet_ifa_match(nla_get_u32(tb[IFA_ADDRESS]), ifa))) + continue; + inet_del_ifa(in_dev, ifap, 1); return 0; } -out: - return -EADDRNOTAVAIL; + + err = -EADDRNOTAVAIL; +errout: + return err; } static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh) From 47f68512d2685431f1781830dfcbab31bda87644 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 4 Aug 2006 23:04:36 -0700 Subject: [PATCH 0404/1063] [IPV4]: Convert address dumping to new netlink api Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 52 +++++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 309640e9ede1..80bf5b2ea2e6 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1112,32 +1112,37 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); - ifm = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags); + if (nlh == NULL) + return -ENOBUFS; + + ifm = nlmsg_data(nlh); ifm->ifa_family = AF_INET; ifm->ifa_prefixlen = ifa->ifa_prefixlen; ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT; ifm->ifa_scope = ifa->ifa_scope; ifm->ifa_index = ifa->ifa_dev->dev->ifindex; - if (ifa->ifa_address) - RTA_PUT(skb, IFA_ADDRESS, 4, &ifa->ifa_address); - if (ifa->ifa_local) - RTA_PUT(skb, IFA_LOCAL, 4, &ifa->ifa_local); - if (ifa->ifa_broadcast) - RTA_PUT(skb, IFA_BROADCAST, 4, &ifa->ifa_broadcast); - if (ifa->ifa_anycast) - RTA_PUT(skb, IFA_ANYCAST, 4, &ifa->ifa_anycast); - if (ifa->ifa_label[0]) - RTA_PUT(skb, IFA_LABEL, IFNAMSIZ, &ifa->ifa_label); - nlh->nlmsg_len = skb->tail - b; - return skb->len; -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; + if (ifa->ifa_address) + NLA_PUT_U32(skb, IFA_ADDRESS, ifa->ifa_address); + + if (ifa->ifa_local) + NLA_PUT_U32(skb, IFA_LOCAL, ifa->ifa_local); + + if (ifa->ifa_broadcast) + NLA_PUT_U32(skb, IFA_BROADCAST, ifa->ifa_broadcast); + + if (ifa->ifa_anycast) + NLA_PUT_U32(skb, IFA_ANYCAST, ifa->ifa_anycast); + + if (ifa->ifa_label[0]) + NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label); + + return nlmsg_end(skb, nlh); + +nla_put_failure: + return nlmsg_cancel(skb, nlh); } static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) @@ -1185,17 +1190,16 @@ done: static void rtmsg_ifa(int event, struct in_ifaddr* ifa) { - int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + 128); - struct sk_buff *skb = alloc_skb(size, GFP_KERNEL); + struct sk_buff *skb; - if (!skb) + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, ENOBUFS); else if (inet_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) { kfree_skb(skb); netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, EINVAL); - } else { + } else netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_IFADDR, GFP_KERNEL); - } } static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = { From 1823730fbc89fadde72a7bb3b7bdf03cc7b8835c Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 4 Aug 2006 23:04:54 -0700 Subject: [PATCH 0405/1063] [IPv4]: Move interface address bits to linux/if_addr.h Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/if_addr.h | 53 ++++++++++++++++++++++++++++++++++++ include/linux/rtnetlink.h | 56 --------------------------------------- net/core/rtnetlink.c | 1 + net/decnet/dn_dev.c | 1 + net/ipv4/devinet.c | 1 + net/ipv4/fib_frontend.c | 1 + net/ipv6/addrconf.c | 1 + net/ipv6/ndisc.c | 1 + 8 files changed, 59 insertions(+), 56 deletions(-) create mode 100644 include/linux/if_addr.h diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h new file mode 100644 index 000000000000..e1590454db59 --- /dev/null +++ b/include/linux/if_addr.h @@ -0,0 +1,53 @@ +#ifndef __LINUX_IF_ADDR_H +#define __LINUX_IF_ADDR_H + +#include + +struct ifaddrmsg +{ + __u8 ifa_family; + __u8 ifa_prefixlen; /* The prefix length */ + __u8 ifa_flags; /* Flags */ + __u8 ifa_scope; /* Address scope */ + __u32 ifa_index; /* Link index */ +}; + +/* + * Important comment: + * IFA_ADDRESS is prefix address, rather than local interface address. + * It makes no difference for normally configured broadcast interfaces, + * but for point-to-point IFA_ADDRESS is DESTINATION address, + * local address is supplied in IFA_LOCAL attribute. + */ +enum +{ + IFA_UNSPEC, + IFA_ADDRESS, + IFA_LOCAL, + IFA_LABEL, + IFA_BROADCAST, + IFA_ANYCAST, + IFA_CACHEINFO, + IFA_MULTICAST, + __IFA_MAX, +}; + +#define IFA_MAX (__IFA_MAX - 1) + +/* ifa_flags */ +#define IFA_F_SECONDARY 0x01 +#define IFA_F_TEMPORARY IFA_F_SECONDARY + +#define IFA_F_DEPRECATED 0x20 +#define IFA_F_TENTATIVE 0x40 +#define IFA_F_PERMANENT 0x80 + +struct ifa_cacheinfo +{ + __u32 ifa_prefered; + __u32 ifa_valid; + __u32 cstamp; /* created timestamp, hundredths of seconds */ + __u32 tstamp; /* updated timestamp, hundredths of seconds */ +}; + +#endif diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index bf353538ae93..890c4d4038b6 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -384,62 +384,6 @@ struct rta_session }; -/********************************************************* - * Interface address. - ****/ - -struct ifaddrmsg -{ - unsigned char ifa_family; - unsigned char ifa_prefixlen; /* The prefix length */ - unsigned char ifa_flags; /* Flags */ - unsigned char ifa_scope; /* See above */ - int ifa_index; /* Link index */ -}; - -enum -{ - IFA_UNSPEC, - IFA_ADDRESS, - IFA_LOCAL, - IFA_LABEL, - IFA_BROADCAST, - IFA_ANYCAST, - IFA_CACHEINFO, - IFA_MULTICAST, - __IFA_MAX -}; - -#define IFA_MAX (__IFA_MAX - 1) - -/* ifa_flags */ - -#define IFA_F_SECONDARY 0x01 -#define IFA_F_TEMPORARY IFA_F_SECONDARY - -#define IFA_F_DEPRECATED 0x20 -#define IFA_F_TENTATIVE 0x40 -#define IFA_F_PERMANENT 0x80 - -struct ifa_cacheinfo -{ - __u32 ifa_prefered; - __u32 ifa_valid; - __u32 cstamp; /* created timestamp, hundredths of seconds */ - __u32 tstamp; /* updated timestamp, hundredths of seconds */ -}; - - -#define IFA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifaddrmsg)))) -#define IFA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifaddrmsg)) - -/* - Important comment: - IFA_ADDRESS is prefix address, rather than local interface address. - It makes no difference for normally configured broadcast interfaces, - but for point-to-point IFA_ADDRESS is DESTINATION address, - local address is supplied in IFA_LOCAL attribute. - */ /************************************************************** * Neighbour discovery. diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index aa7cff2257b1..35712031e2c3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 476455fbdb03..632c5a90b589 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 80bf5b2ea2e6..398e7b9ca66b 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index fe4a53d4d10d..a83f1aa8034e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ed766eebc022..c2a4db843e51 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 67cfc3813c32..5743e8bffefd 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -62,6 +62,7 @@ #include #endif +#include #include #include #include From da5e0494c542dddc56a1f1edfd30310ea30f41ff Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 10 Aug 2006 21:17:37 -0700 Subject: [PATCH 0406/1063] [NET]: Convert link modification to new netlink api Transforms do_setlink() into rtnl_setlink() using the new netlink api. A warning message printed to the console is added in the event that a change request fails while part of the change request has been comitted already. The ioctl() based nature of net devices makes it almost impossible to move on to atomic netlink operations without obsoleting some of the functionality. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 203 ++++++++++++++++++++++--------------------- 1 file changed, 106 insertions(+), 97 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 35712031e2c3..2adc966d981e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -336,52 +336,69 @@ static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *c return skb->len; } -static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static struct nla_policy ifla_policy[IFLA_MAX+1] __read_mostly = { + [IFLA_IFNAME] = { .type = NLA_STRING }, + [IFLA_MAP] = { .minlen = sizeof(struct rtnl_link_ifmap) }, + [IFLA_MTU] = { .type = NLA_U32 }, + [IFLA_TXQLEN] = { .type = NLA_U32 }, + [IFLA_WEIGHT] = { .type = NLA_U32 }, + [IFLA_OPERSTATE] = { .type = NLA_U8 }, + [IFLA_LINKMODE] = { .type = NLA_U8 }, +}; + +static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct ifinfomsg *ifm = NLMSG_DATA(nlh); - struct rtattr **ida = arg; + struct ifinfomsg *ifm; struct net_device *dev; - int err, send_addr_notify = 0; + int err, send_addr_notify = 0, modified = 0; + struct nlattr *tb[IFLA_MAX+1]; + char ifname[IFNAMSIZ]; - if (ifm->ifi_index >= 0) - dev = dev_get_by_index(ifm->ifi_index); - else if (ida[IFLA_IFNAME - 1]) { - char ifname[IFNAMSIZ]; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + if (err < 0) + goto errout; - if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1], - IFNAMSIZ) >= IFNAMSIZ) - return -EINVAL; - dev = dev_get_by_name(ifname); - } else + if (tb[IFLA_IFNAME] && + nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ) >= IFNAMSIZ) return -EINVAL; - if (!dev) - return -ENODEV; - err = -EINVAL; + ifm = nlmsg_data(nlh); + if (ifm->ifi_index >= 0) + dev = dev_get_by_index(ifm->ifi_index); + else if (tb[IFLA_IFNAME]) + dev = dev_get_by_name(ifname); + else + goto errout; - if (ifm->ifi_flags) - dev_change_flags(dev, ifm->ifi_flags); + if (dev == NULL) { + err = -ENODEV; + goto errout; + } - if (ida[IFLA_MAP - 1]) { + if (tb[IFLA_ADDRESS] && + nla_len(tb[IFLA_ADDRESS]) < dev->addr_len) + goto errout_dev; + + if (tb[IFLA_BROADCAST] && + nla_len(tb[IFLA_BROADCAST]) < dev->addr_len) + goto errout_dev; + + if (tb[IFLA_MAP]) { struct rtnl_link_ifmap *u_map; struct ifmap k_map; if (!dev->set_config) { err = -EOPNOTSUPP; - goto out; + goto errout_dev; } if (!netif_device_present(dev)) { err = -ENODEV; - goto out; + goto errout_dev; } - - if (ida[IFLA_MAP - 1]->rta_len != RTA_LENGTH(sizeof(*u_map))) - goto out; - - u_map = RTA_DATA(ida[IFLA_MAP - 1]); + u_map = nla_data(tb[IFLA_MAP]); k_map.mem_start = (unsigned long) u_map->mem_start; k_map.mem_end = (unsigned long) u_map->mem_end; k_map.base_addr = (unsigned short) u_map->base_addr; @@ -390,119 +407,111 @@ static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) k_map.port = (unsigned char) u_map->port; err = dev->set_config(dev, &k_map); + if (err < 0) + goto errout_dev; - if (err) - goto out; + modified = 1; } - if (ida[IFLA_ADDRESS - 1]) { + if (tb[IFLA_ADDRESS]) { struct sockaddr *sa; int len; if (!dev->set_mac_address) { err = -EOPNOTSUPP; - goto out; + goto errout_dev; } + if (!netif_device_present(dev)) { err = -ENODEV; - goto out; + goto errout_dev; } - if (ida[IFLA_ADDRESS - 1]->rta_len != RTA_LENGTH(dev->addr_len)) - goto out; len = sizeof(sa_family_t) + dev->addr_len; sa = kmalloc(len, GFP_KERNEL); if (!sa) { err = -ENOMEM; - goto out; + goto errout_dev; } sa->sa_family = dev->type; - memcpy(sa->sa_data, RTA_DATA(ida[IFLA_ADDRESS - 1]), + memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), dev->addr_len); err = dev->set_mac_address(dev, sa); kfree(sa); if (err) - goto out; + goto errout_dev; send_addr_notify = 1; + modified = 1; } - if (ida[IFLA_BROADCAST - 1]) { - if (ida[IFLA_BROADCAST - 1]->rta_len != RTA_LENGTH(dev->addr_len)) - goto out; - memcpy(dev->broadcast, RTA_DATA(ida[IFLA_BROADCAST - 1]), - dev->addr_len); - send_addr_notify = 1; + if (tb[IFLA_MTU]) { + err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU])); + if (err < 0) + goto errout_dev; + modified = 1; } - if (ida[IFLA_MTU - 1]) { - if (ida[IFLA_MTU - 1]->rta_len != RTA_LENGTH(sizeof(u32))) - goto out; - err = dev_set_mtu(dev, *((u32 *) RTA_DATA(ida[IFLA_MTU - 1]))); - - if (err) - goto out; - - } - - if (ida[IFLA_TXQLEN - 1]) { - if (ida[IFLA_TXQLEN - 1]->rta_len != RTA_LENGTH(sizeof(u32))) - goto out; - - dev->tx_queue_len = *((u32 *) RTA_DATA(ida[IFLA_TXQLEN - 1])); - } - - if (ida[IFLA_WEIGHT - 1]) { - if (ida[IFLA_WEIGHT - 1]->rta_len != RTA_LENGTH(sizeof(u32))) - goto out; - - dev->weight = *((u32 *) RTA_DATA(ida[IFLA_WEIGHT - 1])); - } - - if (ida[IFLA_OPERSTATE - 1]) { - if (ida[IFLA_OPERSTATE - 1]->rta_len != RTA_LENGTH(sizeof(u8))) - goto out; - - set_operstate(dev, *((u8 *) RTA_DATA(ida[IFLA_OPERSTATE - 1]))); - } - - if (ida[IFLA_LINKMODE - 1]) { - if (ida[IFLA_LINKMODE - 1]->rta_len != RTA_LENGTH(sizeof(u8))) - goto out; - - write_lock_bh(&dev_base_lock); - dev->link_mode = *((u8 *) RTA_DATA(ida[IFLA_LINKMODE - 1])); - write_unlock_bh(&dev_base_lock); - } - - if (ifm->ifi_index >= 0 && ida[IFLA_IFNAME - 1]) { - char ifname[IFNAMSIZ]; - - if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1], - IFNAMSIZ) >= IFNAMSIZ) - goto out; + /* + * Interface selected by interface index but interface + * name provided implies that a name change has been + * requested. + */ + if (ifm->ifi_index >= 0 && ifname[0]) { err = dev_change_name(dev, ifname); - if (err) - goto out; + if (err < 0) + goto errout_dev; + modified = 1; } #ifdef CONFIG_NET_WIRELESS_RTNETLINK - if (ida[IFLA_WIRELESS - 1]) { - + if (tb[IFLA_WIRELESS]) { /* Call Wireless Extensions. * Various stuff checked in there... */ - err = wireless_rtnetlink_set(dev, RTA_DATA(ida[IFLA_WIRELESS - 1]), ida[IFLA_WIRELESS - 1]->rta_len); - if (err) - goto out; + err = wireless_rtnetlink_set(dev, nla_data(tb[IFLA_WIRELESS]), + nla_len(tb[IFLA_WIRELESS])); + if (err < 0) + goto errout_dev; } #endif /* CONFIG_NET_WIRELESS_RTNETLINK */ + if (tb[IFLA_BROADCAST]) { + nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len); + send_addr_notify = 1; + } + + + if (ifm->ifi_flags) + dev_change_flags(dev, ifm->ifi_flags); + + if (tb[IFLA_TXQLEN]) + dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); + + if (tb[IFLA_WEIGHT]) + dev->weight = nla_get_u32(tb[IFLA_WEIGHT]); + + if (tb[IFLA_OPERSTATE]) + set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); + + if (tb[IFLA_LINKMODE]) { + write_lock_bh(&dev_base_lock); + dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); + write_unlock_bh(&dev_base_lock); + } + err = 0; -out: +errout_dev: + if (err < 0 && modified && net_ratelimit()) + printk(KERN_WARNING "A link change request failed with " + "some changes comitted already. Interface %s may " + "have been left with an inconsistent configuration, " + "please check.\n", dev->name); + if (send_addr_notify) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); dev_put(dev); +errout: return err; } @@ -753,7 +762,7 @@ static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] = .doit = do_getlink, #endif /* CONFIG_NET_WIRELESS_RTNETLINK */ .dumpit = rtnetlink_dump_ifinfo }, - [RTM_SETLINK - RTM_BASE] = { .doit = do_setlink }, + [RTM_SETLINK - RTM_BASE] = { .doit = rtnl_setlink }, [RTM_GETADDR - RTM_BASE] = { .dumpit = rtnetlink_dump_all }, [RTM_GETROUTE - RTM_BASE] = { .dumpit = rtnetlink_dump_all }, [RTM_NEWNEIGH - RTM_BASE] = { .doit = neigh_add }, From b60c5115f4abf0b961a18682889798dcfbe6a801 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 4 Aug 2006 23:05:34 -0700 Subject: [PATCH 0407/1063] [NET]: Convert link dumping to new netlink api Transforms netlink code to dump link tables to use the new netlink api. Makes rtnl_getlink() available regardless of the availability of the wireless extensions. Adding copy_rtnl_link_stats() avoids the structural dependency of struct rtnl_link_stats on struct net_device_stats and thus avoids troubles later on. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 273 +++++++++++++++++++++---------------------- 1 file changed, 133 insertions(+), 140 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2adc966d981e..93ba04fb8444 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -218,41 +218,73 @@ static void set_operstate(struct net_device *dev, unsigned char transition) } } -static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, - int type, u32 pid, u32 seq, u32 change, - unsigned int flags) +static void copy_rtnl_link_stats(struct rtnl_link_stats *a, + struct net_device_stats *b) { - struct ifinfomsg *r; - struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + a->rx_packets = b->rx_packets; + a->tx_packets = b->tx_packets; + a->rx_bytes = b->rx_bytes; + a->tx_bytes = b->tx_bytes; + a->rx_errors = b->rx_errors; + a->tx_errors = b->tx_errors; + a->rx_dropped = b->rx_dropped; + a->tx_dropped = b->tx_dropped; - nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*r), flags); - r = NLMSG_DATA(nlh); - r->ifi_family = AF_UNSPEC; - r->__ifi_pad = 0; - r->ifi_type = dev->type; - r->ifi_index = dev->ifindex; - r->ifi_flags = dev_get_flags(dev); - r->ifi_change = change; + a->multicast = b->multicast; + a->collisions = b->collisions; - RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name); + a->rx_length_errors = b->rx_length_errors; + a->rx_over_errors = b->rx_over_errors; + a->rx_crc_errors = b->rx_crc_errors; + a->rx_frame_errors = b->rx_frame_errors; + a->rx_fifo_errors = b->rx_fifo_errors; + a->rx_missed_errors = b->rx_missed_errors; - if (1) { - u32 txqlen = dev->tx_queue_len; - RTA_PUT(skb, IFLA_TXQLEN, sizeof(txqlen), &txqlen); - } + a->tx_aborted_errors = b->tx_aborted_errors; + a->tx_carrier_errors = b->tx_carrier_errors; + a->tx_fifo_errors = b->tx_fifo_errors; + a->tx_heartbeat_errors = b->tx_heartbeat_errors; + a->tx_window_errors = b->tx_window_errors; - if (1) { - u32 weight = dev->weight; - RTA_PUT(skb, IFLA_WEIGHT, sizeof(weight), &weight); - } + a->rx_compressed = b->rx_compressed; + a->tx_compressed = b->tx_compressed; +}; - if (1) { - u8 operstate = netif_running(dev)?dev->operstate:IF_OPER_DOWN; - u8 link_mode = dev->link_mode; - RTA_PUT(skb, IFLA_OPERSTATE, sizeof(operstate), &operstate); - RTA_PUT(skb, IFLA_LINKMODE, sizeof(link_mode), &link_mode); - } +static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, + void *iwbuf, int iwbuflen, int type, u32 pid, + u32 seq, u32 change, unsigned int flags) +{ + struct ifinfomsg *ifm; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); + if (nlh == NULL) + return -ENOBUFS; + + ifm = nlmsg_data(nlh); + ifm->ifi_family = AF_UNSPEC; + ifm->__ifi_pad = 0; + ifm->ifi_type = dev->type; + ifm->ifi_index = dev->ifindex; + ifm->ifi_flags = dev_get_flags(dev); + ifm->ifi_change = change; + + NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); + NLA_PUT_U32(skb, IFLA_TXQLEN, dev->tx_queue_len); + NLA_PUT_U32(skb, IFLA_WEIGHT, dev->weight); + NLA_PUT_U8(skb, IFLA_OPERSTATE, + netif_running(dev) ? dev->operstate : IF_OPER_DOWN); + NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode); + NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); + + if (dev->ifindex != dev->iflink) + NLA_PUT_U32(skb, IFLA_LINK, dev->iflink); + + if (dev->master) + NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex); + + if (dev->qdisc_sleeping) + NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc_sleeping->ops->id); if (1) { struct rtnl_link_ifmap map = { @@ -263,58 +295,38 @@ static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, .dma = dev->dma, .port = dev->if_port, }; - RTA_PUT(skb, IFLA_MAP, sizeof(map), &map); + NLA_PUT(skb, IFLA_MAP, sizeof(map), &map); } if (dev->addr_len) { - RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); - RTA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast); - } - - if (1) { - u32 mtu = dev->mtu; - RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu); - } - - if (dev->ifindex != dev->iflink) { - u32 iflink = dev->iflink; - RTA_PUT(skb, IFLA_LINK, sizeof(iflink), &iflink); - } - - if (dev->qdisc_sleeping) - RTA_PUT(skb, IFLA_QDISC, - strlen(dev->qdisc_sleeping->ops->id) + 1, - dev->qdisc_sleeping->ops->id); - - if (dev->master) { - u32 master = dev->master->ifindex; - RTA_PUT(skb, IFLA_MASTER, sizeof(master), &master); + NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); + NLA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast); } if (dev->get_stats) { - unsigned long *stats = (unsigned long*)dev->get_stats(dev); + struct net_device_stats *stats = dev->get_stats(dev); if (stats) { - struct rtattr *a; - __u32 *s; - int i; - int n = sizeof(struct rtnl_link_stats)/4; + struct nlattr *attr; - a = __RTA_PUT(skb, IFLA_STATS, n*4); - s = RTA_DATA(a); - for (i=0; inlmsg_len = skb->tail - b; - return skb->len; -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; + if (iwbuf) + NLA_PUT(skb, IFLA_WIRELESS, iwbuflen, iwbuf); + + return nlmsg_end(skb, nlh); + +nla_put_failure: + return nlmsg_cancel(skb, nlh); } -static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) +static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { int idx; int s_idx = cb->args[0]; @@ -324,10 +336,9 @@ static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *c for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { if (idx < s_idx) continue; - if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, 0, - NLM_F_MULTI) <= 0) + if (rtnl_fill_ifinfo(skb, dev, NULL, 0, RTM_NEWLINK, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0) break; } read_unlock(&dev_base_lock); @@ -515,84 +526,69 @@ errout: return err; } -#ifdef CONFIG_NET_WIRELESS_RTNETLINK -static int do_getlink(struct sk_buff *in_skb, struct nlmsghdr* in_nlh, void *arg) +static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct ifinfomsg *ifm = NLMSG_DATA(in_nlh); - struct rtattr **ida = arg; - struct net_device *dev; - struct ifinfomsg *r; - struct nlmsghdr *nlh; - int err = -ENOBUFS; - struct sk_buff *skb; - unsigned char *b; - char *iw_buf = NULL; + struct ifinfomsg *ifm; + struct nlattr *tb[IFLA_MAX+1]; + struct net_device *dev = NULL; + struct sk_buff *nskb; + char *iw_buf = NULL, *iw = NULL; int iw_buf_len = 0; + int err, payload; - if (ifm->ifi_index >= 0) + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + if (err < 0) + goto errout; + + ifm = nlmsg_data(nlh); + if (ifm->ifi_index >= 0) { dev = dev_get_by_index(ifm->ifi_index); - else + if (dev == NULL) + return -ENODEV; + } else return -EINVAL; - if (!dev) - return -ENODEV; + #ifdef CONFIG_NET_WIRELESS_RTNETLINK - if (ida[IFLA_WIRELESS - 1]) { - + if (tb[IFLA_WIRELESS]) { /* Call Wireless Extensions. We need to know the size before * we can alloc. Various stuff checked in there... */ - err = wireless_rtnetlink_get(dev, RTA_DATA(ida[IFLA_WIRELESS - 1]), ida[IFLA_WIRELESS - 1]->rta_len, &iw_buf, &iw_buf_len); - if (err) - goto out; + err = wireless_rtnetlink_get(dev, nla_data(tb[IFLA_WIRELESS]), + nla_len(tb[IFLA_WIRELESS]), + &iw_buf, &iw_buf_len); + if (err < 0) + goto errout; + + iw += IW_EV_POINT_OFF; } #endif /* CONFIG_NET_WIRELESS_RTNETLINK */ - /* Create a skb big enough to include all the data. - * Some requests are way bigger than 4k... Jean II */ - skb = alloc_skb((NLMSG_LENGTH(sizeof(*r))) + (RTA_SPACE(iw_buf_len)), - GFP_KERNEL); - if (!skb) - goto out; - b = skb->tail; + payload = NLMSG_ALIGN(sizeof(struct ifinfomsg) + + nla_total_size(iw_buf_len)); + nskb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL); + if (nskb == NULL) { + err = -ENOBUFS; + goto errout; + } - /* Put in the message the usual good stuff */ - nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, in_nlh->nlmsg_seq, - RTM_NEWLINK, sizeof(*r)); - r = NLMSG_DATA(nlh); - r->ifi_family = AF_UNSPEC; - r->__ifi_pad = 0; - r->ifi_type = dev->type; - r->ifi_index = dev->ifindex; - r->ifi_flags = dev->flags; - r->ifi_change = 0; + err = rtnl_fill_ifinfo(nskb, dev, iw, iw_buf_len, RTM_NEWLINK, + NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 0); + if (err <= 0) { + kfree_skb(skb); + goto errout; + } - /* Put the wireless payload if it exist */ - if(iw_buf != NULL) - RTA_PUT(skb, IFLA_WIRELESS, iw_buf_len, - iw_buf + IW_EV_POINT_OFF); - - nlh->nlmsg_len = skb->tail - b; - - /* Needed ? */ - NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; - - err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); + err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).pid, MSG_DONTWAIT); if (err > 0) err = 0; -out: - if(iw_buf != NULL) - kfree(iw_buf); +errout: + kfree(iw_buf); dev_put(dev); + return err; - -rtattr_failure: -nlmsg_failure: - kfree_skb(skb); - goto out; } -#endif /* CONFIG_NET_WIRELESS_RTNETLINK */ -static int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb) +static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) { int idx; int s_idx = cb->family; @@ -623,11 +619,11 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) sizeof(struct rtnl_link_ifmap) + sizeof(struct rtnl_link_stats) + 128); - skb = alloc_skb(size, GFP_KERNEL); + skb = nlmsg_new(size, GFP_KERNEL); if (!skb) return; - if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change, 0) < 0) { + if (rtnl_fill_ifinfo(skb, dev, NULL, 0, type, 0, 0, change, 0) < 0) { kfree_skb(skb); return; } @@ -757,14 +753,11 @@ static void rtnetlink_rcv(struct sock *sk, int len) static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] = { - [RTM_GETLINK - RTM_BASE] = { -#ifdef CONFIG_NET_WIRELESS_RTNETLINK - .doit = do_getlink, -#endif /* CONFIG_NET_WIRELESS_RTNETLINK */ - .dumpit = rtnetlink_dump_ifinfo }, + [RTM_GETLINK - RTM_BASE] = { .doit = rtnl_getlink, + .dumpit = rtnl_dump_ifinfo }, [RTM_SETLINK - RTM_BASE] = { .doit = rtnl_setlink }, - [RTM_GETADDR - RTM_BASE] = { .dumpit = rtnetlink_dump_all }, - [RTM_GETROUTE - RTM_BASE] = { .dumpit = rtnetlink_dump_all }, + [RTM_GETADDR - RTM_BASE] = { .dumpit = rtnl_dump_all }, + [RTM_GETROUTE - RTM_BASE] = { .dumpit = rtnl_dump_all }, [RTM_NEWNEIGH - RTM_BASE] = { .doit = neigh_add }, [RTM_DELNEIGH - RTM_BASE] = { .doit = neigh_delete }, [RTM_GETNEIGH - RTM_BASE] = { .dumpit = neigh_dump_info }, @@ -772,7 +765,7 @@ static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] = [RTM_NEWRULE - RTM_BASE] = { .doit = fib_nl_newrule }, [RTM_DELRULE - RTM_BASE] = { .doit = fib_nl_delrule }, #endif - [RTM_GETRULE - RTM_BASE] = { .dumpit = rtnetlink_dump_all }, + [RTM_GETRULE - RTM_BASE] = { .dumpit = rtnl_dump_all }, [RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info }, [RTM_SETNEIGHTBL - RTM_BASE] = { .doit = neightbl_set }, }; From 0844565fb8a9418f5a860aa480c1aef70319c9a2 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 4 Aug 2006 23:05:56 -0700 Subject: [PATCH 0408/1063] [NET]: Move netlink interface bits to linux/if.h Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/if.h | 129 ++++++++++++++++++++++++++++++++++++ include/linux/rtnetlink.h | 133 +------------------------------------- 2 files changed, 130 insertions(+), 132 deletions(-) diff --git a/include/linux/if.h b/include/linux/if.h index 374e20ad8b0d..cd080d765324 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -212,5 +212,134 @@ struct ifconf #define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */ #define ifc_req ifc_ifcu.ifcu_req /* array of structures */ +/* The struct should be in sync with struct net_device_stats */ +struct rtnl_link_stats +{ + __u32 rx_packets; /* total packets received */ + __u32 tx_packets; /* total packets transmitted */ + __u32 rx_bytes; /* total bytes received */ + __u32 tx_bytes; /* total bytes transmitted */ + __u32 rx_errors; /* bad packets received */ + __u32 tx_errors; /* packet transmit problems */ + __u32 rx_dropped; /* no space in linux buffers */ + __u32 tx_dropped; /* no space available in linux */ + __u32 multicast; /* multicast packets received */ + __u32 collisions; + + /* detailed rx_errors: */ + __u32 rx_length_errors; + __u32 rx_over_errors; /* receiver ring buff overflow */ + __u32 rx_crc_errors; /* recved pkt with crc error */ + __u32 rx_frame_errors; /* recv'd frame alignment error */ + __u32 rx_fifo_errors; /* recv'r fifo overrun */ + __u32 rx_missed_errors; /* receiver missed packet */ + + /* detailed tx_errors */ + __u32 tx_aborted_errors; + __u32 tx_carrier_errors; + __u32 tx_fifo_errors; + __u32 tx_heartbeat_errors; + __u32 tx_window_errors; + + /* for cslip etc */ + __u32 rx_compressed; + __u32 tx_compressed; +}; + +/* The struct should be in sync with struct ifmap */ +struct rtnl_link_ifmap +{ + __u64 mem_start; + __u64 mem_end; + __u64 base_addr; + __u16 irq; + __u8 dma; + __u8 port; +}; + +enum +{ + IFLA_UNSPEC, + IFLA_ADDRESS, + IFLA_BROADCAST, + IFLA_IFNAME, + IFLA_MTU, + IFLA_LINK, + IFLA_QDISC, + IFLA_STATS, + IFLA_COST, +#define IFLA_COST IFLA_COST + IFLA_PRIORITY, +#define IFLA_PRIORITY IFLA_PRIORITY + IFLA_MASTER, +#define IFLA_MASTER IFLA_MASTER + IFLA_WIRELESS, /* Wireless Extension event - see wireless.h */ +#define IFLA_WIRELESS IFLA_WIRELESS + IFLA_PROTINFO, /* Protocol specific information for a link */ +#define IFLA_PROTINFO IFLA_PROTINFO + IFLA_TXQLEN, +#define IFLA_TXQLEN IFLA_TXQLEN + IFLA_MAP, +#define IFLA_MAP IFLA_MAP + IFLA_WEIGHT, +#define IFLA_WEIGHT IFLA_WEIGHT + IFLA_OPERSTATE, + IFLA_LINKMODE, + __IFLA_MAX +}; + + +#define IFLA_MAX (__IFLA_MAX - 1) + +/* ifi_flags. + + IFF_* flags. + + The only change is: + IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are + more not changeable by user. They describe link media + characteristics and set by device driver. + + Comments: + - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid + - If neither of these three flags are set; + the interface is NBMA. + + - IFF_MULTICAST does not mean anything special: + multicasts can be used on all not-NBMA links. + IFF_MULTICAST means that this media uses special encapsulation + for multicast frames. Apparently, all IFF_POINTOPOINT and + IFF_BROADCAST devices are able to use multicasts too. + */ + +/* IFLA_LINK. + For usual devices it is equal ifi_index. + If it is a "virtual interface" (f.e. tunnel), ifi_link + can point to real physical interface (f.e. for bandwidth calculations), + or maybe 0, what means, that real media is unknown (usual + for IPIP tunnels, when route to endpoint is allowed to change) + */ + +/* Subtype attributes for IFLA_PROTINFO */ +enum +{ + IFLA_INET6_UNSPEC, + IFLA_INET6_FLAGS, /* link flags */ + IFLA_INET6_CONF, /* sysctl parameters */ + IFLA_INET6_STATS, /* statistics */ + IFLA_INET6_MCAST, /* MC things. What of them? */ + IFLA_INET6_CACHEINFO, /* time values and max reasm size */ + __IFLA_INET6_MAX +}; + +#define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1) + +struct ifla_cacheinfo +{ + __u32 max_reasm_len; + __u32 tstamp; /* ipv6InterfaceTable updated timestamp */ + __u32 reachable_time; + __u32 retrans_time; +}; #endif /* _LINUX_IF_H */ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 890c4d4038b6..84f3eb426da2 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -2,6 +2,7 @@ #define __LINUX_RTNETLINK_H #include +#include /**** * Routing/neighbour discovery messages. @@ -607,138 +608,6 @@ struct prefix_cacheinfo __u32 valid_time; }; -/* The struct should be in sync with struct net_device_stats */ -struct rtnl_link_stats -{ - __u32 rx_packets; /* total packets received */ - __u32 tx_packets; /* total packets transmitted */ - __u32 rx_bytes; /* total bytes received */ - __u32 tx_bytes; /* total bytes transmitted */ - __u32 rx_errors; /* bad packets received */ - __u32 tx_errors; /* packet transmit problems */ - __u32 rx_dropped; /* no space in linux buffers */ - __u32 tx_dropped; /* no space available in linux */ - __u32 multicast; /* multicast packets received */ - __u32 collisions; - - /* detailed rx_errors: */ - __u32 rx_length_errors; - __u32 rx_over_errors; /* receiver ring buff overflow */ - __u32 rx_crc_errors; /* recved pkt with crc error */ - __u32 rx_frame_errors; /* recv'd frame alignment error */ - __u32 rx_fifo_errors; /* recv'r fifo overrun */ - __u32 rx_missed_errors; /* receiver missed packet */ - - /* detailed tx_errors */ - __u32 tx_aborted_errors; - __u32 tx_carrier_errors; - __u32 tx_fifo_errors; - __u32 tx_heartbeat_errors; - __u32 tx_window_errors; - - /* for cslip etc */ - __u32 rx_compressed; - __u32 tx_compressed; -}; - -/* The struct should be in sync with struct ifmap */ -struct rtnl_link_ifmap -{ - __u64 mem_start; - __u64 mem_end; - __u64 base_addr; - __u16 irq; - __u8 dma; - __u8 port; -}; - -enum -{ - IFLA_UNSPEC, - IFLA_ADDRESS, - IFLA_BROADCAST, - IFLA_IFNAME, - IFLA_MTU, - IFLA_LINK, - IFLA_QDISC, - IFLA_STATS, - IFLA_COST, -#define IFLA_COST IFLA_COST - IFLA_PRIORITY, -#define IFLA_PRIORITY IFLA_PRIORITY - IFLA_MASTER, -#define IFLA_MASTER IFLA_MASTER - IFLA_WIRELESS, /* Wireless Extension event - see wireless.h */ -#define IFLA_WIRELESS IFLA_WIRELESS - IFLA_PROTINFO, /* Protocol specific information for a link */ -#define IFLA_PROTINFO IFLA_PROTINFO - IFLA_TXQLEN, -#define IFLA_TXQLEN IFLA_TXQLEN - IFLA_MAP, -#define IFLA_MAP IFLA_MAP - IFLA_WEIGHT, -#define IFLA_WEIGHT IFLA_WEIGHT - IFLA_OPERSTATE, - IFLA_LINKMODE, - __IFLA_MAX -}; - - -#define IFLA_MAX (__IFLA_MAX - 1) - -#define IFLA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg)))) -#define IFLA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifinfomsg)) - -/* ifi_flags. - - IFF_* flags. - - The only change is: - IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are - more not changeable by user. They describe link media - characteristics and set by device driver. - - Comments: - - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid - - If neither of these three flags are set; - the interface is NBMA. - - - IFF_MULTICAST does not mean anything special: - multicasts can be used on all not-NBMA links. - IFF_MULTICAST means that this media uses special encapsulation - for multicast frames. Apparently, all IFF_POINTOPOINT and - IFF_BROADCAST devices are able to use multicasts too. - */ - -/* IFLA_LINK. - For usual devices it is equal ifi_index. - If it is a "virtual interface" (f.e. tunnel), ifi_link - can point to real physical interface (f.e. for bandwidth calculations), - or maybe 0, what means, that real media is unknown (usual - for IPIP tunnels, when route to endpoint is allowed to change) - */ - -/* Subtype attributes for IFLA_PROTINFO */ -enum -{ - IFLA_INET6_UNSPEC, - IFLA_INET6_FLAGS, /* link flags */ - IFLA_INET6_CONF, /* sysctl parameters */ - IFLA_INET6_STATS, /* statistics */ - IFLA_INET6_MCAST, /* MC things. What of them? */ - IFLA_INET6_CACHEINFO, /* time values and max reasm size */ - __IFLA_INET6_MAX -}; - -#define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1) - -struct ifla_cacheinfo -{ - __u32 max_reasm_len; - __u32 tstamp; /* ipv6InterfaceTable updated timestamp */ - __u32 reachable_time; - __u32 retrans_time; -}; /***************************************************************** * Traffic control messages. From 8584d6df39db5601965f9bc5e3bf2fea833ad7bb Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 5 Aug 2006 00:56:16 -0700 Subject: [PATCH 0409/1063] [NETFILTER]: netbios conntrack: fix compile Fix compile breakage caused by move of IFA_F_SECONDARY to new header file. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_netbios_ns.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c index a566a81325b2..3d0b438783db 100644 --- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c +++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include From 84fa7933a33f806bbbaae6775e87459b1ec584c0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 29 Aug 2006 16:44:56 -0700 Subject: [PATCH 0410/1063] [NET]: Replace CHECKSUM_HW by CHECKSUM_PARTIAL/CHECKSUM_COMPLETE Replace CHECKSUM_HW by CHECKSUM_PARTIAL (for outgoing packets, whose checksum still needs to be completed) and CHECKSUM_COMPLETE (for incoming packets, device supplied full checksum). Patch originally from Herbert Xu, updated by myself for 2.6.18-rc3. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- drivers/atm/he.c | 2 +- drivers/net/3c59x.c | 2 +- drivers/net/8139cp.c | 6 +++--- drivers/net/acenic.c | 8 ++++---- drivers/net/bnx2.c | 2 +- drivers/net/cassini.c | 4 ++-- drivers/net/chelsio/sge.c | 10 +++++----- drivers/net/dl2k.c | 2 +- drivers/net/e1000/e1000_main.c | 8 ++++---- drivers/net/forcedeth.c | 3 ++- drivers/net/gianfar.c | 2 +- drivers/net/hamachi.c | 2 +- drivers/net/ibm_emac/ibm_emac_core.c | 2 +- drivers/net/ioc3-eth.c | 2 +- drivers/net/ixgb/ixgb_main.c | 2 +- drivers/net/mv643xx_eth.c | 2 +- drivers/net/myri10ge/myri10ge.c | 8 ++++---- drivers/net/ns83820.c | 2 +- drivers/net/r8169.c | 2 +- drivers/net/s2io.c | 2 +- drivers/net/sk98lin/skge.c | 6 +++--- drivers/net/skge.c | 4 ++-- drivers/net/sky2.c | 6 +++--- drivers/net/starfire.c | 6 +++--- drivers/net/sungem.c | 4 ++-- drivers/net/sunhme.c | 6 +++--- drivers/net/tg3.c | 6 +++--- drivers/net/typhoon.c | 2 +- drivers/net/via-rhine.c | 2 +- drivers/net/via-velocity.c | 2 +- include/linux/netdevice.h | 4 ++-- include/linux/skbuff.h | 17 +++++++++-------- net/core/datagram.c | 4 ++-- net/core/dev.c | 12 ++++++------ net/core/netpoll.c | 2 +- net/core/skbuff.c | 14 +++++++------- net/ipv4/icmp.c | 2 +- net/ipv4/igmp.c | 2 +- net/ipv4/ip_fragment.c | 2 +- net/ipv4/ip_gre.c | 4 ++-- net/ipv4/ip_output.c | 6 +++--- net/ipv4/ipvs/ip_vs_proto_tcp.c | 8 ++++---- net/ipv4/ipvs/ip_vs_proto_udp.c | 8 ++++---- net/ipv4/netfilter.c | 2 +- net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 3 +-- net/ipv4/netfilter/ip_conntrack_proto_udp.c | 3 +-- net/ipv4/netfilter/ip_nat_standalone.c | 5 +++-- net/ipv4/netfilter/ip_queue.c | 6 +++--- net/ipv4/netfilter/ipt_ECN.c | 9 +++++---- net/ipv4/netfilter/ipt_TCPMSS.c | 5 +++-- net/ipv4/tcp.c | 8 ++++---- net/ipv4/tcp_ipv4.c | 6 +++--- net/ipv4/tcp_output.c | 18 ++++++++---------- net/ipv4/udp.c | 6 +++--- net/ipv4/xfrm4_output.c | 4 ++-- net/ipv6/exthdrs.c | 2 +- net/ipv6/icmp.c | 2 +- net/ipv6/ip6_output.c | 2 +- net/ipv6/netfilter.c | 2 +- net/ipv6/netfilter/ip6_queue.c | 6 +++--- net/ipv6/netfilter/nf_conntrack_reasm.c | 6 +++--- net/ipv6/raw.c | 2 +- net/ipv6/reassembly.c | 6 +++--- net/ipv6/tcp_ipv6.c | 6 +++--- net/ipv6/udp.c | 2 +- net/ipv6/xfrm6_output.c | 4 ++-- net/netfilter/nf_conntrack_proto_tcp.c | 3 +-- net/netfilter/nf_conntrack_proto_udp.c | 3 +-- net/netfilter/nfnetlink_queue.c | 6 +++--- net/packet/af_packet.c | 2 +- net/sched/sch_netem.c | 4 ++-- net/sunrpc/socklib.c | 2 +- 72 files changed, 168 insertions(+), 169 deletions(-) diff --git a/drivers/atm/he.c b/drivers/atm/he.c index ffcb9fd31c38..41e052fecd7f 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -1912,7 +1912,7 @@ he_service_rbrq(struct he_dev *he_dev, int group) skb->tail = skb->data + skb->len; #ifdef USE_CHECKSUM_HW if (vcc->vpi == 0 && vcc->vci >= ATM_NOT_RSV_VCI) { - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = TCP_CKSUM(skb->data, he_vcc->pdu_len); } diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c index 80e8ca013e44..29dede2eaa85 100644 --- a/drivers/net/3c59x.c +++ b/drivers/net/3c59x.c @@ -2077,7 +2077,7 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev) vp->tx_ring[entry].next = 0; #if DO_ZEROCOPY - if (skb->ip_summed != CHECKSUM_HW) + if (skb->ip_summed != CHECKSUM_PARTIAL) vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded); else vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded | AddTCPChksum | AddUDPChksum); diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c index 1428bb7715af..a48b211c489d 100644 --- a/drivers/net/8139cp.c +++ b/drivers/net/8139cp.c @@ -813,7 +813,7 @@ static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev) if (mss) flags |= LargeSend | ((mss & MSSMask) << MSSShift); - else if (skb->ip_summed == CHECKSUM_HW) { + else if (skb->ip_summed == CHECKSUM_PARTIAL) { const struct iphdr *ip = skb->nh.iph; if (ip->protocol == IPPROTO_TCP) flags |= IPCS | TCPCS; @@ -867,7 +867,7 @@ static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev) if (mss) ctrl |= LargeSend | ((mss & MSSMask) << MSSShift); - else if (skb->ip_summed == CHECKSUM_HW) { + else if (skb->ip_summed == CHECKSUM_PARTIAL) { if (ip->protocol == IPPROTO_TCP) ctrl |= IPCS | TCPCS; else if (ip->protocol == IPPROTO_UDP) @@ -898,7 +898,7 @@ static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev) txd->addr = cpu_to_le64(first_mapping); wmb(); - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { if (ip->protocol == IPPROTO_TCP) txd->opts1 = cpu_to_le32(first_eor | first_len | FirstFrag | DescOwn | diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c index 1c01e9b3d07c..826548644d7b 100644 --- a/drivers/net/acenic.c +++ b/drivers/net/acenic.c @@ -2040,7 +2040,7 @@ static void ace_rx_int(struct net_device *dev, u32 rxretprd, u32 rxretcsm) */ if (bd_flags & BD_FLG_TCP_UDP_SUM) { skb->csum = htons(csum); - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; } else { skb->ip_summed = CHECKSUM_NONE; } @@ -2511,7 +2511,7 @@ restart: mapping = ace_map_tx_skb(ap, skb, skb, idx); flagsize = (skb->len << 16) | (BD_FLG_END); - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_PARTIAL) flagsize |= BD_FLG_TCP_UDP_SUM; #if ACENIC_DO_VLAN if (vlan_tx_tag_present(skb)) { @@ -2534,7 +2534,7 @@ restart: mapping = ace_map_tx_skb(ap, skb, NULL, idx); flagsize = (skb_headlen(skb) << 16); - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_PARTIAL) flagsize |= BD_FLG_TCP_UDP_SUM; #if ACENIC_DO_VLAN if (vlan_tx_tag_present(skb)) { @@ -2560,7 +2560,7 @@ restart: PCI_DMA_TODEVICE); flagsize = (frag->size << 16); - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_PARTIAL) flagsize |= BD_FLG_TCP_UDP_SUM; idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap); diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 652eb05a6c2d..7857b4630124 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -4423,7 +4423,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) ring_prod = TX_RING_IDX(prod); vlan_tag_flags = 0; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { vlan_tag_flags |= TX_BD_FLAGS_TCP_UDP_CKSUM; } diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c index a31544ccb3c4..558fdb8ad2dc 100644 --- a/drivers/net/cassini.c +++ b/drivers/net/cassini.c @@ -2167,7 +2167,7 @@ end_copy_pkt: cas_page_unmap(addr); } skb->csum = ntohs(i ^ 0xffff); - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; skb->protocol = eth_type_trans(skb, cp->dev); return len; } @@ -2821,7 +2821,7 @@ static inline int cas_xmit_tx_ringN(struct cas *cp, int ring, } ctrl = 0; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { u64 csum_start_off, csum_stuff_off; csum_start_off = (u64) (skb->h.raw - skb->data); diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c index 61b3754f50ff..ddd0bdb498f4 100644 --- a/drivers/net/chelsio/sge.c +++ b/drivers/net/chelsio/sge.c @@ -1470,9 +1470,9 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) } if (!(adapter->flags & UDP_CSUM_CAPABLE) && - skb->ip_summed == CHECKSUM_HW && + skb->ip_summed == CHECKSUM_PARTIAL && skb->nh.iph->protocol == IPPROTO_UDP) - if (unlikely(skb_checksum_help(skb, 0))) { + if (unlikely(skb_checksum_help(skb))) { dev_kfree_skb_any(skb); return NETDEV_TX_OK; } @@ -1495,11 +1495,11 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) cpl = (struct cpl_tx_pkt *)__skb_push(skb, sizeof(*cpl)); cpl->opcode = CPL_TX_PKT; cpl->ip_csum_dis = 1; /* SW calculates IP csum */ - cpl->l4_csum_dis = skb->ip_summed == CHECKSUM_HW ? 0 : 1; + cpl->l4_csum_dis = skb->ip_summed == CHECKSUM_PARTIAL ? 0 : 1; /* the length field isn't used so don't bother setting it */ - st->tx_cso += (skb->ip_summed == CHECKSUM_HW); - sge->stats.tx_do_cksum += (skb->ip_summed == CHECKSUM_HW); + st->tx_cso += (skb->ip_summed == CHECKSUM_PARTIAL); + sge->stats.tx_do_cksum += (skb->ip_summed == CHECKSUM_PARTIAL); sge->stats.tx_reg_pkts++; } cpl->iff = dev->if_port; diff --git a/drivers/net/dl2k.c b/drivers/net/dl2k.c index 402961e68c89..b74e67654764 100644 --- a/drivers/net/dl2k.c +++ b/drivers/net/dl2k.c @@ -611,7 +611,7 @@ start_xmit (struct sk_buff *skb, struct net_device *dev) txdesc = &np->tx_ring[entry]; #if 0 - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { txdesc->status |= cpu_to_le64 (TCPChecksumEnable | UDPChecksumEnable | IPChecksumEnable); diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 98ef9f85482f..2ab9f96f5dab 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -2600,7 +2600,7 @@ e1000_tx_csum(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, unsigned int i; uint8_t css; - if (likely(skb->ip_summed == CHECKSUM_HW)) { + if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { css = skb->h.raw - skb->data; i = tx_ring->next_to_use; @@ -2927,11 +2927,11 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) } /* reserve a descriptor for the offload context */ - if ((mss) || (skb->ip_summed == CHECKSUM_HW)) + if ((mss) || (skb->ip_summed == CHECKSUM_PARTIAL)) count++; count++; #else - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_PARTIAL) count++; #endif @@ -3608,7 +3608,7 @@ e1000_rx_checksum(struct e1000_adapter *adapter, */ csum = ntohl(csum ^ 0xFFFF); skb->csum = csum; - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; } adapter->hw_csum_good++; } diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 11b8f1b43dd5..32cacf115f75 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -1503,7 +1503,8 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev) tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->gso_size << NV_TX2_TSO_SHIFT); else #endif - tx_flags_extra = (skb->ip_summed == CHECKSUM_HW ? (NV_TX2_CHECKSUM_L3|NV_TX2_CHECKSUM_L4) : 0); + tx_flags_extra = skb->ip_summed == CHECKSUM_PARTIAL ? + NV_TX2_CHECKSUM_L3 | NV_TX2_CHECKSUM_L4 : 0; /* vlan tag */ if (np->vlangrp && vlan_tx_tag_present(skb)) { diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index ebbbd6ca6204..ba960913c034 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -947,7 +947,7 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) /* Set up checksumming */ if (likely((dev->features & NETIF_F_IP_CSUM) - && (CHECKSUM_HW == skb->ip_summed))) { + && (CHECKSUM_PARTIAL == skb->ip_summed))) { fcb = gfar_add_fcb(skb, txbdp); status |= TXBD_TOE; gfar_tx_checksum(skb, fcb); diff --git a/drivers/net/hamachi.c b/drivers/net/hamachi.c index 409c6aab0411..763373ae9666 100644 --- a/drivers/net/hamachi.c +++ b/drivers/net/hamachi.c @@ -1648,7 +1648,7 @@ static int hamachi_rx(struct net_device *dev) * could do the pseudo myself and return * CHECKSUM_UNNECESSARY */ - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; } } } diff --git a/drivers/net/ibm_emac/ibm_emac_core.c b/drivers/net/ibm_emac/ibm_emac_core.c index 82468e2dc799..57e214d85e9a 100644 --- a/drivers/net/ibm_emac/ibm_emac_core.c +++ b/drivers/net/ibm_emac/ibm_emac_core.c @@ -1036,7 +1036,7 @@ static inline u16 emac_tx_csum(struct ocp_enet_private *dev, struct sk_buff *skb) { #if defined(CONFIG_IBM_EMAC_TAH) - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { ++dev->stats.tx_packets_csum; return EMAC_TX_CTRL_TAH_CSUM; } diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c index 68d8af7df08e..65f897ddb920 100644 --- a/drivers/net/ioc3-eth.c +++ b/drivers/net/ioc3-eth.c @@ -1387,7 +1387,7 @@ static int ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev) * MAC header which should not be summed and the TCP/UDP pseudo headers * manually. */ - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { int proto = ntohs(skb->nh.iph->protocol); unsigned int csoff; struct iphdr *ih = skb->nh.iph; diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c index 7bbd447289b5..9405b44f3214 100644 --- a/drivers/net/ixgb/ixgb_main.c +++ b/drivers/net/ixgb/ixgb_main.c @@ -1232,7 +1232,7 @@ ixgb_tx_csum(struct ixgb_adapter *adapter, struct sk_buff *skb) unsigned int i; uint8_t css, cso; - if(likely(skb->ip_summed == CHECKSUM_HW)) { + if(likely(skb->ip_summed == CHECKSUM_PARTIAL)) { css = skb->h.raw - skb->data; cso = (skb->h.raw + skb->csum) - skb->data; diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c index eeab1df5bef3..38df58fdb358 100644 --- a/drivers/net/mv643xx_eth.c +++ b/drivers/net/mv643xx_eth.c @@ -1147,7 +1147,7 @@ static void eth_tx_submit_descs_for_skb(struct mv643xx_private *mp, desc->byte_cnt = length; desc->buf_ptr = dma_map_single(NULL, skb->data, length, DMA_TO_DEVICE); - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { BUG_ON(skb->protocol != ETH_P_IP); cmd_sts |= ETH_GEN_TCP_UDP_CHECKSUM | diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c index 9bdd43ab3573..9f16681d0e7e 100644 --- a/drivers/net/myri10ge/myri10ge.c +++ b/drivers/net/myri10ge/myri10ge.c @@ -930,7 +930,7 @@ static inline void myri10ge_vlan_ip_csum(struct sk_buff *skb, u16 hw_csum) (vh->h_vlan_encapsulated_proto == htons(ETH_P_IP) || vh->h_vlan_encapsulated_proto == htons(ETH_P_IPV6))) { skb->csum = hw_csum; - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; } } @@ -973,7 +973,7 @@ myri10ge_rx_done(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx, if ((skb->protocol == ntohs(ETH_P_IP)) || (skb->protocol == ntohs(ETH_P_IPV6))) { skb->csum = ntohs((u16) csum); - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; } else myri10ge_vlan_ip_csum(skb, ntohs((u16) csum)); } @@ -1897,13 +1897,13 @@ again: pseudo_hdr_offset = 0; odd_flag = 0; flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST); - if (likely(skb->ip_summed == CHECKSUM_HW)) { + if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { cksum_offset = (skb->h.raw - skb->data); pseudo_hdr_offset = (skb->h.raw + skb->csum) - skb->data; /* If the headers are excessively large, then we must * fall back to a software checksum */ if (unlikely(cksum_offset > 255 || pseudo_hdr_offset > 127)) { - if (skb_checksum_help(skb, 0)) + if (skb_checksum_help(skb)) goto drop; cksum_offset = 0; pseudo_hdr_offset = 0; diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c index 0e76859c90a2..5143f5dbb2e5 100644 --- a/drivers/net/ns83820.c +++ b/drivers/net/ns83820.c @@ -1153,7 +1153,7 @@ again: if (!nr_frags) frag = NULL; extsts = 0; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { extsts |= EXTSTS_IPPKT; if (IPPROTO_TCP == skb->nh.iph->protocol) extsts |= EXTSTS_TCPPKT; diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 4c2f575faad7..d9b960aa9b0d 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -2169,7 +2169,7 @@ static inline u32 rtl8169_tso_csum(struct sk_buff *skb, struct net_device *dev) if (mss) return LargeSend | ((mss & MSSMask) << MSSShift); } - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { const struct iphdr *ip = skb->nh.iph; if (ip->protocol == IPPROTO_TCP) diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index e72e0e099060..5b3713f622d7 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -3893,7 +3893,7 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) txdp->Control_1 |= TXD_TCP_LSO_MSS(s2io_tcp_mss(skb)); } #endif - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { txdp->Control_2 |= (TXD_TX_CKO_IPV4_EN | TXD_TX_CKO_TCP_EN | TXD_TX_CKO_UDP_EN); diff --git a/drivers/net/sk98lin/skge.c b/drivers/net/sk98lin/skge.c index ee62845d3ac9..eb3b35180c2f 100644 --- a/drivers/net/sk98lin/skge.c +++ b/drivers/net/sk98lin/skge.c @@ -1559,7 +1559,7 @@ struct sk_buff *pMessage) /* pointer to send-message */ pTxd->VDataHigh = (SK_U32) (PhysAddr >> 32); pTxd->pMBuf = pMessage; - if (pMessage->ip_summed == CHECKSUM_HW) { + if (pMessage->ip_summed == CHECKSUM_PARTIAL) { u16 hdrlen = pMessage->h.raw - pMessage->data; u16 offset = hdrlen + pMessage->csum; @@ -1678,7 +1678,7 @@ struct sk_buff *pMessage) /* pointer to send-message */ /* ** Does the HW need to evaluate checksum for TCP or UDP packets? */ - if (pMessage->ip_summed == CHECKSUM_HW) { + if (pMessage->ip_summed == CHECKSUM_PARTIAL) { u16 hdrlen = pMessage->h.raw - pMessage->data; u16 offset = hdrlen + pMessage->csum; @@ -2158,7 +2158,7 @@ rx_start: #ifdef USE_SK_RX_CHECKSUM pMsg->csum = pRxd->TcpSums & 0xffff; - pMsg->ip_summed = CHECKSUM_HW; + pMsg->ip_summed = CHECKSUM_COMPLETE; #else pMsg->ip_summed = CHECKSUM_NONE; #endif diff --git a/drivers/net/skge.c b/drivers/net/skge.c index ad878dfddef4..b3d6fa3d6df4 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -2338,7 +2338,7 @@ static int skge_xmit_frame(struct sk_buff *skb, struct net_device *dev) td->dma_lo = map; td->dma_hi = map >> 32; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { int offset = skb->h.raw - skb->data; /* This seems backwards, but it is what the sk98lin @@ -2642,7 +2642,7 @@ static inline struct sk_buff *skge_rx_get(struct skge_port *skge, skb->dev = skge->netdev; if (skge->rx_csum) { skb->csum = csum; - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; } skb->protocol = eth_type_trans(skb, skge->netdev); diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index 933e87f1cc68..8e92566b587e 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -1163,7 +1163,7 @@ static unsigned tx_le_req(const struct sk_buff *skb) if (skb_is_gso(skb)) ++count; - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_PARTIAL) ++count; return count; @@ -1272,7 +1272,7 @@ static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev) #endif /* Handle TCP checksum offload */ - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { u16 hdr = skb->h.raw - skb->data; u16 offset = hdr + skb->csum; @@ -2000,7 +2000,7 @@ static int sky2_status_intr(struct sky2_hw *hw, int to_do) #endif case OP_RXCHKS: skb = sky2->rx_ring[sky2->rx_next].skb; - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = le16_to_cpu(status); break; diff --git a/drivers/net/starfire.c b/drivers/net/starfire.c index c0a62b00ffc8..2607aa51d8e0 100644 --- a/drivers/net/starfire.c +++ b/drivers/net/starfire.c @@ -1230,7 +1230,7 @@ static int start_tx(struct sk_buff *skb, struct net_device *dev) } #if defined(ZEROCOPY) && defined(HAS_BROKEN_FIRMWARE) - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { if (skb_padto(skb, (skb->len + PADDING_MASK) & ~PADDING_MASK)) return NETDEV_TX_OK; } @@ -1252,7 +1252,7 @@ static int start_tx(struct sk_buff *skb, struct net_device *dev) status |= TxDescIntr; np->reap_tx = 0; } - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { status |= TxCalTCP; np->stats.tx_compressed++; } @@ -1499,7 +1499,7 @@ static int __netdev_rx(struct net_device *dev, int *quota) * Until then, the printk stays. :-) -Ion */ else if (le16_to_cpu(desc->status2) & 0x0040) { - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = le16_to_cpu(desc->csum); printk(KERN_DEBUG "%s: checksum_hw, status2 = %#x\n", dev->name, le16_to_cpu(desc->status2)); } diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c index d7b1d1882cab..b388651b7836 100644 --- a/drivers/net/sungem.c +++ b/drivers/net/sungem.c @@ -855,7 +855,7 @@ static int gem_rx(struct gem *gp, int work_to_do) } skb->csum = ntohs((status & RXDCTRL_TCPCSUM) ^ 0xffff); - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; skb->protocol = eth_type_trans(skb, gp->dev); netif_receive_skb(skb); @@ -1026,7 +1026,7 @@ static int gem_start_xmit(struct sk_buff *skb, struct net_device *dev) unsigned long flags; ctrl = 0; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { u64 csum_start_off, csum_stuff_off; csum_start_off = (u64) (skb->h.raw - skb->data); diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c index c6f5bc3c042f..17981da22730 100644 --- a/drivers/net/sunhme.c +++ b/drivers/net/sunhme.c @@ -1207,7 +1207,7 @@ static void happy_meal_transceiver_check(struct happy_meal *hp, void __iomem *tr * flags, thus: * * skb->csum = rxd->rx_flags & 0xffff; - * skb->ip_summed = CHECKSUM_HW; + * skb->ip_summed = CHECKSUM_COMPLETE; * * before sending off the skb to the protocols, and we are good as gold. */ @@ -2074,7 +2074,7 @@ static void happy_meal_rx(struct happy_meal *hp, struct net_device *dev) /* This card is _fucking_ hot... */ skb->csum = ntohs(csum ^ 0xffff); - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; RXD(("len=%d csum=%4x]", len, csum)); skb->protocol = eth_type_trans(skb, dev); @@ -2268,7 +2268,7 @@ static int happy_meal_start_xmit(struct sk_buff *skb, struct net_device *dev) u32 tx_flags; tx_flags = TXFLAG_OWN; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { u32 csum_start_off, csum_stuff_off; csum_start_off = (u32) (skb->h.raw - skb->data); diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index eafabb253f08..6f5d3a38c582 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -3851,11 +3851,11 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) skb->h.th->check = 0; } - else if (skb->ip_summed == CHECKSUM_HW) + else if (skb->ip_summed == CHECKSUM_PARTIAL) base_flags |= TXD_FLAG_TCPUDP_CSUM; #else mss = 0; - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_PARTIAL) base_flags |= TXD_FLAG_TCPUDP_CSUM; #endif #if TG3_VLAN_TAG_USED @@ -3981,7 +3981,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev) entry = tp->tx_prod; base_flags = 0; - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_PARTIAL) base_flags |= TXD_FLAG_TCPUDP_CSUM; #if TG3_TSO_SUPPORT != 0 mss = 0; diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c index 4103c37172f9..c6e601dc6bbc 100644 --- a/drivers/net/typhoon.c +++ b/drivers/net/typhoon.c @@ -830,7 +830,7 @@ typhoon_start_tx(struct sk_buff *skb, struct net_device *dev) first_txd->addrHi = (u64)((unsigned long) skb) >> 32; first_txd->processFlags = 0; - if(skb->ip_summed == CHECKSUM_HW) { + if(skb->ip_summed == CHECKSUM_PARTIAL) { /* The 3XP will figure out if this is UDP/TCP */ first_txd->processFlags |= TYPHOON_TX_PF_TCP_CHKSUM; first_txd->processFlags |= TYPHOON_TX_PF_UDP_CHKSUM; diff --git a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c index ae971080e2e4..66547159bfd9 100644 --- a/drivers/net/via-rhine.c +++ b/drivers/net/via-rhine.c @@ -1230,7 +1230,7 @@ static int rhine_start_tx(struct sk_buff *skb, struct net_device *dev) rp->tx_skbuff[entry] = skb; if ((rp->quirks & rqRhineI) && - (((unsigned long)skb->data & 3) || skb_shinfo(skb)->nr_frags != 0 || skb->ip_summed == CHECKSUM_HW)) { + (((unsigned long)skb->data & 3) || skb_shinfo(skb)->nr_frags != 0 || skb->ip_summed == CHECKSUM_PARTIAL)) { /* Must use alignment buffer. */ if (skb->len > PKT_BUF_SZ) { /* packet too long, drop it */ diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c index aa9cd92f46b2..f1e0c746a388 100644 --- a/drivers/net/via-velocity.c +++ b/drivers/net/via-velocity.c @@ -2002,7 +2002,7 @@ static int velocity_xmit(struct sk_buff *skb, struct net_device *dev) * Handle hardware checksum */ if ((vptr->flags & VELOCITY_FLAGS_TX_CSUM) - && (skb->ip_summed == CHECKSUM_HW)) { + && (skb->ip_summed == CHECKSUM_PARTIAL)) { struct iphdr *ip = skb->nh.iph; if (ip->protocol == IPPROTO_TCP) td_ptr->tdesc1.TCR |= TCR0_TCPCK; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 50a4719512ed..4f2c2b6beb5e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -976,7 +976,7 @@ extern void dev_mcast_init(void); extern int netdev_max_backlog; extern int weight_p; extern int netdev_set_master(struct net_device *dev, struct net_device *master); -extern int skb_checksum_help(struct sk_buff *skb, int inward); +extern int skb_checksum_help(struct sk_buff *skb); extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features); #ifdef CONFIG_BUG extern void netdev_rx_csum_fault(struct net_device *dev); @@ -1012,7 +1012,7 @@ static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) { return skb_is_gso(skb) && (!skb_gso_ok(skb, dev->features) || - unlikely(skb->ip_summed != CHECKSUM_HW)); + unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); } /* On bonding slaves other than the currently active slave, suppress diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 755e9cddac47..85577a4ffa61 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -34,8 +34,9 @@ #define HAVE_ALIGNABLE_SKB /* Ditto 8) */ #define CHECKSUM_NONE 0 -#define CHECKSUM_HW 1 +#define CHECKSUM_PARTIAL 1 #define CHECKSUM_UNNECESSARY 2 +#define CHECKSUM_COMPLETE 3 #define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \ ~(SMP_CACHE_BYTES - 1)) @@ -56,17 +57,17 @@ * Apparently with secret goal to sell you new device, when you * will add new protocol to your host. F.e. IPv6. 8) * - * HW: the most generic way. Device supplied checksum of _all_ + * COMPLETE: the most generic way. Device supplied checksum of _all_ * the packet as seen by netif_rx in skb->csum. * NOTE: Even if device supports only some protocols, but - * is able to produce some skb->csum, it MUST use HW, + * is able to produce some skb->csum, it MUST use COMPLETE, * not UNNECESSARY. * * B. Checksumming on output. * * NONE: skb is checksummed by protocol or csum is not required. * - * HW: device is required to csum packet as seen by hard_start_xmit + * PARTIAL: device is required to csum packet as seen by hard_start_xmit * from skb->h.raw to the end and to record the checksum * at skb->h.raw+skb->csum. * @@ -1261,14 +1262,14 @@ static inline int skb_linearize_cow(struct sk_buff *skb) * @len: length of data pulled * * After doing a pull on a received packet, you need to call this to - * update the CHECKSUM_HW checksum, or set ip_summed to CHECKSUM_NONE - * so that it can be recomputed from scratch. + * update the CHECKSUM_COMPLETE checksum, or set ip_summed to + * CHECKSUM_NONE so that it can be recomputed from scratch. */ static inline void skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); } @@ -1287,7 +1288,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) { if (likely(len >= skb->len)) return 0; - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; return __pskb_trim(skb, len); } diff --git a/net/core/datagram.c b/net/core/datagram.c index aecddcc30401..f558c61aecc7 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -417,7 +417,7 @@ unsigned int __skb_checksum_complete(struct sk_buff *skb) sum = (u16)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)); if (likely(!sum)) { - if (unlikely(skb->ip_summed == CHECKSUM_HW)) + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) netdev_rx_csum_fault(skb->dev); skb->ip_summed = CHECKSUM_UNNECESSARY; } @@ -462,7 +462,7 @@ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, goto fault; if ((unsigned short)csum_fold(csum)) goto csum_error; - if (unlikely(skb->ip_summed == CHECKSUM_HW)) + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) netdev_rx_csum_fault(skb->dev); iov->iov_len -= chunk; iov->iov_base += chunk; diff --git a/net/core/dev.c b/net/core/dev.c index d4a1ec3bded5..fc82f6f6e1c1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1166,12 +1166,12 @@ EXPORT_SYMBOL(netif_device_attach); * Invalidate hardware checksum when packet is to be mangled, and * complete checksum manually on outgoing path. */ -int skb_checksum_help(struct sk_buff *skb, int inward) +int skb_checksum_help(struct sk_buff *skb) { unsigned int csum; int ret = 0, offset = skb->h.raw - skb->data; - if (inward) + if (skb->ip_summed == CHECKSUM_COMPLETE) goto out_set_summed; if (unlikely(skb_shinfo(skb)->gso_size)) { @@ -1223,7 +1223,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) skb->mac_len = skb->nh.raw - skb->data; __skb_pull(skb, skb->mac_len); - if (unlikely(skb->ip_summed != CHECKSUM_HW)) { + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { if (skb_header_cloned(skb) && (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) return ERR_PTR(err); @@ -1232,7 +1232,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) rcu_read_lock(); list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) { if (ptype->type == type && !ptype->dev && ptype->gso_segment) { - if (unlikely(skb->ip_summed != CHECKSUM_HW)) { + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { err = ptype->gso_send_check(skb); segs = ERR_PTR(err); if (err || skb_gso_ok(skb, features)) @@ -1444,11 +1444,11 @@ int dev_queue_xmit(struct sk_buff *skb) /* If packet is not checksummed and device does not support * checksumming for this protocol, complete checksumming here. */ - if (skb->ip_summed == CHECKSUM_HW && + if (skb->ip_summed == CHECKSUM_PARTIAL && (!(dev->features & NETIF_F_GEN_CSUM) && (!(dev->features & NETIF_F_IP_CSUM) || skb->protocol != htons(ETH_P_IP)))) - if (skb_checksum_help(skb, 0)) + if (skb_checksum_help(skb)) goto out_kfree_skb; gso: diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 471da451cd48..ead5920c26d6 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -110,7 +110,7 @@ static int checksum_udp(struct sk_buff *skb, struct udphdr *uh, psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); - if (skb->ip_summed == CHECKSUM_HW && + if (skb->ip_summed == CHECKSUM_COMPLETE && !(u16)csum_fold(csum_add(psum, skb->csum))) return 0; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c54f3664bce5..8a476f1956e5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1397,7 +1397,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) unsigned int csum; long csstart; - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_PARTIAL) csstart = skb->h.raw - skb->data; else csstart = skb_headlen(skb); @@ -1411,7 +1411,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) csum = skb_copy_and_csum_bits(skb, csstart, to + csstart, skb->len - csstart, 0); - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { long csstuff = csstart + skb->csum; *((unsigned short *)(to + csstuff)) = csum_fold(csum); @@ -1898,10 +1898,10 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, * @len: length of data pulled * * This function performs an skb_pull on the packet and updates - * update the CHECKSUM_HW checksum. It should be used on receive - * path processing instead of skb_pull unless you know that the - * checksum difference is zero (e.g., a valid IP header) or you - * are setting ip_summed to CHECKSUM_NONE. + * update the CHECKSUM_COMPLETE checksum. It should be used on + * receive path processing instead of skb_pull unless you know + * that the checksum difference is zero (e.g., a valid IP header) + * or you are setting ip_summed to CHECKSUM_NONE. */ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) { @@ -1994,7 +1994,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) frag = skb_shinfo(nskb)->frags; k = 0; - nskb->ip_summed = CHECKSUM_HW; + nskb->ip_summed = CHECKSUM_PARTIAL; nskb->csum = skb->csum; memcpy(skb_put(nskb, hsize), skb->data + offset, hsize); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 6ad797c14163..6d223e5c6741 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -930,7 +930,7 @@ int icmp_rcv(struct sk_buff *skb) ICMP_INC_STATS_BH(ICMP_MIB_INMSGS); switch (skb->ip_summed) { - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (!(u16)csum_fold(skb->csum)) break; /* fall through */ diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 8e8117c19e4d..7003e763d970 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -931,7 +931,7 @@ int igmp_rcv(struct sk_buff *skb) goto drop; switch (skb->ip_summed) { - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (!(u16)csum_fold(skb->csum)) break; /* fall through */ diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b84b53a47526..8d7f107c2eef 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -665,7 +665,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) head->len += fp->len; if (head->ip_summed != fp->ip_summed) head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_HW) + else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; atomic_sub(fp->truesize, &ip_frag_mem); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0f9b3a31997b..e66f6ff2e198 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -576,7 +576,7 @@ static int ipgre_rcv(struct sk_buff *skb) if (flags&GRE_CSUM) { switch (skb->ip_summed) { - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: csum = (u16)csum_fold(skb->csum); if (!csum) break; @@ -584,7 +584,7 @@ static int ipgre_rcv(struct sk_buff *skb) case CHECKSUM_NONE: skb->csum = 0; csum = __skb_checksum_complete(skb); - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; } offset += 4; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 308bdeac3455..1b9b6742ef77 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -680,7 +680,7 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk { struct iovec *iov = from; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { if (memcpy_fromiovecend(to, iov, offset, len) < 0) return -EFAULT; } else { @@ -736,7 +736,7 @@ static inline int ip_ufo_append_data(struct sock *sk, /* initialize protocol header pointer */ skb->h.raw = skb->data + fragheaderlen; - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; sk->sk_sndmsg_off = 0; } @@ -844,7 +844,7 @@ int ip_append_data(struct sock *sk, length + fragheaderlen <= mtu && rt->u.dst.dev->features & NETIF_F_ALL_CSUM && !exthdrlen) - csummode = CHECKSUM_HW; + csummode = CHECKSUM_PARTIAL; inet->cork.length += length; if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index bc28b1160a3a..820e8318d10d 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c @@ -151,7 +151,7 @@ tcp_snat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr, cp->dport, cp->vport); - if ((*pskb)->ip_summed == CHECKSUM_HW) + if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) (*pskb)->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -204,7 +204,7 @@ tcp_dnat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr, cp->vport, cp->dport); - if ((*pskb)->ip_summed == CHECKSUM_HW) + if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) (*pskb)->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -229,7 +229,7 @@ tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) switch (skb->ip_summed) { case CHECKSUM_NONE: skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, skb->len - tcphoff, skb->nh.iph->protocol, skb->csum)) { @@ -239,7 +239,7 @@ tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) } break; default: - /* CHECKSUM_UNNECESSARY */ + /* No need to checksum. */ break; } diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index 89d9175d8f28..90c8166c0ec1 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c @@ -161,7 +161,7 @@ udp_snat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(udph, cp->daddr, cp->vaddr, cp->dport, cp->vport); - if ((*pskb)->ip_summed == CHECKSUM_HW) + if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) (*pskb)->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -216,7 +216,7 @@ udp_dnat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(udph, cp->vaddr, cp->daddr, cp->vport, cp->dport); - if ((*pskb)->ip_summed == CHECKSUM_HW) + if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) (*pskb)->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -250,7 +250,7 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) case CHECKSUM_NONE: skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, skb->len - udphoff, @@ -262,7 +262,7 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) } break; default: - /* CHECKSUM_UNNECESSARY */ + /* No need to checksum. */ break; } } diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 6a9e34b794bc..f88347de21a9 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -168,7 +168,7 @@ unsigned int nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int csum = 0; switch (skb->ip_summed) { - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (hook != NF_IP_PRE_ROUTING && hook != NF_IP_LOCAL_IN) break; if ((protocol == 0 && !(u16)csum_fold(skb->csum)) || diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index fb920e76ec10..9de81ff645d5 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -865,8 +865,7 @@ static int tcp_error(struct sk_buff *skb, /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path - * because the semantic of CHECKSUM_HW is different there - * and moreover root might send raw packets. + * because it is assumed to be correct. */ /* FIXME: Source route IP option packets --RR */ if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c index 9b2c16b4d2ff..e58e52f14553 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c @@ -117,8 +117,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path - * because the semantic of CHECKSUM_HW is different there - * and moreover root might send raw packets. + * because the checksum is assumed to be correct. * FIXME: Source route IP option packets --RR */ if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_UDP)) { diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 17de077a7901..f4f00c816d87 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -111,8 +111,9 @@ ip_nat_fn(unsigned int hooknum, & htons(IP_MF|IP_OFFSET))); /* If we had a hardware checksum before, it's now invalid */ - if ((*pskb)->ip_summed == CHECKSUM_HW) - if (skb_checksum_help(*pskb, (out == NULL))) + if ((*pskb)->ip_summed == CHECKSUM_PARTIAL || + (*pskb)->ip_summed == CHECKSUM_COMPLETE) + if (skb_checksum_help(*pskb)) return NF_DROP; ct = ip_conntrack_get(*pskb, &ctinfo); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 198ac36db861..276a964ee6cf 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -208,9 +208,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) break; case IPQ_COPY_PACKET: - if (entry->skb->ip_summed == CHECKSUM_HW && - (*errp = skb_checksum_help(entry->skb, - entry->info->outdev == NULL))) { + if ((entry->skb->ip_summed == CHECKSUM_PARTIAL || + entry->skb->ip_summed == CHECKSUM_COMPLETE) && + (*errp = skb_checksum_help(entry->skb))) { read_unlock_bh(&queue_lock); return NULL; } diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 4adf5c9d34f5..4ec43f98fe49 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -49,7 +49,7 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) /* Return 0 if there was an error. */ static inline int -set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) +set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) { struct tcphdr _tcph, *tcph; u_int16_t diffs[2]; @@ -70,8 +70,9 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) return 0; tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4; - if ((*pskb)->ip_summed == CHECKSUM_HW && - skb_checksum_help(*pskb, inward)) + if (((*pskb)->ip_summed == CHECKSUM_PARTIAL || + (*pskb)->ip_summed == CHECKSUM_COMPLETE) && + skb_checksum_help(*pskb)) return 0; diffs[0] = ((u_int16_t *)tcph)[6]; @@ -106,7 +107,7 @@ target(struct sk_buff **pskb, if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) && (*pskb)->nh.iph->protocol == IPPROTO_TCP) - if (!set_ect_tcp(pskb, einfo, (out == NULL))) + if (!set_ect_tcp(pskb, einfo)) return NF_DROP; return IPT_CONTINUE; diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c index ef2fe5b3f0d8..c998dc0fcd15 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c @@ -62,8 +62,9 @@ ipt_tcpmss_target(struct sk_buff **pskb, if (!skb_make_writable(pskb, (*pskb)->len)) return NF_DROP; - if ((*pskb)->ip_summed == CHECKSUM_HW && - skb_checksum_help(*pskb, out == NULL)) + if (((*pskb)->ip_summed == CHECKSUM_PARTIAL || + (*pskb)->ip_summed == CHECKSUM_COMPLETE) && + skb_checksum_help(*pskb)) return NF_DROP; iph = (*pskb)->nh.iph; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 934396bb1376..b0124e69ab38 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -568,7 +568,7 @@ new_segment: skb->truesize += copy; sk->sk_wmem_queued += copy; sk->sk_forward_alloc -= copy; - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; tp->write_seq += copy; TCP_SKB_CB(skb)->end_seq += copy; skb_shinfo(skb)->gso_segs = 0; @@ -723,7 +723,7 @@ new_segment: * Check whether we can use HW checksum. */ if (sk->sk_route_caps & NETIF_F_ALL_CSUM) - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; skb_entail(sk, tp, skb); copy = size_goal; @@ -2205,7 +2205,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) th->fin = th->psh = 0; th->check = ~csum_fold(th->check + delta); - if (skb->ip_summed != CHECKSUM_HW) + if (skb->ip_summed != CHECKSUM_PARTIAL) th->check = csum_fold(csum_partial(skb->h.raw, thlen, skb->csum)); @@ -2219,7 +2219,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len); th->check = ~csum_fold(th->check + delta); - if (skb->ip_summed != CHECKSUM_HW) + if (skb->ip_summed != CHECKSUM_PARTIAL) th->check = csum_fold(csum_partial(skb->h.raw, thlen, skb->csum)); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 43f6740244f8..b2aa512a30e9 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -484,7 +484,7 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) struct inet_sock *inet = inet_sk(sk); struct tcphdr *th = skb->h.th; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0); skb->csum = offsetof(struct tcphdr, check); } else { @@ -509,7 +509,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb) th->check = 0; th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0); skb->csum = offsetof(struct tcphdr, check); - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; return 0; } @@ -973,7 +973,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) static int tcp_v4_checksum_init(struct sk_buff *skb) { - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_COMPLETE) { if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr, skb->nh.iph->daddr, skb->csum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b4f3ffe1b3b4..9252a50c4b49 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -577,7 +577,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL; - if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) { + if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) { /* Copy and checksum data tail into the new buffer. */ buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize), nsize, 0); @@ -586,7 +586,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss skb->csum = csum_block_sub(skb->csum, buff->csum, len); } else { - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; skb_split(skb, buff, len); } @@ -689,7 +689,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) __pskb_trim_head(skb, len - skb_headlen(skb)); TCP_SKB_CB(skb)->seq += len; - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; skb->truesize -= len; sk->sk_wmem_queued -= len; @@ -1062,7 +1062,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, /* This packet was never sent out yet, so no SACK bits. */ TCP_SKB_CB(buff)->sacked = 0; - buff->ip_summed = skb->ip_summed = CHECKSUM_HW; + buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL; skb_split(skb, buff, len); /* Fix up tso_factor for both original and new SKB. */ @@ -1206,8 +1206,7 @@ static int tcp_mtu_probe(struct sock *sk) TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK; TCP_SKB_CB(nskb)->sacked = 0; nskb->csum = 0; - if (skb->ip_summed == CHECKSUM_HW) - nskb->ip_summed = CHECKSUM_HW; + nskb->ip_summed = skb->ip_summed; len = 0; while (len < probe_size) { @@ -1231,7 +1230,7 @@ static int tcp_mtu_probe(struct sock *sk) ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); if (!skb_shinfo(skb)->nr_frags) { skb_pull(skb, copy); - if (skb->ip_summed != CHECKSUM_HW) + if (skb->ip_summed != CHECKSUM_PARTIAL) skb->csum = csum_partial(skb->data, skb->len, 0); } else { __pskb_trim_head(skb, copy); @@ -1572,10 +1571,9 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); - if (next_skb->ip_summed == CHECKSUM_HW) - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = next_skb->ip_summed; - if (skb->ip_summed != CHECKSUM_HW) + if (skb->ip_summed != CHECKSUM_PARTIAL) skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size); /* Update sequence range on original skb. */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a4d005eccc7f..87152510980c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -429,7 +429,7 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) /* * Only one fragment on the socket. */ - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { skb->csum = offsetof(struct udphdr, check); uh->check = ~csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, IPPROTO_UDP, 0); @@ -448,7 +448,7 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) * fragments on the socket so that all csums of sk_buffs * should be together. */ - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { int offset = (unsigned char *)uh - skb->data; skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); @@ -1088,7 +1088,7 @@ static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, { if (uh->check == 0) { skb->ip_summed = CHECKSUM_UNNECESSARY; - } else if (skb->ip_summed == CHECKSUM_HW) { + } else if (skb->ip_summed == CHECKSUM_COMPLETE) { if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) skb->ip_summed = CHECKSUM_UNNECESSARY; } diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index d16f863cf687..4a96a9e3ef3b 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -48,8 +48,8 @@ static int xfrm4_output_one(struct sk_buff *skb) struct xfrm_state *x = dst->xfrm; int err; - if (skb->ip_summed == CHECKSUM_HW) { - err = skb_checksum_help(skb, 0); + if (skb->ip_summed == CHECKSUM_PARTIAL) { + err = skb_checksum_help(skb); if (err) goto error_nolock; } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 86dac106873b..05afa6b1912b 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -294,7 +294,7 @@ looped_back: hdr = (struct ipv6_rt_hdr *) skb2->h.raw; } - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; i = n - --hdr->segments_left; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index dbfce089e916..103055107674 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -606,7 +606,7 @@ static int icmpv6_rcv(struct sk_buff **pskb) /* Perform checksum. */ switch (skb->ip_summed) { - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, skb->csum)) break; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 4fb47a252913..65514f21c186 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -866,7 +866,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, /* initialize protocol header pointer */ skb->h.raw = skb->data + fragheaderlen; - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; sk->sk_sndmsg_off = 0; } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 395a417ba955..580b1aba6722 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -87,7 +87,7 @@ unsigned int nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int csum = 0; switch (skb->ip_summed) { - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (hook != NF_IP6_PRE_ROUTING && hook != NF_IP6_LOCAL_IN) break; if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 968a14be0d05..c01c126224e2 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -206,9 +206,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) break; case IPQ_COPY_PACKET: - if (entry->skb->ip_summed == CHECKSUM_HW && - (*errp = skb_checksum_help(entry->skb, - entry->info->outdev == NULL))) { + if ((entry->skb->ip_summed == CHECKSUM_PARTIAL || + entry->skb->ip_summed == CHECKSUM_COMPLETE) && + (*errp = skb_checksum_help(entry->skb))) { read_unlock_bh(&queue_lock); return NULL; } diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 00d5583807f7..7a4e4c2e3197 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -408,7 +408,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, return -1; } - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(skb->nh.raw, (u8*)(fhdr + 1) - skb->nh.raw, @@ -640,7 +640,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) head->len += fp->len; if (head->ip_summed != fp->ip_summed) head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_HW) + else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; atomic_sub(fp->truesize, &nf_ct_frag6_mem); @@ -652,7 +652,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) head->nh.ipv6h->payload_len = htons(payload_len); /* Yes, and fold redundant checksum back. 8) */ - if (head->ip_summed == CHECKSUM_HW) + if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum); fq->fragments = NULL; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index d5040e172292..d4af1cb5e19f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -334,7 +334,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) if (!rp->checksum) skb->ip_summed = CHECKSUM_UNNECESSARY; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_COMPLETE) { skb_postpull_rcsum(skb, skb->nh.raw, skb->h.raw - skb->nh.raw); if (!csum_ipv6_magic(&skb->nh.ipv6h->saddr, diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 4e299c69e1c6..a8623d2b0879 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -433,7 +433,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, return; } - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(skb->nh.raw, (u8*)(fhdr+1)-skb->nh.raw, 0)); @@ -647,7 +647,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, head->len += fp->len; if (head->ip_summed != fp->ip_summed) head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_HW) + else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; atomic_sub(fp->truesize, &ip6_frag_mem); @@ -662,7 +662,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, *skb_in = head; /* Yes, and fold redundant checksum back. 8) */ - if (head->ip_summed == CHECKSUM_HW) + if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum); IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 302786a11cd6..7f1b660493b7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -545,7 +545,7 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) struct ipv6_pinfo *np = inet6_sk(sk); struct tcphdr *th = skb->h.th; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0); skb->csum = offsetof(struct tcphdr, check); } else { @@ -570,7 +570,7 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb) th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, IPPROTO_TCP, 0); skb->csum = offsetof(struct tcphdr, check); - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; return 0; } @@ -1033,7 +1033,7 @@ out: static int tcp_v6_checksum_init(struct sk_buff *skb) { - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_COMPLETE) { if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr,skb->csum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 82c7c9cde2a8..780b89f6dfcc 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -475,7 +475,7 @@ static int udpv6_rcv(struct sk_buff **pskb) uh = skb->h.uh; } - if (skb->ip_summed == CHECKSUM_HW && + if (skb->ip_summed == CHECKSUM_COMPLETE && !csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index c8c8b44a0f58..6d111743e508 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -41,8 +41,8 @@ static int xfrm6_output_one(struct sk_buff *skb) struct xfrm_state *x = dst->xfrm; int err; - if (skb->ip_summed == CHECKSUM_HW) { - err = skb_checksum_help(skb, 0); + if (skb->ip_summed == CHECKSUM_PARTIAL) { + err = skb_checksum_help(skb); if (err) goto error_nolock; } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index af8adcba23a7..308d2abd7ee5 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -823,8 +823,7 @@ static int tcp_error(struct sk_buff *skb, /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path - * because the semantic of CHECKSUM_HW is different there - * and moreover root might send raw packets. + * because the checksum is assumed to be correct. */ /* FIXME: Source route IP option packets --RR */ if (nf_conntrack_checksum && diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index ae07ebe3ab37..d36e03139e8b 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -131,8 +131,7 @@ static int udp_error(struct sk_buff *skb, unsigned int dataoff, /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path - * because the semantic of CHECKSUM_HW is different there - * and moreover root might send raw packets. + * because the checksum is assumed to be correct. * FIXME: Source route IP option packets --RR */ if (nf_conntrack_checksum && ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) || diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 49ef41e34c48..eddfbe4441a2 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -377,9 +377,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, break; case NFQNL_COPY_PACKET: - if (entskb->ip_summed == CHECKSUM_HW && - (*errp = skb_checksum_help(entskb, - outdev == NULL))) { + if ((entskb->ip_summed == CHECKSUM_PARTIAL || + entskb->ip_summed == CHECKSUM_COMPLETE) && + (*errp = skb_checksum_help(entskb))) { spin_unlock_bh(&queue->lock); return NULL; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 4172a5235916..300215bdbf46 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -586,7 +586,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe else if (skb->pkt_type == PACKET_OUTGOING) { /* Special case: outgoing packets have ll header at head */ skb_pull(skb, skb->nh.raw - skb->data); - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_PARTIAL) status |= TP_STATUS_CSUMNOTREADY; } } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index a08ec4c7c55d..45939bafbdf8 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -192,8 +192,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) */ if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { if (!(skb = skb_unshare(skb, GFP_ATOMIC)) - || (skb->ip_summed == CHECKSUM_HW - && skb_checksum_help(skb, 0))) { + || (skb->ip_summed == CHECKSUM_PARTIAL + && skb_checksum_help(skb))) { sch->qstats.drops++; return NET_XMIT_DROP; } diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index eb330d4f66d6..6f17527b9e69 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -168,7 +168,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) return -1; if ((unsigned short)csum_fold(desc.csum)) return -1; - if (unlikely(skb->ip_summed == CHECKSUM_HW)) + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) netdev_rx_csum_fault(skb->dev); return 0; no_checksum: From 4cf411de49c65140b3c259748629b561c0d3340f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 5 Aug 2006 00:58:33 -0700 Subject: [PATCH 0411/1063] [NETFILTER]: Get rid of HW checksum invalidation Update hardware checksums incrementally to avoid breaking GSO. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter.h | 6 +++ include/linux/netfilter_ipv4/ip_nat.h | 4 -- include/linux/netfilter_ipv4/ip_nat_core.h | 8 +-- net/ipv4/netfilter/ip_nat_core.c | 52 ++++++++----------- net/ipv4/netfilter/ip_nat_helper.c | 59 +++++++++++++++------- net/ipv4/netfilter/ip_nat_proto_gre.c | 5 +- net/ipv4/netfilter/ip_nat_proto_icmp.c | 8 +-- net/ipv4/netfilter/ip_nat_proto_tcp.c | 7 ++- net/ipv4/netfilter/ip_nat_proto_udp.c | 15 ++++-- net/ipv4/netfilter/ip_nat_standalone.c | 10 +--- net/ipv4/netfilter/ipt_ECN.c | 19 +++---- net/ipv4/netfilter/ipt_REJECT.c | 1 + net/ipv4/netfilter/ipt_TCPMSS.c | 39 ++++++-------- net/netfilter/core.c | 22 ++++++++ 14 files changed, 138 insertions(+), 117 deletions(-) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 10168e26a846..b7e67d1d4382 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -282,6 +282,12 @@ extern void nf_invalidate_cache(int pf); Returns true or false. */ extern int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len); +extern u_int16_t nf_csum_update(u_int32_t oldval, u_int32_t newval, + u_int32_t csum); +extern u_int16_t nf_proto_csum_update(struct sk_buff *skb, + u_int32_t oldval, u_int32_t newval, + u_int16_t csum, int pseudohdr); + struct nf_afinfo { unsigned short family; unsigned int (*checksum)(struct sk_buff *skb, unsigned int hook, diff --git a/include/linux/netfilter_ipv4/ip_nat.h b/include/linux/netfilter_ipv4/ip_nat.h index e9f5ed1d9f68..98f8407e4cb5 100644 --- a/include/linux/netfilter_ipv4/ip_nat.h +++ b/include/linux/netfilter_ipv4/ip_nat.h @@ -72,10 +72,6 @@ extern unsigned int ip_nat_setup_info(struct ip_conntrack *conntrack, extern int ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, const struct ip_conntrack *ignored_conntrack); -/* Calculate relative checksum. */ -extern u_int16_t ip_nat_cheat_check(u_int32_t oldvalinv, - u_int32_t newval, - u_int16_t oldcheck); #else /* !__KERNEL__: iptables wants this to compile. */ #define ip_nat_multi_range ip_nat_multi_range_compat #endif /*__KERNEL__*/ diff --git a/include/linux/netfilter_ipv4/ip_nat_core.h b/include/linux/netfilter_ipv4/ip_nat_core.h index 30db23f06b03..60566f9fd7b3 100644 --- a/include/linux/netfilter_ipv4/ip_nat_core.h +++ b/include/linux/netfilter_ipv4/ip_nat_core.h @@ -11,8 +11,8 @@ extern unsigned int ip_nat_packet(struct ip_conntrack *ct, unsigned int hooknum, struct sk_buff **pskb); -extern int ip_nat_icmp_reply_translation(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_nat_manip_type manip, - enum ip_conntrack_dir dir); +extern int ip_nat_icmp_reply_translation(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + struct sk_buff **pskb); #endif /* _IP_NAT_CORE_H */ diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 1741d555ad0d..4c540d03d48e 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -101,18 +101,6 @@ static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn) write_unlock_bh(&ip_nat_lock); } -/* We do checksum mangling, so if they were wrong before they're still - * wrong. Also works for incomplete packets (eg. ICMP dest - * unreachables.) */ -u_int16_t -ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) -{ - u_int32_t diffs[] = { oldvalinv, newval }; - return csum_fold(csum_partial((char *)diffs, sizeof(diffs), - oldcheck^0xFFFF)); -} -EXPORT_SYMBOL(ip_nat_cheat_check); - /* Is this tuple already taken? (not by us) */ int ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, @@ -378,12 +366,12 @@ manip_pkt(u_int16_t proto, iph = (void *)(*pskb)->data + iphdroff; if (maniptype == IP_NAT_MANIP_SRC) { - iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip, - iph->check); + iph->check = nf_csum_update(~iph->saddr, target->src.ip, + iph->check); iph->saddr = target->src.ip; } else { - iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip, - iph->check); + iph->check = nf_csum_update(~iph->daddr, target->dst.ip, + iph->check); iph->daddr = target->dst.ip; } return 1; @@ -423,10 +411,10 @@ unsigned int ip_nat_packet(struct ip_conntrack *ct, EXPORT_SYMBOL_GPL(ip_nat_packet); /* Dir is direction ICMP is coming from (opposite to packet it contains) */ -int ip_nat_icmp_reply_translation(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_nat_manip_type manip, - enum ip_conntrack_dir dir) +int ip_nat_icmp_reply_translation(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + struct sk_buff **pskb) { struct { struct icmphdr icmp; @@ -434,7 +422,9 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb, } *inside; struct ip_conntrack_tuple inner, target; int hdrlen = (*pskb)->nh.iph->ihl * 4; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned long statusbit; + enum ip_nat_manip_type manip = HOOK2MANIP(hooknum); if (!skb_make_writable(pskb, hdrlen + sizeof(*inside))) return 0; @@ -443,12 +433,8 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb, /* We're actually going to mangle it beyond trivial checksum adjustment, so make sure the current checksum is correct. */ - if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) { - hdrlen = (*pskb)->nh.iph->ihl * 4; - if ((u16)csum_fold(skb_checksum(*pskb, hdrlen, - (*pskb)->len - hdrlen, 0))) - return 0; - } + if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0)) + return 0; /* Must be RELATED */ IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED || @@ -487,12 +473,14 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb, !manip)) return 0; - /* Reloading "inside" here since manip_pkt inner. */ - inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; - inside->icmp.checksum = 0; - inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen, - (*pskb)->len - hdrlen, - 0)); + if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { + /* Reloading "inside" here since manip_pkt inner. */ + inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; + inside->icmp.checksum = 0; + inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen, + (*pskb)->len - hdrlen, + 0)); + } /* Change outer to look the reply to an incoming packet * (proto 0 means don't invert per-proto part). */ diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c index cbcaa45370ae..021c3daae3ed 100644 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c @@ -165,7 +165,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, { struct iphdr *iph; struct tcphdr *tcph; - int datalen; + int oldlen, datalen; if (!skb_make_writable(pskb, (*pskb)->len)) return 0; @@ -180,13 +180,22 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, iph = (*pskb)->nh.iph; tcph = (void *)iph + iph->ihl*4; + oldlen = (*pskb)->len - iph->ihl*4; mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4, match_offset, match_len, rep_buffer, rep_len); datalen = (*pskb)->len - iph->ihl*4; - tcph->check = 0; - tcph->check = tcp_v4_check(tcph, datalen, iph->saddr, iph->daddr, - csum_partial((char *)tcph, datalen, 0)); + if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { + tcph->check = 0; + tcph->check = tcp_v4_check(tcph, datalen, + iph->saddr, iph->daddr, + csum_partial((char *)tcph, + datalen, 0)); + } else + tcph->check = nf_proto_csum_update(*pskb, + htons(oldlen) ^ 0xFFFF, + htons(datalen), + tcph->check, 1); if (rep_len != match_len) { set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); @@ -221,6 +230,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, { struct iphdr *iph; struct udphdr *udph; + int datalen, oldlen; /* UDP helpers might accidentally mangle the wrong packet */ iph = (*pskb)->nh.iph; @@ -238,22 +248,32 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, iph = (*pskb)->nh.iph; udph = (void *)iph + iph->ihl*4; + + oldlen = (*pskb)->len - iph->ihl*4; mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph), match_offset, match_len, rep_buffer, rep_len); /* update the length of the UDP packet */ - udph->len = htons((*pskb)->len - iph->ihl*4); + datalen = (*pskb)->len - iph->ihl*4; + udph->len = htons(datalen); /* fix udp checksum if udp checksum was previously calculated */ - if (udph->check) { - int datalen = (*pskb)->len - iph->ihl * 4; + if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL) + return 1; + + if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { udph->check = 0; udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, datalen, IPPROTO_UDP, csum_partial((char *)udph, datalen, 0)); - } - + if (!udph->check) + udph->check = -1; + } else + udph->check = nf_proto_csum_update(*pskb, + htons(oldlen) ^ 0xFFFF, + htons(datalen), + udph->check, 1); return 1; } EXPORT_SYMBOL(ip_nat_mangle_udp_packet); @@ -293,11 +313,14 @@ sack_adjust(struct sk_buff *skb, ntohl(sack->start_seq), new_start_seq, ntohl(sack->end_seq), new_end_seq); - tcph->check = - ip_nat_cheat_check(~sack->start_seq, new_start_seq, - ip_nat_cheat_check(~sack->end_seq, - new_end_seq, - tcph->check)); + tcph->check = nf_proto_csum_update(skb, + ~sack->start_seq, + new_start_seq, + tcph->check, 0); + tcph->check = nf_proto_csum_update(skb, + ~sack->end_seq, + new_end_seq, + tcph->check, 0); sack->start_seq = new_start_seq; sack->end_seq = new_end_seq; sackoff += sizeof(*sack); @@ -381,10 +404,10 @@ ip_nat_seq_adjust(struct sk_buff **pskb, newack = ntohl(tcph->ack_seq) - other_way->offset_before; newack = htonl(newack); - tcph->check = ip_nat_cheat_check(~tcph->seq, newseq, - ip_nat_cheat_check(~tcph->ack_seq, - newack, - tcph->check)); + tcph->check = nf_proto_csum_update(*pskb, ~tcph->seq, newseq, + tcph->check, 0); + tcph->check = nf_proto_csum_update(*pskb, ~tcph->ack_seq, newack, + tcph->check, 0); DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n", ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c index 38acfdf540eb..70a65372225a 100644 --- a/net/ipv4/netfilter/ip_nat_proto_gre.c +++ b/net/ipv4/netfilter/ip_nat_proto_gre.c @@ -130,9 +130,10 @@ gre_manip_pkt(struct sk_buff **pskb, if (greh->csum) { /* FIXME: Never tested this code... */ *(gre_csum(greh)) = - ip_nat_cheat_check(~*(gre_key(greh)), + nf_proto_csum_update(*pskb, + ~*(gre_key(greh)), tuple->dst.u.gre.key, - *(gre_csum(greh))); + *(gre_csum(greh)), 0); } *(gre_key(greh)) = tuple->dst.u.gre.key; break; diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c index 31a3f4ccb99c..ec50cc295317 100644 --- a/net/ipv4/netfilter/ip_nat_proto_icmp.c +++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c @@ -66,10 +66,10 @@ icmp_manip_pkt(struct sk_buff **pskb, return 0; hdr = (struct icmphdr *)((*pskb)->data + hdroff); - - hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF, - tuple->src.u.icmp.id, - hdr->checksum); + hdr->checksum = nf_proto_csum_update(*pskb, + hdr->un.echo.id ^ 0xFFFF, + tuple->src.u.icmp.id, + hdr->checksum, 0); hdr->un.echo.id = tuple->src.u.icmp.id; return 1; } diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c index a3d14079eba6..72a6307bd2db 100644 --- a/net/ipv4/netfilter/ip_nat_proto_tcp.c +++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c @@ -129,10 +129,9 @@ tcp_manip_pkt(struct sk_buff **pskb, if (hdrsize < sizeof(*hdr)) return 1; - hdr->check = ip_nat_cheat_check(~oldip, newip, - ip_nat_cheat_check(oldport ^ 0xFFFF, - newport, - hdr->check)); + hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, hdr->check, 1); + hdr->check = nf_proto_csum_update(*pskb, oldport ^ 0xFFFF, newport, + hdr->check, 0); return 1; } diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c index ec6053fdc867..5da196ae758c 100644 --- a/net/ipv4/netfilter/ip_nat_proto_udp.c +++ b/net/ipv4/netfilter/ip_nat_proto_udp.c @@ -113,11 +113,16 @@ udp_manip_pkt(struct sk_buff **pskb, newport = tuple->dst.u.udp.port; portptr = &hdr->dest; } - if (hdr->check) /* 0 is a special case meaning no checksum */ - hdr->check = ip_nat_cheat_check(~oldip, newip, - ip_nat_cheat_check(*portptr ^ 0xFFFF, - newport, - hdr->check)); + + if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) { + hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, + hdr->check, 1); + hdr->check = nf_proto_csum_update(*pskb, + *portptr ^ 0xFFFF, newport, + hdr->check, 0); + if (!hdr->check) + hdr->check = -1; + } *portptr = newport; return 1; } diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index f4f00c816d87..f3b778355432 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -110,12 +110,6 @@ ip_nat_fn(unsigned int hooknum, IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET))); - /* If we had a hardware checksum before, it's now invalid */ - if ((*pskb)->ip_summed == CHECKSUM_PARTIAL || - (*pskb)->ip_summed == CHECKSUM_COMPLETE) - if (skb_checksum_help(*pskb)) - return NF_DROP; - ct = ip_conntrack_get(*pskb, &ctinfo); /* Can't track? It's not due to stress, or conntrack would have dropped it. Hence it's the user's responsibilty to @@ -146,8 +140,8 @@ ip_nat_fn(unsigned int hooknum, case IP_CT_RELATED: case IP_CT_RELATED+IP_CT_IS_REPLY: if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { - if (!ip_nat_icmp_reply_translation(pskb, ct, maniptype, - CTINFO2DIR(ctinfo))) + if (!ip_nat_icmp_reply_translation(ct, ctinfo, + hooknum, pskb)) return NF_DROP; else return NF_ACCEPT; diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 4ec43f98fe49..35916c74fe4e 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -52,7 +52,7 @@ static inline int set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) { struct tcphdr _tcph, *tcph; - u_int16_t diffs[2]; + u_int16_t oldval; /* Not enought header? */ tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4, @@ -70,23 +70,16 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) return 0; tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4; - if (((*pskb)->ip_summed == CHECKSUM_PARTIAL || - (*pskb)->ip_summed == CHECKSUM_COMPLETE) && - skb_checksum_help(*pskb)) - return 0; - - diffs[0] = ((u_int16_t *)tcph)[6]; + oldval = ((u_int16_t *)tcph)[6]; if (einfo->operation & IPT_ECN_OP_SET_ECE) tcph->ece = einfo->proto.tcp.ece; if (einfo->operation & IPT_ECN_OP_SET_CWR) tcph->cwr = einfo->proto.tcp.cwr; - diffs[1] = ((u_int16_t *)tcph)[6]; - diffs[0] = diffs[0] ^ 0xFFFF; - if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) - tcph->check = csum_fold(csum_partial((char *)diffs, - sizeof(diffs), - tcph->check^0xFFFF)); + tcph->check = nf_proto_csum_update((*pskb), + oldval ^ 0xFFFF, + ((u_int16_t *)tcph)[6], + tcph->check, 0); return 1; } diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 7f905bf2bde5..95c6662b663c 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -185,6 +185,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) tcph->urg_ptr = 0; /* Adjust TCP checksum */ + nskb->ip_summed = CHECKSUM_NONE; tcph->check = 0; tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr), nskb->nh.iph->saddr, diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c index c998dc0fcd15..0fce85e05507 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c @@ -27,14 +27,6 @@ MODULE_DESCRIPTION("iptables TCP MSS modification module"); #define DEBUGP(format, args...) #endif -static u_int16_t -cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) -{ - u_int32_t diffs[] = { oldvalinv, newval }; - return csum_fold(csum_partial((char *)diffs, sizeof(diffs), - oldcheck^0xFFFF)); -} - static inline unsigned int optlen(const u_int8_t *opt, unsigned int offset) { @@ -62,11 +54,6 @@ ipt_tcpmss_target(struct sk_buff **pskb, if (!skb_make_writable(pskb, (*pskb)->len)) return NF_DROP; - if (((*pskb)->ip_summed == CHECKSUM_PARTIAL || - (*pskb)->ip_summed == CHECKSUM_COMPLETE) && - skb_checksum_help(*pskb)) - return NF_DROP; - iph = (*pskb)->nh.iph; tcplen = (*pskb)->len - iph->ihl*4; @@ -120,9 +107,10 @@ ipt_tcpmss_target(struct sk_buff **pskb, opt[i+2] = (newmss & 0xff00) >> 8; opt[i+3] = (newmss & 0x00ff); - tcph->check = cheat_check(htons(oldmss)^0xFFFF, - htons(newmss), - tcph->check); + tcph->check = nf_proto_csum_update(*pskb, + htons(oldmss)^0xFFFF, + htons(newmss), + tcph->check, 0); DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu" "->%u.%u.%u.%u:%hu changed TCP MSS option" @@ -162,8 +150,10 @@ ipt_tcpmss_target(struct sk_buff **pskb, opt = (u_int8_t *)tcph + sizeof(struct tcphdr); memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr)); - tcph->check = cheat_check(htons(tcplen) ^ 0xFFFF, - htons(tcplen + TCPOLEN_MSS), tcph->check); + tcph->check = nf_proto_csum_update(*pskb, + htons(tcplen) ^ 0xFFFF, + htons(tcplen + TCPOLEN_MSS), + tcph->check, 1); tcplen += TCPOLEN_MSS; opt[0] = TCPOPT_MSS; @@ -171,16 +161,19 @@ ipt_tcpmss_target(struct sk_buff **pskb, opt[2] = (newmss & 0xff00) >> 8; opt[3] = (newmss & 0x00ff); - tcph->check = cheat_check(~0, *((u_int32_t *)opt), tcph->check); + tcph->check = nf_proto_csum_update(*pskb, ~0, *((u_int32_t *)opt), + tcph->check, 0); oldval = ((u_int16_t *)tcph)[6]; tcph->doff += TCPOLEN_MSS/4; - tcph->check = cheat_check(oldval ^ 0xFFFF, - ((u_int16_t *)tcph)[6], tcph->check); + tcph->check = nf_proto_csum_update(*pskb, + oldval ^ 0xFFFF, + ((u_int16_t *)tcph)[6], + tcph->check, 0); newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS); - iph->check = cheat_check(iph->tot_len ^ 0xFFFF, - newtotlen, iph->check); + iph->check = nf_csum_update(iph->tot_len ^ 0xFFFF, + newtotlen, iph->check); iph->tot_len = newtotlen; DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu" diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 5d29d5e23624..27f639f3ac2a 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -222,6 +222,28 @@ copy_skb: } EXPORT_SYMBOL(skb_make_writable); +u_int16_t nf_csum_update(u_int32_t oldval, u_int32_t newval, u_int32_t csum) +{ + u_int32_t diff[] = { oldval, newval }; + + return csum_fold(csum_partial((char *)diff, sizeof(diff), ~csum)); +} +EXPORT_SYMBOL(nf_csum_update); + +u_int16_t nf_proto_csum_update(struct sk_buff *skb, + u_int32_t oldval, u_int32_t newval, + u_int16_t csum, int pseudohdr) +{ + if (skb->ip_summed != CHECKSUM_PARTIAL) { + csum = nf_csum_update(oldval, newval, csum); + if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) + skb->csum = nf_csum_update(oldval, newval, skb->csum); + } else if (pseudohdr) + csum = ~nf_csum_update(oldval, newval, ~csum); + + return csum; +} +EXPORT_SYMBOL(nf_proto_csum_update); /* This does not belong here, but locally generated errors need it if connection tracking in use: without this, connection may not be in hash table, and hence From 394f545db6e7e4d7a6a2fa3f543b755ca39d58ac Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 5 Aug 2006 00:58:52 -0700 Subject: [PATCH 0412/1063] [NETFILTER]: nf_queue: handle GSO packets Handle GSO packets in nf_queue by segmenting them before queueing to avoid breaking GSO in case they get mangled. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/core.c | 2 +- net/netfilter/nf_internals.h | 2 +- net/netfilter/nf_queue.c | 80 +++++++++++++++++++++++++++--------- 3 files changed, 62 insertions(+), 22 deletions(-) diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 27f639f3ac2a..d80b935b3a92 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -182,7 +182,7 @@ next_hook: ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { NFDEBUG("nf_hook: Verdict = QUEUE.\n"); - if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn, + if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn, verdict >> NF_VERDICT_BITS)) goto next_hook; } diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 86e392bfe833..a981971ce1d5 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -23,7 +23,7 @@ extern unsigned int nf_iterate(struct list_head *head, int hook_thresh); /* nf_queue.c */ -extern int nf_queue(struct sk_buff **skb, +extern int nf_queue(struct sk_buff *skb, struct list_head *elem, int pf, unsigned int hook, struct net_device *indev, diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 662a869593bf..4d8936ed581d 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -74,13 +74,13 @@ EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers); * Any packet that leaves via this function must come back * through nf_reinject(). */ -int nf_queue(struct sk_buff **skb, - struct list_head *elem, - int pf, unsigned int hook, - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct sk_buff *), - unsigned int queuenum) +static int __nf_queue(struct sk_buff *skb, + struct list_head *elem, + int pf, unsigned int hook, + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sk_buff *), + unsigned int queuenum) { int status; struct nf_info *info; @@ -94,14 +94,14 @@ int nf_queue(struct sk_buff **skb, read_lock(&queue_handler_lock); if (!queue_handler[pf]) { read_unlock(&queue_handler_lock); - kfree_skb(*skb); + kfree_skb(skb); return 1; } afinfo = nf_get_afinfo(pf); if (!afinfo) { read_unlock(&queue_handler_lock); - kfree_skb(*skb); + kfree_skb(skb); return 1; } @@ -109,9 +109,9 @@ int nf_queue(struct sk_buff **skb, if (!info) { if (net_ratelimit()) printk(KERN_ERR "OOM queueing packet %p\n", - *skb); + skb); read_unlock(&queue_handler_lock); - kfree_skb(*skb); + kfree_skb(skb); return 1; } @@ -130,15 +130,15 @@ int nf_queue(struct sk_buff **skb, if (outdev) dev_hold(outdev); #ifdef CONFIG_BRIDGE_NETFILTER - if ((*skb)->nf_bridge) { - physindev = (*skb)->nf_bridge->physindev; + if (skb->nf_bridge) { + physindev = skb->nf_bridge->physindev; if (physindev) dev_hold(physindev); - physoutdev = (*skb)->nf_bridge->physoutdev; + physoutdev = skb->nf_bridge->physoutdev; if (physoutdev) dev_hold(physoutdev); } #endif - afinfo->saveroute(*skb, info); - status = queue_handler[pf]->outfn(*skb, info, queuenum, + afinfo->saveroute(skb, info); + status = queue_handler[pf]->outfn(skb, info, queuenum, queue_handler[pf]->data); read_unlock(&queue_handler_lock); @@ -153,7 +153,7 @@ int nf_queue(struct sk_buff **skb, #endif module_put(info->elem->owner); kfree(info); - kfree_skb(*skb); + kfree_skb(skb); return 1; } @@ -161,6 +161,46 @@ int nf_queue(struct sk_buff **skb, return 1; } +int nf_queue(struct sk_buff *skb, + struct list_head *elem, + int pf, unsigned int hook, + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sk_buff *), + unsigned int queuenum) +{ + struct sk_buff *segs; + + if (!skb_is_gso(skb)) + return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn, + queuenum); + + switch (pf) { + case AF_INET: + skb->protocol = htons(ETH_P_IP); + break; + case AF_INET6: + skb->protocol = htons(ETH_P_IPV6); + break; + } + + segs = skb_gso_segment(skb, 0); + kfree_skb(skb); + if (unlikely(IS_ERR(segs))) + return 1; + + do { + struct sk_buff *nskb = segs->next; + + segs->next = NULL; + if (!__nf_queue(segs, elem, pf, hook, indev, outdev, okfn, + queuenum)) + kfree_skb(segs); + segs = nskb; + } while (segs); + return 1; +} + void nf_reinject(struct sk_buff *skb, struct nf_info *info, unsigned int verdict) { @@ -224,9 +264,9 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info, case NF_STOLEN: break; case NF_QUEUE: - if (!nf_queue(&skb, elem, info->pf, info->hook, - info->indev, info->outdev, info->okfn, - verdict >> NF_VERDICT_BITS)) + if (!__nf_queue(skb, elem, info->pf, info->hook, + info->indev, info->outdev, info->okfn, + verdict >> NF_VERDICT_BITS)) goto next_hook; break; default: From d7aba67f814729647c938ac6da2d5224b790f926 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 5 Aug 2006 02:20:42 -0700 Subject: [PATCH 0413/1063] [IPV6]: Fix thinko in rt6_fill_node This looks like a mistake, the table ID is overwritten again. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/route.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 438977e2085d..ff5affe2636c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1902,7 +1902,6 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, rtm->rtm_table = rt->rt6i_table->tb6_id; else rtm->rtm_table = RT6_TABLE_UNSPEC; - rtm->rtm_table = RT_TABLE_MAIN; if (rt->rt6i_flags&RTF_REJECT) rtm->rtm_type = RTN_UNREACHABLE; else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) From 6c813a7297e3af4cd7c3458e09e9ee3d161c6830 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 6 Aug 2006 22:22:47 -0700 Subject: [PATCH 0414/1063] [IPV6]: Fix crash in ip6_del_rt ip6_null_entry doesn't have rt6i_table set, when trying to delete it the kernel crashes dereferencing table->tb6_lock. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/route.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ff5affe2636c..41c5905d3191 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1223,6 +1223,9 @@ int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct int err; struct fib6_table *table; + if (rt == &ip6_null_entry) + return -ENOENT; + table = rt->rt6i_table; write_lock_bh(&table->tb6_lock); From 3226f6881719e61e00e92b4c85a8ef49aa4d42b1 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 6 Aug 2006 22:24:08 -0700 Subject: [PATCH 0415/1063] [IPV6]: Fix policy routing lookup When the lookup in a table returns ip6_null_entry the policy routing lookup returns it instead of continuing in the next table, which effectively means it only searches the local table. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/fib6_rules.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index c3c8195744ee..94a46ec967a4 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -94,8 +94,10 @@ int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, if (rt != &ip6_null_entry) goto out; - dst_release(&rt->u.dst); + rt = NULL; + goto out; + discard_pkt: dst_hold(&rt->u.dst); out: From a14a49d2b7b9290e87751f21f503f1954267d4c4 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 7 Aug 2006 17:53:08 -0700 Subject: [PATCH 0416/1063] [NEIGH]: Convert neighbour deletion to new netlink api Fixes: Return ENOENT if the neighbour is not found (was EINVAL) Return EAFNOSUPPORT if no table matches the specified address family. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/neighbour.c | 53 ++++++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index fe2113f54e2b..39c07cc66ee7 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -1440,48 +1441,62 @@ int neigh_table_clear(struct neigh_table *tbl) int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct ndmsg *ndm = NLMSG_DATA(nlh); - struct rtattr **nda = arg; + struct ndmsg *ndm; + struct nlattr *dst_attr; struct neigh_table *tbl; struct net_device *dev = NULL; - int err = -ENODEV; + int err = -EINVAL; - if (ndm->ndm_ifindex && - (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL) + if (nlmsg_len(nlh) < sizeof(*ndm)) goto out; + dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST); + if (dst_attr == NULL) + goto out; + + ndm = nlmsg_data(nlh); + if (ndm->ndm_ifindex) { + dev = dev_get_by_index(ndm->ndm_ifindex); + if (dev == NULL) { + err = -ENODEV; + goto out; + } + } + read_lock(&neigh_tbl_lock); for (tbl = neigh_tables; tbl; tbl = tbl->next) { - struct rtattr *dst_attr = nda[NDA_DST - 1]; - struct neighbour *n; + struct neighbour *neigh; if (tbl->family != ndm->ndm_family) continue; read_unlock(&neigh_tbl_lock); - err = -EINVAL; - if (!dst_attr || RTA_PAYLOAD(dst_attr) < tbl->key_len) + if (nla_len(dst_attr) < tbl->key_len) goto out_dev_put; if (ndm->ndm_flags & NTF_PROXY) { - err = pneigh_delete(tbl, RTA_DATA(dst_attr), dev); + err = pneigh_delete(tbl, nla_data(dst_attr), dev); goto out_dev_put; } - if (!dev) - goto out; + if (dev == NULL) + goto out_dev_put; - n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev); - if (n) { - err = neigh_update(n, NULL, NUD_FAILED, - NEIGH_UPDATE_F_OVERRIDE| - NEIGH_UPDATE_F_ADMIN); - neigh_release(n); + neigh = neigh_lookup(tbl, nla_data(dst_attr), dev); + if (neigh == NULL) { + err = -ENOENT; + goto out_dev_put; } + + err = neigh_update(neigh, NULL, NUD_FAILED, + NEIGH_UPDATE_F_OVERRIDE | + NEIGH_UPDATE_F_ADMIN); + neigh_release(neigh); goto out_dev_put; } read_unlock(&neigh_tbl_lock); - err = -EADDRNOTAVAIL; + err = -EAFNOSUPPORT; + out_dev_put: if (dev) dev_put(dev); From 5208debd0f1da07bbb350f8b0b142775d4f002ea Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 7 Aug 2006 17:55:40 -0700 Subject: [PATCH 0417/1063] [NEIGH]: Convert neighbour addition to new netlink api Fixes: Return EAFNOSUPPORT if no table matches the specified address family. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/neighbour.c | 92 +++++++++++++++++++++++++------------------- 1 file changed, 52 insertions(+), 40 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 39c07cc66ee7..6036f43c1fd6 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1506,76 +1506,88 @@ out: int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct ndmsg *ndm = NLMSG_DATA(nlh); - struct rtattr **nda = arg; + struct ndmsg *ndm; + struct nlattr *tb[NDA_MAX+1]; struct neigh_table *tbl; struct net_device *dev = NULL; - int err = -ENODEV; + int err; - if (ndm->ndm_ifindex && - (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL) + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); + if (err < 0) goto out; + err = -EINVAL; + if (tb[NDA_DST] == NULL) + goto out; + + ndm = nlmsg_data(nlh); + if (ndm->ndm_ifindex) { + dev = dev_get_by_index(ndm->ndm_ifindex); + if (dev == NULL) { + err = -ENODEV; + goto out; + } + + if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) + goto out_dev_put; + } + read_lock(&neigh_tbl_lock); for (tbl = neigh_tables; tbl; tbl = tbl->next) { - struct rtattr *lladdr_attr = nda[NDA_LLADDR - 1]; - struct rtattr *dst_attr = nda[NDA_DST - 1]; - int override = 1; - struct neighbour *n; + int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE; + struct neighbour *neigh; + void *dst, *lladdr; if (tbl->family != ndm->ndm_family) continue; read_unlock(&neigh_tbl_lock); - err = -EINVAL; - if (!dst_attr || RTA_PAYLOAD(dst_attr) < tbl->key_len) + if (nla_len(tb[NDA_DST]) < tbl->key_len) goto out_dev_put; + dst = nla_data(tb[NDA_DST]); + lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL; if (ndm->ndm_flags & NTF_PROXY) { - err = -ENOBUFS; - if (pneigh_lookup(tbl, RTA_DATA(dst_attr), dev, 1)) - err = 0; + err = 0; + if (pneigh_lookup(tbl, dst, dev, 1) == NULL) + err = -ENOBUFS; goto out_dev_put; } - err = -EINVAL; - if (!dev) - goto out; - if (lladdr_attr && RTA_PAYLOAD(lladdr_attr) < dev->addr_len) + if (dev == NULL) goto out_dev_put; + + neigh = neigh_lookup(tbl, dst, dev); + if (neigh == NULL) { + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { + err = -ENOENT; + goto out_dev_put; + } - n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev); - if (n) { + neigh = __neigh_lookup_errno(tbl, dst, dev); + if (IS_ERR(neigh)) { + err = PTR_ERR(neigh); + goto out_dev_put; + } + } else { if (nlh->nlmsg_flags & NLM_F_EXCL) { err = -EEXIST; - neigh_release(n); - goto out_dev_put; - } - - override = nlh->nlmsg_flags & NLM_F_REPLACE; - } else if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { - err = -ENOENT; - goto out_dev_put; - } else { - n = __neigh_lookup_errno(tbl, RTA_DATA(dst_attr), dev); - if (IS_ERR(n)) { - err = PTR_ERR(n); + neigh_release(neigh); goto out_dev_put; } + + if (!(nlh->nlmsg_flags & NLM_F_REPLACE)) + flags &= ~NEIGH_UPDATE_F_OVERRIDE; } - err = neigh_update(n, - lladdr_attr ? RTA_DATA(lladdr_attr) : NULL, - ndm->ndm_state, - (override ? NEIGH_UPDATE_F_OVERRIDE : 0) | - NEIGH_UPDATE_F_ADMIN); - - neigh_release(n); + err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); + neigh_release(neigh); goto out_dev_put; } read_unlock(&neigh_tbl_lock); - err = -EADDRNOTAVAIL; + err = -EAFNOSUPPORT; + out_dev_put: if (dev) dev_put(dev); From 8b8aec508302d4e63fd88f47894805115277f70f Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 7 Aug 2006 17:56:37 -0700 Subject: [PATCH 0418/1063] [NEIGH]: Convert neighbour dumping to new netlink api Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/neighbour.c | 106 +++++++++++++++++++++---------------------- 1 file changed, 51 insertions(+), 55 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 6036f43c1fd6..5490afd23b82 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1901,48 +1901,49 @@ out: return skb->len; } -static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n, - u32 pid, u32 seq, int event, unsigned int flags) +static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, + u32 pid, u32 seq, int type, unsigned int flags) { unsigned long now = jiffies; - unsigned char *b = skb->tail; struct nda_cacheinfo ci; - int locked = 0; - u32 probes; - struct nlmsghdr *nlh = NLMSG_NEW(skb, pid, seq, event, - sizeof(struct ndmsg), flags); - struct ndmsg *ndm = NLMSG_DATA(nlh); + struct nlmsghdr *nlh; + struct ndmsg *ndm; - ndm->ndm_family = n->ops->family; + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); + if (nlh == NULL) + return -ENOBUFS; + + ndm = nlmsg_data(nlh); + ndm->ndm_family = neigh->ops->family; ndm->ndm_pad1 = 0; ndm->ndm_pad2 = 0; - ndm->ndm_flags = n->flags; - ndm->ndm_type = n->type; - ndm->ndm_ifindex = n->dev->ifindex; - RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key); - read_lock_bh(&n->lock); - locked = 1; - ndm->ndm_state = n->nud_state; - if (n->nud_state & NUD_VALID) - RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha); - ci.ndm_used = now - n->used; - ci.ndm_confirmed = now - n->confirmed; - ci.ndm_updated = now - n->updated; - ci.ndm_refcnt = atomic_read(&n->refcnt) - 1; - probes = atomic_read(&n->probes); - read_unlock_bh(&n->lock); - locked = 0; - RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci); - RTA_PUT(skb, NDA_PROBES, sizeof(probes), &probes); - nlh->nlmsg_len = skb->tail - b; - return skb->len; + ndm->ndm_flags = neigh->flags; + ndm->ndm_type = neigh->type; + ndm->ndm_ifindex = neigh->dev->ifindex; -nlmsg_failure: -rtattr_failure: - if (locked) - read_unlock_bh(&n->lock); - skb_trim(skb, b - skb->data); - return -1; + NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key); + + read_lock_bh(&neigh->lock); + ndm->ndm_state = neigh->nud_state; + if ((neigh->nud_state & NUD_VALID) && + nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) { + read_unlock_bh(&neigh->lock); + goto nla_put_failure; + } + + ci.ndm_used = now - neigh->used; + ci.ndm_confirmed = now - neigh->confirmed; + ci.ndm_updated = now - neigh->updated; + ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1; + read_unlock_bh(&neigh->lock); + + NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes)); + NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci); + + return nlmsg_end(skb, nlh); + +nla_put_failure: + return nlmsg_cancel(skb, nlh); } @@ -1986,7 +1987,7 @@ int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) int t, family, s_t; read_lock(&neigh_tbl_lock); - family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family; + family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family; s_t = cb->args[0]; for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) { @@ -2367,39 +2368,34 @@ static struct file_operations neigh_stat_seq_fops = { #ifdef CONFIG_ARPD void neigh_app_ns(struct neighbour *n) { - struct nlmsghdr *nlh; - int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256); - struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC); + struct sk_buff *skb; - if (!skb) + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + if (skb == NULL) return; - if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH, 0) < 0) { + if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH, NLM_F_REQUEST) <= 0) kfree_skb(skb); - return; + else { + NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC); } - nlh = (struct nlmsghdr *)skb->data; - nlh->nlmsg_flags = NLM_F_REQUEST; - NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH; - netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC); } static void neigh_app_notify(struct neighbour *n) { - struct nlmsghdr *nlh; - int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256); - struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC); + struct sk_buff *skb; - if (!skb) + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + if (skb == NULL) return; - if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH, 0) < 0) { + if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH, 0) <= 0) kfree_skb(skb); - return; + else { + NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC); } - nlh = (struct nlmsghdr *)skb->data; - NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH; - netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC); } #endif /* CONFIG_ARPD */ From 9067c722cf6930adf1df2d169de9094dd90b0c33 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 7 Aug 2006 17:57:44 -0700 Subject: [PATCH 0419/1063] [NEIGH]: Move netlink neighbour bits to linux/neighbour.h Moves netlink neighbour bits to linux/neighbour.h. Also moves bits to be exported to userspace from net/neighbour.h to linux/neighbour.h and removes __KERNEL__ guards, userspace is not supposed to be using it. rtnetlink_rcv_msg() is not longer required to parse attributes for the neighbour layer, remove dependency on obsolete and buggy rta_buf. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/neighbour.h | 65 +++++++++++++++++++++++++++++++++++++++ include/linux/rtnetlink.h | 63 ------------------------------------- include/net/neighbour.h | 39 ++--------------------- net/core/rtnetlink.c | 2 -- 4 files changed, 67 insertions(+), 102 deletions(-) create mode 100644 include/linux/neighbour.h diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h new file mode 100644 index 000000000000..8e8293d86fbb --- /dev/null +++ b/include/linux/neighbour.h @@ -0,0 +1,65 @@ +#ifndef __LINUX_NEIGHBOUR_H +#define __LINUX_NEIGHBOUR_H + +#include + +struct ndmsg +{ + __u8 ndm_family; + __u8 ndm_pad1; + __u16 ndm_pad2; + __s32 ndm_ifindex; + __u16 ndm_state; + __u8 ndm_flags; + __u8 ndm_type; +}; + +enum +{ + NDA_UNSPEC, + NDA_DST, + NDA_LLADDR, + NDA_CACHEINFO, + NDA_PROBES, + __NDA_MAX +}; + +#define NDA_MAX (__NDA_MAX - 1) + +/* + * Neighbor Cache Entry Flags + */ + +#define NTF_PROXY 0x08 /* == ATF_PUBL */ +#define NTF_ROUTER 0x80 + +/* + * Neighbor Cache Entry States. + */ + +#define NUD_INCOMPLETE 0x01 +#define NUD_REACHABLE 0x02 +#define NUD_STALE 0x04 +#define NUD_DELAY 0x08 +#define NUD_PROBE 0x10 +#define NUD_FAILED 0x20 + +/* Dummy states */ +#define NUD_NOARP 0x40 +#define NUD_PERMANENT 0x80 +#define NUD_NONE 0x00 + +/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change + and make no address resolution or NUD. + NUD_PERMANENT is also cannot be deleted by garbage collectors. + */ + +struct nda_cacheinfo +{ + __u32 ndm_confirmed; + __u32 ndm_used; + __u32 ndm_updated; + __u32 ndm_refcnt; +}; + +#endif diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 84f3eb426da2..9750f0214c22 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -386,69 +386,6 @@ struct rta_session -/************************************************************** - * Neighbour discovery. - ****/ - -struct ndmsg -{ - unsigned char ndm_family; - unsigned char ndm_pad1; - unsigned short ndm_pad2; - int ndm_ifindex; /* Link index */ - __u16 ndm_state; - __u8 ndm_flags; - __u8 ndm_type; -}; - -enum -{ - NDA_UNSPEC, - NDA_DST, - NDA_LLADDR, - NDA_CACHEINFO, - NDA_PROBES, - __NDA_MAX -}; - -#define NDA_MAX (__NDA_MAX - 1) - -#define NDA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg)))) -#define NDA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndmsg)) - -/* - * Neighbor Cache Entry Flags - */ - -#define NTF_PROXY 0x08 /* == ATF_PUBL */ -#define NTF_ROUTER 0x80 - -/* - * Neighbor Cache Entry States. - */ - -#define NUD_INCOMPLETE 0x01 -#define NUD_REACHABLE 0x02 -#define NUD_STALE 0x04 -#define NUD_DELAY 0x08 -#define NUD_PROBE 0x10 -#define NUD_FAILED 0x20 - -/* Dummy states */ -#define NUD_NOARP 0x40 -#define NUD_PERMANENT 0x80 -#define NUD_NONE 0x00 - - -struct nda_cacheinfo -{ - __u32 ndm_confirmed; - __u32 ndm_used; - __u32 ndm_updated; - __u32 ndm_refcnt; -}; - - /***************************************************************** * Neighbour tables specific messages. * diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 4901ee446879..74c4b6ff8a5c 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -1,6 +1,8 @@ #ifndef _NET_NEIGHBOUR_H #define _NET_NEIGHBOUR_H +#include + /* * Generic neighbour manipulation * @@ -14,40 +16,6 @@ * - Add neighbour cache statistics like rtstat */ -/* The following flags & states are exported to user space, - so that they should be moved to include/linux/ directory. - */ - -/* - * Neighbor Cache Entry Flags - */ - -#define NTF_PROXY 0x08 /* == ATF_PUBL */ -#define NTF_ROUTER 0x80 - -/* - * Neighbor Cache Entry States. - */ - -#define NUD_INCOMPLETE 0x01 -#define NUD_REACHABLE 0x02 -#define NUD_STALE 0x04 -#define NUD_DELAY 0x08 -#define NUD_PROBE 0x10 -#define NUD_FAILED 0x20 - -/* Dummy states */ -#define NUD_NOARP 0x40 -#define NUD_PERMANENT 0x80 -#define NUD_NONE 0x00 - -/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change - and make no address resolution or NUD. - NUD_PERMANENT is also cannot be deleted by garbage collectors. - */ - -#ifdef __KERNEL__ - #include #include #include @@ -374,6 +342,3 @@ struct neighbour_cb { #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) #endif -#endif - - diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 93ba04fb8444..78ccbd4c4e37 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -104,7 +104,6 @@ static const int rtm_min[RTM_NR_FAMILIES] = [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)), [RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)), [RTM_FAM(RTM_NEWROUTE)] = NLMSG_LENGTH(sizeof(struct rtmsg)), - [RTM_FAM(RTM_NEWNEIGH)] = NLMSG_LENGTH(sizeof(struct ndmsg)), [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)), [RTM_FAM(RTM_NEWQDISC)] = NLMSG_LENGTH(sizeof(struct tcmsg)), [RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)), @@ -121,7 +120,6 @@ static const int rta_max[RTM_NR_FAMILIES] = [RTM_FAM(RTM_NEWLINK)] = IFLA_MAX, [RTM_FAM(RTM_NEWADDR)] = IFA_MAX, [RTM_FAM(RTM_NEWROUTE)] = RTA_MAX, - [RTM_FAM(RTM_NEWNEIGH)] = NDA_MAX, [RTM_FAM(RTM_NEWRULE)] = FRA_MAX, [RTM_FAM(RTM_NEWQDISC)] = TCA_MAX, [RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX, From 6b3f8674bccbb2e784d01e44373fb730af6cb149 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 7 Aug 2006 17:58:53 -0700 Subject: [PATCH 0420/1063] [NEIGH]: Convert neighbour table modification to new netlink api Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/neighbour.c | 178 ++++++++++++++++++++++++++----------------- 1 file changed, 107 insertions(+), 71 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 5490afd23b82..5a0b8f48a099 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1754,28 +1754,61 @@ static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, return NULL; } +static struct nla_policy nl_neightbl_policy[NDTA_MAX+1] __read_mostly = { + [NDTA_NAME] = { .type = NLA_STRING }, + [NDTA_THRESH1] = { .type = NLA_U32 }, + [NDTA_THRESH2] = { .type = NLA_U32 }, + [NDTA_THRESH3] = { .type = NLA_U32 }, + [NDTA_GC_INTERVAL] = { .type = NLA_U64 }, + [NDTA_PARMS] = { .type = NLA_NESTED }, +}; + +static struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] __read_mostly = { + [NDTPA_IFINDEX] = { .type = NLA_U32 }, + [NDTPA_QUEUE_LEN] = { .type = NLA_U32 }, + [NDTPA_PROXY_QLEN] = { .type = NLA_U32 }, + [NDTPA_APP_PROBES] = { .type = NLA_U32 }, + [NDTPA_UCAST_PROBES] = { .type = NLA_U32 }, + [NDTPA_MCAST_PROBES] = { .type = NLA_U32 }, + [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 }, + [NDTPA_GC_STALETIME] = { .type = NLA_U64 }, + [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 }, + [NDTPA_RETRANS_TIME] = { .type = NLA_U64 }, + [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 }, + [NDTPA_PROXY_DELAY] = { .type = NLA_U64 }, + [NDTPA_LOCKTIME] = { .type = NLA_U64 }, +}; + int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct neigh_table *tbl; - struct ndtmsg *ndtmsg = NLMSG_DATA(nlh); - struct rtattr **tb = arg; - int err = -EINVAL; + struct ndtmsg *ndtmsg; + struct nlattr *tb[NDTA_MAX+1]; + int err; - if (!tb[NDTA_NAME - 1] || !RTA_PAYLOAD(tb[NDTA_NAME - 1])) - return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX, + nl_neightbl_policy); + if (err < 0) + goto errout; + if (tb[NDTA_NAME] == NULL) { + err = -EINVAL; + goto errout; + } + + ndtmsg = nlmsg_data(nlh); read_lock(&neigh_tbl_lock); for (tbl = neigh_tables; tbl; tbl = tbl->next) { if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family) continue; - if (!rtattr_strcmp(tb[NDTA_NAME - 1], tbl->id)) + if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) break; } if (tbl == NULL) { err = -ENOENT; - goto errout; + goto errout_locked; } /* @@ -1784,86 +1817,89 @@ int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) */ write_lock_bh(&tbl->lock); - if (tb[NDTA_THRESH1 - 1]) - tbl->gc_thresh1 = RTA_GET_U32(tb[NDTA_THRESH1 - 1]); - - if (tb[NDTA_THRESH2 - 1]) - tbl->gc_thresh2 = RTA_GET_U32(tb[NDTA_THRESH2 - 1]); - - if (tb[NDTA_THRESH3 - 1]) - tbl->gc_thresh3 = RTA_GET_U32(tb[NDTA_THRESH3 - 1]); - - if (tb[NDTA_GC_INTERVAL - 1]) - tbl->gc_interval = RTA_GET_MSECS(tb[NDTA_GC_INTERVAL - 1]); - - if (tb[NDTA_PARMS - 1]) { - struct rtattr *tbp[NDTPA_MAX]; + if (tb[NDTA_PARMS]) { + struct nlattr *tbp[NDTPA_MAX+1]; struct neigh_parms *p; - u32 ifindex = 0; + int i, ifindex = 0; - if (rtattr_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS - 1]) < 0) - goto rtattr_failure; + err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS], + nl_ntbl_parm_policy); + if (err < 0) + goto errout_tbl_lock; - if (tbp[NDTPA_IFINDEX - 1]) - ifindex = RTA_GET_U32(tbp[NDTPA_IFINDEX - 1]); + if (tbp[NDTPA_IFINDEX]) + ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]); p = lookup_neigh_params(tbl, ifindex); if (p == NULL) { err = -ENOENT; - goto rtattr_failure; + goto errout_tbl_lock; } - - if (tbp[NDTPA_QUEUE_LEN - 1]) - p->queue_len = RTA_GET_U32(tbp[NDTPA_QUEUE_LEN - 1]); - if (tbp[NDTPA_PROXY_QLEN - 1]) - p->proxy_qlen = RTA_GET_U32(tbp[NDTPA_PROXY_QLEN - 1]); + for (i = 1; i <= NDTPA_MAX; i++) { + if (tbp[i] == NULL) + continue; - if (tbp[NDTPA_APP_PROBES - 1]) - p->app_probes = RTA_GET_U32(tbp[NDTPA_APP_PROBES - 1]); - - if (tbp[NDTPA_UCAST_PROBES - 1]) - p->ucast_probes = - RTA_GET_U32(tbp[NDTPA_UCAST_PROBES - 1]); - - if (tbp[NDTPA_MCAST_PROBES - 1]) - p->mcast_probes = - RTA_GET_U32(tbp[NDTPA_MCAST_PROBES - 1]); - - if (tbp[NDTPA_BASE_REACHABLE_TIME - 1]) - p->base_reachable_time = - RTA_GET_MSECS(tbp[NDTPA_BASE_REACHABLE_TIME - 1]); - - if (tbp[NDTPA_GC_STALETIME - 1]) - p->gc_staletime = - RTA_GET_MSECS(tbp[NDTPA_GC_STALETIME - 1]); - - if (tbp[NDTPA_DELAY_PROBE_TIME - 1]) - p->delay_probe_time = - RTA_GET_MSECS(tbp[NDTPA_DELAY_PROBE_TIME - 1]); - - if (tbp[NDTPA_RETRANS_TIME - 1]) - p->retrans_time = - RTA_GET_MSECS(tbp[NDTPA_RETRANS_TIME - 1]); - - if (tbp[NDTPA_ANYCAST_DELAY - 1]) - p->anycast_delay = - RTA_GET_MSECS(tbp[NDTPA_ANYCAST_DELAY - 1]); - - if (tbp[NDTPA_PROXY_DELAY - 1]) - p->proxy_delay = - RTA_GET_MSECS(tbp[NDTPA_PROXY_DELAY - 1]); - - if (tbp[NDTPA_LOCKTIME - 1]) - p->locktime = RTA_GET_MSECS(tbp[NDTPA_LOCKTIME - 1]); + switch (i) { + case NDTPA_QUEUE_LEN: + p->queue_len = nla_get_u32(tbp[i]); + break; + case NDTPA_PROXY_QLEN: + p->proxy_qlen = nla_get_u32(tbp[i]); + break; + case NDTPA_APP_PROBES: + p->app_probes = nla_get_u32(tbp[i]); + break; + case NDTPA_UCAST_PROBES: + p->ucast_probes = nla_get_u32(tbp[i]); + break; + case NDTPA_MCAST_PROBES: + p->mcast_probes = nla_get_u32(tbp[i]); + break; + case NDTPA_BASE_REACHABLE_TIME: + p->base_reachable_time = nla_get_msecs(tbp[i]); + break; + case NDTPA_GC_STALETIME: + p->gc_staletime = nla_get_msecs(tbp[i]); + break; + case NDTPA_DELAY_PROBE_TIME: + p->delay_probe_time = nla_get_msecs(tbp[i]); + break; + case NDTPA_RETRANS_TIME: + p->retrans_time = nla_get_msecs(tbp[i]); + break; + case NDTPA_ANYCAST_DELAY: + p->anycast_delay = nla_get_msecs(tbp[i]); + break; + case NDTPA_PROXY_DELAY: + p->proxy_delay = nla_get_msecs(tbp[i]); + break; + case NDTPA_LOCKTIME: + p->locktime = nla_get_msecs(tbp[i]); + break; + } + } } + if (tb[NDTA_THRESH1]) + tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]); + + if (tb[NDTA_THRESH2]) + tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]); + + if (tb[NDTA_THRESH3]) + tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]); + + if (tb[NDTA_GC_INTERVAL]) + tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]); + err = 0; -rtattr_failure: +errout_tbl_lock: write_unlock_bh(&tbl->lock); -errout: +errout_locked: read_unlock(&neigh_tbl_lock); +errout: return err; } From ca860fb39b4aa1479e2fea67435a2c1eac9ce789 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 7 Aug 2006 18:00:18 -0700 Subject: [PATCH 0421/1063] [NEIGH]: Convert neighbour table dumping to new netlink api Also fixes skipping of already dumped neighbours. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/neighbour.c | 139 +++++++++++++++++++++++-------------------- 1 file changed, 73 insertions(+), 66 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 5a0b8f48a099..2f4e06a13457 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1597,56 +1597,59 @@ out: static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) { - struct rtattr *nest = NULL; - - nest = RTA_NEST(skb, NDTA_PARMS); + struct nlattr *nest; + + nest = nla_nest_start(skb, NDTA_PARMS); + if (nest == NULL) + return -ENOBUFS; if (parms->dev) - RTA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex); + NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex); - RTA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)); - RTA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len); - RTA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen); - RTA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes); - RTA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes); - RTA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes); - RTA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time); - RTA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME, + NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)); + NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len); + NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen); + NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes); + NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes); + NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes); + NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time); + NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME, parms->base_reachable_time); - RTA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime); - RTA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time); - RTA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time); - RTA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay); - RTA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay); - RTA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime); + NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime); + NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time); + NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time); + NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay); + NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay); + NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime); - return RTA_NEST_END(skb, nest); + return nla_nest_end(skb, nest); -rtattr_failure: - return RTA_NEST_CANCEL(skb, nest); +nla_put_failure: + return nla_nest_cancel(skb, nest); } -static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb, - struct netlink_callback *cb) +static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, + u32 pid, u32 seq, int type, int flags) { struct nlmsghdr *nlh; struct ndtmsg *ndtmsg; - nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg), - NLM_F_MULTI); + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags); + if (nlh == NULL) + return -ENOBUFS; - ndtmsg = NLMSG_DATA(nlh); + ndtmsg = nlmsg_data(nlh); read_lock_bh(&tbl->lock); ndtmsg->ndtm_family = tbl->family; ndtmsg->ndtm_pad1 = 0; ndtmsg->ndtm_pad2 = 0; - RTA_PUT_STRING(skb, NDTA_NAME, tbl->id); - RTA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval); - RTA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1); - RTA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2); - RTA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3); + NLA_PUT_STRING(skb, NDTA_NAME, tbl->id); + NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval); + NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1); + NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2); + NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3); { unsigned long now = jiffies; @@ -1665,7 +1668,7 @@ static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb, .ndtc_proxy_qlen = tbl->proxy_queue.qlen, }; - RTA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc); + NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc); } { @@ -1690,55 +1693,50 @@ static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb, ndst.ndts_forced_gc_runs += st->forced_gc_runs; } - RTA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst); + NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst); } BUG_ON(tbl->parms.dev); if (neightbl_fill_parms(skb, &tbl->parms) < 0) - goto rtattr_failure; + goto nla_put_failure; read_unlock_bh(&tbl->lock); - return NLMSG_END(skb, nlh); + return nlmsg_end(skb, nlh); -rtattr_failure: +nla_put_failure: read_unlock_bh(&tbl->lock); - return NLMSG_CANCEL(skb, nlh); - -nlmsg_failure: - return -1; + return nlmsg_cancel(skb, nlh); } -static int neightbl_fill_param_info(struct neigh_table *tbl, +static int neightbl_fill_param_info(struct sk_buff *skb, + struct neigh_table *tbl, struct neigh_parms *parms, - struct sk_buff *skb, - struct netlink_callback *cb) + u32 pid, u32 seq, int type, + unsigned int flags) { struct ndtmsg *ndtmsg; struct nlmsghdr *nlh; - nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg), - NLM_F_MULTI); + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags); + if (nlh == NULL) + return -ENOBUFS; - ndtmsg = NLMSG_DATA(nlh); + ndtmsg = nlmsg_data(nlh); read_lock_bh(&tbl->lock); ndtmsg->ndtm_family = tbl->family; ndtmsg->ndtm_pad1 = 0; ndtmsg->ndtm_pad2 = 0; - RTA_PUT_STRING(skb, NDTA_NAME, tbl->id); - if (neightbl_fill_parms(skb, parms) < 0) - goto rtattr_failure; + if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 || + neightbl_fill_parms(skb, parms) < 0) + goto errout; read_unlock_bh(&tbl->lock); - return NLMSG_END(skb, nlh); - -rtattr_failure: + return nlmsg_end(skb, nlh); +errout: read_unlock_bh(&tbl->lock); - return NLMSG_CANCEL(skb, nlh); - -nlmsg_failure: - return -1; + return nlmsg_cancel(skb, nlh); } static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, @@ -1905,34 +1903,43 @@ errout: int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) { - int idx, family; - int s_idx = cb->args[0]; + int family, tidx, nidx = 0; + int tbl_skip = cb->args[0]; + int neigh_skip = cb->args[1]; struct neigh_table *tbl; - family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family; + family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family; read_lock(&neigh_tbl_lock); - for (tbl = neigh_tables, idx = 0; tbl; tbl = tbl->next) { + for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) { struct neigh_parms *p; - if (idx < s_idx || (family && tbl->family != family)) + if (tidx < tbl_skip || (family && tbl->family != family)) continue; - if (neightbl_fill_info(tbl, skb, cb) <= 0) + if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, + NLM_F_MULTI) <= 0) break; - for (++idx, p = tbl->parms.next; p; p = p->next, idx++) { - if (idx < s_idx) + for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) { + if (nidx < neigh_skip) continue; - if (neightbl_fill_param_info(tbl, p, skb, cb) <= 0) + if (neightbl_fill_param_info(skb, tbl, p, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + RTM_NEWNEIGHTBL, + NLM_F_MULTI) <= 0) goto out; } + neigh_skip = 0; } out: read_unlock(&neigh_tbl_lock); - cb->args[0] = idx; + cb->args[0] = tidx; + cb->args[1] = nidx; return skb->len; } From b63bbc5006a0a62fabc81c4f77e95f16ff16f340 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 7 Aug 2006 18:00:57 -0700 Subject: [PATCH 0422/1063] [NEIGH]: Move netlink neighbour table bits to linux/neighbour.h rtnetlink_rcv_msg() is not longer required to parse attributes for the neighbour tables layer, remove dependency on obsolete and buggy rta_buf. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/neighbour.h | 94 +++++++++++++++++++++++++++++++++++ include/linux/rtnetlink.h | 101 -------------------------------------- net/core/rtnetlink.c | 2 - 3 files changed, 94 insertions(+), 103 deletions(-) diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h index 8e8293d86fbb..bd3bbf668cdb 100644 --- a/include/linux/neighbour.h +++ b/include/linux/neighbour.h @@ -62,4 +62,98 @@ struct nda_cacheinfo __u32 ndm_refcnt; }; +/***************************************************************** + * Neighbour tables specific messages. + * + * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the + * NLM_F_DUMP flag set. Every neighbour table configuration is + * spread over multiple messages to avoid running into message + * size limits on systems with many interfaces. The first message + * in the sequence transports all not device specific data such as + * statistics, configuration, and the default parameter set. + * This message is followed by 0..n messages carrying device + * specific parameter sets. + * Although the ordering should be sufficient, NDTA_NAME can be + * used to identify sequences. The initial message can be identified + * by checking for NDTA_CONFIG. The device specific messages do + * not contain this TLV but have NDTPA_IFINDEX set to the + * corresponding interface index. + * + * To change neighbour table attributes, send RTM_SETNEIGHTBL + * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3], + * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked + * otherwise. Device specific parameter sets can be changed by + * setting NDTPA_IFINDEX to the interface index of the corresponding + * device. + ****/ + +struct ndt_stats +{ + __u64 ndts_allocs; + __u64 ndts_destroys; + __u64 ndts_hash_grows; + __u64 ndts_res_failed; + __u64 ndts_lookups; + __u64 ndts_hits; + __u64 ndts_rcv_probes_mcast; + __u64 ndts_rcv_probes_ucast; + __u64 ndts_periodic_gc_runs; + __u64 ndts_forced_gc_runs; +}; + +enum { + NDTPA_UNSPEC, + NDTPA_IFINDEX, /* u32, unchangeable */ + NDTPA_REFCNT, /* u32, read-only */ + NDTPA_REACHABLE_TIME, /* u64, read-only, msecs */ + NDTPA_BASE_REACHABLE_TIME, /* u64, msecs */ + NDTPA_RETRANS_TIME, /* u64, msecs */ + NDTPA_GC_STALETIME, /* u64, msecs */ + NDTPA_DELAY_PROBE_TIME, /* u64, msecs */ + NDTPA_QUEUE_LEN, /* u32 */ + NDTPA_APP_PROBES, /* u32 */ + NDTPA_UCAST_PROBES, /* u32 */ + NDTPA_MCAST_PROBES, /* u32 */ + NDTPA_ANYCAST_DELAY, /* u64, msecs */ + NDTPA_PROXY_DELAY, /* u64, msecs */ + NDTPA_PROXY_QLEN, /* u32 */ + NDTPA_LOCKTIME, /* u64, msecs */ + __NDTPA_MAX +}; +#define NDTPA_MAX (__NDTPA_MAX - 1) + +struct ndtmsg +{ + __u8 ndtm_family; + __u8 ndtm_pad1; + __u16 ndtm_pad2; +}; + +struct ndt_config +{ + __u16 ndtc_key_len; + __u16 ndtc_entry_size; + __u32 ndtc_entries; + __u32 ndtc_last_flush; /* delta to now in msecs */ + __u32 ndtc_last_rand; /* delta to now in msecs */ + __u32 ndtc_hash_rnd; + __u32 ndtc_hash_mask; + __u32 ndtc_hash_chain_gc; + __u32 ndtc_proxy_qlen; +}; + +enum { + NDTA_UNSPEC, + NDTA_NAME, /* char *, unchangeable */ + NDTA_THRESH1, /* u32 */ + NDTA_THRESH2, /* u32 */ + NDTA_THRESH3, /* u32 */ + NDTA_CONFIG, /* struct ndt_config, read-only */ + NDTA_PARMS, /* nested TLV NDTPA_* */ + NDTA_STATS, /* struct ndt_stats, read-only */ + NDTA_GC_INTERVAL, /* u64, msecs */ + __NDTA_MAX +}; +#define NDTA_MAX (__NDTA_MAX - 1) + #endif diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 9750f0214c22..784a1a29490e 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -384,107 +384,6 @@ struct rta_session } u; }; - - -/***************************************************************** - * Neighbour tables specific messages. - * - * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the - * NLM_F_DUMP flag set. Every neighbour table configuration is - * spread over multiple messages to avoid running into message - * size limits on systems with many interfaces. The first message - * in the sequence transports all not device specific data such as - * statistics, configuration, and the default parameter set. - * This message is followed by 0..n messages carrying device - * specific parameter sets. - * Although the ordering should be sufficient, NDTA_NAME can be - * used to identify sequences. The initial message can be identified - * by checking for NDTA_CONFIG. The device specific messages do - * not contain this TLV but have NDTPA_IFINDEX set to the - * corresponding interface index. - * - * To change neighbour table attributes, send RTM_SETNEIGHTBL - * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3], - * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked - * otherwise. Device specific parameter sets can be changed by - * setting NDTPA_IFINDEX to the interface index of the corresponding - * device. - ****/ - -struct ndt_stats -{ - __u64 ndts_allocs; - __u64 ndts_destroys; - __u64 ndts_hash_grows; - __u64 ndts_res_failed; - __u64 ndts_lookups; - __u64 ndts_hits; - __u64 ndts_rcv_probes_mcast; - __u64 ndts_rcv_probes_ucast; - __u64 ndts_periodic_gc_runs; - __u64 ndts_forced_gc_runs; -}; - -enum { - NDTPA_UNSPEC, - NDTPA_IFINDEX, /* u32, unchangeable */ - NDTPA_REFCNT, /* u32, read-only */ - NDTPA_REACHABLE_TIME, /* u64, read-only, msecs */ - NDTPA_BASE_REACHABLE_TIME, /* u64, msecs */ - NDTPA_RETRANS_TIME, /* u64, msecs */ - NDTPA_GC_STALETIME, /* u64, msecs */ - NDTPA_DELAY_PROBE_TIME, /* u64, msecs */ - NDTPA_QUEUE_LEN, /* u32 */ - NDTPA_APP_PROBES, /* u32 */ - NDTPA_UCAST_PROBES, /* u32 */ - NDTPA_MCAST_PROBES, /* u32 */ - NDTPA_ANYCAST_DELAY, /* u64, msecs */ - NDTPA_PROXY_DELAY, /* u64, msecs */ - NDTPA_PROXY_QLEN, /* u32 */ - NDTPA_LOCKTIME, /* u64, msecs */ - __NDTPA_MAX -}; -#define NDTPA_MAX (__NDTPA_MAX - 1) - -struct ndtmsg -{ - __u8 ndtm_family; - __u8 ndtm_pad1; - __u16 ndtm_pad2; -}; - -struct ndt_config -{ - __u16 ndtc_key_len; - __u16 ndtc_entry_size; - __u32 ndtc_entries; - __u32 ndtc_last_flush; /* delta to now in msecs */ - __u32 ndtc_last_rand; /* delta to now in msecs */ - __u32 ndtc_hash_rnd; - __u32 ndtc_hash_mask; - __u32 ndtc_hash_chain_gc; - __u32 ndtc_proxy_qlen; -}; - -enum { - NDTA_UNSPEC, - NDTA_NAME, /* char *, unchangeable */ - NDTA_THRESH1, /* u32 */ - NDTA_THRESH2, /* u32 */ - NDTA_THRESH3, /* u32 */ - NDTA_CONFIG, /* struct ndt_config, read-only */ - NDTA_PARMS, /* nested TLV NDTPA_* */ - NDTA_STATS, /* struct ndt_stats, read-only */ - NDTA_GC_INTERVAL, /* u64, msecs */ - __NDTA_MAX -}; -#define NDTA_MAX (__NDTA_MAX - 1) - -#define NDTA_RTA(r) ((struct rtattr*)(((char*)(r)) + \ - NLMSG_ALIGN(sizeof(struct ndtmsg)))) -#define NDTA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndtmsg)) - - /**** * General form of address family dependent message. ****/ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 78ccbd4c4e37..a1b783a6afc6 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -112,7 +112,6 @@ static const int rtm_min[RTM_NR_FAMILIES] = [RTM_FAM(RTM_NEWPREFIX)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), [RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), [RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), - [RTM_FAM(RTM_NEWNEIGHTBL)] = NLMSG_LENGTH(sizeof(struct ndtmsg)), }; static const int rta_max[RTM_NR_FAMILIES] = @@ -125,7 +124,6 @@ static const int rta_max[RTM_NR_FAMILIES] = [RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX, [RTM_FAM(RTM_NEWTFILTER)] = TCA_MAX, [RTM_FAM(RTM_NEWACTION)] = TCAA_MAX, - [RTM_FAM(RTM_NEWNEIGHTBL)] = NDTA_MAX, }; void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data) From ac5a488ef252ed673cb067843e411f8cc43f7ab9 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Mon, 7 Aug 2006 20:57:31 -0700 Subject: [PATCH 0423/1063] [NET]: Round out in-kernel sockets API This patch implements wrapper functions that provide a convenient way to access the sockets API for in-kernel users like sunrpc, cifs & ocfs2 etc and any future users. Signed-off-by: Sridhar Samudrala Acked-by: James Morris Signed-off-by: David S. Miller --- include/linux/net.h | 19 ++++++++ net/socket.c | 113 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+) diff --git a/include/linux/net.h b/include/linux/net.h index b20c53c74413..19da2c08d7b6 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -208,6 +208,25 @@ extern int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len, int flags); +extern int kernel_bind(struct socket *sock, struct sockaddr *addr, + int addrlen); +extern int kernel_listen(struct socket *sock, int backlog); +extern int kernel_accept(struct socket *sock, struct socket **newsock, + int flags); +extern int kernel_connect(struct socket *sock, struct sockaddr *addr, + int addrlen, int flags); +extern int kernel_getsockname(struct socket *sock, struct sockaddr *addr, + int *addrlen); +extern int kernel_getpeername(struct socket *sock, struct sockaddr *addr, + int *addrlen); +extern int kernel_getsockopt(struct socket *sock, int level, int optname, + char *optval, int *optlen); +extern int kernel_setsockopt(struct socket *sock, int level, int optname, + char *optval, int optlen); +extern int kernel_sendpage(struct socket *sock, struct page *page, int offset, + size_t size, int flags); +extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg); + #ifndef CONFIG_SMP #define SOCKOPS_WRAPPED(name) name #define SOCKOPS_WRAP(name, fam) diff --git a/net/socket.c b/net/socket.c index 6756e57e1ff0..2eaebf934a1a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2170,6 +2170,109 @@ static long compat_sock_ioctl(struct file *file, unsigned cmd, } #endif +int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) +{ + return sock->ops->bind(sock, addr, addrlen); +} + +int kernel_listen(struct socket *sock, int backlog) +{ + return sock->ops->listen(sock, backlog); +} + +int kernel_accept(struct socket *sock, struct socket **newsock, int flags) +{ + struct sock *sk = sock->sk; + int err; + + err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, + newsock); + if (err < 0) + goto done; + + err = sock->ops->accept(sock, *newsock, flags); + if (err < 0) { + sock_release(*newsock); + goto done; + } + + (*newsock)->ops = sock->ops; + +done: + return err; +} + +int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, + int flags) +{ + return sock->ops->connect(sock, addr, addrlen, flags); +} + +int kernel_getsockname(struct socket *sock, struct sockaddr *addr, + int *addrlen) +{ + return sock->ops->getname(sock, addr, addrlen, 0); +} + +int kernel_getpeername(struct socket *sock, struct sockaddr *addr, + int *addrlen) +{ + return sock->ops->getname(sock, addr, addrlen, 1); +} + +int kernel_getsockopt(struct socket *sock, int level, int optname, + char *optval, int *optlen) +{ + mm_segment_t oldfs = get_fs(); + int err; + + set_fs(KERNEL_DS); + if (level == SOL_SOCKET) + err = sock_getsockopt(sock, level, optname, optval, optlen); + else + err = sock->ops->getsockopt(sock, level, optname, optval, + optlen); + set_fs(oldfs); + return err; +} + +int kernel_setsockopt(struct socket *sock, int level, int optname, + char *optval, int optlen) +{ + mm_segment_t oldfs = get_fs(); + int err; + + set_fs(KERNEL_DS); + if (level == SOL_SOCKET) + err = sock_setsockopt(sock, level, optname, optval, optlen); + else + err = sock->ops->setsockopt(sock, level, optname, optval, + optlen); + set_fs(oldfs); + return err; +} + +int kernel_sendpage(struct socket *sock, struct page *page, int offset, + size_t size, int flags) +{ + if (sock->ops->sendpage) + return sock->ops->sendpage(sock, page, offset, size, flags); + + return sock_no_sendpage(sock, page, offset, size, flags); +} + +int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) +{ + mm_segment_t oldfs = get_fs(); + int err; + + set_fs(KERNEL_DS); + err = sock->ops->ioctl(sock, cmd, arg); + set_fs(oldfs); + + return err; +} + /* ABI emulation layers need these two */ EXPORT_SYMBOL(move_addr_to_kernel); EXPORT_SYMBOL(move_addr_to_user); @@ -2186,3 +2289,13 @@ EXPORT_SYMBOL(sock_wake_async); EXPORT_SYMBOL(sockfd_lookup); EXPORT_SYMBOL(kernel_sendmsg); EXPORT_SYMBOL(kernel_recvmsg); +EXPORT_SYMBOL(kernel_bind); +EXPORT_SYMBOL(kernel_listen); +EXPORT_SYMBOL(kernel_accept); +EXPORT_SYMBOL(kernel_connect); +EXPORT_SYMBOL(kernel_getsockname); +EXPORT_SYMBOL(kernel_getpeername); +EXPORT_SYMBOL(kernel_getsockopt); +EXPORT_SYMBOL(kernel_setsockopt); +EXPORT_SYMBOL(kernel_sendpage); +EXPORT_SYMBOL(kernel_sock_ioctl); From e6242e928ef1e4ed853f909a7479e4934f4bcb70 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Mon, 7 Aug 2006 20:58:01 -0700 Subject: [PATCH 0424/1063] [SUNRPC]: Update to use in-kernel sockets API. Signed-off-by: Sridhar Samudrala Acked-by: James Morris Signed-off-by: David S. Miller --- net/sunrpc/svcsock.c | 38 ++++++++++++++------------------------ net/sunrpc/xprtsock.c | 8 ++++---- 2 files changed, 18 insertions(+), 28 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index d9a95732df46..953aff89bcac 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -388,7 +388,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) /* send head */ if (slen == xdr->head[0].iov_len) flags = 0; - len = sock->ops->sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags); + len = kernel_sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags); if (len != xdr->head[0].iov_len) goto out; slen -= xdr->head[0].iov_len; @@ -400,7 +400,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) while (pglen > 0) { if (slen == size) flags = 0; - result = sock->ops->sendpage(sock, *ppage, base, size, flags); + result = kernel_sendpage(sock, *ppage, base, size, flags); if (result > 0) len += result; if (result != size) @@ -413,7 +413,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) } /* send tail */ if (xdr->tail[0].iov_len) { - result = sock->ops->sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage], + result = kernel_sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage], ((unsigned long)xdr->tail[0].iov_base)& (PAGE_SIZE-1), xdr->tail[0].iov_len, 0); @@ -434,13 +434,10 @@ out: static int svc_recv_available(struct svc_sock *svsk) { - mm_segment_t oldfs; struct socket *sock = svsk->sk_sock; int avail, err; - oldfs = get_fs(); set_fs(KERNEL_DS); - err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail); - set_fs(oldfs); + err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail); return (err >= 0)? avail : err; } @@ -472,7 +469,7 @@ svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen) * at accept time. FIXME */ alen = sizeof(rqstp->rq_addr); - sock->ops->getname(sock, (struct sockaddr *)&rqstp->rq_addr, &alen, 1); + kernel_getpeername(sock, (struct sockaddr *)&rqstp->rq_addr, &alen); dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len); @@ -758,7 +755,6 @@ svc_tcp_accept(struct svc_sock *svsk) struct svc_serv *serv = svsk->sk_server; struct socket *sock = svsk->sk_sock; struct socket *newsock; - const struct proto_ops *ops; struct svc_sock *newsvsk; int err, slen; @@ -766,29 +762,23 @@ svc_tcp_accept(struct svc_sock *svsk) if (!sock) return; - err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, &newsock); - if (err) { + clear_bit(SK_CONN, &svsk->sk_flags); + err = kernel_accept(sock, &newsock, O_NONBLOCK); + if (err < 0) { if (err == -ENOMEM) printk(KERN_WARNING "%s: no more sockets!\n", serv->sv_name); + else if (err != -EAGAIN && net_ratelimit()) + printk(KERN_WARNING "%s: accept failed (err %d)!\n", + serv->sv_name, -err); return; } - dprintk("svc: tcp_accept %p allocated\n", newsock); - newsock->ops = ops = sock->ops; - - clear_bit(SK_CONN, &svsk->sk_flags); - if ((err = ops->accept(sock, newsock, O_NONBLOCK)) < 0) { - if (err != -EAGAIN && net_ratelimit()) - printk(KERN_WARNING "%s: accept failed (err %d)!\n", - serv->sv_name, -err); - goto failed; /* aborted connection or whatever */ - } set_bit(SK_CONN, &svsk->sk_flags); svc_sock_enqueue(svsk); slen = sizeof(sin); - err = ops->getname(newsock, (struct sockaddr *) &sin, &slen, 1); + err = kernel_getpeername(newsock, (struct sockaddr *) &sin, &slen); if (err < 0) { if (net_ratelimit()) printk(KERN_WARNING "%s: peername failed (err %d)!\n", @@ -1406,14 +1396,14 @@ svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin) if (sin != NULL) { if (type == SOCK_STREAM) sock->sk->sk_reuse = 1; /* allow address reuse */ - error = sock->ops->bind(sock, (struct sockaddr *) sin, + error = kernel_bind(sock, (struct sockaddr *) sin, sizeof(*sin)); if (error < 0) goto bummer; } if (protocol == IPPROTO_TCP) { - if ((error = sock->ops->listen(sock, 64)) < 0) + if ((error = kernel_listen(sock, 64)) < 0) goto bummer; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 441bd53f5eca..8b319e375049 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -207,7 +207,7 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a base &= ~PAGE_CACHE_MASK; } - sendpage = sock->ops->sendpage ? : sock_no_sendpage; + sendpage = kernel_sendpage; do { int flags = XS_SENDMSG_FLAGS; @@ -986,7 +986,7 @@ static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) do { myaddr.sin_port = htons(port); - err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, + err = kernel_bind(sock, (struct sockaddr *) &myaddr, sizeof(myaddr)); if (err == 0) { xprt->port = port; @@ -1081,7 +1081,7 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) */ memset(&any, 0, sizeof(any)); any.sa_family = AF_UNSPEC; - result = sock->ops->connect(sock, &any, sizeof(any), 0); + result = kernel_connect(sock, &any, sizeof(any), 0); if (result) dprintk("RPC: AF_UNSPEC connect return code %d\n", result); @@ -1151,7 +1151,7 @@ static void xs_tcp_connect_worker(void *args) /* Tell the socket layer to start connecting... */ xprt->stat.connect_count++; xprt->stat.connect_start = jiffies; - status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, + status = kernel_connect(sock, (struct sockaddr *) &xprt->addr, sizeof(xprt->addr), O_NONBLOCK); dprintk("RPC: %p connect status %d connected %d sock state %d\n", xprt, -status, xprt_connected(xprt), sock->sk->sk_state); From 8ce11e6a9faf1f1c849b77104adc1642c46aee95 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 7 Aug 2006 21:50:48 -0700 Subject: [PATCH 0425/1063] [NET]: Make code static. This patch makes needlessly global code static. Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 4 ---- net/ipv4/cipso_ipv4.c | 2 +- net/ipv4/fib_rules.c | 4 ++-- net/ipv6/fib6_rules.c | 4 ++-- net/ipv6/ip6_fib.c | 6 +++--- net/ipv6/route.c | 6 +++--- net/netlabel/netlabel_domainhash.c | 4 ++-- 7 files changed, 13 insertions(+), 17 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 7b47e8d5a765..c0660cea9a2f 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -192,10 +192,6 @@ struct fib6_node *fib6_locate(struct fib6_node *root, struct in6_addr *daddr, int dst_len, struct in6_addr *saddr, int src_len); -extern void fib6_clean_tree(struct fib6_node *root, - int (*func)(struct rt6_info *, void *arg), - int prune, void *arg); - extern void fib6_clean_all(int (*func)(struct rt6_info *, void *arg), int prune, void *arg); diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index b82a101c95c5..80a2a0911b49 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -60,7 +60,7 @@ struct cipso_v4_domhsh_entry { * if in practice there are a lot of different DOIs this list should * probably be turned into a hash table or something similar so we * can do quick lookups. */ -DEFINE_SPINLOCK(cipso_v4_doi_list_lock); +static DEFINE_SPINLOCK(cipso_v4_doi_list_lock); static struct list_head cipso_v4_doi_list = LIST_HEAD_INIT(cipso_v4_doi_list); /* Label mapping cache */ diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 23ec6ae1a0f6..03d1e8a43a48 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -101,8 +101,8 @@ int fib_lookup(struct flowi *flp, struct fib_result *res) return err; } -int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, - struct fib_lookup_arg *arg) +static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, + int flags, struct fib_lookup_arg *arg) { int err = -EAGAIN; struct fib_table *tbl; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 94a46ec967a4..bf9bba83b852 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -66,8 +66,8 @@ struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, return (struct dst_entry *) arg.result; } -int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, - int flags, struct fib_lookup_arg *arg) +static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, + int flags, struct fib_lookup_arg *arg) { struct rt6_info *rt = NULL; struct fib6_table *table; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index ce226c14bef5..1f2316187ca4 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1169,9 +1169,9 @@ static int fib6_clean_node(struct fib6_walker_t *w) * ignoring pure split nodes) will be scanned. */ -void fib6_clean_tree(struct fib6_node *root, - int (*func)(struct rt6_info *, void *arg), - int prune, void *arg) +static void fib6_clean_tree(struct fib6_node *root, + int (*func)(struct rt6_info *, void *arg), + int prune, void *arg) { struct fib6_cleaner_t c; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 41c5905d3191..e08d84063c1f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -613,8 +613,8 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d return rt; } -struct rt6_info *ip6_pol_route_input(struct fib6_table *table, struct flowi *fl, - int flags) +static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, + struct flowi *fl, int flags) { struct fib6_node *fn; struct rt6_info *rt, *nrt; @@ -872,7 +872,7 @@ static inline unsigned int ipv6_advmss(unsigned int mtu) } static struct dst_entry *ndisc_dst_gc_list; -DEFINE_SPINLOCK(ndisc_lock); +static DEFINE_SPINLOCK(ndisc_lock); struct dst_entry *ndisc_dst_alloc(struct net_device *dev, struct neighbour *neigh, diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 5bb3fad4a115..0489a1378101 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -50,11 +50,11 @@ struct netlbl_domhsh_tbl { /* Domain hash table */ /* XXX - updates should be so rare that having one spinlock for the entire * hash table should be okay */ -DEFINE_SPINLOCK(netlbl_domhsh_lock); +static DEFINE_SPINLOCK(netlbl_domhsh_lock); static struct netlbl_domhsh_tbl *netlbl_domhsh = NULL; /* Default domain mapping */ -DEFINE_SPINLOCK(netlbl_domhsh_def_lock); +static DEFINE_SPINLOCK(netlbl_domhsh_def_lock); static struct netlbl_dom_map *netlbl_domhsh_def = NULL; /* From 8423a9aadfaa135fd5fd1ab8bbd4a1e76b4143c9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 7 Aug 2006 21:54:37 -0700 Subject: [PATCH 0426/1063] [IPV6]: Protect RTM_GETRULE table entry with IPV6_MULTIPLE_TABLES ifdef This is how IPv4 handles this case too. Based upon a patch from Andrew Morton. Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c2a4db843e51..9ba1e811ba50 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3529,7 +3529,9 @@ static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { [RTM_DELROUTE - RTM_BASE] = { .doit = inet6_rtm_delroute, }, [RTM_GETROUTE - RTM_BASE] = { .doit = inet6_rtm_getroute, .dumpit = inet6_dump_fib, }, +#ifdef CONFIG_IPV6_MULTIPLE_TABLES [RTM_GETRULE - RTM_BASE] = { .dumpit = fib6_rules_dump, }, +#endif }; static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) From 0298f36a579b5bd7f10f6f6d57e5929977a865a1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 7 Aug 2006 21:56:52 -0700 Subject: [PATCH 0427/1063] [IPV4]: Kill fib4_rules_clean(). As noted by Adrian Bunk this function is totally unused. Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 - net/ipv4/fib_rules.c | 5 ----- 2 files changed, 6 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 14c82e611c95..adf73586bc05 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -254,7 +254,6 @@ extern struct fib_table *fib_hash_init(int id); extern int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb); extern void __init fib4_rules_init(void); -extern void __exit fib4_rules_cleanup(void); #ifdef CONFIG_NET_CLS_ROUTE extern u32 fib_rules_tclass(struct fib_result *res); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 03d1e8a43a48..d242e5291fcc 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -347,8 +347,3 @@ void __init fib4_rules_init(void) fib_rules_register(&fib4_rules_ops); } - -void __exit fib4_rules_cleanup(void) -{ - fib_rules_unregister(&fib4_rules_ops); -} From 1a01912ae0a5666c4c24eaae2b4821711e2ad79a Mon Sep 17 00:00:00 2001 From: Louis Nyffenegger Date: Tue, 8 Aug 2006 00:56:11 -0700 Subject: [PATCH 0428/1063] [INET]: Remove is_setbyuser patch The value is_setbyuser from struct ip_options is never used and set only one time (http://linux-net.osdl.org/index.php/TODO#IPV4). This little patch removes it from the kernel source. Signed-off-by: Louis Nyffenegger Signed-off-by: David S. Miller --- include/net/inet_sock.h | 4 +--- net/ipv4/ip_options.c | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index f4caad56cd03..f6242710f2ff 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -27,7 +27,6 @@ /** struct ip_options - IP Options * * @faddr - Saved first hop address - * @is_setbyuser - Set by setsockopt? * @is_data - Options in __data, rather than skb * @is_strictroute - Strict source route * @srr_is_hit - Packet destination addr was our one @@ -42,8 +41,7 @@ struct ip_options { unsigned char srr; unsigned char rr; unsigned char ts; - unsigned char is_setbyuser:1, - is_data:1, + unsigned char is_data:1, is_strictroute:1, srr_is_hit:1, is_changed:1, diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index e0a93b4fa8cc..e7437c091326 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -525,7 +525,6 @@ static int ip_options_get_finish(struct ip_options **optp, opt->__data[optlen++] = IPOPT_END; opt->optlen = optlen; opt->is_data = 1; - opt->is_setbyuser = 1; if (optlen && ip_options_compile(opt, NULL)) { kfree(opt); return -EINVAL; From 99a92ff50424146ba01a222248fd47a1cd55b78f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 8 Aug 2006 02:18:10 -0700 Subject: [PATCH 0429/1063] [IPV4]: Uninline inet_lookup_listener By modern standards this function is way too big to be inlined. It's even bigger than __inet_lookup_listener :) Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 33 ++------------------------------- net/ipv4/inet_hashtables.c | 35 ++++++++++++++++++++++++++++++++--- 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 98e0bb3014fe..bd513f3b9c7e 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -271,39 +271,10 @@ static inline int inet_iif(const struct sk_buff *skb) return ((struct rtable *)skb->dst)->rt_iif; } -extern struct sock *__inet_lookup_listener(const struct hlist_head *head, - const u32 daddr, - const unsigned short hnum, - const int dif); - -/* Optimize the common listener case. */ -static inline struct sock * +extern struct sock * inet_lookup_listener(struct inet_hashinfo *hashinfo, const u32 daddr, - const unsigned short hnum, const int dif) -{ - struct sock *sk = NULL; - const struct hlist_head *head; - - read_lock(&hashinfo->lhash_lock); - head = &hashinfo->listening_hash[inet_lhashfn(hnum)]; - if (!hlist_empty(head)) { - const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); - - if (inet->num == hnum && !sk->sk_node.next && - (!inet->rcv_saddr || inet->rcv_saddr == daddr) && - (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && - !sk->sk_bound_dev_if) - goto sherry_cache; - sk = __inet_lookup_listener(head, daddr, hnum, dif); - } - if (sk) { -sherry_cache: - sock_hold(sk); - } - read_unlock(&hashinfo->lhash_lock); - return sk; -} + const unsigned short hnum, const int dif); /* Socket demux engine toys. */ #ifdef __BIG_ENDIAN diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 95fac5532994..bfc39066e730 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -124,8 +124,10 @@ EXPORT_SYMBOL(inet_listen_wlock); * remote address for the connection. So always assume those are both * wildcarded during the search since they can never be otherwise. */ -struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr, - const unsigned short hnum, const int dif) +static struct sock *__inet_lookup_listener(const struct hlist_head *head, + const u32 daddr, + const unsigned short hnum, + const int dif) { struct sock *result = NULL, *sk; const struct hlist_node *node; @@ -159,7 +161,34 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad return result; } -EXPORT_SYMBOL_GPL(__inet_lookup_listener); +/* Optimize the common listener case. */ +struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo, + const u32 daddr, const unsigned short hnum, + const int dif) +{ + struct sock *sk = NULL; + const struct hlist_head *head; + + read_lock(&hashinfo->lhash_lock); + head = &hashinfo->listening_hash[inet_lhashfn(hnum)]; + if (!hlist_empty(head)) { + const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); + + if (inet->num == hnum && !sk->sk_node.next && + (!inet->rcv_saddr || inet->rcv_saddr == daddr) && + (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && + !sk->sk_bound_dev_if) + goto sherry_cache; + sk = __inet_lookup_listener(head, daddr, hnum, dif); + } + if (sk) { +sherry_cache: + sock_hold(sk); + } + read_unlock(&hashinfo->lhash_lock); + return sk; +} +EXPORT_SYMBOL_GPL(inet_lookup_listener); /* called with local bh disabled */ static int __inet_check_established(struct inet_timewait_death_row *death_row, From b14295532421c40f82ee099fdbd3d011f022e756 Mon Sep 17 00:00:00 2001 From: Ville Nuorvala Date: Tue, 8 Aug 2006 16:44:17 -0700 Subject: [PATCH 0430/1063] [IPV6]: Make sure fib6_rule_lookup doesn't return NULL The callers of fib6_rule_lookup don't expect it to return NULL, therefore it must return ip6_null_entry whenever fib_rule_lookup fails. Signed-off-by: Ville Nuorvala Signed-off-by: David S. Miller --- net/ipv6/fib6_rules.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index bf9bba83b852..22a2fdb09831 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -63,7 +63,11 @@ struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, if (arg.rule) fib_rule_put(arg.rule); - return (struct dst_entry *) arg.result; + if (arg.result) + return (struct dst_entry *) arg.result; + + dst_hold(&ip6_null_entry.u.dst); + return &ip6_null_entry.u.dst; } static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, From 832b4c5e184391773e462653aa862a8cab71f38d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 29 Aug 2006 16:48:09 -0700 Subject: [PATCH 0431/1063] [IPV4] fib: convert reader/writer to spinlock Ther is no point in using a more expensive reader/writer lock for a low contention lock like the fib_info_lock. The only reader case is in handling route redirects. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 51738000f3dc..38bca473c7e2 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -49,7 +49,7 @@ #define FSprintk(a...) -static DEFINE_RWLOCK(fib_info_lock); +static DEFINE_SPINLOCK(fib_info_lock); static struct hlist_head *fib_info_hash; static struct hlist_head *fib_info_laddrhash; static unsigned int fib_hash_size; @@ -159,7 +159,7 @@ void free_fib_info(struct fib_info *fi) void fib_release_info(struct fib_info *fi) { - write_lock_bh(&fib_info_lock); + spin_lock_bh(&fib_info_lock); if (fi && --fi->fib_treeref == 0) { hlist_del(&fi->fib_hash); if (fi->fib_prefsrc) @@ -172,7 +172,7 @@ void fib_release_info(struct fib_info *fi) fi->fib_dead = 1; fib_info_put(fi); } - write_unlock_bh(&fib_info_lock); + spin_unlock_bh(&fib_info_lock); } static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) @@ -254,7 +254,7 @@ int ip_fib_check_default(u32 gw, struct net_device *dev) struct fib_nh *nh; unsigned int hash; - read_lock(&fib_info_lock); + spin_lock(&fib_info_lock); hash = fib_devindex_hashfn(dev->ifindex); head = &fib_info_devhash[hash]; @@ -262,12 +262,12 @@ int ip_fib_check_default(u32 gw, struct net_device *dev) if (nh->nh_dev == dev && nh->nh_gw == gw && !(nh->nh_flags&RTNH_F_DEAD)) { - read_unlock(&fib_info_lock); + spin_unlock(&fib_info_lock); return 0; } } - read_unlock(&fib_info_lock); + spin_unlock(&fib_info_lock); return -1; } @@ -598,7 +598,7 @@ static void fib_hash_move(struct hlist_head *new_info_hash, unsigned int old_size = fib_hash_size; unsigned int i, bytes; - write_lock_bh(&fib_info_lock); + spin_lock_bh(&fib_info_lock); old_info_hash = fib_info_hash; old_laddrhash = fib_info_laddrhash; fib_hash_size = new_size; @@ -639,7 +639,7 @@ static void fib_hash_move(struct hlist_head *new_info_hash, } fib_info_laddrhash = new_laddrhash; - write_unlock_bh(&fib_info_lock); + spin_unlock_bh(&fib_info_lock); bytes = old_size * sizeof(struct hlist_head *); fib_hash_free(old_info_hash, bytes); @@ -820,7 +820,7 @@ link_it: fi->fib_treeref++; atomic_inc(&fi->fib_clntref); - write_lock_bh(&fib_info_lock); + spin_lock_bh(&fib_info_lock); hlist_add_head(&fi->fib_hash, &fib_info_hash[fib_info_hashfn(fi)]); if (fi->fib_prefsrc) { @@ -839,7 +839,7 @@ link_it: head = &fib_info_devhash[hash]; hlist_add_head(&nh->nh_hash, head); } endfor_nexthops(fi) - write_unlock_bh(&fib_info_lock); + spin_unlock_bh(&fib_info_lock); return fi; err_inval: From 8f491069b40be5d627007a343f99759e9da6a178 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 9 Aug 2006 15:47:12 -0700 Subject: [PATCH 0432/1063] [IPV4]: Use network-order dport for all visible inet_lookup_* Right now most inet_lookup_* functions take a host-order hnum instead of a network-order dport because that's how it is represented internally. This means that users of these functions have to be careful about using the right byte-order. To add more confusion, inet_lookup takes a network-order dport unlike all other functions. So this patch changes all visible inet_lookup functions to take a dport and move all dport->hnum conversion inside them. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 31 ++++++++++++++++++++++++------- net/dccp/ipv4.c | 10 +++++----- net/ipv4/inet_hashtables.c | 18 +++++++++--------- net/ipv4/tcp_ipv4.c | 10 +++++----- 4 files changed, 43 insertions(+), 26 deletions(-) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index bd513f3b9c7e..b4491c9e2a5a 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -271,10 +271,16 @@ static inline int inet_iif(const struct sk_buff *skb) return ((struct rtable *)skb->dst)->rt_iif; } -extern struct sock * - inet_lookup_listener(struct inet_hashinfo *hashinfo, - const u32 daddr, - const unsigned short hnum, const int dif); +extern struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, + const u32 daddr, + const unsigned short hnum, + const int dif); + +static inline struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo, + u32 daddr, u16 dport, int dif) +{ + return __inet_lookup_listener(hashinfo, daddr, ntohs(dport), dif); +} /* Socket demux engine toys. */ #ifdef __BIG_ENDIAN @@ -362,14 +368,25 @@ hit: goto out; } +static inline struct sock * + inet_lookup_established(struct inet_hashinfo *hashinfo, + const u32 saddr, const u16 sport, + const u32 daddr, const u16 dport, + const int dif) +{ + return __inet_lookup_established(hashinfo, saddr, sport, daddr, + ntohs(dport), dif); +} + static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo, const u32 saddr, const u16 sport, - const u32 daddr, const u16 hnum, + const u32 daddr, const u16 dport, const int dif) { + u16 hnum = ntohs(dport); struct sock *sk = __inet_lookup_established(hashinfo, saddr, sport, daddr, hnum, dif); - return sk ? : inet_lookup_listener(hashinfo, daddr, hnum, dif); + return sk ? : __inet_lookup_listener(hashinfo, daddr, hnum, dif); } static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo, @@ -380,7 +397,7 @@ static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo, struct sock *sk; local_bh_disable(); - sk = __inet_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif); + sk = __inet_lookup(hashinfo, saddr, sport, daddr, dport, dif); local_bh_enable(); return sk; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 171d363876ee..9a1a76a7dc41 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -608,10 +608,10 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) if (req != NULL) return dccp_check_req(sk, skb, req, prev); - nsk = __inet_lookup_established(&dccp_hashinfo, - iph->saddr, dh->dccph_sport, - iph->daddr, ntohs(dh->dccph_dport), - inet_iif(skb)); + nsk = inet_lookup_established(&dccp_hashinfo, + iph->saddr, dh->dccph_sport, + iph->daddr, dh->dccph_dport, + inet_iif(skb)); if (nsk != NULL) { if (nsk->sk_state != DCCP_TIME_WAIT) { bh_lock_sock(nsk); @@ -925,7 +925,7 @@ static int dccp_v4_rcv(struct sk_buff *skb) * Look up flow ID in table and get corresponding socket */ sk = __inet_lookup(&dccp_hashinfo, skb->nh.iph->saddr, dh->dccph_sport, - skb->nh.iph->daddr, ntohs(dh->dccph_dport), + skb->nh.iph->daddr, dh->dccph_dport, inet_iif(skb)); /* diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index bfc39066e730..fb296c9a7f3f 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -124,10 +124,10 @@ EXPORT_SYMBOL(inet_listen_wlock); * remote address for the connection. So always assume those are both * wildcarded during the search since they can never be otherwise. */ -static struct sock *__inet_lookup_listener(const struct hlist_head *head, - const u32 daddr, - const unsigned short hnum, - const int dif) +static struct sock *inet_lookup_listener_slow(const struct hlist_head *head, + const u32 daddr, + const unsigned short hnum, + const int dif) { struct sock *result = NULL, *sk; const struct hlist_node *node; @@ -162,9 +162,9 @@ static struct sock *__inet_lookup_listener(const struct hlist_head *head, } /* Optimize the common listener case. */ -struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo, - const u32 daddr, const unsigned short hnum, - const int dif) +struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, + const u32 daddr, const unsigned short hnum, + const int dif) { struct sock *sk = NULL; const struct hlist_head *head; @@ -179,7 +179,7 @@ struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo, (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && !sk->sk_bound_dev_if) goto sherry_cache; - sk = __inet_lookup_listener(head, daddr, hnum, dif); + sk = inet_lookup_listener_slow(head, daddr, hnum, dif); } if (sk) { sherry_cache: @@ -188,7 +188,7 @@ sherry_cache: read_unlock(&hashinfo->lhash_lock); return sk; } -EXPORT_SYMBOL_GPL(inet_lookup_listener); +EXPORT_SYMBOL_GPL(__inet_lookup_listener); /* called with local bh disabled */ static int __inet_check_established(struct inet_timewait_death_row *death_row, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b2aa512a30e9..2973dee0a489 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -951,9 +951,9 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) if (req) return tcp_check_req(sk, skb, req, prev); - nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr, - th->source, skb->nh.iph->daddr, - ntohs(th->dest), inet_iif(skb)); + nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr, + th->source, skb->nh.iph->daddr, + th->dest, inet_iif(skb)); if (nsk) { if (nsk->sk_state != TCP_TIME_WAIT) { @@ -1090,7 +1090,7 @@ int tcp_v4_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->sacked = 0; sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source, - skb->nh.iph->daddr, ntohs(th->dest), + skb->nh.iph->daddr, th->dest, inet_iif(skb)); if (!sk) @@ -1168,7 +1168,7 @@ do_time_wait: case TCP_TW_SYN: { struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, skb->nh.iph->daddr, - ntohs(th->dest), + th->dest, inet_iif(skb)); if (sk2) { inet_twsk_deschedule((struct inet_timewait_sock *)sk, From a8731cbf61c8768ea129780b70dc7dfc6795aad4 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 9 Aug 2006 15:56:46 -0700 Subject: [PATCH 0433/1063] [DECNET]: Covert rules to use generic code This patch converts the DECnet rules code to use the generic rules system created by Thomas Graf . Signed-off-by: Steven Whitehouse Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 3 +- include/net/dn_fib.h | 8 +- net/decnet/Kconfig | 1 + net/decnet/af_decnet.c | 1 + net/decnet/dn_dev.c | 3 +- net/decnet/dn_fib.c | 1 + net/decnet/dn_route.c | 3 +- net/decnet/dn_rules.c | 506 ++++++++++++++------------------------ net/decnet/dn_table.c | 1 + 9 files changed, 202 insertions(+), 325 deletions(-) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 784a1a29490e..0aaffa2ae666 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -534,7 +534,8 @@ enum rtnetlink_groups { RTNLGRP_NOP2, RTNLGRP_DECnet_ROUTE, #define RTNLGRP_DECnet_ROUTE RTNLGRP_DECnet_ROUTE - RTNLGRP_NOP3, + RTNLGRP_DECnet_RULE, +#define RTNLGRP_DECnet_RULE RTNLGRP_DECnet_RULE RTNLGRP_NOP4, RTNLGRP_IPV6_PREFIX, #define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h index a15dcf0d5c1e..32bc8ce5c5ce 100644 --- a/include/net/dn_fib.h +++ b/include/net/dn_fib.h @@ -22,7 +22,7 @@ struct dn_kern_rta }; struct dn_fib_res { - struct dn_fib_rule *r; + struct fib_rule *r; struct dn_fib_info *fi; unsigned char prefixlen; unsigned char nh_sel; @@ -147,10 +147,8 @@ extern void dn_fib_table_cleanup(void); */ extern void dn_fib_rules_init(void); extern void dn_fib_rules_cleanup(void); -extern void dn_fib_rule_put(struct dn_fib_rule *); -extern __le16 dn_fib_rules_policy(__le16 saddr, struct dn_fib_res *res, unsigned *flags); extern unsigned dnet_addr_type(__le16 addr); -extern int dn_fib_lookup(const struct flowi *fl, struct dn_fib_res *res); +extern int dn_fib_lookup(struct flowi *fl, struct dn_fib_res *res); /* * rtnetlink interface @@ -176,7 +174,7 @@ static inline void dn_fib_res_put(struct dn_fib_res *res) if (res->fi) dn_fib_info_put(res->fi); if (res->r) - dn_fib_rule_put(res->r); + fib_rule_put(res->r); } extern struct dn_fib_table *dn_fib_tables[]; diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig index 92f2ec46fd22..36e72cb145b0 100644 --- a/net/decnet/Kconfig +++ b/net/decnet/Kconfig @@ -27,6 +27,7 @@ config DECNET config DECNET_ROUTER bool "DECnet: router support (EXPERIMENTAL)" depends on DECNET && EXPERIMENTAL + select FIB_RULES ---help--- Add support for turning your DECnet Endnode into a level 1 or 2 router. This is an experimental, but functional option. If you diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 5486247735f6..70e027375682 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -130,6 +130,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat #include #include #include +#include #include #include #include diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 632c5a90b589..88ea7a13bb24 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -1418,8 +1419,6 @@ static struct rtnetlink_link dnet_rtnetlink_table[RTM_NR_MSGTYPES] = [RTM_DELROUTE - RTM_BASE] = { .doit = dn_fib_rtm_delroute, }, [RTM_GETROUTE - RTM_BASE] = { .doit = dn_cache_getroute, .dumpit = dn_fib_dump, }, - [RTM_NEWRULE - RTM_BASE] = { .doit = dn_fib_rtm_newrule, }, - [RTM_DELRULE - RTM_BASE] = { .doit = dn_fib_rtm_delrule, }, [RTM_GETRULE - RTM_BASE] = { .dumpit = dn_fib_dump_rules, }, #else [RTM_GETROUTE - RTM_BASE] = { .doit = dn_cache_getroute, diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index fa20e2efcfc1..846df3954a63 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 743e9fcf7c5a..5e6f4616ca10 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -80,6 +80,7 @@ #include #include #include +#include #include #include #include @@ -1284,7 +1285,7 @@ static int dn_route_input_slow(struct sk_buff *skb) dev_hold(out_dev); if (res.r) - src_map = dn_fib_rules_policy(fl.fld_src, &res, &flags); + src_map = fl.fld_src; /* no NAT support for now */ gateway = DN_FIB_RES_GW(res); if (res.type == RTN_NAT) { diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 6986be754ef2..096f1273e714 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -11,259 +11,198 @@ * * * Changes: + * Steve Whitehouse + * Updated for Thomas Graf's generic rules * */ -#include #include -#include -#include #include -#include #include #include -#include #include -#include #include -#include #include #include -#include -#include #include #include #include +#include #include #include #include #include +static struct fib_rules_ops dn_fib_rules_ops; + struct dn_fib_rule { - struct hlist_node r_hlist; - atomic_t r_clntref; - u32 r_preference; - unsigned char r_table; - unsigned char r_action; - unsigned char r_dst_len; - unsigned char r_src_len; - __le16 r_src; - __le16 r_srcmask; - __le16 r_dst; - __le16 r_dstmask; - __le16 r_srcmap; - u8 r_flags; + struct fib_rule common; + unsigned char dst_len; + unsigned char src_len; + __le16 src; + __le16 srcmask; + __le16 dst; + __le16 dstmask; + __le16 srcmap; + u8 flags; #ifdef CONFIG_DECNET_ROUTE_FWMARK - u32 r_fwmark; + u32 fwmark; #endif - int r_ifindex; - char r_ifname[IFNAMSIZ]; - int r_dead; - struct rcu_head rcu; }; static struct dn_fib_rule default_rule = { - .r_clntref = ATOMIC_INIT(2), - .r_preference = 0x7fff, - .r_table = RT_TABLE_MAIN, - .r_action = RTN_UNICAST + .common = { + .refcnt = ATOMIC_INIT(2), + .pref = 0x7fff, + .table = RT_TABLE_MAIN, + .action = FR_ACT_TO_TBL, + }, }; -static struct hlist_head dn_fib_rules; +static LIST_HEAD(dn_fib_rules); -int dn_fib_rtm_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) + +int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res) { - struct rtattr **rta = arg; - struct rtmsg *rtm = NLMSG_DATA(nlh); - struct dn_fib_rule *r; - struct hlist_node *node; - int err = -ESRCH; + struct fib_lookup_arg arg = { + .result = res, + }; + int err; - hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { - if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 2) == 0) && - rtm->rtm_src_len == r->r_src_len && - rtm->rtm_dst_len == r->r_dst_len && - (!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 2) == 0) && -#ifdef CONFIG_DECNET_ROUTE_FWMARK - (!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) && -#endif - (!rtm->rtm_type || rtm->rtm_type == r->r_action) && - (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) && - (!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) && - (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) { - - err = -EPERM; - if (r == &default_rule) - break; - - hlist_del_rcu(&r->r_hlist); - r->r_dead = 1; - dn_fib_rule_put(r); - err = 0; - break; - } - } + err = fib_rules_lookup(&dn_fib_rules_ops, flp, 0, &arg); + res->r = arg.rule; return err; } -static inline void dn_fib_rule_put_rcu(struct rcu_head *head) +int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, + struct fib_lookup_arg *arg) { - struct dn_fib_rule *r = container_of(head, struct dn_fib_rule, rcu); - kfree(r); -} + int err = -EAGAIN; + struct dn_fib_table *tbl; -void dn_fib_rule_put(struct dn_fib_rule *r) -{ - if (atomic_dec_and_test(&r->r_clntref)) { - if (r->r_dead) - call_rcu(&r->rcu, dn_fib_rule_put_rcu); - else - printk(KERN_DEBUG "Attempt to free alive dn_fib_rule\n"); - } -} + switch(rule->action) { + case FR_ACT_TO_TBL: + break; + case FR_ACT_UNREACHABLE: + err = -ENETUNREACH; + goto errout; -int dn_fib_rtm_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) -{ - struct rtattr **rta = arg; - struct rtmsg *rtm = NLMSG_DATA(nlh); - struct dn_fib_rule *r, *new_r, *last = NULL; - struct hlist_node *node = NULL; - unsigned char table_id; + case FR_ACT_PROHIBIT: + err = -EACCES; + goto errout; - if (rtm->rtm_src_len > 16 || rtm->rtm_dst_len > 16) - return -EINVAL; - - if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ) - return -EINVAL; - - if (rtm->rtm_type == RTN_NAT) - return -EINVAL; - - table_id = rtm->rtm_table; - if (table_id == RT_TABLE_UNSPEC) { - struct dn_fib_table *tb; - if (rtm->rtm_type == RTN_UNICAST) { - if ((tb = dn_fib_empty_table()) == NULL) - return -ENOBUFS; - table_id = tb->n; - } + case FR_ACT_BLACKHOLE: + default: + err = -EINVAL; + goto errout; } - new_r = kzalloc(sizeof(*new_r), GFP_KERNEL); - if (!new_r) - return -ENOMEM; + tbl = dn_fib_get_table(rule->table, 0); + if (tbl == NULL) + goto errout; + + err = tbl->lookup(tbl, flp, (struct dn_fib_res *)arg->result); + if (err > 0) + err = -EAGAIN; +errout: + return err; +} + +static struct nla_policy dn_fib_rule_policy[FRA_MAX+1] __read_mostly = { + [FRA_IFNAME] = { .type = NLA_STRING }, + [FRA_PRIORITY] = { .type = NLA_U32 }, + [FRA_SRC] = { .type = NLA_U16 }, + [FRA_DST] = { .type = NLA_U16 }, + [FRA_FWMARK] = { .type = NLA_U32 }, +}; + +static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) +{ + struct dn_fib_rule *r = (struct dn_fib_rule *)rule; + u16 daddr = fl->fld_dst; + u16 saddr = fl->fld_src; + + if (((saddr ^ r->src) & r->srcmask) || + ((daddr ^ r->dst) & r->dstmask)) + return 0; - if (rta[RTA_SRC-1]) - memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 2); - if (rta[RTA_DST-1]) - memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 2); - if (rta[RTA_GATEWAY-1]) - memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 2); - new_r->r_src_len = rtm->rtm_src_len; - new_r->r_dst_len = rtm->rtm_dst_len; - new_r->r_srcmask = dnet_make_mask(rtm->rtm_src_len); - new_r->r_dstmask = dnet_make_mask(rtm->rtm_dst_len); #ifdef CONFIG_DECNET_ROUTE_FWMARK - if (rta[RTA_PROTOINFO-1]) - memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4); + if (r->fwmark && (r->fwmark != fl->fld_fwmark)) + return 0; #endif - new_r->r_action = rtm->rtm_type; - new_r->r_flags = rtm->rtm_flags; - if (rta[RTA_PRIORITY-1]) - memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4); - new_r->r_table = table_id; - if (rta[RTA_IIF-1]) { - struct net_device *dev; - rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ); - new_r->r_ifindex = -1; - dev = dev_get_by_name(new_r->r_ifname); - if (dev) { - new_r->r_ifindex = dev->ifindex; - dev_put(dev); - } - } - r = container_of(dn_fib_rules.first, struct dn_fib_rule, r_hlist); - if (!new_r->r_preference) { - if (r && r->r_hlist.next != NULL) { - r = container_of(r->r_hlist.next, struct dn_fib_rule, r_hlist); - if (r->r_preference) - new_r->r_preference = r->r_preference - 1; - } - } - - hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { - if (r->r_preference > new_r->r_preference) - break; - last = r; - } - atomic_inc(&new_r->r_clntref); - - if (last) - hlist_add_after_rcu(&last->r_hlist, &new_r->r_hlist); - else - hlist_add_before_rcu(&new_r->r_hlist, &r->r_hlist); - return 0; + return 1; } - -int dn_fib_lookup(const struct flowi *flp, struct dn_fib_res *res) +static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb, + struct nlmsghdr *nlh, struct fib_rule_hdr *frh, + struct nlattr **tb) { - struct dn_fib_rule *r, *policy; - struct dn_fib_table *tb; - __le16 saddr = flp->fld_src; - __le16 daddr = flp->fld_dst; - struct hlist_node *node; - int err; + int err = -EINVAL; + struct dn_fib_rule *r = (struct dn_fib_rule *)rule; - rcu_read_lock(); + if (frh->src_len > 16 || frh->dst_len > 16 || frh->tos) + goto errout; - hlist_for_each_entry_rcu(r, node, &dn_fib_rules, r_hlist) { - if (((saddr^r->r_src) & r->r_srcmask) || - ((daddr^r->r_dst) & r->r_dstmask) || -#ifdef CONFIG_DECNET_ROUTE_FWMARK - (r->r_fwmark && r->r_fwmark != flp->fld_fwmark) || -#endif - (r->r_ifindex && r->r_ifindex != flp->iif)) - continue; + if (rule->table == RT_TABLE_UNSPEC) { + if (rule->action == FR_ACT_TO_TBL) { + struct dn_fib_table *table; - switch(r->r_action) { - case RTN_UNICAST: - case RTN_NAT: - policy = r; - break; - case RTN_UNREACHABLE: - rcu_read_unlock(); - return -ENETUNREACH; - default: - case RTN_BLACKHOLE: - rcu_read_unlock(); - return -EINVAL; - case RTN_PROHIBIT: - rcu_read_unlock(); - return -EACCES; - } + table = dn_fib_empty_table(); + if (table == NULL) { + err = -ENOBUFS; + goto errout; + } - if ((tb = dn_fib_get_table(r->r_table, 0)) == NULL) - continue; - err = tb->lookup(tb, flp, res); - if (err == 0) { - res->r = policy; - if (policy) - atomic_inc(&policy->r_clntref); - rcu_read_unlock(); - return 0; - } - if (err < 0 && err != -EAGAIN) { - rcu_read_unlock(); - return err; + rule->table = table->n; } } - rcu_read_unlock(); - return -ESRCH; + if (tb[FRA_SRC]) + r->src = nla_get_u16(tb[FRA_SRC]); + + if (tb[FRA_DST]) + r->dst = nla_get_u16(tb[FRA_DST]); + +#ifdef CONFIG_DECNET_ROUTE_FWMARK + if (tb[FRA_FWMARK]) + r->fwmark = nla_get_u32(tb[FRA_FWMARK]); +#endif + + r->src_len = frh->src_len; + r->srcmask = dnet_make_mask(r->src_len); + r->dst_len = frh->dst_len; + r->dstmask = dnet_make_mask(r->dst_len); + err = 0; +errout: + return err; +} + +static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, + struct nlattr **tb) +{ + struct dn_fib_rule *r = (struct dn_fib_rule *)rule; + + if (frh->src_len && (r->src_len != frh->src_len)) + return 0; + + if (frh->dst_len && (r->dst_len != frh->dst_len)) + return 0; + +#ifdef CONFIG_DECNET_ROUTE_FWMARK + if (tb[FRA_FWMARK] && (r->fwmark != nla_get_u32(tb[FRA_FWMARK]))) + return 0; +#endif + + if (tb[FRA_SRC] && (r->src != nla_get_u32(tb[FRA_SRC]))) + return 0; + + if (tb[FRA_DST] && (r->dst != nla_get_u32(tb[FRA_DST]))) + return 0; + + return 1; } unsigned dnet_addr_type(__le16 addr) @@ -284,142 +223,77 @@ unsigned dnet_addr_type(__le16 addr) return ret; } -__le16 dn_fib_rules_policy(__le16 saddr, struct dn_fib_res *res, unsigned *flags) +static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb, + struct nlmsghdr *nlh, struct fib_rule_hdr *frh) { - struct dn_fib_rule *r = res->r; + struct dn_fib_rule *r = (struct dn_fib_rule *)rule; - if (r->r_action == RTN_NAT) { - int addrtype = dnet_addr_type(r->r_srcmap); + frh->family = AF_DECnet; + frh->dst_len = r->dst_len; + frh->src_len = r->src_len; + frh->tos = 0; - if (addrtype == RTN_NAT) { - saddr = (saddr&~r->r_srcmask)|r->r_srcmap; - *flags |= RTCF_SNAT; - } else if (addrtype == RTN_LOCAL || r->r_srcmap == 0) { - saddr = r->r_srcmap; - *flags |= RTCF_MASQ; +#ifdef CONFIG_DECNET_ROUTE_FWMARK + if (r->fwmark) + NLA_PUT_U32(skb, FRA_FWMARK, r->fwmark); +#endif + if (r->dst_len) + NLA_PUT_U16(skb, FRA_DST, r->dst); + if (r->src_len) + NLA_PUT_U16(skb, FRA_SRC, r->src); + + return 0; + +nla_put_failure: + return -ENOBUFS; +} + +static u32 dn_fib_rule_default_pref(void) +{ + struct list_head *pos; + struct fib_rule *rule; + + if (!list_empty(&dn_fib_rules)) { + pos = dn_fib_rules.next; + if (pos->next != &dn_fib_rules) { + rule = list_entry(pos->next, struct fib_rule, list); + if (rule->pref) + return rule->pref - 1; } } - return saddr; -} -static void dn_fib_rules_detach(struct net_device *dev) -{ - struct hlist_node *node; - struct dn_fib_rule *r; - - hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { - if (r->r_ifindex == dev->ifindex) - r->r_ifindex = -1; - } -} - -static void dn_fib_rules_attach(struct net_device *dev) -{ - struct hlist_node *node; - struct dn_fib_rule *r; - - hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { - if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0) - r->r_ifindex = dev->ifindex; - } -} - -static int dn_fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct net_device *dev = ptr; - - switch(event) { - case NETDEV_UNREGISTER: - dn_fib_rules_detach(dev); - dn_fib_sync_down(0, dev, 1); - case NETDEV_REGISTER: - dn_fib_rules_attach(dev); - dn_fib_sync_up(dev); - } - - return NOTIFY_DONE; -} - - -static struct notifier_block dn_fib_rules_notifier = { - .notifier_call = dn_fib_rules_event, -}; - -static int dn_fib_fill_rule(struct sk_buff *skb, struct dn_fib_rule *r, - struct netlink_callback *cb, unsigned int flags) -{ - struct rtmsg *rtm; - struct nlmsghdr *nlh; - unsigned char *b = skb->tail; - - - nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWRULE, sizeof(*rtm), flags); - rtm = NLMSG_DATA(nlh); - rtm->rtm_family = AF_DECnet; - rtm->rtm_dst_len = r->r_dst_len; - rtm->rtm_src_len = r->r_src_len; - rtm->rtm_tos = 0; -#ifdef CONFIG_DECNET_ROUTE_FWMARK - if (r->r_fwmark) - RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark); -#endif - rtm->rtm_table = r->r_table; - rtm->rtm_protocol = 0; - rtm->rtm_scope = 0; - rtm->rtm_type = r->r_action; - rtm->rtm_flags = r->r_flags; - - if (r->r_dst_len) - RTA_PUT(skb, RTA_DST, 2, &r->r_dst); - if (r->r_src_len) - RTA_PUT(skb, RTA_SRC, 2, &r->r_src); - if (r->r_ifname[0]) - RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname); - if (r->r_preference) - RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference); - if (r->r_srcmap) - RTA_PUT(skb, RTA_GATEWAY, 2, &r->r_srcmap); - nlh->nlmsg_len = skb->tail - b; - return skb->len; - -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; + return 0; } int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) { - int idx = 0; - int s_idx = cb->args[0]; - struct dn_fib_rule *r; - struct hlist_node *node; - - rcu_read_lock(); - hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { - if (idx < s_idx) - goto next; - if (dn_fib_fill_rule(skb, r, cb, NLM_F_MULTI) < 0) - break; -next: - idx++; - } - rcu_read_unlock(); - cb->args[0] = idx; - - return skb->len; + return fib_rules_dump(skb, cb, AF_DECnet); } +static struct fib_rules_ops dn_fib_rules_ops = { + .family = AF_DECnet, + .rule_size = sizeof(struct dn_fib_rule), + .action = dn_fib_rule_action, + .match = dn_fib_rule_match, + .configure = dn_fib_rule_configure, + .compare = dn_fib_rule_compare, + .fill = dn_fib_rule_fill, + .default_pref = dn_fib_rule_default_pref, + .nlgroup = RTNLGRP_DECnet_RULE, + .policy = dn_fib_rule_policy, + .rules_list = &dn_fib_rules, + .owner = THIS_MODULE, +}; + void __init dn_fib_rules_init(void) { - INIT_HLIST_HEAD(&dn_fib_rules); - hlist_add_head(&default_rule.r_hlist, &dn_fib_rules); - register_netdevice_notifier(&dn_fib_rules_notifier); + list_add_tail(&default_rule.common.list, &dn_fib_rules); + fib_rules_register(&dn_fib_rules_ops); } void __exit dn_fib_rules_cleanup(void) { - unregister_netdevice_notifier(&dn_fib_rules_notifier); + fib_rules_unregister(&dn_fib_rules_ops); } diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index e926c952e363..2e01b67398c8 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include From a22ec367b08455f95fa0096ce1999950b6f6911c Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 9 Aug 2006 16:00:57 -0700 Subject: [PATCH 0434/1063] [DECNET]: Convert rwlock to spinlock As per Stephen Hemminger's recent patch to ipv4/fib_semantics.c this is the same change but for DECnet. Signed-off-by: Steven Whitehouse Signed-off-by: David S. Miller --- net/decnet/dn_fib.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 846df3954a63..ed5fb5c3eab5 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -59,7 +59,7 @@ extern int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb); static DEFINE_SPINLOCK(dn_fib_multipath_lock); static struct dn_fib_info *dn_fib_info_list; -static DEFINE_RWLOCK(dn_fib_info_lock); +static DEFINE_SPINLOCK(dn_fib_info_lock); static struct { @@ -97,7 +97,7 @@ void dn_fib_free_info(struct dn_fib_info *fi) void dn_fib_release_info(struct dn_fib_info *fi) { - write_lock(&dn_fib_info_lock); + spin_lock(&dn_fib_info_lock); if (fi && --fi->fib_treeref == 0) { if (fi->fib_next) fi->fib_next->fib_prev = fi->fib_prev; @@ -108,7 +108,7 @@ void dn_fib_release_info(struct dn_fib_info *fi) fi->fib_dead = 1; dn_fib_info_put(fi); } - write_unlock(&dn_fib_info_lock); + spin_unlock(&dn_fib_info_lock); } static inline int dn_fib_nh_comp(const struct dn_fib_info *fi, const struct dn_fib_info *ofi) @@ -379,13 +379,13 @@ link_it: fi->fib_treeref++; atomic_inc(&fi->fib_clntref); - write_lock(&dn_fib_info_lock); + spin_lock(&dn_fib_info_lock); fi->fib_next = dn_fib_info_list; fi->fib_prev = NULL; if (dn_fib_info_list) dn_fib_info_list->fib_prev = fi; dn_fib_info_list = fi; - write_unlock(&dn_fib_info_lock); + spin_unlock(&dn_fib_info_lock); return fi; err_inval: From 53fad3cbff120d8987f377eff374cf4db4ecb177 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Wed, 9 Aug 2006 17:03:17 -0700 Subject: [PATCH 0435/1063] [SUNRPC]: Remove the unnecessary check for highmem in xs_sendpages(). Just call kernel_sendpage() directly. Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- net/sunrpc/xprtsock.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 8b319e375049..897bdd982315 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -174,7 +174,6 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a struct page **ppage = xdr->pages; unsigned int len, pglen = xdr->page_len; int err, ret = 0; - ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int); if (unlikely(!sock)) return -ENOTCONN; @@ -207,7 +206,6 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a base &= ~PAGE_CACHE_MASK; } - sendpage = kernel_sendpage; do { int flags = XS_SENDMSG_FLAGS; @@ -220,10 +218,7 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a if (pglen != len || xdr->tail[0].iov_len != 0) flags |= MSG_MORE; - /* Hmm... We might be dealing with highmem pages */ - if (PageHighMem(*ppage)) - sendpage = sock_no_sendpage; - err = sendpage(sock, *ppage, base, len, flags); + err = kernel_sendpage(sock, *ppage, base, len, flags); if (ret == 0) ret = err; else if (err > 0) From 89bddce58e85bb18b13f5077e8349ba9a3ee2597 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 1 Sep 2006 00:19:31 -0700 Subject: [PATCH 0436/1063] [NET] socket: code style cleanup Make socket.c conform to current style: * run through Lindent * get rid of unneeded casts * split assignment and comparsion where possible Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/socket.c | 729 +++++++++++++++++++++++++++------------------------ 1 file changed, 388 insertions(+), 341 deletions(-) diff --git a/net/socket.c b/net/socket.c index 2eaebf934a1a..156f2efa4e4a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -42,7 +42,7 @@ * Andi Kleen : Some small cleanups, optimizations, * and fixed a copy_from_user() bug. * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0) - * Tigran Aivazian : Made listen(2) backlog sanity checks + * Tigran Aivazian : Made listen(2) backlog sanity checks * protocol-independent * * @@ -53,7 +53,7 @@ * * * This module is effectively the top level interface to the BSD socket - * paradigm. + * paradigm. * * Based upon Swansea University Computer Society NET3.039 */ @@ -96,25 +96,24 @@ static int sock_no_open(struct inode *irrelevant, struct file *dontcare); static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf, - size_t size, loff_t pos); + size_t size, loff_t pos); static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf, - size_t size, loff_t pos); -static int sock_mmap(struct file *file, struct vm_area_struct * vma); + size_t size, loff_t pos); +static int sock_mmap(struct file *file, struct vm_area_struct *vma); static int sock_close(struct inode *inode, struct file *file); static unsigned int sock_poll(struct file *file, struct poll_table_struct *wait); -static long sock_ioctl(struct file *file, - unsigned int cmd, unsigned long arg); +static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #ifdef CONFIG_COMPAT static long compat_sock_ioctl(struct file *file, - unsigned int cmd, unsigned long arg); + unsigned int cmd, unsigned long arg); #endif static int sock_fasync(int fd, struct file *filp, int on); static ssize_t sock_readv(struct file *file, const struct iovec *vector, unsigned long count, loff_t *ppos); static ssize_t sock_writev(struct file *file, const struct iovec *vector, - unsigned long count, loff_t *ppos); + unsigned long count, loff_t *ppos); static ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more); @@ -193,7 +192,6 @@ static __inline__ void net_family_read_unlock(void) #define net_family_read_unlock() do { } while(0) #endif - /* * Statistics counters of the socket lists */ @@ -201,19 +199,20 @@ static __inline__ void net_family_read_unlock(void) static DEFINE_PER_CPU(int, sockets_in_use) = 0; /* - * Support routines. Move socket addresses back and forth across the kernel/user - * divide and look after the messy bits. + * Support routines. + * Move socket addresses back and forth across the kernel/user + * divide and look after the messy bits. */ -#define MAX_SOCK_ADDR 128 /* 108 for Unix domain - +#define MAX_SOCK_ADDR 128 /* 108 for Unix domain - 16 for IP, 16 for IPX, 24 for IPv6, - about 80 for AX.25 + about 80 for AX.25 must be at least one bigger than the AF_UNIX size (see net/unix/af_unix.c - :unix_mkname()). + :unix_mkname()). */ - + /** * move_addr_to_kernel - copy a socket address into kernel space * @uaddr: Address in user space @@ -227,11 +226,11 @@ static DEFINE_PER_CPU(int, sockets_in_use) = 0; int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr) { - if(ulen<0||ulen>MAX_SOCK_ADDR) + if (ulen < 0 || ulen > MAX_SOCK_ADDR) return -EINVAL; - if(ulen==0) + if (ulen == 0) return 0; - if(copy_from_user(kaddr,uaddr,ulen)) + if (copy_from_user(kaddr, uaddr, ulen)) return -EFAULT; return audit_sockaddr(ulen, kaddr); } @@ -252,44 +251,46 @@ int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr) * length of the data is written over the length limit the user * specified. Zero is returned for a success. */ - -int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ulen) + +int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, + int __user *ulen) { int err; int len; - if((err=get_user(len, ulen))) + err = get_user(len, ulen); + if (err) return err; - if(len>klen) - len=klen; - if(len<0 || len> MAX_SOCK_ADDR) + if (len > klen) + len = klen; + if (len < 0 || len > MAX_SOCK_ADDR) return -EINVAL; - if(len) - { + if (len) { if (audit_sockaddr(klen, kaddr)) return -ENOMEM; - if(copy_to_user(uaddr,kaddr,len)) + if (copy_to_user(uaddr, kaddr, len)) return -EFAULT; } /* - * "fromlen shall refer to the value before truncation.." - * 1003.1g + * "fromlen shall refer to the value before truncation.." + * 1003.1g */ return __put_user(klen, ulen); } #define SOCKFS_MAGIC 0x534F434B -static kmem_cache_t * sock_inode_cachep __read_mostly; +static kmem_cache_t *sock_inode_cachep __read_mostly; static struct inode *sock_alloc_inode(struct super_block *sb) { struct socket_alloc *ei; - ei = (struct socket_alloc *)kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL); + + ei = kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL); if (!ei) return NULL; init_waitqueue_head(&ei->socket.wait); - + ei->socket.fasync_list = NULL; ei->socket.state = SS_UNCONNECTED; ei->socket.flags = 0; @@ -307,22 +308,25 @@ static void sock_destroy_inode(struct inode *inode) container_of(inode, struct socket_alloc, vfs_inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) { - struct socket_alloc *ei = (struct socket_alloc *) foo; + struct socket_alloc *ei = (struct socket_alloc *)foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) + == SLAB_CTOR_CONSTRUCTOR) inode_init_once(&ei->vfs_inode); } - + static int init_inodecache(void) { sock_inode_cachep = kmem_cache_create("sock_inode_cache", - sizeof(struct socket_alloc), - 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), - init_once, NULL); + sizeof(struct socket_alloc), + 0, + (SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT | + SLAB_MEM_SPREAD), + init_once, + NULL); if (sock_inode_cachep == NULL) return -ENOMEM; return 0; @@ -335,7 +339,8 @@ static struct super_operations sockfs_ops = { }; static int sockfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) + int flags, const char *dev_name, void *data, + struct vfsmount *mnt) { return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, mnt); @@ -348,12 +353,13 @@ static struct file_system_type sock_fs_type = { .get_sb = sockfs_get_sb, .kill_sb = kill_anon_super, }; + static int sockfs_delete_dentry(struct dentry *dentry) { return 1; } static struct dentry_operations sockfs_dentry_operations = { - .d_delete = sockfs_delete_dentry, + .d_delete = sockfs_delete_dentry, }; /* @@ -477,10 +483,12 @@ struct socket *sockfd_lookup(int fd, int *err) struct file *file; struct socket *sock; - if (!(file = fget(fd))) { + file = fget(fd); + if (!file) { *err = -EBADF; return NULL; } + sock = sock_from_file(file, err); if (!sock) fput(file); @@ -505,7 +513,7 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) /** * sock_alloc - allocate a socket - * + * * Allocate a new inode and socket object. The two are bound together * and initialised. The socket is then returned. If we are out of inodes * NULL is returned. @@ -513,8 +521,8 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) static struct socket *sock_alloc(void) { - struct inode * inode; - struct socket * sock; + struct inode *inode; + struct socket *sock; inode = new_inode(sock_mnt->mnt_sb); if (!inode) @@ -522,7 +530,7 @@ static struct socket *sock_alloc(void) sock = SOCKET_I(inode); - inode->i_mode = S_IFSOCK|S_IRWXUGO; + inode->i_mode = S_IFSOCK | S_IRWXUGO; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; @@ -536,7 +544,7 @@ static struct socket *sock_alloc(void) * a back door. Remember to keep it shut otherwise you'll let the * creepy crawlies in. */ - + static int sock_no_open(struct inode *irrelevant, struct file *dontcare) { return -ENXIO; @@ -553,9 +561,9 @@ const struct file_operations bad_sock_fops = { * * The socket is released from the protocol stack if it has a release * callback, and the inode is then released if the socket is bound to - * an inode not a file. + * an inode not a file. */ - + void sock_release(struct socket *sock) { if (sock->ops) { @@ -575,10 +583,10 @@ void sock_release(struct socket *sock) iput(SOCK_INODE(sock)); return; } - sock->file=NULL; + sock->file = NULL; } -static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, +static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size) { struct sock_iocb *si = kiocb_to_siocb(iocb); @@ -621,14 +629,14 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, * the following is safe, since for compiler definitions of kvec and * iovec are identical, yielding the same in-core layout and alignment */ - msg->msg_iov = (struct iovec *)vec, + msg->msg_iov = (struct iovec *)vec; msg->msg_iovlen = num; result = sock_sendmsg(sock, msg, size); set_fs(oldfs); return result; } -static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, +static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { int err; @@ -647,14 +655,14 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, return sock->ops->recvmsg(iocb, sock, msg, size, flags); } -int sock_recvmsg(struct socket *sock, struct msghdr *msg, +int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct kiocb iocb; struct sock_iocb siocb; int ret; - init_sync_kiocb(&iocb, NULL); + init_sync_kiocb(&iocb, NULL); iocb.private = &siocb; ret = __sock_recvmsg(&iocb, sock, msg, size, flags); if (-EIOCBQUEUED == ret) @@ -662,9 +670,8 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, return ret; } -int kernel_recvmsg(struct socket *sock, struct msghdr *msg, - struct kvec *vec, size_t num, - size_t size, int flags) +int kernel_recvmsg(struct socket *sock, struct msghdr *msg, + struct kvec *vec, size_t num, size_t size, int flags) { mm_segment_t oldfs = get_fs(); int result; @@ -674,8 +681,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, * the following is safe, since for compiler definitions of kvec and * iovec are identical, yielding the same in-core layout and alignment */ - msg->msg_iov = (struct iovec *)vec, - msg->msg_iovlen = num; + msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num; result = sock_recvmsg(sock, msg, size, flags); set_fs(oldfs); return result; @@ -702,7 +708,8 @@ static ssize_t sock_sendpage(struct file *file, struct page *page, } static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, - char __user *ubuf, size_t size, struct sock_iocb *siocb) + char __user *ubuf, size_t size, + struct sock_iocb *siocb) { if (!is_sync_kiocb(iocb)) { siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); @@ -720,20 +727,21 @@ static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, } static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, - struct file *file, struct iovec *iov, unsigned long nr_segs) + struct file *file, struct iovec *iov, + unsigned long nr_segs) { struct socket *sock = file->private_data; size_t size = 0; int i; - for (i = 0 ; i < nr_segs ; i++) - size += iov[i].iov_len; + for (i = 0; i < nr_segs; i++) + size += iov[i].iov_len; msg->msg_name = NULL; msg->msg_namelen = 0; msg->msg_control = NULL; msg->msg_controllen = 0; - msg->msg_iov = (struct iovec *) iov; + msg->msg_iov = (struct iovec *)iov; msg->msg_iovlen = nr_segs; msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; @@ -748,7 +756,7 @@ static ssize_t sock_readv(struct file *file, const struct iovec *iov, struct msghdr msg; int ret; - init_sync_kiocb(&iocb, NULL); + init_sync_kiocb(&iocb, NULL); iocb.private = &siocb; ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs); @@ -758,7 +766,7 @@ static ssize_t sock_readv(struct file *file, const struct iovec *iov, } static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf, - size_t count, loff_t pos) + size_t count, loff_t pos) { struct sock_iocb siocb, *x; @@ -771,24 +779,25 @@ static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf, if (!x) return -ENOMEM; return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, - &x->async_iov, 1); + &x->async_iov, 1); } static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, - struct file *file, struct iovec *iov, unsigned long nr_segs) + struct file *file, struct iovec *iov, + unsigned long nr_segs) { struct socket *sock = file->private_data; size_t size = 0; int i; - for (i = 0 ; i < nr_segs ; i++) - size += iov[i].iov_len; + for (i = 0; i < nr_segs; i++) + size += iov[i].iov_len; msg->msg_name = NULL; msg->msg_namelen = 0; msg->msg_control = NULL; msg->msg_controllen = 0; - msg->msg_iov = (struct iovec *) iov; + msg->msg_iov = (struct iovec *)iov; msg->msg_iovlen = nr_segs; msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; if (sock->type == SOCK_SEQPACKET) @@ -815,7 +824,7 @@ static ssize_t sock_writev(struct file *file, const struct iovec *iov, } static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf, - size_t count, loff_t pos) + size_t count, loff_t pos) { struct sock_iocb siocb, *x; @@ -829,46 +838,48 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf, return -ENOMEM; return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, - &x->async_iov, 1); + &x->async_iov, 1); } - /* * Atomic setting of ioctl hooks to avoid race * with module unload. */ static DEFINE_MUTEX(br_ioctl_mutex); -static int (*br_ioctl_hook)(unsigned int cmd, void __user *arg) = NULL; +static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL; -void brioctl_set(int (*hook)(unsigned int, void __user *)) +void brioctl_set(int (*hook) (unsigned int, void __user *)) { mutex_lock(&br_ioctl_mutex); br_ioctl_hook = hook; mutex_unlock(&br_ioctl_mutex); } + EXPORT_SYMBOL(brioctl_set); static DEFINE_MUTEX(vlan_ioctl_mutex); -static int (*vlan_ioctl_hook)(void __user *arg); +static int (*vlan_ioctl_hook) (void __user *arg); -void vlan_ioctl_set(int (*hook)(void __user *)) +void vlan_ioctl_set(int (*hook) (void __user *)) { mutex_lock(&vlan_ioctl_mutex); vlan_ioctl_hook = hook; mutex_unlock(&vlan_ioctl_mutex); } + EXPORT_SYMBOL(vlan_ioctl_set); static DEFINE_MUTEX(dlci_ioctl_mutex); -static int (*dlci_ioctl_hook)(unsigned int, void __user *); +static int (*dlci_ioctl_hook) (unsigned int, void __user *); -void dlci_ioctl_set(int (*hook)(unsigned int, void __user *)) +void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) { mutex_lock(&dlci_ioctl_mutex); dlci_ioctl_hook = hook; mutex_unlock(&dlci_ioctl_mutex); } + EXPORT_SYMBOL(dlci_ioctl_set); /* @@ -890,8 +901,8 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { err = dev_ioctl(cmd, argp); } else -#endif /* CONFIG_WIRELESS_EXT */ - switch (cmd) { +#endif /* CONFIG_WIRELESS_EXT */ + switch (cmd) { case FIOSETOWN: case SIOCSPGRP: err = -EFAULT; @@ -901,7 +912,8 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) break; case FIOGETOWN: case SIOCGPGRP: - err = put_user(sock->file->f_owner.pid, (int __user *)argp); + err = put_user(sock->file->f_owner.pid, + (int __user *)argp); break; case SIOCGIFBR: case SIOCSIFBR: @@ -912,7 +924,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) request_module("bridge"); mutex_lock(&br_ioctl_mutex); - if (br_ioctl_hook) + if (br_ioctl_hook) err = br_ioctl_hook(cmd, argp); mutex_unlock(&br_ioctl_mutex); break; @@ -929,7 +941,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) break; case SIOCGIFDIVERT: case SIOCSIFDIVERT: - /* Convert this to call through a hook */ + /* Convert this to call through a hook */ err = divert_ioctl(cmd, argp); break; case SIOCADDDLCI: @@ -954,7 +966,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) if (err == -ENOIOCTLCMD) err = dev_ioctl(cmd, argp); break; - } + } return err; } @@ -962,7 +974,7 @@ int sock_create_lite(int family, int type, int protocol, struct socket **res) { int err; struct socket *sock = NULL; - + err = security_socket_create(family, type, protocol, 1); if (err) goto out; @@ -988,18 +1000,18 @@ out_release: } /* No kernel lock held - perfect */ -static unsigned int sock_poll(struct file *file, poll_table * wait) +static unsigned int sock_poll(struct file *file, poll_table *wait) { struct socket *sock; /* - * We can't return errors to poll, so it's either yes or no. + * We can't return errors to poll, so it's either yes or no. */ sock = file->private_data; return sock->ops->poll(file, sock, wait); } -static int sock_mmap(struct file * file, struct vm_area_struct * vma) +static int sock_mmap(struct file *file, struct vm_area_struct *vma) { struct socket *sock = file->private_data; @@ -1009,12 +1021,11 @@ static int sock_mmap(struct file * file, struct vm_area_struct * vma) static int sock_close(struct inode *inode, struct file *filp) { /* - * It was possible the inode is NULL we were - * closing an unfinished socket. + * It was possible the inode is NULL we were + * closing an unfinished socket. */ - if (!inode) - { + if (!inode) { printk(KERN_DEBUG "sock_close: NULL inode\n"); return 0; } @@ -1040,57 +1051,52 @@ static int sock_close(struct inode *inode, struct file *filp) static int sock_fasync(int fd, struct file *filp, int on) { - struct fasync_struct *fa, *fna=NULL, **prev; + struct fasync_struct *fa, *fna = NULL, **prev; struct socket *sock; struct sock *sk; - if (on) - { + if (on) { fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); - if(fna==NULL) + if (fna == NULL) return -ENOMEM; } sock = filp->private_data; - if ((sk=sock->sk) == NULL) { + sk = sock->sk; + if (sk == NULL) { kfree(fna); return -EINVAL; } lock_sock(sk); - prev=&(sock->fasync_list); + prev = &(sock->fasync_list); - for (fa=*prev; fa!=NULL; prev=&fa->fa_next,fa=*prev) - if (fa->fa_file==filp) + for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) + if (fa->fa_file == filp) break; - if(on) - { - if(fa!=NULL) - { + if (on) { + if (fa != NULL) { write_lock_bh(&sk->sk_callback_lock); - fa->fa_fd=fd; + fa->fa_fd = fd; write_unlock_bh(&sk->sk_callback_lock); kfree(fna); goto out; } - fna->fa_file=filp; - fna->fa_fd=fd; - fna->magic=FASYNC_MAGIC; - fna->fa_next=sock->fasync_list; + fna->fa_file = filp; + fna->fa_fd = fd; + fna->magic = FASYNC_MAGIC; + fna->fa_next = sock->fasync_list; write_lock_bh(&sk->sk_callback_lock); - sock->fasync_list=fna; + sock->fasync_list = fna; write_unlock_bh(&sk->sk_callback_lock); - } - else - { - if (fa!=NULL) - { + } else { + if (fa != NULL) { write_lock_bh(&sk->sk_callback_lock); - *prev=fa->fa_next; + *prev = fa->fa_next; write_unlock_bh(&sk->sk_callback_lock); kfree(fa); } @@ -1107,10 +1113,9 @@ int sock_wake_async(struct socket *sock, int how, int band) { if (!sock || !sock->fasync_list) return -1; - switch (how) - { + switch (how) { case 1: - + if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) break; goto call_kill; @@ -1119,7 +1124,7 @@ int sock_wake_async(struct socket *sock, int how, int band) break; /* fall through */ case 0: - call_kill: +call_kill: __kill_fasync(sock->fasync_list, SIGIO, band); break; case 3: @@ -1128,13 +1133,14 @@ int sock_wake_async(struct socket *sock, int how, int band) return 0; } -static int __sock_create(int family, int type, int protocol, struct socket **res, int kern) +static int __sock_create(int family, int type, int protocol, + struct socket **res, int kern) { int err; struct socket *sock; /* - * Check protocol is in range + * Check protocol is in range */ if (family < 0 || family >= NPROTO) return -EAFNOSUPPORT; @@ -1147,10 +1153,11 @@ static int __sock_create(int family, int type, int protocol, struct socket **res deadlock in module load. */ if (family == PF_INET && type == SOCK_PACKET) { - static int warned; + static int warned; if (!warned) { warned = 1; - printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", current->comm); + printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", + current->comm); } family = PF_PACKET; } @@ -1158,17 +1165,16 @@ static int __sock_create(int family, int type, int protocol, struct socket **res err = security_socket_create(family, type, protocol, kern); if (err) return err; - + #if defined(CONFIG_KMOD) - /* Attempt to load a protocol module if the find failed. - * - * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user + /* Attempt to load a protocol module if the find failed. + * + * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user * requested real, full-featured networking support upon configuration. * Otherwise module support will break! */ - if (net_families[family]==NULL) - { - request_module("net-pf-%d",family); + if (net_families[family] == NULL) { + request_module("net-pf-%d", family); } #endif @@ -1187,12 +1193,12 @@ static int __sock_create(int family, int type, int protocol, struct socket **res if (!(sock = sock_alloc())) { if (net_ratelimit()) printk(KERN_WARNING "socket: no more sockets\n"); - err = -ENFILE; /* Not exactly a match, but its the - closest posix thing */ + err = -ENFILE; /* Not exactly a match, but its the + closest posix thing */ goto out; } - sock->type = type; + sock->type = type; /* * We will call the ->create function, that possibly is in a loadable @@ -1271,7 +1277,8 @@ out_release: * Create a pair of connected sockets. */ -asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *usockvec) +asmlinkage long sys_socketpair(int family, int type, int protocol, + int __user *usockvec) { struct socket *sock1, *sock2; int fd1, fd2, err; @@ -1290,7 +1297,7 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *u goto out_release_1; err = sock1->ops->socketpair(sock1, sock2); - if (err < 0) + if (err < 0) goto out_release_both; fd1 = fd2 = -1; @@ -1309,7 +1316,7 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *u * Not kernel problem. */ - err = put_user(fd1, &usockvec[0]); + err = put_user(fd1, &usockvec[0]); if (!err) err = put_user(fd2, &usockvec[1]); if (!err) @@ -1320,19 +1327,18 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *u return err; out_close_1: - sock_release(sock2); + sock_release(sock2); sys_close(fd1); return err; out_release_both: - sock_release(sock2); + sock_release(sock2); out_release_1: - sock_release(sock1); + sock_release(sock1); out: return err; } - /* * Bind a name to a socket. Nothing much to do here since it's * the protocol's responsibility to handle the local address. @@ -1347,20 +1353,23 @@ asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) char address[MAX_SOCK_ADDR]; int err, fput_needed; - if((sock = sockfd_lookup_light(fd, &err, &fput_needed))!=NULL) - { - if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0) { - err = security_socket_bind(sock, (struct sockaddr *)address, addrlen); + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if(sock) { + err = move_addr_to_kernel(umyaddr, addrlen, address); + if (err >= 0) { + err = security_socket_bind(sock, + (struct sockaddr *)address, + addrlen); if (!err) err = sock->ops->bind(sock, - (struct sockaddr *)address, addrlen); + (struct sockaddr *) + address, addrlen); } fput_light(sock->file, fput_needed); - } + } return err; } - /* * Perform a listen. Basically, we allow the protocol to do anything * necessary for a listen, and if that works, we mark the socket as @@ -1373,9 +1382,10 @@ asmlinkage long sys_listen(int fd, int backlog) { struct socket *sock; int err, fput_needed; - - if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) { - if ((unsigned) backlog > sysctl_somaxconn) + + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (sock) { + if ((unsigned)backlog > sysctl_somaxconn) backlog = sysctl_somaxconn; err = security_socket_listen(sock, backlog); @@ -1387,7 +1397,6 @@ asmlinkage long sys_listen(int fd, int backlog) return err; } - /* * For accept, we attempt to create a new socket, set up the link * with the client, wake up the client, then return the new @@ -1400,7 +1409,8 @@ asmlinkage long sys_listen(int fd, int backlog) * clean when we restucture accept also. */ -asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen) +asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen) { struct socket *sock, *newsock; struct file *newfile; @@ -1412,7 +1422,7 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int _ goto out; err = -ENFILE; - if (!(newsock = sock_alloc())) + if (!(newsock = sock_alloc())) goto out_put; newsock->type = sock->type; @@ -1444,11 +1454,13 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int _ goto out_fd; if (upeer_sockaddr) { - if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 2)<0) { + if (newsock->ops->getname(newsock, (struct sockaddr *)address, + &len, 2) < 0) { err = -ECONNABORTED; goto out_fd; } - err = move_addr_to_user(address, len, upeer_sockaddr, upeer_addrlen); + err = move_addr_to_user(address, len, upeer_sockaddr, + upeer_addrlen); if (err < 0) goto out_fd; } @@ -1470,7 +1482,6 @@ out_fd: goto out_put; } - /* * Attempt to connect to a socket with the server address. The address * is in user space so we verify it is OK and move it to kernel space. @@ -1483,7 +1494,8 @@ out_fd: * include the -EINPROGRESS status for such sockets. */ -asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen) +asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, + int addrlen) { struct socket *sock; char address[MAX_SOCK_ADDR]; @@ -1496,11 +1508,12 @@ asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, int addrl if (err < 0) goto out_put; - err = security_socket_connect(sock, (struct sockaddr *)address, addrlen); + err = + security_socket_connect(sock, (struct sockaddr *)address, addrlen); if (err) goto out_put; - err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen, + err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen, sock->file->f_flags); out_put: fput_light(sock->file, fput_needed); @@ -1513,12 +1526,13 @@ out: * name to user space. */ -asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len) +asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, + int __user *usockaddr_len) { struct socket *sock; char address[MAX_SOCK_ADDR]; int len, err, fput_needed; - + sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; @@ -1543,22 +1557,27 @@ out: * name to user space. */ -asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len) +asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, + int __user *usockaddr_len) { struct socket *sock; char address[MAX_SOCK_ADDR]; int len, err, fput_needed; - if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) { + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (sock != NULL) { err = security_socket_getpeername(sock); if (err) { fput_light(sock->file, fput_needed); return err; } - err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 1); + err = + sock->ops->getname(sock, (struct sockaddr *)address, &len, + 1); if (!err) - err=move_addr_to_user(address,len, usockaddr, usockaddr_len); + err = move_addr_to_user(address, len, usockaddr, + usockaddr_len); fput_light(sock->file, fput_needed); } return err; @@ -1570,8 +1589,9 @@ asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, int _ * the protocol. */ -asmlinkage long sys_sendto(int fd, void __user * buff, size_t len, unsigned flags, - struct sockaddr __user *addr, int addr_len) +asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, + unsigned flags, struct sockaddr __user *addr, + int addr_len) { struct socket *sock; char address[MAX_SOCK_ADDR]; @@ -1588,54 +1608,55 @@ asmlinkage long sys_sendto(int fd, void __user * buff, size_t len, unsigned flag sock = sock_from_file(sock_file, &err); if (!sock) goto out_put; - iov.iov_base=buff; - iov.iov_len=len; - msg.msg_name=NULL; - msg.msg_iov=&iov; - msg.msg_iovlen=1; - msg.msg_control=NULL; - msg.msg_controllen=0; - msg.msg_namelen=0; + iov.iov_base = buff; + iov.iov_len = len; + msg.msg_name = NULL; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_namelen = 0; if (addr) { err = move_addr_to_kernel(addr, addr_len, address); if (err < 0) goto out_put; - msg.msg_name=address; - msg.msg_namelen=addr_len; + msg.msg_name = address; + msg.msg_namelen = addr_len; } if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; msg.msg_flags = flags; err = sock_sendmsg(sock, &msg, len); -out_put: +out_put: fput_light(sock_file, fput_needed); return err; } /* - * Send a datagram down a socket. + * Send a datagram down a socket. */ -asmlinkage long sys_send(int fd, void __user * buff, size_t len, unsigned flags) +asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags) { return sys_sendto(fd, buff, len, flags, NULL, 0); } /* - * Receive a frame from the socket and optionally record the address of the + * Receive a frame from the socket and optionally record the address of the * sender. We verify the buffers are writable and if needed move the * sender address from kernel to user space. */ -asmlinkage long sys_recvfrom(int fd, void __user * ubuf, size_t size, unsigned flags, - struct sockaddr __user *addr, int __user *addr_len) +asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, + unsigned flags, struct sockaddr __user *addr, + int __user *addr_len) { struct socket *sock; struct iovec iov; struct msghdr msg; char address[MAX_SOCK_ADDR]; - int err,err2; + int err, err2; struct file *sock_file; int fput_needed; @@ -1647,23 +1668,22 @@ asmlinkage long sys_recvfrom(int fd, void __user * ubuf, size_t size, unsigned f if (!sock) goto out; - msg.msg_control=NULL; - msg.msg_controllen=0; - msg.msg_iovlen=1; - msg.msg_iov=&iov; - iov.iov_len=size; - iov.iov_base=ubuf; - msg.msg_name=address; - msg.msg_namelen=MAX_SOCK_ADDR; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_iovlen = 1; + msg.msg_iov = &iov; + iov.iov_len = size; + iov.iov_base = ubuf; + msg.msg_name = address; + msg.msg_namelen = MAX_SOCK_ADDR; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; - err=sock_recvmsg(sock, &msg, size, flags); + err = sock_recvmsg(sock, &msg, size, flags); - if(err >= 0 && addr != NULL) - { - err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len); - if(err2<0) - err=err2; + if (err >= 0 && addr != NULL) { + err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len); + if (err2 < 0) + err = err2; } out: fput_light(sock_file, fput_needed); @@ -1671,10 +1691,11 @@ out: } /* - * Receive a datagram from a socket. + * Receive a datagram from a socket. */ -asmlinkage long sys_recv(int fd, void __user * ubuf, size_t size, unsigned flags) +asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, + unsigned flags) { return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); } @@ -1684,24 +1705,29 @@ asmlinkage long sys_recv(int fd, void __user * ubuf, size_t size, unsigned flags * to pass the user mode parameter for the protocols to sort out. */ -asmlinkage long sys_setsockopt(int fd, int level, int optname, char __user *optval, int optlen) +asmlinkage long sys_setsockopt(int fd, int level, int optname, + char __user *optval, int optlen) { int err, fput_needed; struct socket *sock; if (optlen < 0) return -EINVAL; - - if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) - { - err = security_socket_setsockopt(sock,level,optname); + + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (sock != NULL) { + err = security_socket_setsockopt(sock, level, optname); if (err) goto out_put; if (level == SOL_SOCKET) - err=sock_setsockopt(sock,level,optname,optval,optlen); + err = + sock_setsockopt(sock, level, optname, optval, + optlen); else - err=sock->ops->setsockopt(sock, level, optname, optval, optlen); + err = + sock->ops->setsockopt(sock, level, optname, optval, + optlen); out_put: fput_light(sock->file, fput_needed); } @@ -1713,27 +1739,32 @@ out_put: * to pass a user mode parameter for the protocols to sort out. */ -asmlinkage long sys_getsockopt(int fd, int level, int optname, char __user *optval, int __user *optlen) +asmlinkage long sys_getsockopt(int fd, int level, int optname, + char __user *optval, int __user *optlen) { int err, fput_needed; struct socket *sock; - if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) { + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (sock != NULL) { err = security_socket_getsockopt(sock, level, optname); if (err) goto out_put; if (level == SOL_SOCKET) - err=sock_getsockopt(sock,level,optname,optval,optlen); + err = + sock_getsockopt(sock, level, optname, optval, + optlen); else - err=sock->ops->getsockopt(sock, level, optname, optval, optlen); + err = + sock->ops->getsockopt(sock, level, optname, optval, + optlen); out_put: fput_light(sock->file, fput_needed); } return err; } - /* * Shutdown a socket. */ @@ -1743,8 +1774,8 @@ asmlinkage long sys_shutdown(int fd, int how) int err, fput_needed; struct socket *sock; - if ((sock = sockfd_lookup_light(fd, &err, &fput_needed))!=NULL) - { + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (sock != NULL) { err = security_socket_shutdown(sock, how); if (!err) err = sock->ops->shutdown(sock, how); @@ -1753,41 +1784,42 @@ asmlinkage long sys_shutdown(int fd, int how) return err; } -/* A couple of helpful macros for getting the address of the 32/64 bit +/* A couple of helpful macros for getting the address of the 32/64 bit * fields which are the same type (int / unsigned) on our platforms. */ #define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member) #define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) #define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) - /* * BSD sendmsg interface */ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) { - struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; + struct compat_msghdr __user *msg_compat = + (struct compat_msghdr __user *)msg; struct socket *sock; char address[MAX_SOCK_ADDR]; struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; unsigned char ctl[sizeof(struct cmsghdr) + 20] - __attribute__ ((aligned (sizeof(__kernel_size_t)))); - /* 20 is size of ipv6_pktinfo */ + __attribute__ ((aligned(sizeof(__kernel_size_t)))); + /* 20 is size of ipv6_pktinfo */ unsigned char *ctl_buf = ctl; struct msghdr msg_sys; int err, ctl_len, iov_size, total_len; int fput_needed; - + err = -EFAULT; if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(&msg_sys, msg_compat)) return -EFAULT; - } else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) + } + else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) return -EFAULT; sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (!sock) + if (!sock) goto out; /* do not move before msg_sys is valid */ @@ -1795,7 +1827,7 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) if (msg_sys.msg_iovlen > UIO_MAXIOV) goto out_put; - /* Check whether to allocate the iovec area*/ + /* Check whether to allocate the iovec area */ err = -ENOMEM; iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); if (msg_sys.msg_iovlen > UIO_FASTIOV) { @@ -1809,7 +1841,7 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ); } else err = verify_iovec(&msg_sys, iov, address, VERIFY_READ); - if (err < 0) + if (err < 0) goto out_freeiov; total_len = err; @@ -1817,18 +1849,19 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) if (msg_sys.msg_controllen > INT_MAX) goto out_freeiov; - ctl_len = msg_sys.msg_controllen; + ctl_len = msg_sys.msg_controllen; if ((MSG_CMSG_COMPAT & flags) && ctl_len) { - err = cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, sizeof(ctl)); + err = + cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, + sizeof(ctl)); if (err) goto out_freeiov; ctl_buf = msg_sys.msg_control; ctl_len = msg_sys.msg_controllen; } else if (ctl_len) { - if (ctl_len > sizeof(ctl)) - { + if (ctl_len > sizeof(ctl)) { ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); - if (ctl_buf == NULL) + if (ctl_buf == NULL) goto out_freeiov; } err = -EFAULT; @@ -1837,7 +1870,8 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) * Afterwards, it will be a kernel pointer. Thus the compiler-assisted * checking falls down on this. */ - if (copy_from_user(ctl_buf, (void __user *) msg_sys.msg_control, ctl_len)) + if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, + ctl_len)) goto out_freectl; msg_sys.msg_control = ctl_buf; } @@ -1848,14 +1882,14 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) err = sock_sendmsg(sock, &msg_sys, total_len); out_freectl: - if (ctl_buf != ctl) + if (ctl_buf != ctl) sock_kfree_s(sock->sk, ctl_buf, ctl_len); out_freeiov: if (iov != iovstack) sock_kfree_s(sock->sk, iov, iov_size); out_put: fput_light(sock->file, fput_needed); -out: +out: return err; } @@ -1863,12 +1897,14 @@ out: * BSD recvmsg interface */ -asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flags) +asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, + unsigned int flags) { - struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; + struct compat_msghdr __user *msg_compat = + (struct compat_msghdr __user *)msg; struct socket *sock; struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov=iovstack; + struct iovec *iov = iovstack; struct msghdr msg_sys; unsigned long cmsg_ptr; int err, iov_size, total_len, len; @@ -1880,13 +1916,13 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag /* user mode address pointers */ struct sockaddr __user *uaddr; int __user *uaddr_len; - + if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(&msg_sys, msg_compat)) return -EFAULT; - } else - if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr))) - return -EFAULT; + } + else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) + return -EFAULT; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) @@ -1895,8 +1931,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag err = -EMSGSIZE; if (msg_sys.msg_iovlen > UIO_MAXIOV) goto out_put; - - /* Check whether to allocate the iovec area*/ + + /* Check whether to allocate the iovec area */ err = -ENOMEM; iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); if (msg_sys.msg_iovlen > UIO_FASTIOV) { @@ -1906,11 +1942,11 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag } /* - * Save the user-mode address (verify_iovec will change the - * kernel msghdr to use the kernel address space) + * Save the user-mode address (verify_iovec will change the + * kernel msghdr to use the kernel address space) */ - - uaddr = (void __user *) msg_sys.msg_name; + + uaddr = (void __user *)msg_sys.msg_name; uaddr_len = COMPAT_NAMELEN(msg); if (MSG_CMSG_COMPAT & flags) { err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE); @@ -1918,13 +1954,13 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE); if (err < 0) goto out_freeiov; - total_len=err; + total_len = err; cmsg_ptr = (unsigned long)msg_sys.msg_control; msg_sys.msg_flags = 0; if (MSG_CMSG_COMPAT & flags) msg_sys.msg_flags = MSG_CMSG_COMPAT; - + if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = sock_recvmsg(sock, &msg_sys, total_len, flags); @@ -1933,7 +1969,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag len = err; if (uaddr != NULL) { - err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len); + err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, + uaddr_len); if (err < 0) goto out_freeiov; } @@ -1942,10 +1979,10 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag if (err) goto out_freeiov; if (MSG_CMSG_COMPAT & flags) - err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, + err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, &msg_compat->msg_controllen); else - err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, + err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, &msg->msg_controllen); if (err) goto out_freeiov; @@ -1964,102 +2001,113 @@ out: /* Argument list sizes for sys_socketcall */ #define AL(x) ((x) * sizeof(unsigned long)) -static unsigned char nargs[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), - AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), - AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)}; +static const unsigned char nargs[18]={ + AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), + AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), + AL(6),AL(2),AL(5),AL(5),AL(3),AL(3) +}; + #undef AL /* - * System call vectors. + * System call vectors. * * Argument checking cleaned up. Saved 20% in size. * This function doesn't need to set the kernel lock because - * it is set by the callees. + * it is set by the callees. */ asmlinkage long sys_socketcall(int call, unsigned long __user *args) { unsigned long a[6]; - unsigned long a0,a1; + unsigned long a0, a1; int err; - if(call<1||call>SYS_RECVMSG) + if (call < 1 || call > SYS_RECVMSG) return -EINVAL; /* copy_from_user should be SMP safe. */ if (copy_from_user(a, args, nargs[call])) return -EFAULT; - err = audit_socketcall(nargs[call]/sizeof(unsigned long), a); + err = audit_socketcall(nargs[call] / sizeof(unsigned long), a); if (err) return err; - a0=a[0]; - a1=a[1]; - - switch(call) - { - case SYS_SOCKET: - err = sys_socket(a0,a1,a[2]); - break; - case SYS_BIND: - err = sys_bind(a0,(struct sockaddr __user *)a1, a[2]); - break; - case SYS_CONNECT: - err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]); - break; - case SYS_LISTEN: - err = sys_listen(a0,a1); - break; - case SYS_ACCEPT: - err = sys_accept(a0,(struct sockaddr __user *)a1, (int __user *)a[2]); - break; - case SYS_GETSOCKNAME: - err = sys_getsockname(a0,(struct sockaddr __user *)a1, (int __user *)a[2]); - break; - case SYS_GETPEERNAME: - err = sys_getpeername(a0, (struct sockaddr __user *)a1, (int __user *)a[2]); - break; - case SYS_SOCKETPAIR: - err = sys_socketpair(a0,a1, a[2], (int __user *)a[3]); - break; - case SYS_SEND: - err = sys_send(a0, (void __user *)a1, a[2], a[3]); - break; - case SYS_SENDTO: - err = sys_sendto(a0,(void __user *)a1, a[2], a[3], - (struct sockaddr __user *)a[4], a[5]); - break; - case SYS_RECV: - err = sys_recv(a0, (void __user *)a1, a[2], a[3]); - break; - case SYS_RECVFROM: - err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3], - (struct sockaddr __user *)a[4], (int __user *)a[5]); - break; - case SYS_SHUTDOWN: - err = sys_shutdown(a0,a1); - break; - case SYS_SETSOCKOPT: - err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]); - break; - case SYS_GETSOCKOPT: - err = sys_getsockopt(a0, a1, a[2], (char __user *)a[3], (int __user *)a[4]); - break; - case SYS_SENDMSG: - err = sys_sendmsg(a0, (struct msghdr __user *) a1, a[2]); - break; - case SYS_RECVMSG: - err = sys_recvmsg(a0, (struct msghdr __user *) a1, a[2]); - break; - default: - err = -EINVAL; - break; + a0 = a[0]; + a1 = a[1]; + + switch (call) { + case SYS_SOCKET: + err = sys_socket(a0, a1, a[2]); + break; + case SYS_BIND: + err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]); + break; + case SYS_CONNECT: + err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]); + break; + case SYS_LISTEN: + err = sys_listen(a0, a1); + break; + case SYS_ACCEPT: + err = + sys_accept(a0, (struct sockaddr __user *)a1, + (int __user *)a[2]); + break; + case SYS_GETSOCKNAME: + err = + sys_getsockname(a0, (struct sockaddr __user *)a1, + (int __user *)a[2]); + break; + case SYS_GETPEERNAME: + err = + sys_getpeername(a0, (struct sockaddr __user *)a1, + (int __user *)a[2]); + break; + case SYS_SOCKETPAIR: + err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]); + break; + case SYS_SEND: + err = sys_send(a0, (void __user *)a1, a[2], a[3]); + break; + case SYS_SENDTO: + err = sys_sendto(a0, (void __user *)a1, a[2], a[3], + (struct sockaddr __user *)a[4], a[5]); + break; + case SYS_RECV: + err = sys_recv(a0, (void __user *)a1, a[2], a[3]); + break; + case SYS_RECVFROM: + err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3], + (struct sockaddr __user *)a[4], + (int __user *)a[5]); + break; + case SYS_SHUTDOWN: + err = sys_shutdown(a0, a1); + break; + case SYS_SETSOCKOPT: + err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]); + break; + case SYS_GETSOCKOPT: + err = + sys_getsockopt(a0, a1, a[2], (char __user *)a[3], + (int __user *)a[4]); + break; + case SYS_SENDMSG: + err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); + break; + case SYS_RECVMSG: + err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); + break; + default: + err = -EINVAL; + break; } return err; } -#endif /* __ARCH_WANT_SYS_SOCKETCALL */ +#endif /* __ARCH_WANT_SYS_SOCKETCALL */ /* * This function is called by a protocol handler that wants to @@ -2072,18 +2120,18 @@ int sock_register(struct net_proto_family *ops) int err; if (ops->family >= NPROTO) { - printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO); + printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, + NPROTO); return -ENOBUFS; } net_family_write_lock(); err = -EEXIST; if (net_families[ops->family] == NULL) { - net_families[ops->family]=ops; + net_families[ops->family] = ops; err = 0; } net_family_write_unlock(); - printk(KERN_INFO "NET: Registered protocol family %d\n", - ops->family); + printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); return err; } @@ -2099,28 +2147,27 @@ int sock_unregister(int family) return -1; net_family_write_lock(); - net_families[family]=NULL; + net_families[family] = NULL; net_family_write_unlock(); - printk(KERN_INFO "NET: Unregistered protocol family %d\n", - family); + printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); return 0; } static int __init sock_init(void) { /* - * Initialize sock SLAB cache. + * Initialize sock SLAB cache. */ - + sk_init(); /* - * Initialize skbuff SLAB cache + * Initialize skbuff SLAB cache */ skb_init(); /* - * Initialize the protocols module. + * Initialize the protocols module. */ init_inodecache(); @@ -2146,7 +2193,7 @@ void socket_seq_show(struct seq_file *seq) int counter = 0; for_each_possible_cpu(cpu) - counter += per_cpu(sockets_in_use, cpu); + counter += per_cpu(sockets_in_use, cpu); /* It can be negative, by the way. 8) */ if (counter < 0) @@ -2154,11 +2201,11 @@ void socket_seq_show(struct seq_file *seq) seq_printf(seq, "sockets: used %d\n", counter); } -#endif /* CONFIG_PROC_FS */ +#endif /* CONFIG_PROC_FS */ #ifdef CONFIG_COMPAT static long compat_sock_ioctl(struct file *file, unsigned cmd, - unsigned long arg) + unsigned long arg) { struct socket *sock = file->private_data; int ret = -ENOIOCTLCMD; From 757dbb494be3309fe41ce4c62f8057d8b41d8897 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 9 Aug 2006 20:50:00 -0700 Subject: [PATCH 0437/1063] [NET]: drop unused elements from net_proto_family Three values in net_proto_family are defined but never used. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/net.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/linux/net.h b/include/linux/net.h index 19da2c08d7b6..1bd76327ee2b 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -169,11 +169,6 @@ struct proto_ops { struct net_proto_family { int family; int (*create)(struct socket *sock, int protocol); - /* These are counters for the number of different methods of - each we support */ - short authentication; - short encryption; - short encrypt_net; struct module *owner; }; From 55737fda0bc73cb20f702301d8b52938a5a43630 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 1 Sep 2006 00:23:39 -0700 Subject: [PATCH 0438/1063] [NET]: socket family using RCU Replace the gross custom locking done in socket code for net_family[] with simple RCU usage. Some reordering necessary to avoid sleep issues with sock_alloc. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/socket.c | 174 ++++++++++++++++++++++----------------------------- 1 file changed, 76 insertions(+), 98 deletions(-) diff --git a/net/socket.c b/net/socket.c index 156f2efa4e4a..b5a3fcb9ed6d 100644 --- a/net/socket.c +++ b/net/socket.c @@ -59,11 +59,11 @@ */ #include -#include #include #include #include #include +#include #include #include #include @@ -146,51 +146,8 @@ static struct file_operations socket_file_ops = { * The protocol list. Each protocol is registered in here. */ -static struct net_proto_family *net_families[NPROTO]; - -#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) -static atomic_t net_family_lockct = ATOMIC_INIT(0); static DEFINE_SPINLOCK(net_family_lock); - -/* The strategy is: modifications net_family vector are short, do not - sleep and veeery rare, but read access should be free of any exclusive - locks. - */ - -static void net_family_write_lock(void) -{ - spin_lock(&net_family_lock); - while (atomic_read(&net_family_lockct) != 0) { - spin_unlock(&net_family_lock); - - yield(); - - spin_lock(&net_family_lock); - } -} - -static __inline__ void net_family_write_unlock(void) -{ - spin_unlock(&net_family_lock); -} - -static __inline__ void net_family_read_lock(void) -{ - atomic_inc(&net_family_lockct); - spin_unlock_wait(&net_family_lock); -} - -static __inline__ void net_family_read_unlock(void) -{ - atomic_dec(&net_family_lockct); -} - -#else -#define net_family_write_lock() do { } while(0) -#define net_family_write_unlock() do { } while(0) -#define net_family_read_lock() do { } while(0) -#define net_family_read_unlock() do { } while(0) -#endif +static const struct net_proto_family *net_families[NPROTO]; /* * Statistics counters of the socket lists @@ -1138,6 +1095,7 @@ static int __sock_create(int family, int type, int protocol, { int err; struct socket *sock; + const struct net_proto_family *pf; /* * Check protocol is in range @@ -1166,6 +1124,21 @@ static int __sock_create(int family, int type, int protocol, if (err) return err; + /* + * Allocate the socket and allow the family to set things up. if + * the protocol is 0, the family is instructed to select an appropriate + * default. + */ + sock = sock_alloc(); + if (!sock) { + if (net_ratelimit()) + printk(KERN_WARNING "socket: no more sockets\n"); + return -ENFILE; /* Not exactly a match, but its the + closest posix thing */ + } + + sock->type = type; + #if defined(CONFIG_KMOD) /* Attempt to load a protocol module if the find failed. * @@ -1173,72 +1146,61 @@ static int __sock_create(int family, int type, int protocol, * requested real, full-featured networking support upon configuration. * Otherwise module support will break! */ - if (net_families[family] == NULL) { + if (net_families[family] == NULL) request_module("net-pf-%d", family); - } #endif - net_family_read_lock(); - if (net_families[family] == NULL) { - err = -EAFNOSUPPORT; - goto out; - } - -/* - * Allocate the socket and allow the family to set things up. if - * the protocol is 0, the family is instructed to select an appropriate - * default. - */ - - if (!(sock = sock_alloc())) { - if (net_ratelimit()) - printk(KERN_WARNING "socket: no more sockets\n"); - err = -ENFILE; /* Not exactly a match, but its the - closest posix thing */ - goto out; - } - - sock->type = type; + rcu_read_lock(); + pf = rcu_dereference(net_families[family]); + err = -EAFNOSUPPORT; + if (!pf) + goto out_release; /* * We will call the ->create function, that possibly is in a loadable * module, so we have to bump that loadable module refcnt first. */ - err = -EAFNOSUPPORT; - if (!try_module_get(net_families[family]->owner)) + if (!try_module_get(pf->owner)) goto out_release; - if ((err = net_families[family]->create(sock, protocol)) < 0) { - sock->ops = NULL; + /* Now protected by module ref count */ + rcu_read_unlock(); + + err = pf->create(sock, protocol); + if (err < 0) goto out_module_put; - } /* * Now to bump the refcnt of the [loadable] module that owns this * socket at sock_release time we decrement its refcnt. */ - if (!try_module_get(sock->ops->owner)) { - sock->ops = NULL; - goto out_module_put; - } + if (!try_module_get(sock->ops->owner)) + goto out_module_busy; + /* * Now that we're done with the ->create function, the [loadable] * module can have its refcnt decremented */ - module_put(net_families[family]->owner); - *res = sock; + module_put(pf->owner); err = security_socket_post_create(sock, family, type, protocol, kern); if (err) goto out_release; + *res = sock; -out: - net_family_read_unlock(); - return err; + return 0; + +out_module_busy: + err = -EAFNOSUPPORT; out_module_put: - module_put(net_families[family]->owner); -out_release: + sock->ops = NULL; + module_put(pf->owner); +out_sock_release: sock_release(sock); - goto out; + return err; + +out_release: + rcu_read_unlock(); + goto out_sock_release; } int sock_create(int family, int type, int protocol, struct socket **res) @@ -2109,12 +2071,15 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) #endif /* __ARCH_WANT_SYS_SOCKETCALL */ -/* +/** + * sock_register - add a socket protocol handler + * @ops: description of protocol + * * This function is called by a protocol handler that wants to * advertise its address family, and have it linked into the - * SOCKET module. + * socket interface. The value ops->family coresponds to the + * socket system call protocol family. */ - int sock_register(struct net_proto_family *ops) { int err; @@ -2124,31 +2089,44 @@ int sock_register(struct net_proto_family *ops) NPROTO); return -ENOBUFS; } - net_family_write_lock(); - err = -EEXIST; - if (net_families[ops->family] == NULL) { + + spin_lock(&net_family_lock); + if (net_families[ops->family]) + err = -EEXIST; + else { net_families[ops->family] = ops; err = 0; } - net_family_write_unlock(); + spin_unlock(&net_family_lock); + printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); return err; } -/* +/** + * sock_unregister - remove a protocol handler + * @family: protocol family to remove + * * This function is called by a protocol handler that wants to * remove its address family, and have it unlinked from the - * SOCKET module. + * new socket creation. + * + * If protocol handler is a module, then it can use module reference + * counts to protect against new references. If protocol handler is not + * a module then it needs to provide its own protection in + * the ops->create routine. */ - int sock_unregister(int family) { if (family < 0 || family >= NPROTO) - return -1; + return -EINVAL; - net_family_write_lock(); + spin_lock(&net_family_lock); net_families[family] = NULL; - net_family_write_unlock(); + spin_unlock(&net_family_lock); + + synchronize_rcu(); + printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); return 0; } From f0fd27d42e39b91f85e1840ec49b072fd6c545b8 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 9 Aug 2006 21:03:17 -0700 Subject: [PATCH 0439/1063] [NET]: sock_register interface changes The sock_register() doesn't change the family, so the protocols can define it read-only. No caller ever checks return value from sock_unregister() Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/net.h | 4 ++-- net/socket.c | 10 ++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/include/linux/net.h b/include/linux/net.h index 1bd76327ee2b..c257f716e00f 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -176,8 +176,8 @@ struct iovec; struct kvec; extern int sock_wake_async(struct socket *sk, int how, int band); -extern int sock_register(struct net_proto_family *fam); -extern int sock_unregister(int family); +extern int sock_register(const struct net_proto_family *fam); +extern void sock_unregister(int family); extern int sock_create(int family, int type, int proto, struct socket **res); extern int sock_create_kern(int family, int type, int proto, diff --git a/net/socket.c b/net/socket.c index b5a3fcb9ed6d..4147fe4bf41d 100644 --- a/net/socket.c +++ b/net/socket.c @@ -147,7 +147,7 @@ static struct file_operations socket_file_ops = { */ static DEFINE_SPINLOCK(net_family_lock); -static const struct net_proto_family *net_families[NPROTO]; +static const struct net_proto_family *net_families[NPROTO] __read_mostly; /* * Statistics counters of the socket lists @@ -2080,7 +2080,7 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) * socket interface. The value ops->family coresponds to the * socket system call protocol family. */ -int sock_register(struct net_proto_family *ops) +int sock_register(const struct net_proto_family *ops) { int err; @@ -2116,10 +2116,9 @@ int sock_register(struct net_proto_family *ops) * a module then it needs to provide its own protection in * the ops->create routine. */ -int sock_unregister(int family) +void sock_unregister(int family) { - if (family < 0 || family >= NPROTO) - return -EINVAL; + BUG_ON(family < 0 || family >= NPROTO); spin_lock(&net_family_lock); net_families[family] = NULL; @@ -2128,7 +2127,6 @@ int sock_unregister(int family) synchronize_rcu(); printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); - return 0; } static int __init sock_init(void) From bf0d52492d82ad70684e17c8a46942c13d0e140e Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 22 Sep 2006 14:00:29 -0700 Subject: [PATCH 0440/1063] [NET]: Remove unnecessary config.h includes from net/ config.h is automatically included by kbuild these days. Signed-off-by: Dave Jones Signed-off-by: David S. Miller --- net/atm/atm_sysfs.c | 1 - net/core/dev_mcast.c | 3 +-- net/core/wireless.c | 1 - net/ipv4/af_inet.c | 1 - net/ipv4/ipconfig.c | 1 - net/ipv4/netfilter/ip_conntrack_sip.c | 1 - net/ipv4/raw.c | 3 +-- net/ipv4/tcp_lp.c | 1 - net/ipv4/tcp_veno.c | 1 - 9 files changed, 2 insertions(+), 11 deletions(-) diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c index 5df4b9a068bb..c0a4ae28fcfa 100644 --- a/net/atm/atm_sysfs.c +++ b/net/atm/atm_sysfs.c @@ -1,6 +1,5 @@ /* ATM driver model support. */ -#include #include #include #include diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index c57d887da2ef..b22648d04d36 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -21,8 +21,7 @@ * 2 of the License, or (at your option) any later version. */ -#include -#include +#include #include #include #include diff --git a/net/core/wireless.c b/net/core/wireless.c index de0bde4b51dd..348b9da73cc4 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c @@ -72,7 +72,6 @@ /***************************** INCLUDES *****************************/ -#include /* Not needed ??? */ #include #include /* off_t */ #include /* struct ifreq, dev_get_by_name() */ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index fc40da3b6d39..36c38bffb474 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -67,7 +67,6 @@ * 2 of the License, or (at your option) any later version. */ -#include #include #include #include diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index cb8a92f18ef6..1fbb38415b19 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -31,7 +31,6 @@ * -- Josef Siemes , Aug 2002 */ -#include #include #include #include diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c index 4f222d6be009..2893e9c74850 100644 --- a/net/ipv4/netfilter/ip_conntrack_sip.c +++ b/net/ipv4/netfilter/ip_conntrack_sip.c @@ -8,7 +8,6 @@ * published by the Free Software Foundation. */ -#include #include #include #include diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index fe44cb50a1c5..0e935b4c8741 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -38,8 +38,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ - -#include + #include #include #include diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index 48f28d617ce6..649ebaed1df1 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -35,7 +35,6 @@ * Version: $Id: tcp_lp.c,v 1.24 2006/09/05 20:22:53 hswong3i Exp $ */ -#include #include #include diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 11b42a7135c1..5b2fe6d2aba9 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -9,7 +9,6 @@ * See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf */ -#include #include #include #include From 1e38bb3a38d08129d08c904b10ea3ba08e22d297 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 10 Aug 2006 00:22:41 -0700 Subject: [PATCH 0441/1063] [NET]: Kill double initialization in sock_alloc_inode. No need to set ei->socket.flags to zero twice. Signed-off-by: David S. Miller --- net/socket.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/socket.c b/net/socket.c index 4147fe4bf41d..d6f27ed9ba6c 100644 --- a/net/socket.c +++ b/net/socket.c @@ -254,7 +254,6 @@ static struct inode *sock_alloc_inode(struct super_block *sb) ei->socket.ops = NULL; ei->socket.sk = NULL; ei->socket.file = NULL; - ei->socket.flags = 0; return &ei->vfs_inode; } From d924424aaed116b362c6d0e667d912b77e655085 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 10 Aug 2006 23:03:23 -0700 Subject: [PATCH 0442/1063] [NEIGHBOUR]: Use ALIGN() macro. Rather than opencoding the mask, it looks better to use ALIGN() macro from kernel.h. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/neighbour.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 74c4b6ff8a5c..bd187daffdb9 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -101,7 +101,7 @@ struct neighbour __u8 dead; atomic_t probes; rwlock_t lock; - unsigned char ha[(MAX_ADDR_LEN+sizeof(unsigned long)-1)&~(sizeof(unsigned long)-1)]; + unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))]; struct hh_cache *hh; atomic_t refcnt; int (*output)(struct sk_buff *skb); From 2dfe55b47e3d66ded5a84caf71e0da5710edf48b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Aug 2006 23:08:33 -0700 Subject: [PATCH 0443/1063] [NET]: Use u32 for routing table IDs Use u32 for routing table IDs in net/ipv4 and net/decnet in preparation of support for a larger number of routing tables. net/ipv6 already uses u32 everywhere and needs no further changes. No functional changes are made by this patch. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/dn_fib.h | 4 ++-- include/net/ip_fib.h | 14 +++++++------- net/decnet/dn_fib.c | 6 +++--- net/decnet/dn_table.c | 10 +++++----- net/ipv4/fib_frontend.c | 8 ++++---- net/ipv4/fib_hash.c | 4 ++-- net/ipv4/fib_lookup.h | 4 ++-- net/ipv4/fib_rules.c | 2 +- net/ipv4/fib_semantics.c | 4 ++-- net/ipv4/fib_trie.c | 6 +++--- 10 files changed, 31 insertions(+), 31 deletions(-) diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h index 32bc8ce5c5ce..cd9c3782f838 100644 --- a/include/net/dn_fib.h +++ b/include/net/dn_fib.h @@ -94,7 +94,7 @@ struct dn_fib_node { struct dn_fib_table { - int n; + u32 n; int (*insert)(struct dn_fib_table *t, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, @@ -137,7 +137,7 @@ extern int dn_fib_sync_up(struct net_device *dev); /* * dn_tables.c */ -extern struct dn_fib_table *dn_fib_get_table(int n, int creat); +extern struct dn_fib_table *dn_fib_get_table(u32 n, int creat); extern struct dn_fib_table *dn_fib_empty_table(void); extern void dn_fib_table_init(void); extern void dn_fib_table_cleanup(void); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index adf73586bc05..0dcbf166eb94 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -150,7 +150,7 @@ struct fib_result_nl { #endif /* CONFIG_IP_ROUTE_MULTIPATH_WRANDOM */ struct fib_table { - unsigned char tb_id; + u32 tb_id; unsigned tb_stamp; int (*tb_lookup)(struct fib_table *tb, const struct flowi *flp, struct fib_result *res); int (*tb_insert)(struct fib_table *table, struct rtmsg *r, @@ -173,14 +173,14 @@ struct fib_table { extern struct fib_table *ip_fib_local_table; extern struct fib_table *ip_fib_main_table; -static inline struct fib_table *fib_get_table(int id) +static inline struct fib_table *fib_get_table(u32 id) { if (id != RT_TABLE_LOCAL) return ip_fib_main_table; return ip_fib_local_table; } -static inline struct fib_table *fib_new_table(int id) +static inline struct fib_table *fib_new_table(u32 id) { return fib_get_table(id); } @@ -205,9 +205,9 @@ static inline void fib_select_default(const struct flowi *flp, struct fib_result extern struct fib_table * fib_tables[RT_TABLE_MAX+1]; extern int fib_lookup(struct flowi *flp, struct fib_result *res); -extern struct fib_table *__fib_new_table(int id); +extern struct fib_table *__fib_new_table(u32 id); -static inline struct fib_table *fib_get_table(int id) +static inline struct fib_table *fib_get_table(u32 id) { if (id == 0) id = RT_TABLE_MAIN; @@ -215,7 +215,7 @@ static inline struct fib_table *fib_get_table(int id) return fib_tables[id]; } -static inline struct fib_table *fib_new_table(int id) +static inline struct fib_table *fib_new_table(u32 id) { if (id == 0) id = RT_TABLE_MAIN; @@ -248,7 +248,7 @@ extern int fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, extern u32 __fib_res_prefsrc(struct fib_result *res); /* Exported by fib_hash.c */ -extern struct fib_table *fib_hash_init(int id); +extern struct fib_table *fib_hash_init(u32 id); #ifdef CONFIG_IP_MULTIPLE_TABLES extern int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb); diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index ed5fb5c3eab5..7b3bf5c3d720 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -534,8 +534,8 @@ int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) { - int t; - int s_t; + u32 t; + u32 s_t; struct dn_fib_table *tb; if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && @@ -765,7 +765,7 @@ void dn_fib_flush(void) { int flushed = 0; struct dn_fib_table *tb; - int id; + u32 id; for(id = RT_TABLE_MAX; id > 0; id--) { if ((tb = dn_fib_get_table(id, 0)) == NULL) diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 2e01b67398c8..1601ee5406a8 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -264,7 +264,7 @@ static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern } static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, + u32 tb_id, u8 type, u8 scope, void *dst, int dst_len, struct dn_fib_info *fi, unsigned int flags) { struct rtmsg *rtm; @@ -327,7 +327,7 @@ rtattr_failure: } -static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, int tb_id, +static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id, struct nlmsghdr *nlh, struct netlink_skb_parms *req) { struct sk_buff *skb; @@ -740,7 +740,7 @@ out: } -struct dn_fib_table *dn_fib_get_table(int n, int create) +struct dn_fib_table *dn_fib_get_table(u32 n, int create) { struct dn_fib_table *t; @@ -777,7 +777,7 @@ struct dn_fib_table *dn_fib_get_table(int n, int create) return t; } -static void dn_fib_del_tree(int n) +static void dn_fib_del_tree(u32 n) { struct dn_fib_table *t; @@ -791,7 +791,7 @@ static void dn_fib_del_tree(int n) struct dn_fib_table *dn_fib_empty_table(void) { - int id; + u32 id; for(id = RT_TABLE_MIN; id <= RT_TABLE_MAX; id++) if (dn_fib_tables[id] == NULL) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index a83f1aa8034e..06f4b23f6f57 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -62,7 +62,7 @@ struct fib_table *ip_fib_main_table; struct fib_table *fib_tables[RT_TABLE_MAX+1]; -struct fib_table *__fib_new_table(int id) +struct fib_table *__fib_new_table(u32 id) { struct fib_table *tb; @@ -82,7 +82,7 @@ static void fib_flush(void) int flushed = 0; #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_table *tb; - int id; + u32 id; for (id = RT_TABLE_MAX; id>0; id--) { if ((tb = fib_get_table(id))==NULL) @@ -333,8 +333,8 @@ int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { - int t; - int s_t; + u32 t; + u32 s_t; struct fib_table *tb; if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 72c633b357cf..f8d5c8024ccb 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -765,9 +765,9 @@ static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin } #ifdef CONFIG_IP_MULTIPLE_TABLES -struct fib_table * fib_hash_init(int id) +struct fib_table * fib_hash_init(u32 id) #else -struct fib_table * __init fib_hash_init(int id) +struct fib_table * __init fib_hash_init(u32 id) #endif { struct fib_table *tb; diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index ef6609ea0eb7..ddd52496b451 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -30,11 +30,11 @@ extern struct fib_info *fib_create_info(const struct rtmsg *r, extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *, struct kern_rta *rta, struct fib_info *fi); extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - u8 tb_id, u8 type, u8 scope, void *dst, + u32 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int); extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, - int z, int tb_id, + int z, u32 tb_id, struct nlmsghdr *n, struct netlink_skb_parms *req); extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index d242e5291fcc..58fb91b00fdf 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -169,7 +169,7 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) static struct fib_table *fib_empty_table(void) { - int id; + u32 id; for (id = 1; id <= RT_TABLE_MAX; id++) if (fib_tables[id] == NULL) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 38bca473c7e2..c7a112b5a185 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -273,7 +273,7 @@ int ip_fib_check_default(u32 gw, struct net_device *dev) } void rtmsg_fib(int event, u32 key, struct fib_alias *fa, - int z, int tb_id, + int z, u32 tb_id, struct nlmsghdr *n, struct netlink_skb_parms *req) { struct sk_buff *skb; @@ -939,7 +939,7 @@ u32 __fib_res_prefsrc(struct fib_result *res) int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, + u32 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int flags) { struct rtmsg *rtm; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 01801c0f885d..4a27b2d573a3 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1148,7 +1148,7 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, key = ntohl(key); - pr_debug("Insert table=%d %08x/%d\n", tb->tb_id, key, plen); + pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); mask = ntohl(inet_make_mask(plen)); @@ -1943,9 +1943,9 @@ out: /* Fix more generic FIB names for init later */ #ifdef CONFIG_IP_MULTIPLE_TABLES -struct fib_table * fib_hash_init(int id) +struct fib_table * fib_hash_init(u32 id) #else -struct fib_table * __init fib_hash_init(int id) +struct fib_table * __init fib_hash_init(u32 id) #endif { struct fib_table *tb; From 9e762a4a89b302cb3b26a1f9bb33eff459eaeca9 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Aug 2006 23:09:48 -0700 Subject: [PATCH 0444/1063] [NET]: Introduce RTA_TABLE/FRA_TABLE attributes Introduce RTA_TABLE route attribute and FRA_TABLE routing rule attribute to hold 32 bit routing table IDs. Usespace compatibility is provided by continuing to accept and send the rtm_table field, but because of its limited size it can only carry the low 8 bits of the table ID. This implies that if larger IDs are used, _all_ userspace programs using them need to use RTA_TABLE. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/fib_rules.h | 4 ++++ include/linux/rtnetlink.h | 8 ++++++++ include/net/fib_rules.h | 7 +++++++ net/core/fib_rules.c | 5 +++-- net/decnet/dn_fib.c | 7 ++++--- net/decnet/dn_route.c | 1 + net/decnet/dn_table.c | 1 + net/ipv4/fib_frontend.c | 7 ++++--- net/ipv4/fib_rules.c | 1 + net/ipv4/fib_semantics.c | 1 + net/ipv4/route.c | 1 + net/ipv6/fib6_rules.c | 1 + net/ipv6/route.c | 13 +++++++++---- 13 files changed, 45 insertions(+), 12 deletions(-) diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h index 5e503f0ca6e4..19a82b6c1c1f 100644 --- a/include/linux/fib_rules.h +++ b/include/linux/fib_rules.h @@ -36,6 +36,10 @@ enum FRA_UNUSED5, FRA_FWMARK, /* netfilter mark (IPv4) */ FRA_FLOW, /* flow/class id */ + FRA_UNUSED6, + FRA_UNUSED7, + FRA_UNUSED8, + FRA_TABLE, /* Extended table id */ __FRA_MAX }; diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 0aaffa2ae666..ea422a539a03 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -264,6 +264,7 @@ enum rtattr_type_t RTA_CACHEINFO, RTA_SESSION, RTA_MP_ALGO, + RTA_TABLE, __RTA_MAX }; @@ -717,6 +718,13 @@ extern void __rtnl_unlock(void); } \ } while(0) +static inline u32 rtm_get_table(struct rtattr **rta, u8 table) +{ + return RTA_GET_U32(rta[RTA_TABLE-1]); +rtattr_failure: + return table; +} + #endif /* __KERNEL__ */ diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 61375d9e53f8..8e2f473d3e82 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -74,6 +74,13 @@ static inline void fib_rule_put(struct fib_rule *rule) call_rcu(&rule->rcu, fib_rule_put_rcu); } +static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla) +{ + if (nla[FRA_TABLE]) + return nla_get_u32(nla[FRA_TABLE]); + return frh->table; +} + extern int fib_rules_register(struct fib_rules_ops *); extern int fib_rules_unregister(struct fib_rules_ops *); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 6cdad24038e2..873b04d5df81 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -187,7 +187,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) rule->action = frh->action; rule->flags = frh->flags; - rule->table = frh->table; + rule->table = frh_get_table(frh, tb); if (!rule->pref && ops->default_pref) rule->pref = ops->default_pref(); @@ -245,7 +245,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (frh->action && (frh->action != rule->action)) continue; - if (frh->table && (frh->table != rule->table)) + if (frh->table && (frh_get_table(frh, tb) != rule->table)) continue; if (tb[FRA_PRIORITY] && @@ -291,6 +291,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, frh = nlmsg_data(nlh); frh->table = rule->table; + NLA_PUT_U32(skb, FRA_TABLE, rule->table); frh->res1 = 0; frh->res2 = 0; frh->action = rule->action; diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 7b3bf5c3d720..fb596373daa8 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -491,7 +491,8 @@ static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta) if (attr) { if (RTA_PAYLOAD(attr) < 4 && RTA_PAYLOAD(attr) != 2) return -EINVAL; - if (i != RTA_MULTIPATH && i != RTA_METRICS) + if (i != RTA_MULTIPATH && i != RTA_METRICS && + i != RTA_TABLE) rta[i-1] = (struct rtattr *)RTA_DATA(attr); } } @@ -508,7 +509,7 @@ int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (dn_fib_check_attr(r, rta)) return -EINVAL; - tb = dn_fib_get_table(r->rtm_table, 0); + tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 0); if (tb) return tb->delete(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); @@ -524,7 +525,7 @@ int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (dn_fib_check_attr(r, rta)) return -EINVAL; - tb = dn_fib_get_table(r->rtm_table, 1); + tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 1); if (tb) return tb->insert(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 5e6f4616ca10..4c963213fba5 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1486,6 +1486,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, r->rtm_src_len = 0; r->rtm_tos = 0; r->rtm_table = RT_TABLE_MAIN; + RTA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); r->rtm_type = rt->rt_type; r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; r->rtm_scope = RT_SCOPE_UNIVERSE; diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 1601ee5406a8..eca7c1e10c80 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -278,6 +278,7 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, rtm->rtm_src_len = 0; rtm->rtm_tos = 0; rtm->rtm_table = tb_id; + RTA_PUT_U32(skb, RTA_TABLE, tb_id); rtm->rtm_flags = fi->fib_flags; rtm->rtm_scope = scope; rtm->rtm_type = type; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 06f4b23f6f57..2696ede52de2 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -294,7 +294,8 @@ static int inet_check_attr(struct rtmsg *r, struct rtattr **rta) if (attr) { if (RTA_PAYLOAD(attr) < 4) return -EINVAL; - if (i != RTA_MULTIPATH && i != RTA_METRICS) + if (i != RTA_MULTIPATH && i != RTA_METRICS && + i != RTA_TABLE) *rta = (struct rtattr*)RTA_DATA(attr); } } @@ -310,7 +311,7 @@ int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (inet_check_attr(r, rta)) return -EINVAL; - tb = fib_get_table(r->rtm_table); + tb = fib_get_table(rtm_get_table(rta, r->rtm_table)); if (tb) return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); return -ESRCH; @@ -325,7 +326,7 @@ int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (inet_check_attr(r, rta)) return -EINVAL; - tb = fib_new_table(r->rtm_table); + tb = fib_new_table(rtm_get_table(rta, r->rtm_table)); if (tb) return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); return -ENOBUFS; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 58fb91b00fdf..0330b9cc4b58 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -184,6 +184,7 @@ static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = { [FRA_DST] = { .type = NLA_U32 }, [FRA_FWMARK] = { .type = NLA_U32 }, [FRA_FLOW] = { .type = NLA_U32 }, + [FRA_TABLE] = { .type = NLA_U32 }, }; static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c7a112b5a185..ab753df20a39 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -953,6 +953,7 @@ fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, rtm->rtm_src_len = 0; rtm->rtm_tos = tos; rtm->rtm_table = tb_id; + RTA_PUT_U32(skb, RTA_TABLE, tb_id); rtm->rtm_type = type; rtm->rtm_flags = fi->fib_flags; rtm->rtm_scope = scope; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b873cbcdd0b8..12128b82c9dc 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2652,6 +2652,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, r->rtm_src_len = 0; r->rtm_tos = rt->fl.fl4_tos; r->rtm_table = RT_TABLE_MAIN; + RTA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); r->rtm_type = rt->rt_type; r->rtm_scope = RT_SCOPE_UNIVERSE; r->rtm_protocol = RTPROT_UNSPEC; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 22a2fdb09831..2c4fbc855e6c 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -129,6 +129,7 @@ static struct nla_policy fib6_rule_policy[RTA_MAX+1] __read_mostly = { [FRA_PRIORITY] = { .type = NLA_U32 }, [FRA_SRC] = { .minlen = sizeof(struct in6_addr) }, [FRA_DST] = { .minlen = sizeof(struct in6_addr) }, + [FRA_TABLE] = { .type = NLA_U32 }, }; static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e08d84063c1f..843c5509fced 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1859,7 +1859,8 @@ int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) return -EINVAL; - return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table); + return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb), + rtm_get_table(arg, r->rtm_table)); } int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) @@ -1869,7 +1870,8 @@ int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) return -EINVAL; - return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table); + return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb), + rtm_get_table(arg, r->rtm_table)); } struct rt6_rtnl_dump_arg @@ -1887,6 +1889,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, struct nlmsghdr *nlh; unsigned char *b = skb->tail; struct rta_cacheinfo ci; + u32 table; if (prefix) { /* user wants prefix routes only */ if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { @@ -1902,9 +1905,11 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, rtm->rtm_src_len = rt->rt6i_src.plen; rtm->rtm_tos = 0; if (rt->rt6i_table) - rtm->rtm_table = rt->rt6i_table->tb6_id; + table = rt->rt6i_table->tb6_id; else - rtm->rtm_table = RT6_TABLE_UNSPEC; + table = RT6_TABLE_UNSPEC; + rtm->rtm_table = table; + RTA_PUT_U32(skb, RTA_TABLE, table); if (rt->rt6i_flags&RTF_REJECT) rtm->rtm_type = RTN_UNREACHABLE; else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) From 1af5a8c4a11cfed0c9a7f30fcfb689981750599c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Aug 2006 23:10:46 -0700 Subject: [PATCH 0445/1063] [IPV4]: Increase number of possible routing tables to 2^32 Increase the number of possible routing tables to 2^32 by replacing the fixed sized array of pointers by a hash table and replacing iterations over all possible table IDs by hash table walking. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/ip_fib.h | 25 ++-------- net/ipv4/fib_frontend.c | 100 +++++++++++++++++++++++++++------------- net/ipv4/fib_hash.c | 26 +++++------ net/ipv4/fib_rules.c | 4 +- net/ipv4/fib_trie.c | 26 +++++------ 5 files changed, 100 insertions(+), 81 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 0dcbf166eb94..8e9ba563d342 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -150,6 +150,7 @@ struct fib_result_nl { #endif /* CONFIG_IP_ROUTE_MULTIPATH_WRANDOM */ struct fib_table { + struct hlist_node tb_hlist; u32 tb_id; unsigned tb_stamp; int (*tb_lookup)(struct fib_table *tb, const struct flowi *flp, struct fib_result *res); @@ -200,29 +201,13 @@ static inline void fib_select_default(const struct flowi *flp, struct fib_result } #else /* CONFIG_IP_MULTIPLE_TABLES */ -#define ip_fib_local_table (fib_tables[RT_TABLE_LOCAL]) -#define ip_fib_main_table (fib_tables[RT_TABLE_MAIN]) +#define ip_fib_local_table fib_get_table(RT_TABLE_LOCAL) +#define ip_fib_main_table fib_get_table(RT_TABLE_MAIN) -extern struct fib_table * fib_tables[RT_TABLE_MAX+1]; extern int fib_lookup(struct flowi *flp, struct fib_result *res); -extern struct fib_table *__fib_new_table(u32 id); - -static inline struct fib_table *fib_get_table(u32 id) -{ - if (id == 0) - id = RT_TABLE_MAIN; - - return fib_tables[id]; -} - -static inline struct fib_table *fib_new_table(u32 id) -{ - if (id == 0) - id = RT_TABLE_MAIN; - - return fib_tables[id] ? : __fib_new_table(id); -} +extern struct fib_table *fib_new_table(u32 id); +extern struct fib_table *fib_get_table(u32 id); extern void fib_select_default(const struct flowi *flp, struct fib_result *res); #endif /* CONFIG_IP_MULTIPLE_TABLES */ diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 2696ede52de2..ad4c14f968a1 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -51,48 +52,67 @@ #ifndef CONFIG_IP_MULTIPLE_TABLES -#define RT_TABLE_MIN RT_TABLE_MAIN - struct fib_table *ip_fib_local_table; struct fib_table *ip_fib_main_table; +#define FIB_TABLE_HASHSZ 1 +static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; + #else -#define RT_TABLE_MIN 1 +#define FIB_TABLE_HASHSZ 256 +static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; -struct fib_table *fib_tables[RT_TABLE_MAX+1]; - -struct fib_table *__fib_new_table(u32 id) +struct fib_table *fib_new_table(u32 id) { struct fib_table *tb; + unsigned int h; + if (id == 0) + id = RT_TABLE_MAIN; + tb = fib_get_table(id); + if (tb) + return tb; tb = fib_hash_init(id); if (!tb) return NULL; - fib_tables[id] = tb; + h = id & (FIB_TABLE_HASHSZ - 1); + hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]); return tb; } +struct fib_table *fib_get_table(u32 id) +{ + struct fib_table *tb; + struct hlist_node *node; + unsigned int h; + if (id == 0) + id = RT_TABLE_MAIN; + h = id & (FIB_TABLE_HASHSZ - 1); + rcu_read_lock(); + hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) { + if (tb->tb_id == id) { + rcu_read_unlock(); + return tb; + } + } + rcu_read_unlock(); + return NULL; +} #endif /* CONFIG_IP_MULTIPLE_TABLES */ - static void fib_flush(void) { int flushed = 0; -#ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_table *tb; - u32 id; + struct hlist_node *node; + unsigned int h; - for (id = RT_TABLE_MAX; id>0; id--) { - if ((tb = fib_get_table(id))==NULL) - continue; - flushed += tb->tb_flush(tb); + for (h = 0; h < FIB_TABLE_HASHSZ; h++) { + hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) + flushed += tb->tb_flush(tb); } -#else /* CONFIG_IP_MULTIPLE_TABLES */ - flushed += ip_fib_main_table->tb_flush(ip_fib_main_table); - flushed += ip_fib_local_table->tb_flush(ip_fib_local_table); -#endif /* CONFIG_IP_MULTIPLE_TABLES */ if (flushed) rt_cache_flush(-1); @@ -334,29 +354,37 @@ int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { - u32 t; - u32 s_t; + unsigned int h, s_h; + unsigned int e = 0, s_e; struct fib_table *tb; + struct hlist_node *node; + int dumped = 0; if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) return ip_rt_dump(skb, cb); - s_t = cb->args[0]; - if (s_t == 0) - s_t = cb->args[0] = RT_TABLE_MIN; + s_h = cb->args[0]; + s_e = cb->args[1]; - for (t=s_t; t<=RT_TABLE_MAX; t++) { - if (t < s_t) continue; - if (t > s_t) - memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); - if ((tb = fib_get_table(t))==NULL) - continue; - if (tb->tb_dump(tb, skb, cb) < 0) - break; + for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { + e = 0; + hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) { + if (e < s_e) + goto next; + if (dumped) + memset(&cb->args[2], 0, sizeof(cb->args) - + 2 * sizeof(cb->args[0])); + if (tb->tb_dump(tb, skb, cb) < 0) + goto out; + dumped = 1; +next: + e++; + } } - - cb->args[0] = t; +out: + cb->args[1] = e; + cb->args[0] = h; return skb->len; } @@ -654,9 +682,15 @@ static struct notifier_block fib_netdev_notifier = { void __init ip_fib_init(void) { + unsigned int i; + + for (i = 0; i < FIB_TABLE_HASHSZ; i++) + INIT_HLIST_HEAD(&fib_table_hash[i]); #ifndef CONFIG_IP_MULTIPLE_TABLES ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); + hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]); ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); + hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]); #else fib4_rules_init(); #endif diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index f8d5c8024ccb..b5bee1a71e5c 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -684,7 +684,7 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, struct fib_node *f; int i, s_i; - s_i = cb->args[3]; + s_i = cb->args[4]; i = 0; hlist_for_each_entry(f, node, head, fn_hash) { struct fib_alias *fa; @@ -704,14 +704,14 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, fa->fa_tos, fa->fa_info, NLM_F_MULTI) < 0) { - cb->args[3] = i; + cb->args[4] = i; return -1; } next: i++; } } - cb->args[3] = i; + cb->args[4] = i; return skb->len; } @@ -722,21 +722,21 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb, { int h, s_h; - s_h = cb->args[2]; + s_h = cb->args[3]; for (h=0; h < fz->fz_divisor; h++) { if (h < s_h) continue; if (h > s_h) - memset(&cb->args[3], 0, - sizeof(cb->args) - 3*sizeof(cb->args[0])); + memset(&cb->args[4], 0, + sizeof(cb->args) - 4*sizeof(cb->args[0])); if (fz->fz_hash == NULL || hlist_empty(&fz->fz_hash[h])) continue; if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h])<0) { - cb->args[2] = h; + cb->args[3] = h; return -1; } } - cb->args[2] = h; + cb->args[3] = h; return skb->len; } @@ -746,21 +746,21 @@ static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin struct fn_zone *fz; struct fn_hash *table = (struct fn_hash*)tb->tb_data; - s_m = cb->args[1]; + s_m = cb->args[2]; read_lock(&fib_hash_lock); for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) { if (m < s_m) continue; if (m > s_m) - memset(&cb->args[2], 0, - sizeof(cb->args) - 2*sizeof(cb->args[0])); + memset(&cb->args[3], 0, + sizeof(cb->args) - 3*sizeof(cb->args[0])); if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) { - cb->args[1] = m; + cb->args[2] = m; read_unlock(&fib_hash_lock); return -1; } } read_unlock(&fib_hash_lock); - cb->args[1] = m; + cb->args[2] = m; return skb->len; } diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 0330b9cc4b58..ce185ac6f260 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -172,8 +172,8 @@ static struct fib_table *fib_empty_table(void) u32 id; for (id = 1; id <= RT_TABLE_MAX; id++) - if (fib_tables[id] == NULL) - return __fib_new_table(id); + if (fib_get_table(id) == NULL) + return fib_new_table(id); return NULL; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 4a27b2d573a3..2a580eb2579b 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1848,7 +1848,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi u32 xkey = htonl(key); - s_i = cb->args[3]; + s_i = cb->args[4]; i = 0; /* rcu_read_lock is hold by caller */ @@ -1870,12 +1870,12 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi plen, fa->fa_tos, fa->fa_info, 0) < 0) { - cb->args[3] = i; + cb->args[4] = i; return -1; } i++; } - cb->args[3] = i; + cb->args[4] = i; return skb->len; } @@ -1886,14 +1886,14 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str struct list_head *fa_head; struct leaf *l = NULL; - s_h = cb->args[2]; + s_h = cb->args[3]; for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { if (h < s_h) continue; if (h > s_h) - memset(&cb->args[3], 0, - sizeof(cb->args) - 3*sizeof(cb->args[0])); + memset(&cb->args[4], 0, + sizeof(cb->args) - 4*sizeof(cb->args[0])); fa_head = get_fa_head(l, plen); @@ -1904,11 +1904,11 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str continue; if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) { - cb->args[2] = h; + cb->args[3] = h; return -1; } } - cb->args[2] = h; + cb->args[3] = h; return skb->len; } @@ -1917,23 +1917,23 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin int m, s_m; struct trie *t = (struct trie *) tb->tb_data; - s_m = cb->args[1]; + s_m = cb->args[2]; rcu_read_lock(); for (m = 0; m <= 32; m++) { if (m < s_m) continue; if (m > s_m) - memset(&cb->args[2], 0, - sizeof(cb->args) - 2*sizeof(cb->args[0])); + memset(&cb->args[3], 0, + sizeof(cb->args) - 3*sizeof(cb->args[0])); if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) { - cb->args[1] = m; + cb->args[2] = m; goto out; } } rcu_read_unlock(); - cb->args[1] = m; + cb->args[2] = m; return skb->len; out: rcu_read_unlock(); From 1b43af5480c351dbcb2eef478bafe179cbeb6e83 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Aug 2006 23:11:17 -0700 Subject: [PATCH 0446/1063] [IPV6]: Increase number of possible routing tables to 2^32 Increase number of possible routing tables to 2^32 by replacing iterations over all possible table IDs by hash table walking. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/ip6_route.h | 7 ++ net/ipv6/ip6_fib.c | 171 +++++++++++++++++++++++++++++++++++----- net/ipv6/route.c | 128 +----------------------------- 3 files changed, 159 insertions(+), 147 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 9bfa3cc6cedb..01bfe404784f 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -137,6 +137,13 @@ extern int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a extern int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); extern int inet6_rtm_getroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); +struct rt6_rtnl_dump_arg +{ + struct sk_buff *skb; + struct netlink_callback *cb; +}; + +extern int rt6_dump_route(struct rt6_info *rt, void *p_arg); extern void rt6_ifdown(struct net_device *dev); extern void rt6_mtu_change(struct net_device *dev, unsigned mtu); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1f2316187ca4..bececbe9dd2c 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -158,7 +158,26 @@ static struct fib6_table fib6_main_tbl = { }; #ifdef CONFIG_IPV6_MULTIPLE_TABLES +#define FIB_TABLE_HASHSZ 256 +#else +#define FIB_TABLE_HASHSZ 1 +#endif +static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; +static void fib6_link_table(struct fib6_table *tb) +{ + unsigned int h; + + h = tb->tb6_id & (FIB_TABLE_HASHSZ - 1); + + /* + * No protection necessary, this is the only list mutatation + * operation, tables never disappear once they exist. + */ + hlist_add_head_rcu(&tb->tb6_hlist, &fib_table_hash[h]); +} + +#ifdef CONFIG_IPV6_MULTIPLE_TABLES static struct fib6_table fib6_local_tbl = { .tb6_id = RT6_TABLE_LOCAL, .tb6_lock = RW_LOCK_UNLOCKED, @@ -168,9 +187,6 @@ static struct fib6_table fib6_local_tbl = { }, }; -#define FIB_TABLE_HASHSZ 256 -static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; - static struct fib6_table *fib6_alloc_table(u32 id) { struct fib6_table *table; @@ -186,19 +202,6 @@ static struct fib6_table *fib6_alloc_table(u32 id) return table; } -static void fib6_link_table(struct fib6_table *tb) -{ - unsigned int h; - - h = tb->tb6_id & (FIB_TABLE_HASHSZ - 1); - - /* - * No protection necessary, this is the only list mutatation - * operation, tables never disappear once they exist. - */ - hlist_add_head_rcu(&tb->tb6_hlist, &fib_table_hash[h]); -} - struct fib6_table *fib6_new_table(u32 id) { struct fib6_table *tb; @@ -263,10 +266,135 @@ struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, static void __init fib6_tables_init(void) { + fib6_link_table(&fib6_main_tbl); } #endif +static int fib6_dump_node(struct fib6_walker_t *w) +{ + int res; + struct rt6_info *rt; + + for (rt = w->leaf; rt; rt = rt->u.next) { + res = rt6_dump_route(rt, w->args); + if (res < 0) { + /* Frame is full, suspend walking */ + w->leaf = rt; + return 1; + } + BUG_TRAP(res!=0); + } + w->leaf = NULL; + return 0; +} + +static void fib6_dump_end(struct netlink_callback *cb) +{ + struct fib6_walker_t *w = (void*)cb->args[2]; + + if (w) { + cb->args[2] = 0; + kfree(w); + } + cb->done = (void*)cb->args[3]; + cb->args[1] = 3; +} + +static int fib6_dump_done(struct netlink_callback *cb) +{ + fib6_dump_end(cb); + return cb->done ? cb->done(cb) : 0; +} + +static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct fib6_walker_t *w; + int res; + + w = (void *)cb->args[2]; + w->root = &table->tb6_root; + + if (cb->args[4] == 0) { + read_lock_bh(&table->tb6_lock); + res = fib6_walk(w); + read_unlock_bh(&table->tb6_lock); + if (res > 0) + cb->args[4] = 1; + } else { + read_lock_bh(&table->tb6_lock); + res = fib6_walk_continue(w); + read_unlock_bh(&table->tb6_lock); + if (res != 0) { + if (res < 0) + fib6_walker_unlink(w); + goto end; + } + fib6_walker_unlink(w); + cb->args[4] = 0; + } +end: + return res; +} + +int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) +{ + unsigned int h, s_h; + unsigned int e = 0, s_e; + struct rt6_rtnl_dump_arg arg; + struct fib6_walker_t *w; + struct fib6_table *tb; + struct hlist_node *node; + int res = 0; + + s_h = cb->args[0]; + s_e = cb->args[1]; + + w = (void *)cb->args[2]; + if (w == NULL) { + /* New dump: + * + * 1. hook callback destructor. + */ + cb->args[3] = (long)cb->done; + cb->done = fib6_dump_done; + + /* + * 2. allocate and initialize walker. + */ + w = kzalloc(sizeof(*w), GFP_ATOMIC); + if (w == NULL) + return -ENOMEM; + w->func = fib6_dump_node; + cb->args[2] = (long)w; + } + + arg.skb = skb; + arg.cb = cb; + w->args = &arg; + + for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { + e = 0; + hlist_for_each_entry(tb, node, &fib_table_hash[h], tb6_hlist) { + if (e < s_e) + goto next; + res = fib6_dump_table(tb, skb, cb); + if (res != 0) + goto out; +next: + e++; + } + } +out: + cb->args[1] = e; + cb->args[0] = h; + + res = res < 0 ? res : skb->len; + if (res <= 0) + fib6_dump_end(cb); + return res; +} /* * Routing Table @@ -1187,17 +1315,20 @@ static void fib6_clean_tree(struct fib6_node *root, void fib6_clean_all(int (*func)(struct rt6_info *, void *arg), int prune, void *arg) { - int i; struct fib6_table *table; + struct hlist_node *node; + unsigned int h; - for (i = FIB6_TABLE_MIN; i <= FIB6_TABLE_MAX; i++) { - table = fib6_get_table(i); - if (table != NULL) { + rcu_read_lock(); + for (h = 0; h < FIB_TABLE_HASHSZ; h++) { + hlist_for_each_entry_rcu(table, node, &fib_table_hash[h], + tb6_hlist) { write_lock_bh(&table->tb6_lock); fib6_clean_tree(&table->tb6_root, func, prune, arg); write_unlock_bh(&table->tb6_lock); } } + rcu_read_unlock(); } static int fib6_prune_clone(struct rt6_info *rt, void *arg) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 843c5509fced..9ce28277f47f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1874,12 +1874,6 @@ int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) rtm_get_table(arg, r->rtm_table)); } -struct rt6_rtnl_dump_arg -{ - struct sk_buff *skb; - struct netlink_callback *cb; -}; - static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, struct in6_addr *dst, struct in6_addr *src, int iif, int type, u32 pid, u32 seq, @@ -1976,7 +1970,7 @@ rtattr_failure: return -1; } -static int rt6_dump_route(struct rt6_info *rt, void *p_arg) +int rt6_dump_route(struct rt6_info *rt, void *p_arg) { struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; int prefix; @@ -1992,126 +1986,6 @@ static int rt6_dump_route(struct rt6_info *rt, void *p_arg) prefix, NLM_F_MULTI); } -static int fib6_dump_node(struct fib6_walker_t *w) -{ - int res; - struct rt6_info *rt; - - for (rt = w->leaf; rt; rt = rt->u.next) { - res = rt6_dump_route(rt, w->args); - if (res < 0) { - /* Frame is full, suspend walking */ - w->leaf = rt; - return 1; - } - BUG_TRAP(res!=0); - } - w->leaf = NULL; - return 0; -} - -static void fib6_dump_end(struct netlink_callback *cb) -{ - struct fib6_walker_t *w = (void*)cb->args[0]; - - if (w) { - cb->args[0] = 0; - kfree(w); - } - cb->done = (void*)cb->args[1]; - cb->args[1] = 0; -} - -static int fib6_dump_done(struct netlink_callback *cb) -{ - fib6_dump_end(cb); - return cb->done ? cb->done(cb) : 0; -} - -int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct fib6_table *table; - struct rt6_rtnl_dump_arg arg; - struct fib6_walker_t *w; - int i, res = 0; - - arg.skb = skb; - arg.cb = cb; - - /* - * cb->args[0] = pointer to walker structure - * cb->args[1] = saved cb->done() pointer - * cb->args[2] = current table being dumped - */ - - w = (void*)cb->args[0]; - if (w == NULL) { - /* New dump: - * - * 1. hook callback destructor. - */ - cb->args[1] = (long)cb->done; - cb->done = fib6_dump_done; - - /* - * 2. allocate and initialize walker. - */ - w = kzalloc(sizeof(*w), GFP_ATOMIC); - if (w == NULL) - return -ENOMEM; - w->func = fib6_dump_node; - w->args = &arg; - cb->args[0] = (long)w; - cb->args[2] = FIB6_TABLE_MIN; - } else { - w->args = &arg; - i = cb->args[2]; - if (i > FIB6_TABLE_MAX) - goto end; - - table = fib6_get_table(i); - if (table != NULL) { - read_lock_bh(&table->tb6_lock); - w->root = &table->tb6_root; - res = fib6_walk_continue(w); - read_unlock_bh(&table->tb6_lock); - if (res != 0) { - if (res < 0) - fib6_walker_unlink(w); - goto end; - } - } - - fib6_walker_unlink(w); - cb->args[2] = ++i; - } - - for (i = cb->args[2]; i <= FIB6_TABLE_MAX; i++) { - table = fib6_get_table(i); - if (table == NULL) - continue; - - read_lock_bh(&table->tb6_lock); - w->root = &table->tb6_root; - res = fib6_walk(w); - read_unlock_bh(&table->tb6_lock); - if (res) - break; - } -end: - cb->args[2] = i; - - res = res < 0 ? res : skb->len; - /* res < 0 is an error. (really, impossible) - res == 0 means that dump is complete, but skb still can contain data. - res > 0 dump is not complete, but frame is full. - */ - /* Destroy walker, if dump of this table is complete. */ - if (res <= 0) - fib6_dump_end(cb); - return res; -} - int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { struct rtattr **rta = arg; From abcab268303c22d24fc89fedd35d82271d20f5da Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Aug 2006 23:11:47 -0700 Subject: [PATCH 0447/1063] [DECNET]: Increase number of possible routing tables to 2^32 Increase the number of possible routing tables to 2^32 by replacing the fixed sized array of pointers by a hash table and replacing iterations over all possible table IDs by hash table walking. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/dn_fib.h | 3 +- net/decnet/dn_fib.c | 49 ----------------- net/decnet/dn_rules.c | 2 +- net/decnet/dn_table.c | 125 ++++++++++++++++++++++++++++++------------ 4 files changed, 93 insertions(+), 86 deletions(-) diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h index cd9c3782f838..d97aa10c463f 100644 --- a/include/net/dn_fib.h +++ b/include/net/dn_fib.h @@ -94,6 +94,7 @@ struct dn_fib_node { struct dn_fib_table { + struct hlist_node hlist; u32 n; int (*insert)(struct dn_fib_table *t, struct rtmsg *r, @@ -177,8 +178,6 @@ static inline void dn_fib_res_put(struct dn_fib_res *res) fib_rule_put(res->r); } -extern struct dn_fib_table *dn_fib_tables[]; - #else /* Endnode */ #define dn_fib_init() do { } while(0) diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index fb596373daa8..5ccca3ed53bd 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -532,39 +532,6 @@ int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return -ENOBUFS; } - -int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) -{ - u32 t; - u32 s_t; - struct dn_fib_table *tb; - - if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && - ((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) - return dn_cache_dump(skb, cb); - - s_t = cb->args[0]; - if (s_t == 0) - s_t = cb->args[0] = RT_MIN_TABLE; - - for(t = s_t; t <= RT_TABLE_MAX; t++) { - if (t < s_t) - continue; - if (t > s_t) - memset(&cb->args[1], 0, - sizeof(cb->args) - sizeof(cb->args[0])); - tb = dn_fib_get_table(t, 0); - if (tb == NULL) - continue; - if (tb->dump(tb, skb, cb) < 0) - break; - } - - cb->args[0] = t; - - return skb->len; -} - static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa) { struct dn_fib_table *tb; @@ -762,22 +729,6 @@ int dn_fib_sync_up(struct net_device *dev) return ret; } -void dn_fib_flush(void) -{ - int flushed = 0; - struct dn_fib_table *tb; - u32 id; - - for(id = RT_TABLE_MAX; id > 0; id--) { - if ((tb = dn_fib_get_table(id, 0)) == NULL) - continue; - flushed += tb->flush(tb); - } - - if (flushed) - dn_rt_cache_flush(-1); -} - static struct notifier_block dn_fib_dnaddr_notifier = { .notifier_call = dn_fib_dnaddr_event, }; diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 096f1273e714..878312ff34ec 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -210,7 +210,7 @@ unsigned dnet_addr_type(__le16 addr) struct flowi fl = { .nl_u = { .dn_u = { .daddr = addr } } }; struct dn_fib_res res; unsigned ret = RTN_UNICAST; - struct dn_fib_table *tb = dn_fib_tables[RT_TABLE_LOCAL]; + struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0); res.r = NULL; diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index eca7c1e10c80..10e87262b6fb 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -75,9 +75,9 @@ for( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fn_next) for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next) #define RT_TABLE_MIN 1 - +#define DN_FIB_TABLE_HASHSZ 256 +static struct hlist_head dn_fib_table_hash[DN_FIB_TABLE_HASHSZ]; static DEFINE_RWLOCK(dn_fib_tables_lock); -struct dn_fib_table *dn_fib_tables[RT_TABLE_MAX + 1]; static kmem_cache_t *dn_hash_kmem __read_mostly; static int dn_fib_hash_zombies; @@ -361,7 +361,7 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, { int i, s_i; - s_i = cb->args[3]; + s_i = cb->args[4]; for(i = 0; f; i++, f = f->fn_next) { if (i < s_i) continue; @@ -374,11 +374,11 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, (f->fn_state & DN_S_ZOMBIE) ? 0 : f->fn_type, f->fn_scope, &f->fn_key, dz->dz_order, f->fn_info, NLM_F_MULTI) < 0) { - cb->args[3] = i; + cb->args[4] = i; return -1; } } - cb->args[3] = i; + cb->args[4] = i; return skb->len; } @@ -389,20 +389,20 @@ static __inline__ int dn_hash_dump_zone(struct sk_buff *skb, { int h, s_h; - s_h = cb->args[2]; + s_h = cb->args[3]; for(h = 0; h < dz->dz_divisor; h++) { if (h < s_h) continue; if (h > s_h) - memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0])); + memset(&cb->args[4], 0, sizeof(cb->args) - 4*sizeof(cb->args[0])); if (dz->dz_hash == NULL || dz->dz_hash[h] == NULL) continue; if (dn_hash_dump_bucket(skb, cb, tb, dz, dz->dz_hash[h]) < 0) { - cb->args[2] = h; + cb->args[3] = h; return -1; } } - cb->args[2] = h; + cb->args[3] = h; return skb->len; } @@ -413,26 +413,63 @@ static int dn_fib_table_dump(struct dn_fib_table *tb, struct sk_buff *skb, struct dn_zone *dz; struct dn_hash *table = (struct dn_hash *)tb->data; - s_m = cb->args[1]; + s_m = cb->args[2]; read_lock(&dn_fib_tables_lock); for(dz = table->dh_zone_list, m = 0; dz; dz = dz->dz_next, m++) { if (m < s_m) continue; if (m > s_m) - memset(&cb->args[2], 0, sizeof(cb->args) - 2*sizeof(cb->args[0])); + memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0])); if (dn_hash_dump_zone(skb, cb, tb, dz) < 0) { - cb->args[1] = m; + cb->args[2] = m; read_unlock(&dn_fib_tables_lock); return -1; } } read_unlock(&dn_fib_tables_lock); - cb->args[1] = m; + cb->args[2] = m; return skb->len; } +int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + unsigned int h, s_h; + unsigned int e = 0, s_e; + struct dn_fib_table *tb; + struct hlist_node *node; + int dumped = 0; + + if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && + ((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) + return dn_cache_dump(skb, cb); + + s_h = cb->args[0]; + s_e = cb->args[1]; + + for (h = s_h; h < DN_FIB_TABLE_HASHSZ; h++, s_h = 0) { + e = 0; + hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist) { + if (e < s_e) + goto next; + if (dumped) + memset(&cb->args[2], 0, sizeof(cb->args) - + 2 * sizeof(cb->args[0])); + if (tb->dump(tb, skb, cb) < 0) + goto out; + dumped = 1; +next: + e++; + } + } +out: + cb->args[1] = e; + cb->args[0] = h; + + return skb->len; +} + static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req) { struct dn_hash *table = (struct dn_hash *)tb->data; @@ -744,6 +781,8 @@ out: struct dn_fib_table *dn_fib_get_table(u32 n, int create) { struct dn_fib_table *t; + struct hlist_node *node; + unsigned int h; if (n < RT_TABLE_MIN) return NULL; @@ -751,8 +790,15 @@ struct dn_fib_table *dn_fib_get_table(u32 n, int create) if (n > RT_TABLE_MAX) return NULL; - if (dn_fib_tables[n]) - return dn_fib_tables[n]; + h = n & (DN_FIB_TABLE_HASHSZ - 1); + rcu_read_lock(); + hlist_for_each_entry_rcu(t, node, &dn_fib_table_hash[h], hlist) { + if (t->n == n) { + rcu_read_unlock(); + return t; + } + } + rcu_read_unlock(); if (!create) return NULL; @@ -773,33 +819,37 @@ struct dn_fib_table *dn_fib_get_table(u32 n, int create) t->flush = dn_fib_table_flush; t->dump = dn_fib_table_dump; memset(t->data, 0, sizeof(struct dn_hash)); - dn_fib_tables[n] = t; + hlist_add_head_rcu(&t->hlist, &dn_fib_table_hash[h]); return t; } -static void dn_fib_del_tree(u32 n) -{ - struct dn_fib_table *t; - - write_lock(&dn_fib_tables_lock); - t = dn_fib_tables[n]; - dn_fib_tables[n] = NULL; - write_unlock(&dn_fib_tables_lock); - - kfree(t); -} - struct dn_fib_table *dn_fib_empty_table(void) { u32 id; for(id = RT_TABLE_MIN; id <= RT_TABLE_MAX; id++) - if (dn_fib_tables[id] == NULL) + if (dn_fib_get_table(id, 0) == NULL) return dn_fib_get_table(id, 1); return NULL; } +void dn_fib_flush(void) +{ + int flushed = 0; + struct dn_fib_table *tb; + struct hlist_node *node; + unsigned int h; + + for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) { + hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist) + flushed += tb->flush(tb); + } + + if (flushed) + dn_rt_cache_flush(-1); +} + void __init dn_fib_table_init(void) { dn_hash_kmem = kmem_cache_create("dn_fib_info_cache", @@ -810,10 +860,17 @@ void __init dn_fib_table_init(void) void __exit dn_fib_table_cleanup(void) { - int i; + struct dn_fib_table *t; + struct hlist_node *node, *next; + unsigned int h; - for (i = RT_TABLE_MIN; i <= RT_TABLE_MAX; ++i) - dn_fib_del_tree(i); - - return; + write_lock(&dn_fib_tables_lock); + for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) { + hlist_for_each_entry_safe(t, node, next, &dn_fib_table_hash[h], + hlist) { + hlist_del(&t->hlist); + kfree(t); + } + } + write_unlock(&dn_fib_tables_lock); } From b801f54917b7c6e8540f877ee562cd0725e62ebd Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Aug 2006 23:12:34 -0700 Subject: [PATCH 0448/1063] [NET]: Increate RT_TABLE_MAX to 2^32 Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index ea422a539a03..7e4aa48680a7 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -239,10 +239,8 @@ enum rt_class_t RT_TABLE_DEFAULT=253, RT_TABLE_MAIN=254, RT_TABLE_LOCAL=255, - __RT_TABLE_MAX + RT_TABLE_MAX=0xFFFFFFFF }; -#define RT_TABLE_MAX (__RT_TABLE_MAX - 1) - /* Routing message attributes */ From 3bf72957d2a553c343e4285463ef0a88139bdec4 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 10 Aug 2006 23:31:08 -0700 Subject: [PATCH 0449/1063] [HTB]: Remove broken debug code. The HTB network scheduler had debug code that wouldn't compile and confused and obfuscated the code, remove it. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 302 +++++--------------------------------------- 1 file changed, 34 insertions(+), 268 deletions(-) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 880a3394a51f..73094e7f4169 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -70,7 +70,6 @@ #define HTB_HSIZE 16 /* classid hash size */ #define HTB_EWMAC 2 /* rate average over HTB_EWMAC*HTB_HSIZE sec */ -#undef HTB_DEBUG /* compile debugging support (activated by tc tool) */ #define HTB_RATECM 1 /* whether to use rate computer */ #define HTB_HYSTERESIS 1/* whether to use mode hysteresis for speedup */ #define HTB_QLOCK(S) spin_lock_bh(&(S)->dev->queue_lock) @@ -81,51 +80,6 @@ #error "Mismatched sch_htb.c and pkt_sch.h" #endif -/* debugging support; S is subsystem, these are defined: - 0 - netlink messages - 1 - enqueue - 2 - drop & requeue - 3 - dequeue main - 4 - dequeue one prio DRR part - 5 - dequeue class accounting - 6 - class overlimit status computation - 7 - hint tree - 8 - event queue - 10 - rate estimator - 11 - classifier - 12 - fast dequeue cache - - L is level; 0 = none, 1 = basic info, 2 = detailed, 3 = full - q->debug uint32 contains 16 2-bit fields one for subsystem starting - from LSB - */ -#ifdef HTB_DEBUG -#define HTB_DBG_COND(S,L) (((q->debug>>(2*S))&3) >= L) -#define HTB_DBG(S,L,FMT,ARG...) if (HTB_DBG_COND(S,L)) \ - printk(KERN_DEBUG FMT,##ARG) -#define HTB_CHCL(cl) BUG_TRAP((cl)->magic == HTB_CMAGIC) -#define HTB_PASSQ q, -#define HTB_ARGQ struct htb_sched *q, -#define static -#undef __inline__ -#define __inline__ -#undef inline -#define inline -#define HTB_CMAGIC 0xFEFAFEF1 -#define htb_safe_rb_erase(N,R) do { BUG_TRAP((N)->rb_color != -1); \ - if ((N)->rb_color == -1) break; \ - rb_erase(N,R); \ - (N)->rb_color = -1; } while (0) -#else -#define HTB_DBG_COND(S,L) (0) -#define HTB_DBG(S,L,FMT,ARG...) -#define HTB_PASSQ -#define HTB_ARGQ -#define HTB_CHCL(cl) -#define htb_safe_rb_erase(N,R) rb_erase(N,R) -#endif - - /* used internaly to keep status of single class */ enum htb_cmode { HTB_CANT_SEND, /* class can't send and can't borrow */ @@ -136,9 +90,6 @@ enum htb_cmode { /* interior & leaf nodes; props specific to leaves are marked L: */ struct htb_class { -#ifdef HTB_DEBUG - unsigned magic; -#endif /* general class parameters */ u32 classid; struct gnet_stats_basic bstats; @@ -238,7 +189,6 @@ struct htb_sched int nwc_hit; /* this to disable mindelay complaint in dequeue */ int defcls; /* class where unclassified flows go to */ - u32 debug; /* subsystem debug levels */ /* filters for qdisc itself */ struct tcf_proto *filter_list; @@ -354,75 +304,21 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in return cl; } -#ifdef HTB_DEBUG -static void htb_next_rb_node(struct rb_node **n); -#define HTB_DUMTREE(root,memb) if(root) { \ - struct rb_node *n = (root)->rb_node; \ - while (n->rb_left) n = n->rb_left; \ - while (n) { \ - struct htb_class *cl = rb_entry(n, struct htb_class, memb); \ - printk(" %x",cl->classid); htb_next_rb_node (&n); \ - } } - -static void htb_debug_dump (struct htb_sched *q) -{ - int i,p; - printk(KERN_DEBUG "htb*g j=%lu lj=%lu\n",jiffies,q->jiffies); - /* rows */ - for (i=TC_HTB_MAXDEPTH-1;i>=0;i--) { - printk(KERN_DEBUG "htb*r%d m=%x",i,q->row_mask[i]); - for (p=0;prow[i][p].rb_node) continue; - printk(" p%d:",p); - HTB_DUMTREE(q->row[i]+p,node[p]); - } - printk("\n"); - } - /* classes */ - for (i = 0; i < HTB_HSIZE; i++) { - struct list_head *l; - list_for_each (l,q->hash+i) { - struct htb_class *cl = list_entry(l,struct htb_class,hlist); - long diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer); - printk(KERN_DEBUG "htb*c%x m=%d t=%ld c=%ld pq=%lu df=%ld ql=%d " - "pa=%x f:", - cl->classid,cl->cmode,cl->tokens,cl->ctokens, - cl->pq_node.rb_color==-1?0:cl->pq_key,diff, - cl->level?0:cl->un.leaf.q->q.qlen,cl->prio_activity); - if (cl->level) - for (p=0;pun.inner.feed[p].rb_node) continue; - printk(" p%d a=%x:",p,cl->un.inner.ptr[p]?rb_entry(cl->un.inner.ptr[p], struct htb_class,node[p])->classid:0); - HTB_DUMTREE(cl->un.inner.feed+p,node[p]); - } - printk("\n"); - } - } -} -#endif /** * htb_add_to_id_tree - adds class to the round robin list * * Routine adds class to the list (actually tree) sorted by classid. * Make sure that class is not already on such list for given prio. */ -static void htb_add_to_id_tree (HTB_ARGQ struct rb_root *root, +static void htb_add_to_id_tree (struct rb_root *root, struct htb_class *cl,int prio) { struct rb_node **p = &root->rb_node, *parent = NULL; - HTB_DBG(7,3,"htb_add_id_tree cl=%X prio=%d\n",cl->classid,prio); -#ifdef HTB_DEBUG - if (cl->node[prio].rb_color != -1) { BUG_TRAP(0); return; } - HTB_CHCL(cl); - if (*p) { - struct htb_class *x = rb_entry(*p,struct htb_class,node[prio]); - HTB_CHCL(x); - } -#endif + while (*p) { struct htb_class *c; parent = *p; c = rb_entry(parent, struct htb_class, node[prio]); - HTB_CHCL(c); + if (cl->classid > c->classid) p = &parent->rb_right; else @@ -440,16 +336,10 @@ static void htb_add_to_id_tree (HTB_ARGQ struct rb_root *root, * already in the queue. */ static void htb_add_to_wait_tree (struct htb_sched *q, - struct htb_class *cl,long delay,int debug_hint) + struct htb_class *cl,long delay) { struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL; - HTB_DBG(7,3,"htb_add_wt cl=%X key=%lu\n",cl->classid,cl->pq_key); -#ifdef HTB_DEBUG - if (cl->pq_node.rb_color != -1) { BUG_TRAP(0); return; } - HTB_CHCL(cl); - if ((delay <= 0 || delay > cl->mbuffer) && net_ratelimit()) - printk(KERN_ERR "HTB: suspicious delay in wait_tree d=%ld cl=%X h=%d\n",delay,cl->classid,debug_hint); -#endif + cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay); if (cl->pq_key == q->jiffies) cl->pq_key++; @@ -490,14 +380,11 @@ static void htb_next_rb_node(struct rb_node **n) static inline void htb_add_class_to_row(struct htb_sched *q, struct htb_class *cl,int mask) { - HTB_DBG(7,2,"htb_addrow cl=%X mask=%X rmask=%X\n", - cl->classid,mask,q->row_mask[cl->level]); - HTB_CHCL(cl); q->row_mask[cl->level] |= mask; while (mask) { int prio = ffz(~mask); mask &= ~(1 << prio); - htb_add_to_id_tree(HTB_PASSQ q->row[cl->level]+prio,cl,prio); + htb_add_to_id_tree(q->row[cl->level]+prio,cl,prio); } } @@ -511,18 +398,16 @@ static __inline__ void htb_remove_class_from_row(struct htb_sched *q, struct htb_class *cl,int mask) { int m = 0; - HTB_CHCL(cl); + while (mask) { int prio = ffz(~mask); mask &= ~(1 << prio); if (q->ptr[cl->level][prio] == cl->node+prio) htb_next_rb_node(q->ptr[cl->level]+prio); - htb_safe_rb_erase(cl->node + prio,q->row[cl->level]+prio); + rb_erase(cl->node + prio,q->row[cl->level]+prio); if (!q->row[cl->level][prio].rb_node) m |= 1 << prio; } - HTB_DBG(7,2,"htb_delrow cl=%X mask=%X rmask=%X maskdel=%X\n", - cl->classid,mask,q->row_mask[cl->level],m); q->row_mask[cl->level] &= ~m; } @@ -537,11 +422,9 @@ static void htb_activate_prios(struct htb_sched *q,struct htb_class *cl) { struct htb_class *p = cl->parent; long m,mask = cl->prio_activity; - HTB_DBG(7,2,"htb_act_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode); - HTB_CHCL(cl); while (cl->cmode == HTB_MAY_BORROW && p && mask) { - HTB_CHCL(p); + m = mask; while (m) { int prio = ffz(~m); m &= ~(1 << prio); @@ -551,13 +434,11 @@ static void htb_activate_prios(struct htb_sched *q,struct htb_class *cl) reset bit in mask as parent is already ok */ mask &= ~(1 << prio); - htb_add_to_id_tree(HTB_PASSQ p->un.inner.feed+prio,cl,prio); + htb_add_to_id_tree(p->un.inner.feed+prio,cl,prio); } - HTB_DBG(7,3,"htb_act_pr_aft p=%X pact=%X mask=%lX pmode=%d\n", - p->classid,p->prio_activity,mask,p->cmode); p->prio_activity |= mask; cl = p; p = cl->parent; - HTB_CHCL(cl); + } if (cl->cmode == HTB_CAN_SEND && mask) htb_add_class_to_row(q,cl,mask); @@ -574,8 +455,7 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) { struct htb_class *p = cl->parent; long m,mask = cl->prio_activity; - HTB_DBG(7,2,"htb_deact_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode); - HTB_CHCL(cl); + while (cl->cmode == HTB_MAY_BORROW && p && mask) { m = mask; mask = 0; @@ -591,16 +471,15 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) p->un.inner.ptr[prio] = NULL; } - htb_safe_rb_erase(cl->node + prio,p->un.inner.feed + prio); + rb_erase(cl->node + prio,p->un.inner.feed + prio); if (!p->un.inner.feed[prio].rb_node) mask |= 1 << prio; } - HTB_DBG(7,3,"htb_deact_pr_aft p=%X pact=%X mask=%lX pmode=%d\n", - p->classid,p->prio_activity,mask,p->cmode); + p->prio_activity &= ~mask; cl = p; p = cl->parent; - HTB_CHCL(cl); + } if (cl->cmode == HTB_CAN_SEND && mask) htb_remove_class_from_row(q,cl,mask); @@ -655,8 +534,6 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff) { enum htb_cmode new_mode = htb_class_mode(cl,diff); - HTB_CHCL(cl); - HTB_DBG(7,1,"htb_chging_clmode %d->%d cl=%X\n",cl->cmode,new_mode,cl->classid); if (new_mode == cl->cmode) return; @@ -681,7 +558,7 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff) static __inline__ void htb_activate(struct htb_sched *q,struct htb_class *cl) { BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen); - HTB_CHCL(cl); + if (!cl->prio_activity) { cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio); htb_activate_prios(q,cl); @@ -699,7 +576,7 @@ static __inline__ void htb_deactivate(struct htb_sched *q,struct htb_class *cl) { BUG_TRAP(cl->prio_activity); - HTB_CHCL(cl); + htb_deactivate_prios(q,cl); cl->prio_activity = 0; list_del_init(&cl->un.leaf.drop_list); @@ -739,7 +616,6 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) sch->q.qlen++; sch->bstats.packets++; sch->bstats.bytes += skb->len; - HTB_DBG(1,1,"htb_enq_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb); return NET_XMIT_SUCCESS; } @@ -771,7 +647,6 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) sch->q.qlen++; sch->qstats.requeues++; - HTB_DBG(1,1,"htb_req_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb); return NET_XMIT_SUCCESS; } @@ -793,7 +668,6 @@ static void htb_rate_timer(unsigned long arg) /* lock queue so that we can muck with it */ HTB_QLOCK(sch); - HTB_DBG(10,1,"htb_rttmr j=%ld\n",jiffies); q->rttim.expires = jiffies + HZ; add_timer(&q->rttim); @@ -803,8 +677,7 @@ static void htb_rate_timer(unsigned long arg) q->recmp_bucket = 0; list_for_each (p,q->hash+q->recmp_bucket) { struct htb_class *cl = list_entry(p,struct htb_class,hlist); - HTB_DBG(10,2,"htb_rttmr_cl cl=%X sbyte=%lu spkt=%lu\n", - cl->classid,cl->sum_bytes,cl->sum_packets); + RT_GEN (cl->sum_bytes,cl->rate_bytes); RT_GEN (cl->sum_packets,cl->rate_packets); } @@ -828,7 +701,6 @@ static void htb_charge_class(struct htb_sched *q,struct htb_class *cl, { long toks,diff; enum htb_cmode old_mode; - HTB_DBG(5,1,"htb_chrg_cl cl=%X lev=%d len=%d\n",cl->classid,level,bytes); #define HTB_ACCNT(T,B,R) toks = diff + cl->T; \ if (toks > cl->B) toks = cl->B; \ @@ -837,24 +709,7 @@ static void htb_charge_class(struct htb_sched *q,struct htb_class *cl, cl->T = toks while (cl) { - HTB_CHCL(cl); diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer); -#ifdef HTB_DEBUG - if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) { - if (net_ratelimit()) - printk(KERN_ERR "HTB: bad diff in charge, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n", - cl->classid, diff, -#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY - q->now.tv_sec * 1000000ULL + q->now.tv_usec, - cl->t_c.tv_sec * 1000000ULL + cl->t_c.tv_usec, -#else - (unsigned long long) q->now, - (unsigned long long) cl->t_c, -#endif - q->jiffies); - diff = 1000; - } -#endif if (cl->level >= level) { if (cl->level == level) cl->xstats.lends++; HTB_ACCNT (tokens,buffer,rate); @@ -864,15 +719,14 @@ static void htb_charge_class(struct htb_sched *q,struct htb_class *cl, } HTB_ACCNT (ctokens,cbuffer,ceil); cl->t_c = q->now; - HTB_DBG(5,2,"htb_chrg_clp cl=%X diff=%ld tok=%ld ctok=%ld\n",cl->classid,diff,cl->tokens,cl->ctokens); old_mode = cl->cmode; diff = 0; htb_change_class_mode(q,cl,&diff); if (old_mode != cl->cmode) { if (old_mode != HTB_CAN_SEND) - htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level); + rb_erase(&cl->pq_node,q->wait_pq+cl->level); if (cl->cmode != HTB_CAN_SEND) - htb_add_to_wait_tree (q,cl,diff,1); + htb_add_to_wait_tree (q,cl,diff); } #ifdef HTB_RATECM @@ -899,8 +753,7 @@ static void htb_charge_class(struct htb_sched *q,struct htb_class *cl, static long htb_do_events(struct htb_sched *q,int level) { int i; - HTB_DBG(8,1,"htb_do_events l=%d root=%p rmask=%X\n", - level,q->wait_pq[level].rb_node,q->row_mask[level]); + for (i = 0; i < 500; i++) { struct htb_class *cl; long diff; @@ -910,30 +763,13 @@ static long htb_do_events(struct htb_sched *q,int level) cl = rb_entry(p, struct htb_class, pq_node); if (time_after(cl->pq_key, q->jiffies)) { - HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - q->jiffies); return cl->pq_key - q->jiffies; } - htb_safe_rb_erase(p,q->wait_pq+level); + rb_erase(p,q->wait_pq+level); diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer); -#ifdef HTB_DEBUG - if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) { - if (net_ratelimit()) - printk(KERN_ERR "HTB: bad diff in events, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n", - cl->classid, diff, -#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY - q->now.tv_sec * 1000000ULL + q->now.tv_usec, - cl->t_c.tv_sec * 1000000ULL + cl->t_c.tv_usec, -#else - (unsigned long long) q->now, - (unsigned long long) cl->t_c, -#endif - q->jiffies); - diff = 1000; - } -#endif htb_change_class_mode(q,cl,&diff); if (cl->cmode != HTB_CAN_SEND) - htb_add_to_wait_tree (q,cl,diff,2); + htb_add_to_wait_tree (q,cl,diff); } if (net_ratelimit()) printk(KERN_WARNING "htb: too many events !\n"); @@ -966,7 +802,7 @@ htb_id_find_next_upper(int prio,struct rb_node *n,u32 id) * Find leaf where current feed pointers points to. */ static struct htb_class * -htb_lookup_leaf(HTB_ARGQ struct rb_root *tree,int prio,struct rb_node **pptr,u32 *pid) +htb_lookup_leaf(struct rb_root *tree,int prio,struct rb_node **pptr,u32 *pid) { int i; struct { @@ -981,8 +817,6 @@ htb_lookup_leaf(HTB_ARGQ struct rb_root *tree,int prio,struct rb_node **pptr,u32 sp->pid = pid; for (i = 0; i < 65535; i++) { - HTB_DBG(4,2,"htb_lleaf ptr=%p pid=%X\n",*sp->pptr,*sp->pid); - if (!*sp->pptr && *sp->pid) { /* ptr was invalidated but id is valid - try to recover the original or next ptr */ @@ -1002,7 +836,6 @@ htb_lookup_leaf(HTB_ARGQ struct rb_root *tree,int prio,struct rb_node **pptr,u32 } else { struct htb_class *cl; cl = rb_entry(*sp->pptr,struct htb_class,node[prio]); - HTB_CHCL(cl); if (!cl->level) return cl; (++sp)->root = cl->un.inner.feed[prio].rb_node; @@ -1022,15 +855,13 @@ htb_dequeue_tree(struct htb_sched *q,int prio,int level) struct sk_buff *skb = NULL; struct htb_class *cl,*start; /* look initial class up in the row */ - start = cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio, + start = cl = htb_lookup_leaf (q->row[level]+prio,prio, q->ptr[level]+prio,q->last_ptr_id[level]+prio); do { next: BUG_TRAP(cl); if (!cl) return NULL; - HTB_DBG(4,1,"htb_deq_tr prio=%d lev=%d cl=%X defic=%d\n", - prio,level,cl->classid,cl->un.leaf.deficit[level]); /* class can be empty - it is unlikely but can be true if leaf qdisc drops packets in enqueue routine or if someone used @@ -1044,7 +875,7 @@ next: if ((q->row_mask[level] & (1 << prio)) == 0) return NULL; - next = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio, + next = htb_lookup_leaf (q->row[level]+prio, prio,q->ptr[level]+prio,q->last_ptr_id[level]+prio); if (cl == start) /* fix start if we just deleted it */ @@ -1061,15 +892,13 @@ next: } q->nwc_hit++; htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio); - cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio,q->ptr[level]+prio, + cl = htb_lookup_leaf (q->row[level]+prio,prio,q->ptr[level]+prio, q->last_ptr_id[level]+prio); } while (cl != start); if (likely(skb != NULL)) { if ((cl->un.leaf.deficit[level] -= skb->len) < 0) { - HTB_DBG(4,2,"htb_next_cl oldptr=%p quant_add=%d\n", - level?cl->parent->un.inner.ptr[prio]:q->ptr[0][prio],cl->un.leaf.quantum); cl->un.leaf.deficit[level] += cl->un.leaf.quantum; htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio); } @@ -1095,7 +924,6 @@ static void htb_delay_by(struct Qdisc *sch,long delay) mod_timer(&q->timer, q->jiffies + delay); sch->flags |= TCQ_F_THROTTLED; sch->qstats.overlimits++; - HTB_DBG(3,1,"htb_deq t_delay=%ld\n",delay); } static struct sk_buff *htb_dequeue(struct Qdisc *sch) @@ -1104,13 +932,8 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) struct htb_sched *q = qdisc_priv(sch); int level; long min_delay; -#ifdef HTB_DEBUG - int evs_used = 0; -#endif q->jiffies = jiffies; - HTB_DBG(3,1,"htb_deq dircnt=%d qlen=%d\n",skb_queue_len(&q->direct_queue), - sch->q.qlen); /* try to dequeue direct packets as high prio (!) to minimize cpu work */ if ((skb = __skb_dequeue(&q->direct_queue)) != NULL) { @@ -1131,9 +954,6 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) if (time_after_eq(q->jiffies, q->near_ev_cache[level])) { delay = htb_do_events(q,level); q->near_ev_cache[level] = q->jiffies + (delay ? delay : HZ); -#ifdef HTB_DEBUG - evs_used++; -#endif } else delay = q->near_ev_cache[level] - q->jiffies; @@ -1151,20 +971,8 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) } } } -#ifdef HTB_DEBUG - if (!q->nwc_hit && min_delay >= 10*HZ && net_ratelimit()) { - if (min_delay == LONG_MAX) { - printk(KERN_ERR "HTB: dequeue bug (%d,%lu,%lu), report it please !\n", - evs_used,q->jiffies,jiffies); - htb_debug_dump(q); - } else - printk(KERN_WARNING "HTB: mindelay=%ld, some class has " - "too small rate\n",min_delay); - } -#endif htb_delay_by (sch,min_delay > 5*HZ ? 5*HZ : min_delay); fin: - HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,q->jiffies,skb); return skb; } @@ -1198,7 +1006,6 @@ static void htb_reset(struct Qdisc* sch) { struct htb_sched *q = qdisc_priv(sch); int i; - HTB_DBG(0,1,"htb_reset sch=%p, handle=%X\n",sch,sch->handle); for (i = 0; i < HTB_HSIZE; i++) { struct list_head *p; @@ -1213,10 +1020,6 @@ static void htb_reset(struct Qdisc* sch) } cl->prio_activity = 0; cl->cmode = HTB_CAN_SEND; -#ifdef HTB_DEBUG - cl->pq_node.rb_color = -1; - memset(cl->node,255,sizeof(cl->node)); -#endif } } @@ -1238,10 +1041,6 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt) struct rtattr *tb[TCA_HTB_INIT]; struct tc_htb_glob *gopt; int i; -#ifdef HTB_DEBUG - printk(KERN_INFO "HTB init, kernel part version %d.%d\n", - HTB_VER >> 16,HTB_VER & 0xffff); -#endif if (!opt || rtattr_parse_nested(tb, TCA_HTB_INIT, opt) || tb[TCA_HTB_INIT-1] == NULL || RTA_PAYLOAD(tb[TCA_HTB_INIT-1]) < sizeof(*gopt)) { @@ -1254,8 +1053,6 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt) HTB_VER >> 16,HTB_VER & 0xffff,gopt->version); return -EINVAL; } - q->debug = gopt->debug; - HTB_DBG(0,1,"htb_init sch=%p handle=%X r2q=%d\n",sch,sch->handle,gopt->rate2quantum); INIT_LIST_HEAD(&q->root); for (i = 0; i < HTB_HSIZE; i++) @@ -1292,18 +1089,13 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) unsigned char *b = skb->tail; struct rtattr *rta; struct tc_htb_glob gopt; - HTB_DBG(0,1,"htb_dump sch=%p, handle=%X\n",sch,sch->handle); HTB_QLOCK(sch); gopt.direct_pkts = q->direct_pkts; -#ifdef HTB_DEBUG - if (HTB_DBG_COND(0,2)) - htb_debug_dump(q); -#endif gopt.version = HTB_VER; gopt.rate2quantum = q->rate2quantum; gopt.defcls = q->defcls; - gopt.debug = q->debug; + gopt.debug = 0; rta = (struct rtattr*)b; RTA_PUT(skb, TCA_OPTIONS, 0, NULL); RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt); @@ -1319,16 +1111,11 @@ rtattr_failure: static int htb_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, struct tcmsg *tcm) { -#ifdef HTB_DEBUG - struct htb_sched *q = qdisc_priv(sch); -#endif struct htb_class *cl = (struct htb_class*)arg; unsigned char *b = skb->tail; struct rtattr *rta; struct tc_htb_opt opt; - HTB_DBG(0,1,"htb_dump_class handle=%X clid=%X\n",sch->handle,cl->classid); - HTB_QLOCK(sch); tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT; tcm->tcm_handle = cl->classid; @@ -1410,11 +1197,7 @@ static struct Qdisc * htb_leaf(struct Qdisc *sch, unsigned long arg) static unsigned long htb_get(struct Qdisc *sch, u32 classid) { -#ifdef HTB_DEBUG - struct htb_sched *q = qdisc_priv(sch); -#endif struct htb_class *cl = htb_find(classid,sch); - HTB_DBG(0,1,"htb_get clid=%X q=%p cl=%p ref=%d\n",classid,q,cl,cl?cl->refcnt:0); if (cl) cl->refcnt++; return (unsigned long)cl; @@ -1433,7 +1216,6 @@ static void htb_destroy_filters(struct tcf_proto **fl) static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl) { struct htb_sched *q = qdisc_priv(sch); - HTB_DBG(0,1,"htb_destrycls clid=%X ref=%d\n", cl?cl->classid:0,cl?cl->refcnt:0); if (!cl->level) { BUG_TRAP(cl->un.leaf.q); sch->q.qlen -= cl->un.leaf.q->q.qlen; @@ -1456,7 +1238,7 @@ static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl) htb_deactivate (q,cl); if (cl->cmode != HTB_CAN_SEND) - htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level); + rb_erase(&cl->pq_node,q->wait_pq+cl->level); kfree(cl); } @@ -1465,7 +1247,6 @@ static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl) static void htb_destroy(struct Qdisc* sch) { struct htb_sched *q = qdisc_priv(sch); - HTB_DBG(0,1,"htb_destroy q=%p\n",q); del_timer_sync (&q->timer); #ifdef HTB_RATECM @@ -1488,7 +1269,6 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class*)arg; - HTB_DBG(0,1,"htb_delete q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0); // TODO: why don't allow to delete subtree ? references ? does // tc subsys quarantee us that in htb_destroy it holds no class @@ -1512,11 +1292,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) static void htb_put(struct Qdisc *sch, unsigned long arg) { -#ifdef HTB_DEBUG - struct htb_sched *q = qdisc_priv(sch); -#endif struct htb_class *cl = (struct htb_class*)arg; - HTB_DBG(0,1,"htb_put q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0); if (--cl->refcnt == 0) htb_destroy_class(sch,cl); @@ -1542,7 +1318,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, parent = parentid == TC_H_ROOT ? NULL : htb_find (parentid,sch); hopt = RTA_DATA(tb[TCA_HTB_PARMS-1]); - HTB_DBG(0,1,"htb_chg cl=%p(%X), clid=%X, parid=%X, opt/prio=%d, rate=%u, buff=%d, quant=%d\n", cl,cl?cl->classid:0,classid,parentid,(int)hopt->prio,hopt->rate.rate,hopt->buffer,hopt->quantum); + rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB-1]); ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB-1]); if (!rtab || !ctab) goto failure; @@ -1567,9 +1343,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, INIT_LIST_HEAD(&cl->hlist); INIT_LIST_HEAD(&cl->children); INIT_LIST_HEAD(&cl->un.leaf.drop_list); -#ifdef HTB_DEBUG - cl->magic = HTB_CMAGIC; -#endif /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) so that can't be used inside of sch_tree_lock @@ -1585,7 +1358,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* remove from evt list because of level change */ if (parent->cmode != HTB_CAN_SEND) { - htb_safe_rb_erase(&parent->pq_node,q->wait_pq /*+0*/); + rb_erase(&parent->pq_node,q->wait_pq); parent->cmode = HTB_CAN_SEND; } parent->level = (parent->parent ? parent->parent->level @@ -1607,13 +1380,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* attach to the hash list and parent's family */ list_add_tail(&cl->hlist, q->hash+htb_hash(classid)); list_add_tail(&cl->sibling, parent ? &parent->children : &q->root); -#ifdef HTB_DEBUG - { - int i; - for (i = 0; i < TC_HTB_NUMPRIO; i++) cl->node[i].rb_color = -1; - cl->pq_node.rb_color = -1; - } -#endif } else sch_tree_lock(sch); /* it used to be a nasty bug here, we have to check that node @@ -1654,7 +1420,7 @@ static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg) struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)arg; struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list; - HTB_DBG(0,2,"htb_tcf q=%p clid=%X fref=%d fl=%p\n",q,cl?cl->classid:0,cl?cl->filter_cnt:q->filter_cnt,*fl); + return fl; } @@ -1663,7 +1429,7 @@ static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent, { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = htb_find (classid,sch); - HTB_DBG(0,2,"htb_bind q=%p clid=%X cl=%p fref=%d\n",q,classid,cl,cl?cl->filter_cnt:q->filter_cnt); + /*if (cl && !cl->level) return 0; The line above used to be there to prevent attaching filters to leaves. But at least tc_index filter uses this just to get class @@ -1684,7 +1450,7 @@ static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)arg; - HTB_DBG(0,2,"htb_unbind q=%p cl=%p fref=%d\n",q,cl,cl?cl->filter_cnt:q->filter_cnt); + if (cl) cl->filter_cnt--; else From 9ac961ee05bfc837e5271be34ad7158e90dce7d9 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 10 Aug 2006 23:33:16 -0700 Subject: [PATCH 0450/1063] [HTB]: Remove lock macro. Get rid of the macro's being used to obscure the locking. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 73094e7f4169..c0b80b75cdff 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -72,8 +72,6 @@ #define HTB_EWMAC 2 /* rate average over HTB_EWMAC*HTB_HSIZE sec */ #define HTB_RATECM 1 /* whether to use rate computer */ #define HTB_HYSTERESIS 1/* whether to use mode hysteresis for speedup */ -#define HTB_QLOCK(S) spin_lock_bh(&(S)->dev->queue_lock) -#define HTB_QUNLOCK(S) spin_unlock_bh(&(S)->dev->queue_lock) #define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */ #if HTB_VER >> 16 != TC_HTB_PROTOVER @@ -667,7 +665,7 @@ static void htb_rate_timer(unsigned long arg) struct list_head *p; /* lock queue so that we can muck with it */ - HTB_QLOCK(sch); + spin_lock_bh(&sch->dev->queue_lock); q->rttim.expires = jiffies + HZ; add_timer(&q->rttim); @@ -681,7 +679,7 @@ static void htb_rate_timer(unsigned long arg) RT_GEN (cl->sum_bytes,cl->rate_bytes); RT_GEN (cl->sum_packets,cl->rate_packets); } - HTB_QUNLOCK(sch); + spin_unlock_bh(&sch->dev->queue_lock); } #endif @@ -1089,7 +1087,7 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) unsigned char *b = skb->tail; struct rtattr *rta; struct tc_htb_glob gopt; - HTB_QLOCK(sch); + spin_lock_bh(&sch->dev->queue_lock); gopt.direct_pkts = q->direct_pkts; gopt.version = HTB_VER; @@ -1100,10 +1098,10 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) RTA_PUT(skb, TCA_OPTIONS, 0, NULL); RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt); rta->rta_len = skb->tail - b; - HTB_QUNLOCK(sch); + spin_unlock_bh(&sch->dev->queue_lock); return skb->len; rtattr_failure: - HTB_QUNLOCK(sch); + spin_unlock_bh(&sch->dev->queue_lock); skb_trim(skb, skb->tail - skb->data); return -1; } @@ -1116,7 +1114,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, struct rtattr *rta; struct tc_htb_opt opt; - HTB_QLOCK(sch); + spin_lock_bh(&sch->dev->queue_lock); tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT; tcm->tcm_handle = cl->classid; if (!cl->level && cl->un.leaf.q) @@ -1133,10 +1131,10 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, opt.level = cl->level; RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt); rta->rta_len = skb->tail - b; - HTB_QUNLOCK(sch); + spin_unlock_bh(&sch->dev->queue_lock); return skb->len; rtattr_failure: - HTB_QUNLOCK(sch); + spin_unlock_bh(&sch->dev->queue_lock); skb_trim(skb, b - skb->data); return -1; } From 18a63e868b04cf949643cc9d2c8a51d8cb5da9c4 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 10 Aug 2006 23:34:02 -0700 Subject: [PATCH 0451/1063] [HTB]: HTB_HYSTERESIS cleanup Change the conditional compilation around HTB_HYSTERSIS since code was splitting mid expression. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index c0b80b75cdff..d8c1a6b0def1 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -483,6 +483,20 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) htb_remove_class_from_row(q,cl,mask); } +#if HTB_HYSTERESIS +static inline long htb_lowater(const struct htb_class *cl) +{ + return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0; +} +static inline long htb_hiwater(const struct htb_class *cl) +{ + return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0; +} +#else +#define htb_lowater(cl) (0) +#define htb_hiwater(cl) (0) +#endif + /** * htb_class_mode - computes and returns current class mode * @@ -499,19 +513,12 @@ htb_class_mode(struct htb_class *cl,long *diff) { long toks; - if ((toks = (cl->ctokens + *diff)) < ( -#if HTB_HYSTERESIS - cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : -#endif - 0)) { + if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) { *diff = -toks; return HTB_CANT_SEND; } - if ((toks = (cl->tokens + *diff)) >= ( -#if HTB_HYSTERESIS - cl->cmode == HTB_CAN_SEND ? -cl->buffer : -#endif - 0)) + + if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl)) return HTB_CAN_SEND; *diff = -toks; From 87990467d387f922103db31678034785d8f21cb7 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 10 Aug 2006 23:35:16 -0700 Subject: [PATCH 0452/1063] [HTB]: Lindent Code was a mess in terms of indentation. Run through Lindent script, and cleanup the damage. Also, don't use, vim magic comment, and substitute inline for __inline__. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 985 +++++++++++++++++++++++--------------------- 1 file changed, 518 insertions(+), 467 deletions(-) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index d8c1a6b0def1..6c6cac65255f 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1,4 +1,4 @@ -/* vim: ts=8 sw=8 +/* * net/sched/sch_htb.c Hierarchical token bucket, feed tree version * * This program is free software; you can redistribute it and/or @@ -68,11 +68,11 @@ one less than their parent. */ -#define HTB_HSIZE 16 /* classid hash size */ -#define HTB_EWMAC 2 /* rate average over HTB_EWMAC*HTB_HSIZE sec */ -#define HTB_RATECM 1 /* whether to use rate computer */ -#define HTB_HYSTERESIS 1/* whether to use mode hysteresis for speedup */ -#define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */ +#define HTB_HSIZE 16 /* classid hash size */ +#define HTB_EWMAC 2 /* rate average over HTB_EWMAC*HTB_HSIZE sec */ +#define HTB_RATECM 1 /* whether to use rate computer */ +#define HTB_HYSTERESIS 1 /* whether to use mode hysteresis for speedup */ +#define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */ #if HTB_VER >> 16 != TC_HTB_PROTOVER #error "Mismatched sch_htb.c and pkt_sch.h" @@ -80,154 +80,152 @@ /* used internaly to keep status of single class */ enum htb_cmode { - HTB_CANT_SEND, /* class can't send and can't borrow */ - HTB_MAY_BORROW, /* class can't send but may borrow */ - HTB_CAN_SEND /* class can send */ + HTB_CANT_SEND, /* class can't send and can't borrow */ + HTB_MAY_BORROW, /* class can't send but may borrow */ + HTB_CAN_SEND /* class can send */ }; /* interior & leaf nodes; props specific to leaves are marked L: */ -struct htb_class -{ - /* general class parameters */ - u32 classid; - struct gnet_stats_basic bstats; - struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; - struct tc_htb_xstats xstats;/* our special stats */ - int refcnt; /* usage count of this class */ +struct htb_class { + /* general class parameters */ + u32 classid; + struct gnet_stats_basic bstats; + struct gnet_stats_queue qstats; + struct gnet_stats_rate_est rate_est; + struct tc_htb_xstats xstats; /* our special stats */ + int refcnt; /* usage count of this class */ #ifdef HTB_RATECM - /* rate measurement counters */ - unsigned long rate_bytes,sum_bytes; - unsigned long rate_packets,sum_packets; + /* rate measurement counters */ + unsigned long rate_bytes, sum_bytes; + unsigned long rate_packets, sum_packets; #endif - /* topology */ - int level; /* our level (see above) */ - struct htb_class *parent; /* parent class */ - struct list_head hlist; /* classid hash list item */ - struct list_head sibling; /* sibling list item */ - struct list_head children; /* children list */ + /* topology */ + int level; /* our level (see above) */ + struct htb_class *parent; /* parent class */ + struct list_head hlist; /* classid hash list item */ + struct list_head sibling; /* sibling list item */ + struct list_head children; /* children list */ - union { - struct htb_class_leaf { - struct Qdisc *q; - int prio; - int aprio; - int quantum; - int deficit[TC_HTB_MAXDEPTH]; - struct list_head drop_list; - } leaf; - struct htb_class_inner { - struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */ - struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */ - /* When class changes from state 1->2 and disconnects from - parent's feed then we lost ptr value and start from the - first child again. Here we store classid of the - last valid ptr (used when ptr is NULL). */ - u32 last_ptr_id[TC_HTB_NUMPRIO]; - } inner; - } un; - struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ - struct rb_node pq_node; /* node for event queue */ - unsigned long pq_key; /* the same type as jiffies global */ - - int prio_activity; /* for which prios are we active */ - enum htb_cmode cmode; /* current mode of the class */ + union { + struct htb_class_leaf { + struct Qdisc *q; + int prio; + int aprio; + int quantum; + int deficit[TC_HTB_MAXDEPTH]; + struct list_head drop_list; + } leaf; + struct htb_class_inner { + struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */ + struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */ + /* When class changes from state 1->2 and disconnects from + parent's feed then we lost ptr value and start from the + first child again. Here we store classid of the + last valid ptr (used when ptr is NULL). */ + u32 last_ptr_id[TC_HTB_NUMPRIO]; + } inner; + } un; + struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ + struct rb_node pq_node; /* node for event queue */ + unsigned long pq_key; /* the same type as jiffies global */ - /* class attached filters */ - struct tcf_proto *filter_list; - int filter_cnt; + int prio_activity; /* for which prios are we active */ + enum htb_cmode cmode; /* current mode of the class */ - int warned; /* only one warning about non work conserving .. */ + /* class attached filters */ + struct tcf_proto *filter_list; + int filter_cnt; - /* token bucket parameters */ - struct qdisc_rate_table *rate; /* rate table of the class itself */ - struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */ - long buffer,cbuffer; /* token bucket depth/rate */ - psched_tdiff_t mbuffer; /* max wait time */ - long tokens,ctokens; /* current number of tokens */ - psched_time_t t_c; /* checkpoint time */ + int warned; /* only one warning about non work conserving .. */ + + /* token bucket parameters */ + struct qdisc_rate_table *rate; /* rate table of the class itself */ + struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */ + long buffer, cbuffer; /* token bucket depth/rate */ + psched_tdiff_t mbuffer; /* max wait time */ + long tokens, ctokens; /* current number of tokens */ + psched_time_t t_c; /* checkpoint time */ }; /* TODO: maybe compute rate when size is too large .. or drop ? */ -static __inline__ long L2T(struct htb_class *cl,struct qdisc_rate_table *rate, - int size) -{ - int slot = size >> rate->rate.cell_log; - if (slot > 255) { - cl->xstats.giants++; - slot = 255; - } - return rate->data[slot]; +static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate, + int size) +{ + int slot = size >> rate->rate.cell_log; + if (slot > 255) { + cl->xstats.giants++; + slot = 255; + } + return rate->data[slot]; } -struct htb_sched -{ - struct list_head root; /* root classes list */ - struct list_head hash[HTB_HSIZE]; /* hashed by classid */ - struct list_head drops[TC_HTB_NUMPRIO]; /* active leaves (for drops) */ - - /* self list - roots of self generating tree */ - struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; - int row_mask[TC_HTB_MAXDEPTH]; - struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; - u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; +struct htb_sched { + struct list_head root; /* root classes list */ + struct list_head hash[HTB_HSIZE]; /* hashed by classid */ + struct list_head drops[TC_HTB_NUMPRIO]; /* active leaves (for drops) */ - /* self wait list - roots of wait PQs per row */ - struct rb_root wait_pq[TC_HTB_MAXDEPTH]; + /* self list - roots of self generating tree */ + struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; + int row_mask[TC_HTB_MAXDEPTH]; + struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; + u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; - /* time of nearest event per level (row) */ - unsigned long near_ev_cache[TC_HTB_MAXDEPTH]; + /* self wait list - roots of wait PQs per row */ + struct rb_root wait_pq[TC_HTB_MAXDEPTH]; - /* cached value of jiffies in dequeue */ - unsigned long jiffies; + /* time of nearest event per level (row) */ + unsigned long near_ev_cache[TC_HTB_MAXDEPTH]; - /* whether we hit non-work conserving class during this dequeue; we use */ - int nwc_hit; /* this to disable mindelay complaint in dequeue */ + /* cached value of jiffies in dequeue */ + unsigned long jiffies; - int defcls; /* class where unclassified flows go to */ + /* whether we hit non-work conserving class during this dequeue; we use */ + int nwc_hit; /* this to disable mindelay complaint in dequeue */ - /* filters for qdisc itself */ - struct tcf_proto *filter_list; - int filter_cnt; + int defcls; /* class where unclassified flows go to */ - int rate2quantum; /* quant = rate / rate2quantum */ - psched_time_t now; /* cached dequeue time */ - struct timer_list timer; /* send delay timer */ + /* filters for qdisc itself */ + struct tcf_proto *filter_list; + int filter_cnt; + + int rate2quantum; /* quant = rate / rate2quantum */ + psched_time_t now; /* cached dequeue time */ + struct timer_list timer; /* send delay timer */ #ifdef HTB_RATECM - struct timer_list rttim; /* rate computer timer */ - int recmp_bucket; /* which hash bucket to recompute next */ + struct timer_list rttim; /* rate computer timer */ + int recmp_bucket; /* which hash bucket to recompute next */ #endif - - /* non shaped skbs; let them go directly thru */ - struct sk_buff_head direct_queue; - int direct_qlen; /* max qlen of above */ - long direct_pkts; + /* non shaped skbs; let them go directly thru */ + struct sk_buff_head direct_queue; + int direct_qlen; /* max qlen of above */ + + long direct_pkts; }; /* compute hash of size HTB_HSIZE for given handle */ -static __inline__ int htb_hash(u32 h) +static inline int htb_hash(u32 h) { #if HTB_HSIZE != 16 - #error "Declare new hash for your HTB_HSIZE" +#error "Declare new hash for your HTB_HSIZE" #endif - h ^= h>>8; /* stolen from cbq_hash */ - h ^= h>>4; - return h & 0xf; + h ^= h >> 8; /* stolen from cbq_hash */ + h ^= h >> 4; + return h & 0xf; } /* find class in global hash table using given handle */ -static __inline__ struct htb_class *htb_find(u32 handle, struct Qdisc *sch) +static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); struct list_head *p; - if (TC_H_MAJ(handle) != sch->handle) + if (TC_H_MAJ(handle) != sch->handle) return NULL; - - list_for_each (p,q->hash+htb_hash(handle)) { - struct htb_class *cl = list_entry(p,struct htb_class,hlist); + + list_for_each(p, q->hash + htb_hash(handle)) { + struct htb_class *cl = list_entry(p, struct htb_class, hlist); if (cl->classid == handle) return cl; } @@ -252,7 +250,8 @@ static inline u32 htb_classid(struct htb_class *cl) return (cl && cl != HTB_DIRECT) ? cl->classid : TC_H_UNSPEC; } -static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) +static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, + int *qerr) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl; @@ -264,8 +263,8 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in note that nfmark can be used too by attaching filter fw with no rules in it */ if (skb->priority == sch->handle) - return HTB_DIRECT; /* X:0 (direct flow) selected */ - if ((cl = htb_find(skb->priority,sch)) != NULL && cl->level == 0) + return HTB_DIRECT; /* X:0 (direct flow) selected */ + if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0) return cl; *qerr = NET_XMIT_BYPASS; @@ -274,7 +273,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: - case TC_ACT_STOLEN: + case TC_ACT_STOLEN: *qerr = NET_XMIT_SUCCESS; case TC_ACT_SHOT: return NULL; @@ -283,22 +282,22 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in if (result == TC_POLICE_SHOT) return HTB_DIRECT; #endif - if ((cl = (void*)res.class) == NULL) { + if ((cl = (void *)res.class) == NULL) { if (res.classid == sch->handle) - return HTB_DIRECT; /* X:0 (direct flow) */ - if ((cl = htb_find(res.classid,sch)) == NULL) - break; /* filter selected invalid classid */ + return HTB_DIRECT; /* X:0 (direct flow) */ + if ((cl = htb_find(res.classid, sch)) == NULL) + break; /* filter selected invalid classid */ } if (!cl->level) - return cl; /* we hit leaf; return it */ + return cl; /* we hit leaf; return it */ /* we have got inner class; apply inner filter chain */ tcf = cl->filter_list; } /* classification failed; try to use default class */ - cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle),q->defcls),sch); + cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch); if (!cl || cl->level) - return HTB_DIRECT; /* bad default .. this is safe bet */ + return HTB_DIRECT; /* bad default .. this is safe bet */ return cl; } @@ -308,18 +307,19 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in * Routine adds class to the list (actually tree) sorted by classid. * Make sure that class is not already on such list for given prio. */ -static void htb_add_to_id_tree (struct rb_root *root, - struct htb_class *cl,int prio) +static void htb_add_to_id_tree(struct rb_root *root, + struct htb_class *cl, int prio) { struct rb_node **p = &root->rb_node, *parent = NULL; while (*p) { - struct htb_class *c; parent = *p; + struct htb_class *c; + parent = *p; c = rb_entry(parent, struct htb_class, node[prio]); if (cl->classid > c->classid) p = &parent->rb_right; - else + else p = &parent->rb_left; } rb_link_node(&cl->node[prio], parent, p); @@ -333,8 +333,8 @@ static void htb_add_to_id_tree (struct rb_root *root, * change its mode in cl->pq_key microseconds. Make sure that class is not * already in the queue. */ -static void htb_add_to_wait_tree (struct htb_sched *q, - struct htb_class *cl,long delay) +static void htb_add_to_wait_tree(struct htb_sched *q, + struct htb_class *cl, long delay) { struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL; @@ -345,13 +345,14 @@ static void htb_add_to_wait_tree (struct htb_sched *q, /* update the nearest event cache */ if (time_after(q->near_ev_cache[cl->level], cl->pq_key)) q->near_ev_cache[cl->level] = cl->pq_key; - + while (*p) { - struct htb_class *c; parent = *p; + struct htb_class *c; + parent = *p; c = rb_entry(parent, struct htb_class, pq_node); if (time_after_eq(cl->pq_key, c->pq_key)) p = &parent->rb_right; - else + else p = &parent->rb_left; } rb_link_node(&cl->pq_node, parent, p); @@ -375,14 +376,14 @@ static void htb_next_rb_node(struct rb_node **n) * The class is added to row at priorities marked in mask. * It does nothing if mask == 0. */ -static inline void htb_add_class_to_row(struct htb_sched *q, - struct htb_class *cl,int mask) +static inline void htb_add_class_to_row(struct htb_sched *q, + struct htb_class *cl, int mask) { q->row_mask[cl->level] |= mask; while (mask) { int prio = ffz(~mask); mask &= ~(1 << prio); - htb_add_to_id_tree(q->row[cl->level]+prio,cl,prio); + htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio); } } @@ -392,18 +393,18 @@ static inline void htb_add_class_to_row(struct htb_sched *q, * The class is removed from row at priorities marked in mask. * It does nothing if mask == 0. */ -static __inline__ void htb_remove_class_from_row(struct htb_sched *q, - struct htb_class *cl,int mask) +static inline void htb_remove_class_from_row(struct htb_sched *q, + struct htb_class *cl, int mask) { int m = 0; while (mask) { int prio = ffz(~mask); mask &= ~(1 << prio); - if (q->ptr[cl->level][prio] == cl->node+prio) - htb_next_rb_node(q->ptr[cl->level]+prio); - rb_erase(cl->node + prio,q->row[cl->level]+prio); - if (!q->row[cl->level][prio].rb_node) + if (q->ptr[cl->level][prio] == cl->node + prio) + htb_next_rb_node(q->ptr[cl->level] + prio); + rb_erase(cl->node + prio, q->row[cl->level] + prio); + if (!q->row[cl->level][prio].rb_node) m |= 1 << prio; } q->row_mask[cl->level] &= ~m; @@ -416,30 +417,31 @@ static __inline__ void htb_remove_class_from_row(struct htb_sched *q, * for priorities it is participating on. cl->cmode must be new * (activated) mode. It does nothing if cl->prio_activity == 0. */ -static void htb_activate_prios(struct htb_sched *q,struct htb_class *cl) +static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl) { struct htb_class *p = cl->parent; - long m,mask = cl->prio_activity; + long m, mask = cl->prio_activity; while (cl->cmode == HTB_MAY_BORROW && p && mask) { - - m = mask; while (m) { + m = mask; + while (m) { int prio = ffz(~m); m &= ~(1 << prio); - + if (p->un.inner.feed[prio].rb_node) /* parent already has its feed in use so that reset bit in mask as parent is already ok */ mask &= ~(1 << prio); - - htb_add_to_id_tree(p->un.inner.feed+prio,cl,prio); + + htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio); } p->prio_activity |= mask; - cl = p; p = cl->parent; + cl = p; + p = cl->parent; } if (cl->cmode == HTB_CAN_SEND && mask) - htb_add_class_to_row(q,cl,mask); + htb_add_class_to_row(q, cl, mask); } /** @@ -452,35 +454,36 @@ static void htb_activate_prios(struct htb_sched *q,struct htb_class *cl) static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) { struct htb_class *p = cl->parent; - long m,mask = cl->prio_activity; - + long m, mask = cl->prio_activity; while (cl->cmode == HTB_MAY_BORROW && p && mask) { - m = mask; mask = 0; + m = mask; + mask = 0; while (m) { int prio = ffz(~m); m &= ~(1 << prio); - - if (p->un.inner.ptr[prio] == cl->node+prio) { + + if (p->un.inner.ptr[prio] == cl->node + prio) { /* we are removing child which is pointed to from parent feed - forget the pointer but remember classid */ p->un.inner.last_ptr_id[prio] = cl->classid; p->un.inner.ptr[prio] = NULL; } - - rb_erase(cl->node + prio,p->un.inner.feed + prio); - - if (!p->un.inner.feed[prio].rb_node) + + rb_erase(cl->node + prio, p->un.inner.feed + prio); + + if (!p->un.inner.feed[prio].rb_node) mask |= 1 << prio; } p->prio_activity &= ~mask; - cl = p; p = cl->parent; + cl = p; + p = cl->parent; } - if (cl->cmode == HTB_CAN_SEND && mask) - htb_remove_class_from_row(q,cl,mask); + if (cl->cmode == HTB_CAN_SEND && mask) + htb_remove_class_from_row(q, cl, mask); } #if HTB_HYSTERESIS @@ -508,21 +511,21 @@ static inline long htb_hiwater(const struct htb_class *cl) * 0 .. -cl->{c,}buffer range. It is meant to limit number of * mode transitions per time unit. The speed gain is about 1/6. */ -static __inline__ enum htb_cmode -htb_class_mode(struct htb_class *cl,long *diff) +static inline enum htb_cmode +htb_class_mode(struct htb_class *cl, long *diff) { - long toks; + long toks; - if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) { - *diff = -toks; - return HTB_CANT_SEND; - } + if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) { + *diff = -toks; + return HTB_CANT_SEND; + } - if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl)) - return HTB_CAN_SEND; + if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl)) + return HTB_CAN_SEND; - *diff = -toks; - return HTB_MAY_BORROW; + *diff = -toks; + return HTB_MAY_BORROW; } /** @@ -534,22 +537,21 @@ htb_class_mode(struct htb_class *cl,long *diff) * be different from old one and cl->pq_key has to be valid if changing * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree). */ -static void +static void htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff) -{ - enum htb_cmode new_mode = htb_class_mode(cl,diff); - +{ + enum htb_cmode new_mode = htb_class_mode(cl, diff); if (new_mode == cl->cmode) - return; - - if (cl->prio_activity) { /* not necessary: speed optimization */ - if (cl->cmode != HTB_CANT_SEND) - htb_deactivate_prios(q,cl); + return; + + if (cl->prio_activity) { /* not necessary: speed optimization */ + if (cl->cmode != HTB_CANT_SEND) + htb_deactivate_prios(q, cl); cl->cmode = new_mode; - if (new_mode != HTB_CANT_SEND) - htb_activate_prios(q,cl); - } else + if (new_mode != HTB_CANT_SEND) + htb_activate_prios(q, cl); + } else cl->cmode = new_mode; } @@ -560,14 +562,15 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff) * for the prio. It can be called on already active leaf safely. * It also adds leaf into droplist. */ -static __inline__ void htb_activate(struct htb_sched *q,struct htb_class *cl) +static inline void htb_activate(struct htb_sched *q, struct htb_class *cl) { BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen); if (!cl->prio_activity) { cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio); - htb_activate_prios(q,cl); - list_add_tail(&cl->un.leaf.drop_list,q->drops+cl->un.leaf.aprio); + htb_activate_prios(q, cl); + list_add_tail(&cl->un.leaf.drop_list, + q->drops + cl->un.leaf.aprio); } } @@ -577,97 +580,100 @@ static __inline__ void htb_activate(struct htb_sched *q,struct htb_class *cl) * Make sure that leaf is active. In the other words it can't be called * with non-active leaf. It also removes class from the drop list. */ -static __inline__ void -htb_deactivate(struct htb_sched *q,struct htb_class *cl) +static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) { BUG_TRAP(cl->prio_activity); - htb_deactivate_prios(q,cl); + htb_deactivate_prios(q, cl); cl->prio_activity = 0; list_del_init(&cl->un.leaf.drop_list); } static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) { - int ret; - struct htb_sched *q = qdisc_priv(sch); - struct htb_class *cl = htb_classify(skb,sch,&ret); + int ret; + struct htb_sched *q = qdisc_priv(sch); + struct htb_class *cl = htb_classify(skb, sch, &ret); - if (cl == HTB_DIRECT) { - /* enqueue to helper queue */ - if (q->direct_queue.qlen < q->direct_qlen) { - __skb_queue_tail(&q->direct_queue, skb); - q->direct_pkts++; - } else { - kfree_skb(skb); - sch->qstats.drops++; - return NET_XMIT_DROP; - } + if (cl == HTB_DIRECT) { + /* enqueue to helper queue */ + if (q->direct_queue.qlen < q->direct_qlen) { + __skb_queue_tail(&q->direct_queue, skb); + q->direct_pkts++; + } else { + kfree_skb(skb); + sch->qstats.drops++; + return NET_XMIT_DROP; + } #ifdef CONFIG_NET_CLS_ACT - } else if (!cl) { - if (ret == NET_XMIT_BYPASS) - sch->qstats.drops++; - kfree_skb (skb); - return ret; + } else if (!cl) { + if (ret == NET_XMIT_BYPASS) + sch->qstats.drops++; + kfree_skb(skb); + return ret; #endif - } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) { - sch->qstats.drops++; - cl->qstats.drops++; - return NET_XMIT_DROP; - } else { - cl->bstats.packets++; cl->bstats.bytes += skb->len; - htb_activate (q,cl); - } + } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != + NET_XMIT_SUCCESS) { + sch->qstats.drops++; + cl->qstats.drops++; + return NET_XMIT_DROP; + } else { + cl->bstats.packets++; + cl->bstats.bytes += skb->len; + htb_activate(q, cl); + } - sch->q.qlen++; - sch->bstats.packets++; sch->bstats.bytes += skb->len; - return NET_XMIT_SUCCESS; + sch->q.qlen++; + sch->bstats.packets++; + sch->bstats.bytes += skb->len; + return NET_XMIT_SUCCESS; } /* TODO: requeuing packet charges it to policers again !! */ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) { - struct htb_sched *q = qdisc_priv(sch); - int ret = NET_XMIT_SUCCESS; - struct htb_class *cl = htb_classify(skb,sch, &ret); - struct sk_buff *tskb; + struct htb_sched *q = qdisc_priv(sch); + int ret = NET_XMIT_SUCCESS; + struct htb_class *cl = htb_classify(skb, sch, &ret); + struct sk_buff *tskb; - if (cl == HTB_DIRECT || !cl) { - /* enqueue to helper queue */ - if (q->direct_queue.qlen < q->direct_qlen && cl) { - __skb_queue_head(&q->direct_queue, skb); - } else { - __skb_queue_head(&q->direct_queue, skb); - tskb = __skb_dequeue_tail(&q->direct_queue); - kfree_skb (tskb); - sch->qstats.drops++; - return NET_XMIT_CN; - } - } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) { - sch->qstats.drops++; - cl->qstats.drops++; - return NET_XMIT_DROP; - } else - htb_activate (q,cl); + if (cl == HTB_DIRECT || !cl) { + /* enqueue to helper queue */ + if (q->direct_queue.qlen < q->direct_qlen && cl) { + __skb_queue_head(&q->direct_queue, skb); + } else { + __skb_queue_head(&q->direct_queue, skb); + tskb = __skb_dequeue_tail(&q->direct_queue); + kfree_skb(tskb); + sch->qstats.drops++; + return NET_XMIT_CN; + } + } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != + NET_XMIT_SUCCESS) { + sch->qstats.drops++; + cl->qstats.drops++; + return NET_XMIT_DROP; + } else + htb_activate(q, cl); - sch->q.qlen++; - sch->qstats.requeues++; - return NET_XMIT_SUCCESS; + sch->q.qlen++; + sch->qstats.requeues++; + return NET_XMIT_SUCCESS; } static void htb_timer(unsigned long arg) { - struct Qdisc *sch = (struct Qdisc*)arg; - sch->flags &= ~TCQ_F_THROTTLED; - wmb(); - netif_schedule(sch->dev); + struct Qdisc *sch = (struct Qdisc *)arg; + sch->flags &= ~TCQ_F_THROTTLED; + wmb(); + netif_schedule(sch->dev); } #ifdef HTB_RATECM #define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0 static void htb_rate_timer(unsigned long arg) { - struct Qdisc *sch = (struct Qdisc*)arg; + struct Qdisc *sch = (struct Qdisc *)arg; struct htb_sched *q = qdisc_priv(sch); struct list_head *p; @@ -678,13 +684,13 @@ static void htb_rate_timer(unsigned long arg) add_timer(&q->rttim); /* scan and recompute one bucket at time */ - if (++q->recmp_bucket >= HTB_HSIZE) + if (++q->recmp_bucket >= HTB_HSIZE) q->recmp_bucket = 0; - list_for_each (p,q->hash+q->recmp_bucket) { - struct htb_class *cl = list_entry(p,struct htb_class,hlist); + list_for_each(p, q->hash + q->recmp_bucket) { + struct htb_class *cl = list_entry(p, struct htb_class, hlist); - RT_GEN (cl->sum_bytes,cl->rate_bytes); - RT_GEN (cl->sum_packets,cl->rate_packets); + RT_GEN(cl->sum_bytes, cl->rate_bytes); + RT_GEN(cl->sum_packets, cl->rate_packets); } spin_unlock_bh(&sch->dev->queue_lock); } @@ -701,10 +707,10 @@ static void htb_rate_timer(unsigned long arg) * CAN_SEND) because we can use more precise clock that event queue here. * In such case we remove class from event queue first. */ -static void htb_charge_class(struct htb_sched *q,struct htb_class *cl, - int level,int bytes) -{ - long toks,diff; +static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, + int level, int bytes) +{ + long toks, diff; enum htb_cmode old_mode; #define HTB_ACCNT(T,B,R) toks = diff + cl->T; \ @@ -714,29 +720,31 @@ static void htb_charge_class(struct htb_sched *q,struct htb_class *cl, cl->T = toks while (cl) { - diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer); + diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer); if (cl->level >= level) { - if (cl->level == level) cl->xstats.lends++; - HTB_ACCNT (tokens,buffer,rate); + if (cl->level == level) + cl->xstats.lends++; + HTB_ACCNT(tokens, buffer, rate); } else { cl->xstats.borrows++; - cl->tokens += diff; /* we moved t_c; update tokens */ + cl->tokens += diff; /* we moved t_c; update tokens */ } - HTB_ACCNT (ctokens,cbuffer,ceil); + HTB_ACCNT(ctokens, cbuffer, ceil); cl->t_c = q->now; - old_mode = cl->cmode; diff = 0; - htb_change_class_mode(q,cl,&diff); + old_mode = cl->cmode; + diff = 0; + htb_change_class_mode(q, cl, &diff); if (old_mode != cl->cmode) { if (old_mode != HTB_CAN_SEND) - rb_erase(&cl->pq_node,q->wait_pq+cl->level); + rb_erase(&cl->pq_node, q->wait_pq + cl->level); if (cl->cmode != HTB_CAN_SEND) - htb_add_to_wait_tree (q,cl,diff); + htb_add_to_wait_tree(q, cl, diff); } - #ifdef HTB_RATECM /* update rate counters */ - cl->sum_bytes += bytes; cl->sum_packets++; + cl->sum_bytes += bytes; + cl->sum_packets++; #endif /* update byte stats except for leaves which are already updated */ @@ -755,7 +763,7 @@ static void htb_charge_class(struct htb_sched *q,struct htb_class *cl, * next pending event (0 for no event in pq). * Note: Aplied are events whose have cl->pq_key <= jiffies. */ -static long htb_do_events(struct htb_sched *q,int level) +static long htb_do_events(struct htb_sched *q, int level) { int i; @@ -763,34 +771,38 @@ static long htb_do_events(struct htb_sched *q,int level) struct htb_class *cl; long diff; struct rb_node *p = q->wait_pq[level].rb_node; - if (!p) return 0; - while (p->rb_left) p = p->rb_left; + if (!p) + return 0; + while (p->rb_left) + p = p->rb_left; cl = rb_entry(p, struct htb_class, pq_node); if (time_after(cl->pq_key, q->jiffies)) { return cl->pq_key - q->jiffies; } - rb_erase(p,q->wait_pq+level); - diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer); - htb_change_class_mode(q,cl,&diff); + rb_erase(p, q->wait_pq + level); + diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer); + htb_change_class_mode(q, cl, &diff); if (cl->cmode != HTB_CAN_SEND) - htb_add_to_wait_tree (q,cl,diff); + htb_add_to_wait_tree(q, cl, diff); } if (net_ratelimit()) printk(KERN_WARNING "htb: too many events !\n"); - return HZ/10; + return HZ / 10; } /* Returns class->node+prio from id-tree where classe's id is >= id. NULL is no such one exists. */ -static struct rb_node * -htb_id_find_next_upper(int prio,struct rb_node *n,u32 id) +static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n, + u32 id) { struct rb_node *r = NULL; while (n) { - struct htb_class *cl = rb_entry(n,struct htb_class,node[prio]); - if (id == cl->classid) return n; - + struct htb_class *cl = + rb_entry(n, struct htb_class, node[prio]); + if (id == cl->classid) + return n; + if (id > cl->classid) { n = n->rb_right; } else { @@ -806,46 +818,49 @@ htb_id_find_next_upper(int prio,struct rb_node *n,u32 id) * * Find leaf where current feed pointers points to. */ -static struct htb_class * -htb_lookup_leaf(struct rb_root *tree,int prio,struct rb_node **pptr,u32 *pid) +static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, + struct rb_node **pptr, u32 * pid) { int i; struct { struct rb_node *root; struct rb_node **pptr; u32 *pid; - } stk[TC_HTB_MAXDEPTH],*sp = stk; - + } stk[TC_HTB_MAXDEPTH], *sp = stk; + BUG_TRAP(tree->rb_node); sp->root = tree->rb_node; sp->pptr = pptr; sp->pid = pid; for (i = 0; i < 65535; i++) { - if (!*sp->pptr && *sp->pid) { + if (!*sp->pptr && *sp->pid) { /* ptr was invalidated but id is valid - try to recover the original or next ptr */ - *sp->pptr = htb_id_find_next_upper(prio,sp->root,*sp->pid); + *sp->pptr = + htb_id_find_next_upper(prio, sp->root, *sp->pid); } - *sp->pid = 0; /* ptr is valid now so that remove this hint as it - can become out of date quickly */ - if (!*sp->pptr) { /* we are at right end; rewind & go up */ + *sp->pid = 0; /* ptr is valid now so that remove this hint as it + can become out of date quickly */ + if (!*sp->pptr) { /* we are at right end; rewind & go up */ *sp->pptr = sp->root; - while ((*sp->pptr)->rb_left) + while ((*sp->pptr)->rb_left) *sp->pptr = (*sp->pptr)->rb_left; if (sp > stk) { sp--; - BUG_TRAP(*sp->pptr); if(!*sp->pptr) return NULL; - htb_next_rb_node (sp->pptr); + BUG_TRAP(*sp->pptr); + if (!*sp->pptr) + return NULL; + htb_next_rb_node(sp->pptr); } } else { struct htb_class *cl; - cl = rb_entry(*sp->pptr,struct htb_class,node[prio]); - if (!cl->level) + cl = rb_entry(*sp->pptr, struct htb_class, node[prio]); + if (!cl->level) return cl; (++sp)->root = cl->un.inner.feed[prio].rb_node; - sp->pptr = cl->un.inner.ptr+prio; - sp->pid = cl->un.inner.last_ptr_id+prio; + sp->pptr = cl->un.inner.ptr + prio; + sp->pid = cl->un.inner.last_ptr_id + prio; } } BUG_TRAP(0); @@ -854,19 +869,21 @@ htb_lookup_leaf(struct rb_root *tree,int prio,struct rb_node **pptr,u32 *pid) /* dequeues packet at given priority and level; call only if you are sure that there is active class at prio/level */ -static struct sk_buff * -htb_dequeue_tree(struct htb_sched *q,int prio,int level) +static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio, + int level) { struct sk_buff *skb = NULL; - struct htb_class *cl,*start; + struct htb_class *cl, *start; /* look initial class up in the row */ - start = cl = htb_lookup_leaf (q->row[level]+prio,prio, - q->ptr[level]+prio,q->last_ptr_id[level]+prio); - + start = cl = htb_lookup_leaf(q->row[level] + prio, prio, + q->ptr[level] + prio, + q->last_ptr_id[level] + prio); + do { next: - BUG_TRAP(cl); - if (!cl) return NULL; + BUG_TRAP(cl); + if (!cl) + return NULL; /* class can be empty - it is unlikely but can be true if leaf qdisc drops packets in enqueue routine or if someone used @@ -874,56 +891,64 @@ next: simply deactivate and skip such class */ if (unlikely(cl->un.leaf.q->q.qlen == 0)) { struct htb_class *next; - htb_deactivate(q,cl); + htb_deactivate(q, cl); /* row/level might become empty */ if ((q->row_mask[level] & (1 << prio)) == 0) - return NULL; - - next = htb_lookup_leaf (q->row[level]+prio, - prio,q->ptr[level]+prio,q->last_ptr_id[level]+prio); + return NULL; - if (cl == start) /* fix start if we just deleted it */ + next = htb_lookup_leaf(q->row[level] + prio, + prio, q->ptr[level] + prio, + q->last_ptr_id[level] + prio); + + if (cl == start) /* fix start if we just deleted it */ start = next; cl = next; goto next; } - - if (likely((skb = cl->un.leaf.q->dequeue(cl->un.leaf.q)) != NULL)) + + skb = cl->un.leaf.q->dequeue(cl->un.leaf.q); + if (likely(skb != NULL)) break; if (!cl->warned) { - printk(KERN_WARNING "htb: class %X isn't work conserving ?!\n",cl->classid); + printk(KERN_WARNING + "htb: class %X isn't work conserving ?!\n", + cl->classid); cl->warned = 1; } q->nwc_hit++; - htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio); - cl = htb_lookup_leaf (q->row[level]+prio,prio,q->ptr[level]+prio, - q->last_ptr_id[level]+prio); + htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> + ptr[0]) + prio); + cl = htb_lookup_leaf(q->row[level] + prio, prio, + q->ptr[level] + prio, + q->last_ptr_id[level] + prio); } while (cl != start); if (likely(skb != NULL)) { if ((cl->un.leaf.deficit[level] -= skb->len) < 0) { cl->un.leaf.deficit[level] += cl->un.leaf.quantum; - htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio); + htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> + ptr[0]) + prio); } /* this used to be after charge_class but this constelation gives us slightly better performance */ if (!cl->un.leaf.q->q.qlen) - htb_deactivate (q,cl); - htb_charge_class (q,cl,level,skb->len); + htb_deactivate(q, cl); + htb_charge_class(q, cl, level, skb->len); } return skb; } -static void htb_delay_by(struct Qdisc *sch,long delay) +static void htb_delay_by(struct Qdisc *sch, long delay) { struct htb_sched *q = qdisc_priv(sch); - if (delay <= 0) delay = 1; - if (unlikely(delay > 5*HZ)) { + if (delay <= 0) + delay = 1; + if (unlikely(delay > 5 * HZ)) { if (net_ratelimit()) printk(KERN_INFO "HTB delay %ld > 5sec\n", delay); - delay = 5*HZ; + delay = 5 * HZ; } /* why don't use jiffies here ? because expires can be in past */ mod_timer(&q->timer, q->jiffies + delay); @@ -941,13 +966,15 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) q->jiffies = jiffies; /* try to dequeue direct packets as high prio (!) to minimize cpu work */ - if ((skb = __skb_dequeue(&q->direct_queue)) != NULL) { + skb = __skb_dequeue(&q->direct_queue); + if (skb != NULL) { sch->flags &= ~TCQ_F_THROTTLED; sch->q.qlen--; return skb; } - if (!sch->q.qlen) goto fin; + if (!sch->q.qlen) + goto fin; PSCHED_GET_TIME(q->now); min_delay = LONG_MAX; @@ -957,18 +984,19 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) int m; long delay; if (time_after_eq(q->jiffies, q->near_ev_cache[level])) { - delay = htb_do_events(q,level); - q->near_ev_cache[level] = q->jiffies + (delay ? delay : HZ); + delay = htb_do_events(q, level); + q->near_ev_cache[level] = + q->jiffies + (delay ? delay : HZ); } else - delay = q->near_ev_cache[level] - q->jiffies; - - if (delay && min_delay > delay) + delay = q->near_ev_cache[level] - q->jiffies; + + if (delay && min_delay > delay) min_delay = delay; m = ~q->row_mask[level]; while (m != (int)(-1)) { - int prio = ffz (m); + int prio = ffz(m); m |= 1 << prio; - skb = htb_dequeue_tree(q,prio,level); + skb = htb_dequeue_tree(q, prio, level); if (likely(skb != NULL)) { sch->q.qlen--; sch->flags &= ~TCQ_F_THROTTLED; @@ -976,28 +1004,28 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) } } } - htb_delay_by (sch,min_delay > 5*HZ ? 5*HZ : min_delay); + htb_delay_by(sch, min_delay > 5 * HZ ? 5 * HZ : min_delay); fin: return skb; } /* try to drop from each class (by prio) until one succeed */ -static unsigned int htb_drop(struct Qdisc* sch) +static unsigned int htb_drop(struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); int prio; for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) { struct list_head *p; - list_for_each (p,q->drops+prio) { + list_for_each(p, q->drops + prio) { struct htb_class *cl = list_entry(p, struct htb_class, un.leaf.drop_list); unsigned int len; - if (cl->un.leaf.q->ops->drop && - (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) { + if (cl->un.leaf.q->ops->drop && + (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) { sch->q.qlen--; if (!cl->un.leaf.q->q.qlen) - htb_deactivate (q,cl); + htb_deactivate(q, cl); return len; } } @@ -1007,19 +1035,20 @@ static unsigned int htb_drop(struct Qdisc* sch) /* reset all classes */ /* always caled under BH & queue lock */ -static void htb_reset(struct Qdisc* sch) +static void htb_reset(struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); int i; for (i = 0; i < HTB_HSIZE; i++) { struct list_head *p; - list_for_each (p,q->hash+i) { - struct htb_class *cl = list_entry(p,struct htb_class,hlist); + list_for_each(p, q->hash + i) { + struct htb_class *cl = + list_entry(p, struct htb_class, hlist); if (cl->level) - memset(&cl->un.inner,0,sizeof(cl->un.inner)); + memset(&cl->un.inner, 0, sizeof(cl->un.inner)); else { - if (cl->un.leaf.q) + if (cl->un.leaf.q) qdisc_reset(cl->un.leaf.q); INIT_LIST_HEAD(&cl->un.leaf.drop_list); } @@ -1032,12 +1061,12 @@ static void htb_reset(struct Qdisc* sch) del_timer(&q->timer); __skb_queue_purge(&q->direct_queue); sch->q.qlen = 0; - memset(q->row,0,sizeof(q->row)); - memset(q->row_mask,0,sizeof(q->row_mask)); - memset(q->wait_pq,0,sizeof(q->wait_pq)); - memset(q->ptr,0,sizeof(q->ptr)); + memset(q->row, 0, sizeof(q->row)); + memset(q->row_mask, 0, sizeof(q->row_mask)); + memset(q->wait_pq, 0, sizeof(q->wait_pq)); + memset(q->ptr, 0, sizeof(q->ptr)); for (i = 0; i < TC_HTB_NUMPRIO; i++) - INIT_LIST_HEAD(q->drops+i); + INIT_LIST_HEAD(q->drops + i); } static int htb_init(struct Qdisc *sch, struct rtattr *opt) @@ -1047,29 +1076,30 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt) struct tc_htb_glob *gopt; int i; if (!opt || rtattr_parse_nested(tb, TCA_HTB_INIT, opt) || - tb[TCA_HTB_INIT-1] == NULL || - RTA_PAYLOAD(tb[TCA_HTB_INIT-1]) < sizeof(*gopt)) { + tb[TCA_HTB_INIT - 1] == NULL || + RTA_PAYLOAD(tb[TCA_HTB_INIT - 1]) < sizeof(*gopt)) { printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n"); return -EINVAL; } - gopt = RTA_DATA(tb[TCA_HTB_INIT-1]); + gopt = RTA_DATA(tb[TCA_HTB_INIT - 1]); if (gopt->version != HTB_VER >> 16) { - printk(KERN_ERR "HTB: need tc/htb version %d (minor is %d), you have %d\n", - HTB_VER >> 16,HTB_VER & 0xffff,gopt->version); + printk(KERN_ERR + "HTB: need tc/htb version %d (minor is %d), you have %d\n", + HTB_VER >> 16, HTB_VER & 0xffff, gopt->version); return -EINVAL; } INIT_LIST_HEAD(&q->root); for (i = 0; i < HTB_HSIZE; i++) - INIT_LIST_HEAD(q->hash+i); + INIT_LIST_HEAD(q->hash + i); for (i = 0; i < TC_HTB_NUMPRIO; i++) - INIT_LIST_HEAD(q->drops+i); + INIT_LIST_HEAD(q->drops + i); init_timer(&q->timer); skb_queue_head_init(&q->direct_queue); q->direct_qlen = sch->dev->tx_queue_len; - if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ + if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ q->direct_qlen = 2; q->timer.function = htb_timer; q->timer.data = (unsigned long)sch; @@ -1091,7 +1121,7 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt) static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) { struct htb_sched *q = qdisc_priv(sch); - unsigned char *b = skb->tail; + unsigned char *b = skb->tail; struct rtattr *rta; struct tc_htb_glob gopt; spin_lock_bh(&sch->dev->queue_lock); @@ -1101,7 +1131,7 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) gopt.rate2quantum = q->rate2quantum; gopt.defcls = q->defcls; gopt.debug = 0; - rta = (struct rtattr*)b; + rta = (struct rtattr *)b; RTA_PUT(skb, TCA_OPTIONS, 0, NULL); RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt); rta->rta_len = skb->tail - b; @@ -1114,10 +1144,10 @@ rtattr_failure: } static int htb_dump_class(struct Qdisc *sch, unsigned long arg, - struct sk_buff *skb, struct tcmsg *tcm) + struct sk_buff *skb, struct tcmsg *tcm) { - struct htb_class *cl = (struct htb_class*)arg; - unsigned char *b = skb->tail; + struct htb_class *cl = (struct htb_class *)arg; + unsigned char *b = skb->tail; struct rtattr *rta; struct tc_htb_opt opt; @@ -1127,15 +1157,18 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, if (!cl->level && cl->un.leaf.q) tcm->tcm_info = cl->un.leaf.q->handle; - rta = (struct rtattr*)b; + rta = (struct rtattr *)b; RTA_PUT(skb, TCA_OPTIONS, 0, NULL); - memset (&opt,0,sizeof(opt)); + memset(&opt, 0, sizeof(opt)); - opt.rate = cl->rate->rate; opt.buffer = cl->buffer; - opt.ceil = cl->ceil->rate; opt.cbuffer = cl->cbuffer; - opt.quantum = cl->un.leaf.quantum; opt.prio = cl->un.leaf.prio; - opt.level = cl->level; + opt.rate = cl->rate->rate; + opt.buffer = cl->buffer; + opt.ceil = cl->ceil->rate; + opt.cbuffer = cl->cbuffer; + opt.quantum = cl->un.leaf.quantum; + opt.prio = cl->un.leaf.prio; + opt.level = cl->level; RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt); rta->rta_len = skb->tail - b; spin_unlock_bh(&sch->dev->queue_lock); @@ -1147,14 +1180,13 @@ rtattr_failure: } static int -htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, - struct gnet_dump *d) +htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) { - struct htb_class *cl = (struct htb_class*)arg; + struct htb_class *cl = (struct htb_class *)arg; #ifdef HTB_RATECM - cl->rate_est.bps = cl->rate_bytes/(HTB_EWMAC*HTB_HSIZE); - cl->rate_est.pps = cl->rate_packets/(HTB_EWMAC*HTB_HSIZE); + cl->rate_est.bps = cl->rate_bytes / (HTB_EWMAC * HTB_HSIZE); + cl->rate_est.pps = cl->rate_packets / (HTB_EWMAC * HTB_HSIZE); #endif if (!cl->level && cl->un.leaf.q) @@ -1171,21 +1203,22 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, } static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, - struct Qdisc **old) + struct Qdisc **old) { - struct htb_class *cl = (struct htb_class*)arg; + struct htb_class *cl = (struct htb_class *)arg; if (cl && !cl->level) { - if (new == NULL && (new = qdisc_create_dflt(sch->dev, - &pfifo_qdisc_ops)) == NULL) - return -ENOBUFS; + if (new == NULL && (new = qdisc_create_dflt(sch->dev, + &pfifo_qdisc_ops)) + == NULL) + return -ENOBUFS; sch_tree_lock(sch); if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) { if (cl->prio_activity) - htb_deactivate (qdisc_priv(sch),cl); + htb_deactivate(qdisc_priv(sch), cl); /* TODO: is it correct ? Why CBQ doesn't do it ? */ - sch->q.qlen -= (*old)->q.qlen; + sch->q.qlen -= (*old)->q.qlen; qdisc_reset(*old); } sch_tree_unlock(sch); @@ -1194,16 +1227,16 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, return -ENOENT; } -static struct Qdisc * htb_leaf(struct Qdisc *sch, unsigned long arg) +static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg) { - struct htb_class *cl = (struct htb_class*)arg; + struct htb_class *cl = (struct htb_class *)arg; return (cl && !cl->level) ? cl->un.leaf.q : NULL; } static unsigned long htb_get(struct Qdisc *sch, u32 classid) { - struct htb_class *cl = htb_find(classid,sch); - if (cl) + struct htb_class *cl = htb_find(classid, sch); + if (cl) cl->refcnt++; return (unsigned long)cl; } @@ -1218,7 +1251,7 @@ static void htb_destroy_filters(struct tcf_proto **fl) } } -static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl) +static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) { struct htb_sched *q = qdisc_priv(sch); if (!cl->level) { @@ -1228,44 +1261,44 @@ static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl) } qdisc_put_rtab(cl->rate); qdisc_put_rtab(cl->ceil); - - htb_destroy_filters (&cl->filter_list); - - while (!list_empty(&cl->children)) - htb_destroy_class (sch,list_entry(cl->children.next, - struct htb_class,sibling)); + + htb_destroy_filters(&cl->filter_list); + + while (!list_empty(&cl->children)) + htb_destroy_class(sch, list_entry(cl->children.next, + struct htb_class, sibling)); /* note: this delete may happen twice (see htb_delete) */ list_del(&cl->hlist); list_del(&cl->sibling); - + if (cl->prio_activity) - htb_deactivate (q,cl); - + htb_deactivate(q, cl); + if (cl->cmode != HTB_CAN_SEND) - rb_erase(&cl->pq_node,q->wait_pq+cl->level); - + rb_erase(&cl->pq_node, q->wait_pq + cl->level); + kfree(cl); } /* always caled under BH & queue lock */ -static void htb_destroy(struct Qdisc* sch) +static void htb_destroy(struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); - del_timer_sync (&q->timer); + del_timer_sync(&q->timer); #ifdef HTB_RATECM - del_timer_sync (&q->rttim); + del_timer_sync(&q->rttim); #endif /* This line used to be after htb_destroy_class call below and surprisingly it worked in 2.4. But it must precede it because filter need its target class alive to be able to call unbind_filter on it (without Oops). */ htb_destroy_filters(&q->filter_list); - - while (!list_empty(&q->root)) - htb_destroy_class (sch,list_entry(q->root.next, - struct htb_class,sibling)); + + while (!list_empty(&q->root)) + htb_destroy_class(sch, list_entry(q->root.next, + struct htb_class, sibling)); __skb_queue_purge(&q->direct_queue); } @@ -1273,23 +1306,23 @@ static void htb_destroy(struct Qdisc* sch) static int htb_delete(struct Qdisc *sch, unsigned long arg) { struct htb_sched *q = qdisc_priv(sch); - struct htb_class *cl = (struct htb_class*)arg; + struct htb_class *cl = (struct htb_class *)arg; // TODO: why don't allow to delete subtree ? references ? does // tc subsys quarantee us that in htb_destroy it holds no class // refs so that we can remove children safely there ? if (!list_empty(&cl->children) || cl->filter_cnt) return -EBUSY; - + sch_tree_lock(sch); - + /* delete from hash and active; remainder in destroy_class */ list_del_init(&cl->hlist); if (cl->prio_activity) - htb_deactivate (q,cl); + htb_deactivate(q, cl); if (--cl->refcnt == 0) - htb_destroy_class(sch,cl); + htb_destroy_class(sch, cl); sch_tree_unlock(sch); return 0; @@ -1297,41 +1330,44 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) static void htb_put(struct Qdisc *sch, unsigned long arg) { - struct htb_class *cl = (struct htb_class*)arg; + struct htb_class *cl = (struct htb_class *)arg; if (--cl->refcnt == 0) - htb_destroy_class(sch,cl); + htb_destroy_class(sch, cl); } -static int htb_change_class(struct Qdisc *sch, u32 classid, - u32 parentid, struct rtattr **tca, unsigned long *arg) +static int htb_change_class(struct Qdisc *sch, u32 classid, + u32 parentid, struct rtattr **tca, + unsigned long *arg) { int err = -EINVAL; struct htb_sched *q = qdisc_priv(sch); - struct htb_class *cl = (struct htb_class*)*arg,*parent; - struct rtattr *opt = tca[TCA_OPTIONS-1]; + struct htb_class *cl = (struct htb_class *)*arg, *parent; + struct rtattr *opt = tca[TCA_OPTIONS - 1]; struct qdisc_rate_table *rtab = NULL, *ctab = NULL; struct rtattr *tb[TCA_HTB_RTAB]; struct tc_htb_opt *hopt; /* extract all subattrs from opt attr */ if (!opt || rtattr_parse_nested(tb, TCA_HTB_RTAB, opt) || - tb[TCA_HTB_PARMS-1] == NULL || - RTA_PAYLOAD(tb[TCA_HTB_PARMS-1]) < sizeof(*hopt)) + tb[TCA_HTB_PARMS - 1] == NULL || + RTA_PAYLOAD(tb[TCA_HTB_PARMS - 1]) < sizeof(*hopt)) goto failure; - - parent = parentid == TC_H_ROOT ? NULL : htb_find (parentid,sch); - hopt = RTA_DATA(tb[TCA_HTB_PARMS-1]); + parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch); - rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB-1]); - ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB-1]); - if (!rtab || !ctab) goto failure; + hopt = RTA_DATA(tb[TCA_HTB_PARMS - 1]); - if (!cl) { /* new class */ + rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB - 1]); + ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB - 1]); + if (!rtab || !ctab) + goto failure; + + if (!cl) { /* new class */ struct Qdisc *new_q; /* check for valid classid */ - if (!classid || TC_H_MAJ(classid^sch->handle) || htb_find(classid,sch)) + if (!classid || TC_H_MAJ(classid ^ sch->handle) + || htb_find(classid, sch)) goto failure; /* check maximal depth */ @@ -1342,7 +1378,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, err = -ENOBUFS; if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL) goto failure; - + cl->refcnt = 1; INIT_LIST_HEAD(&cl->sibling); INIT_LIST_HEAD(&cl->hlist); @@ -1357,46 +1393,53 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (parent && !parent->level) { /* turn parent into inner node */ sch->q.qlen -= parent->un.leaf.q->q.qlen; - qdisc_destroy (parent->un.leaf.q); - if (parent->prio_activity) - htb_deactivate (q,parent); + qdisc_destroy(parent->un.leaf.q); + if (parent->prio_activity) + htb_deactivate(q, parent); /* remove from evt list because of level change */ if (parent->cmode != HTB_CAN_SEND) { - rb_erase(&parent->pq_node,q->wait_pq); + rb_erase(&parent->pq_node, q->wait_pq); parent->cmode = HTB_CAN_SEND; } parent->level = (parent->parent ? parent->parent->level - : TC_HTB_MAXDEPTH) - 1; - memset (&parent->un.inner,0,sizeof(parent->un.inner)); + : TC_HTB_MAXDEPTH) - 1; + memset(&parent->un.inner, 0, sizeof(parent->un.inner)); } /* leaf (we) needs elementary qdisc */ cl->un.leaf.q = new_q ? new_q : &noop_qdisc; - cl->classid = classid; cl->parent = parent; + cl->classid = classid; + cl->parent = parent; /* set class to be in HTB_CAN_SEND state */ cl->tokens = hopt->buffer; cl->ctokens = hopt->cbuffer; - cl->mbuffer = PSCHED_JIFFIE2US(HZ*60); /* 1min */ + cl->mbuffer = PSCHED_JIFFIE2US(HZ * 60); /* 1min */ PSCHED_GET_TIME(cl->t_c); cl->cmode = HTB_CAN_SEND; /* attach to the hash list and parent's family */ - list_add_tail(&cl->hlist, q->hash+htb_hash(classid)); - list_add_tail(&cl->sibling, parent ? &parent->children : &q->root); - } else sch_tree_lock(sch); + list_add_tail(&cl->hlist, q->hash + htb_hash(classid)); + list_add_tail(&cl->sibling, + parent ? &parent->children : &q->root); + } else + sch_tree_lock(sch); /* it used to be a nasty bug here, we have to check that node - is really leaf before changing cl->un.leaf ! */ + is really leaf before changing cl->un.leaf ! */ if (!cl->level) { cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum; if (!hopt->quantum && cl->un.leaf.quantum < 1000) { - printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.\n", cl->classid); + printk(KERN_WARNING + "HTB: quantum of class %X is small. Consider r2q change.\n", + cl->classid); cl->un.leaf.quantum = 1000; } if (!hopt->quantum && cl->un.leaf.quantum > 200000) { - printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.\n", cl->classid); + printk(KERN_WARNING + "HTB: quantum of class %X is big. Consider r2q change.\n", + cl->classid); cl->un.leaf.quantum = 200000; } if (hopt->quantum) @@ -1407,16 +1450,22 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->buffer = hopt->buffer; cl->cbuffer = hopt->cbuffer; - if (cl->rate) qdisc_put_rtab(cl->rate); cl->rate = rtab; - if (cl->ceil) qdisc_put_rtab(cl->ceil); cl->ceil = ctab; + if (cl->rate) + qdisc_put_rtab(cl->rate); + cl->rate = rtab; + if (cl->ceil) + qdisc_put_rtab(cl->ceil); + cl->ceil = ctab; sch_tree_unlock(sch); *arg = (unsigned long)cl; return 0; failure: - if (rtab) qdisc_put_rtab(rtab); - if (ctab) qdisc_put_rtab(ctab); + if (rtab) + qdisc_put_rtab(rtab); + if (ctab) + qdisc_put_rtab(ctab); return err; } @@ -1430,23 +1479,23 @@ static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg) } static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent, - u32 classid) + u32 classid) { struct htb_sched *q = qdisc_priv(sch); - struct htb_class *cl = htb_find (classid,sch); + struct htb_class *cl = htb_find(classid, sch); /*if (cl && !cl->level) return 0; - The line above used to be there to prevent attaching filters to - leaves. But at least tc_index filter uses this just to get class - for other reasons so that we have to allow for it. - ---- - 19.6.2002 As Werner explained it is ok - bind filter is just - another way to "lock" the class - unlike "get" this lock can - be broken by class during destroy IIUC. + The line above used to be there to prevent attaching filters to + leaves. But at least tc_index filter uses this just to get class + for other reasons so that we have to allow for it. + ---- + 19.6.2002 As Werner explained it is ok - bind filter is just + another way to "lock" the class - unlike "get" this lock can + be broken by class during destroy IIUC. */ - if (cl) - cl->filter_cnt++; - else + if (cl) + cl->filter_cnt++; + else q->filter_cnt++; return (unsigned long)cl; } @@ -1456,9 +1505,9 @@ static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg) struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)arg; - if (cl) - cl->filter_cnt--; - else + if (cl) + cl->filter_cnt--; + else q->filter_cnt--; } @@ -1472,8 +1521,9 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) for (i = 0; i < HTB_HSIZE; i++) { struct list_head *p; - list_for_each (p,q->hash+i) { - struct htb_class *cl = list_entry(p,struct htb_class,hlist); + list_for_each(p, q->hash + i) { + struct htb_class *cl = + list_entry(p, struct htb_class, hlist); if (arg->count < arg->skip) { arg->count++; continue; @@ -1521,12 +1571,13 @@ static struct Qdisc_ops htb_qdisc_ops = { static int __init htb_module_init(void) { - return register_qdisc(&htb_qdisc_ops); + return register_qdisc(&htb_qdisc_ops); } -static void __exit htb_module_exit(void) +static void __exit htb_module_exit(void) { - unregister_qdisc(&htb_qdisc_ops); + unregister_qdisc(&htb_qdisc_ops); } + module_init(htb_module_init) module_exit(htb_module_exit) MODULE_LICENSE("GPL"); From 0cef296da9331e871401076b8c0688b2b31fcadd Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 10 Aug 2006 23:35:38 -0700 Subject: [PATCH 0453/1063] [HTB]: Use hlist for hash lists. Use hlist instead of list for the hash list. This saves space, and we can check for double delete better. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 49 +++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 6c6cac65255f..a686b9511b05 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -104,7 +104,7 @@ struct htb_class { /* topology */ int level; /* our level (see above) */ struct htb_class *parent; /* parent class */ - struct list_head hlist; /* classid hash list item */ + struct hlist_node hlist; /* classid hash list item */ struct list_head sibling; /* sibling list item */ struct list_head children; /* children list */ @@ -163,8 +163,8 @@ static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate, struct htb_sched { struct list_head root; /* root classes list */ - struct list_head hash[HTB_HSIZE]; /* hashed by classid */ - struct list_head drops[TC_HTB_NUMPRIO]; /* active leaves (for drops) */ + struct hlist_head hash[HTB_HSIZE]; /* hashed by classid */ + struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */ /* self list - roots of self generating tree */ struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; @@ -220,12 +220,13 @@ static inline int htb_hash(u32 h) static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); - struct list_head *p; + struct hlist_node *p; + struct htb_class *cl; + if (TC_H_MAJ(handle) != sch->handle) return NULL; - list_for_each(p, q->hash + htb_hash(handle)) { - struct htb_class *cl = list_entry(p, struct htb_class, hlist); + hlist_for_each_entry(cl, p, q->hash + htb_hash(handle), hlist) { if (cl->classid == handle) return cl; } @@ -675,7 +676,9 @@ static void htb_rate_timer(unsigned long arg) { struct Qdisc *sch = (struct Qdisc *)arg; struct htb_sched *q = qdisc_priv(sch); - struct list_head *p; + struct hlist_node *p; + struct htb_class *cl; + /* lock queue so that we can muck with it */ spin_lock_bh(&sch->dev->queue_lock); @@ -686,9 +689,8 @@ static void htb_rate_timer(unsigned long arg) /* scan and recompute one bucket at time */ if (++q->recmp_bucket >= HTB_HSIZE) q->recmp_bucket = 0; - list_for_each(p, q->hash + q->recmp_bucket) { - struct htb_class *cl = list_entry(p, struct htb_class, hlist); + hlist_for_each_entry(cl,p, q->hash + q->recmp_bucket, hlist) { RT_GEN(cl->sum_bytes, cl->rate_bytes); RT_GEN(cl->sum_packets, cl->rate_packets); } @@ -1041,10 +1043,10 @@ static void htb_reset(struct Qdisc *sch) int i; for (i = 0; i < HTB_HSIZE; i++) { - struct list_head *p; - list_for_each(p, q->hash + i) { - struct htb_class *cl = - list_entry(p, struct htb_class, hlist); + struct hlist_node *p; + struct htb_class *cl; + + hlist_for_each_entry(cl, p, q->hash + i, hlist) { if (cl->level) memset(&cl->un.inner, 0, sizeof(cl->un.inner)); else { @@ -1091,7 +1093,7 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt) INIT_LIST_HEAD(&q->root); for (i = 0; i < HTB_HSIZE; i++) - INIT_LIST_HEAD(q->hash + i); + INIT_HLIST_HEAD(q->hash + i); for (i = 0; i < TC_HTB_NUMPRIO; i++) INIT_LIST_HEAD(q->drops + i); @@ -1269,7 +1271,8 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) struct htb_class, sibling)); /* note: this delete may happen twice (see htb_delete) */ - list_del(&cl->hlist); + if (!hlist_unhashed(&cl->hlist)) + hlist_del(&cl->hlist); list_del(&cl->sibling); if (cl->prio_activity) @@ -1317,7 +1320,9 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); /* delete from hash and active; remainder in destroy_class */ - list_del_init(&cl->hlist); + if (!hlist_unhashed(&cl->hlist)) + hlist_del(&cl->hlist); + if (cl->prio_activity) htb_deactivate(q, cl); @@ -1381,7 +1386,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->refcnt = 1; INIT_LIST_HEAD(&cl->sibling); - INIT_LIST_HEAD(&cl->hlist); + INIT_HLIST_NODE(&cl->hlist); INIT_LIST_HEAD(&cl->children); INIT_LIST_HEAD(&cl->un.leaf.drop_list); @@ -1420,7 +1425,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->cmode = HTB_CAN_SEND; /* attach to the hash list and parent's family */ - list_add_tail(&cl->hlist, q->hash + htb_hash(classid)); + hlist_add_head(&cl->hlist, q->hash + htb_hash(classid)); list_add_tail(&cl->sibling, parent ? &parent->children : &q->root); } else @@ -1520,10 +1525,10 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) return; for (i = 0; i < HTB_HSIZE; i++) { - struct list_head *p; - list_for_each(p, q->hash + i) { - struct htb_class *cl = - list_entry(p, struct htb_class, hlist); + struct hlist_node *p; + struct htb_class *cl; + + hlist_for_each_entry(cl, p, q->hash + i, hlist) { if (arg->count < arg->skip) { arg->count++; continue; From 3696f625e2efa1f1b228b276788274e1eb86fcfa Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 10 Aug 2006 23:36:01 -0700 Subject: [PATCH 0454/1063] [HTB]: rbtree cleanup Add code to initialize rb tree nodes, and check for double deletion. This is not a real fix, but I can make it trap sometimes and may be a bandaid for: http://bugzilla.kernel.org/show_bug.cgi?id=6681 Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index a686b9511b05..bb3ddd4784b1 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -366,7 +366,7 @@ static void htb_add_to_wait_tree(struct htb_sched *q, * When we are past last key we return NULL. * Average complexity is 2 steps per call. */ -static void htb_next_rb_node(struct rb_node **n) +static inline void htb_next_rb_node(struct rb_node **n) { *n = rb_next(*n); } @@ -388,6 +388,18 @@ static inline void htb_add_class_to_row(struct htb_sched *q, } } +/* If this triggers, it is a bug in this code, but it need not be fatal */ +static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root) +{ + if (RB_EMPTY_NODE(rb)) { + WARN_ON(1); + } else { + rb_erase(rb, root); + RB_CLEAR_NODE(rb); + } +} + + /** * htb_remove_class_from_row - removes class from its row * @@ -401,10 +413,12 @@ static inline void htb_remove_class_from_row(struct htb_sched *q, while (mask) { int prio = ffz(~mask); + mask &= ~(1 << prio); if (q->ptr[cl->level][prio] == cl->node + prio) htb_next_rb_node(q->ptr[cl->level] + prio); - rb_erase(cl->node + prio, q->row[cl->level] + prio); + + htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio); if (!q->row[cl->level][prio].rb_node) m |= 1 << prio; } @@ -472,7 +486,7 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) p->un.inner.ptr[prio] = NULL; } - rb_erase(cl->node + prio, p->un.inner.feed + prio); + htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio); if (!p->un.inner.feed[prio].rb_node) mask |= 1 << prio; @@ -739,7 +753,7 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, htb_change_class_mode(q, cl, &diff); if (old_mode != cl->cmode) { if (old_mode != HTB_CAN_SEND) - rb_erase(&cl->pq_node, q->wait_pq + cl->level); + htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level); if (cl->cmode != HTB_CAN_SEND) htb_add_to_wait_tree(q, cl, diff); } @@ -782,7 +796,7 @@ static long htb_do_events(struct htb_sched *q, int level) if (time_after(cl->pq_key, q->jiffies)) { return cl->pq_key - q->jiffies; } - rb_erase(p, q->wait_pq + level); + htb_safe_rb_erase(p, q->wait_pq + level); diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer); htb_change_class_mode(q, cl, &diff); if (cl->cmode != HTB_CAN_SEND) @@ -1279,7 +1293,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) htb_deactivate(q, cl); if (cl->cmode != HTB_CAN_SEND) - rb_erase(&cl->pq_node, q->wait_pq + cl->level); + htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level); kfree(cl); } @@ -1370,6 +1384,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!cl) { /* new class */ struct Qdisc *new_q; + int prio; + /* check for valid classid */ if (!classid || TC_H_MAJ(classid ^ sch->handle) || htb_find(classid, sch)) @@ -1389,6 +1405,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, INIT_HLIST_NODE(&cl->hlist); INIT_LIST_HEAD(&cl->children); INIT_LIST_HEAD(&cl->un.leaf.drop_list); + RB_CLEAR_NODE(&cl->pq_node); + + for (prio = 0; prio < TC_HTB_NUMPRIO; prio++) + RB_CLEAR_NODE(&cl->node[prio]); /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) so that can't be used inside of sch_tree_lock @@ -1404,7 +1424,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* remove from evt list because of level change */ if (parent->cmode != HTB_CAN_SEND) { - rb_erase(&parent->pq_node, q->wait_pq); + htb_safe_rb_erase(&parent->pq_node, q->wait_pq); parent->cmode = HTB_CAN_SEND; } parent->level = (parent->parent ? parent->parent->level From b6fe17d6cc5d570b72f8e4da351b593c5a680355 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 29 Aug 2006 17:06:13 -0700 Subject: [PATCH 0455/1063] [NET] netdev: Check name length Some improvements to robust name interface. These API's are safe now by convention, but it is worth providing some safety checks against future bugs. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/dev.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index fc82f6f6e1c1..14de297d024d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -640,6 +640,8 @@ int dev_valid_name(const char *name) { if (*name == '\0') return 0; + if (strlen(name) >= IFNAMSIZ) + return 0; if (!strcmp(name, ".") || !strcmp(name, "..")) return 0; @@ -3191,13 +3193,15 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name, struct net_device *dev; int alloc_size; + BUG_ON(strlen(name) >= sizeof(dev->name)); + /* ensure 32-byte alignment of both the device and private area */ alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; p = kzalloc(alloc_size, GFP_KERNEL); if (!p) { - printk(KERN_ERR "alloc_dev: Unable to allocate device.\n"); + printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n"); return NULL; } From d880309ae17783c27016bf4f903782d322d0a2a1 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 11 Aug 2006 16:43:41 -0700 Subject: [PATCH 0456/1063] [DECNET] Fix to multiple tables routing Here is a fix to Patrick McHardy's increase number of routing tables patch for DECnet. I did just test this and it appears to be working fine with this patch. Signed-off-by: Steven Whitehouse Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- net/decnet/dn_rules.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 878312ff34ec..c8d9411e5943 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -116,6 +116,7 @@ static struct nla_policy dn_fib_rule_policy[FRA_MAX+1] __read_mostly = { [FRA_SRC] = { .type = NLA_U16 }, [FRA_DST] = { .type = NLA_U16 }, [FRA_FWMARK] = { .type = NLA_U32 }, + [FRA_TABLE] = { .type = NLA_U32 }, }; static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) From d1aa62f15b511457af2233150c960dc1fd02769b Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 11 Aug 2006 16:44:18 -0700 Subject: [PATCH 0457/1063] [DECNET] Fix to decnet rules compare function Here is a fix to the DECnet rules compare function where we used 32bit values rather than 16bit values. Spotted by Patrick McHardy. Signed-off-by: Steven Whitehouse Signed-off-by: David S. Miller --- net/decnet/dn_rules.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index c8d9411e5943..977bb56c3ce4 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -197,10 +197,10 @@ static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, return 0; #endif - if (tb[FRA_SRC] && (r->src != nla_get_u32(tb[FRA_SRC]))) + if (tb[FRA_SRC] && (r->src != nla_get_u16(tb[FRA_SRC]))) return 0; - if (tb[FRA_DST] && (r->dst != nla_get_u32(tb[FRA_DST]))) + if (tb[FRA_DST] && (r->dst != nla_get_u16(tb[FRA_DST]))) return 0; return 1; From 90d41122f79c8c3687d965dde4c6d30a6e0cac4c Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 14 Aug 2006 23:49:16 -0700 Subject: [PATCH 0458/1063] [IPV6] ip6_fib.c: make code static Make the following needlessly global code static: - fib6_walker_lock - struct fib6_walker_list - fib6_walk_continue() - fib6_walk() Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 25 ------------------------- net/ipv6/ip6_fib.c | 29 ++++++++++++++++++++++++----- 2 files changed, 24 insertions(+), 30 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index c0660cea9a2f..69c444209781 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -92,28 +92,6 @@ struct fib6_walker_t void *args; }; -extern struct fib6_walker_t fib6_walker_list; -extern rwlock_t fib6_walker_lock; - -static inline void fib6_walker_link(struct fib6_walker_t *w) -{ - write_lock_bh(&fib6_walker_lock); - w->next = fib6_walker_list.next; - w->prev = &fib6_walker_list; - w->next->prev = w; - w->prev->next = w; - write_unlock_bh(&fib6_walker_lock); -} - -static inline void fib6_walker_unlink(struct fib6_walker_t *w) -{ - write_lock_bh(&fib6_walker_lock); - w->next->prev = w->prev; - w->prev->next = w->next; - w->prev = w->next = w; - write_unlock_bh(&fib6_walker_lock); -} - struct rt6_statistics { __u32 fib_nodes; __u32 fib_route_nodes; @@ -195,9 +173,6 @@ struct fib6_node *fib6_locate(struct fib6_node *root, extern void fib6_clean_all(int (*func)(struct rt6_info *, void *arg), int prune, void *arg); -extern int fib6_walk(struct fib6_walker_t *w); -extern int fib6_walk_continue(struct fib6_walker_t *w); - extern int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nlmsghdr *nlh, diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index bececbe9dd2c..be36f4acda94 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -69,8 +69,7 @@ struct fib6_cleaner_t void *arg; }; -DEFINE_RWLOCK(fib6_walker_lock); - +static DEFINE_RWLOCK(fib6_walker_lock); #ifdef CONFIG_IPV6_SUBTREES #define FWS_INIT FWS_S @@ -82,6 +81,8 @@ DEFINE_RWLOCK(fib6_walker_lock); static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt); static struct fib6_node * fib6_repair_tree(struct fib6_node *fn); +static int fib6_walk(struct fib6_walker_t *w); +static int fib6_walk_continue(struct fib6_walker_t *w); /* * A routing update causes an increase of the serial number on the @@ -94,13 +95,31 @@ static __u32 rt_sernum; static DEFINE_TIMER(ip6_fib_timer, fib6_run_gc, 0, 0); -struct fib6_walker_t fib6_walker_list = { +static struct fib6_walker_t fib6_walker_list = { .prev = &fib6_walker_list, .next = &fib6_walker_list, }; #define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next) +static inline void fib6_walker_link(struct fib6_walker_t *w) +{ + write_lock_bh(&fib6_walker_lock); + w->next = fib6_walker_list.next; + w->prev = &fib6_walker_list; + w->next->prev = w; + w->prev->next = w; + write_unlock_bh(&fib6_walker_lock); +} + +static inline void fib6_walker_unlink(struct fib6_walker_t *w) +{ + write_lock_bh(&fib6_walker_lock); + w->next->prev = w->prev; + w->prev->next = w->next; + w->prev = w->next = w; + write_unlock_bh(&fib6_walker_lock); +} static __inline__ u32 fib6_new_sernum(void) { u32 n = ++rt_sernum; @@ -1173,7 +1192,7 @@ int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct ne * <0 -> walk is terminated by an error. */ -int fib6_walk_continue(struct fib6_walker_t *w) +static int fib6_walk_continue(struct fib6_walker_t *w) { struct fib6_node *fn, *pn; @@ -1247,7 +1266,7 @@ int fib6_walk_continue(struct fib6_walker_t *w) } } -int fib6_walk(struct fib6_walker_t *w) +static int fib6_walk(struct fib6_walker_t *w) { int res; From 50da859d4e566fba90ebda87b843970d902c903e Mon Sep 17 00:00:00 2001 From: Andreas Mohr Date: Mon, 14 Aug 2006 23:54:30 -0700 Subject: [PATCH 0459/1063] [TG3]: Constify firmware structs Constify largish areas of firmware data in Tigon3 ethernet driver. non-const: lsmod: tg3 101404 0 objdump -x: .rodata 000003e8 .data 00004a0c ls -l: -rw-r--r-- 1 root root 114404 2006-08-19 21:36 drivers/net/tg3.ko const: lsmod: tg3 101404 0 objdump -x: .rodata 000042c8 .data 00000b4c ls -l: -rw-r--r-- 1 root root 114532 2006-08-19 21:06 drivers/net/tg3.ko Signed-off-by: Andreas Mohr Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- drivers/net/tg3.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 6f5d3a38c582..34078a7c1a84 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -264,7 +264,7 @@ static struct pci_device_id tg3_pci_tbl[] = { MODULE_DEVICE_TABLE(pci, tg3_pci_tbl); -static struct { +static const struct { const char string[ETH_GSTRING_LEN]; } ethtool_stats_keys[TG3_NUM_STATS] = { { "rx_octets" }, @@ -345,7 +345,7 @@ static struct { { "nic_tx_threshold_hit" } }; -static struct { +static const struct { const char string[ETH_GSTRING_LEN]; } ethtool_test_keys[TG3_NUM_TEST] = { { "nvram test (online) " }, @@ -4969,7 +4969,7 @@ static int tg3_halt(struct tg3 *tp, int kind, int silent) #define TG3_FW_BSS_ADDR 0x08000a70 #define TG3_FW_BSS_LEN 0x10 -static u32 tg3FwText[(TG3_FW_TEXT_LEN / sizeof(u32)) + 1] = { +static const u32 tg3FwText[(TG3_FW_TEXT_LEN / sizeof(u32)) + 1] = { 0x00000000, 0x10000003, 0x00000000, 0x0000000d, 0x0000000d, 0x3c1d0800, 0x37bd3ffc, 0x03a0f021, 0x3c100800, 0x26100000, 0x0e000018, 0x00000000, 0x0000000d, 0x3c1d0800, 0x37bd3ffc, 0x03a0f021, 0x3c100800, 0x26100034, @@ -5063,7 +5063,7 @@ static u32 tg3FwText[(TG3_FW_TEXT_LEN / sizeof(u32)) + 1] = { 0x27bd0008, 0x03e00008, 0x00000000, 0x00000000, 0x00000000 }; -static u32 tg3FwRodata[(TG3_FW_RODATA_LEN / sizeof(u32)) + 1] = { +static const u32 tg3FwRodata[(TG3_FW_RODATA_LEN / sizeof(u32)) + 1] = { 0x35373031, 0x726c7341, 0x00000000, 0x00000000, 0x53774576, 0x656e7430, 0x00000000, 0x726c7045, 0x76656e74, 0x31000000, 0x556e6b6e, 0x45766e74, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x66617461, 0x6c457272, @@ -5128,13 +5128,13 @@ static int tg3_halt_cpu(struct tg3 *tp, u32 offset) struct fw_info { unsigned int text_base; unsigned int text_len; - u32 *text_data; + const u32 *text_data; unsigned int rodata_base; unsigned int rodata_len; - u32 *rodata_data; + const u32 *rodata_data; unsigned int data_base; unsigned int data_len; - u32 *data_data; + const u32 *data_data; }; /* tp->lock is held. */ @@ -5266,7 +5266,7 @@ static int tg3_load_5701_a0_firmware_fix(struct tg3 *tp) #define TG3_TSO_FW_BSS_ADDR 0x08001b80 #define TG3_TSO_FW_BSS_LEN 0x894 -static u32 tg3TsoFwText[(TG3_TSO_FW_TEXT_LEN / 4) + 1] = { +static const u32 tg3TsoFwText[(TG3_TSO_FW_TEXT_LEN / 4) + 1] = { 0x0e000003, 0x00000000, 0x08001b24, 0x00000000, 0x10000003, 0x00000000, 0x0000000d, 0x0000000d, 0x3c1d0800, 0x37bd4000, 0x03a0f021, 0x3c100800, 0x26100000, 0x0e000010, 0x00000000, 0x0000000d, 0x27bdffe0, 0x3c04fefe, @@ -5553,7 +5553,7 @@ static u32 tg3TsoFwText[(TG3_TSO_FW_TEXT_LEN / 4) + 1] = { 0xac470014, 0xac4a0018, 0x03e00008, 0xac4b001c, 0x00000000, 0x00000000, }; -static u32 tg3TsoFwRodata[] = { +static const u32 tg3TsoFwRodata[] = { 0x4d61696e, 0x43707542, 0x00000000, 0x4d61696e, 0x43707541, 0x00000000, 0x00000000, 0x00000000, 0x73746b6f, 0x66666c64, 0x496e0000, 0x73746b6f, 0x66662a2a, 0x00000000, 0x53774576, 0x656e7430, 0x00000000, 0x00000000, @@ -5561,7 +5561,7 @@ static u32 tg3TsoFwRodata[] = { 0x00000000, }; -static u32 tg3TsoFwData[] = { +static const u32 tg3TsoFwData[] = { 0x00000000, 0x73746b6f, 0x66666c64, 0x5f76312e, 0x362e3000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -5583,7 +5583,7 @@ static u32 tg3TsoFwData[] = { #define TG3_TSO5_FW_BSS_ADDR 0x00010f50 #define TG3_TSO5_FW_BSS_LEN 0x88 -static u32 tg3Tso5FwText[(TG3_TSO5_FW_TEXT_LEN / 4) + 1] = { +static const u32 tg3Tso5FwText[(TG3_TSO5_FW_TEXT_LEN / 4) + 1] = { 0x0c004003, 0x00000000, 0x00010f04, 0x00000000, 0x10000003, 0x00000000, 0x0000000d, 0x0000000d, 0x3c1d0001, 0x37bde000, 0x03a0f021, 0x3c100001, 0x26100000, 0x0c004010, 0x00000000, 0x0000000d, 0x27bdffe0, 0x3c04fefe, @@ -5742,14 +5742,14 @@ static u32 tg3Tso5FwText[(TG3_TSO5_FW_TEXT_LEN / 4) + 1] = { 0x00000000, 0x00000000, 0x00000000, }; -static u32 tg3Tso5FwRodata[(TG3_TSO5_FW_RODATA_LEN / 4) + 1] = { +static const u32 tg3Tso5FwRodata[(TG3_TSO5_FW_RODATA_LEN / 4) + 1] = { 0x4d61696e, 0x43707542, 0x00000000, 0x4d61696e, 0x43707541, 0x00000000, 0x00000000, 0x00000000, 0x73746b6f, 0x66666c64, 0x00000000, 0x00000000, 0x73746b6f, 0x66666c64, 0x00000000, 0x00000000, 0x66617461, 0x6c457272, 0x00000000, 0x00000000, 0x00000000, }; -static u32 tg3Tso5FwData[(TG3_TSO5_FW_DATA_LEN / 4) + 1] = { +static const u32 tg3Tso5FwData[(TG3_TSO5_FW_DATA_LEN / 4) + 1] = { 0x00000000, 0x73746b6f, 0x66666c64, 0x5f76312e, 0x322e3000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, }; From 2aa7f36cdb332a32849afbf25fcbf35dce5b1940 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 14 Aug 2006 23:55:20 -0700 Subject: [PATCH 0460/1063] [DECNET]: cleanups - make the following needlessly global functions static: - dn_fib.c: dn_fib_sync_down() - dn_fib.c: dn_fib_sync_up() - dn_rules.c: dn_fib_rule_action() - remove the following unneeded prototype: - dn_fib.c: dn_cache_dump() Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- include/net/dn_fib.h | 3 --- net/decnet/dn_fib.c | 9 +++++---- net/decnet/dn_rules.c | 4 ++-- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h index d97aa10c463f..f01626cbbed6 100644 --- a/include/net/dn_fib.h +++ b/include/net/dn_fib.h @@ -131,9 +131,6 @@ extern __le16 dn_fib_get_attr16(struct rtattr *attr, int attrlen, int type); extern void dn_fib_flush(void); extern void dn_fib_select_multipath(const struct flowi *fl, struct dn_fib_res *res); -extern int dn_fib_sync_down(__le16 local, struct net_device *dev, - int force); -extern int dn_fib_sync_up(struct net_device *dev); /* * dn_tables.c diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 5ccca3ed53bd..1cf010124ec5 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -55,8 +55,6 @@ #define endfor_nexthops(fi) } -extern int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb); - static DEFINE_SPINLOCK(dn_fib_multipath_lock); static struct dn_fib_info *dn_fib_info_list; static DEFINE_SPINLOCK(dn_fib_info_lock); @@ -80,6 +78,9 @@ static struct [RTN_XRESOLVE] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE }, }; +static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force); +static int dn_fib_sync_up(struct net_device *dev); + void dn_fib_free_info(struct dn_fib_info *fi) { if (fi->fib_dead == 0) { @@ -651,7 +652,7 @@ static int dn_fib_dnaddr_event(struct notifier_block *this, unsigned long event, return NOTIFY_DONE; } -int dn_fib_sync_down(__le16 local, struct net_device *dev, int force) +static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force) { int ret = 0; int scope = RT_SCOPE_NOWHERE; @@ -695,7 +696,7 @@ int dn_fib_sync_down(__le16 local, struct net_device *dev, int force) } -int dn_fib_sync_up(struct net_device *dev) +static int dn_fib_sync_up(struct net_device *dev) { int ret = 0; diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 977bb56c3ce4..50e819edf8c7 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -75,8 +75,8 @@ int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res) return err; } -int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, - struct fib_lookup_arg *arg) +static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp, + int flags, struct fib_lookup_arg *arg) { int err = -EAGAIN; struct dn_fib_table *tbl; From 81aa646cc4df3779bcbf9d18cc2c0813ee9b3262 Mon Sep 17 00:00:00 2001 From: Martin Bligh Date: Mon, 14 Aug 2006 23:57:10 -0700 Subject: [PATCH 0461/1063] [IPV4]: add the UdpSndbufErrors and UdpRcvbufErrors MIBs Signed-off-by: Martin Bligh Signed-off-by: Andrew Morton --- include/linux/snmp.h | 2 ++ net/ipv4/proc.c | 2 ++ net/ipv4/udp.c | 16 +++++++++++++++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/include/linux/snmp.h b/include/linux/snmp.h index 4db25d5c7cd1..30156556f78d 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -155,6 +155,8 @@ enum UDP_MIB_NOPORTS, /* NoPorts */ UDP_MIB_INERRORS, /* InErrors */ UDP_MIB_OUTDATAGRAMS, /* OutDatagrams */ + UDP_MIB_RCVBUFERRORS, /* RcvbufErrors */ + UDP_MIB_SNDBUFERRORS, /* SndbufErrors */ __UDP_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index d61e2a9d394d..9c6cbe3d9fb8 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -173,6 +173,8 @@ static const struct snmp_mib snmp4_udp_list[] = { SNMP_MIB_ITEM("NoPorts", UDP_MIB_NOPORTS), SNMP_MIB_ITEM("InErrors", UDP_MIB_INERRORS), SNMP_MIB_ITEM("OutDatagrams", UDP_MIB_OUTDATAGRAMS), + SNMP_MIB_ITEM("RcvbufErrors", UDP_MIB_RCVBUFERRORS), + SNMP_MIB_ITEM("SndbufErrors", UDP_MIB_SNDBUFERRORS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 87152510980c..514c1e9ae810 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -662,6 +662,16 @@ out: UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS); return len; } + /* + * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting + * ENOBUFS might not be good (it's not tunable per se), but otherwise + * we don't have a good statistic (IpOutDiscards but it can be too many + * things). We could add another new stat but at least for now that + * seems like overkill. + */ + if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { + UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS); + } return err; do_confirm: @@ -981,6 +991,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); + int rc; /* * Charge it to the socket, dropping if the queue is full. @@ -1027,7 +1038,10 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) skb->ip_summed = CHECKSUM_UNNECESSARY; } - if (sock_queue_rcv_skb(sk,skb)<0) { + if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { + /* Note that an ENOMEM error is charged twice */ + if (rc == -ENOMEM) + UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS); UDP_INC_STATS_BH(UDP_MIB_INERRORS); kfree_skb(skb); return -1; From a18135eb9389c26d36ef5c05bd8bc526e0cbe883 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 15 Aug 2006 00:00:09 -0700 Subject: [PATCH 0462/1063] [IPV6]: Add UDP_MIB_{SND,RCV}BUFERRORS handling. Signed-off-by: David S. Miller --- net/ipv6/udp.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 780b89f6dfcc..c813381020bc 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -345,6 +345,8 @@ out: static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) { + int rc; + if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { kfree_skb(skb); return -1; @@ -356,7 +358,10 @@ static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) return 0; } - if (sock_queue_rcv_skb(sk,skb)<0) { + if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { + /* Note that an ENOMEM error is charged twice */ + if (rc == -ENOMEM) + UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS); UDP6_INC_STATS_BH(UDP_MIB_INERRORS); kfree_skb(skb); return 0; @@ -857,6 +862,16 @@ out: UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS); return len; } + /* + * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting + * ENOBUFS might not be good (it's not tunable per se), but otherwise + * we don't have a good statistic (IpOutDiscards but it can be too many + * things). We could add another new stat but at least for now that + * seems like overkill. + */ + if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { + UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS); + } return err; do_confirm: From 97a4f3e7110619568aa239fe19143d9ec42dede5 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Tue, 15 Aug 2006 00:01:05 -0700 Subject: [PATCH 0463/1063] [NETFILTER]: Make unused signal code go away so nobody copies its brokenness This code is wrong on so many levels, please lose it so it isn't replicated anywhere else. Signed-off-by: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/bridge/netfilter/ebtables.c | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 3a13ed643459..d06a5075b5f6 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -37,30 +37,9 @@ #include #include -#if 0 -/* use this for remote debugging - * Copyright (C) 1998 by Ori Pomerantz - * Print the string to the appropriate tty, the one - * the current task uses - */ -static void print_string(char *str) -{ - struct tty_struct *my_tty; - - /* The tty for the current task */ - my_tty = current->signal->tty; - if (my_tty != NULL) { - my_tty->driver->write(my_tty, 0, str, strlen(str)); - my_tty->driver->write(my_tty, 0, "\015\012", 2); - } -} - -#define BUGPRINT(args) print_string(args); -#else #define BUGPRINT(format, args...) printk("kernel msg: ebtables bug: please "\ "report to author: "format, ## args) /* #define BUGPRINT(format, args...) */ -#endif #define MEMPRINT(format, args...) printk("kernel msg: ebtables "\ ": out of memory: "format, ## args) /* #define MEMPRINT(format, args...) */ From 9a673e563e543a5c8a6f9824562e55e807b8a56c Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 15 Aug 2006 00:03:53 -0700 Subject: [PATCH 0464/1063] [SELINUX]: security/selinux/hooks.c: Make 4 functions static. This patch makes four needlessly global functions static. Signed-off-by: Adrian Bunk Acked-by: James Morris Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- security/selinux/hooks.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 2a6bbb921e1e..180b26b97d2d 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3598,7 +3598,7 @@ static void selinux_sk_getsecid(struct sock *sk, u32 *secid) } } -void selinux_sock_graft(struct sock* sk, struct socket *parent) +static void selinux_sock_graft(struct sock* sk, struct socket *parent) { struct inode_security_struct *isec = SOCK_INODE(parent)->i_security; struct sk_security_struct *sksec = sk->sk_security; @@ -3608,8 +3608,8 @@ void selinux_sock_graft(struct sock* sk, struct socket *parent) selinux_netlbl_sock_graft(sk, parent); } -int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb, - struct request_sock *req) +static int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb, + struct request_sock *req) { struct sk_security_struct *sksec = sk->sk_security; int err; @@ -3638,7 +3638,8 @@ int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb, return 0; } -void selinux_inet_csk_clone(struct sock *newsk, const struct request_sock *req) +static void selinux_inet_csk_clone(struct sock *newsk, + const struct request_sock *req) { struct sk_security_struct *newsksec = newsk->sk_security; @@ -3649,7 +3650,8 @@ void selinux_inet_csk_clone(struct sock *newsk, const struct request_sock *req) time it will have been created and available. */ } -void selinux_req_classify_flow(const struct request_sock *req, struct flowi *fl) +static void selinux_req_classify_flow(const struct request_sock *req, + struct flowi *fl) { fl->secid = req->secid; } From 9c3bd6833a4df1abd9ecd3b51492b8949bf9cd11 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 15 Aug 2006 00:05:38 -0700 Subject: [PATCH 0465/1063] [IRDA]: Replace hard-coded dev_self[] array sizes with ARRAY_SIZE() Several IR drivers used "for (i = 0; i < 4; i++)" to walk their dev_self[] table. Better to use ARRAY_SIZE(). And fix ali-ircc so it won't run off the end if we find too many adapters. Signed-off-by: Bjorn Helgaas Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- drivers/net/irda/ali-ircc.c | 8 +++++++- drivers/net/irda/irport.c | 4 ++-- drivers/net/irda/via-ircc.c | 5 ++++- drivers/net/irda/w83977af_ir.c | 4 ++-- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/net/irda/ali-ircc.c b/drivers/net/irda/ali-ircc.c index e3c8cd5eca67..68d4c418cb98 100644 --- a/drivers/net/irda/ali-ircc.c +++ b/drivers/net/irda/ali-ircc.c @@ -249,7 +249,7 @@ static void __exit ali_ircc_cleanup(void) IRDA_DEBUG(2, "%s(), ---------------- Start ----------------\n", __FUNCTION__); - for (i=0; i < 4; i++) { + for (i=0; i < ARRAY_SIZE(dev_self); i++) { if (dev_self[i]) ali_ircc_close(dev_self[i]); } @@ -273,6 +273,12 @@ static int ali_ircc_open(int i, chipio_t *info) int err; IRDA_DEBUG(2, "%s(), ---------------- Start ----------------\n", __FUNCTION__); + + if (i >= ARRAY_SIZE(dev_self)) { + IRDA_ERROR("%s(), maximum number of supported chips reached!\n", + __FUNCTION__); + return -ENOMEM; + } /* Set FIR FIFO and DMA Threshold */ if ((ali_ircc_setup(info)) == -1) diff --git a/drivers/net/irda/irport.c b/drivers/net/irda/irport.c index 44efd49bf4a9..ba4f3eb988b3 100644 --- a/drivers/net/irda/irport.c +++ b/drivers/net/irda/irport.c @@ -1090,7 +1090,7 @@ static int __init irport_init(void) { int i; - for (i=0; (io[i] < 2000) && (i < 4); i++) { + for (i=0; (io[i] < 2000) && (i < ARRAY_SIZE(dev_self)); i++) { if (irport_open(i, io[i], irq[i]) != NULL) return 0; } @@ -1112,7 +1112,7 @@ static void __exit irport_cleanup(void) IRDA_DEBUG( 4, "%s()\n", __FUNCTION__); - for (i=0; i < 4; i++) { + for (i=0; i < ARRAY_SIZE(dev_self); i++) { if (dev_self[i]) irport_close(dev_self[i]); } diff --git a/drivers/net/irda/via-ircc.c b/drivers/net/irda/via-ircc.c index 8bafb455c102..79b85f327500 100644 --- a/drivers/net/irda/via-ircc.c +++ b/drivers/net/irda/via-ircc.c @@ -279,7 +279,7 @@ static void via_ircc_clean(void) IRDA_DEBUG(3, "%s()\n", __FUNCTION__); - for (i=0; i < 4; i++) { + for (i=0; i < ARRAY_SIZE(dev_self); i++) { if (dev_self[i]) via_ircc_close(dev_self[i]); } @@ -327,6 +327,9 @@ static __devinit int via_ircc_open(int i, chipio_t * info, unsigned int id) IRDA_DEBUG(3, "%s()\n", __FUNCTION__); + if (i >= ARRAY_SIZE(dev_self)) + return -ENOMEM; + /* Allocate new instance of the driver */ dev = alloc_irdadev(sizeof(struct via_ircc_cb)); if (dev == NULL) diff --git a/drivers/net/irda/w83977af_ir.c b/drivers/net/irda/w83977af_ir.c index 0ea65c4c6f85..8421597072a7 100644 --- a/drivers/net/irda/w83977af_ir.c +++ b/drivers/net/irda/w83977af_ir.c @@ -117,7 +117,7 @@ static int __init w83977af_init(void) IRDA_DEBUG(0, "%s()\n", __FUNCTION__ ); - for (i=0; (io[i] < 2000) && (i < 4); i++) { + for (i=0; (io[i] < 2000) && (i < ARRAY_SIZE(dev_self)); i++) { if (w83977af_open(i, io[i], irq[i], dma[i]) == 0) return 0; } @@ -136,7 +136,7 @@ static void __exit w83977af_cleanup(void) IRDA_DEBUG(4, "%s()\n", __FUNCTION__ ); - for (i=0; i < 4; i++) { + for (i=0; i < ARRAY_SIZE(dev_self); i++) { if (dev_self[i]) w83977af_close(dev_self[i]); } From 62872e2dcb3127b20a49e3b4b1d93523cf476cc4 Mon Sep 17 00:00:00 2001 From: Stphane Witzmann Date: Tue, 15 Aug 2006 00:09:17 -0700 Subject: [PATCH 0466/1063] [ARCNET]: SoHard PCI support Add support for a SoHard PCI ARCnet card. Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- drivers/net/arcnet/com20020-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index 979a33df0a8c..96d8a694d433 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -161,6 +161,7 @@ static struct pci_device_id com20020pci_id_table[] = { { 0x1571, 0xa204, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT }, { 0x1571, 0xa205, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT }, { 0x1571, 0xa206, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT }, + { 0x10B5, 0x9030, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT }, { 0x10B5, 0x9050, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT }, {0,} }; From f8d8fda54a1bfcf8cf829e44c494b2b4582819aa Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 15 Aug 2006 00:15:41 -0700 Subject: [PATCH 0467/1063] [IPV6] udp: Fix type in previous change. UDPv6 stats are UDP6_foo not UDP_foo. Signed-off-by: David S. Miller --- net/ipv6/udp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c813381020bc..eb9e1b39c8f8 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -361,7 +361,7 @@ static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) - UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS); + UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS); UDP6_INC_STATS_BH(UDP_MIB_INERRORS); kfree_skb(skb); return 0; @@ -870,7 +870,7 @@ out: * seems like overkill. */ if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS); + UDP6_INC_STATS_USER(UDP_MIB_SNDBUFERRORS); } return err; From 2942e90050569525628a9f34e0daaa9b661b49cc Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 15 Aug 2006 00:30:25 -0700 Subject: [PATCH 0468/1063] [RTNETLINK]: Use rtnl_unicast() for rtnetlink unicasts Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 1 + net/core/rtnetlink.c | 10 +++++++--- net/decnet/dn_route.c | 4 +--- net/ipv4/ipmr.c | 7 ++++--- net/ipv4/route.c | 7 +++---- net/ipv6/addrconf.c | 4 +--- net/ipv6/route.c | 4 +--- net/sched/act_api.c | 7 ++----- 8 files changed, 20 insertions(+), 24 deletions(-) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 7e4aa48680a7..0e4f478e2cb5 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -584,6 +584,7 @@ struct rtnetlink_link extern struct rtnetlink_link * rtnetlink_links[NPROTO]; extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo); +extern int rtnl_unicast(struct sk_buff *skb, u32 pid); extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics); extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a1b783a6afc6..e02fa6a33f42 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -166,6 +166,11 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) return err; } +int rtnl_unicast(struct sk_buff *skb, u32 pid) +{ + return nlmsg_unicast(rtnl, skb, pid); +} + int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) { struct rtattr *mx = (struct rtattr*)skb->tail; @@ -574,9 +579,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) goto errout; } - err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).pid, MSG_DONTWAIT); - if (err > 0) - err = 0; + err = rtnl_unicast(skb, NETLINK_CB(skb).pid); errout: kfree(iw_buf); dev_put(dev); @@ -825,3 +828,4 @@ EXPORT_SYMBOL(rtnl); EXPORT_SYMBOL(rtnl_lock); EXPORT_SYMBOL(rtnl_trylock); EXPORT_SYMBOL(rtnl_unlock); +EXPORT_SYMBOL(rtnl_unicast); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 4c963213fba5..c5daf3557c1f 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1611,9 +1611,7 @@ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) goto out_free; } - err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); - - return err; + return rtnl_unicast(skb, NETLINK_CB(in_skb).pid); out_free: kfree_skb(skb); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 85893eef6b16..98f0aa0d4216 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -312,7 +312,8 @@ static void ipmr_destroy_unres(struct mfc_cache *c) e = NLMSG_DATA(nlh); e->error = -ETIMEDOUT; memset(&e->msg, 0, sizeof(e->msg)); - netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT); + + rtnl_unicast(skb, NETLINK_CB(skb).pid); } else kfree_skb(skb); } @@ -512,7 +513,6 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) { if (skb->nh.iph->version == 0) { - int err; struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) { @@ -525,7 +525,8 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) e->error = -EMSGSIZE; memset(&e->msg, 0, sizeof(e->msg)); } - err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT); + + rtnl_unicast(skb, NETLINK_CB(skb).pid); } else ip_mr_forward(skb, c, 0); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 12128b82c9dc..b8f6cadc5b3a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2809,10 +2809,9 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) goto out_free; } - err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); - if (err > 0) - err = 0; -out: return err; + err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); +out: + return err; out_free: kfree_skb(skb); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9ba1e811ba50..4f991a2234d0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3268,9 +3268,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, goto out_free; } - err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); - if (err > 0) - err = 0; + err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); out: in6_ifa_put(ifa); return err; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9ce28277f47f..024c8e26c2ec 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2044,9 +2044,7 @@ int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) goto out_free; } - err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); - if (err > 0) - err = 0; + err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); out: return err; out_free: diff --git a/net/sched/act_api.c b/net/sched/act_api.c index a2587b52e531..6990747d6d5a 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -459,7 +459,6 @@ static int act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event) { struct sk_buff *skb; - int err = 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) @@ -468,10 +467,8 @@ act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event) kfree_skb(skb); return -EINVAL; } - err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); - if (err > 0) - err = 0; - return err; + + return rtnl_unicast(skb, pid); } static struct tc_action * From d387f6ad10764fc2174373b4a1cca443adee36e3 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 15 Aug 2006 00:31:06 -0700 Subject: [PATCH 0469/1063] [NETLINK]: Add notification message sending interface Adds nlmsg_notify() implementing proper notification logic. The message is multicasted to all listeners in the group. The applications the requests orignates from can request a unicast back report in which case said socket will be excluded from the multicast to avoid duplicated notifications. nlmsg_multicast() is extended to take allocation flags to allow notification in atomic contexts. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/genetlink.h | 5 +++-- include/net/netlink.h | 6 ++++-- net/netlabel/netlabel_user.c | 2 +- net/netlink/af_netlink.c | 34 +++++++++++++++++++++++++++++++++- net/netlink/genetlink.c | 2 +- 5 files changed, 42 insertions(+), 7 deletions(-) diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 8c2287264266..97d6d3aba9d2 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -133,11 +133,12 @@ static inline int genlmsg_cancel(struct sk_buff *skb, void *hdr) * @skb: netlink message as socket buffer * @pid: own netlink pid to avoid sending to yourself * @group: multicast group id + * @flags: allocation flags */ static inline int genlmsg_multicast(struct sk_buff *skb, u32 pid, - unsigned int group) + unsigned int group, gfp_t flags) { - return nlmsg_multicast(genl_sock, skb, pid, group); + return nlmsg_multicast(genl_sock, skb, pid, group, flags); } /** diff --git a/include/net/netlink.h b/include/net/netlink.h index 3a5e40b1e045..b154b81d9a7a 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -43,6 +43,7 @@ * Message Sending: * nlmsg_multicast() multicast message to several groups * nlmsg_unicast() unicast a message to a single socket + * nlmsg_notify() send notification message * * Message Length Calculations: * nlmsg_msg_size(payload) length of message w/o padding @@ -545,15 +546,16 @@ static inline void nlmsg_free(struct sk_buff *skb) * @skb: netlink message as socket buffer * @pid: own netlink pid to avoid sending to yourself * @group: multicast group id + * @flags: allocation flags */ static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb, - u32 pid, unsigned int group) + u32 pid, unsigned int group, gfp_t flags) { int err; NETLINK_CB(skb).dst_group = group; - err = netlink_broadcast(sk, skb, pid, group, GFP_KERNEL); + err = netlink_broadcast(sk, skb, pid, group, flags); if (err > 0) err = 0; diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index 80022221b0a7..73cbe66e42ff 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -154,5 +154,5 @@ int netlbl_netlink_snd(struct sk_buff *skb, u32 pid) */ int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group) { - return genlmsg_multicast(skb, pid, group); + return genlmsg_multicast(skb, pid, group, GFP_KERNEL); } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 0f36ddc0b72d..a80e4456e204 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1549,6 +1549,38 @@ void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb) skb_pull(skb, msglen); } +/** + * nlmsg_notify - send a notification netlink message + * @sk: netlink socket to use + * @skb: notification message + * @pid: destination netlink pid for reports or 0 + * @group: destination multicast group or 0 + * @report: 1 to report back, 0 to disable + * @flags: allocation flags + */ +int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid, + unsigned int group, int report, gfp_t flags) +{ + int err = 0; + + if (group) { + int exclude_pid = 0; + + if (report) { + atomic_inc(&skb->users); + exclude_pid = pid; + } + + /* errors reported via destination sk->sk_err */ + nlmsg_multicast(sk, skb, exclude_pid, group, flags); + } + + if (report) + err = nlmsg_unicast(sk, skb, pid); + + return err; +} + #ifdef CONFIG_PROC_FS struct nl_seq_iter { int link; @@ -1802,4 +1834,4 @@ EXPORT_SYMBOL(netlink_set_err); EXPORT_SYMBOL(netlink_set_nonroot); EXPORT_SYMBOL(netlink_unicast); EXPORT_SYMBOL(netlink_unregister_notifier); - +EXPORT_SYMBOL(nlmsg_notify); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 75bb47a898dd..d32599116c56 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -510,7 +510,7 @@ static int genl_ctrl_event(int event, void *data) if (IS_ERR(msg)) return PTR_ERR(msg); - genlmsg_multicast(msg, 0, GENL_ID_CTRL); + genlmsg_multicast(msg, 0, GENL_ID_CTRL, GFP_KERNEL); break; } From 97676b6b5538b3e059d33b8338e7d5cc41c5f1f1 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 15 Aug 2006 00:31:41 -0700 Subject: [PATCH 0470/1063] [RTNETLINK]: Add rtnetlink notification interface Adds rtnl_notify() to send rtnetlink notification messages and rtnl_set_sk_err() to report notification errors as socket errors in order to indicate the need of a resync due to loss of events. nlmsg_report() is added to properly document the meaning of NLM_F_ECHO. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 3 +++ include/net/netlink.h | 17 +++++++++++++++++ net/core/rtnetlink.c | 18 ++++++++++++++++++ 3 files changed, 38 insertions(+) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 0e4f478e2cb5..ecbe0349060c 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -585,6 +585,9 @@ struct rtnetlink_link extern struct rtnetlink_link * rtnetlink_links[NPROTO]; extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo); extern int rtnl_unicast(struct sk_buff *skb, u32 pid); +extern int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, + struct nlmsghdr *nlh, gfp_t flags); +extern void rtnl_set_sk_err(u32 group, int error); extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics); extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data); diff --git a/include/net/netlink.h b/include/net/netlink.h index b154b81d9a7a..bf593eb59e1b 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -65,6 +65,9 @@ * nlmsg_validate() validate netlink message incl. attrs * nlmsg_for_each_attr() loop over all attributes * + * Misc: + * nlmsg_report() report back to application? + * * ------------------------------------------------------------------------ * Attributes Interface * ------------------------------------------------------------------------ @@ -194,6 +197,9 @@ extern void netlink_run_queue(struct sock *sk, unsigned int *qlen, struct nlmsghdr *, int *)); extern void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb); +extern int nlmsg_notify(struct sock *sk, struct sk_buff *skb, + u32 pid, unsigned int group, int report, + gfp_t flags); extern int nla_validate(struct nlattr *head, int len, int maxtype, struct nla_policy *policy); @@ -375,6 +381,17 @@ static inline int nlmsg_validate(struct nlmsghdr *nlh, int hdrlen, int maxtype, nlmsg_attrlen(nlh, hdrlen), maxtype, policy); } +/** + * nlmsg_report - need to report back to application? + * @nlh: netlink message header + * + * Returns 1 if a report back to the application is requested. + */ +static inline int nlmsg_report(struct nlmsghdr *nlh) +{ + return !!(nlh->nlmsg_flags & NLM_F_ECHO); +} + /** * nlmsg_for_each_attr - iterate over a stream of attributes * @pos: loop counter, set to current attribute diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e02fa6a33f42..2b1af17e6389 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -171,6 +171,22 @@ int rtnl_unicast(struct sk_buff *skb, u32 pid) return nlmsg_unicast(rtnl, skb, pid); } +int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, + struct nlmsghdr *nlh, gfp_t flags) +{ + int report = 0; + + if (nlh) + report = nlmsg_report(nlh); + + return nlmsg_notify(rtnl, skb, pid, group, report, flags); +} + +void rtnl_set_sk_err(u32 group, int error) +{ + netlink_set_err(rtnl, 0, group, error); +} + int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) { struct rtattr *mx = (struct rtattr*)skb->tail; @@ -829,3 +845,5 @@ EXPORT_SYMBOL(rtnl_lock); EXPORT_SYMBOL(rtnl_trylock); EXPORT_SYMBOL(rtnl_unlock); EXPORT_SYMBOL(rtnl_unicast); +EXPORT_SYMBOL(rtnl_notify); +EXPORT_SYMBOL(rtnl_set_sk_err); From c17084d21c18497b506bd28b82d964bc9e6c424b Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 15 Aug 2006 00:32:48 -0700 Subject: [PATCH 0471/1063] [NET] fib_rules: Convert fib rule notification to use rtnl_notify() Adds support for NLM_F_ECHO to simplify the process of identifying inserted rules with an auto generated priority. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/fib_rules.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 873b04d5df81..7b2e9bb1a605 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -18,7 +18,8 @@ static LIST_HEAD(rules_ops); static DEFINE_SPINLOCK(rules_mod_lock); static void notify_rule_change(int event, struct fib_rule *rule, - struct fib_rules_ops *ops); + struct fib_rules_ops *ops, struct nlmsghdr *nlh, + u32 pid); static struct fib_rules_ops *lookup_rules_ops(int family) { @@ -209,7 +210,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) else list_add_rcu(&rule->list, ops->rules_list); - notify_rule_change(RTM_NEWRULE, rule, ops); + notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid); rules_ops_put(ops); return 0; @@ -266,7 +267,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) list_del_rcu(&rule->list); synchronize_rcu(); - notify_rule_change(RTM_DELRULE, rule, ops); + notify_rule_change(RTM_DELRULE, rule, ops, nlh, + NETLINK_CB(skb).pid); fib_rule_put(rule); rules_ops_put(ops); return 0; @@ -344,18 +346,26 @@ skip: EXPORT_SYMBOL_GPL(fib_rules_dump); static void notify_rule_change(int event, struct fib_rule *rule, - struct fib_rules_ops *ops) + struct fib_rules_ops *ops, struct nlmsghdr *nlh, + u32 pid) { - int size = nlmsg_total_size(sizeof(struct fib_rule_hdr) + 128); - struct sk_buff *skb = alloc_skb(size, GFP_KERNEL); + struct sk_buff *skb; + int err = -ENOBUFS; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) - netlink_set_err(rtnl, 0, ops->nlgroup, ENOBUFS); - else if (fib_nl_fill_rule(skb, rule, 0, 0, event, 0, ops) < 0) { + goto errout; + + err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops); + if (err < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, ops->nlgroup, EINVAL); - } else - netlink_broadcast(rtnl, skb, 0, ops->nlgroup, GFP_KERNEL); + goto errout; + } + + err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL); +errout: + if (err < 0) + rtnl_set_sk_err(ops->nlgroup, err); } static void attach_rules(struct list_head *rules, struct net_device *dev) From b8673311804ca29680dd584bd08352001fcbe2f8 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 15 Aug 2006 00:33:14 -0700 Subject: [PATCH 0472/1063] [NEIGH]: Convert neighbour notifications ot use rtnl_notify() Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/neighbour.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 2f4e06a13457..23ae5e5426db 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2409,36 +2409,35 @@ static struct file_operations neigh_stat_seq_fops = { #endif /* CONFIG_PROC_FS */ #ifdef CONFIG_ARPD -void neigh_app_ns(struct neighbour *n) +static void __neigh_notify(struct neighbour *n, int type, int flags) { struct sk_buff *skb; + int err = -ENOBUFS; skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); if (skb == NULL) - return; + goto errout; - if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH, NLM_F_REQUEST) <= 0) + err = neigh_fill_info(skb, n, 0, 0, type, flags); + if (err < 0) { kfree_skb(skb); - else { - NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH; - netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC); + goto errout; } + + err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); +errout: + if (err < 0) + rtnl_set_sk_err(RTNLGRP_NEIGH, err); +} + +void neigh_app_ns(struct neighbour *n) +{ + __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST); } static void neigh_app_notify(struct neighbour *n) { - struct sk_buff *skb; - - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); - if (skb == NULL) - return; - - if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH, 0) <= 0) - kfree_skb(skb); - else { - NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH; - netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC); - } + __neigh_notify(n, RTM_NEWNEIGH, 0); } #endif /* CONFIG_ARPD */ From dc738dd83e88c3c5de55431f8cfb758de5d4df48 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 15 Aug 2006 00:33:35 -0700 Subject: [PATCH 0473/1063] [DECNET]: Convert DECnet notifications to use rtnl_notify() Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/decnet/dn_dev.c | 25 ++++++++++++++----------- net/decnet/dn_table.c | 28 ++++++++++++++-------------- 2 files changed, 28 insertions(+), 25 deletions(-) diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 88ea7a13bb24..01861feb608d 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -746,20 +746,23 @@ rtattr_failure: static void rtmsg_ifa(int event, struct dn_ifaddr *ifa) { struct sk_buff *skb; - int size = NLMSG_SPACE(sizeof(struct ifaddrmsg)+128); + int payload = sizeof(struct ifaddrmsg) + 128; + int err = -ENOBUFS; - skb = alloc_skb(size, GFP_KERNEL); - if (!skb) { - netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, ENOBUFS); - return; - } - if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) { + skb = alloc_skb(nlmsg_total_size(payload), GFP_KERNEL); + if (skb == NULL) + goto errout; + + err = dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0); + if (err < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, EINVAL); - return; + goto errout; } - NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_IFADDR; - netlink_broadcast(rtnl, skb, 0, RTNLGRP_DECnet_IFADDR, GFP_KERNEL); + + err = rtnl_notify(skb, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL); +errout: + if (err < 0) + rtnl_set_sk_err(RTNLGRP_DECnet_IFADDR, err); } static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 10e87262b6fb..317904bb5896 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -333,24 +333,24 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id, { struct sk_buff *skb; u32 pid = req ? req->pid : 0; - int size = NLMSG_SPACE(sizeof(struct rtmsg) + 256); + int err = -ENOBUFS; - skb = alloc_skb(size, GFP_KERNEL); - if (!skb) - return; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto errout; - if (dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id, - f->fn_type, f->fn_scope, &f->fn_key, z, - DN_FIB_INFO(f), 0) < 0) { + err = dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id, + f->fn_type, f->fn_scope, &f->fn_key, z, + DN_FIB_INFO(f), 0); + if (err < 0) { kfree_skb(skb); - return; + goto errout; } - NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_ROUTE; - if (nlh->nlmsg_flags & NLM_F_ECHO) - atomic_inc(&skb->users); - netlink_broadcast(rtnl, skb, pid, RTNLGRP_DECnet_ROUTE, GFP_KERNEL); - if (nlh->nlmsg_flags & NLM_F_ECHO) - netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); + + err = rtnl_notify(skb, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); +errout: + if (err < 0) + rtnl_set_sk_err(RTNLGRP_DECnet_ROUTE, err); } static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, From d6062cbbd1f5e92c94e5eae9ef1a280ed48d56d5 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 15 Aug 2006 00:33:59 -0700 Subject: [PATCH 0474/1063] [IPv4] address: Convert address notification to use rtnl_notify() Adds support for NLM_F_ECHO allowing applications to easly see which address have been deleted, added, or promoted. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 53 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 398e7b9ca66b..0487677729cf 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -88,7 +88,7 @@ static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = { [IFA_LABEL] = { .type = NLA_STRING }, }; -static void rtmsg_ifa(int event, struct in_ifaddr *); +static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, @@ -239,8 +239,8 @@ int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b) return 0; } -static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, - int destroy) +static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, + int destroy, struct nlmsghdr *nlh, u32 pid) { struct in_ifaddr *promote = NULL; struct in_ifaddr *ifa, *ifa1 = *ifap; @@ -273,7 +273,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, if (!do_promote) { *ifap1 = ifa->ifa_next; - rtmsg_ifa(RTM_DELADDR, ifa); + rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa); inet_free_ifa(ifa); @@ -298,7 +298,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, is valid, it will try to restore deleted routes... Grr. So that, this order is correct. */ - rtmsg_ifa(RTM_DELADDR, ifa1); + rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); if (promote) { @@ -310,7 +310,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, } promote->ifa_flags &= ~IFA_F_SECONDARY; - rtmsg_ifa(RTM_NEWADDR, promote); + rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, promote); for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) { @@ -329,7 +329,14 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, } } -static int inet_insert_ifa(struct in_ifaddr *ifa) +static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, + int destroy) +{ + __inet_del_ifa(in_dev, ifap, destroy, NULL, 0); +} + +static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, + u32 pid) { struct in_device *in_dev = ifa->ifa_dev; struct in_ifaddr *ifa1, **ifap, **last_primary; @@ -374,12 +381,17 @@ static int inet_insert_ifa(struct in_ifaddr *ifa) /* Send message first, then call notifier. Notifier will trigger FIB update, so that listeners of netlink will know about new ifaddr */ - rtmsg_ifa(RTM_NEWADDR, ifa); + rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); return 0; } +static int inet_insert_ifa(struct in_ifaddr *ifa) +{ + return __inet_insert_ifa(ifa, NULL, 0); +} + static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) { struct in_device *in_dev = __in_dev_get_rtnl(dev); @@ -466,7 +478,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg !inet_ifa_match(nla_get_u32(tb[IFA_ADDRESS]), ifa))) continue; - inet_del_ifa(in_dev, ifap, 1); + __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid); return 0; } @@ -558,7 +570,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg if (IS_ERR(ifa)) return PTR_ERR(ifa); - return inet_insert_ifa(ifa); + return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid); } /* @@ -1189,18 +1201,27 @@ done: return skb->len; } -static void rtmsg_ifa(int event, struct in_ifaddr* ifa) +static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh, + u32 pid) { struct sk_buff *skb; + u32 seq = nlh ? nlh->nlmsg_seq : 0; + int err = -ENOBUFS; skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) - netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, ENOBUFS); - else if (inet_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) { + goto errout; + + err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0); + if (err < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, EINVAL); - } else - netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_IFADDR, GFP_KERNEL); + goto errout; + } + + err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); +errout: + if (err < 0) + rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err); } static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = { From f21c7bc5f6a0a5bd03988886ff46656bc3f255b7 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 15 Aug 2006 00:34:17 -0700 Subject: [PATCH 0475/1063] [IPv4] route: Convert route notifications to use rtnl_notify() Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index ab753df20a39..5dfdad5cbcd4 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include @@ -44,6 +43,7 @@ #include #include #include +#include #include "fib_lookup.h" @@ -278,25 +278,25 @@ void rtmsg_fib(int event, u32 key, struct fib_alias *fa, { struct sk_buff *skb; u32 pid = req ? req->pid : n->nlmsg_pid; - int size = NLMSG_SPACE(sizeof(struct rtmsg)+256); + int payload = sizeof(struct rtmsg) + 256; + int err = -ENOBUFS; - skb = alloc_skb(size, GFP_KERNEL); - if (!skb) - return; + skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL); + if (skb == NULL) + goto errout; - if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id, - fa->fa_type, fa->fa_scope, &key, z, - fa->fa_tos, - fa->fa_info, 0) < 0) { + err = fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id, + fa->fa_type, fa->fa_scope, &key, z, fa->fa_tos, + fa->fa_info, 0); + if (err < 0) { kfree_skb(skb); - return; + goto errout; } - NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE; - if (n->nlmsg_flags&NLM_F_ECHO) - atomic_inc(&skb->users); - netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL); - if (n->nlmsg_flags&NLM_F_ECHO) - netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); + + err = rtnl_notify(skb, pid, RTNLGRP_IPV4_ROUTE, n, GFP_KERNEL); +errout: + if (err < 0) + rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err); } /* Return the first fib alias matching TOS with From 5d620266431c03d1dac66287367c6da26c64a069 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 15 Aug 2006 00:35:02 -0700 Subject: [PATCH 0476/1063] [IPv6] address: Convert address notification to use rtnl_notify() Fixes a wrong use of current->pid as netlink pid. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4f991a2234d0..81e9ef14676f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include @@ -3280,20 +3281,23 @@ out_free: static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) { struct sk_buff *skb; - int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE); + int payload = sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE; + int err = -ENOBUFS; - skb = alloc_skb(size, GFP_ATOMIC); - if (!skb) { - netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, ENOBUFS); - return; - } - if (inet6_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) { + skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0); + if (err < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, EINVAL); - return; + goto errout; } - NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFADDR; - netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFADDR, GFP_ATOMIC); + + err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); +errout: + if (err < 0) + rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); } static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, From 21713ebc4f119950e87d21c4637d5a750eea20e8 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 15 Aug 2006 00:35:24 -0700 Subject: [PATCH 0477/1063] [IPv6] route: Convert route notifications to use rtnl_notify() Fixes a wrong use of current->pid as netlink pid. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv6/route.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 024c8e26c2ec..1aca787ead85 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #ifdef CONFIG_PROC_FS @@ -54,6 +53,7 @@ #include #include #include +#include #include @@ -2056,27 +2056,25 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, struct netlink_skb_parms *req) { struct sk_buff *skb; - int size = NLMSG_SPACE(sizeof(struct rtmsg)+256); - u32 pid = current->pid; - u32 seq = 0; + u32 pid = req ? req->pid : 0; + u32 seq = nlh ? nlh->nlmsg_seq : 0; + int payload = sizeof(struct rtmsg) + 256; + int err = -ENOBUFS; - if (req) - pid = req->pid; - if (nlh) - seq = nlh->nlmsg_seq; - - skb = alloc_skb(size, gfp_any()); - if (!skb) { - netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS); - return; - } - if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) { + skb = nlmsg_new(nlmsg_total_size(payload), gfp_any()); + if (skb == NULL) + goto errout; + + err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0); + if (err < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL); - return; + goto errout; } - NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE; - netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any()); + + err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any()); +errout: + if (err < 0) + rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err); } /* From 8d7a76c9b17866f426fcbb531c81af7a1f53e071 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 15 Aug 2006 00:35:47 -0700 Subject: [PATCH 0478/1063] [IPv6] link: Convert link notifications to use rtnl_notify() Fixes a wrong use of current->pid as netlink pid. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 81e9ef14676f..2a3be0f1c516 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3438,20 +3438,23 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) void inet6_ifinfo_notify(int event, struct inet6_dev *idev) { struct sk_buff *skb; - int size = NLMSG_SPACE(sizeof(struct ifinfomsg) + INET6_IFINFO_RTA_SPACE); + int payload = sizeof(struct ifinfomsg) + INET6_IFINFO_RTA_SPACE; + int err = -ENOBUFS; - skb = alloc_skb(size, GFP_ATOMIC); - if (!skb) { - netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, ENOBUFS); - return; - } - if (inet6_fill_ifinfo(skb, idev, current->pid, 0, event, 0) < 0) { + skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0); + if (err < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, EINVAL); - return; + goto errout; } - NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFINFO; - netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFINFO, GFP_ATOMIC); + + err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); +errout: + if (err < 0) + rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); } /* Maximum length of prefix_cacheinfo attributes */ From 8c384bfa36b1dbeba8154da20d49167ce3e275c4 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 15 Aug 2006 00:36:07 -0700 Subject: [PATCH 0479/1063] [IPv6] prefix: Convert prefix notifications to use rtnl_notify() Fixes a wrong use of current->pid as netlink pid. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2a3be0f1c516..4af741ef8d6b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3506,20 +3506,23 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, struct prefix_info *pinfo) { struct sk_buff *skb; - int size = NLMSG_SPACE(sizeof(struct prefixmsg) + INET6_PREFIX_RTA_SPACE); + int payload = sizeof(struct prefixmsg) + INET6_PREFIX_RTA_SPACE; + int err = -ENOBUFS; - skb = alloc_skb(size, GFP_ATOMIC); - if (!skb) { - netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, ENOBUFS); - return; - } - if (inet6_fill_prefix(skb, idev, pinfo, current->pid, 0, event, 0) < 0) { + skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0); + if (err < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, EINVAL); - return; + goto errout; } - NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_PREFIX; - netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_PREFIX, GFP_ATOMIC); + + err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); +errout: + if (err < 0) + rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err); } static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { From 280a306c539389156477cc9c07028d43fe4fbf86 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 15 Aug 2006 00:36:28 -0700 Subject: [PATCH 0480/1063] [BRIDGE]: Convert notifications to use rtnl_notify() Fixes a wrong use of current->pid as netlink pid. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 53086fb75089..8f661195d09d 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -12,6 +12,7 @@ #include #include +#include #include "br_private.h" /* @@ -76,26 +77,24 @@ rtattr_failure: void br_ifinfo_notify(int event, struct net_bridge_port *port) { struct sk_buff *skb; - int err = -ENOMEM; + int payload = sizeof(struct ifinfomsg) + 128; + int err = -ENOBUFS; pr_debug("bridge notify event=%d\n", event); - skb = alloc_skb(NLMSG_SPACE(sizeof(struct ifinfomsg) + 128), - GFP_ATOMIC); - if (!skb) - goto err_out; + skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC); + if (skb == NULL) + goto errout; - err = br_fill_ifinfo(skb, port, current->pid, 0, event, 0); + err = br_fill_ifinfo(skb, port, 0, 0, event, 0); + if (err < 0) { + kfree_skb(skb); + goto errout; + } + + err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); +errout: if (err < 0) - goto err_kfree; - - NETLINK_CB(skb).dst_group = RTNLGRP_LINK; - netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC); - return; - -err_kfree: - kfree_skb(skb); -err_out: - netlink_set_err(rtnl, 0, RTNLGRP_LINK, err); + rtnl_set_sk_err(RTNLGRP_LINK, err); } /* From bd5785ba3ac1c89aa4c351ceb2acd96686424d8c Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 15 Aug 2006 00:36:49 -0700 Subject: [PATCH 0481/1063] [WIRELESS]: Convert notifications to use rtnl_notify() Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/wireless.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/wireless.c b/net/core/wireless.c index 348b9da73cc4..3168fca312f7 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c @@ -85,6 +85,7 @@ #include /* Pretty obvious */ #include /* New driver API */ +#include #include /* copy_to_user() */ @@ -1849,7 +1850,7 @@ static void wireless_nlevent_process(unsigned long data) struct sk_buff *skb; while ((skb = skb_dequeue(&wireless_nlevent_queue))) - netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC); + rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); } static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0); From 0ec6d3f467faeec5dd3b617959eb90e9d520113d Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 15 Aug 2006 00:37:09 -0700 Subject: [PATCH 0482/1063] [NET] link: Convert notifications to use rtnl_notify() Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2b1af17e6389..f5300b5dd0fd 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -630,20 +630,22 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) { struct sk_buff *skb; - int size = NLMSG_SPACE(sizeof(struct ifinfomsg) + - sizeof(struct rtnl_link_ifmap) + - sizeof(struct rtnl_link_stats) + 128); + int err = -ENOBUFS; - skb = nlmsg_new(size, GFP_KERNEL); - if (!skb) - return; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto errout; - if (rtnl_fill_ifinfo(skb, dev, NULL, 0, type, 0, 0, change, 0) < 0) { + err = rtnl_fill_ifinfo(skb, dev, NULL, 0, type, 0, 0, change, 0); + if (err < 0) { kfree_skb(skb); - return; + goto errout; } - NETLINK_CB(skb).dst_group = RTNLGRP_LINK; - netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL); + + err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); +errout: + if (err < 0) + rtnl_set_sk_err(RTNLGRP_LINK, err); } /* Protected by RTNL sempahore. */ From 56fc85ac961e2c20dcb5ef07e2628b3f93de2e49 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 15 Aug 2006 00:37:29 -0700 Subject: [PATCH 0483/1063] [RTNETLINK]: Unexport rtnl socket Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 2 -- net/core/rtnetlink.c | 4 +--- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index ecbe0349060c..9c92dc8b9a08 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -574,8 +574,6 @@ extern int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, in #define rtattr_parse_nested(tb, max, rta) \ rtattr_parse((tb), (max), RTA_DATA((rta)), RTA_PAYLOAD((rta))) -extern struct sock *rtnl; - struct rtnetlink_link { int (*doit)(struct sk_buff *, struct nlmsghdr*, void *attr); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f5300b5dd0fd..dfc58269240a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -58,6 +58,7 @@ #endif /* CONFIG_NET_WIRELESS_RTNETLINK */ static DEFINE_MUTEX(rtnl_mutex); +static struct sock *rtnl; void rtnl_lock(void) { @@ -95,8 +96,6 @@ int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len) return 0; } -struct sock *rtnl; - struct rtnetlink_link * rtnetlink_links[NPROTO]; static const int rtm_min[RTM_NR_FAMILIES] = @@ -842,7 +841,6 @@ EXPORT_SYMBOL(rtattr_strlcpy); EXPORT_SYMBOL(rtattr_parse); EXPORT_SYMBOL(rtnetlink_links); EXPORT_SYMBOL(rtnetlink_put_metrics); -EXPORT_SYMBOL(rtnl); EXPORT_SYMBOL(rtnl_lock); EXPORT_SYMBOL(rtnl_trylock); EXPORT_SYMBOL(rtnl_unlock); From ab32ea5d8a760e7dd4339634e95d7be24ee5b842 Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Fri, 22 Sep 2006 14:15:41 -0700 Subject: [PATCH 0484/1063] [NET/IPV4/IPV6]: Change some sysctl variables to __read_mostly Change net/core, ipv4 and ipv6 sysctl variables to __read_mostly. Couldn't actually measure any performance increase while testing (.3% I consider noise), but seems like the right thing to do. Signed-off-by: Brian Haley Signed-off-by: David S. Miller --- net/core/neighbour.c | 2 +- net/core/sock.c | 10 +++++----- net/ipv4/af_inet.c | 4 ++-- net/ipv4/icmp.c | 12 ++++++------ net/ipv4/igmp.c | 4 ++-- net/ipv4/ip_fragment.c | 10 +++++----- net/ipv4/ip_output.c | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 32 ++++++++++++++++---------------- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv4/tcp_minisocks.c | 4 ++-- net/ipv4/tcp_output.c | 12 ++++++------ net/ipv4/tcp_timer.c | 16 ++++++++-------- net/ipv6/addrconf.c | 6 +++--- net/ipv6/af_inet6.c | 2 +- net/ipv6/icmp.c | 2 +- net/ipv6/mcast.c | 2 +- net/ipv6/reassembly.c | 8 ++++---- 18 files changed, 67 insertions(+), 67 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 23ae5e5426db..c7e653ff5ed0 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2451,7 +2451,7 @@ static struct neigh_sysctl_table { ctl_table neigh_neigh_dir[2]; ctl_table neigh_proto_dir[2]; ctl_table neigh_root_dir[2]; -} neigh_sysctl_template = { +} neigh_sysctl_template __read_mostly = { .neigh_vars = { { .ctl_name = NET_NEIGH_MCAST_SOLICIT, diff --git a/net/core/sock.c b/net/core/sock.c index b67d868649cd..cfaf09039b02 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -187,13 +187,13 @@ static struct lock_class_key af_callback_keys[AF_MAX]; #define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) /* Run time adjustable parameters. */ -__u32 sysctl_wmem_max = SK_WMEM_MAX; -__u32 sysctl_rmem_max = SK_RMEM_MAX; -__u32 sysctl_wmem_default = SK_WMEM_MAX; -__u32 sysctl_rmem_default = SK_RMEM_MAX; +__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX; +__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX; +__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; +__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; /* Maximal space eaten by iovec or ancilliary data plus some space */ -int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512); +int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) { diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 36c38bffb474..f2e8927f4596 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -391,7 +391,7 @@ int inet_release(struct socket *sock) } /* It is off by default, see below. */ -int sysctl_ip_nonlocal_bind; +int sysctl_ip_nonlocal_bind __read_mostly; int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { @@ -987,7 +987,7 @@ void inet_unregister_protosw(struct inet_protosw *p) * Shall we try to damage output packets if routing dev changes? */ -int sysctl_ip_dynaddr; +int sysctl_ip_dynaddr __read_mostly; static int inet_sk_reselect_saddr(struct sock *sk) { diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 6d223e5c6741..c2ad07e48ab4 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -187,11 +187,11 @@ struct icmp_err icmp_err_convert[] = { }; /* Control parameters for ECHO replies. */ -int sysctl_icmp_echo_ignore_all; -int sysctl_icmp_echo_ignore_broadcasts = 1; +int sysctl_icmp_echo_ignore_all __read_mostly; +int sysctl_icmp_echo_ignore_broadcasts __read_mostly = 1; /* Control parameter - ignore bogus broadcast responses? */ -int sysctl_icmp_ignore_bogus_error_responses = 1; +int sysctl_icmp_ignore_bogus_error_responses __read_mostly = 1; /* * Configurable global rate limit. @@ -205,9 +205,9 @@ int sysctl_icmp_ignore_bogus_error_responses = 1; * time exceeded (11), parameter problem (12) */ -int sysctl_icmp_ratelimit = 1 * HZ; -int sysctl_icmp_ratemask = 0x1818; -int sysctl_icmp_errors_use_inbound_ifaddr; +int sysctl_icmp_ratelimit __read_mostly = 1 * HZ; +int sysctl_icmp_ratemask __read_mostly = 0x1818; +int sysctl_icmp_errors_use_inbound_ifaddr __read_mostly; /* * ICMP control array. This specifies what to do with each ICMP. diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 7003e763d970..58be8227b0cb 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1397,8 +1397,8 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) /* * Join a socket to a group */ -int sysctl_igmp_max_memberships = IP_MAX_MEMBERSHIPS; -int sysctl_igmp_max_msf = IP_MAX_MSF; +int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS; +int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF; static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 8d7f107c2eef..165d72859ddf 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -54,15 +54,15 @@ * even the most extreme cases without allowing an attacker to measurably * harm machine performance. */ -int sysctl_ipfrag_high_thresh = 256*1024; -int sysctl_ipfrag_low_thresh = 192*1024; +int sysctl_ipfrag_high_thresh __read_mostly = 256*1024; +int sysctl_ipfrag_low_thresh __read_mostly = 192*1024; -int sysctl_ipfrag_max_dist = 64; +int sysctl_ipfrag_max_dist __read_mostly = 64; /* Important NOTE! Fragment queue must be destroyed before MSL expires. * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. */ -int sysctl_ipfrag_time = IP_FRAG_TIME; +int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME; struct ipfrag_skb_cb { @@ -130,7 +130,7 @@ static unsigned int ipqhashfn(u16 id, u32 saddr, u32 daddr, u8 prot) } static struct timer_list ipfrag_secret_timer; -int sysctl_ipfrag_secret_interval = 10 * 60 * HZ; +int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ; static void ipfrag_secret_rebuild(unsigned long dummy) { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 1b9b6742ef77..81b2795a4c20 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -83,7 +83,7 @@ #include #include -int sysctl_ip_default_ttl = IPDEFTTL; +int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; /* Generate a checksum for an outgoing IP datagram. */ __inline__ void ip_send_check(struct iphdr *iph) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b0124e69ab38..e570db4d33c8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -268,7 +268,7 @@ #include #include -int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; +int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 159fa3f1ba67..caf3c41dcc8c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -72,24 +72,24 @@ #include #include -int sysctl_tcp_timestamps = 1; -int sysctl_tcp_window_scaling = 1; -int sysctl_tcp_sack = 1; -int sysctl_tcp_fack = 1; -int sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH; -int sysctl_tcp_ecn; -int sysctl_tcp_dsack = 1; -int sysctl_tcp_app_win = 31; -int sysctl_tcp_adv_win_scale = 2; +int sysctl_tcp_timestamps __read_mostly = 1; +int sysctl_tcp_window_scaling __read_mostly = 1; +int sysctl_tcp_sack __read_mostly = 1; +int sysctl_tcp_fack __read_mostly = 1; +int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; +int sysctl_tcp_ecn __read_mostly; +int sysctl_tcp_dsack __read_mostly = 1; +int sysctl_tcp_app_win __read_mostly = 31; +int sysctl_tcp_adv_win_scale __read_mostly = 2; -int sysctl_tcp_stdurg; -int sysctl_tcp_rfc1337; -int sysctl_tcp_max_orphans = NR_FILE; -int sysctl_tcp_frto; -int sysctl_tcp_nometrics_save; +int sysctl_tcp_stdurg __read_mostly; +int sysctl_tcp_rfc1337 __read_mostly; +int sysctl_tcp_max_orphans __read_mostly = NR_FILE; +int sysctl_tcp_frto __read_mostly; +int sysctl_tcp_nometrics_save __read_mostly; -int sysctl_tcp_moderate_rcvbuf = 1; -int sysctl_tcp_abc; +int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; +int sysctl_tcp_abc __read_mostly; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2973dee0a489..23b46e36b147 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -78,8 +78,8 @@ #include #include -int sysctl_tcp_tw_reuse; -int sysctl_tcp_low_latency; +int sysctl_tcp_tw_reuse __read_mostly; +int sysctl_tcp_low_latency __read_mostly; /* Check TCP sequence numbers in ICMP packets. */ #define ICMP_MIN_LENGTH 8 diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 624e2b2c7f53..0163d9826907 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -34,8 +34,8 @@ #define SYNC_INIT 1 #endif -int sysctl_tcp_syncookies = SYNC_INIT; -int sysctl_tcp_abort_on_overflow; +int sysctl_tcp_syncookies __read_mostly = SYNC_INIT; +int sysctl_tcp_abort_on_overflow __read_mostly; struct inet_timewait_death_row tcp_death_row = { .sysctl_max_tw_buckets = NR_FILE * 2, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9252a50c4b49..061edfae0c29 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -43,24 +43,24 @@ #include /* People can turn this off for buggy TCP's found in printers etc. */ -int sysctl_tcp_retrans_collapse = 1; +int sysctl_tcp_retrans_collapse __read_mostly = 1; /* People can turn this on to work with those rare, broken TCPs that * interpret the window field as a signed quantity. */ -int sysctl_tcp_workaround_signed_windows = 0; +int sysctl_tcp_workaround_signed_windows __read_mostly = 0; /* This limits the percentage of the congestion window which we * will allow a single TSO frame to consume. Building TSO frames * which are too large can cause TCP streams to be bursty. */ -int sysctl_tcp_tso_win_divisor = 3; +int sysctl_tcp_tso_win_divisor __read_mostly = 3; -int sysctl_tcp_mtu_probing = 0; -int sysctl_tcp_base_mss = 512; +int sysctl_tcp_mtu_probing __read_mostly = 0; +int sysctl_tcp_base_mss __read_mostly = 512; /* By default, RFC2861 behavior. */ -int sysctl_tcp_slow_start_after_idle = 1; +int sysctl_tcp_slow_start_after_idle __read_mostly = 1; static void update_send_head(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 7c1bde3cd6cb..fb09ade5897b 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -23,14 +23,14 @@ #include #include -int sysctl_tcp_syn_retries = TCP_SYN_RETRIES; -int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; -int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; -int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; -int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL; -int sysctl_tcp_retries1 = TCP_RETR1; -int sysctl_tcp_retries2 = TCP_RETR2; -int sysctl_tcp_orphan_retries; +int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; +int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES; +int sysctl_tcp_keepalive_time __read_mostly = TCP_KEEPALIVE_TIME; +int sysctl_tcp_keepalive_probes __read_mostly = TCP_KEEPALIVE_PROBES; +int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; +int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; +int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; +int sysctl_tcp_orphan_retries __read_mostly; static void tcp_write_timer(unsigned long); static void tcp_delack_timer(unsigned long); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4af741ef8d6b..f1ede9004887 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -146,7 +146,7 @@ static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *de static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); -struct ipv6_devconf ipv6_devconf = { +struct ipv6_devconf ipv6_devconf __read_mostly = { .forwarding = 0, .hop_limit = IPV6_DEFAULT_HOPLIMIT, .mtu6 = IPV6_MIN_MTU, @@ -177,7 +177,7 @@ struct ipv6_devconf ipv6_devconf = { #endif }; -static struct ipv6_devconf ipv6_devconf_dflt = { +static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .forwarding = 0, .hop_limit = IPV6_DEFAULT_HOPLIMIT, .mtu6 = IPV6_MIN_MTU, @@ -3665,7 +3665,7 @@ static struct addrconf_sysctl_table ctl_table addrconf_conf_dir[2]; ctl_table addrconf_proto_dir[2]; ctl_table addrconf_root_dir[2]; -} addrconf_sysctl = { +} addrconf_sysctl __read_mostly = { .sysctl_header = NULL, .addrconf_vars = { { diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 82a1b1a328db..2ff600cfe3a4 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -67,7 +67,7 @@ MODULE_AUTHOR("Cast of dozens"); MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); MODULE_LICENSE("GPL"); -int sysctl_ipv6_bindv6only; +int sysctl_ipv6_bindv6only __read_mostly; /* The inetsw table contains everything that inet_create needs to * build a new socket. diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 103055107674..e3a8e27af950 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -151,7 +151,7 @@ static int is_ineligible(struct sk_buff *skb) return 0; } -static int sysctl_icmpv6_time = 1*HZ; +static int sysctl_icmpv6_time __read_mostly = 1*HZ; /* * Check the ICMP output rate limit diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 639eb20c9f1f..3b114e3fa2f8 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -171,7 +171,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, #define IPV6_MLD_MAX_MSF 64 -int sysctl_mld_max_msf = IPV6_MLD_MAX_MSF; +int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF; /* * socket join on multicast group diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index a8623d2b0879..f39bbedd1327 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -53,10 +53,10 @@ #include #include -int sysctl_ip6frag_high_thresh = 256*1024; -int sysctl_ip6frag_low_thresh = 192*1024; +int sysctl_ip6frag_high_thresh __read_mostly = 256*1024; +int sysctl_ip6frag_low_thresh __read_mostly = 192*1024; -int sysctl_ip6frag_time = IPV6_FRAG_TIMEOUT; +int sysctl_ip6frag_time __read_mostly = IPV6_FRAG_TIMEOUT; struct ip6frag_skb_cb { @@ -152,7 +152,7 @@ static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr, } static struct timer_list ip6_frag_secret_timer; -int sysctl_ip6frag_secret_interval = 10 * 60 * HZ; +int sysctl_ip6frag_secret_interval __read_mostly = 10 * 60 * HZ; static void ip6_frag_secret_rebuild(unsigned long dummy) { From 4e902c57417c4c285b98ba2722468d1c3ed83d1b Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 17 Aug 2006 18:14:52 -0700 Subject: [PATCH 0485/1063] [IPv4]: FIB configuration using struct fib_config Introduces struct fib_config replacing the ugly struct kern_rta prone to ordering issues. Avoids creating faked netlink messages for auto generated routes or requests via ioctl. A new interface net/nexthop.h is added to help navigate through nexthop configuration arrays. A new struct nl_info will be used to carry the necessary netlink information to be used for notifications later on. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/ip_fib.h | 55 +++--- include/net/netlink.h | 10 + include/net/nexthop.h | 33 ++++ net/ipv4/fib_frontend.c | 364 ++++++++++++++++++++++++++++++------- net/ipv4/fib_hash.c | 94 +++++----- net/ipv4/fib_lookup.h | 11 +- net/ipv4/fib_semantics.c | 383 ++++++++++++--------------------------- net/ipv4/fib_trie.c | 76 ++++---- 8 files changed, 559 insertions(+), 467 deletions(-) create mode 100644 include/net/nexthop.h diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 8e9ba563d342..42ed96fab3f5 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -20,25 +20,32 @@ #include #include -/* WARNING: The ordering of these elements must match ordering - * of RTA_* rtnetlink attribute numbers. - */ -struct kern_rta { - void *rta_dst; - void *rta_src; - int *rta_iif; - int *rta_oif; - void *rta_gw; - u32 *rta_priority; - void *rta_prefsrc; - struct rtattr *rta_mx; - struct rtattr *rta_mp; - unsigned char *rta_protoinfo; - u32 *rta_flow; - struct rta_cacheinfo *rta_ci; - struct rta_session *rta_sess; - u32 *rta_mp_alg; -}; +struct fib_config { + u8 fc_family; + u8 fc_dst_len; + u8 fc_src_len; + u8 fc_tos; + u8 fc_protocol; + u8 fc_scope; + u8 fc_type; + /* 1 byte unused */ + u32 fc_table; + u32 fc_dst; + u32 fc_src; + u32 fc_gw; + int fc_oif; + u32 fc_flags; + u32 fc_priority; + u32 fc_prefsrc; + struct nlattr *fc_mx; + struct rtnexthop *fc_mp; + int fc_mx_len; + int fc_mp_len; + u32 fc_flow; + u32 fc_mp_alg; + u32 fc_nlflags; + struct nl_info fc_nlinfo; + }; struct fib_info; @@ -154,12 +161,8 @@ struct fib_table { u32 tb_id; unsigned tb_stamp; int (*tb_lookup)(struct fib_table *tb, const struct flowi *flp, struct fib_result *res); - int (*tb_insert)(struct fib_table *table, struct rtmsg *r, - struct kern_rta *rta, struct nlmsghdr *n, - struct netlink_skb_parms *req); - int (*tb_delete)(struct fib_table *table, struct rtmsg *r, - struct kern_rta *rta, struct nlmsghdr *n, - struct netlink_skb_parms *req); + int (*tb_insert)(struct fib_table *, struct fib_config *); + int (*tb_delete)(struct fib_table *, struct fib_config *); int (*tb_dump)(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb); int (*tb_flush)(struct fib_table *table); @@ -228,8 +231,6 @@ struct rtentry; extern int ip_fib_check_default(u32 gw, struct net_device *dev); extern int fib_sync_down(u32 local, struct net_device *dev, int force); extern int fib_sync_up(struct net_device *dev); -extern int fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, - struct kern_rta *rta, struct rtentry *r); extern u32 __fib_res_prefsrc(struct fib_result *res); /* Exported by fib_hash.c */ diff --git a/include/net/netlink.h b/include/net/netlink.h index bf593eb59e1b..47044da167c5 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -192,6 +192,16 @@ struct nla_policy { u16 minlen; }; +/** + * struct nl_info - netlink source information + * @nlh: Netlink message header of original request + * @pid: Netlink PID of requesting application + */ +struct nl_info { + struct nlmsghdr *nlh; + u32 pid; +}; + extern void netlink_run_queue(struct sock *sk, unsigned int *qlen, int (*cb)(struct sk_buff *, struct nlmsghdr *, int *)); diff --git a/include/net/nexthop.h b/include/net/nexthop.h new file mode 100644 index 000000000000..3334dbfa5aa4 --- /dev/null +++ b/include/net/nexthop.h @@ -0,0 +1,33 @@ +#ifndef __NET_NEXTHOP_H +#define __NET_NEXTHOP_H + +#include +#include + +static inline int rtnh_ok(const struct rtnexthop *rtnh, int remaining) +{ + return remaining >= sizeof(*rtnh) && + rtnh->rtnh_len >= sizeof(*rtnh) && + rtnh->rtnh_len <= remaining; +} + +static inline struct rtnexthop *rtnh_next(const struct rtnexthop *rtnh, + int *remaining) +{ + int totlen = NLA_ALIGN(rtnh->rtnh_len); + + *remaining -= totlen; + return (struct rtnexthop *) ((char *) rtnh + totlen); +} + +static inline struct nlattr *rtnh_attrs(const struct rtnexthop *rtnh) +{ + return (struct nlattr *) ((char *) rtnh + NLA_ALIGN(sizeof(*rtnh))); +} + +static inline int rtnh_attrlen(const struct rtnexthop *rtnh) +{ + return rtnh->rtnh_len - NLA_ALIGN(sizeof(*rtnh)); +} + +#endif diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index ad4c14f968a1..acc18bdf2dee 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -253,42 +253,190 @@ e_inval: #ifndef CONFIG_IP_NOSIOCRT +static inline u32 sk_extract_addr(struct sockaddr *addr) +{ + return ((struct sockaddr_in *) addr)->sin_addr.s_addr; +} + +static int put_rtax(struct nlattr *mx, int len, int type, u32 value) +{ + struct nlattr *nla; + + nla = (struct nlattr *) ((char *) mx + len); + nla->nla_type = type; + nla->nla_len = nla_attr_size(4); + *(u32 *) nla_data(nla) = value; + + return len + nla_total_size(4); +} + +static int rtentry_to_fib_config(int cmd, struct rtentry *rt, + struct fib_config *cfg) +{ + u32 addr; + int plen; + + memset(cfg, 0, sizeof(*cfg)); + + if (rt->rt_dst.sa_family != AF_INET) + return -EAFNOSUPPORT; + + /* + * Check mask for validity: + * a) it must be contiguous. + * b) destination must have all host bits clear. + * c) if application forgot to set correct family (AF_INET), + * reject request unless it is absolutely clear i.e. + * both family and mask are zero. + */ + plen = 32; + addr = sk_extract_addr(&rt->rt_dst); + if (!(rt->rt_flags & RTF_HOST)) { + u32 mask = sk_extract_addr(&rt->rt_genmask); + + if (rt->rt_genmask.sa_family != AF_INET) { + if (mask || rt->rt_genmask.sa_family) + return -EAFNOSUPPORT; + } + + if (bad_mask(mask, addr)) + return -EINVAL; + + plen = inet_mask_len(mask); + } + + cfg->fc_dst_len = plen; + cfg->fc_dst = addr; + + if (cmd != SIOCDELRT) { + cfg->fc_nlflags = NLM_F_CREATE; + cfg->fc_protocol = RTPROT_BOOT; + } + + if (rt->rt_metric) + cfg->fc_priority = rt->rt_metric - 1; + + if (rt->rt_flags & RTF_REJECT) { + cfg->fc_scope = RT_SCOPE_HOST; + cfg->fc_type = RTN_UNREACHABLE; + return 0; + } + + cfg->fc_scope = RT_SCOPE_NOWHERE; + cfg->fc_type = RTN_UNICAST; + + if (rt->rt_dev) { + char *colon; + struct net_device *dev; + char devname[IFNAMSIZ]; + + if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1)) + return -EFAULT; + + devname[IFNAMSIZ-1] = 0; + colon = strchr(devname, ':'); + if (colon) + *colon = 0; + dev = __dev_get_by_name(devname); + if (!dev) + return -ENODEV; + cfg->fc_oif = dev->ifindex; + if (colon) { + struct in_ifaddr *ifa; + struct in_device *in_dev = __in_dev_get_rtnl(dev); + if (!in_dev) + return -ENODEV; + *colon = ':'; + for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) + if (strcmp(ifa->ifa_label, devname) == 0) + break; + if (ifa == NULL) + return -ENODEV; + cfg->fc_prefsrc = ifa->ifa_local; + } + } + + addr = sk_extract_addr(&rt->rt_gateway); + if (rt->rt_gateway.sa_family == AF_INET && addr) { + cfg->fc_gw = addr; + if (rt->rt_flags & RTF_GATEWAY && + inet_addr_type(addr) == RTN_UNICAST) + cfg->fc_scope = RT_SCOPE_UNIVERSE; + } + + if (cmd == SIOCDELRT) + return 0; + + if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw) + return -EINVAL; + + if (cfg->fc_scope == RT_SCOPE_NOWHERE) + cfg->fc_scope = RT_SCOPE_LINK; + + if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) { + struct nlattr *mx; + int len = 0; + + mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL); + if (mx == NULL) + return -ENOMEM; + + if (rt->rt_flags & RTF_MTU) + len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40); + + if (rt->rt_flags & RTF_WINDOW) + len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window); + + if (rt->rt_flags & RTF_IRTT) + len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3); + + cfg->fc_mx = mx; + cfg->fc_mx_len = len; + } + + return 0; +} + /* * Handle IP routing ioctl calls. These are used to manipulate the routing tables */ int ip_rt_ioctl(unsigned int cmd, void __user *arg) { + struct fib_config cfg; + struct rtentry rt; int err; - struct kern_rta rta; - struct rtentry r; - struct { - struct nlmsghdr nlh; - struct rtmsg rtm; - } req; switch (cmd) { case SIOCADDRT: /* Add a route */ case SIOCDELRT: /* Delete a route */ if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (copy_from_user(&r, arg, sizeof(struct rtentry))) + + if (copy_from_user(&rt, arg, sizeof(rt))) return -EFAULT; + rtnl_lock(); - err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r); + err = rtentry_to_fib_config(cmd, &rt, &cfg); if (err == 0) { + struct fib_table *tb; + if (cmd == SIOCDELRT) { - struct fib_table *tb = fib_get_table(req.rtm.rtm_table); - err = -ESRCH; + tb = fib_get_table(cfg.fc_table); if (tb) - err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL); + err = tb->tb_delete(tb, &cfg); + else + err = -ESRCH; } else { - struct fib_table *tb = fib_new_table(req.rtm.rtm_table); - err = -ENOBUFS; + tb = fib_new_table(cfg.fc_table); if (tb) - err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); + err = tb->tb_insert(tb, &cfg); + else + err = -ENOBUFS; } - kfree(rta.rta_mx); + + /* allocated by rtentry_to_fib_config() */ + kfree(cfg.fc_mx); } rtnl_unlock(); return err; @@ -305,51 +453,134 @@ int ip_rt_ioctl(unsigned int cmd, void *arg) #endif -static int inet_check_attr(struct rtmsg *r, struct rtattr **rta) -{ - int i; +static struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = { + [RTA_DST] = { .type = NLA_U32 }, + [RTA_SRC] = { .type = NLA_U32 }, + [RTA_IIF] = { .type = NLA_U32 }, + [RTA_OIF] = { .type = NLA_U32 }, + [RTA_GATEWAY] = { .type = NLA_U32 }, + [RTA_PRIORITY] = { .type = NLA_U32 }, + [RTA_PREFSRC] = { .type = NLA_U32 }, + [RTA_METRICS] = { .type = NLA_NESTED }, + [RTA_MULTIPATH] = { .minlen = sizeof(struct rtnexthop) }, + [RTA_PROTOINFO] = { .type = NLA_U32 }, + [RTA_FLOW] = { .type = NLA_U32 }, + [RTA_MP_ALGO] = { .type = NLA_U32 }, +}; - for (i=1; i<=RTA_MAX; i++, rta++) { - struct rtattr *attr = *rta; - if (attr) { - if (RTA_PAYLOAD(attr) < 4) - return -EINVAL; - if (i != RTA_MULTIPATH && i != RTA_METRICS && - i != RTA_TABLE) - *rta = (struct rtattr*)RTA_DATA(attr); +static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, + struct fib_config *cfg) +{ + struct nlattr *attr; + int err, remaining; + struct rtmsg *rtm; + + err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy); + if (err < 0) + goto errout; + + memset(cfg, 0, sizeof(*cfg)); + + rtm = nlmsg_data(nlh); + cfg->fc_family = rtm->rtm_family; + cfg->fc_dst_len = rtm->rtm_dst_len; + cfg->fc_src_len = rtm->rtm_src_len; + cfg->fc_tos = rtm->rtm_tos; + cfg->fc_table = rtm->rtm_table; + cfg->fc_protocol = rtm->rtm_protocol; + cfg->fc_scope = rtm->rtm_scope; + cfg->fc_type = rtm->rtm_type; + cfg->fc_flags = rtm->rtm_flags; + cfg->fc_nlflags = nlh->nlmsg_flags; + + cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; + cfg->fc_nlinfo.nlh = nlh; + + nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) { + switch (attr->nla_type) { + case RTA_DST: + cfg->fc_dst = nla_get_u32(attr); + break; + case RTA_SRC: + cfg->fc_src = nla_get_u32(attr); + break; + case RTA_OIF: + cfg->fc_oif = nla_get_u32(attr); + break; + case RTA_GATEWAY: + cfg->fc_gw = nla_get_u32(attr); + break; + case RTA_PRIORITY: + cfg->fc_priority = nla_get_u32(attr); + break; + case RTA_PREFSRC: + cfg->fc_prefsrc = nla_get_u32(attr); + break; + case RTA_METRICS: + cfg->fc_mx = nla_data(attr); + cfg->fc_mx_len = nla_len(attr); + break; + case RTA_MULTIPATH: + cfg->fc_mp = nla_data(attr); + cfg->fc_mp_len = nla_len(attr); + break; + case RTA_FLOW: + cfg->fc_flow = nla_get_u32(attr); + break; + case RTA_MP_ALGO: + cfg->fc_mp_alg = nla_get_u32(attr); + break; + case RTA_TABLE: + cfg->fc_table = nla_get_u32(attr); + break; } } + return 0; +errout: + return err; } int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct fib_table * tb; - struct rtattr **rta = arg; - struct rtmsg *r = NLMSG_DATA(nlh); + struct fib_config cfg; + struct fib_table *tb; + int err; - if (inet_check_attr(r, rta)) - return -EINVAL; + err = rtm_to_fib_config(skb, nlh, &cfg); + if (err < 0) + goto errout; - tb = fib_get_table(rtm_get_table(rta, r->rtm_table)); - if (tb) - return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); - return -ESRCH; + tb = fib_get_table(cfg.fc_table); + if (tb == NULL) { + err = -ESRCH; + goto errout; + } + + err = tb->tb_delete(tb, &cfg); +errout: + return err; } int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct fib_table * tb; - struct rtattr **rta = arg; - struct rtmsg *r = NLMSG_DATA(nlh); + struct fib_config cfg; + struct fib_table *tb; + int err; - if (inet_check_attr(r, rta)) - return -EINVAL; + err = rtm_to_fib_config(skb, nlh, &cfg); + if (err < 0) + goto errout; - tb = fib_new_table(rtm_get_table(rta, r->rtm_table)); - if (tb) - return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); - return -ENOBUFS; + tb = fib_new_table(cfg.fc_table); + if (tb == NULL) { + err = -ENOBUFS; + goto errout; + } + + err = tb->tb_insert(tb, &cfg); +errout: + return err; } int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) @@ -396,17 +627,19 @@ out: only when netlink is already locked. */ -static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa) +static void fib_magic(int cmd, int type, u32 dst, int dst_len, + struct in_ifaddr *ifa) { - struct fib_table * tb; - struct { - struct nlmsghdr nlh; - struct rtmsg rtm; - } req; - struct kern_rta rta; - - memset(&req.rtm, 0, sizeof(req.rtm)); - memset(&rta, 0, sizeof(rta)); + struct fib_table *tb; + struct fib_config cfg = { + .fc_protocol = RTPROT_KERNEL, + .fc_type = type, + .fc_dst = dst, + .fc_dst_len = dst_len, + .fc_prefsrc = ifa->ifa_local, + .fc_oif = ifa->ifa_dev->dev->ifindex, + .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND, + }; if (type == RTN_UNICAST) tb = fib_new_table(RT_TABLE_MAIN); @@ -416,26 +649,17 @@ static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr if (tb == NULL) return; - req.nlh.nlmsg_len = sizeof(req); - req.nlh.nlmsg_type = cmd; - req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND; - req.nlh.nlmsg_pid = 0; - req.nlh.nlmsg_seq = 0; + cfg.fc_table = tb->tb_id; - req.rtm.rtm_dst_len = dst_len; - req.rtm.rtm_table = tb->tb_id; - req.rtm.rtm_protocol = RTPROT_KERNEL; - req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST); - req.rtm.rtm_type = type; - - rta.rta_dst = &dst; - rta.rta_prefsrc = &ifa->ifa_local; - rta.rta_oif = &ifa->ifa_dev->dev->ifindex; + if (type != RTN_LOCAL) + cfg.fc_scope = RT_SCOPE_LINK; + else + cfg.fc_scope = RT_SCOPE_HOST; if (cmd == RTM_NEWROUTE) - tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); + tb->tb_insert(tb, &cfg); else - tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL); + tb->tb_delete(tb, &cfg); } void fib_add_ifaddr(struct in_ifaddr *ifa) diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index b5bee1a71e5c..357557549ce5 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -379,42 +379,39 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, u32 key) return NULL; } -static int -fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, - struct nlmsghdr *n, struct netlink_skb_parms *req) +static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) { struct fn_hash *table = (struct fn_hash *) tb->tb_data; struct fib_node *new_f, *f; struct fib_alias *fa, *new_fa; struct fn_zone *fz; struct fib_info *fi; - int z = r->rtm_dst_len; - int type = r->rtm_type; - u8 tos = r->rtm_tos; + u8 tos = cfg->fc_tos; u32 key; int err; - if (z > 32) + if (cfg->fc_dst_len > 32) return -EINVAL; - fz = table->fn_zones[z]; - if (!fz && !(fz = fn_new_zone(table, z))) + + fz = table->fn_zones[cfg->fc_dst_len]; + if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len))) return -ENOBUFS; key = 0; - if (rta->rta_dst) { - u32 dst; - memcpy(&dst, rta->rta_dst, 4); - if (dst & ~FZ_MASK(fz)) + if (cfg->fc_dst) { + if (cfg->fc_dst & ~FZ_MASK(fz)) return -EINVAL; - key = fz_key(dst, fz); + key = fz_key(cfg->fc_dst, fz); } - if ((fi = fib_create_info(r, rta, n, &err)) == NULL) - return err; + fi = fib_create_info(cfg); + if (IS_ERR(fi)) + return PTR_ERR(fi); if (fz->fz_nent > (fz->fz_divisor<<1) && fz->fz_divisor < FZ_MAX_DIVISOR && - (z==32 || (1< fz->fz_divisor)) + (cfg->fc_dst_len == 32 || + (1 << cfg->fc_dst_len) > fz->fz_divisor)) fn_rehash_zone(fz); f = fib_find_node(fz, key); @@ -440,18 +437,18 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, struct fib_alias *fa_orig; err = -EEXIST; - if (n->nlmsg_flags & NLM_F_EXCL) + if (cfg->fc_nlflags & NLM_F_EXCL) goto out; - if (n->nlmsg_flags & NLM_F_REPLACE) { + if (cfg->fc_nlflags & NLM_F_REPLACE) { struct fib_info *fi_drop; u8 state; write_lock_bh(&fib_hash_lock); fi_drop = fa->fa_info; fa->fa_info = fi; - fa->fa_type = type; - fa->fa_scope = r->rtm_scope; + fa->fa_type = cfg->fc_type; + fa->fa_scope = cfg->fc_scope; state = fa->fa_state; fa->fa_state &= ~FA_S_ACCESSED; fib_hash_genid++; @@ -474,17 +471,17 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, break; if (fa->fa_info->fib_priority != fi->fib_priority) break; - if (fa->fa_type == type && - fa->fa_scope == r->rtm_scope && + if (fa->fa_type == cfg->fc_type && + fa->fa_scope == cfg->fc_scope && fa->fa_info == fi) goto out; } - if (!(n->nlmsg_flags & NLM_F_APPEND)) + if (!(cfg->fc_nlflags & NLM_F_APPEND)) fa = fa_orig; } err = -ENOENT; - if (!(n->nlmsg_flags&NLM_F_CREATE)) + if (!(cfg->fc_nlflags & NLM_F_CREATE)) goto out; err = -ENOBUFS; @@ -506,8 +503,8 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, new_fa->fa_info = fi; new_fa->fa_tos = tos; - new_fa->fa_type = type; - new_fa->fa_scope = r->rtm_scope; + new_fa->fa_type = cfg->fc_type; + new_fa->fa_scope = cfg->fc_scope; new_fa->fa_state = 0; /* @@ -526,7 +523,8 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, fz->fz_nent++; rt_cache_flush(-1); - rtmsg_fib(RTM_NEWROUTE, key, new_fa, z, tb->tb_id, n, req); + rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id, + &cfg->fc_nlinfo); return 0; out_free_new_fa: @@ -537,30 +535,25 @@ out: } -static int -fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, - struct nlmsghdr *n, struct netlink_skb_parms *req) +static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg) { struct fn_hash *table = (struct fn_hash*)tb->tb_data; struct fib_node *f; struct fib_alias *fa, *fa_to_delete; - int z = r->rtm_dst_len; struct fn_zone *fz; u32 key; - u8 tos = r->rtm_tos; - if (z > 32) + if (cfg->fc_dst_len > 32) return -EINVAL; - if ((fz = table->fn_zones[z]) == NULL) + + if ((fz = table->fn_zones[cfg->fc_dst_len]) == NULL) return -ESRCH; key = 0; - if (rta->rta_dst) { - u32 dst; - memcpy(&dst, rta->rta_dst, 4); - if (dst & ~FZ_MASK(fz)) + if (cfg->fc_dst) { + if (cfg->fc_dst & ~FZ_MASK(fz)) return -EINVAL; - key = fz_key(dst, fz); + key = fz_key(cfg->fc_dst, fz); } f = fib_find_node(fz, key); @@ -568,7 +561,7 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, if (!f) fa = NULL; else - fa = fib_find_alias(&f->fn_alias, tos, 0); + fa = fib_find_alias(&f->fn_alias, cfg->fc_tos, 0); if (!fa) return -ESRCH; @@ -577,16 +570,16 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, list_for_each_entry_continue(fa, &f->fn_alias, fa_list) { struct fib_info *fi = fa->fa_info; - if (fa->fa_tos != tos) + if (fa->fa_tos != cfg->fc_tos) break; - if ((!r->rtm_type || - fa->fa_type == r->rtm_type) && - (r->rtm_scope == RT_SCOPE_NOWHERE || - fa->fa_scope == r->rtm_scope) && - (!r->rtm_protocol || - fi->fib_protocol == r->rtm_protocol) && - fib_nh_match(r, n, rta, fi) == 0) { + if ((!cfg->fc_type || + fa->fa_type == cfg->fc_type) && + (cfg->fc_scope == RT_SCOPE_NOWHERE || + fa->fa_scope == cfg->fc_scope) && + (!cfg->fc_protocol || + fi->fib_protocol == cfg->fc_protocol) && + fib_nh_match(cfg, fi) == 0) { fa_to_delete = fa; break; } @@ -596,7 +589,8 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, int kill_fn; fa = fa_to_delete; - rtmsg_fib(RTM_DELROUTE, key, fa, z, tb->tb_id, n, req); + rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len, + tb->tb_id, &cfg->fc_nlinfo); kill_fn = 0; write_lock_bh(&fib_hash_lock); diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index ddd52496b451..d6d1a89e4003 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -23,19 +23,14 @@ extern int fib_semantic_match(struct list_head *head, struct fib_result *res, __u32 zone, __u32 mask, int prefixlen); extern void fib_release_info(struct fib_info *); -extern struct fib_info *fib_create_info(const struct rtmsg *r, - struct kern_rta *rta, - const struct nlmsghdr *, - int *err); -extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *, - struct kern_rta *rta, struct fib_info *fi); +extern struct fib_info *fib_create_info(struct fib_config *cfg); +extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int); extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, - int z, u32 tb_id, - struct nlmsghdr *n, struct netlink_skb_parms *req); + int dst_len, u32 tb_id, struct nl_info *info); extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); extern int fib_detect_death(struct fib_info *fi, int order, diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 5dfdad5cbcd4..340f9db389e5 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -44,6 +44,7 @@ #include #include #include +#include #include "fib_lookup.h" @@ -273,27 +274,27 @@ int ip_fib_check_default(u32 gw, struct net_device *dev) } void rtmsg_fib(int event, u32 key, struct fib_alias *fa, - int z, u32 tb_id, - struct nlmsghdr *n, struct netlink_skb_parms *req) + int dst_len, u32 tb_id, struct nl_info *info) { struct sk_buff *skb; - u32 pid = req ? req->pid : n->nlmsg_pid; int payload = sizeof(struct rtmsg) + 256; + u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; int err = -ENOBUFS; skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL); if (skb == NULL) goto errout; - err = fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id, - fa->fa_type, fa->fa_scope, &key, z, fa->fa_tos, - fa->fa_info, 0); + err = fib_dump_info(skb, info->pid, seq, event, tb_id, + fa->fa_type, fa->fa_scope, &key, dst_len, + fa->fa_tos, fa->fa_info, 0); if (err < 0) { kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, pid, RTNLGRP_IPV4_ROUTE, n, GFP_KERNEL); + err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE, + info->nlh, GFP_KERNEL); errout: if (err < 0) rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err); @@ -342,102 +343,100 @@ int fib_detect_death(struct fib_info *fi, int order, #ifdef CONFIG_IP_ROUTE_MULTIPATH -static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type) -{ - while (RTA_OK(attr,attrlen)) { - if (attr->rta_type == type) - return *(u32*)RTA_DATA(attr); - attr = RTA_NEXT(attr, attrlen); - } - return 0; -} - -static int -fib_count_nexthops(struct rtattr *rta) +static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining) { int nhs = 0; - struct rtnexthop *nhp = RTA_DATA(rta); - int nhlen = RTA_PAYLOAD(rta); - while (nhlen >= (int)sizeof(struct rtnexthop)) { - if ((nhlen -= nhp->rtnh_len) < 0) - return 0; + while (rtnh_ok(rtnh, remaining)) { nhs++; - nhp = RTNH_NEXT(nhp); - }; - return nhs; + rtnh = rtnh_next(rtnh, &remaining); + } + + /* leftover implies invalid nexthop configuration, discard it */ + return remaining > 0 ? 0 : nhs; } -static int -fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r) +static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, + int remaining, struct fib_config *cfg) { - struct rtnexthop *nhp = RTA_DATA(rta); - int nhlen = RTA_PAYLOAD(rta); - change_nexthops(fi) { - int attrlen = nhlen - sizeof(struct rtnexthop); - if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) + int attrlen; + + if (!rtnh_ok(rtnh, remaining)) return -EINVAL; - nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags; - nh->nh_oif = nhp->rtnh_ifindex; - nh->nh_weight = nhp->rtnh_hops + 1; - if (attrlen) { - nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); + + nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; + nh->nh_oif = rtnh->rtnh_ifindex; + nh->nh_weight = rtnh->rtnh_hops + 1; + + attrlen = rtnh_attrlen(rtnh); + if (attrlen > 0) { + struct nlattr *nla, *attrs = rtnh_attrs(rtnh); + + nla = nla_find(attrs, attrlen, RTA_GATEWAY); + nh->nh_gw = nla ? nla_get_u32(nla) : 0; #ifdef CONFIG_NET_CLS_ROUTE - nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW); + nla = nla_find(attrs, attrlen, RTA_FLOW); + nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; #endif } - nhp = RTNH_NEXT(nhp); + + rtnh = rtnh_next(rtnh, &remaining); } endfor_nexthops(fi); + return 0; } #endif -int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta, - struct fib_info *fi) +int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) { #ifdef CONFIG_IP_ROUTE_MULTIPATH - struct rtnexthop *nhp; - int nhlen; + struct rtnexthop *rtnh; + int remaining; #endif - if (rta->rta_priority && - *rta->rta_priority != fi->fib_priority) + if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority) return 1; - if (rta->rta_oif || rta->rta_gw) { - if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) && - (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0)) + if (cfg->fc_oif || cfg->fc_gw) { + if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && + (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw)) return 0; return 1; } #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (rta->rta_mp == NULL) + if (cfg->fc_mp == NULL) return 0; - nhp = RTA_DATA(rta->rta_mp); - nhlen = RTA_PAYLOAD(rta->rta_mp); + + rtnh = cfg->fc_mp; + remaining = cfg->fc_mp_len; for_nexthops(fi) { - int attrlen = nhlen - sizeof(struct rtnexthop); - u32 gw; + int attrlen; - if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) + if (!rtnh_ok(rtnh, remaining)) return -EINVAL; - if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif) + + if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif) return 1; - if (attrlen) { - gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); - if (gw && gw != nh->nh_gw) + + attrlen = rtnh_attrlen(rtnh); + if (attrlen < 0) { + struct nlattr *nla, *attrs = rtnh_attrs(rtnh); + + nla = nla_find(attrs, attrlen, RTA_GATEWAY); + if (nla && nla_get_u32(nla) != nh->nh_gw) return 1; #ifdef CONFIG_NET_CLS_ROUTE - gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW); - if (gw && gw != nh->nh_tclassid) + nla = nla_find(attrs, attrlen, RTA_FLOW); + if (nla && nla_get_u32(nla) != nh->nh_tclassid) return 1; #endif } - nhp = RTNH_NEXT(nhp); + + rtnh = rtnh_next(rtnh, &remaining); } endfor_nexthops(fi); #endif return 0; @@ -488,7 +487,8 @@ int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta, |-> {local prefix} (terminal node) */ -static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh) +static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, + struct fib_nh *nh) { int err; @@ -502,7 +502,7 @@ static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_n if (nh->nh_flags&RTNH_F_ONLINK) { struct net_device *dev; - if (r->rtm_scope >= RT_SCOPE_LINK) + if (cfg->fc_scope >= RT_SCOPE_LINK) return -EINVAL; if (inet_addr_type(nh->nh_gw) != RTN_UNICAST) return -EINVAL; @@ -516,10 +516,15 @@ static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_n return 0; } { - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = nh->nh_gw, - .scope = r->rtm_scope + 1 } }, - .oif = nh->nh_oif }; + struct flowi fl = { + .nl_u = { + .ip4_u = { + .daddr = nh->nh_gw, + .scope = cfg->fc_scope + 1, + }, + }, + .oif = nh->nh_oif, + }; /* It is not necessary, but requires a bit of thinking */ if (fl.fl4_scope < RT_SCOPE_LINK) @@ -646,39 +651,28 @@ static void fib_hash_move(struct hlist_head *new_info_hash, fib_hash_free(old_laddrhash, bytes); } -struct fib_info * -fib_create_info(const struct rtmsg *r, struct kern_rta *rta, - const struct nlmsghdr *nlh, int *errp) +struct fib_info *fib_create_info(struct fib_config *cfg) { int err; struct fib_info *fi = NULL; struct fib_info *ofi; -#ifdef CONFIG_IP_ROUTE_MULTIPATH int nhs = 1; -#else - const int nhs = 1; -#endif -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - u32 mp_alg = IP_MP_ALG_NONE; -#endif /* Fast check to catch the most weird cases */ - if (fib_props[r->rtm_type].scope > r->rtm_scope) + if (fib_props[cfg->fc_type].scope > cfg->fc_scope) goto err_inval; #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (rta->rta_mp) { - nhs = fib_count_nexthops(rta->rta_mp); + if (cfg->fc_mp) { + nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len); if (nhs == 0) goto err_inval; } #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (rta->rta_mp_alg) { - mp_alg = *rta->rta_mp_alg; - - if (mp_alg < IP_MP_ALG_NONE || - mp_alg > IP_MP_ALG_MAX) + if (cfg->fc_mp_alg) { + if (cfg->fc_mp_alg < IP_MP_ALG_NONE || + cfg->fc_mp_alg > IP_MP_ALG_MAX) goto err_inval; } #endif @@ -714,43 +708,42 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta, goto failure; fib_info_cnt++; - fi->fib_protocol = r->rtm_protocol; + fi->fib_protocol = cfg->fc_protocol; + fi->fib_flags = cfg->fc_flags; + fi->fib_priority = cfg->fc_priority; + fi->fib_prefsrc = cfg->fc_prefsrc; fi->fib_nhs = nhs; change_nexthops(fi) { nh->nh_parent = fi; } endfor_nexthops(fi) - fi->fib_flags = r->rtm_flags; - if (rta->rta_priority) - fi->fib_priority = *rta->rta_priority; - if (rta->rta_mx) { - int attrlen = RTA_PAYLOAD(rta->rta_mx); - struct rtattr *attr = RTA_DATA(rta->rta_mx); + if (cfg->fc_mx) { + struct nlattr *nla; + int remaining; - while (RTA_OK(attr, attrlen)) { - unsigned flavor = attr->rta_type; - if (flavor) { - if (flavor > RTAX_MAX) + nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { + int type = nla->nla_type; + + if (type) { + if (type > RTAX_MAX) goto err_inval; - fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr); + fi->fib_metrics[type - 1] = nla_get_u32(nla); } - attr = RTA_NEXT(attr, attrlen); } } - if (rta->rta_prefsrc) - memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4); - if (rta->rta_mp) { + if (cfg->fc_mp) { #ifdef CONFIG_IP_ROUTE_MULTIPATH - if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0) + err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg); + if (err != 0) goto failure; - if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif) + if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) goto err_inval; - if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4)) + if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) goto err_inval; #ifdef CONFIG_NET_CLS_ROUTE - if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4)) + if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) goto err_inval; #endif #else @@ -758,34 +751,32 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta, #endif } else { struct fib_nh *nh = fi->fib_nh; - if (rta->rta_oif) - nh->nh_oif = *rta->rta_oif; - if (rta->rta_gw) - memcpy(&nh->nh_gw, rta->rta_gw, 4); + + nh->nh_oif = cfg->fc_oif; + nh->nh_gw = cfg->fc_gw; + nh->nh_flags = cfg->fc_flags; #ifdef CONFIG_NET_CLS_ROUTE - if (rta->rta_flow) - memcpy(&nh->nh_tclassid, rta->rta_flow, 4); + nh->nh_tclassid = cfg->fc_flow; #endif - nh->nh_flags = r->rtm_flags; #ifdef CONFIG_IP_ROUTE_MULTIPATH nh->nh_weight = 1; #endif } #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - fi->fib_mp_alg = mp_alg; + fi->fib_mp_alg = cfg->fc_mp_alg; #endif - if (fib_props[r->rtm_type].error) { - if (rta->rta_gw || rta->rta_oif || rta->rta_mp) + if (fib_props[cfg->fc_type].error) { + if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) goto err_inval; goto link_it; } - if (r->rtm_scope > RT_SCOPE_HOST) + if (cfg->fc_scope > RT_SCOPE_HOST) goto err_inval; - if (r->rtm_scope == RT_SCOPE_HOST) { + if (cfg->fc_scope == RT_SCOPE_HOST) { struct fib_nh *nh = fi->fib_nh; /* Local address is added. */ @@ -798,14 +789,14 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta, goto failure; } else { change_nexthops(fi) { - if ((err = fib_check_nh(r, fi, nh)) != 0) + if ((err = fib_check_nh(cfg, fi, nh)) != 0) goto failure; } endfor_nexthops(fi) } if (fi->fib_prefsrc) { - if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL || - memcmp(&fi->fib_prefsrc, rta->rta_dst, 4)) + if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || + fi->fib_prefsrc != cfg->fc_dst) if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) goto err_inval; } @@ -846,12 +837,12 @@ err_inval: err = -EINVAL; failure: - *errp = err; if (fi) { fi->fib_dead = 1; free_fib_info(fi); } - return NULL; + + return ERR_PTR(err); } /* Note! fib_semantic_match intentionally uses RCU list functions. */ @@ -1012,150 +1003,6 @@ rtattr_failure: return -1; } -#ifndef CONFIG_IP_NOSIOCRT - -int -fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, - struct kern_rta *rta, struct rtentry *r) -{ - int plen; - u32 *ptr; - - memset(rtm, 0, sizeof(*rtm)); - memset(rta, 0, sizeof(*rta)); - - if (r->rt_dst.sa_family != AF_INET) - return -EAFNOSUPPORT; - - /* Check mask for validity: - a) it must be contiguous. - b) destination must have all host bits clear. - c) if application forgot to set correct family (AF_INET), - reject request unless it is absolutely clear i.e. - both family and mask are zero. - */ - plen = 32; - ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr; - if (!(r->rt_flags&RTF_HOST)) { - u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr; - if (r->rt_genmask.sa_family != AF_INET) { - if (mask || r->rt_genmask.sa_family) - return -EAFNOSUPPORT; - } - if (bad_mask(mask, *ptr)) - return -EINVAL; - plen = inet_mask_len(mask); - } - - nl->nlmsg_flags = NLM_F_REQUEST; - nl->nlmsg_pid = 0; - nl->nlmsg_seq = 0; - nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm)); - if (cmd == SIOCDELRT) { - nl->nlmsg_type = RTM_DELROUTE; - nl->nlmsg_flags = 0; - } else { - nl->nlmsg_type = RTM_NEWROUTE; - nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE; - rtm->rtm_protocol = RTPROT_BOOT; - } - - rtm->rtm_dst_len = plen; - rta->rta_dst = ptr; - - if (r->rt_metric) { - *(u32*)&r->rt_pad3 = r->rt_metric - 1; - rta->rta_priority = (u32*)&r->rt_pad3; - } - if (r->rt_flags&RTF_REJECT) { - rtm->rtm_scope = RT_SCOPE_HOST; - rtm->rtm_type = RTN_UNREACHABLE; - return 0; - } - rtm->rtm_scope = RT_SCOPE_NOWHERE; - rtm->rtm_type = RTN_UNICAST; - - if (r->rt_dev) { - char *colon; - struct net_device *dev; - char devname[IFNAMSIZ]; - - if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1)) - return -EFAULT; - devname[IFNAMSIZ-1] = 0; - colon = strchr(devname, ':'); - if (colon) - *colon = 0; - dev = __dev_get_by_name(devname); - if (!dev) - return -ENODEV; - rta->rta_oif = &dev->ifindex; - if (colon) { - struct in_ifaddr *ifa; - struct in_device *in_dev = __in_dev_get_rtnl(dev); - if (!in_dev) - return -ENODEV; - *colon = ':'; - for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) - if (strcmp(ifa->ifa_label, devname) == 0) - break; - if (ifa == NULL) - return -ENODEV; - rta->rta_prefsrc = &ifa->ifa_local; - } - } - - ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr; - if (r->rt_gateway.sa_family == AF_INET && *ptr) { - rta->rta_gw = ptr; - if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST) - rtm->rtm_scope = RT_SCOPE_UNIVERSE; - } - - if (cmd == SIOCDELRT) - return 0; - - if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL) - return -EINVAL; - - if (rtm->rtm_scope == RT_SCOPE_NOWHERE) - rtm->rtm_scope = RT_SCOPE_LINK; - - if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) { - struct rtattr *rec; - struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL); - if (mx == NULL) - return -ENOMEM; - rta->rta_mx = mx; - mx->rta_type = RTA_METRICS; - mx->rta_len = RTA_LENGTH(0); - if (r->rt_flags&RTF_MTU) { - rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); - rec->rta_type = RTAX_ADVMSS; - rec->rta_len = RTA_LENGTH(4); - mx->rta_len += RTA_LENGTH(4); - *(u32*)RTA_DATA(rec) = r->rt_mtu - 40; - } - if (r->rt_flags&RTF_WINDOW) { - rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); - rec->rta_type = RTAX_WINDOW; - rec->rta_len = RTA_LENGTH(4); - mx->rta_len += RTA_LENGTH(4); - *(u32*)RTA_DATA(rec) = r->rt_window; - } - if (r->rt_flags&RTF_IRTT) { - rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); - rec->rta_type = RTAX_RTT; - rec->rta_len = RTA_LENGTH(4); - mx->rta_len += RTA_LENGTH(4); - *(u32*)RTA_DATA(rec) = r->rt_irtt<<3; - } - } - return 0; -} - -#endif - /* Update FIB if: - local address disappeared -> we must delete all the entries diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 2a580eb2579b..41bef0a88ab6 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1124,17 +1124,14 @@ err: return fa_head; } -static int -fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, - struct nlmsghdr *nlhdr, struct netlink_skb_parms *req) +static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) { struct trie *t = (struct trie *) tb->tb_data; struct fib_alias *fa, *new_fa; struct list_head *fa_head = NULL; struct fib_info *fi; - int plen = r->rtm_dst_len; - int type = r->rtm_type; - u8 tos = r->rtm_tos; + int plen = cfg->fc_dst_len; + u8 tos = cfg->fc_tos; u32 key, mask; int err; struct leaf *l; @@ -1142,11 +1139,7 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, if (plen > 32) return -EINVAL; - key = 0; - if (rta->rta_dst) - memcpy(&key, rta->rta_dst, 4); - - key = ntohl(key); + key = ntohl(cfg->fc_dst); pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); @@ -1157,10 +1150,11 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, key = key & mask; - fi = fib_create_info(r, rta, nlhdr, &err); - - if (!fi) + fi = fib_create_info(cfg); + if (IS_ERR(fi)) { + err = PTR_ERR(fi); goto err; + } l = fib_find_node(t, key); fa = NULL; @@ -1185,10 +1179,10 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, struct fib_alias *fa_orig; err = -EEXIST; - if (nlhdr->nlmsg_flags & NLM_F_EXCL) + if (cfg->fc_nlflags & NLM_F_EXCL) goto out; - if (nlhdr->nlmsg_flags & NLM_F_REPLACE) { + if (cfg->fc_nlflags & NLM_F_REPLACE) { struct fib_info *fi_drop; u8 state; @@ -1200,8 +1194,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, fi_drop = fa->fa_info; new_fa->fa_tos = fa->fa_tos; new_fa->fa_info = fi; - new_fa->fa_type = type; - new_fa->fa_scope = r->rtm_scope; + new_fa->fa_type = cfg->fc_type; + new_fa->fa_scope = cfg->fc_scope; state = fa->fa_state; new_fa->fa_state &= ~FA_S_ACCESSED; @@ -1224,17 +1218,17 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, break; if (fa->fa_info->fib_priority != fi->fib_priority) break; - if (fa->fa_type == type && - fa->fa_scope == r->rtm_scope && + if (fa->fa_type == cfg->fc_type && + fa->fa_scope == cfg->fc_scope && fa->fa_info == fi) { goto out; } } - if (!(nlhdr->nlmsg_flags & NLM_F_APPEND)) + if (!(cfg->fc_nlflags & NLM_F_APPEND)) fa = fa_orig; } err = -ENOENT; - if (!(nlhdr->nlmsg_flags & NLM_F_CREATE)) + if (!(cfg->fc_nlflags & NLM_F_CREATE)) goto out; err = -ENOBUFS; @@ -1244,8 +1238,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, new_fa->fa_info = fi; new_fa->fa_tos = tos; - new_fa->fa_type = type; - new_fa->fa_scope = r->rtm_scope; + new_fa->fa_type = cfg->fc_type; + new_fa->fa_scope = cfg->fc_scope; new_fa->fa_state = 0; /* * Insert new entry to the list. @@ -1262,7 +1256,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, (fa ? &fa->fa_list : fa_head)); rt_cache_flush(-1); - rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req); + rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, + &cfg->fc_nlinfo); succeeded: return 0; @@ -1548,28 +1543,21 @@ static int trie_leaf_remove(struct trie *t, t_key key) return 1; } -static int -fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, - struct nlmsghdr *nlhdr, struct netlink_skb_parms *req) +static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg) { struct trie *t = (struct trie *) tb->tb_data; u32 key, mask; - int plen = r->rtm_dst_len; - u8 tos = r->rtm_tos; + int plen = cfg->fc_dst_len; + u8 tos = cfg->fc_tos; struct fib_alias *fa, *fa_to_delete; struct list_head *fa_head; struct leaf *l; struct leaf_info *li; - if (plen > 32) return -EINVAL; - key = 0; - if (rta->rta_dst) - memcpy(&key, rta->rta_dst, 4); - - key = ntohl(key); + key = ntohl(cfg->fc_dst); mask = ntohl(inet_make_mask(plen)); if (key & ~mask) @@ -1598,13 +1586,12 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, if (fa->fa_tos != tos) break; - if ((!r->rtm_type || - fa->fa_type == r->rtm_type) && - (r->rtm_scope == RT_SCOPE_NOWHERE || - fa->fa_scope == r->rtm_scope) && - (!r->rtm_protocol || - fi->fib_protocol == r->rtm_protocol) && - fib_nh_match(r, nlhdr, rta, fi) == 0) { + if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && + (cfg->fc_scope == RT_SCOPE_NOWHERE || + fa->fa_scope == cfg->fc_scope) && + (!cfg->fc_protocol || + fi->fib_protocol == cfg->fc_protocol) && + fib_nh_match(cfg, fi) == 0) { fa_to_delete = fa; break; } @@ -1614,7 +1601,8 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, return -ESRCH; fa = fa_to_delete; - rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req); + rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, + &cfg->fc_nlinfo); l = fib_find_node(t, key); li = find_leaf_info(l, plen); From be403ea1856f1428b5912b42184acbba808c41d6 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 17 Aug 2006 18:15:17 -0700 Subject: [PATCH 0486/1063] [IPv4]: Convert FIB dumping to use new netlink api Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 4 +- net/ipv4/fib_hash.c | 2 +- net/ipv4/fib_lookup.h | 2 +- net/ipv4/fib_semantics.c | 88 ++++++++++++++++++++++------------------ net/ipv4/fib_trie.c | 2 +- net/ipv4/route.c | 68 +++++++++++++++---------------- 6 files changed, 86 insertions(+), 80 deletions(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index acc18bdf2dee..d537c933abe3 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -591,8 +591,8 @@ int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) struct hlist_node *node; int dumped = 0; - if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && - ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) + if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && + ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED) return ip_rt_dump(skb, cb); s_h = cb->args[0]; diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 357557549ce5..88133b383dc5 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -693,7 +693,7 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, tb->tb_id, fa->fa_type, fa->fa_scope, - &f->fn_key, + f->fn_key, fz->fz_order, fa->fa_tos, fa->fa_info, diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index d6d1a89e4003..fd6f7769f8ab 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -26,7 +26,7 @@ extern void fib_release_info(struct fib_info *); extern struct fib_info *fib_create_info(struct fib_config *cfg); extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - u32 tb_id, u8 type, u8 scope, void *dst, + u32 tb_id, u8 type, u8 scope, u32 dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int); extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 340f9db389e5..2ead09543f68 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -286,7 +286,7 @@ void rtmsg_fib(int event, u32 key, struct fib_alias *fa, goto errout; err = fib_dump_info(skb, info->pid, seq, event, tb_id, - fa->fa_type, fa->fa_scope, &key, dst_len, + fa->fa_type, fa->fa_scope, key, dst_len, fa->fa_tos, fa->fa_info, 0); if (err < 0) { kfree_skb(skb); @@ -928,79 +928,87 @@ u32 __fib_res_prefsrc(struct fib_result *res) return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); } -int -fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - u32 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, - struct fib_info *fi, unsigned int flags) +int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, + u32 tb_id, u8 type, u8 scope, u32 dst, int dst_len, u8 tos, + struct fib_info *fi, unsigned int flags) { + struct nlmsghdr *nlh; struct rtmsg *rtm; - struct nlmsghdr *nlh; - unsigned char *b = skb->tail; - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags); - rtm = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags); + if (nlh == NULL) + return -ENOBUFS; + + rtm = nlmsg_data(nlh); rtm->rtm_family = AF_INET; rtm->rtm_dst_len = dst_len; rtm->rtm_src_len = 0; rtm->rtm_tos = tos; rtm->rtm_table = tb_id; - RTA_PUT_U32(skb, RTA_TABLE, tb_id); + NLA_PUT_U32(skb, RTA_TABLE, tb_id); rtm->rtm_type = type; rtm->rtm_flags = fi->fib_flags; rtm->rtm_scope = scope; - if (rtm->rtm_dst_len) - RTA_PUT(skb, RTA_DST, 4, dst); rtm->rtm_protocol = fi->fib_protocol; + + if (rtm->rtm_dst_len) + NLA_PUT_U32(skb, RTA_DST, dst); + if (fi->fib_priority) - RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority); + NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority); + if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) - goto rtattr_failure; + goto nla_put_failure; + if (fi->fib_prefsrc) - RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc); + NLA_PUT_U32(skb, RTA_PREFSRC, fi->fib_prefsrc); + if (fi->fib_nhs == 1) { if (fi->fib_nh->nh_gw) - RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw); + NLA_PUT_U32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw); + if (fi->fib_nh->nh_oif) - RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif); + NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); #ifdef CONFIG_NET_CLS_ROUTE if (fi->fib_nh[0].nh_tclassid) - RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid); + NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); #endif } #ifdef CONFIG_IP_ROUTE_MULTIPATH if (fi->fib_nhs > 1) { - struct rtnexthop *nhp; - struct rtattr *mp_head; - if (skb_tailroom(skb) <= RTA_SPACE(0)) - goto rtattr_failure; - mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0)); + struct rtnexthop *rtnh; + struct nlattr *mp; + + mp = nla_nest_start(skb, RTA_MULTIPATH); + if (mp == NULL) + goto nla_put_failure; for_nexthops(fi) { - if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) - goto rtattr_failure; - nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); - nhp->rtnh_flags = nh->nh_flags & 0xFF; - nhp->rtnh_hops = nh->nh_weight-1; - nhp->rtnh_ifindex = nh->nh_oif; + rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); + if (rtnh == NULL) + goto nla_put_failure; + + rtnh->rtnh_flags = nh->nh_flags & 0xFF; + rtnh->rtnh_hops = nh->nh_weight - 1; + rtnh->rtnh_ifindex = nh->nh_oif; + if (nh->nh_gw) - RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw); + NLA_PUT_U32(skb, RTA_GATEWAY, nh->nh_gw); #ifdef CONFIG_NET_CLS_ROUTE if (nh->nh_tclassid) - RTA_PUT(skb, RTA_FLOW, 4, &nh->nh_tclassid); + NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); #endif - nhp->rtnh_len = skb->tail - (unsigned char*)nhp; + /* length of rtnetlink header + attributes */ + rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; } endfor_nexthops(fi); - mp_head->rta_type = RTA_MULTIPATH; - mp_head->rta_len = skb->tail - (u8*)mp_head; + + nla_nest_end(skb, mp); } #endif - nlh->nlmsg_len = skb->tail - b; - return skb->len; + return nlmsg_end(skb, nlh); -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; +nla_put_failure: + return nlmsg_cancel(skb, nlh); } /* diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 41bef0a88ab6..9c3ff6ba6e21 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1854,7 +1854,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi tb->tb_id, fa->fa_type, fa->fa_scope, - &xkey, + xkey, plen, fa->fa_tos, fa->fa_info, 0) < 0) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b8f6cadc5b3a..31b67059ac29 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2639,52 +2639,54 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, { struct rtable *rt = (struct rtable*)skb->dst; struct rtmsg *r; - struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + struct nlmsghdr *nlh; struct rta_cacheinfo ci; -#ifdef CONFIG_IP_MROUTE - struct rtattr *eptr; -#endif - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags); - r = NLMSG_DATA(nlh); + + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); + if (nlh == NULL) + return -ENOBUFS; + + r = nlmsg_data(nlh); r->rtm_family = AF_INET; r->rtm_dst_len = 32; r->rtm_src_len = 0; r->rtm_tos = rt->fl.fl4_tos; r->rtm_table = RT_TABLE_MAIN; - RTA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); + NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); r->rtm_type = rt->rt_type; r->rtm_scope = RT_SCOPE_UNIVERSE; r->rtm_protocol = RTPROT_UNSPEC; r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; if (rt->rt_flags & RTCF_NOTIFY) r->rtm_flags |= RTM_F_NOTIFY; - RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst); + + NLA_PUT_U32(skb, RTA_DST, rt->rt_dst); + if (rt->fl.fl4_src) { r->rtm_src_len = 32; - RTA_PUT(skb, RTA_SRC, 4, &rt->fl.fl4_src); + NLA_PUT_U32(skb, RTA_SRC, rt->fl.fl4_src); } if (rt->u.dst.dev) - RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex); + NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex); #ifdef CONFIG_NET_CLS_ROUTE if (rt->u.dst.tclassid) - RTA_PUT(skb, RTA_FLOW, 4, &rt->u.dst.tclassid); + NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid); #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (rt->rt_multipath_alg != IP_MP_ALG_NONE) { - __u32 alg = rt->rt_multipath_alg; - - RTA_PUT(skb, RTA_MP_ALGO, 4, &alg); - } + if (rt->rt_multipath_alg != IP_MP_ALG_NONE) + NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg); #endif if (rt->fl.iif) - RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst); + NLA_PUT_U32(skb, RTA_PREFSRC, rt->rt_spec_dst); else if (rt->rt_src != rt->fl.fl4_src) - RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_src); + NLA_PUT_U32(skb, RTA_PREFSRC, rt->rt_src); + if (rt->rt_dst != rt->rt_gateway) - RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway); + NLA_PUT_U32(skb, RTA_GATEWAY, rt->rt_gateway); + if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) - goto rtattr_failure; + goto nla_put_failure; + ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse); ci.rta_used = rt->u.dst.__use; ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt); @@ -2701,10 +2703,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp; } } -#ifdef CONFIG_IP_MROUTE - eptr = (struct rtattr*)skb->tail; -#endif - RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); + if (rt->fl.iif) { #ifdef CONFIG_IP_MROUTE u32 dst = rt->rt_dst; @@ -2716,25 +2715,24 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, if (!nowait) { if (err == 0) return 0; - goto nlmsg_failure; + goto nla_put_failure; } else { if (err == -EMSGSIZE) - goto nlmsg_failure; - ((struct rta_cacheinfo*)RTA_DATA(eptr))->rta_error = err; + goto nla_put_failure; + ci.rta_error = err; } } } else #endif - RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif); + NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); } - nlh->nlmsg_len = skb->tail - b; - return skb->len; + NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; + return nlmsg_end(skb, nlh); + +nla_put_failure: + return nlmsg_cancel(skb, nlh); } int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) From d889ce3b29e55b91257964b4c9aac70b91fedd91 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 17 Aug 2006 18:15:44 -0700 Subject: [PATCH 0487/1063] [IPv4]: Convert route get to new netlink api Fixes various unvalidated netlink attributes causing memory corruptions when left empty by userspace applications. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 + net/ipv4/fib_frontend.c | 2 +- net/ipv4/route.c | 84 ++++++++++++++++++++++------------------- 3 files changed, 47 insertions(+), 40 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 42ed96fab3f5..fcc159a4ac17 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -216,6 +216,7 @@ extern void fib_select_default(const struct flowi *flp, struct fib_result *res); #endif /* CONFIG_IP_MULTIPLE_TABLES */ /* Exported by fib_frontend.c */ +extern struct nla_policy rtm_ipv4_policy[]; extern void ip_fib_init(void); extern int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); extern int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index d537c933abe3..d0abeab16e66 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -453,7 +453,7 @@ int ip_rt_ioctl(unsigned int cmd, void *arg) #endif -static struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = { +struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = { [RTA_DST] = { .type = NLA_U32 }, [RTA_SRC] = { .type = NLA_U32 }, [RTA_IIF] = { .type = NLA_U32 }, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 31b67059ac29..a4d4cb85a16c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2737,18 +2737,24 @@ nla_put_failure: int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { - struct rtattr **rta = arg; - struct rtmsg *rtm = NLMSG_DATA(nlh); + struct rtmsg *rtm; + struct nlattr *tb[RTA_MAX+1]; struct rtable *rt = NULL; - u32 dst = 0; - u32 src = 0; - int iif = 0; - int err = -ENOBUFS; + u32 dst, src, iif; + int err; struct sk_buff *skb; + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); + if (err < 0) + goto errout; + + rtm = nlmsg_data(nlh); + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) - goto out; + if (skb == NULL) { + err = -ENOBUFS; + goto errout; + } /* Reserve room for dummy headers, this skb can pass through good chunk of routing engine. @@ -2759,61 +2765,61 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) skb->nh.iph->protocol = IPPROTO_ICMP; skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); - if (rta[RTA_SRC - 1]) - memcpy(&src, RTA_DATA(rta[RTA_SRC - 1]), 4); - if (rta[RTA_DST - 1]) - memcpy(&dst, RTA_DATA(rta[RTA_DST - 1]), 4); - if (rta[RTA_IIF - 1]) - memcpy(&iif, RTA_DATA(rta[RTA_IIF - 1]), sizeof(int)); + src = tb[RTA_SRC] ? nla_get_u32(tb[RTA_SRC]) : 0; + dst = tb[RTA_DST] ? nla_get_u32(tb[RTA_DST]) : 0; + iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; if (iif) { - struct net_device *dev = __dev_get_by_index(iif); - err = -ENODEV; - if (!dev) - goto out_free; + struct net_device *dev; + + dev = __dev_get_by_index(iif); + if (dev == NULL) { + err = -ENODEV; + goto errout_free; + } + skb->protocol = htons(ETH_P_IP); skb->dev = dev; local_bh_disable(); err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); local_bh_enable(); - rt = (struct rtable*)skb->dst; - if (!err && rt->u.dst.error) + + rt = (struct rtable*) skb->dst; + if (err == 0 && rt->u.dst.error) err = -rt->u.dst.error; } else { - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst, - .saddr = src, - .tos = rtm->rtm_tos } } }; - int oif = 0; - if (rta[RTA_OIF - 1]) - memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); - fl.oif = oif; + struct flowi fl = { + .nl_u = { + .ip4_u = { + .daddr = dst, + .saddr = src, + .tos = rtm->rtm_tos, + }, + }, + .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, + }; err = ip_route_output_key(&rt, &fl); } + if (err) - goto out_free; + goto errout_free; skb->dst = &rt->u.dst; if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; - NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; - err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); - if (!err) - goto out_free; - if (err < 0) { - err = -EMSGSIZE; - goto out_free; - } + if (err <= 0) + goto errout_free; err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); -out: +errout: return err; -out_free: +errout_free: kfree_skb(skb); - goto out; + goto errout; } int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) From e92b43a3455d3e817c13481bb3ea3cd29d0a47f4 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 17 Aug 2006 18:17:37 -0700 Subject: [PATCH 0488/1063] [NET] neighbour: reduce exports There are several symbols only used by rtnetlink and since it can not be a module, there is no reason to export them. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/neighbour.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index c7e653ff5ed0..c0a27407f445 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -889,7 +889,7 @@ out_unlock_bh: return rc; } -static __inline__ void neigh_update_hhs(struct neighbour *neigh) +static void neigh_update_hhs(struct neighbour *neigh) { struct hh_cache *hh; void (*update)(struct hh_cache*, struct net_device*, unsigned char *) = @@ -2724,7 +2724,6 @@ void neigh_sysctl_unregister(struct neigh_parms *p) #endif /* CONFIG_SYSCTL */ EXPORT_SYMBOL(__neigh_event_send); -EXPORT_SYMBOL(neigh_add); EXPORT_SYMBOL(neigh_changeaddr); EXPORT_SYMBOL(neigh_compat_output); EXPORT_SYMBOL(neigh_connected_output); @@ -2744,11 +2743,8 @@ EXPORT_SYMBOL(neigh_table_clear); EXPORT_SYMBOL(neigh_table_init); EXPORT_SYMBOL(neigh_table_init_no_netlink); EXPORT_SYMBOL(neigh_update); -EXPORT_SYMBOL(neigh_update_hhs); EXPORT_SYMBOL(pneigh_enqueue); EXPORT_SYMBOL(pneigh_lookup); -EXPORT_SYMBOL(neightbl_dump_info); -EXPORT_SYMBOL(neightbl_set); #ifdef CONFIG_ARPD EXPORT_SYMBOL(neigh_app_ns); From d3e01f71863da30a2d6bfca069a036168b6c8607 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 17 Aug 2006 18:18:53 -0700 Subject: [PATCH 0489/1063] [ETH]: docbook comments Add docbook style comments to ethernet support. Signed-off-by: Stephen Hemminger Acked-by: Randy Dunlap Signed-off-by: David S. Miller --- net/ethernet/eth.c | 100 +++++++++++++++++++++++++++++++-------------- 1 file changed, 69 insertions(+), 31 deletions(-) diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 387c71c584ee..72bdb15036ec 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -64,23 +64,24 @@ __setup("ether=", netdev_boot_setup); -/* - * Create the Ethernet MAC header for an arbitrary protocol layer +/** + * eth_header - create the Ethernet header + * @skb: buffer to alter + * @dev: source device + * @type: Ethernet type field + * @daddr: destination address (NULL leave destination address) + * @saddr: source address (NULL use device source address) + * @len: packet length (<= skb->len) * - * saddr=NULL means use device source address - * daddr=NULL means leave destination address (eg unresolved arp) + * + * Set the protocol type. For a packet of type ETH_P_802_3 we put the length + * in here instead. It is up to the 802.2 layer to carry protocol information. */ - int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, unsigned len) { struct ethhdr *eth = (struct ethhdr *)skb_push(skb,ETH_HLEN); - /* - * Set the protocol type. For a packet of type ETH_P_802_3 we put the length - * in here instead. It is up to the 802.2 layer to carry protocol information. - */ - if(type!=ETH_P_802_3) eth->h_proto = htons(type); else @@ -113,16 +114,16 @@ int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, return -ETH_HLEN; } - -/* - * Rebuild the Ethernet MAC header. This is called after an ARP - * (or in future other address resolution) has completed on this - * sk_buff. We now let ARP fill in the other fields. +/** + * eth_rebuild_header- rebuild the Ethernet MAC header. + * @skb: socket buffer to update * - * This routine CANNOT use cached dst->neigh! - * Really, it is used only when dst->neigh is wrong. + * This is called after an ARP or IPV6 ndisc it's resolution on this + * sk_buff. We now let protocol (ARP) fill in the other fields. + * + * This routine CANNOT use cached dst->neigh! + * Really, it is used only when dst->neigh is wrong. */ - int eth_rebuild_header(struct sk_buff *skb) { struct ethhdr *eth = (struct ethhdr *)skb->data; @@ -147,12 +148,15 @@ int eth_rebuild_header(struct sk_buff *skb) } -/* - * Determine the packet's protocol ID. The rule here is that we - * assume 802.3 if the type field is short enough to be a length. - * This is normal practice and works for any 'now in use' protocol. +/** + * eth_type_trans - determine the packet's protocol ID. + * @skb: received socket data + * @dev: receiving network device + * + * The rule here is that we + * assume 802.3 if the type field is short enough to be a length. + * This is normal practice and works for any 'now in use' protocol. */ - __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) { struct ethhdr *eth; @@ -202,6 +206,11 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) return htons(ETH_P_802_2); } +/** + * eth_header_parse - extract hardware address from packet + * @skb: packet to extract header from + * @haddr: destination buffer + */ static int eth_header_parse(struct sk_buff *skb, unsigned char *haddr) { struct ethhdr *eth = eth_hdr(skb); @@ -209,6 +218,12 @@ static int eth_header_parse(struct sk_buff *skb, unsigned char *haddr) return ETH_ALEN; } +/** + * eth_header_cache - fill cache entry from neighbour + * @neigh: source neighbour + * @hh: destination cache entry + * Create an Ethernet header template from the neighbour. + */ int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh) { unsigned short type = hh->hh_type; @@ -228,10 +243,14 @@ int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh) return 0; } -/* +/** + * eth_header_cache_update - update cache entry + * @hh: destination cache entry + * @dev: network device + * @haddr: new hardware address + * * Called by Address Resolution module to notify changes in address. */ - void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, unsigned char * haddr) { memcpy(((u8*)hh->hh_data) + HH_DATA_OFF(sizeof(struct ethhdr)), @@ -240,6 +259,15 @@ void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, unsign EXPORT_SYMBOL(eth_type_trans); +/** + * eth_mac_addr - set new Ethernet hardware address + * @dev: network device + * @p: socket address + * Change hardware address of device. + * + * This doesn't change hardware matching, so needs to be overridden + * for most real devices. + */ static int eth_mac_addr(struct net_device *dev, void *p) { struct sockaddr *addr=p; @@ -249,6 +277,14 @@ static int eth_mac_addr(struct net_device *dev, void *p) return 0; } +/** + * eth_change_mtu - set new MTU size + * @dev: network device + * @new_mtu: new Maximum Transfer Unit + * + * Allow changing MTU size. Needs to be overridden for devices + * supporting jumbo frames. + */ static int eth_change_mtu(struct net_device *dev, int new_mtu) { if (new_mtu < 68 || new_mtu > ETH_DATA_LEN) @@ -257,8 +293,10 @@ static int eth_change_mtu(struct net_device *dev, int new_mtu) return 0; } -/* - * Fill in the fields of the device structure with ethernet-generic values. +/** + * ether_setup - setup Ethernet network device + * @dev: network device + * Fill in the fields of the device structure with Ethernet-generic values. */ void ether_setup(struct net_device *dev) { @@ -283,15 +321,15 @@ void ether_setup(struct net_device *dev) EXPORT_SYMBOL(ether_setup); /** - * alloc_etherdev - Allocates and sets up an ethernet device + * alloc_etherdev - Allocates and sets up an Ethernet device * @sizeof_priv: Size of additional driver-private structure to be allocated - * for this ethernet device + * for this Ethernet device * - * Fill in the fields of the device structure with ethernet-generic + * Fill in the fields of the device structure with Ethernet-generic * values. Basically does everything except registering the device. * * Constructs a new net device, complete with a private data area of - * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for + * size (sizeof_priv). A 32-byte (not bit) alignment is enforced for * this private data area. */ From 2e4ca75b31b6851dcc036c2cdebf3ecfe279a653 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 17 Aug 2006 18:20:18 -0700 Subject: [PATCH 0490/1063] [ETH]: indentation and cleanup Run ethernet support through Lindent and fix up. Applies after docbook comments patch Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ethernet/eth.c | 98 ++++++++++++++++++++++------------------------ 1 file changed, 47 insertions(+), 51 deletions(-) diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 72bdb15036ec..43863933f27f 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -78,39 +78,37 @@ __setup("ether=", netdev_boot_setup); * in here instead. It is up to the 802.2 layer to carry protocol information. */ int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, - void *daddr, void *saddr, unsigned len) + void *daddr, void *saddr, unsigned len) { - struct ethhdr *eth = (struct ethhdr *)skb_push(skb,ETH_HLEN); + struct ethhdr *eth = (struct ethhdr *)skb_push(skb, ETH_HLEN); - if(type!=ETH_P_802_3) + if (type != ETH_P_802_3) eth->h_proto = htons(type); else eth->h_proto = htons(len); /* - * Set the source hardware address. + * Set the source hardware address. */ - - if(!saddr) - saddr = dev->dev_addr; - memcpy(eth->h_source,saddr,dev->addr_len); - if(daddr) - { - memcpy(eth->h_dest,daddr,dev->addr_len); + if (!saddr) + saddr = dev->dev_addr; + memcpy(eth->h_source, saddr, dev->addr_len); + + if (daddr) { + memcpy(eth->h_dest, daddr, dev->addr_len); return ETH_HLEN; } - + /* - * Anyway, the loopback-device should never use this function... + * Anyway, the loopback-device should never use this function... */ - if (dev->flags & (IFF_LOOPBACK|IFF_NOARP)) - { + if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) { memset(eth->h_dest, 0, dev->addr_len); return ETH_HLEN; } - + return -ETH_HLEN; } @@ -129,17 +127,16 @@ int eth_rebuild_header(struct sk_buff *skb) struct ethhdr *eth = (struct ethhdr *)skb->data; struct net_device *dev = skb->dev; - switch (eth->h_proto) - { + switch (eth->h_proto) { #ifdef CONFIG_INET case __constant_htons(ETH_P_IP): - return arp_find(eth->h_dest, skb); -#endif + return arp_find(eth->h_dest, skb); +#endif default: printk(KERN_DEBUG - "%s: unable to resolve type %X addresses.\n", + "%s: unable to resolve type %X addresses.\n", dev->name, (int)eth->h_proto); - + memcpy(eth->h_source, dev->dev_addr, dev->addr_len); break; } @@ -147,7 +144,6 @@ int eth_rebuild_header(struct sk_buff *skb) return 0; } - /** * eth_type_trans - determine the packet's protocol ID. * @skb: received socket data @@ -161,50 +157,51 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) { struct ethhdr *eth; unsigned char *rawp; - + skb->mac.raw = skb->data; - skb_pull(skb,ETH_HLEN); + skb_pull(skb, ETH_HLEN); eth = eth_hdr(skb); - + if (is_multicast_ether_addr(eth->h_dest)) { if (!compare_ether_addr(eth->h_dest, dev->broadcast)) skb->pkt_type = PACKET_BROADCAST; else skb->pkt_type = PACKET_MULTICAST; } - + /* - * This ALLMULTI check should be redundant by 1.4 - * so don't forget to remove it. + * This ALLMULTI check should be redundant by 1.4 + * so don't forget to remove it. * - * Seems, you forgot to remove it. All silly devices - * seems to set IFF_PROMISC. + * Seems, you forgot to remove it. All silly devices + * seems to set IFF_PROMISC. */ - - else if(1 /*dev->flags&IFF_PROMISC*/) { + + else if (1 /*dev->flags&IFF_PROMISC */ ) { if (unlikely(compare_ether_addr(eth->h_dest, dev->dev_addr))) skb->pkt_type = PACKET_OTHERHOST; } - + if (ntohs(eth->h_proto) >= 1536) return eth->h_proto; - + rawp = skb->data; - + /* - * This is a magic hack to spot IPX packets. Older Novell breaks - * the protocol design and runs IPX over 802.3 without an 802.2 LLC - * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This - * won't work for fault tolerant netware but does for the rest. + * This is a magic hack to spot IPX packets. Older Novell breaks + * the protocol design and runs IPX over 802.3 without an 802.2 LLC + * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This + * won't work for fault tolerant netware but does for the rest. */ if (*(unsigned short *)rawp == 0xFFFF) return htons(ETH_P_802_3); - + /* - * Real 802.2 LLC + * Real 802.2 LLC */ return htons(ETH_P_802_2); } +EXPORT_SYMBOL(eth_type_trans); /** * eth_header_parse - extract hardware address from packet @@ -230,8 +227,8 @@ int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh) struct ethhdr *eth; struct net_device *dev = neigh->dev; - eth = (struct ethhdr*) - (((u8*)hh->hh_data) + (HH_DATA_OFF(sizeof(*eth)))); + eth = (struct ethhdr *) + (((u8 *) hh->hh_data) + (HH_DATA_OFF(sizeof(*eth)))); if (type == __constant_htons(ETH_P_802_3)) return -1; @@ -251,14 +248,13 @@ int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh) * * Called by Address Resolution module to notify changes in address. */ -void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, unsigned char * haddr) +void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, + unsigned char *haddr) { - memcpy(((u8*)hh->hh_data) + HH_DATA_OFF(sizeof(struct ethhdr)), + memcpy(((u8 *) hh->hh_data) + HH_DATA_OFF(sizeof(struct ethhdr)), haddr, dev->addr_len); } -EXPORT_SYMBOL(eth_type_trans); - /** * eth_mac_addr - set new Ethernet hardware address * @dev: network device @@ -270,10 +266,10 @@ EXPORT_SYMBOL(eth_type_trans); */ static int eth_mac_addr(struct net_device *dev, void *p) { - struct sockaddr *addr=p; + struct sockaddr *addr = p; if (netif_running(dev)) return -EBUSY; - memcpy(dev->dev_addr, addr->sa_data,dev->addr_len); + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); return 0; } @@ -315,7 +311,7 @@ void ether_setup(struct net_device *dev) dev->tx_queue_len = 1000; /* Ethernet wants good queues */ dev->flags = IFF_BROADCAST|IFF_MULTICAST; - memset(dev->broadcast,0xFF, ETH_ALEN); + memset(dev->broadcast, 0xFF, ETH_ALEN); } EXPORT_SYMBOL(ether_setup); From e9ce1cd3cf6cf35b21d0ce990f2e738f35907386 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 21 Aug 2006 23:54:55 -0700 Subject: [PATCH 0491/1063] [PKT_SCHED]: Kill pkt_act.h inlining. This was simply making templates of functions and mostly causing a lot of code duplication in the classifier action modules. We solve this more cleanly by having a common "struct tcf_common" that hash worker functions contained once in act_api.c can work with. Callers work with real action objects that have the common struct plus their module specific struct members. You go from a common object to the higher level one using a "to_foo()" macro which makes use of container_of() to do the dirty work. This also kills off act_generic.h which was only used by act_simple.c and keeping it around was more work than the it's value. Signed-off-by: David S. Miller --- include/net/act_api.h | 136 +++++---- include/net/act_generic.h | 142 ---------- include/net/pkt_act.h | 273 ------------------ include/net/tc_act/tc_defact.h | 13 +- include/net/tc_act/tc_gact.h | 18 +- include/net/tc_act/tc_ipt.h | 15 +- include/net/tc_act/tc_mirred.h | 17 +- include/net/tc_act/tc_pedit.h | 15 +- net/sched/act_api.c | 246 ++++++++++++++-- net/sched/act_gact.c | 142 +++++----- net/sched/act_ipt.c | 173 +++++------ net/sched/act_mirred.c | 159 +++++------ net/sched/act_pedit.c | 166 +++++------ net/sched/act_police.c | 504 +++++++++++++++++---------------- net/sched/act_simple.c | 183 ++++++++++-- 15 files changed, 1058 insertions(+), 1144 deletions(-) delete mode 100644 include/net/act_generic.h delete mode 100644 include/net/pkt_act.h diff --git a/include/net/act_api.h b/include/net/act_api.h index 11e9eaf79f5a..8b06c2f3657f 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -8,70 +8,110 @@ #include #include -#define tca_gen(name) \ -struct tcf_##name *next; \ - u32 index; \ - int refcnt; \ - int bindcnt; \ - u32 capab; \ - int action; \ - struct tcf_t tm; \ - struct gnet_stats_basic bstats; \ - struct gnet_stats_queue qstats; \ - struct gnet_stats_rate_est rate_est; \ - spinlock_t *stats_lock; \ - spinlock_t lock - -struct tcf_police -{ - tca_gen(police); - int result; - u32 ewma_rate; - u32 burst; - u32 mtu; - u32 toks; - u32 ptoks; - psched_time_t t_c; - struct qdisc_rate_table *R_tab; - struct qdisc_rate_table *P_tab; +struct tcf_common { + struct tcf_common *tcfc_next; + u32 tcfc_index; + int tcfc_refcnt; + int tcfc_bindcnt; + u32 tcfc_capab; + int tcfc_action; + struct tcf_t tcfc_tm; + struct gnet_stats_basic tcfc_bstats; + struct gnet_stats_queue tcfc_qstats; + struct gnet_stats_rate_est tcfc_rate_est; + spinlock_t *tcfc_stats_lock; + spinlock_t tcfc_lock; }; +#define tcf_next common.tcfc_next +#define tcf_index common.tcfc_index +#define tcf_refcnt common.tcfc_refcnt +#define tcf_bindcnt common.tcfc_bindcnt +#define tcf_capab common.tcfc_capab +#define tcf_action common.tcfc_action +#define tcf_tm common.tcfc_tm +#define tcf_bstats common.tcfc_bstats +#define tcf_qstats common.tcfc_qstats +#define tcf_rate_est common.tcfc_rate_est +#define tcf_stats_lock common.tcfc_stats_lock +#define tcf_lock common.tcfc_lock + +struct tcf_police { + struct tcf_common common; + int tcfp_result; + u32 tcfp_ewma_rate; + u32 tcfp_burst; + u32 tcfp_mtu; + u32 tcfp_toks; + u32 tcfp_ptoks; + psched_time_t tcfp_t_c; + struct qdisc_rate_table *tcfp_R_tab; + struct qdisc_rate_table *tcfp_P_tab; +}; +#define to_police(pc) \ + container_of(pc, struct tcf_police, common) + +struct tcf_hashinfo { + struct tcf_common **htab; + unsigned int hmask; + rwlock_t *lock; +}; + +static inline unsigned int tcf_hash(u32 index, unsigned int hmask) +{ + return index & hmask; +} #ifdef CONFIG_NET_CLS_ACT #define ACT_P_CREATED 1 #define ACT_P_DELETED 1 -struct tcf_act_hdr -{ - tca_gen(act_hdr); +struct tcf_act_hdr { + struct tcf_common common; }; -struct tc_action -{ - void *priv; - struct tc_action_ops *ops; - __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ - __u32 order; - struct tc_action *next; +struct tc_action { + void *priv; + struct tc_action_ops *ops; + __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ + __u32 order; + struct tc_action *next; }; #define TCA_CAP_NONE 0 -struct tc_action_ops -{ +struct tc_action_ops { struct tc_action_ops *next; + struct tcf_hashinfo *hinfo; char kind[IFNAMSIZ]; __u32 type; /* TBD to match kind */ __u32 capab; /* capabilities includes 4 bit version */ struct module *owner; int (*act)(struct sk_buff *, struct tc_action *, struct tcf_result *); int (*get_stats)(struct sk_buff *, struct tc_action *); - int (*dump)(struct sk_buff *, struct tc_action *,int , int); + int (*dump)(struct sk_buff *, struct tc_action *, int, int); int (*cleanup)(struct tc_action *, int bind); - int (*lookup)(struct tc_action *, u32 ); - int (*init)(struct rtattr *,struct rtattr *,struct tc_action *, int , int ); - int (*walk)(struct sk_buff *, struct netlink_callback *, int , struct tc_action *); + int (*lookup)(struct tc_action *, u32); + int (*init)(struct rtattr *, struct rtattr *, struct tc_action *, int , int); + int (*walk)(struct sk_buff *, struct netlink_callback *, int, struct tc_action *); }; +extern struct tcf_common *tcf_hash_lookup(u32 index, + struct tcf_hashinfo *hinfo); +extern void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo); +extern int tcf_hash_release(struct tcf_common *p, int bind, + struct tcf_hashinfo *hinfo); +extern int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, + int type, struct tc_action *a); +extern u32 tcf_hash_new_index(u32 *idx_gen, struct tcf_hashinfo *hinfo); +extern int tcf_hash_search(struct tc_action *a, u32 index); +extern struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, + int bind, struct tcf_hashinfo *hinfo); +extern struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est, + struct tc_action *a, int size, + int bind, u32 *idx_gen, + struct tcf_hashinfo *hinfo); +extern void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo); + extern int tcf_register_action(struct tc_action_ops *a); extern int tcf_unregister_action(struct tc_action_ops *a); extern void tcf_action_destroy(struct tc_action *a, int bind); @@ -96,17 +136,17 @@ tcf_police_release(struct tcf_police *p, int bind) int ret = 0; #ifdef CONFIG_NET_CLS_ACT if (p) { - if (bind) { - p->bindcnt--; - } - p->refcnt--; - if (p->refcnt <= 0 && !p->bindcnt) { + if (bind) + p->tcf_bindcnt--; + + p->tcf_refcnt--; + if (p->tcf_refcnt <= 0 && !p->tcf_bindcnt) { tcf_police_destroy(p); ret = 1; } } #else - if (p && --p->refcnt == 0) + if (p && --p->tcf_refcnt == 0) tcf_police_destroy(p); #endif /* CONFIG_NET_CLS_ACT */ diff --git a/include/net/act_generic.h b/include/net/act_generic.h deleted file mode 100644 index c9daa7e52300..000000000000 --- a/include/net/act_generic.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * include/net/act_generic.h - * -*/ -#ifndef _NET_ACT_GENERIC_H -#define _NET_ACT_GENERIC_H -static inline int tcf_defact_release(struct tcf_defact *p, int bind) -{ - int ret = 0; - if (p) { - if (bind) { - p->bindcnt--; - } - p->refcnt--; - if (p->bindcnt <= 0 && p->refcnt <= 0) { - kfree(p->defdata); - tcf_hash_destroy(p); - ret = 1; - } - } - return ret; -} - -static inline int -alloc_defdata(struct tcf_defact *p, u32 datalen, void *defdata) -{ - p->defdata = kmalloc(datalen, GFP_KERNEL); - if (p->defdata == NULL) - return -ENOMEM; - p->datalen = datalen; - memcpy(p->defdata, defdata, datalen); - return 0; -} - -static inline int -realloc_defdata(struct tcf_defact *p, u32 datalen, void *defdata) -{ - /* safer to be just brute force for now */ - kfree(p->defdata); - return alloc_defdata(p, datalen, defdata); -} - -static inline int -tcf_defact_init(struct rtattr *rta, struct rtattr *est, - struct tc_action *a, int ovr, int bind) -{ - struct rtattr *tb[TCA_DEF_MAX]; - struct tc_defact *parm; - struct tcf_defact *p; - void *defdata; - u32 datalen = 0; - int ret = 0; - - if (rta == NULL || rtattr_parse_nested(tb, TCA_DEF_MAX, rta) < 0) - return -EINVAL; - - if (tb[TCA_DEF_PARMS - 1] == NULL || - RTA_PAYLOAD(tb[TCA_DEF_PARMS - 1]) < sizeof(*parm)) - return -EINVAL; - - parm = RTA_DATA(tb[TCA_DEF_PARMS - 1]); - defdata = RTA_DATA(tb[TCA_DEF_DATA - 1]); - if (defdata == NULL) - return -EINVAL; - - datalen = RTA_PAYLOAD(tb[TCA_DEF_DATA - 1]); - if (datalen <= 0) - return -EINVAL; - - p = tcf_hash_check(parm->index, a, ovr, bind); - if (p == NULL) { - p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind); - if (p == NULL) - return -ENOMEM; - - ret = alloc_defdata(p, datalen, defdata); - if (ret < 0) { - kfree(p); - return ret; - } - ret = ACT_P_CREATED; - } else { - if (!ovr) { - tcf_defact_release(p, bind); - return -EEXIST; - } - realloc_defdata(p, datalen, defdata); - } - - spin_lock_bh(&p->lock); - p->action = parm->action; - spin_unlock_bh(&p->lock); - if (ret == ACT_P_CREATED) - tcf_hash_insert(p); - return ret; -} - -static inline int tcf_defact_cleanup(struct tc_action *a, int bind) -{ - struct tcf_defact *p = PRIV(a, defact); - - if (p != NULL) - return tcf_defact_release(p, bind); - return 0; -} - -static inline int -tcf_defact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) -{ - unsigned char *b = skb->tail; - struct tc_defact opt; - struct tcf_defact *p = PRIV(a, defact); - struct tcf_t t; - - opt.index = p->index; - opt.refcnt = p->refcnt - ref; - opt.bindcnt = p->bindcnt - bind; - opt.action = p->action; - RTA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt); - RTA_PUT(skb, TCA_DEF_DATA, p->datalen, p->defdata); - t.install = jiffies_to_clock_t(jiffies - p->tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); - t.expires = jiffies_to_clock_t(p->tm.expires); - RTA_PUT(skb, TCA_DEF_TM, sizeof(t), &t); - return skb->len; - -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; -} - -#define tca_use_default_ops \ - .dump = tcf_defact_dump, \ - .cleanup = tcf_defact_cleanup, \ - .init = tcf_defact_init, \ - .walk = tcf_generic_walker, \ - -#define tca_use_default_defines(name) \ - static u32 idx_gen; \ - static struct tcf_defact *tcf_##name_ht[MY_TAB_SIZE]; \ - static DEFINE_RWLOCK(##name_lock); -#endif /* _NET_ACT_GENERIC_H */ diff --git a/include/net/pkt_act.h b/include/net/pkt_act.h deleted file mode 100644 index cf5e4d2e4c21..000000000000 --- a/include/net/pkt_act.h +++ /dev/null @@ -1,273 +0,0 @@ -#ifndef __NET_PKT_ACT_H -#define __NET_PKT_ACT_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define tca_st(val) (struct tcf_##val *) -#define PRIV(a,name) ( tca_st(name) (a)->priv) - -#if 0 /* control */ -#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) -#else -#define DPRINTK(format,args...) -#endif - -#if 0 /* data */ -#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args) -#else -#define D2PRINTK(format,args...) -#endif - -static __inline__ unsigned -tcf_hash(u32 index) -{ - return index & MY_TAB_MASK; -} - -/* probably move this from being inline - * and put into act_generic -*/ -static inline void -tcf_hash_destroy(struct tcf_st *p) -{ - unsigned h = tcf_hash(p->index); - struct tcf_st **p1p; - - for (p1p = &tcf_ht[h]; *p1p; p1p = &(*p1p)->next) { - if (*p1p == p) { - write_lock_bh(&tcf_t_lock); - *p1p = p->next; - write_unlock_bh(&tcf_t_lock); -#ifdef CONFIG_NET_ESTIMATOR - gen_kill_estimator(&p->bstats, &p->rate_est); -#endif - kfree(p); - return; - } - } - BUG_TRAP(0); -} - -static inline int -tcf_hash_release(struct tcf_st *p, int bind ) -{ - int ret = 0; - if (p) { - if (bind) { - p->bindcnt--; - } - p->refcnt--; - if(p->bindcnt <=0 && p->refcnt <= 0) { - tcf_hash_destroy(p); - ret = 1; - } - } - return ret; -} - -static __inline__ int -tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, - struct tc_action *a) -{ - struct tcf_st *p; - int err =0, index = -1,i= 0, s_i = 0, n_i = 0; - struct rtattr *r ; - - read_lock(&tcf_t_lock); - - s_i = cb->args[0]; - - for (i = 0; i < MY_TAB_SIZE; i++) { - p = tcf_ht[tcf_hash(i)]; - - for (; p; p = p->next) { - index++; - if (index < s_i) - continue; - a->priv = p; - a->order = n_i; - r = (struct rtattr*) skb->tail; - RTA_PUT(skb, a->order, 0, NULL); - err = tcf_action_dump_1(skb, a, 0, 0); - if (0 > err) { - index--; - skb_trim(skb, (u8*)r - skb->data); - goto done; - } - r->rta_len = skb->tail - (u8*)r; - n_i++; - if (n_i >= TCA_ACT_MAX_PRIO) { - goto done; - } - } - } -done: - read_unlock(&tcf_t_lock); - if (n_i) - cb->args[0] += n_i; - return n_i; - -rtattr_failure: - skb_trim(skb, (u8*)r - skb->data); - goto done; -} - -static __inline__ int -tcf_del_walker(struct sk_buff *skb, struct tc_action *a) -{ - struct tcf_st *p, *s_p; - struct rtattr *r ; - int i= 0, n_i = 0; - - r = (struct rtattr*) skb->tail; - RTA_PUT(skb, a->order, 0, NULL); - RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind); - for (i = 0; i < MY_TAB_SIZE; i++) { - p = tcf_ht[tcf_hash(i)]; - - while (p != NULL) { - s_p = p->next; - if (ACT_P_DELETED == tcf_hash_release(p, 0)) { - module_put(a->ops->owner); - } - n_i++; - p = s_p; - } - } - RTA_PUT(skb, TCA_FCNT, 4, &n_i); - r->rta_len = skb->tail - (u8*)r; - - return n_i; -rtattr_failure: - skb_trim(skb, (u8*)r - skb->data); - return -EINVAL; -} - -static __inline__ int -tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, int type, - struct tc_action *a) -{ - if (type == RTM_DELACTION) { - return tcf_del_walker(skb,a); - } else if (type == RTM_GETACTION) { - return tcf_dump_walker(skb,cb,a); - } else { - printk("tcf_generic_walker: unknown action %d\n",type); - return -EINVAL; - } -} - -static __inline__ struct tcf_st * -tcf_hash_lookup(u32 index) -{ - struct tcf_st *p; - - read_lock(&tcf_t_lock); - for (p = tcf_ht[tcf_hash(index)]; p; p = p->next) { - if (p->index == index) - break; - } - read_unlock(&tcf_t_lock); - return p; -} - -static __inline__ u32 -tcf_hash_new_index(void) -{ - do { - if (++idx_gen == 0) - idx_gen = 1; - } while (tcf_hash_lookup(idx_gen)); - - return idx_gen; -} - - -static inline int -tcf_hash_search(struct tc_action *a, u32 index) -{ - struct tcf_st *p = tcf_hash_lookup(index); - - if (p != NULL) { - a->priv = p; - return 1; - } - return 0; -} - -#ifdef CONFIG_NET_ACT_INIT -static inline struct tcf_st * -tcf_hash_check(u32 index, struct tc_action *a, int ovr, int bind) -{ - struct tcf_st *p = NULL; - if (index && (p = tcf_hash_lookup(index)) != NULL) { - if (bind) { - p->bindcnt++; - p->refcnt++; - } - a->priv = p; - } - return p; -} - -static inline struct tcf_st * -tcf_hash_create(u32 index, struct rtattr *est, struct tc_action *a, int size, int ovr, int bind) -{ - struct tcf_st *p = NULL; - - p = kmalloc(size, GFP_KERNEL); - if (p == NULL) - return p; - - memset(p, 0, size); - p->refcnt = 1; - - if (bind) { - p->bindcnt = 1; - } - - spin_lock_init(&p->lock); - p->stats_lock = &p->lock; - p->index = index ? : tcf_hash_new_index(); - p->tm.install = jiffies; - p->tm.lastuse = jiffies; -#ifdef CONFIG_NET_ESTIMATOR - if (est) - gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est); -#endif - a->priv = (void *) p; - return p; -} - -static inline void tcf_hash_insert(struct tcf_st *p) -{ - unsigned h = tcf_hash(p->index); - - write_lock_bh(&tcf_t_lock); - p->next = tcf_ht[h]; - tcf_ht[h] = p; - write_unlock_bh(&tcf_t_lock); -} - -#endif - -#endif diff --git a/include/net/tc_act/tc_defact.h b/include/net/tc_act/tc_defact.h index 463aa671f95d..65f024b80958 100644 --- a/include/net/tc_act/tc_defact.h +++ b/include/net/tc_act/tc_defact.h @@ -3,11 +3,12 @@ #include -struct tcf_defact -{ - tca_gen(defact); - u32 datalen; - void *defdata; +struct tcf_defact { + struct tcf_common common; + u32 tcfd_datalen; + void *tcfd_defdata; }; +#define to_defact(pc) \ + container_of(pc, struct tcf_defact, common) -#endif +#endif /* __NET_TC_DEF_H */ diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index 59f0d9628ad1..9e3f6767b80e 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -3,15 +3,15 @@ #include -struct tcf_gact -{ - tca_gen(gact); +struct tcf_gact { + struct tcf_common common; #ifdef CONFIG_GACT_PROB - u16 ptype; - u16 pval; - int paction; + u16 tcfg_ptype; + u16 tcfg_pval; + int tcfg_paction; #endif - }; - -#endif +#define to_gact(pc) \ + container_of(pc, struct tcf_gact, common) + +#endif /* __NET_TC_GACT_H */ diff --git a/include/net/tc_act/tc_ipt.h b/include/net/tc_act/tc_ipt.h index cb37ad08427f..f7d25dfcc4b7 100644 --- a/include/net/tc_act/tc_ipt.h +++ b/include/net/tc_act/tc_ipt.h @@ -5,12 +5,13 @@ struct xt_entry_target; -struct tcf_ipt -{ - tca_gen(ipt); - u32 hook; - char *tname; - struct xt_entry_target *t; +struct tcf_ipt { + struct tcf_common common; + u32 tcfi_hook; + char *tcfi_tname; + struct xt_entry_target *tcfi_t; }; +#define to_ipt(pc) \ + container_of(pc, struct tcf_ipt, common) -#endif +#endif /* __NET_TC_IPT_H */ diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index b5c32f65c12c..ceac661cdfd5 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -3,13 +3,14 @@ #include -struct tcf_mirred -{ - tca_gen(mirred); - int eaction; - int ifindex; - int ok_push; - struct net_device *dev; +struct tcf_mirred { + struct tcf_common common; + int tcfm_eaction; + int tcfm_ifindex; + int tcfm_ok_push; + struct net_device *tcfm_dev; }; +#define to_mirred(pc) \ + container_of(pc, struct tcf_mirred, common) -#endif +#endif /* __NET_TC_MIR_H */ diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h index eb21689d759d..e6f6e15956f5 100644 --- a/include/net/tc_act/tc_pedit.h +++ b/include/net/tc_act/tc_pedit.h @@ -3,12 +3,13 @@ #include -struct tcf_pedit -{ - tca_gen(pedit); - unsigned char nkeys; - unsigned char flags; - struct tc_pedit_key *keys; +struct tcf_pedit { + struct tcf_common common; + unsigned char tcfp_nkeys; + unsigned char tcfp_flags; + struct tc_pedit_key *tcfp_keys; }; +#define to_pedit(pc) \ + container_of(pc, struct tcf_pedit, common) -#endif +#endif /* __NET_TC_PED_H */ diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 6990747d6d5a..835070e9169c 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -33,16 +33,230 @@ #include #include -#if 0 /* control */ -#define DPRINTK(format, args...) printk(KERN_DEBUG format, ##args) -#else -#define DPRINTK(format, args...) +void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) +{ + unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); + struct tcf_common **p1p; + + for (p1p = &hinfo->htab[h]; *p1p; p1p = &(*p1p)->tcfc_next) { + if (*p1p == p) { + write_lock_bh(hinfo->lock); + *p1p = p->tcfc_next; + write_unlock_bh(hinfo->lock); +#ifdef CONFIG_NET_ESTIMATOR + gen_kill_estimator(&p->tcfc_bstats, + &p->tcfc_rate_est); #endif -#if 0 /* data */ -#define D2PRINTK(format, args...) printk(KERN_DEBUG format, ##args) -#else -#define D2PRINTK(format, args...) + kfree(p); + return; + } + } + BUG_TRAP(0); +} +EXPORT_SYMBOL(tcf_hash_destroy); + +int tcf_hash_release(struct tcf_common *p, int bind, + struct tcf_hashinfo *hinfo) +{ + int ret = 0; + + if (p) { + if (bind) + p->tcfc_bindcnt--; + + p->tcfc_refcnt--; + if (p->tcfc_bindcnt <= 0 && p->tcfc_refcnt <= 0) { + tcf_hash_destroy(p, hinfo); + ret = 1; + } + } + return ret; +} +EXPORT_SYMBOL(tcf_hash_release); + +static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, + struct tc_action *a, struct tcf_hashinfo *hinfo) +{ + struct tcf_common *p; + int err = 0, index = -1,i = 0, s_i = 0, n_i = 0; + struct rtattr *r ; + + read_lock(hinfo->lock); + + s_i = cb->args[0]; + + for (i = 0; i < (hinfo->hmask + 1); i++) { + p = hinfo->htab[tcf_hash(i, hinfo->hmask)]; + + for (; p; p = p->tcfc_next) { + index++; + if (index < s_i) + continue; + a->priv = p; + a->order = n_i; + r = (struct rtattr*) skb->tail; + RTA_PUT(skb, a->order, 0, NULL); + err = tcf_action_dump_1(skb, a, 0, 0); + if (err < 0) { + index--; + skb_trim(skb, (u8*)r - skb->data); + goto done; + } + r->rta_len = skb->tail - (u8*)r; + n_i++; + if (n_i >= TCA_ACT_MAX_PRIO) + goto done; + } + } +done: + read_unlock(hinfo->lock); + if (n_i) + cb->args[0] += n_i; + return n_i; + +rtattr_failure: + skb_trim(skb, (u8*)r - skb->data); + goto done; +} + +static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a, + struct tcf_hashinfo *hinfo) +{ + struct tcf_common *p, *s_p; + struct rtattr *r ; + int i= 0, n_i = 0; + + r = (struct rtattr*) skb->tail; + RTA_PUT(skb, a->order, 0, NULL); + RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind); + for (i = 0; i < (hinfo->hmask + 1); i++) { + p = hinfo->htab[tcf_hash(i, hinfo->hmask)]; + + while (p != NULL) { + s_p = p->tcfc_next; + if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo)) + module_put(a->ops->owner); + n_i++; + p = s_p; + } + } + RTA_PUT(skb, TCA_FCNT, 4, &n_i); + r->rta_len = skb->tail - (u8*)r; + + return n_i; +rtattr_failure: + skb_trim(skb, (u8*)r - skb->data); + return -EINVAL; +} + +int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, + int type, struct tc_action *a) +{ + struct tcf_hashinfo *hinfo = a->ops->hinfo; + + if (type == RTM_DELACTION) { + return tcf_del_walker(skb, a, hinfo); + } else if (type == RTM_GETACTION) { + return tcf_dump_walker(skb, cb, a, hinfo); + } else { + printk("tcf_generic_walker: unknown action %d\n", type); + return -EINVAL; + } +} +EXPORT_SYMBOL(tcf_generic_walker); + +struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo) +{ + struct tcf_common *p; + + read_lock(hinfo->lock); + for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p; + p = p->tcfc_next) { + if (p->tcfc_index == index) + break; + } + read_unlock(hinfo->lock); + + return p; +} +EXPORT_SYMBOL(tcf_hash_lookup); + +u32 tcf_hash_new_index(u32 *idx_gen, struct tcf_hashinfo *hinfo) +{ + u32 val = *idx_gen; + + do { + if (++val == 0) + val = 1; + } while (tcf_hash_lookup(val, hinfo)); + + return (*idx_gen = val); +} +EXPORT_SYMBOL(tcf_hash_new_index); + +int tcf_hash_search(struct tc_action *a, u32 index) +{ + struct tcf_hashinfo *hinfo = a->ops->hinfo; + struct tcf_common *p = tcf_hash_lookup(index, hinfo); + + if (p) { + a->priv = p; + return 1; + } + return 0; +} +EXPORT_SYMBOL(tcf_hash_search); + +struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind, + struct tcf_hashinfo *hinfo) +{ + struct tcf_common *p = NULL; + if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) { + if (bind) { + p->tcfc_bindcnt++; + p->tcfc_refcnt++; + } + a->priv = p; + } + return p; +} +EXPORT_SYMBOL(tcf_hash_check); + +struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est, struct tc_action *a, int size, int bind, u32 *idx_gen, struct tcf_hashinfo *hinfo) +{ + struct tcf_common *p = kzalloc(size, GFP_KERNEL); + + if (unlikely(!p)) + return p; + p->tcfc_refcnt = 1; + if (bind) + p->tcfc_bindcnt = 1; + + spin_lock_init(&p->tcfc_lock); + p->tcfc_stats_lock = &p->tcfc_lock; + p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo); + p->tcfc_tm.install = jiffies; + p->tcfc_tm.lastuse = jiffies; +#ifdef CONFIG_NET_ESTIMATOR + if (est) + gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est, + p->tcfc_stats_lock, est); #endif + a->priv = (void *) p; + return p; +} +EXPORT_SYMBOL(tcf_hash_create); + +void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo) +{ + unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); + + write_lock_bh(hinfo->lock); + p->tcfc_next = hinfo->htab[h]; + hinfo->htab[h] = p; + write_unlock_bh(hinfo->lock); +} +EXPORT_SYMBOL(tcf_hash_insert); static struct tc_action_ops *act_base = NULL; static DEFINE_RWLOCK(act_mod_lock); @@ -155,9 +369,6 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action *act, if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); - D2PRINTK("(%p)tcf_action_exec: cleared TC_NCLS in %s out %s\n", - skb, skb->input_dev ? skb->input_dev->name : "xxx", - skb->dev->name); ret = TC_ACT_OK; goto exec_done; } @@ -187,8 +398,6 @@ void tcf_action_destroy(struct tc_action *act, int bind) for (a = act; a; a = act) { if (a->ops && a->ops->cleanup) { - DPRINTK("tcf_action_destroy destroying %p next %p\n", - a, a->next); if (a->ops->cleanup(a, bind) == ACT_P_DELETED) module_put(a->ops->owner); act = act->next; @@ -331,7 +540,6 @@ struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est, if (*err != ACT_P_CREATED) module_put(a_o->owner); a->ops = a_o; - DPRINTK("tcf_action_init_1: successfull %s\n", act_name); *err = 0; return a; @@ -392,12 +600,12 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, if (compat_mode) { if (a->type == TCA_OLD_COMPAT) err = gnet_stats_start_copy_compat(skb, 0, - TCA_STATS, TCA_XSTATS, h->stats_lock, &d); + TCA_STATS, TCA_XSTATS, h->tcf_stats_lock, &d); else return 0; } else err = gnet_stats_start_copy(skb, TCA_ACT_STATS, - h->stats_lock, &d); + h->tcf_stats_lock, &d); if (err < 0) goto errout; @@ -406,11 +614,11 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, if (a->ops->get_stats(skb, a) < 0) goto errout; - if (gnet_stats_copy_basic(&d, &h->bstats) < 0 || + if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 || #ifdef CONFIG_NET_ESTIMATOR - gnet_stats_copy_rate_est(&d, &h->rate_est) < 0 || + gnet_stats_copy_rate_est(&d, &h->tcf_rate_est) < 0 || #endif - gnet_stats_copy_queue(&d, &h->qstats) < 0) + gnet_stats_copy_queue(&d, &h->tcf_qstats) < 0) goto errout; if (gnet_stats_finish_copy(&d) < 0) diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index e75a147ad60f..6cff56696a81 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -34,48 +34,43 @@ #include #include -/* use generic hash table */ -#define MY_TAB_SIZE 16 -#define MY_TAB_MASK 15 - -static u32 idx_gen; -static struct tcf_gact *tcf_gact_ht[MY_TAB_SIZE]; +#define GACT_TAB_MASK 15 +static struct tcf_common *tcf_gact_ht[GACT_TAB_MASK + 1]; +static u32 gact_idx_gen; static DEFINE_RWLOCK(gact_lock); -/* ovewrride the defaults */ -#define tcf_st tcf_gact -#define tc_st tc_gact -#define tcf_t_lock gact_lock -#define tcf_ht tcf_gact_ht - -#define CONFIG_NET_ACT_INIT 1 -#include +static struct tcf_hashinfo gact_hash_info = { + .htab = tcf_gact_ht, + .hmask = GACT_TAB_MASK, + .lock = &gact_lock, +}; #ifdef CONFIG_GACT_PROB -static int gact_net_rand(struct tcf_gact *p) +static int gact_net_rand(struct tcf_gact *gact) { - if (net_random()%p->pval) - return p->action; - return p->paction; + if (net_random() % gact->tcfg_pval) + return gact->tcf_action; + return gact->tcfg_paction; } -static int gact_determ(struct tcf_gact *p) +static int gact_determ(struct tcf_gact *gact) { - if (p->bstats.packets%p->pval) - return p->action; - return p->paction; + if (gact->tcf_bstats.packets % gact->tcfg_pval) + return gact->tcf_action; + return gact->tcfg_paction; } -typedef int (*g_rand)(struct tcf_gact *p); +typedef int (*g_rand)(struct tcf_gact *gact); static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ }; -#endif +#endif /* CONFIG_GACT_PROB */ static int tcf_gact_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, int ovr, int bind) { struct rtattr *tb[TCA_GACT_MAX]; struct tc_gact *parm; - struct tcf_gact *p; + struct tcf_gact *gact; + struct tcf_common *pc; int ret = 0; if (rta == NULL || rtattr_parse_nested(tb, TCA_GACT_MAX, rta) < 0) @@ -94,105 +89,106 @@ static int tcf_gact_init(struct rtattr *rta, struct rtattr *est, return -EOPNOTSUPP; #endif - p = tcf_hash_check(parm->index, a, ovr, bind); - if (p == NULL) { - p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind); - if (p == NULL) + pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info); + if (!pc) { + pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), + bind, &gact_idx_gen, &gact_hash_info); + if (unlikely(!pc)) return -ENOMEM; ret = ACT_P_CREATED; } else { if (!ovr) { - tcf_hash_release(p, bind); + tcf_hash_release(pc, bind, &gact_hash_info); return -EEXIST; } } - spin_lock_bh(&p->lock); - p->action = parm->action; + gact = to_gact(pc); + + spin_lock_bh(&gact->tcf_lock); + gact->tcf_action = parm->action; #ifdef CONFIG_GACT_PROB if (tb[TCA_GACT_PROB-1] != NULL) { struct tc_gact_p *p_parm = RTA_DATA(tb[TCA_GACT_PROB-1]); - p->paction = p_parm->paction; - p->pval = p_parm->pval; - p->ptype = p_parm->ptype; + gact->tcfg_paction = p_parm->paction; + gact->tcfg_pval = p_parm->pval; + gact->tcfg_ptype = p_parm->ptype; } #endif - spin_unlock_bh(&p->lock); + spin_unlock_bh(&gact->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(p); + tcf_hash_insert(pc, &gact_hash_info); return ret; } -static int -tcf_gact_cleanup(struct tc_action *a, int bind) +static int tcf_gact_cleanup(struct tc_action *a, int bind) { - struct tcf_gact *p = PRIV(a, gact); + struct tcf_gact *gact = a->priv; - if (p != NULL) - return tcf_hash_release(p, bind); + if (gact) + return tcf_hash_release(&gact->common, bind, &gact_hash_info); return 0; } -static int -tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) +static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) { - struct tcf_gact *p = PRIV(a, gact); + struct tcf_gact *gact = a->priv; int action = TC_ACT_SHOT; - spin_lock(&p->lock); + spin_lock(&gact->tcf_lock); #ifdef CONFIG_GACT_PROB - if (p->ptype && gact_rand[p->ptype] != NULL) - action = gact_rand[p->ptype](p); + if (gact->tcfg_ptype && gact_rand[gact->tcfg_ptype] != NULL) + action = gact_rand[gact->tcfg_ptype](gact); else - action = p->action; + action = gact->tcf_action; #else - action = p->action; + action = gact->tcf_action; #endif - p->bstats.bytes += skb->len; - p->bstats.packets++; + gact->tcf_bstats.bytes += skb->len; + gact->tcf_bstats.packets++; if (action == TC_ACT_SHOT) - p->qstats.drops++; - p->tm.lastuse = jiffies; - spin_unlock(&p->lock); + gact->tcf_qstats.drops++; + gact->tcf_tm.lastuse = jiffies; + spin_unlock(&gact->tcf_lock); return action; } -static int -tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb->tail; struct tc_gact opt; - struct tcf_gact *p = PRIV(a, gact); + struct tcf_gact *gact = a->priv; struct tcf_t t; - opt.index = p->index; - opt.refcnt = p->refcnt - ref; - opt.bindcnt = p->bindcnt - bind; - opt.action = p->action; + opt.index = gact->tcf_index; + opt.refcnt = gact->tcf_refcnt - ref; + opt.bindcnt = gact->tcf_bindcnt - bind; + opt.action = gact->tcf_action; RTA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt); #ifdef CONFIG_GACT_PROB - if (p->ptype) { + if (gact->tcfg_ptype) { struct tc_gact_p p_opt; - p_opt.paction = p->paction; - p_opt.pval = p->pval; - p_opt.ptype = p->ptype; + p_opt.paction = gact->tcfg_paction; + p_opt.pval = gact->tcfg_pval; + p_opt.ptype = gact->tcfg_ptype; RTA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt); } #endif - t.install = jiffies_to_clock_t(jiffies - p->tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); - t.expires = jiffies_to_clock_t(p->tm.expires); + t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(gact->tcf_tm.expires); RTA_PUT(skb, TCA_GACT_TM, sizeof(t), &t); return skb->len; - rtattr_failure: +rtattr_failure: skb_trim(skb, b - skb->data); return -1; } static struct tc_action_ops act_gact_ops = { .kind = "gact", + .hinfo = &gact_hash_info, .type = TCA_ACT_GACT, .capab = TCA_CAP_NONE, .owner = THIS_MODULE, @@ -208,8 +204,7 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); MODULE_DESCRIPTION("Generic Classifier actions"); MODULE_LICENSE("GPL"); -static int __init -gact_init_module(void) +static int __init gact_init_module(void) { #ifdef CONFIG_GACT_PROB printk("GACT probability on\n"); @@ -219,8 +214,7 @@ gact_init_module(void) return tcf_register_action(&act_gact_ops); } -static void __exit -gact_cleanup_module(void) +static void __exit gact_cleanup_module(void) { tcf_unregister_action(&act_gact_ops); } diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index d799e01248c4..224c078a398e 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -38,25 +38,19 @@ #include -/* use generic hash table */ -#define MY_TAB_SIZE 16 -#define MY_TAB_MASK 15 -static u32 idx_gen; -static struct tcf_ipt *tcf_ipt_ht[MY_TAB_SIZE]; -/* ipt hash table lock */ +#define IPT_TAB_MASK 15 +static struct tcf_common *tcf_ipt_ht[IPT_TAB_MASK + 1]; +static u32 ipt_idx_gen; static DEFINE_RWLOCK(ipt_lock); -/* ovewrride the defaults */ -#define tcf_st tcf_ipt -#define tcf_t_lock ipt_lock -#define tcf_ht tcf_ipt_ht +static struct tcf_hashinfo ipt_hash_info = { + .htab = tcf_ipt_ht, + .hmask = IPT_TAB_MASK, + .lock = &ipt_lock, +}; -#define CONFIG_NET_ACT_INIT -#include - -static int -ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) +static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) { struct ipt_target *target; int ret = 0; @@ -65,7 +59,6 @@ ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) if (!target) return -ENOENT; - DPRINTK("ipt_init_target: found %s\n", target->name); t->u.kernel.target = target; ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), @@ -78,8 +71,6 @@ ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) t->u.kernel.target, t->data, t->u.target_size - sizeof(*t), hook)) { - DPRINTK("ipt_init_target: check failed for `%s'.\n", - t->u.kernel.target->name); module_put(t->u.kernel.target->me); ret = -EINVAL; } @@ -87,8 +78,7 @@ ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) return ret; } -static void -ipt_destroy_target(struct ipt_entry_target *t) +static void ipt_destroy_target(struct ipt_entry_target *t) { if (t->u.kernel.target->destroy) t->u.kernel.target->destroy(t->u.kernel.target, t->data, @@ -96,31 +86,30 @@ ipt_destroy_target(struct ipt_entry_target *t) module_put(t->u.kernel.target->me); } -static int -tcf_ipt_release(struct tcf_ipt *p, int bind) +static int tcf_ipt_release(struct tcf_ipt *ipt, int bind) { int ret = 0; - if (p) { + if (ipt) { if (bind) - p->bindcnt--; - p->refcnt--; - if (p->bindcnt <= 0 && p->refcnt <= 0) { - ipt_destroy_target(p->t); - kfree(p->tname); - kfree(p->t); - tcf_hash_destroy(p); + ipt->tcf_bindcnt--; + ipt->tcf_refcnt--; + if (ipt->tcf_bindcnt <= 0 && ipt->tcf_refcnt <= 0) { + ipt_destroy_target(ipt->tcfi_t); + kfree(ipt->tcfi_tname); + kfree(ipt->tcfi_t); + tcf_hash_destroy(&ipt->common, &ipt_hash_info); ret = ACT_P_DELETED; } } return ret; } -static int -tcf_ipt_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, - int ovr, int bind) +static int tcf_ipt_init(struct rtattr *rta, struct rtattr *est, + struct tc_action *a, int ovr, int bind) { struct rtattr *tb[TCA_IPT_MAX]; - struct tcf_ipt *p; + struct tcf_ipt *ipt; + struct tcf_common *pc; struct ipt_entry_target *td, *t; char *tname; int ret = 0, err; @@ -144,49 +133,51 @@ tcf_ipt_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, RTA_PAYLOAD(tb[TCA_IPT_INDEX-1]) >= sizeof(u32)) index = *(u32 *)RTA_DATA(tb[TCA_IPT_INDEX-1]); - p = tcf_hash_check(index, a, ovr, bind); - if (p == NULL) { - p = tcf_hash_create(index, est, a, sizeof(*p), ovr, bind); - if (p == NULL) + pc = tcf_hash_check(index, a, bind, &ipt_hash_info); + if (!pc) { + pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind, + &ipt_idx_gen, &ipt_hash_info); + if (unlikely(!pc)) return -ENOMEM; ret = ACT_P_CREATED; } else { if (!ovr) { - tcf_ipt_release(p, bind); + tcf_ipt_release(to_ipt(pc), bind); return -EEXIST; } } + ipt = to_ipt(pc); hook = *(u32 *)RTA_DATA(tb[TCA_IPT_HOOK-1]); err = -ENOMEM; tname = kmalloc(IFNAMSIZ, GFP_KERNEL); - if (tname == NULL) + if (unlikely(!tname)) goto err1; if (tb[TCA_IPT_TABLE - 1] == NULL || rtattr_strlcpy(tname, tb[TCA_IPT_TABLE-1], IFNAMSIZ) >= IFNAMSIZ) strcpy(tname, "mangle"); t = kmalloc(td->u.target_size, GFP_KERNEL); - if (t == NULL) + if (unlikely(!t)) goto err2; memcpy(t, td, td->u.target_size); if ((err = ipt_init_target(t, tname, hook)) < 0) goto err3; - spin_lock_bh(&p->lock); + spin_lock_bh(&ipt->tcf_lock); if (ret != ACT_P_CREATED) { - ipt_destroy_target(p->t); - kfree(p->tname); - kfree(p->t); + ipt_destroy_target(ipt->tcfi_t); + kfree(ipt->tcfi_tname); + kfree(ipt->tcfi_t); } - p->tname = tname; - p->t = t; - p->hook = hook; - spin_unlock_bh(&p->lock); + ipt->tcfi_tname = tname; + ipt->tcfi_t = t; + ipt->tcfi_hook = hook; + spin_unlock_bh(&ipt->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(p); + tcf_hash_insert(pc, &ipt_hash_info); return ret; err3: @@ -194,33 +185,32 @@ err3: err2: kfree(tname); err1: - kfree(p); + kfree(pc); return err; } -static int -tcf_ipt_cleanup(struct tc_action *a, int bind) +static int tcf_ipt_cleanup(struct tc_action *a, int bind) { - struct tcf_ipt *p = PRIV(a, ipt); - return tcf_ipt_release(p, bind); + struct tcf_ipt *ipt = a->priv; + return tcf_ipt_release(ipt, bind); } -static int -tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) +static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, + struct tcf_result *res) { int ret = 0, result = 0; - struct tcf_ipt *p = PRIV(a, ipt); + struct tcf_ipt *ipt = a->priv; if (skb_cloned(skb)) { if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) return TC_ACT_UNSPEC; } - spin_lock(&p->lock); + spin_lock(&ipt->tcf_lock); - p->tm.lastuse = jiffies; - p->bstats.bytes += skb->len; - p->bstats.packets++; + ipt->tcf_tm.lastuse = jiffies; + ipt->tcf_bstats.bytes += skb->len; + ipt->tcf_bstats.packets++; /* yes, we have to worry about both in and out dev worry later - danger - this API seems to have changed @@ -229,16 +219,17 @@ tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) /* iptables targets take a double skb pointer in case the skb * needs to be replaced. We don't own the skb, so this must not * happen. The pskb_expand_head above should make sure of this */ - ret = p->t->u.kernel.target->target(&skb, skb->dev, NULL, p->hook, - p->t->u.kernel.target, p->t->data, - NULL); + ret = ipt->tcfi_t->u.kernel.target->target(&skb, skb->dev, NULL, + ipt->tcfi_hook, + ipt->tcfi_t->u.kernel.target, + ipt->tcfi_t->data, NULL); switch (ret) { case NF_ACCEPT: result = TC_ACT_OK; break; case NF_DROP: result = TC_ACT_SHOT; - p->qstats.drops++; + ipt->tcf_qstats.drops++; break; case IPT_CONTINUE: result = TC_ACT_PIPE; @@ -249,53 +240,46 @@ tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) result = TC_POLICE_OK; break; } - spin_unlock(&p->lock); + spin_unlock(&ipt->tcf_lock); return result; } -static int -tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { + unsigned char *b = skb->tail; + struct tcf_ipt *ipt = a->priv; struct ipt_entry_target *t; struct tcf_t tm; struct tc_cnt c; - unsigned char *b = skb->tail; - struct tcf_ipt *p = PRIV(a, ipt); /* for simple targets kernel size == user size ** user name = target name ** for foolproof you need to not assume this */ - t = kmalloc(p->t->u.user.target_size, GFP_ATOMIC); - if (t == NULL) + t = kmalloc(ipt->tcfi_t->u.user.target_size, GFP_ATOMIC); + if (unlikely(!t)) goto rtattr_failure; - c.bindcnt = p->bindcnt - bind; - c.refcnt = p->refcnt - ref; - memcpy(t, p->t, p->t->u.user.target_size); - strcpy(t->u.user.name, p->t->u.kernel.target->name); + c.bindcnt = ipt->tcf_bindcnt - bind; + c.refcnt = ipt->tcf_refcnt - ref; + memcpy(t, ipt->tcfi_t, ipt->tcfi_t->u.user.target_size); + strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name); - DPRINTK("\ttcf_ipt_dump tablename %s length %d\n", p->tname, - strlen(p->tname)); - DPRINTK("\tdump target name %s size %d size user %d " - "data[0] %x data[1] %x\n", p->t->u.kernel.target->name, - p->t->u.target_size, p->t->u.user.target_size, - p->t->data[0], p->t->data[1]); - RTA_PUT(skb, TCA_IPT_TARG, p->t->u.user.target_size, t); - RTA_PUT(skb, TCA_IPT_INDEX, 4, &p->index); - RTA_PUT(skb, TCA_IPT_HOOK, 4, &p->hook); + RTA_PUT(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t); + RTA_PUT(skb, TCA_IPT_INDEX, 4, &ipt->tcf_index); + RTA_PUT(skb, TCA_IPT_HOOK, 4, &ipt->tcfi_hook); RTA_PUT(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c); - RTA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, p->tname); - tm.install = jiffies_to_clock_t(jiffies - p->tm.install); - tm.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); - tm.expires = jiffies_to_clock_t(p->tm.expires); + RTA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, ipt->tcfi_tname); + tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install); + tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse); + tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires); RTA_PUT(skb, TCA_IPT_TM, sizeof (tm), &tm); kfree(t); return skb->len; - rtattr_failure: +rtattr_failure: skb_trim(skb, b - skb->data); kfree(t); return -1; @@ -303,6 +287,7 @@ tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) static struct tc_action_ops act_ipt_ops = { .kind = "ipt", + .hinfo = &ipt_hash_info, .type = TCA_ACT_IPT, .capab = TCA_CAP_NONE, .owner = THIS_MODULE, @@ -318,14 +303,12 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); MODULE_DESCRIPTION("Iptables target actions"); MODULE_LICENSE("GPL"); -static int __init -ipt_init_module(void) +static int __init ipt_init_module(void) { return tcf_register_action(&act_ipt_ops); } -static void __exit -ipt_cleanup_module(void) +static void __exit ipt_cleanup_module(void) { tcf_unregister_action(&act_ipt_ops); } diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index fc562047ecc5..483897271f15 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -39,46 +39,39 @@ #include #include - -/* use generic hash table */ -#define MY_TAB_SIZE 8 -#define MY_TAB_MASK (MY_TAB_SIZE - 1) -static u32 idx_gen; -static struct tcf_mirred *tcf_mirred_ht[MY_TAB_SIZE]; +#define MIRRED_TAB_MASK 7 +static struct tcf_common *tcf_mirred_ht[MIRRED_TAB_MASK + 1]; +static u32 mirred_idx_gen; static DEFINE_RWLOCK(mirred_lock); -/* ovewrride the defaults */ -#define tcf_st tcf_mirred -#define tc_st tc_mirred -#define tcf_t_lock mirred_lock -#define tcf_ht tcf_mirred_ht +static struct tcf_hashinfo mirred_hash_info = { + .htab = tcf_mirred_ht, + .hmask = MIRRED_TAB_MASK, + .lock = &mirred_lock, +}; -#define CONFIG_NET_ACT_INIT 1 -#include - -static inline int -tcf_mirred_release(struct tcf_mirred *p, int bind) +static inline int tcf_mirred_release(struct tcf_mirred *m, int bind) { - if (p) { + if (m) { if (bind) - p->bindcnt--; - p->refcnt--; - if(!p->bindcnt && p->refcnt <= 0) { - dev_put(p->dev); - tcf_hash_destroy(p); + m->tcf_bindcnt--; + m->tcf_refcnt--; + if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) { + dev_put(m->tcfm_dev); + tcf_hash_destroy(&m->common, &mirred_hash_info); return 1; } } return 0; } -static int -tcf_mirred_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, - int ovr, int bind) +static int tcf_mirred_init(struct rtattr *rta, struct rtattr *est, + struct tc_action *a, int ovr, int bind) { struct rtattr *tb[TCA_MIRRED_MAX]; struct tc_mirred *parm; - struct tcf_mirred *p; + struct tcf_mirred *m; + struct tcf_common *pc; struct net_device *dev = NULL; int ret = 0; int ok_push = 0; @@ -110,64 +103,62 @@ tcf_mirred_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, } } - p = tcf_hash_check(parm->index, a, ovr, bind); - if (p == NULL) { + pc = tcf_hash_check(parm->index, a, bind, &mirred_hash_info); + if (!pc) { if (!parm->ifindex) return -EINVAL; - p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind); - if (p == NULL) + pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind, + &mirred_idx_gen, &mirred_hash_info); + if (unlikely(!pc)) return -ENOMEM; ret = ACT_P_CREATED; } else { if (!ovr) { - tcf_mirred_release(p, bind); + tcf_mirred_release(to_mirred(pc), bind); return -EEXIST; } } + m = to_mirred(pc); - spin_lock_bh(&p->lock); - p->action = parm->action; - p->eaction = parm->eaction; + spin_lock_bh(&m->tcf_lock); + m->tcf_action = parm->action; + m->tcfm_eaction = parm->eaction; if (parm->ifindex) { - p->ifindex = parm->ifindex; + m->tcfm_ifindex = parm->ifindex; if (ret != ACT_P_CREATED) - dev_put(p->dev); - p->dev = dev; + dev_put(m->tcfm_dev); + m->tcfm_dev = dev; dev_hold(dev); - p->ok_push = ok_push; + m->tcfm_ok_push = ok_push; } - spin_unlock_bh(&p->lock); + spin_unlock_bh(&m->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(p); + tcf_hash_insert(pc, &mirred_hash_info); - DPRINTK("tcf_mirred_init index %d action %d eaction %d device %s " - "ifindex %d\n", parm->index, parm->action, parm->eaction, - dev->name, parm->ifindex); return ret; } -static int -tcf_mirred_cleanup(struct tc_action *a, int bind) +static int tcf_mirred_cleanup(struct tc_action *a, int bind) { - struct tcf_mirred *p = PRIV(a, mirred); + struct tcf_mirred *m = a->priv; - if (p != NULL) - return tcf_mirred_release(p, bind); + if (m) + return tcf_mirred_release(m, bind); return 0; } -static int -tcf_mirred(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) +static int tcf_mirred(struct sk_buff *skb, struct tc_action *a, + struct tcf_result *res) { - struct tcf_mirred *p = PRIV(a, mirred); + struct tcf_mirred *m = a->priv; struct net_device *dev; struct sk_buff *skb2 = NULL; u32 at = G_TC_AT(skb->tc_verd); - spin_lock(&p->lock); + spin_lock(&m->tcf_lock); - dev = p->dev; - p->tm.lastuse = jiffies; + dev = m->tcfm_dev; + m->tcf_tm.lastuse = jiffies; if (!(dev->flags&IFF_UP) ) { if (net_ratelimit()) @@ -176,10 +167,10 @@ tcf_mirred(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) bad_mirred: if (skb2 != NULL) kfree_skb(skb2); - p->qstats.overlimits++; - p->bstats.bytes += skb->len; - p->bstats.packets++; - spin_unlock(&p->lock); + m->tcf_qstats.overlimits++; + m->tcf_bstats.bytes += skb->len; + m->tcf_bstats.packets++; + spin_unlock(&m->tcf_lock); /* should we be asking for packet to be dropped? * may make sense for redirect case only */ @@ -189,59 +180,59 @@ bad_mirred: skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2 == NULL) goto bad_mirred; - if (p->eaction != TCA_EGRESS_MIRROR && p->eaction != TCA_EGRESS_REDIR) { + if (m->tcfm_eaction != TCA_EGRESS_MIRROR && + m->tcfm_eaction != TCA_EGRESS_REDIR) { if (net_ratelimit()) - printk("tcf_mirred unknown action %d\n", p->eaction); + printk("tcf_mirred unknown action %d\n", + m->tcfm_eaction); goto bad_mirred; } - p->bstats.bytes += skb2->len; - p->bstats.packets++; + m->tcf_bstats.bytes += skb2->len; + m->tcf_bstats.packets++; if (!(at & AT_EGRESS)) - if (p->ok_push) + if (m->tcfm_ok_push) skb_push(skb2, skb2->dev->hard_header_len); /* mirror is always swallowed */ - if (p->eaction != TCA_EGRESS_MIRROR) + if (m->tcfm_eaction != TCA_EGRESS_MIRROR) skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at); skb2->dev = dev; skb2->input_dev = skb->dev; dev_queue_xmit(skb2); - spin_unlock(&p->lock); - return p->action; + spin_unlock(&m->tcf_lock); + return m->tcf_action; } -static int -tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb->tail; + struct tcf_mirred *m = a->priv; struct tc_mirred opt; - struct tcf_mirred *p = PRIV(a, mirred); struct tcf_t t; - opt.index = p->index; - opt.action = p->action; - opt.refcnt = p->refcnt - ref; - opt.bindcnt = p->bindcnt - bind; - opt.eaction = p->eaction; - opt.ifindex = p->ifindex; - DPRINTK("tcf_mirred_dump index %d action %d eaction %d ifindex %d\n", - p->index, p->action, p->eaction, p->ifindex); + opt.index = m->tcf_index; + opt.action = m->tcf_action; + opt.refcnt = m->tcf_refcnt - ref; + opt.bindcnt = m->tcf_bindcnt - bind; + opt.eaction = m->tcfm_eaction; + opt.ifindex = m->tcfm_ifindex; RTA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt); - t.install = jiffies_to_clock_t(jiffies - p->tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); - t.expires = jiffies_to_clock_t(p->tm.expires); + t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(m->tcf_tm.expires); RTA_PUT(skb, TCA_MIRRED_TM, sizeof(t), &t); return skb->len; - rtattr_failure: +rtattr_failure: skb_trim(skb, b - skb->data); return -1; } static struct tc_action_ops act_mirred_ops = { .kind = "mirred", + .hinfo = &mirred_hash_info, .type = TCA_ACT_MIRRED, .capab = TCA_CAP_NONE, .owner = THIS_MODULE, @@ -257,15 +248,13 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002)"); MODULE_DESCRIPTION("Device Mirror/redirect actions"); MODULE_LICENSE("GPL"); -static int __init -mirred_init_module(void) +static int __init mirred_init_module(void) { printk("Mirror/redirect action on\n"); return tcf_register_action(&act_mirred_ops); } -static void __exit -mirred_cleanup_module(void) +static void __exit mirred_cleanup_module(void) { tcf_unregister_action(&act_mirred_ops); } diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index f257475e0e0c..8ac65c219b98 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -33,32 +33,25 @@ #include #include - -#define PEDIT_DEB 1 - -/* use generic hash table */ -#define MY_TAB_SIZE 16 -#define MY_TAB_MASK 15 -static u32 idx_gen; -static struct tcf_pedit *tcf_pedit_ht[MY_TAB_SIZE]; +#define PEDIT_TAB_MASK 15 +static struct tcf_common *tcf_pedit_ht[PEDIT_TAB_MASK + 1]; +static u32 pedit_idx_gen; static DEFINE_RWLOCK(pedit_lock); -#define tcf_st tcf_pedit -#define tc_st tc_pedit -#define tcf_t_lock pedit_lock -#define tcf_ht tcf_pedit_ht +static struct tcf_hashinfo pedit_hash_info = { + .htab = tcf_pedit_ht, + .hmask = PEDIT_TAB_MASK, + .lock = &pedit_lock, +}; -#define CONFIG_NET_ACT_INIT 1 -#include - -static int -tcf_pedit_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, - int ovr, int bind) +static int tcf_pedit_init(struct rtattr *rta, struct rtattr *est, + struct tc_action *a, int ovr, int bind) { struct rtattr *tb[TCA_PEDIT_MAX]; struct tc_pedit *parm; int ret = 0; struct tcf_pedit *p; + struct tcf_common *pc; struct tc_pedit_key *keys = NULL; int ksize; @@ -73,54 +66,56 @@ tcf_pedit_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, if (RTA_PAYLOAD(tb[TCA_PEDIT_PARMS-1]) < sizeof(*parm) + ksize) return -EINVAL; - p = tcf_hash_check(parm->index, a, ovr, bind); - if (p == NULL) { + pc = tcf_hash_check(parm->index, a, bind, &pedit_hash_info); + if (!pc) { if (!parm->nkeys) return -EINVAL; - p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind); - if (p == NULL) + pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, + &pedit_idx_gen, &pedit_hash_info); + if (unlikely(!pc)) return -ENOMEM; + p = to_pedit(pc); keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) { - kfree(p); + kfree(pc); return -ENOMEM; } ret = ACT_P_CREATED; } else { + p = to_pedit(pc); if (!ovr) { - tcf_hash_release(p, bind); + tcf_hash_release(pc, bind, &pedit_hash_info); return -EEXIST; } - if (p->nkeys && p->nkeys != parm->nkeys) { + if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) { keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) return -ENOMEM; } } - spin_lock_bh(&p->lock); - p->flags = parm->flags; - p->action = parm->action; + spin_lock_bh(&p->tcf_lock); + p->tcfp_flags = parm->flags; + p->tcf_action = parm->action; if (keys) { - kfree(p->keys); - p->keys = keys; - p->nkeys = parm->nkeys; + kfree(p->tcfp_keys); + p->tcfp_keys = keys; + p->tcfp_nkeys = parm->nkeys; } - memcpy(p->keys, parm->keys, ksize); - spin_unlock_bh(&p->lock); + memcpy(p->tcfp_keys, parm->keys, ksize); + spin_unlock_bh(&p->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(p); + tcf_hash_insert(pc, &pedit_hash_info); return ret; } -static int -tcf_pedit_cleanup(struct tc_action *a, int bind) +static int tcf_pedit_cleanup(struct tc_action *a, int bind) { - struct tcf_pedit *p = PRIV(a, pedit); + struct tcf_pedit *p = a->priv; - if (p != NULL) { - struct tc_pedit_key *keys = p->keys; - if (tcf_hash_release(p, bind)) { + if (p) { + struct tc_pedit_key *keys = p->tcfp_keys; + if (tcf_hash_release(&p->common, bind, &pedit_hash_info)) { kfree(keys); return 1; } @@ -128,30 +123,30 @@ tcf_pedit_cleanup(struct tc_action *a, int bind) return 0; } -static int -tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) +static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, + struct tcf_result *res) { - struct tcf_pedit *p = PRIV(a, pedit); + struct tcf_pedit *p = a->priv; int i, munged = 0; u8 *pptr; if (!(skb->tc_verd & TC_OK2MUNGE)) { /* should we set skb->cloned? */ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { - return p->action; + return p->tcf_action; } } pptr = skb->nh.raw; - spin_lock(&p->lock); + spin_lock(&p->tcf_lock); - p->tm.lastuse = jiffies; + p->tcf_tm.lastuse = jiffies; - if (p->nkeys > 0) { - struct tc_pedit_key *tkey = p->keys; + if (p->tcfp_nkeys > 0) { + struct tc_pedit_key *tkey = p->tcfp_keys; - for (i = p->nkeys; i > 0; i--, tkey++) { + for (i = p->tcfp_nkeys; i > 0; i--, tkey++) { u32 *ptr; int offset = tkey->off; @@ -169,7 +164,8 @@ tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) printk("offset must be on 32 bit boundaries\n"); goto bad; } - if (skb->len < 0 || (offset > 0 && offset > skb->len)) { + if (skb->len < 0 || + (offset > 0 && offset > skb->len)) { printk("offset %d cant exceed pkt length %d\n", offset, skb->len); goto bad; @@ -185,63 +181,47 @@ tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) skb->tc_verd = SET_TC_MUNGED(skb->tc_verd); goto done; } else { - printk("pedit BUG: index %d\n",p->index); + printk("pedit BUG: index %d\n", p->tcf_index); } bad: - p->qstats.overlimits++; + p->tcf_qstats.overlimits++; done: - p->bstats.bytes += skb->len; - p->bstats.packets++; - spin_unlock(&p->lock); - return p->action; + p->tcf_bstats.bytes += skb->len; + p->tcf_bstats.packets++; + spin_unlock(&p->tcf_lock); + return p->tcf_action; } -static int -tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,int bind, int ref) +static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) { unsigned char *b = skb->tail; + struct tcf_pedit *p = a->priv; struct tc_pedit *opt; - struct tcf_pedit *p = PRIV(a, pedit); struct tcf_t t; int s; - s = sizeof(*opt) + p->nkeys * sizeof(struct tc_pedit_key); + s = sizeof(*opt) + p->tcfp_nkeys * sizeof(struct tc_pedit_key); /* netlink spinlocks held above us - must use ATOMIC */ opt = kzalloc(s, GFP_ATOMIC); - if (opt == NULL) + if (unlikely(!opt)) return -ENOBUFS; - memcpy(opt->keys, p->keys, p->nkeys * sizeof(struct tc_pedit_key)); - opt->index = p->index; - opt->nkeys = p->nkeys; - opt->flags = p->flags; - opt->action = p->action; - opt->refcnt = p->refcnt - ref; - opt->bindcnt = p->bindcnt - bind; - - -#ifdef PEDIT_DEB - { - /* Debug - get rid of later */ - int i; - struct tc_pedit_key *key = opt->keys; - - for (i=0; inkeys; i++, key++) { - printk( "\n key #%d",i); - printk( " at %d: val %08x mask %08x", - (unsigned int)key->off, - (unsigned int)key->val, - (unsigned int)key->mask); - } - } -#endif + memcpy(opt->keys, p->tcfp_keys, + p->tcfp_nkeys * sizeof(struct tc_pedit_key)); + opt->index = p->tcf_index; + opt->nkeys = p->tcfp_nkeys; + opt->flags = p->tcfp_flags; + opt->action = p->tcf_action; + opt->refcnt = p->tcf_refcnt - ref; + opt->bindcnt = p->tcf_bindcnt - bind; RTA_PUT(skb, TCA_PEDIT_PARMS, s, opt); - t.install = jiffies_to_clock_t(jiffies - p->tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); - t.expires = jiffies_to_clock_t(p->tm.expires); + t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(p->tcf_tm.expires); RTA_PUT(skb, TCA_PEDIT_TM, sizeof(t), &t); kfree(opt); return skb->len; @@ -252,9 +232,9 @@ rtattr_failure: return -1; } -static -struct tc_action_ops act_pedit_ops = { +static struct tc_action_ops act_pedit_ops = { .kind = "pedit", + .hinfo = &pedit_hash_info, .type = TCA_ACT_PEDIT, .capab = TCA_CAP_NONE, .owner = THIS_MODULE, @@ -270,14 +250,12 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); MODULE_DESCRIPTION("Generic Packet Editor actions"); MODULE_LICENSE("GPL"); -static int __init -pedit_init_module(void) +static int __init pedit_init_module(void) { return tcf_register_action(&act_pedit_ops); } -static void __exit -pedit_cleanup_module(void) +static void __exit pedit_cleanup_module(void) { tcf_unregister_action(&act_pedit_ops); } diff --git a/net/sched/act_police.c b/net/sched/act_police.c index da905d7b4b40..fed47b658837 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -32,43 +32,27 @@ #include #include -#define L2T(p,L) ((p)->R_tab->data[(L)>>(p)->R_tab->rate.cell_log]) -#define L2T_P(p,L) ((p)->P_tab->data[(L)>>(p)->P_tab->rate.cell_log]) -#define PRIV(a) ((struct tcf_police *) (a)->priv) +#define L2T(p,L) ((p)->tcfp_R_tab->data[(L)>>(p)->tcfp_R_tab->rate.cell_log]) +#define L2T_P(p,L) ((p)->tcfp_P_tab->data[(L)>>(p)->tcfp_P_tab->rate.cell_log]) -/* use generic hash table */ -#define MY_TAB_SIZE 16 -#define MY_TAB_MASK 15 -static u32 idx_gen; -static struct tcf_police *tcf_police_ht[MY_TAB_SIZE]; -/* Policer hash table lock */ +#define POL_TAB_MASK 15 +static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; +static u32 police_idx_gen; static DEFINE_RWLOCK(police_lock); +static struct tcf_hashinfo police_hash_info = { + .htab = tcf_police_ht, + .hmask = POL_TAB_MASK, + .lock = &police_lock, +}; + /* Each policer is serialized by its individual spinlock */ -static __inline__ unsigned tcf_police_hash(u32 index) -{ - return index&0xF; -} - -static __inline__ struct tcf_police * tcf_police_lookup(u32 index) -{ - struct tcf_police *p; - - read_lock(&police_lock); - for (p = tcf_police_ht[tcf_police_hash(index)]; p; p = p->next) { - if (p->index == index) - break; - } - read_unlock(&police_lock); - return p; -} - #ifdef CONFIG_NET_CLS_ACT static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb, int type, struct tc_action *a) { - struct tcf_police *p; + struct tcf_common *p; int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; struct rtattr *r; @@ -76,10 +60,10 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c s_i = cb->args[0]; - for (i = 0; i < MY_TAB_SIZE; i++) { - p = tcf_police_ht[tcf_police_hash(i)]; + for (i = 0; i < (POL_TAB_MASK + 1); i++) { + p = tcf_police_ht[tcf_hash(i, POL_TAB_MASK)]; - for (; p; p = p->next) { + for (; p; p = p->tcfc_next) { index++; if (index < s_i) continue; @@ -110,48 +94,26 @@ rtattr_failure: skb_trim(skb, (u8*)r - skb->data); goto done; } - -static inline int -tcf_act_police_hash_search(struct tc_action *a, u32 index) -{ - struct tcf_police *p = tcf_police_lookup(index); - - if (p != NULL) { - a->priv = p; - return 1; - } else { - return 0; - } -} #endif -static inline u32 tcf_police_new_index(void) -{ - do { - if (++idx_gen == 0) - idx_gen = 1; - } while (tcf_police_lookup(idx_gen)); - - return idx_gen; -} - void tcf_police_destroy(struct tcf_police *p) { - unsigned h = tcf_police_hash(p->index); - struct tcf_police **p1p; + unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); + struct tcf_common **p1p; - for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) { - if (*p1p == p) { + for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->tcfc_next) { + if (*p1p == &p->common) { write_lock_bh(&police_lock); - *p1p = p->next; + *p1p = p->tcf_next; write_unlock_bh(&police_lock); #ifdef CONFIG_NET_ESTIMATOR - gen_kill_estimator(&p->bstats, &p->rate_est); + gen_kill_estimator(&p->tcf_bstats, + &p->tcf_rate_est); #endif - if (p->R_tab) - qdisc_put_rtab(p->R_tab); - if (p->P_tab) - qdisc_put_rtab(p->P_tab); + if (p->tcfp_R_tab) + qdisc_put_rtab(p->tcfp_R_tab); + if (p->tcfp_P_tab) + qdisc_put_rtab(p->tcfp_P_tab); kfree(p); return; } @@ -167,7 +129,7 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, int ret = 0, err; struct rtattr *tb[TCA_POLICE_MAX]; struct tc_police *parm; - struct tcf_police *p; + struct tcf_police *police; struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0) @@ -185,27 +147,32 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) return -EINVAL; - if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) { - a->priv = p; - if (bind) { - p->bindcnt += 1; - p->refcnt += 1; + if (parm->index) { + struct tcf_common *pc; + + pc = tcf_hash_lookup(parm->index, &police_hash_info); + if (pc != NULL) { + a->priv = pc; + police = to_police(pc); + if (bind) { + police->tcf_bindcnt += 1; + police->tcf_refcnt += 1; + } + if (ovr) + goto override; + return ret; } - if (ovr) - goto override; - return ret; } - p = kzalloc(sizeof(*p), GFP_KERNEL); - if (p == NULL) + police = kzalloc(sizeof(*police), GFP_KERNEL); + if (police == NULL) return -ENOMEM; - ret = ACT_P_CREATED; - p->refcnt = 1; - spin_lock_init(&p->lock); - p->stats_lock = &p->lock; + police->tcf_refcnt = 1; + spin_lock_init(&police->tcf_lock); + police->tcf_stats_lock = &police->tcf_lock; if (bind) - p->bindcnt = 1; + police->tcf_bindcnt = 1; override: if (parm->rate.rate) { err = -ENOMEM; @@ -215,67 +182,71 @@ override: if (parm->peakrate.rate) { P_tab = qdisc_get_rtab(&parm->peakrate, tb[TCA_POLICE_PEAKRATE-1]); - if (p->P_tab == NULL) { + if (P_tab == NULL) { qdisc_put_rtab(R_tab); goto failure; } } } /* No failure allowed after this point */ - spin_lock_bh(&p->lock); + spin_lock_bh(&police->tcf_lock); if (R_tab != NULL) { - qdisc_put_rtab(p->R_tab); - p->R_tab = R_tab; + qdisc_put_rtab(police->tcfp_R_tab); + police->tcfp_R_tab = R_tab; } if (P_tab != NULL) { - qdisc_put_rtab(p->P_tab); - p->P_tab = P_tab; + qdisc_put_rtab(police->tcfp_P_tab); + police->tcfp_P_tab = P_tab; } if (tb[TCA_POLICE_RESULT-1]) - p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); - p->toks = p->burst = parm->burst; - p->mtu = parm->mtu; - if (p->mtu == 0) { - p->mtu = ~0; - if (p->R_tab) - p->mtu = 255<R_tab->rate.cell_log; + police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); + police->tcfp_toks = police->tcfp_burst = parm->burst; + police->tcfp_mtu = parm->mtu; + if (police->tcfp_mtu == 0) { + police->tcfp_mtu = ~0; + if (police->tcfp_R_tab) + police->tcfp_mtu = 255<tcfp_R_tab->rate.cell_log; } - if (p->P_tab) - p->ptoks = L2T_P(p, p->mtu); - p->action = parm->action; + if (police->tcfp_P_tab) + police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); + police->tcf_action = parm->action; #ifdef CONFIG_NET_ESTIMATOR if (tb[TCA_POLICE_AVRATE-1]) - p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); + police->tcfp_ewma_rate = + *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); if (est) - gen_replace_estimator(&p->bstats, &p->rate_est, p->stats_lock, est); + gen_replace_estimator(&police->tcf_bstats, + &police->tcf_rate_est, + police->tcf_stats_lock, est); #endif - spin_unlock_bh(&p->lock); + spin_unlock_bh(&police->tcf_lock); if (ret != ACT_P_CREATED) return ret; - PSCHED_GET_TIME(p->t_c); - p->index = parm->index ? : tcf_police_new_index(); - h = tcf_police_hash(p->index); + PSCHED_GET_TIME(police->tcfp_t_c); + police->tcf_index = parm->index ? parm->index : + tcf_hash_new_index(&police_idx_gen, &police_hash_info); + h = tcf_hash(police->tcf_index, POL_TAB_MASK); write_lock_bh(&police_lock); - p->next = tcf_police_ht[h]; - tcf_police_ht[h] = p; + police->tcf_next = tcf_police_ht[h]; + tcf_police_ht[h] = &police->common; write_unlock_bh(&police_lock); - a->priv = p; + a->priv = police; return ret; failure: if (ret == ACT_P_CREATED) - kfree(p); + kfree(police); return err; } static int tcf_act_police_cleanup(struct tc_action *a, int bind) { - struct tcf_police *p = PRIV(a); + struct tcf_police *p = a->priv; if (p != NULL) return tcf_police_release(p, bind); @@ -285,86 +256,87 @@ static int tcf_act_police_cleanup(struct tc_action *a, int bind) static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) { + struct tcf_police *police = a->priv; psched_time_t now; - struct tcf_police *p = PRIV(a); long toks; long ptoks = 0; - spin_lock(&p->lock); + spin_lock(&police->tcf_lock); - p->bstats.bytes += skb->len; - p->bstats.packets++; + police->tcf_bstats.bytes += skb->len; + police->tcf_bstats.packets++; #ifdef CONFIG_NET_ESTIMATOR - if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) { - p->qstats.overlimits++; - spin_unlock(&p->lock); - return p->action; + if (police->tcfp_ewma_rate && + police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { + police->tcf_qstats.overlimits++; + spin_unlock(&police->tcf_lock); + return police->tcf_action; } #endif - if (skb->len <= p->mtu) { - if (p->R_tab == NULL) { - spin_unlock(&p->lock); - return p->result; + if (skb->len <= police->tcfp_mtu) { + if (police->tcfp_R_tab == NULL) { + spin_unlock(&police->tcf_lock); + return police->tcfp_result; } PSCHED_GET_TIME(now); - toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst); - - if (p->P_tab) { - ptoks = toks + p->ptoks; - if (ptoks > (long)L2T_P(p, p->mtu)) - ptoks = (long)L2T_P(p, p->mtu); - ptoks -= L2T_P(p, skb->len); + toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c, + police->tcfp_burst); + if (police->tcfp_P_tab) { + ptoks = toks + police->tcfp_ptoks; + if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) + ptoks = (long)L2T_P(police, police->tcfp_mtu); + ptoks -= L2T_P(police, skb->len); } - toks += p->toks; - if (toks > (long)p->burst) - toks = p->burst; - toks -= L2T(p, skb->len); - + toks += police->tcfp_toks; + if (toks > (long)police->tcfp_burst) + toks = police->tcfp_burst; + toks -= L2T(police, skb->len); if ((toks|ptoks) >= 0) { - p->t_c = now; - p->toks = toks; - p->ptoks = ptoks; - spin_unlock(&p->lock); - return p->result; + police->tcfp_t_c = now; + police->tcfp_toks = toks; + police->tcfp_ptoks = ptoks; + spin_unlock(&police->tcf_lock); + return police->tcfp_result; } } - p->qstats.overlimits++; - spin_unlock(&p->lock); - return p->action; + police->tcf_qstats.overlimits++; + spin_unlock(&police->tcf_lock); + return police->tcf_action; } static int tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb->tail; + struct tcf_police *police = a->priv; struct tc_police opt; - struct tcf_police *p = PRIV(a); - opt.index = p->index; - opt.action = p->action; - opt.mtu = p->mtu; - opt.burst = p->burst; - opt.refcnt = p->refcnt - ref; - opt.bindcnt = p->bindcnt - bind; - if (p->R_tab) - opt.rate = p->R_tab->rate; + opt.index = police->tcf_index; + opt.action = police->tcf_action; + opt.mtu = police->tcfp_mtu; + opt.burst = police->tcfp_burst; + opt.refcnt = police->tcf_refcnt - ref; + opt.bindcnt = police->tcf_bindcnt - bind; + if (police->tcfp_R_tab) + opt.rate = police->tcfp_R_tab->rate; else memset(&opt.rate, 0, sizeof(opt.rate)); - if (p->P_tab) - opt.peakrate = p->P_tab->rate; + if (police->tcfp_P_tab) + opt.peakrate = police->tcfp_P_tab->rate; else memset(&opt.peakrate, 0, sizeof(opt.peakrate)); RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); - if (p->result) - RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result); + if (police->tcfp_result) + RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), + &police->tcfp_result); #ifdef CONFIG_NET_ESTIMATOR - if (p->ewma_rate) - RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate); + if (police->tcfp_ewma_rate) + RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); #endif return skb->len; @@ -379,13 +351,14 @@ MODULE_LICENSE("GPL"); static struct tc_action_ops act_police_ops = { .kind = "police", + .hinfo = &police_hash_info, .type = TCA_ID_POLICE, .capab = TCA_CAP_NONE, .owner = THIS_MODULE, .act = tcf_act_police, .dump = tcf_act_police_dump, .cleanup = tcf_act_police_cleanup, - .lookup = tcf_act_police_hash_search, + .lookup = tcf_hash_search, .init = tcf_act_police_locate, .walk = tcf_act_police_walker }; @@ -407,10 +380,39 @@ module_exit(police_cleanup_module); #else /* CONFIG_NET_CLS_ACT */ -struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est) +static struct tcf_common *tcf_police_lookup(u32 index) { - unsigned h; - struct tcf_police *p; + struct tcf_hashinfo *hinfo = &police_hash_info; + struct tcf_common *p; + + read_lock(hinfo->lock); + for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p; + p = p->tcfc_next) { + if (p->tcfc_index == index) + break; + } + read_unlock(hinfo->lock); + + return p; +} + +static u32 tcf_police_new_index(void) +{ + u32 *idx_gen = &police_idx_gen; + u32 val = *idx_gen; + + do { + if (++val == 0) + val = 1; + } while (tcf_police_lookup(val)); + + return (*idx_gen = val); +} + +struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est) +{ + unsigned int h; + struct tcf_police *police; struct rtattr *tb[TCA_POLICE_MAX]; struct tc_police *parm; @@ -423,149 +425,158 @@ struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est) parm = RTA_DATA(tb[TCA_POLICE_TBF-1]); - if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) { - p->refcnt++; - return p; - } + if (parm->index) { + struct tcf_common *pc; - p = kzalloc(sizeof(*p), GFP_KERNEL); - if (p == NULL) + pc = tcf_police_lookup(parm->index); + if (pc) { + police = to_police(pc); + police->tcf_refcnt++; + return police; + } + } + police = kzalloc(sizeof(*police), GFP_KERNEL); + if (unlikely(!police)) return NULL; - p->refcnt = 1; - spin_lock_init(&p->lock); - p->stats_lock = &p->lock; + police->tcf_refcnt = 1; + spin_lock_init(&police->tcf_lock); + police->tcf_stats_lock = &police->tcf_lock; if (parm->rate.rate) { - p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); - if (p->R_tab == NULL) + police->tcfp_R_tab = + qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); + if (police->tcfp_R_tab == NULL) goto failure; if (parm->peakrate.rate) { - p->P_tab = qdisc_get_rtab(&parm->peakrate, - tb[TCA_POLICE_PEAKRATE-1]); - if (p->P_tab == NULL) + police->tcfp_P_tab = + qdisc_get_rtab(&parm->peakrate, + tb[TCA_POLICE_PEAKRATE-1]); + if (police->tcfp_P_tab == NULL) goto failure; } } if (tb[TCA_POLICE_RESULT-1]) { if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) goto failure; - p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); + police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); } #ifdef CONFIG_NET_ESTIMATOR if (tb[TCA_POLICE_AVRATE-1]) { if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32)) goto failure; - p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); + police->tcfp_ewma_rate = + *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); } #endif - p->toks = p->burst = parm->burst; - p->mtu = parm->mtu; - if (p->mtu == 0) { - p->mtu = ~0; - if (p->R_tab) - p->mtu = 255<R_tab->rate.cell_log; + police->tcfp_toks = police->tcfp_burst = parm->burst; + police->tcfp_mtu = parm->mtu; + if (police->tcfp_mtu == 0) { + police->tcfp_mtu = ~0; + if (police->tcfp_R_tab) + police->tcfp_mtu = 255<tcfp_R_tab->rate.cell_log; } - if (p->P_tab) - p->ptoks = L2T_P(p, p->mtu); - PSCHED_GET_TIME(p->t_c); - p->index = parm->index ? : tcf_police_new_index(); - p->action = parm->action; + if (police->tcfp_P_tab) + police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); + PSCHED_GET_TIME(police->tcfp_t_c); + police->tcf_index = parm->index ? parm->index : + tcf_police_new_index(); + police->tcf_action = parm->action; #ifdef CONFIG_NET_ESTIMATOR if (est) - gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est); + gen_new_estimator(&police->tcf_bstats, &police->tcf_rate_est, + police->tcf_stats_lock, est); #endif - h = tcf_police_hash(p->index); + h = tcf_hash(police->tcf_index, POL_TAB_MASK); write_lock_bh(&police_lock); - p->next = tcf_police_ht[h]; - tcf_police_ht[h] = p; + police->tcf_next = tcf_police_ht[h]; + tcf_police_ht[h] = &police->common; write_unlock_bh(&police_lock); - return p; + return police; failure: - if (p->R_tab) - qdisc_put_rtab(p->R_tab); - kfree(p); + if (police->tcfp_R_tab) + qdisc_put_rtab(police->tcfp_R_tab); + kfree(police); return NULL; } -int tcf_police(struct sk_buff *skb, struct tcf_police *p) +int tcf_police(struct sk_buff *skb, struct tcf_police *police) { psched_time_t now; long toks; long ptoks = 0; - spin_lock(&p->lock); + spin_lock(&police->tcf_lock); - p->bstats.bytes += skb->len; - p->bstats.packets++; + police->tcf_bstats.bytes += skb->len; + police->tcf_bstats.packets++; #ifdef CONFIG_NET_ESTIMATOR - if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) { - p->qstats.overlimits++; - spin_unlock(&p->lock); - return p->action; + if (police->tcfp_ewma_rate && + police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { + police->tcf_qstats.overlimits++; + spin_unlock(&police->tcf_lock); + return police->tcf_action; } #endif - - if (skb->len <= p->mtu) { - if (p->R_tab == NULL) { - spin_unlock(&p->lock); - return p->result; + if (skb->len <= police->tcfp_mtu) { + if (police->tcfp_R_tab == NULL) { + spin_unlock(&police->tcf_lock); + return police->tcfp_result; } PSCHED_GET_TIME(now); - - toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst); - - if (p->P_tab) { - ptoks = toks + p->ptoks; - if (ptoks > (long)L2T_P(p, p->mtu)) - ptoks = (long)L2T_P(p, p->mtu); - ptoks -= L2T_P(p, skb->len); + toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c, + police->tcfp_burst); + if (police->tcfp_P_tab) { + ptoks = toks + police->tcfp_ptoks; + if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) + ptoks = (long)L2T_P(police, police->tcfp_mtu); + ptoks -= L2T_P(police, skb->len); } - toks += p->toks; - if (toks > (long)p->burst) - toks = p->burst; - toks -= L2T(p, skb->len); - + toks += police->tcfp_toks; + if (toks > (long)police->tcfp_burst) + toks = police->tcfp_burst; + toks -= L2T(police, skb->len); if ((toks|ptoks) >= 0) { - p->t_c = now; - p->toks = toks; - p->ptoks = ptoks; - spin_unlock(&p->lock); - return p->result; + police->tcfp_t_c = now; + police->tcfp_toks = toks; + police->tcfp_ptoks = ptoks; + spin_unlock(&police->tcf_lock); + return police->tcfp_result; } } - p->qstats.overlimits++; - spin_unlock(&p->lock); - return p->action; + police->tcf_qstats.overlimits++; + spin_unlock(&police->tcf_lock); + return police->tcf_action; } EXPORT_SYMBOL(tcf_police); -int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p) +int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police) { - unsigned char *b = skb->tail; + unsigned char *b = skb->tail; struct tc_police opt; - opt.index = p->index; - opt.action = p->action; - opt.mtu = p->mtu; - opt.burst = p->burst; - if (p->R_tab) - opt.rate = p->R_tab->rate; + opt.index = police->tcf_index; + opt.action = police->tcf_action; + opt.mtu = police->tcfp_mtu; + opt.burst = police->tcfp_burst; + if (police->tcfp_R_tab) + opt.rate = police->tcfp_R_tab->rate; else memset(&opt.rate, 0, sizeof(opt.rate)); - if (p->P_tab) - opt.peakrate = p->P_tab->rate; + if (police->tcfp_P_tab) + opt.peakrate = police->tcfp_P_tab->rate; else memset(&opt.peakrate, 0, sizeof(opt.peakrate)); RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); - if (p->result) - RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result); + if (police->tcfp_result) + RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), + &police->tcfp_result); #ifdef CONFIG_NET_ESTIMATOR - if (p->ewma_rate) - RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate); + if (police->tcfp_ewma_rate) + RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); #endif return skb->len; @@ -574,19 +585,20 @@ rtattr_failure: return -1; } -int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p) +int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *police) { struct gnet_dump d; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, p->stats_lock, &d) < 0) + TCA_XSTATS, police->tcf_stats_lock, + &d) < 0) goto errout; - if (gnet_stats_copy_basic(&d, &p->bstats) < 0 || + if (gnet_stats_copy_basic(&d, &police->tcf_bstats) < 0 || #ifdef CONFIG_NET_ESTIMATOR - gnet_stats_copy_rate_est(&d, &p->rate_est) < 0 || + gnet_stats_copy_rate_est(&d, &police->tcf_rate_est) < 0 || #endif - gnet_stats_copy_queue(&d, &p->qstats) < 0) + gnet_stats_copy_queue(&d, &police->tcf_qstats) < 0) goto errout; if (gnet_stats_finish_copy(&d) < 0) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 17105c82537f..8c1ab8ad8fa6 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -20,54 +20,175 @@ #define TCA_ACT_SIMP 22 -/* XXX: Hide all these common elements under some macro - * probably -*/ #include #include -/* use generic hash table with 8 buckets */ -#define MY_TAB_SIZE 8 -#define MY_TAB_MASK (MY_TAB_SIZE - 1) -static u32 idx_gen; -static struct tcf_defact *tcf_simp_ht[MY_TAB_SIZE]; +#define SIMP_TAB_MASK 7 +static struct tcf_common *tcf_simp_ht[SIMP_TAB_MASK + 1]; +static u32 simp_idx_gen; static DEFINE_RWLOCK(simp_lock); -/* override the defaults */ -#define tcf_st tcf_defact -#define tc_st tc_defact -#define tcf_t_lock simp_lock -#define tcf_ht tcf_simp_ht - -#define CONFIG_NET_ACT_INIT 1 -#include -#include +struct tcf_hashinfo simp_hash_info = { + .htab = tcf_simp_ht, + .hmask = SIMP_TAB_MASK, + .lock = &simp_lock, +}; static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) { - struct tcf_defact *p = PRIV(a, defact); + struct tcf_defact *d = a->priv; - spin_lock(&p->lock); - p->tm.lastuse = jiffies; - p->bstats.bytes += skb->len; - p->bstats.packets++; + spin_lock(&d->tcf_lock); + d->tcf_tm.lastuse = jiffies; + d->tcf_bstats.bytes += skb->len; + d->tcf_bstats.packets++; /* print policy string followed by _ then packet count * Example if this was the 3rd packet and the string was "hello" * then it would look like "hello_3" (without quotes) **/ - printk("simple: %s_%d\n", (char *)p->defdata, p->bstats.packets); - spin_unlock(&p->lock); - return p->action; + printk("simple: %s_%d\n", + (char *)d->tcfd_defdata, d->tcf_bstats.packets); + spin_unlock(&d->tcf_lock); + return d->tcf_action; +} + +static int tcf_simp_release(struct tcf_defact *d, int bind) +{ + int ret = 0; + if (d) { + if (bind) + d->tcf_bindcnt--; + d->tcf_refcnt--; + if (d->tcf_bindcnt <= 0 && d->tcf_refcnt <= 0) { + kfree(d->tcfd_defdata); + tcf_hash_destroy(&d->common, &simp_hash_info); + ret = 1; + } + } + return ret; +} + +static int alloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata) +{ + d->tcfd_defdata = kmalloc(datalen, GFP_KERNEL); + if (unlikely(!d->tcfd_defdata)) + return -ENOMEM; + d->tcfd_datalen = datalen; + memcpy(d->tcfd_defdata, defdata, datalen); + return 0; +} + +static int realloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata) +{ + kfree(d->tcfd_defdata); + return alloc_defdata(d, datalen, defdata); +} + +static int tcf_simp_init(struct rtattr *rta, struct rtattr *est, + struct tc_action *a, int ovr, int bind) +{ + struct rtattr *tb[TCA_DEF_MAX]; + struct tc_defact *parm; + struct tcf_defact *d; + struct tcf_common *pc; + void *defdata; + u32 datalen = 0; + int ret = 0; + + if (rta == NULL || rtattr_parse_nested(tb, TCA_DEF_MAX, rta) < 0) + return -EINVAL; + + if (tb[TCA_DEF_PARMS - 1] == NULL || + RTA_PAYLOAD(tb[TCA_DEF_PARMS - 1]) < sizeof(*parm)) + return -EINVAL; + + parm = RTA_DATA(tb[TCA_DEF_PARMS - 1]); + defdata = RTA_DATA(tb[TCA_DEF_DATA - 1]); + if (defdata == NULL) + return -EINVAL; + + datalen = RTA_PAYLOAD(tb[TCA_DEF_DATA - 1]); + if (datalen <= 0) + return -EINVAL; + + pc = tcf_hash_check(parm->index, a, bind, &simp_hash_info); + if (!pc) { + pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, + &simp_idx_gen, &simp_hash_info); + if (unlikely(!pc)) + return -ENOMEM; + + d = to_defact(pc); + ret = alloc_defdata(d, datalen, defdata); + if (ret < 0) { + kfree(pc); + return ret; + } + ret = ACT_P_CREATED; + } else { + d = to_defact(pc); + if (!ovr) { + tcf_simp_release(d, bind); + return -EEXIST; + } + realloc_defdata(d, datalen, defdata); + } + + spin_lock_bh(&d->tcf_lock); + d->tcf_action = parm->action; + spin_unlock_bh(&d->tcf_lock); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(pc, &simp_hash_info); + return ret; +} + +static inline int tcf_simp_cleanup(struct tc_action *a, int bind) +{ + struct tcf_defact *d = a->priv; + + if (d) + return tcf_simp_release(d, bind); + return 0; +} + +static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb->tail; + struct tcf_defact *d = a->priv; + struct tc_defact opt; + struct tcf_t t; + + opt.index = d->tcf_index; + opt.refcnt = d->tcf_refcnt - ref; + opt.bindcnt = d->tcf_bindcnt - bind; + opt.action = d->tcf_action; + RTA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt); + RTA_PUT(skb, TCA_DEF_DATA, d->tcfd_datalen, d->tcfd_defdata); + t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(d->tcf_tm.expires); + RTA_PUT(skb, TCA_DEF_TM, sizeof(t), &t); + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; } static struct tc_action_ops act_simp_ops = { - .kind = "simple", - .type = TCA_ACT_SIMP, - .capab = TCA_CAP_NONE, - .owner = THIS_MODULE, - .act = tcf_simp, - tca_use_default_ops + .kind = "simple", + .hinfo = &simp_hash_info, + .type = TCA_ACT_SIMP, + .capab = TCA_CAP_NONE, + .owner = THIS_MODULE, + .act = tcf_simp, + .dump = tcf_simp_dump, + .cleanup = tcf_simp_cleanup, + .init = tcf_simp_init, + .walk = tcf_generic_walker, }; MODULE_AUTHOR("Jamal Hadi Salim(2005)"); From e0a1ad73d34fd6dfdb630479400511e9879069c0 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 22 Aug 2006 00:00:21 -0700 Subject: [PATCH 0492/1063] [IPv6] route: Simplify ip6_del_rt() Provide a simple ip6_del_rt() for the majority of users and an alternative for the exception via netlink. Avoids code obfuscation. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/ip6_route.h | 5 +---- net/ipv6/addrconf.c | 6 +++--- net/ipv6/ndisc.c | 4 ++-- net/ipv6/route.c | 18 ++++++++++++------ 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 01bfe404784f..a7e6086a2bd4 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -69,10 +69,7 @@ extern int ip6_ins_rt(struct rt6_info *, struct nlmsghdr *, void *rtattr, struct netlink_skb_parms *req); -extern int ip6_del_rt(struct rt6_info *, - struct nlmsghdr *, - void *rtattr, - struct netlink_skb_parms *req); +extern int ip6_del_rt(struct rt6_info *); extern int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f1ede9004887..27f2e3309598 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -736,7 +736,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { if (onlink == 0) { - ip6_del_rt(rt, NULL, NULL, NULL); + ip6_del_rt(rt); rt = NULL; } else if (!(rt->rt6i_flags & RTF_EXPIRES)) { rt->rt6i_expires = expires; @@ -1662,7 +1662,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { if (rt->rt6i_flags&RTF_EXPIRES) { if (valid_lft == 0) { - ip6_del_rt(rt, NULL, NULL, NULL); + ip6_del_rt(rt); rt = NULL; } else { rt->rt6i_expires = jiffies + rt_expires; @@ -3557,7 +3557,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) addrconf_leave_anycast(ifp); addrconf_leave_solict(ifp->idev, &ifp->addr); dst_hold(&ifp->rt->u.dst); - if (ip6_del_rt(ifp->rt, NULL, NULL, NULL)) + if (ip6_del_rt(ifp->rt)) dst_free(&ifp->rt->u.dst); break; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 5743e8bffefd..419d65163819 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -961,7 +961,7 @@ static void ndisc_recv_na(struct sk_buff *skb) struct rt6_info *rt; rt = rt6_get_dflt_router(saddr, dev); if (rt) - ip6_del_rt(rt, NULL, NULL, NULL); + ip6_del_rt(rt); } out: @@ -1114,7 +1114,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (rt && lifetime == 0) { neigh_clone(neigh); - ip6_del_rt(rt, NULL, NULL, NULL); + ip6_del_rt(rt); rt = NULL; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1aca787ead85..8d511de0db1b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -457,7 +457,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex); if (rt && !lifetime) { - ip6_del_rt(rt, NULL, NULL, NULL); + ip6_del_rt(rt); rt = NULL; } @@ -813,7 +813,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) if (rt) { if (rt->rt6i_flags & RTF_CACHE) - ip6_del_rt(rt, NULL, NULL, NULL); + ip6_del_rt(rt); else dst_release(dst); } @@ -1218,7 +1218,8 @@ out: return err; } -int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) +static int __ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, + void *_rtattr, struct netlink_skb_parms *req) { int err; struct fib6_table *table; @@ -1237,6 +1238,11 @@ int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct return err; } +int ip6_del_rt(struct rt6_info *rt) +{ + return __ip6_del_rt(rt, NULL, NULL, NULL); +} + static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req, u32 table_id) @@ -1271,7 +1277,7 @@ static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, dst_hold(&rt->u.dst); read_unlock_bh(&table->tb6_lock); - return ip6_del_rt(rt, nlh, _rtattr, req); + return __ip6_del_rt(rt, nlh, _rtattr, req); } } read_unlock_bh(&table->tb6_lock); @@ -1395,7 +1401,7 @@ restart: call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); if (rt->rt6i_flags&RTF_CACHE) { - ip6_del_rt(rt, NULL, NULL, NULL); + ip6_del_rt(rt); return; } @@ -1631,7 +1637,7 @@ restart: if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { dst_hold(&rt->u.dst); read_unlock_bh(&table->tb6_lock); - ip6_del_rt(rt, NULL, NULL, NULL); + ip6_del_rt(rt); goto restart; } } From 40e22e8f3d4d4f1ff68fb03683f007c53ee8b348 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 22 Aug 2006 00:00:45 -0700 Subject: [PATCH 0493/1063] [IPv6] route: Simplify ip6_ins_rt() Provide a simple ip6_ins_rt() for the majority of users and an alternative for the exception via netlink. Avoids code obfuscation. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/ip6_route.h | 5 +---- net/ipv6/addrconf.c | 2 +- net/ipv6/anycast.c | 2 +- net/ipv6/route.c | 19 ++++++++++++------- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index a7e6086a2bd4..172c4761e2bf 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -65,10 +65,7 @@ extern int ip6_route_add(struct in6_rtmsg *rtmsg, void *rtattr, struct netlink_skb_parms *req, u32 table_id); -extern int ip6_ins_rt(struct rt6_info *, - struct nlmsghdr *, - void *rtattr, - struct netlink_skb_parms *req); +extern int ip6_ins_rt(struct rt6_info *); extern int ip6_del_rt(struct rt6_info *); extern int ip6_rt_addr_add(struct in6_addr *addr, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 27f2e3309598..aafba9ea9cb6 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3548,7 +3548,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) switch (event) { case RTM_NEWADDR: - ip6_ins_rt(ifp->rt, NULL, NULL, NULL); + ip6_ins_rt(ifp->rt); if (ifp->idev->cnf.forwarding) addrconf_join_anycast(ifp); break; diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index f6881d7a0385..abbc35a13e08 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -335,7 +335,7 @@ int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr) write_unlock_bh(&idev->lock); dst_hold(&rt->u.dst); - if (ip6_ins_rt(rt, NULL, NULL, NULL)) + if (ip6_ins_rt(rt)) dst_release(&rt->u.dst); addrconf_join_solict(dev, &aca->aca_addr); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8d511de0db1b..9ec348a72a95 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -546,8 +546,8 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, be destroyed. */ -int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, - void *_rtattr, struct netlink_skb_parms *req) +static int __ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, + void *_rtattr, struct netlink_skb_parms *req) { int err; struct fib6_table *table; @@ -560,6 +560,11 @@ int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, return err; } +int ip6_ins_rt(struct rt6_info *rt) +{ + return __ip6_ins_rt(rt, NULL, NULL, NULL); +} + static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, struct in6_addr *saddr) { @@ -657,7 +662,7 @@ restart: dst_hold(&rt->u.dst); if (nrt) { - err = ip6_ins_rt(nrt, NULL, NULL, NULL); + err = ip6_ins_rt(nrt); if (!err) goto out2; } @@ -752,7 +757,7 @@ restart: dst_hold(&rt->u.dst); if (nrt) { - err = ip6_ins_rt(nrt, NULL, NULL, NULL); + err = ip6_ins_rt(nrt); if (!err) goto out2; } @@ -1206,7 +1211,7 @@ install_route: rt->u.dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; - return ip6_ins_rt(rt, nlh, _rtattr, req); + return __ip6_ins_rt(rt, nlh, _rtattr, req); out: if (dev) @@ -1393,7 +1398,7 @@ restart: nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst)); - if (ip6_ins_rt(nrt, NULL, NULL, NULL)) + if (ip6_ins_rt(nrt)) goto out; netevent.old = &rt->u.dst; @@ -1483,7 +1488,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires); nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; - ip6_ins_rt(nrt, NULL, NULL, NULL); + ip6_ins_rt(nrt); } out: dst_release(&rt->u.dst); From 86872cb57925c46a6499887d77afb880a892c0ec Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 22 Aug 2006 00:01:08 -0700 Subject: [PATCH 0494/1063] [IPv6] route: FIB6 configuration using struct fib6_config Replaces the struct in6_rtmsg based interface orignating from the ioctl interface with a struct fib6_config based on. Allows changing the interface without breaking the ioctl interface and avoids passing on tons of parameters. The recently introduced struct nl_info is used to pass on netlink authorship information for notifications. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 38 +++-- include/net/ip6_route.h | 6 +- net/ipv6/addrconf.c | 63 ++++---- net/ipv6/ip6_fib.c | 19 ++- net/ipv6/route.c | 329 +++++++++++++++++++++++----------------- 5 files changed, 257 insertions(+), 198 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 69c444209781..9610b887ffb5 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -16,14 +16,35 @@ #ifdef __KERNEL__ #include - -#include -#include #include #include +#include +#include +#include struct rt6_info; +struct fib6_config +{ + u32 fc_table; + u32 fc_metric; + int fc_dst_len; + int fc_src_len; + int fc_ifindex; + u32 fc_flags; + u32 fc_protocol; + + struct in6_addr fc_dst; + struct in6_addr fc_src; + struct in6_addr fc_gateway; + + unsigned long fc_expires; + struct nlattr *fc_mx; + int fc_mx_len; + + struct nl_info fc_nlinfo; +}; + struct fib6_node { struct fib6_node *parent; @@ -175,18 +196,13 @@ extern void fib6_clean_all(int (*func)(struct rt6_info *, void *arg), extern int fib6_add(struct fib6_node *root, struct rt6_info *rt, - struct nlmsghdr *nlh, - void *rtattr, - struct netlink_skb_parms *req); + struct nl_info *info); extern int fib6_del(struct rt6_info *rt, - struct nlmsghdr *nlh, - void *rtattr, - struct netlink_skb_parms *req); + struct nl_info *info); extern void inet6_rt_notify(int event, struct rt6_info *rt, - struct nlmsghdr *nlh, - struct netlink_skb_parms *req); + struct nl_info *info); extern void fib6_run_gc(unsigned long dummy); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 172c4761e2bf..3f170f667c7b 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -60,11 +60,7 @@ extern void ip6_route_cleanup(void); extern int ipv6_route_ioctl(unsigned int cmd, void __user *arg); -extern int ip6_route_add(struct in6_rtmsg *rtmsg, - struct nlmsghdr *, - void *rtattr, - struct netlink_skb_parms *req, - u32 table_id); +extern int ip6_route_add(struct fib6_config *cfg); extern int ip6_ins_rt(struct rt6_info *); extern int ip6_del_rt(struct rt6_info *); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index aafba9ea9cb6..fc9cff3426c4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1509,59 +1509,56 @@ static void addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, unsigned long expires, u32 flags) { - struct in6_rtmsg rtmsg; + struct fib6_config cfg = { + .fc_table = RT6_TABLE_PREFIX, + .fc_metric = IP6_RT_PRIO_ADDRCONF, + .fc_ifindex = dev->ifindex, + .fc_expires = expires, + .fc_dst_len = plen, + .fc_flags = RTF_UP | flags, + }; - memset(&rtmsg, 0, sizeof(rtmsg)); - ipv6_addr_copy(&rtmsg.rtmsg_dst, pfx); - rtmsg.rtmsg_dst_len = plen; - rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; - rtmsg.rtmsg_ifindex = dev->ifindex; - rtmsg.rtmsg_info = expires; - rtmsg.rtmsg_flags = RTF_UP|flags; - rtmsg.rtmsg_type = RTMSG_NEWROUTE; + ipv6_addr_copy(&cfg.fc_dst, pfx); /* Prevent useless cloning on PtP SIT. This thing is done here expecting that the whole class of non-broadcast devices need not cloning. */ - if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT)) - rtmsg.rtmsg_flags |= RTF_NONEXTHOP; + if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT)) + cfg.fc_flags |= RTF_NONEXTHOP; - ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_PREFIX); + ip6_route_add(&cfg); } /* Create "default" multicast route to the interface */ static void addrconf_add_mroute(struct net_device *dev) { - struct in6_rtmsg rtmsg; + struct fib6_config cfg = { + .fc_table = RT6_TABLE_LOCAL, + .fc_metric = IP6_RT_PRIO_ADDRCONF, + .fc_ifindex = dev->ifindex, + .fc_dst_len = 8, + .fc_flags = RTF_UP, + }; - memset(&rtmsg, 0, sizeof(rtmsg)); - ipv6_addr_set(&rtmsg.rtmsg_dst, - htonl(0xFF000000), 0, 0, 0); - rtmsg.rtmsg_dst_len = 8; - rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; - rtmsg.rtmsg_ifindex = dev->ifindex; - rtmsg.rtmsg_flags = RTF_UP; - rtmsg.rtmsg_type = RTMSG_NEWROUTE; - ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_LOCAL); + ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0); + + ip6_route_add(&cfg); } static void sit_route_add(struct net_device *dev) { - struct in6_rtmsg rtmsg; - - memset(&rtmsg, 0, sizeof(rtmsg)); - - rtmsg.rtmsg_type = RTMSG_NEWROUTE; - rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; + struct fib6_config cfg = { + .fc_table = RT6_TABLE_MAIN, + .fc_metric = IP6_RT_PRIO_ADDRCONF, + .fc_ifindex = dev->ifindex, + .fc_dst_len = 96, + .fc_flags = RTF_UP | RTF_NONEXTHOP, + }; /* prefix length - 96 bits "::d.d.d.d" */ - rtmsg.rtmsg_dst_len = 96; - rtmsg.rtmsg_flags = RTF_UP|RTF_NONEXTHOP; - rtmsg.rtmsg_ifindex = dev->ifindex; - - ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_MAIN); + ip6_route_add(&cfg); } static void addrconf_add_lroute(struct net_device *dev) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index be36f4acda94..667b1b1ea25d 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -610,7 +610,7 @@ insert_above: */ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, - struct nlmsghdr *nlh, struct netlink_skb_parms *req) + struct nl_info *info) { struct rt6_info *iter = NULL; struct rt6_info **ins; @@ -665,7 +665,7 @@ out: *ins = rt; rt->rt6i_node = fn; atomic_inc(&rt->rt6i_ref); - inet6_rt_notify(RTM_NEWROUTE, rt, nlh, req); + inet6_rt_notify(RTM_NEWROUTE, rt, info); rt6_stats.fib_rt_entries++; if ((fn->fn_flags & RTN_RTINFO) == 0) { @@ -695,8 +695,7 @@ void fib6_force_start_gc(void) * with source addr info in sub-trees */ -int fib6_add(struct fib6_node *root, struct rt6_info *rt, - struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) +int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) { struct fib6_node *fn; int err = -ENOMEM; @@ -769,7 +768,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, } #endif - err = fib6_add_rt2node(fn, rt, nlh, req); + err = fib6_add_rt2node(fn, rt, info); if (err == 0) { fib6_start_gc(rt); @@ -1076,7 +1075,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn) } static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, - struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) + struct nl_info *info) { struct fib6_walker_t *w; struct rt6_info *rt = *rtp; @@ -1132,11 +1131,11 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, if (atomic_read(&rt->rt6i_ref) != 1) BUG(); } - inet6_rt_notify(RTM_DELROUTE, rt, nlh, req); + inet6_rt_notify(RTM_DELROUTE, rt, info); rt6_release(rt); } -int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) +int fib6_del(struct rt6_info *rt, struct nl_info *info) { struct fib6_node *fn = rt->rt6i_node; struct rt6_info **rtp; @@ -1161,7 +1160,7 @@ int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct ne for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) { if (*rtp == rt) { - fib6_del_route(fn, rtp, nlh, _rtattr, req); + fib6_del_route(fn, rtp, info); return 0; } } @@ -1290,7 +1289,7 @@ static int fib6_clean_node(struct fib6_walker_t *w) res = c->func(rt, c->arg); if (res < 0) { w->leaf = rt; - res = fib6_del(rt, NULL, NULL, NULL); + res = fib6_del(rt, NULL); if (res) { #if RT6_DEBUG >= 2 printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9ec348a72a95..7bcffa6ddba3 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -546,15 +546,14 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, be destroyed. */ -static int __ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, - void *_rtattr, struct netlink_skb_parms *req) +static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) { int err; struct fib6_table *table; table = rt->rt6i_table; write_lock_bh(&table->tb6_lock); - err = fib6_add(&table->tb6_root, rt, nlh, _rtattr, req); + err = fib6_add(&table->tb6_root, rt, info); write_unlock_bh(&table->tb6_lock); return err; @@ -562,7 +561,7 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, int ip6_ins_rt(struct rt6_info *rt) { - return __ip6_ins_rt(rt, NULL, NULL, NULL); + return __ip6_ins_rt(rt, NULL); } static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, @@ -1014,30 +1013,24 @@ int ipv6_get_hoplimit(struct net_device *dev) * */ -int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, - void *_rtattr, struct netlink_skb_parms *req, - u32 table_id) +int ip6_route_add(struct fib6_config *cfg) { int err; - struct rtmsg *r; - struct rtattr **rta; struct rt6_info *rt = NULL; struct net_device *dev = NULL; struct inet6_dev *idev = NULL; struct fib6_table *table; int addr_type; - rta = (struct rtattr **) _rtattr; - - if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128) + if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) return -EINVAL; #ifndef CONFIG_IPV6_SUBTREES - if (rtmsg->rtmsg_src_len) + if (cfg->fc_src_len) return -EINVAL; #endif - if (rtmsg->rtmsg_ifindex) { + if (cfg->fc_ifindex) { err = -ENODEV; - dev = dev_get_by_index(rtmsg->rtmsg_ifindex); + dev = dev_get_by_index(cfg->fc_ifindex); if (!dev) goto out; idev = in6_dev_get(dev); @@ -1045,10 +1038,10 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, goto out; } - if (rtmsg->rtmsg_metric == 0) - rtmsg->rtmsg_metric = IP6_RT_PRIO_USER; + if (cfg->fc_metric == 0) + cfg->fc_metric = IP6_RT_PRIO_USER; - table = fib6_new_table(table_id); + table = fib6_new_table(cfg->fc_table); if (table == NULL) { err = -ENOBUFS; goto out; @@ -1062,14 +1055,13 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, } rt->u.dst.obsolete = -1; - rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info); - if (nlh && (r = NLMSG_DATA(nlh))) { - rt->rt6i_protocol = r->rtm_protocol; - } else { - rt->rt6i_protocol = RTPROT_BOOT; - } + rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires); - addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst); + if (cfg->fc_protocol == RTPROT_UNSPEC) + cfg->fc_protocol = RTPROT_BOOT; + rt->rt6i_protocol = cfg->fc_protocol; + + addr_type = ipv6_addr_type(&cfg->fc_dst); if (addr_type & IPV6_ADDR_MULTICAST) rt->u.dst.input = ip6_mc_input; @@ -1078,24 +1070,22 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, rt->u.dst.output = ip6_output; - ipv6_addr_prefix(&rt->rt6i_dst.addr, - &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len); - rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len; + ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); + rt->rt6i_dst.plen = cfg->fc_dst_len; if (rt->rt6i_dst.plen == 128) rt->u.dst.flags = DST_HOST; #ifdef CONFIG_IPV6_SUBTREES - ipv6_addr_prefix(&rt->rt6i_src.addr, - &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len); - rt->rt6i_src.plen = rtmsg->rtmsg_src_len; + ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); + rt->rt6i_src.plen = cfg->fc_src_len; #endif - rt->rt6i_metric = rtmsg->rtmsg_metric; + rt->rt6i_metric = cfg->fc_metric; /* We cannot add true routes via loopback here, they would result in kernel looping; promote them to reject routes */ - if ((rtmsg->rtmsg_flags&RTF_REJECT) || + if ((cfg->fc_flags & RTF_REJECT) || (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { /* hold loopback dev/idev if we haven't done so. */ if (dev != &loopback_dev) { @@ -1118,12 +1108,12 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, goto install_route; } - if (rtmsg->rtmsg_flags & RTF_GATEWAY) { + if (cfg->fc_flags & RTF_GATEWAY) { struct in6_addr *gw_addr; int gwa_type; - gw_addr = &rtmsg->rtmsg_gateway; - ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway); + gw_addr = &cfg->fc_gateway; + ipv6_addr_copy(&rt->rt6i_gateway, gw_addr); gwa_type = ipv6_addr_type(gw_addr); if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { @@ -1140,7 +1130,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, if (!(gwa_type&IPV6_ADDR_UNICAST)) goto out; - grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1); + grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1); err = -EHOSTUNREACH; if (grt == NULL) @@ -1172,7 +1162,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, if (dev == NULL) goto out; - if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) { + if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); if (IS_ERR(rt->rt6i_nexthop)) { err = PTR_ERR(rt->rt6i_nexthop); @@ -1181,24 +1171,24 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, } } - rt->rt6i_flags = rtmsg->rtmsg_flags; + rt->rt6i_flags = cfg->fc_flags; install_route: - if (rta && rta[RTA_METRICS-1]) { - int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]); - struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]); + if (cfg->fc_mx) { + struct nlattr *nla; + int remaining; - while (RTA_OK(attr, attrlen)) { - unsigned flavor = attr->rta_type; - if (flavor) { - if (flavor > RTAX_MAX) { + nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { + int type = nla->nla_type; + + if (type) { + if (type > RTAX_MAX) { err = -EINVAL; goto out; } - rt->u.dst.metrics[flavor-1] = - *(u32 *)RTA_DATA(attr); + + rt->u.dst.metrics[type - 1] = nla_get_u32(nla); } - attr = RTA_NEXT(attr, attrlen); } } @@ -1211,7 +1201,7 @@ install_route: rt->u.dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; - return __ip6_ins_rt(rt, nlh, _rtattr, req); + return __ip6_ins_rt(rt, &cfg->fc_nlinfo); out: if (dev) @@ -1223,8 +1213,7 @@ out: return err; } -static int __ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, - void *_rtattr, struct netlink_skb_parms *req) +static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) { int err; struct fib6_table *table; @@ -1235,7 +1224,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, table = rt->rt6i_table; write_lock_bh(&table->tb6_lock); - err = fib6_del(rt, nlh, _rtattr, req); + err = fib6_del(rt, info); dst_release(&rt->u.dst); write_unlock_bh(&table->tb6_lock); @@ -1245,44 +1234,41 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, int ip6_del_rt(struct rt6_info *rt) { - return __ip6_del_rt(rt, NULL, NULL, NULL); + return __ip6_del_rt(rt, NULL); } -static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, - void *_rtattr, struct netlink_skb_parms *req, - u32 table_id) +static int ip6_route_del(struct fib6_config *cfg) { struct fib6_table *table; struct fib6_node *fn; struct rt6_info *rt; int err = -ESRCH; - table = fib6_get_table(table_id); + table = fib6_get_table(cfg->fc_table); if (table == NULL) return err; read_lock_bh(&table->tb6_lock); fn = fib6_locate(&table->tb6_root, - &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len, - &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len); + &cfg->fc_dst, cfg->fc_dst_len, + &cfg->fc_src, cfg->fc_src_len); if (fn) { for (rt = fn->leaf; rt; rt = rt->u.next) { - if (rtmsg->rtmsg_ifindex && + if (cfg->fc_ifindex && (rt->rt6i_dev == NULL || - rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex)) + rt->rt6i_dev->ifindex != cfg->fc_ifindex)) continue; - if (rtmsg->rtmsg_flags&RTF_GATEWAY && - !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway)) + if (cfg->fc_flags & RTF_GATEWAY && + !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) continue; - if (rtmsg->rtmsg_metric && - rtmsg->rtmsg_metric != rt->rt6i_metric) + if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) continue; dst_hold(&rt->u.dst); read_unlock_bh(&table->tb6_lock); - return __ip6_del_rt(rt, nlh, _rtattr, req); + return __ip6_del_rt(rt, &cfg->fc_nlinfo); } } read_unlock_bh(&table->tb6_lock); @@ -1565,21 +1551,23 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle struct in6_addr *gwaddr, int ifindex, unsigned pref) { - struct in6_rtmsg rtmsg; + struct fib6_config cfg = { + .fc_table = RT6_TABLE_INFO, + .fc_metric = 1024, + .fc_ifindex = ifindex, + .fc_dst_len = prefixlen, + .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | + RTF_UP | RTF_PREF(pref), + }; + + ipv6_addr_copy(&cfg.fc_dst, prefix); + ipv6_addr_copy(&cfg.fc_gateway, gwaddr); - memset(&rtmsg, 0, sizeof(rtmsg)); - rtmsg.rtmsg_type = RTMSG_NEWROUTE; - ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix); - rtmsg.rtmsg_dst_len = prefixlen; - ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr); - rtmsg.rtmsg_metric = 1024; - rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref); /* We should treat it as a default route if prefix length is 0. */ if (!prefixlen) - rtmsg.rtmsg_flags |= RTF_DEFAULT; - rtmsg.rtmsg_ifindex = ifindex; + cfg.fc_flags |= RTF_DEFAULT; - ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_INFO); + ip6_route_add(&cfg); return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex); } @@ -1611,18 +1599,18 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref) { - struct in6_rtmsg rtmsg; + struct fib6_config cfg = { + .fc_table = RT6_TABLE_DFLT, + .fc_metric = 1024, + .fc_ifindex = dev->ifindex, + .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | + RTF_UP | RTF_EXPIRES | RTF_PREF(pref), + }; - memset(&rtmsg, 0, sizeof(struct in6_rtmsg)); - rtmsg.rtmsg_type = RTMSG_NEWROUTE; - ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr); - rtmsg.rtmsg_metric = 1024; - rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | - RTF_PREF(pref); + ipv6_addr_copy(&cfg.fc_gateway, gwaddr); - rtmsg.rtmsg_ifindex = dev->ifindex; + ip6_route_add(&cfg); - ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_DFLT); return rt6_get_dflt_router(gwaddr, dev); } @@ -1649,8 +1637,27 @@ restart: read_unlock_bh(&table->tb6_lock); } +static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg, + struct fib6_config *cfg) +{ + memset(cfg, 0, sizeof(*cfg)); + + cfg->fc_table = RT6_TABLE_MAIN; + cfg->fc_ifindex = rtmsg->rtmsg_ifindex; + cfg->fc_metric = rtmsg->rtmsg_metric; + cfg->fc_expires = rtmsg->rtmsg_info; + cfg->fc_dst_len = rtmsg->rtmsg_dst_len; + cfg->fc_src_len = rtmsg->rtmsg_src_len; + cfg->fc_flags = rtmsg->rtmsg_flags; + + ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst); + ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src); + ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway); +} + int ipv6_route_ioctl(unsigned int cmd, void __user *arg) { + struct fib6_config cfg; struct in6_rtmsg rtmsg; int err; @@ -1663,16 +1670,16 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg) sizeof(struct in6_rtmsg)); if (err) return -EFAULT; - + + rtmsg_to_fib6_config(&rtmsg, &cfg); + rtnl_lock(); switch (cmd) { case SIOCADDRT: - err = ip6_route_add(&rtmsg, NULL, NULL, NULL, - RT6_TABLE_MAIN); + err = ip6_route_add(&cfg); break; case SIOCDELRT: - err = ip6_route_del(&rtmsg, NULL, NULL, NULL, - RT6_TABLE_MAIN); + err = ip6_route_del(&cfg); break; default: err = -EINVAL; @@ -1823,66 +1830,104 @@ void rt6_mtu_change(struct net_device *dev, unsigned mtu) fib6_clean_all(rt6_mtu_change_route, 0, &arg); } -static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta, - struct in6_rtmsg *rtmsg) +static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = { + [RTA_GATEWAY] = { .minlen = sizeof(struct in6_addr) }, + [RTA_OIF] = { .type = NLA_U32 }, + [RTA_PRIORITY] = { .type = NLA_U32 }, + [RTA_METRICS] = { .type = NLA_NESTED }, +}; + +static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, + struct fib6_config *cfg) { - memset(rtmsg, 0, sizeof(*rtmsg)); + struct rtmsg *rtm; + struct nlattr *tb[RTA_MAX+1]; + int err; - rtmsg->rtmsg_dst_len = r->rtm_dst_len; - rtmsg->rtmsg_src_len = r->rtm_src_len; - rtmsg->rtmsg_flags = RTF_UP; - if (r->rtm_type == RTN_UNREACHABLE) - rtmsg->rtmsg_flags |= RTF_REJECT; + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); + if (err < 0) + goto errout; - if (rta[RTA_GATEWAY-1]) { - if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16)) - return -EINVAL; - memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16); - rtmsg->rtmsg_flags |= RTF_GATEWAY; + err = -EINVAL; + rtm = nlmsg_data(nlh); + memset(cfg, 0, sizeof(*cfg)); + + cfg->fc_table = rtm->rtm_table; + cfg->fc_dst_len = rtm->rtm_dst_len; + cfg->fc_src_len = rtm->rtm_src_len; + cfg->fc_flags = RTF_UP; + cfg->fc_protocol = rtm->rtm_protocol; + + if (rtm->rtm_type == RTN_UNREACHABLE) + cfg->fc_flags |= RTF_REJECT; + + cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; + cfg->fc_nlinfo.nlh = nlh; + + if (tb[RTA_GATEWAY]) { + nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); + cfg->fc_flags |= RTF_GATEWAY; } - if (rta[RTA_DST-1]) { - if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3)) - return -EINVAL; - memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3)); + + if (tb[RTA_DST]) { + int plen = (rtm->rtm_dst_len + 7) >> 3; + + if (nla_len(tb[RTA_DST]) < plen) + goto errout; + + nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); } - if (rta[RTA_SRC-1]) { - if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3)) - return -EINVAL; - memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3)); + + if (tb[RTA_SRC]) { + int plen = (rtm->rtm_src_len + 7) >> 3; + + if (nla_len(tb[RTA_SRC]) < plen) + goto errout; + + nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); } - if (rta[RTA_OIF-1]) { - if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int))) - return -EINVAL; - memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int)); + + if (tb[RTA_OIF]) + cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); + + if (tb[RTA_PRIORITY]) + cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); + + if (tb[RTA_METRICS]) { + cfg->fc_mx = nla_data(tb[RTA_METRICS]); + cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); } - if (rta[RTA_PRIORITY-1]) { - if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4)) - return -EINVAL; - memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4); - } - return 0; + + if (tb[RTA_TABLE]) + cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); + + err = 0; +errout: + return err; } int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct rtmsg *r = NLMSG_DATA(nlh); - struct in6_rtmsg rtmsg; + struct fib6_config cfg; + int err; - if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) - return -EINVAL; - return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb), - rtm_get_table(arg, r->rtm_table)); + err = rtm_to_fib6_config(skb, nlh, &cfg); + if (err < 0) + return err; + + return ip6_route_del(&cfg); } int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct rtmsg *r = NLMSG_DATA(nlh); - struct in6_rtmsg rtmsg; + struct fib6_config cfg; + int err; - if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) - return -EINVAL; - return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb), - rtm_get_table(arg, r->rtm_table)); + err = rtm_to_fib6_config(skb, nlh, &cfg); + if (err < 0) + return err; + + return ip6_route_add(&cfg); } static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, @@ -2063,15 +2108,21 @@ out_free: goto out; } -void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, - struct netlink_skb_parms *req) +void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) { struct sk_buff *skb; - u32 pid = req ? req->pid : 0; - u32 seq = nlh ? nlh->nlmsg_seq : 0; + u32 pid = 0, seq = 0; + struct nlmsghdr *nlh = NULL; int payload = sizeof(struct rtmsg) + 256; int err = -ENOBUFS; + if (info) { + pid = info->pid; + nlh = info->nlh; + if (nlh) + seq = nlh->nlmsg_seq; + } + skb = nlmsg_new(nlmsg_total_size(payload), gfp_any()); if (skb == NULL) goto errout; From 2d7202bfdd28687073f5efef8d2f51bbab0af867 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 22 Aug 2006 00:01:27 -0700 Subject: [PATCH 0495/1063] [IPv6] route: Convert FIB6 dumping to use new netlink api Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 31 +++++++++++++++----------- net/ipv6/route.c | 52 ++++++++++++++++++++++++-------------------- 2 files changed, 46 insertions(+), 37 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index dfc58269240a..eeff0b23e944 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -188,22 +188,27 @@ void rtnl_set_sk_err(u32 group, int error) int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) { - struct rtattr *mx = (struct rtattr*)skb->tail; - int i; + struct nlattr *mx; + int i, valid = 0; - RTA_PUT(skb, RTA_METRICS, 0, NULL); - for (i=0; irta_len = skb->tail - (u8*)mx; - if (mx->rta_len == RTA_LENGTH(0)) - skb_trim(skb, (u8*)mx - skb->data); - return 0; -rtattr_failure: - skb_trim(skb, (u8*)mx - skb->data); - return -1; + if (!valid) + goto nla_put_failure; + + return nla_nest_end(skb, mx); + +nla_put_failure: + return nla_nest_cancel(skb, mx); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7bcffa6ddba3..f0a66de84331 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1936,8 +1936,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, int prefix, unsigned int flags) { struct rtmsg *rtm; - struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + struct nlmsghdr *nlh; struct rta_cacheinfo ci; u32 table; @@ -1948,8 +1947,11 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, } } - nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags); - rtm = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); + if (nlh == NULL) + return -ENOBUFS; + + rtm = nlmsg_data(nlh); rtm->rtm_family = AF_INET6; rtm->rtm_dst_len = rt->rt6i_dst.plen; rtm->rtm_src_len = rt->rt6i_src.plen; @@ -1959,7 +1961,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, else table = RT6_TABLE_UNSPEC; rtm->rtm_table = table; - RTA_PUT_U32(skb, RTA_TABLE, table); + NLA_PUT_U32(skb, RTA_TABLE, table); if (rt->rt6i_flags&RTF_REJECT) rtm->rtm_type = RTN_UNREACHABLE; else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) @@ -1980,31 +1982,35 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, rtm->rtm_flags |= RTM_F_CLONED; if (dst) { - RTA_PUT(skb, RTA_DST, 16, dst); + NLA_PUT(skb, RTA_DST, 16, dst); rtm->rtm_dst_len = 128; } else if (rtm->rtm_dst_len) - RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr); + NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr); #ifdef CONFIG_IPV6_SUBTREES if (src) { - RTA_PUT(skb, RTA_SRC, 16, src); + NLA_PUT(skb, RTA_SRC, 16, src); rtm->rtm_src_len = 128; } else if (rtm->rtm_src_len) - RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); + NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); #endif if (iif) - RTA_PUT(skb, RTA_IIF, 4, &iif); + NLA_PUT_U32(skb, RTA_IIF, iif); else if (dst) { struct in6_addr saddr_buf; if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0) - RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); + NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); } + if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) - goto rtattr_failure; + goto nla_put_failure; + if (rt->u.dst.neighbour) - RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key); + NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key); + if (rt->u.dst.dev) - RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex); - RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric); + NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); + + NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse); if (rt->rt6i_expires) ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies); @@ -2016,14 +2022,12 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, ci.rta_id = 0; ci.rta_ts = 0; ci.rta_tsage = 0; - RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); - nlh->nlmsg_len = skb->tail - b; - return skb->len; + NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; + return nlmsg_end(skb, nlh); + +nla_put_failure: + return nlmsg_cancel(skb, nlh); } int rt6_dump_route(struct rt6_info *rt, void *p_arg) @@ -2031,8 +2035,8 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; int prefix; - if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) { - struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh); + if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { + struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0; } else prefix = 0; From ab364a6f96bad9625bdb97b5688c76c44eb1e96e Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 22 Aug 2006 00:01:47 -0700 Subject: [PATCH 0496/1063] [IPv6] route: Convert GETROUTE to use new netlink api Fixes various unvalidated netlink attributes causing memory corruptions when left empty by userspace applications. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv6/route.c | 94 ++++++++++++++++++++++++++---------------------- 1 file changed, 51 insertions(+), 43 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f0a66de84331..5d6e9083ca2c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1833,6 +1833,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned mtu) static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = { [RTA_GATEWAY] = { .minlen = sizeof(struct in6_addr) }, [RTA_OIF] = { .type = NLA_U32 }, + [RTA_IIF] = { .type = NLA_U32 }, [RTA_PRIORITY] = { .type = NLA_U32 }, [RTA_METRICS] = { .type = NLA_NESTED }, }; @@ -2048,16 +2049,54 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { - struct rtattr **rta = arg; - int iif = 0; - int err = -ENOBUFS; - struct sk_buff *skb; - struct flowi fl; + struct nlattr *tb[RTA_MAX+1]; struct rt6_info *rt; + struct sk_buff *skb; + struct rtmsg *rtm; + struct flowi fl; + int err, iif = 0; + + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); + if (err < 0) + goto errout; + + err = -EINVAL; + memset(&fl, 0, sizeof(fl)); + + if (tb[RTA_SRC]) { + if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) + goto errout; + + ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC])); + } + + if (tb[RTA_DST]) { + if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) + goto errout; + + ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST])); + } + + if (tb[RTA_IIF]) + iif = nla_get_u32(tb[RTA_IIF]); + + if (tb[RTA_OIF]) + fl.oif = nla_get_u32(tb[RTA_OIF]); + + if (iif) { + struct net_device *dev; + dev = __dev_get_by_index(iif); + if (!dev) { + err = -ENODEV; + goto errout; + } + } skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (skb == NULL) - goto out; + if (skb == NULL) { + err = -ENOBUFS; + goto errout; + } /* Reserve room for dummy headers, this skb can pass through good chunk of routing engine. @@ -2065,51 +2104,20 @@ int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) skb->mac.raw = skb->data; skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); - memset(&fl, 0, sizeof(fl)); - if (rta[RTA_SRC-1]) - ipv6_addr_copy(&fl.fl6_src, - (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1])); - if (rta[RTA_DST-1]) - ipv6_addr_copy(&fl.fl6_dst, - (struct in6_addr*)RTA_DATA(rta[RTA_DST-1])); - - if (rta[RTA_IIF-1]) - memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); - - if (iif) { - struct net_device *dev; - dev = __dev_get_by_index(iif); - if (!dev) { - err = -ENODEV; - goto out_free; - } - } - - fl.oif = 0; - if (rta[RTA_OIF-1]) - memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int)); - - rt = (struct rt6_info*)ip6_route_output(NULL, &fl); - + rt = (struct rt6_info*) ip6_route_output(NULL, &fl); skb->dst = &rt->u.dst; - NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; - err = rt6_fill_node(skb, rt, - &fl.fl6_dst, &fl.fl6_src, - iif, + err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, RTM_NEWROUTE, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 0, 0); if (err < 0) { - err = -EMSGSIZE; - goto out_free; + kfree_skb(skb); + goto errout; } err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); -out: +errout: return err; -out_free: - kfree_skb(skb); - goto out; } void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) From 72d3b2c970a2d5d2ccb1d1cab4fb76663c4f2e49 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 22 Aug 2006 00:13:07 -0700 Subject: [PATCH 0497/1063] [IPV6]: Fixup ip6_del_rt() call for new args. Signed-off-by: David S. Miller --- net/ipv6/anycast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index abbc35a13e08..b80fc502ca03 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -378,7 +378,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr) addrconf_leave_solict(idev, &aca->aca_addr); dst_hold(&aca->aca_rt->u.dst); - if (ip6_del_rt(aca->aca_rt, NULL, NULL, NULL)) + if (ip6_del_rt(aca->aca_rt)) dst_free(&aca->aca_rt->u.dst); else dst_release(&aca->aca_rt->u.dst); From ac0b04627269ff16c3c7ab854a65fe6780c6e3e5 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Tue, 22 Aug 2006 00:15:33 -0700 Subject: [PATCH 0498/1063] [SCTP]: Extend /proc/net/sctp/snmp to provide more statistics. This patch adds more statistics info under /proc/net/sctp/snmp that should be useful for debugging. The additional events that are counted now include timer expirations, retransmits, packet and data chunk discards. The Data chunk discards include all the cases where a data chunk is discarded including high tsn, bad stream, dup tsn and the most useful one(out of receive buffer/rwnd). Also moved the SCTP MIB data structures from the generic include directories to include/sctp/sctp.h. Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/linux/snmp.h | 33 ------------------------------- include/net/sctp/sctp.h | 44 +++++++++++++++++++++++++++++++++++++++++ include/net/snmp.h | 6 ------ net/sctp/input.c | 8 ++++++-- net/sctp/inqueue.c | 4 ++-- net/sctp/outqueue.c | 6 +++++- net/sctp/proc.c | 17 +++++++++++++++- net/sctp/sm_statefuns.c | 15 ++++++++++++++ 8 files changed, 88 insertions(+), 45 deletions(-) diff --git a/include/linux/snmp.h b/include/linux/snmp.h index 30156556f78d..854aa6b543f1 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -160,39 +160,6 @@ enum __UDP_MIB_MAX }; -/* sctp mib definitions */ -/* - * draft-ietf-sigtran-sctp-mib-07.txt - */ -enum -{ - SCTP_MIB_NUM = 0, - SCTP_MIB_CURRESTAB, /* CurrEstab */ - SCTP_MIB_ACTIVEESTABS, /* ActiveEstabs */ - SCTP_MIB_PASSIVEESTABS, /* PassiveEstabs */ - SCTP_MIB_ABORTEDS, /* Aborteds */ - SCTP_MIB_SHUTDOWNS, /* Shutdowns */ - SCTP_MIB_OUTOFBLUES, /* OutOfBlues */ - SCTP_MIB_CHECKSUMERRORS, /* ChecksumErrors */ - SCTP_MIB_OUTCTRLCHUNKS, /* OutCtrlChunks */ - SCTP_MIB_OUTORDERCHUNKS, /* OutOrderChunks */ - SCTP_MIB_OUTUNORDERCHUNKS, /* OutUnorderChunks */ - SCTP_MIB_INCTRLCHUNKS, /* InCtrlChunks */ - SCTP_MIB_INORDERCHUNKS, /* InOrderChunks */ - SCTP_MIB_INUNORDERCHUNKS, /* InUnorderChunks */ - SCTP_MIB_FRAGUSRMSGS, /* FragUsrMsgs */ - SCTP_MIB_REASMUSRMSGS, /* ReasmUsrMsgs */ - SCTP_MIB_OUTSCTPPACKS, /* OutSCTPPacks */ - SCTP_MIB_INSCTPPACKS, /* InSCTPPacks */ - SCTP_MIB_RTOALGORITHM, /* RtoAlgorithm */ - SCTP_MIB_RTOMIN, /* RtoMin */ - SCTP_MIB_RTOMAX, /* RtoMax */ - SCTP_MIB_RTOINITIAL, /* RtoInitial */ - SCTP_MIB_VALCOOKIELIFE, /* ValCookieLife */ - SCTP_MIB_MAXINITRETR, /* MaxInitRetr */ - __SCTP_MIB_MAX -}; - /* linux mib definitions */ enum { diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 1c1abce5f6b6..e274fd479990 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -216,6 +216,50 @@ DECLARE_SNMP_STAT(struct sctp_mib, sctp_statistics); #endif /* !TEST_FRAME */ +/* sctp mib definitions */ +enum +{ + SCTP_MIB_NUM = 0, + SCTP_MIB_CURRESTAB, /* CurrEstab */ + SCTP_MIB_ACTIVEESTABS, /* ActiveEstabs */ + SCTP_MIB_PASSIVEESTABS, /* PassiveEstabs */ + SCTP_MIB_ABORTEDS, /* Aborteds */ + SCTP_MIB_SHUTDOWNS, /* Shutdowns */ + SCTP_MIB_OUTOFBLUES, /* OutOfBlues */ + SCTP_MIB_CHECKSUMERRORS, /* ChecksumErrors */ + SCTP_MIB_OUTCTRLCHUNKS, /* OutCtrlChunks */ + SCTP_MIB_OUTORDERCHUNKS, /* OutOrderChunks */ + SCTP_MIB_OUTUNORDERCHUNKS, /* OutUnorderChunks */ + SCTP_MIB_INCTRLCHUNKS, /* InCtrlChunks */ + SCTP_MIB_INORDERCHUNKS, /* InOrderChunks */ + SCTP_MIB_INUNORDERCHUNKS, /* InUnorderChunks */ + SCTP_MIB_FRAGUSRMSGS, /* FragUsrMsgs */ + SCTP_MIB_REASMUSRMSGS, /* ReasmUsrMsgs */ + SCTP_MIB_OUTSCTPPACKS, /* OutSCTPPacks */ + SCTP_MIB_INSCTPPACKS, /* InSCTPPacks */ + SCTP_MIB_T1_INIT_EXPIREDS, + SCTP_MIB_T1_COOKIE_EXPIREDS, + SCTP_MIB_T2_SHUTDOWN_EXPIREDS, + SCTP_MIB_T3_RTX_EXPIREDS, + SCTP_MIB_T4_RTO_EXPIREDS, + SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS, + SCTP_MIB_DELAY_SACK_EXPIREDS, + SCTP_MIB_AUTOCLOSE_EXPIREDS, + SCTP_MIB_T3_RETRANSMITS, + SCTP_MIB_PMTUD_RETRANSMITS, + SCTP_MIB_FAST_RETRANSMITS, + SCTP_MIB_IN_PKT_SOFTIRQ, + SCTP_MIB_IN_PKT_BACKLOG, + SCTP_MIB_IN_PKT_DISCARDS, + SCTP_MIB_IN_DATA_CHUNK_DISCARDS, + __SCTP_MIB_MAX +}; + +#define SCTP_MIB_MAX __SCTP_MIB_MAX +struct sctp_mib { + unsigned long mibs[SCTP_MIB_MAX]; +} __SNMP_MIB_ALIGN__; + /* Print debugging messages. */ #if SCTP_DEBUG diff --git a/include/net/snmp.h b/include/net/snmp.h index a36bed8ea210..464970e39ec0 100644 --- a/include/net/snmp.h +++ b/include/net/snmp.h @@ -100,12 +100,6 @@ struct udp_mib { unsigned long mibs[UDP_MIB_MAX]; } __SNMP_MIB_ALIGN__; -/* SCTP */ -#define SCTP_MIB_MAX __SCTP_MIB_MAX -struct sctp_mib { - unsigned long mibs[SCTP_MIB_MAX]; -} __SNMP_MIB_ALIGN__; - /* Linux */ #define LINUX_MIB_MAX __LINUX_MIB_MAX struct linux_mib { diff --git a/net/sctp/input.c b/net/sctp/input.c index 42b66e74bbb5..8a34d95602ce 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -255,10 +255,13 @@ int sctp_rcv(struct sk_buff *skb) */ sctp_bh_lock_sock(sk); - if (sock_owned_by_user(sk)) + if (sock_owned_by_user(sk)) { + SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG); sctp_add_backlog(sk, skb); - else + } else { + SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_SOFTIRQ); sctp_inq_push(&chunk->rcvr->inqueue, chunk); + } sctp_bh_unlock_sock(sk); @@ -271,6 +274,7 @@ int sctp_rcv(struct sk_buff *skb) return 0; discard_it: + SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_DISCARDS); kfree_skb(skb); return 0; diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index cf0c767d43ae..cf6deed7e849 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -87,7 +87,7 @@ void sctp_inq_free(struct sctp_inq *queue) /* Put a new packet in an SCTP inqueue. * We assume that packet->sctp_hdr is set and in host byte order. */ -void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *packet) +void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk) { /* Directly call the packet handling routine. */ @@ -96,7 +96,7 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *packet) * Eventually, we should clean up inqueue to not rely * on the BH related data structures. */ - list_add_tail(&packet->list, &q->in_chunk_list); + list_add_tail(&chunk->list, &q->in_chunk_list); q->immediate.func(q->immediate.data); } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 30b710c54e64..37074a39ecbb 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -467,6 +467,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, switch(reason) { case SCTP_RTXR_T3_RTX: + SCTP_INC_STATS(SCTP_MIB_T3_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX); /* Update the retran path if the T3-rtx timer has expired for * the current retran path. @@ -475,12 +476,15 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, sctp_assoc_update_retran_path(transport->asoc); break; case SCTP_RTXR_FAST_RTX: + SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX); fast_retransmit = 1; break; case SCTP_RTXR_PMTUD: - default: + SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS); break; + default: + BUG(); } sctp_retransmit_mark(q, transport, fast_retransmit); diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 5b3b0e0ae7e5..a356d8d310a9 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -57,6 +57,21 @@ static struct snmp_mib sctp_snmp_list[] = { SNMP_MIB_ITEM("SctpReasmUsrMsgs", SCTP_MIB_REASMUSRMSGS), SNMP_MIB_ITEM("SctpOutSCTPPacks", SCTP_MIB_OUTSCTPPACKS), SNMP_MIB_ITEM("SctpInSCTPPacks", SCTP_MIB_INSCTPPACKS), + SNMP_MIB_ITEM("SctpT1InitExpireds", SCTP_MIB_T1_INIT_EXPIREDS), + SNMP_MIB_ITEM("SctpT1CookieExpireds", SCTP_MIB_T1_COOKIE_EXPIREDS), + SNMP_MIB_ITEM("SctpT2ShutdownExpireds", SCTP_MIB_T2_SHUTDOWN_EXPIREDS), + SNMP_MIB_ITEM("SctpT3RtxExpireds", SCTP_MIB_T3_RTX_EXPIREDS), + SNMP_MIB_ITEM("SctpT4RtoExpireds", SCTP_MIB_T4_RTO_EXPIREDS), + SNMP_MIB_ITEM("SctpT5ShutdownGuardExpireds", SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS), + SNMP_MIB_ITEM("SctpDelaySackExpireds", SCTP_MIB_DELAY_SACK_EXPIREDS), + SNMP_MIB_ITEM("SctpAutocloseExpireds", SCTP_MIB_AUTOCLOSE_EXPIREDS), + SNMP_MIB_ITEM("SctpT3Retransmits", SCTP_MIB_T3_RETRANSMITS), + SNMP_MIB_ITEM("SctpPmtudRetransmits", SCTP_MIB_PMTUD_RETRANSMITS), + SNMP_MIB_ITEM("SctpFastRetransmits", SCTP_MIB_FAST_RETRANSMITS), + SNMP_MIB_ITEM("SctpInPktSoftirq", SCTP_MIB_IN_PKT_SOFTIRQ), + SNMP_MIB_ITEM("SctpInPktBacklog", SCTP_MIB_IN_PKT_BACKLOG), + SNMP_MIB_ITEM("SctpInPktDiscards", SCTP_MIB_IN_PKT_DISCARDS), + SNMP_MIB_ITEM("SctpInDataChunkDiscards", SCTP_MIB_IN_DATA_CHUNK_DISCARDS), SNMP_MIB_SENTINEL }; @@ -328,8 +343,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) "%8p %8p %-3d %-3d %-2d %-4d %4d %8d %8d %7d %5lu %-5d %5d ", assoc, sk, sctp_sk(sk)->type, sk->sk_state, assoc->state, hash, assoc->assoc_id, - (sk->sk_rcvbuf - assoc->rwnd), assoc->sndbuf_used, + (sk->sk_rcvbuf - assoc->rwnd), sock_i_uid(sk), sock_i_ino(sk), epb->bind_addr.port, assoc->peer.port); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 5b5ae7958322..32f57f42af9e 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2663,9 +2663,11 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, break; case SCTP_IERROR_HIGH_TSN: case SCTP_IERROR_BAD_STREAM: + SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS); goto discard_noforce; case SCTP_IERROR_DUP_TSN: case SCTP_IERROR_IGNORE_TSN: + SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS); goto discard_force; case SCTP_IERROR_NO_DATA: goto consume; @@ -3652,6 +3654,7 @@ sctp_disposition_t sctp_sf_pdiscard(const struct sctp_endpoint *ep, void *arg, sctp_cmd_seq_t *commands) { + SCTP_INC_STATS(SCTP_MIB_IN_PKT_DISCARDS); sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); return SCTP_DISPOSITION_CONSUME; @@ -4548,6 +4551,8 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep, { struct sctp_transport *transport = arg; + SCTP_INC_STATS(SCTP_MIB_T3_RTX_EXPIREDS); + if (asoc->overall_error_count >= asoc->max_retrans) { sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); @@ -4616,6 +4621,7 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep, void *arg, sctp_cmd_seq_t *commands) { + SCTP_INC_STATS(SCTP_MIB_DELAY_SACK_EXPIREDS); sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE()); return SCTP_DISPOSITION_CONSUME; } @@ -4650,6 +4656,7 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep, int attempts = asoc->init_err_counter + 1; SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n"); + SCTP_INC_STATS(SCTP_MIB_T1_INIT_EXPIREDS); if (attempts <= asoc->max_init_attempts) { bp = (struct sctp_bind_addr *) &asoc->base.bind_addr; @@ -4709,6 +4716,7 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep int attempts = asoc->init_err_counter + 1; SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n"); + SCTP_INC_STATS(SCTP_MIB_T1_COOKIE_EXPIREDS); if (attempts <= asoc->max_init_attempts) { repl = sctp_make_cookie_echo(asoc, NULL); @@ -4753,6 +4761,8 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep, struct sctp_chunk *reply = NULL; SCTP_DEBUG_PRINTK("Timer T2 expired.\n"); + SCTP_INC_STATS(SCTP_MIB_T2_SHUTDOWN_EXPIREDS); + if (asoc->overall_error_count >= asoc->max_retrans) { sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); @@ -4814,6 +4824,8 @@ sctp_disposition_t sctp_sf_t4_timer_expire( struct sctp_chunk *chunk = asoc->addip_last_asconf; struct sctp_transport *transport = chunk->transport; + SCTP_INC_STATS(SCTP_MIB_T4_RTO_EXPIREDS); + /* ADDIP 4.1 B1) Increment the error counters and perform path failure * detection on the appropriate destination address as defined in * RFC2960 [5] section 8.1 and 8.2. @@ -4880,6 +4892,7 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep, struct sctp_chunk *reply = NULL; SCTP_DEBUG_PRINTK("Timer T5 expired.\n"); + SCTP_INC_STATS(SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS); reply = sctp_make_abort(asoc, NULL, 0); if (!reply) @@ -4910,6 +4923,8 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire( { int disposition; + SCTP_INC_STATS(SCTP_MIB_AUTOCLOSE_EXPIREDS); + /* From 9.2 Shutdown of an Association * Upon receipt of the SHUTDOWN primitive from its upper * layer, the endpoint enters SHUTDOWN-PENDING state and From df7deeb5402087ea0387173aaf067d37a264a8f0 Mon Sep 17 00:00:00 2001 From: Vladislav Yasevich Date: Tue, 22 Aug 2006 00:19:51 -0700 Subject: [PATCH 0499/1063] [SCTP]: Cleanup nomem handling in the state functions. This patch cleans up the "nomem" conditions that may occur during the processing by the state machine functions. In most cases we delay adding side-effect commands until all memory allocations are done. Signed-off-by: Vladislav Yasevich Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- net/sctp/sm_statefuns.c | 161 ++++++++++++++++++++++------------------ 1 file changed, 87 insertions(+), 74 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 32f57f42af9e..1c42fe983a5b 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -187,10 +187,9 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, */ ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_SHUTDOWN_COMP, 0, 0, 0, GFP_ATOMIC); - if (!ev) - goto nomem; - - sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); + if (ev) + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, + SCTP_ULPEVENT(ev)); /* Upon reception of the SHUTDOWN COMPLETE chunk the endpoint * will verify that it is in SHUTDOWN-ACK-SENT state, if it is @@ -215,9 +214,6 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); return SCTP_DISPOSITION_DELETE_TCB; - -nomem: - return SCTP_DISPOSITION_NOMEM; } /* @@ -347,8 +343,6 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, GFP_ATOMIC)) goto nomem_init; - sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); - /* B) "Z" shall respond immediately with an INIT ACK chunk. */ /* If there are errors need to be reported for unknown parameters, @@ -360,11 +354,11 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, sizeof(sctp_chunkhdr_t); if (sctp_assoc_set_bind_addr_from_ep(new_asoc, GFP_ATOMIC) < 0) - goto nomem_ack; + goto nomem_init; repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len); if (!repl) - goto nomem_ack; + goto nomem_init; /* If there are errors need to be reported for unknown parameters, * include them in the outgoing INIT ACK as "Unrecognized parameter" @@ -388,6 +382,8 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, sctp_chunk_free(err_chunk); } + sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); /* @@ -400,12 +396,11 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, return SCTP_DISPOSITION_DELETE_TCB; -nomem_ack: - if (err_chunk) - sctp_chunk_free(err_chunk); nomem_init: sctp_association_free(new_asoc); nomem: + if (err_chunk) + sctp_chunk_free(err_chunk); return SCTP_DISPOSITION_NOMEM; } @@ -600,7 +595,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, struct sctp_association *new_asoc; sctp_init_chunk_t *peer_init; struct sctp_chunk *repl; - struct sctp_ulpevent *ev; + struct sctp_ulpevent *ev, *ai_ev = NULL; int error = 0; struct sctp_chunk *err_chk_p; @@ -659,20 +654,10 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, }; } - sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); - sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, - SCTP_STATE(SCTP_STATE_ESTABLISHED)); - SCTP_INC_STATS(SCTP_MIB_CURRESTAB); - SCTP_INC_STATS(SCTP_MIB_PASSIVEESTABS); - sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); - if (new_asoc->autoclose) - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, - SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); - - sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); - - /* Re-build the bind address for the association is done in + /* Delay state machine commands until later. + * + * Re-build the bind address for the association is done in * the sctp_unpack_cookie() already. */ /* This is a brand-new association, so these are not yet side @@ -687,9 +672,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, repl = sctp_make_cookie_ack(new_asoc, chunk); if (!repl) - goto nomem_repl; - - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); + goto nomem_init; /* RFC 2960 5.1 Normal Establishment of an Association * @@ -704,28 +687,53 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, if (!ev) goto nomem_ev; - sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); - /* Sockets API Draft Section 5.3.1.6 * When a peer sends a Adaption Layer Indication parameter , SCTP * delivers this notification to inform the application that of the * peers requested adaption layer. */ if (new_asoc->peer.adaption_ind) { - ev = sctp_ulpevent_make_adaption_indication(new_asoc, + ai_ev = sctp_ulpevent_make_adaption_indication(new_asoc, GFP_ATOMIC); - if (!ev) - goto nomem_ev; - - sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, - SCTP_ULPEVENT(ev)); + if (!ai_ev) + goto nomem_aiev; } + /* Add all the state machine commands now since we've created + * everything. This way we don't introduce memory corruptions + * during side-effect processing and correclty count established + * associations. + */ + sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); + sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, + SCTP_STATE(SCTP_STATE_ESTABLISHED)); + SCTP_INC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(SCTP_MIB_PASSIVEESTABS); + sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); + + if (new_asoc->autoclose) + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, + SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); + + sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); + + /* This will send the COOKIE ACK */ + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); + + /* Queue the ASSOC_CHANGE event */ + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); + + /* Send up the Adaptation Layer Indication event */ + if (ai_ev) + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, + SCTP_ULPEVENT(ai_ev)); + return SCTP_DISPOSITION_CONSUME; +nomem_aiev: + sctp_ulpevent_free(ev); nomem_ev: sctp_chunk_free(repl); -nomem_repl: nomem_init: sctp_association_free(new_asoc); nomem: @@ -1360,10 +1368,8 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( if (!sctp_process_init(new_asoc, chunk->chunk_hdr->type, sctp_source(chunk), (sctp_init_chunk_t *)chunk->chunk_hdr, - GFP_ATOMIC)) { - retval = SCTP_DISPOSITION_NOMEM; - goto nomem_init; - } + GFP_ATOMIC)) + goto nomem; /* Make sure no new addresses are being added during the * restart. Do not do this check for COOKIE-WAIT state, @@ -1374,7 +1380,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk, commands)) { retval = SCTP_DISPOSITION_CONSUME; - goto cleanup_asoc; + goto nomem_retval; } } @@ -1430,17 +1436,17 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); retval = SCTP_DISPOSITION_CONSUME; + return retval; + +nomem: + retval = SCTP_DISPOSITION_NOMEM; +nomem_retval: + if (new_asoc) + sctp_association_free(new_asoc); cleanup: if (err_chunk) sctp_chunk_free(err_chunk); return retval; -nomem: - retval = SCTP_DISPOSITION_NOMEM; - goto cleanup; -nomem_init: -cleanup_asoc: - sctp_association_free(new_asoc); - goto cleanup; } /* @@ -1611,15 +1617,10 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep, */ sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_OUTQUEUE, SCTP_NULL()); - /* Update the content of current association. */ - sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); - repl = sctp_make_cookie_ack(new_asoc, chunk); if (!repl) goto nomem; - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); - /* Report association restart to upper layer. */ ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_RESTART, 0, new_asoc->c.sinit_num_ostreams, @@ -1628,6 +1629,9 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep, if (!ev) goto nomem_ev; + /* Update the content of current association. */ + sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); return SCTP_DISPOSITION_CONSUME; @@ -1751,7 +1755,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, sctp_cmd_seq_t *commands, struct sctp_association *new_asoc) { - struct sctp_ulpevent *ev = NULL; + struct sctp_ulpevent *ev = NULL, *ai_ev = NULL; struct sctp_chunk *repl; /* Clarification from Implementor's Guide: @@ -1778,29 +1782,25 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, * SCTP user upon reception of a valid COOKIE * ECHO chunk. */ - ev = sctp_ulpevent_make_assoc_change(new_asoc, 0, + ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_UP, 0, - new_asoc->c.sinit_num_ostreams, - new_asoc->c.sinit_max_instreams, + asoc->c.sinit_num_ostreams, + asoc->c.sinit_max_instreams, GFP_ATOMIC); if (!ev) goto nomem; - sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, - SCTP_ULPEVENT(ev)); /* Sockets API Draft Section 5.3.1.6 * When a peer sends a Adaption Layer Indication parameter, * SCTP delivers this notification to inform the application * that of the peers requested adaption layer. */ - if (new_asoc->peer.adaption_ind) { - ev = sctp_ulpevent_make_adaption_indication(new_asoc, + if (asoc->peer.adaption_ind) { + ai_ev = sctp_ulpevent_make_adaption_indication(asoc, GFP_ATOMIC); - if (!ev) + if (!ai_ev) goto nomem; - sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, - SCTP_ULPEVENT(ev)); } } sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); @@ -1809,12 +1809,21 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, if (!repl) goto nomem; + if (ev) + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, + SCTP_ULPEVENT(ev)); + if (ai_ev) + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, + SCTP_ULPEVENT(ai_ev)); + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); return SCTP_DISPOSITION_CONSUME; nomem: + if (ai_ev) + sctp_ulpevent_free(ai_ev); if (ev) sctp_ulpevent_free(ev); return SCTP_DISPOSITION_NOMEM; @@ -3019,7 +3028,6 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) return sctp_sf_violation_chunklen(ep, asoc, type, arg, commands); - /* 10.2 H) SHUTDOWN COMPLETE notification * * When SCTP completes the shutdown procedures (section 9.2) this @@ -3030,6 +3038,14 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, if (!ev) goto nomem; + /* ...send a SHUTDOWN COMPLETE chunk to its peer, */ + reply = sctp_make_shutdown_complete(asoc, chunk); + if (!reply) + goto nomem_chunk; + + /* Do all the commands now (after allocation), so that we + * have consistent state if memory allocation failes + */ sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); /* Upon the receipt of the SHUTDOWN ACK, the SHUTDOWN sender shall @@ -3041,11 +3057,6 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); - /* ...send a SHUTDOWN COMPLETE chunk to its peer, */ - reply = sctp_make_shutdown_complete(asoc, chunk); - if (!reply) - goto nomem; - sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS); @@ -3056,6 +3067,8 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); return SCTP_DISPOSITION_DELETE_TCB; +nomem_chunk: + sctp_ulpevent_free(ev); nomem: return SCTP_DISPOSITION_NOMEM; } From eb5fa39f5ef490c72901b547ac5e7211efd47d56 Mon Sep 17 00:00:00 2001 From: Vladislav Yasevich Date: Tue, 22 Aug 2006 00:23:13 -0700 Subject: [PATCH 0500/1063] [SCTP]: Fix IPv6 address flag setting when doing peel-off/accept. During accept/peeloff we try to copy the list of bound addresses from the original endpoint to the new one. However, we forgot to set the flag to say that IPv6 is allowed on the new endpoint. Signed-off-by: Vladislav Yasevich Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- net/sctp/socket.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 85caf7963886..30d2dbeebb43 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5619,6 +5619,8 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, /* Copy the bind_addr list from the original endpoint to the new * endpoint so that we can handle restarts properly */ + if (PF_INET6 == assoc->base.sk->sk_family) + flags = SCTP_ADDR6_ALLOWED; if (assoc->peer.ipv4_address) flags |= SCTP_ADDR4_PEERSUPP; if (assoc->peer.ipv6_address) From 8abfedd889e46ad4977dfcdab737edf5c5803c62 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Tue, 22 Aug 2006 00:24:09 -0700 Subject: [PATCH 0501/1063] [SCTP]: Use the flags value that is passed as an arg to sctp_accept. No need to do multiple dereferences - sk->sk_socket->file->f_flags Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- net/sctp/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 30d2dbeebb43..3b6e82cb372f 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2970,7 +2970,7 @@ SCTP_STATIC struct sock *sctp_accept(struct sock *sk, int flags, int *err) goto out; } - timeo = sock_rcvtimeo(sk, sk->sk_socket->file->f_flags & O_NONBLOCK); + timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); error = sctp_wait_for_accept(sk, timeo); if (error) From 131852176c1f5b4350b4af811d1836db387d0c61 Mon Sep 17 00:00:00 2001 From: Henrik Kretzschmar Date: Tue, 22 Aug 2006 00:28:33 -0700 Subject: [PATCH 0502/1063] [TG3]: Convert the pci_device_id table to PCI_DEVICE() Convert the pci_device_ids to PCI_DEVICE() macro. Saves 1.5k in the sourcefile. Signed-off-by: Henrik Kretzschmar Acked-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/tg3.c | 167 ++++++++++++++++------------------------------ 1 file changed, 56 insertions(+), 111 deletions(-) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 34078a7c1a84..fb7026153861 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -149,117 +149,62 @@ module_param(tg3_debug, int, 0); MODULE_PARM_DESC(tg3_debug, "Tigon3 bitmapped debugging message enable value"); static struct pci_device_id tg3_pci_tbl[] = { - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5700, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5701, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702FE, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705_2, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705M, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705M_2, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702X, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703X, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704S, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702A3, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703A3, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5782, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5788, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5789, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5901, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5901_2, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704S_2, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705F, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5720, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5721, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5750, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5750M, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751M, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751F, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5752, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5752M, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753M, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753F, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5754, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5754M, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5755, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5755M, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5786, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787M, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5714, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5714S, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5715, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5715S, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5780, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5780S, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5781, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9DXX, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9MXX, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1000, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1001, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1003, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC9100, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_TIGON3, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, - { 0, } + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5700)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5701)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702FE)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705_2)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705M)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705M_2)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702X)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703X)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704S)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702A3)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703A3)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5782)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5788)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5789)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5901)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5901_2)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704S_2)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705F)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5720)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5721)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5750)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5750M)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751M)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751F)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5752)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5752M)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753M)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753F)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5754)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5754M)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5755)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5755M)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5786)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787M)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5714)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5714S)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5715)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5715S)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5780)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5780S)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5781)}, + {PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9DXX)}, + {PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9MXX)}, + {PCI_DEVICE(PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1000)}, + {PCI_DEVICE(PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1001)}, + {PCI_DEVICE(PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1003)}, + {PCI_DEVICE(PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC9100)}, + {PCI_DEVICE(PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_TIGON3)}, + {} }; MODULE_DEVICE_TABLE(pci, tg3_pci_tbl); From 9ba1627617d396135a4d679542a3623d5819e628 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Tue, 22 Aug 2006 00:29:37 -0700 Subject: [PATCH 0503/1063] [NETFILTER]: x_tables: replace IPv4 dscp match by address family independent version This replaces IPv4 dscp match by address family independent version. This also - utilizes dsfield.h to get the DS field in IPv4/IPv6 header, and - checks for the DSCP value from user space. - fixes Kconfig help text. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/xt_dscp.h | 23 +++++ include/linux/netfilter_ipv4/ipt_dscp.h | 14 ++- net/ipv4/netfilter/Kconfig | 11 --- net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/ipt_dscp.c | 54 ----------- net/netfilter/Kconfig | 11 +++ net/netfilter/Makefile | 1 + net/netfilter/xt_dscp.c | 113 ++++++++++++++++++++++++ 8 files changed, 154 insertions(+), 74 deletions(-) create mode 100644 include/linux/netfilter/xt_dscp.h delete mode 100644 net/ipv4/netfilter/ipt_dscp.c create mode 100644 net/netfilter/xt_dscp.c diff --git a/include/linux/netfilter/xt_dscp.h b/include/linux/netfilter/xt_dscp.h new file mode 100644 index 000000000000..1da61e6acaf7 --- /dev/null +++ b/include/linux/netfilter/xt_dscp.h @@ -0,0 +1,23 @@ +/* x_tables module for matching the IPv4/IPv6 DSCP field + * + * (C) 2002 Harald Welte + * This software is distributed under GNU GPL v2, 1991 + * + * See RFC2474 for a description of the DSCP field within the IP Header. + * + * xt_dscp.h,v 1.3 2002/08/05 19:00:21 laforge Exp +*/ +#ifndef _XT_DSCP_H +#define _XT_DSCP_H + +#define XT_DSCP_MASK 0xfc /* 11111100 */ +#define XT_DSCP_SHIFT 2 +#define XT_DSCP_MAX 0x3f /* 00111111 */ + +/* match info */ +struct xt_dscp_info { + u_int8_t dscp; + u_int8_t invert; +}; + +#endif /* _XT_DSCP_H */ diff --git a/include/linux/netfilter_ipv4/ipt_dscp.h b/include/linux/netfilter_ipv4/ipt_dscp.h index 2fa6dfe92894..4b82ca912b0e 100644 --- a/include/linux/netfilter_ipv4/ipt_dscp.h +++ b/include/linux/netfilter_ipv4/ipt_dscp.h @@ -10,14 +10,12 @@ #ifndef _IPT_DSCP_H #define _IPT_DSCP_H -#define IPT_DSCP_MASK 0xfc /* 11111100 */ -#define IPT_DSCP_SHIFT 2 -#define IPT_DSCP_MAX 0x3f /* 00111111 */ +#include -/* match info */ -struct ipt_dscp_info { - u_int8_t dscp; - u_int8_t invert; -}; +#define IPT_DSCP_MASK XT_DSCP_MASK +#define IPT_DSCP_SHIFT XT_DSCP_SHIFT +#define IPT_DSCP_MAX XT_DSCP_MAX + +#define ipt_dscp_info xt_dscp_info #endif /* _IPT_DSCP_H */ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index ef0b5aac5838..d88d71d1ce0d 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -278,17 +278,6 @@ config IP_NF_MATCH_ECN To compile it as a module, choose M here. If unsure, say N. -config IP_NF_MATCH_DSCP - tristate "DSCP match support" - depends on IP_NF_IPTABLES - help - This option adds a `DSCP' match, which allows you to match against - the IPv4 header DSCP field (DSCP codepoint). - - The DSCP codepoint can have any value between 0x0 and 0x4f. - - To compile it as a module, choose M here. If unsure, say N. - config IP_NF_MATCH_AH tristate "AH match support" depends on IP_NF_IPTABLES diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 3ded4a3af59c..b946b0f3ea9d 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -59,7 +59,6 @@ obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o -obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_dscp.o obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o diff --git a/net/ipv4/netfilter/ipt_dscp.c b/net/ipv4/netfilter/ipt_dscp.c deleted file mode 100644 index 47177591aeb6..000000000000 --- a/net/ipv4/netfilter/ipt_dscp.c +++ /dev/null @@ -1,54 +0,0 @@ -/* IP tables module for matching the value of the IPv4 DSCP field - * - * ipt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp - * - * (C) 2002 by Harald Welte - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include - -#include -#include - -MODULE_AUTHOR("Harald Welte "); -MODULE_DESCRIPTION("iptables DSCP matching module"); -MODULE_LICENSE("GPL"); - -static int match(const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, int *hotdrop) -{ - const struct ipt_dscp_info *info = matchinfo; - const struct iphdr *iph = skb->nh.iph; - - u_int8_t sh_dscp = ((info->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK); - - return ((iph->tos&IPT_DSCP_MASK) == sh_dscp) ^ info->invert; -} - -static struct ipt_match dscp_match = { - .name = "dscp", - .match = match, - .matchsize = sizeof(struct ipt_dscp_info), - .me = THIS_MODULE, -}; - -static int __init ipt_dscp_init(void) -{ - return ipt_register_match(&dscp_match); -} - -static void __exit ipt_dscp_fini(void) -{ - ipt_unregister_match(&dscp_match); - -} - -module_init(ipt_dscp_init); -module_exit(ipt_dscp_fini); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index a9894ddfd72a..f781405f5d65 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -263,6 +263,17 @@ config NETFILTER_XT_MATCH_DCCP If you want to compile it as a module, say M here and read . If unsure, say `N'. +config NETFILTER_XT_MATCH_DSCP + tristate '"DSCP" match support' + depends on NETFILTER_XTABLES + help + This option adds a `DSCP' match, which allows you to match against + the IPv4/IPv6 header DSCP field (differentiated services codepoint). + + The DSCP field can have any value between 0x0 and 0x3f inclusive. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_ESP tristate '"ESP" match support' depends on NETFILTER_XTABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 6fa4b7580458..0b8a70c1df46 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -37,6 +37,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNMARK) += xt_connmark.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o +obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c new file mode 100644 index 000000000000..82e250d1f007 --- /dev/null +++ b/net/netfilter/xt_dscp.c @@ -0,0 +1,113 @@ +/* IP tables module for matching the value of the IPv4/IPv6 DSCP field + * + * xt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp + * + * (C) 2002 by Harald Welte + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("x_tables DSCP matching module"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_dscp"); +MODULE_ALIAS("ip6t_dscp"); + +static int match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, + int offset, + unsigned int protoff, + int *hotdrop) +{ + const struct xt_dscp_info *info = matchinfo; + u_int8_t dscp = ipv4_get_dsfield(skb->nh.iph) >> XT_DSCP_SHIFT; + + return (dscp == info->dscp) ^ !!info->invert; +} + +static int match6(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, + int offset, + unsigned int protoff, + int *hotdrop) +{ + const struct xt_dscp_info *info = matchinfo; + u_int8_t dscp = ipv6_get_dsfield(skb->nh.ipv6h) >> XT_DSCP_SHIFT; + + return (dscp == info->dscp) ^ !!info->invert; +} + +static int checkentry(const char *tablename, + const void *info, + const struct xt_match *match, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + const u_int8_t dscp = ((struct xt_dscp_info *)matchinfo)->dscp; + + if (dscp > XT_DSCP_MAX) { + printk(KERN_ERR "xt_dscp: dscp %x out of range\n", dscp); + return 0; + } + + return 1; +} + +static struct xt_match dscp_match = { + .name = "dscp", + .match = match, + .checkentry = checkentry, + .matchsize = sizeof(struct xt_dscp_info), + .family = AF_INET, + .me = THIS_MODULE, +}; + +static struct xt_match dscp6_match = { + .name = "dscp", + .match = match6, + .checkentry = checkentry, + .matchsize = sizeof(struct xt_dscp_info), + .family = AF_INET6, + .me = THIS_MODULE, +}; + +static int __init xt_dscp_match_init(void) +{ + int ret; + ret = xt_register_match(&dscp_match); + if (ret) + return ret; + + ret = xt_register_match(&dscp6_match); + if (ret) + xt_unregister_match(&dscp_match); + + return ret; +} + +static void __exit xt_dscp_match_fini(void) +{ + xt_unregister_match(&dscp_match); + xt_unregister_match(&dscp6_match); +} + +module_init(xt_dscp_match_init); +module_exit(xt_dscp_match_fini); From a468701db58a8b3e08e3f55fa6ac66db42014922 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Tue, 22 Aug 2006 00:30:26 -0700 Subject: [PATCH 0504/1063] [NETFILTER]: x_tables: replace IPv4 DSCP target by address family independent version This replaces IPv4 DSCP target by address family independent version. This also - utilizes dsfield.h to get/mangle DS field in IPv4/IPv6 header - fixes Kconfig help text. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/xt_DSCP.h | 20 ++++ include/linux/netfilter_ipv4/ipt_DSCP.h | 6 +- net/ipv4/netfilter/Kconfig | 11 -- net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/ipt_DSCP.c | 96 ----------------- net/netfilter/Kconfig | 12 +++ net/netfilter/Makefile | 1 + net/netfilter/xt_DSCP.c | 130 ++++++++++++++++++++++++ 8 files changed, 165 insertions(+), 112 deletions(-) create mode 100644 include/linux/netfilter/xt_DSCP.h delete mode 100644 net/ipv4/netfilter/ipt_DSCP.c create mode 100644 net/netfilter/xt_DSCP.c diff --git a/include/linux/netfilter/xt_DSCP.h b/include/linux/netfilter/xt_DSCP.h new file mode 100644 index 000000000000..3c7c963997bd --- /dev/null +++ b/include/linux/netfilter/xt_DSCP.h @@ -0,0 +1,20 @@ +/* x_tables module for setting the IPv4/IPv6 DSCP field + * + * (C) 2002 Harald Welte + * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh + * This software is distributed under GNU GPL v2, 1991 + * + * See RFC2474 for a description of the DSCP field within the IP Header. + * + * xt_DSCP.h,v 1.7 2002/03/14 12:03:13 laforge Exp +*/ +#ifndef _XT_DSCP_TARGET_H +#define _XT_DSCP_TARGET_H +#include + +/* target info */ +struct xt_DSCP_info { + u_int8_t dscp; +}; + +#endif /* _XT_DSCP_TARGET_H */ diff --git a/include/linux/netfilter_ipv4/ipt_DSCP.h b/include/linux/netfilter_ipv4/ipt_DSCP.h index b30f510b5bef..3491e524d5ea 100644 --- a/include/linux/netfilter_ipv4/ipt_DSCP.h +++ b/include/linux/netfilter_ipv4/ipt_DSCP.h @@ -11,10 +11,8 @@ #ifndef _IPT_DSCP_TARGET_H #define _IPT_DSCP_TARGET_H #include +#include -/* target info */ -struct ipt_DSCP_info { - u_int8_t dscp; -}; +#define ipt_DSCP_info xt_DSCP_info #endif /* _IPT_DSCP_TARGET_H */ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index d88d71d1ce0d..a55b8ff70ded 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -557,17 +557,6 @@ config IP_NF_TARGET_ECN To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_DSCP - tristate "DSCP target support" - depends on IP_NF_MANGLE - help - This option adds a `DSCP' match, which allows you to match against - the IPv4 header DSCP field (DSCP codepoint). - - The DSCP codepoint can have any value between 0x0 and 0x4f. - - To compile it as a module, choose M here. If unsure, say N. - config IP_NF_TARGET_TTL tristate 'TTL target support' depends on IP_NF_MANGLE diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index b946b0f3ea9d..09aaed1a8063 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -67,7 +67,6 @@ obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o -obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c deleted file mode 100644 index c8e971288dfe..000000000000 --- a/net/ipv4/netfilter/ipt_DSCP.c +++ /dev/null @@ -1,96 +0,0 @@ -/* iptables module for setting the IPv4 DSCP field, Version 1.8 - * - * (C) 2002 by Harald Welte - * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * See RFC2474 for a description of the DSCP field within the IP Header. - * - * ipt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp -*/ - -#include -#include -#include -#include - -#include -#include - -MODULE_AUTHOR("Harald Welte "); -MODULE_DESCRIPTION("iptables DSCP modification module"); -MODULE_LICENSE("GPL"); - -static unsigned int -target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo, - void *userinfo) -{ - const struct ipt_DSCP_info *dinfo = targinfo; - u_int8_t sh_dscp = ((dinfo->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK); - - - if (((*pskb)->nh.iph->tos & IPT_DSCP_MASK) != sh_dscp) { - u_int16_t diffs[2]; - - if (!skb_make_writable(pskb, sizeof(struct iphdr))) - return NF_DROP; - - diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; - (*pskb)->nh.iph->tos = ((*pskb)->nh.iph->tos & ~IPT_DSCP_MASK) - | sh_dscp; - diffs[1] = htons((*pskb)->nh.iph->tos); - (*pskb)->nh.iph->check - = csum_fold(csum_partial((char *)diffs, - sizeof(diffs), - (*pskb)->nh.iph->check - ^ 0xFFFF)); - } - return IPT_CONTINUE; -} - -static int -checkentry(const char *tablename, - const void *e_void, - const struct xt_target *target, - void *targinfo, - unsigned int targinfosize, - unsigned int hook_mask) -{ - const u_int8_t dscp = ((struct ipt_DSCP_info *)targinfo)->dscp; - - if ((dscp > IPT_DSCP_MAX)) { - printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp); - return 0; - } - return 1; -} - -static struct ipt_target ipt_dscp_reg = { - .name = "DSCP", - .target = target, - .targetsize = sizeof(struct ipt_DSCP_info), - .table = "mangle", - .checkentry = checkentry, - .me = THIS_MODULE, -}; - -static int __init ipt_dscp_init(void) -{ - return ipt_register_target(&ipt_dscp_reg); -} - -static void __exit ipt_dscp_fini(void) -{ - ipt_unregister_target(&ipt_dscp_reg); -} - -module_init(ipt_dscp_init); -module_exit(ipt_dscp_fini); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index f781405f5d65..0a28d2c5c44f 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -148,6 +148,18 @@ config NETFILTER_XT_TARGET_CONNMARK . The module will be called ipt_CONNMARK.o. If unsure, say `N'. +config NETFILTER_XT_TARGET_DSCP + tristate '"DSCP" target support' + depends on NETFILTER_XTABLES + depends on IP_NF_MANGLE || IP6_NF_MANGLE + help + This option adds a `DSCP' target, which allows you to manipulate + the IPv4/IPv6 header DSCP field (differentiated services codepoint). + + The DSCP field can have any value between 0x0 and 0x3f inclusive. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_MARK tristate '"MARK" target support' depends on NETFILTER_XTABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 0b8a70c1df46..a74be492fd0a 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o # targets obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o +obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c new file mode 100644 index 000000000000..79df8165cd79 --- /dev/null +++ b/net/netfilter/xt_DSCP.c @@ -0,0 +1,130 @@ +/* x_tables module for setting the IPv4/IPv6 DSCP field, Version 1.8 + * + * (C) 2002 by Harald Welte + * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * See RFC2474 for a description of the DSCP field within the IP Header. + * + * xt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp +*/ + +#include +#include +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("x_tables DSCP modification module"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_DSCP"); +MODULE_ALIAS("ip6t_DSCP"); + +static unsigned int target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const struct xt_target *target, + const void *targinfo, + void *userinfo) +{ + const struct xt_DSCP_info *dinfo = targinfo; + u_int8_t dscp = ipv4_get_dsfield((*pskb)->nh.iph) >> XT_DSCP_SHIFT; + + if (dscp != dinfo->dscp) { + if (!skb_make_writable(pskb, sizeof(struct iphdr))) + return NF_DROP; + + ipv4_change_dsfield((*pskb)->nh.iph, (__u8)(~XT_DSCP_MASK), + dinfo->dscp << XT_DSCP_SHIFT); + + } + return XT_CONTINUE; +} + +static unsigned int target6(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const struct xt_target *target, + const void *targinfo, + void *userinfo) +{ + const struct xt_DSCP_info *dinfo = targinfo; + u_int8_t dscp = ipv6_get_dsfield((*pskb)->nh.ipv6h) >> XT_DSCP_SHIFT; + + if (dscp != dinfo->dscp) { + if (!skb_make_writable(pskb, sizeof(struct ipv6hdr))) + return NF_DROP; + + ipv6_change_dsfield((*pskb)->nh.ipv6h, (__u8)(~XT_DSCP_MASK), + dinfo->dscp << XT_DSCP_SHIFT); + } + return XT_CONTINUE; +} + +static int checkentry(const char *tablename, + const void *e_void, + const struct xt_target *target, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp; + + if ((dscp > XT_DSCP_MAX)) { + printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp); + return 0; + } + return 1; +} + +static struct xt_target xt_dscp_reg = { + .name = "DSCP", + .target = target, + .targetsize = sizeof(struct xt_DSCP_info), + .table = "mangle", + .checkentry = checkentry, + .family = AF_INET, + .me = THIS_MODULE, +}; + +static struct xt_target xt_dscp6_reg = { + .name = "DSCP", + .target = target6, + .targetsize = sizeof(struct xt_DSCP_info), + .table = "mangle", + .checkentry = checkentry, + .family = AF_INET6, + .me = THIS_MODULE, +}; + +static int __init xt_dscp_target_init(void) +{ + int ret; + ret = xt_register_target(&xt_dscp_reg); + if (ret) + return ret; + + ret = xt_register_target(&xt_dscp6_reg); + if (ret) + xt_unregister_target(&xt_dscp_reg); + + return ret; +} + +static void __exit xt_dscp_target_fini(void) +{ + xt_unregister_target(&xt_dscp_reg); + xt_unregister_target(&xt_dscp6_reg); +} + +module_init(xt_dscp_target_init); +module_exit(xt_dscp_target_fini); From b93ff78317c0b8f42830e2bb13dd8df596232528 Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Tue, 22 Aug 2006 00:30:55 -0700 Subject: [PATCH 0505/1063] [NETFILTER]: ipt_recent: add module parameter for changing ownership of /proc/net/ipt_recent/* Signed-off-by: Daniel De Graaf Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_recent.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 61a2139f9cfd..682c0946201e 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -35,14 +35,20 @@ static unsigned int ip_list_tot = 100; static unsigned int ip_pkt_list_tot = 20; static unsigned int ip_list_hash_size = 0; static unsigned int ip_list_perms = 0644; +static unsigned int ip_list_uid = 0; +static unsigned int ip_list_gid = 0; module_param(ip_list_tot, uint, 0400); module_param(ip_pkt_list_tot, uint, 0400); module_param(ip_list_hash_size, uint, 0400); module_param(ip_list_perms, uint, 0400); +module_param(ip_list_uid, uint, 0400); +module_param(ip_list_gid, uint, 0400); MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list"); MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)"); MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs"); MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/ipt_recent/* files"); +MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/ipt_recent/* files"); +MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/ipt_recent/* files"); struct recent_entry { @@ -274,6 +280,8 @@ ipt_recent_checkentry(const char *tablename, const void *ip, goto out; } t->proc->proc_fops = &recent_fops; + t->proc->uid = ip_list_uid; + t->proc->gid = ip_list_gid; t->proc->data = t; #endif spin_lock_bh(&recent_lock); From 2521c12cf1a29f6c380b13ca32a38175f6beed08 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 22 Aug 2006 00:31:24 -0700 Subject: [PATCH 0506/1063] [NETFILTER]: conntrack: introduce connection mark event This patch introduces the mark event. ctnetlink can use this to know if the mark needs to be dumped. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_common.h | 4 ++++ net/netfilter/xt_CONNMARK.c | 16 ++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index d2e4bd7a7a14..9e0dae07861e 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -125,6 +125,10 @@ enum ip_conntrack_events /* Counter highest bit has been set */ IPCT_COUNTER_FILLING_BIT = 11, IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT), + + /* Mark is set */ + IPCT_MARK_BIT = 12, + IPCT_MARK = (1 << IPCT_MARK_BIT), }; enum ip_conntrack_expect_events { diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index 60c375d36f01..784482b74e58 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -52,13 +52,25 @@ target(struct sk_buff **pskb, switch(markinfo->mode) { case XT_CONNMARK_SET: newmark = (*ctmark & ~markinfo->mask) | markinfo->mark; - if (newmark != *ctmark) + if (newmark != *ctmark) { *ctmark = newmark; +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS + ip_conntrack_event_cache(IPCT_MARK, *pskb); +#else + nf_conntrack_event_cache(IPCT_MARK, *pskb); +#endif + } break; case XT_CONNMARK_SAVE: newmark = (*ctmark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask); - if (*ctmark != newmark) + if (*ctmark != newmark) { *ctmark = newmark; +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS + ip_conntrack_event_cache(IPCT_MARK, *pskb); +#else + nf_conntrack_event_cache(IPCT_MARK, *pskb); +#endif + } break; case XT_CONNMARK_RESTORE: nfmark = (*pskb)->nfmark; From b9a37e0c81c498be2db9f52063c53e55d76c815e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 22 Aug 2006 00:31:49 -0700 Subject: [PATCH 0507/1063] [NETFILTER]: ctnetlink: dump connection mark ctnetlink dumps the mark iif the event mark happened Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_netlink.c | 4 ++++ net/netfilter/nf_conntrack_netlink.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 0d4cc92391fa..38708e6cfae7 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -385,6 +385,10 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) goto nfattr_failure; + if (events & IPCT_MARK + && ctnetlink_dump_mark(skb, ct) < 0) + goto nfattr_failure; + nlh->nlmsg_len = skb->tail - b; nfnetlink_send(skb, 0, group, 0); return NOTIFY_DONE; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6527d4e048d8..aa0148f418a9 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -395,6 +395,10 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) goto nfattr_failure; + if (events & IPCT_MARK + && ctnetlink_dump_mark(skb, ct) < 0) + goto nfattr_failure; + nlh->nlmsg_len = skb->tail - b; nfnetlink_send(skb, 0, group, 0); return NOTIFY_DONE; From b3a27bfba51d445784eb0cd6451b73a73fb69cf9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 22 Aug 2006 00:32:05 -0700 Subject: [PATCH 0508/1063] [NETFILTER]: ctnetlink: check for listeners before sending expectation events This patch uses nfnetlink_has_listeners to check for listeners in userspace. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_netlink.c | 3 +++ net/netfilter/nf_conntrack_netlink.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 38708e6cfae7..ef84f43f0734 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -1257,6 +1257,9 @@ static int ctnetlink_expect_event(struct notifier_block *this, } else return NOTIFY_DONE; + if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW)) + return NOTIFY_DONE; + skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb) return NOTIFY_DONE; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index aa0148f418a9..dc4f081dca91 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1278,6 +1278,9 @@ static int ctnetlink_expect_event(struct notifier_block *this, } else return NOTIFY_DONE; + if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW)) + return NOTIFY_DONE; + skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb) return NOTIFY_DONE; From 1a31526baeed30aaa70503cee0ab281f78cae0d6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 22 Aug 2006 00:32:23 -0700 Subject: [PATCH 0509/1063] [NETFILTER]: ctnetlink: remove impossible events tests for updates IPCT_HELPER and IPCT_NATINFO bits are never set on updates. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_netlink.c | 6 +----- net/netfilter/nf_conntrack_netlink.c | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index ef84f43f0734..a20b0e385f1b 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -329,11 +329,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, /* dump everything */ events = ~0UL; group = NFNLGRP_CONNTRACK_NEW; - } else if (events & (IPCT_STATUS | - IPCT_PROTOINFO | - IPCT_HELPER | - IPCT_HELPINFO | - IPCT_NATINFO)) { + } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) { type = IPCTNL_MSG_CT_NEW; group = NFNLGRP_CONNTRACK_UPDATE; } else diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index dc4f081dca91..8cd85cfd9a02 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -339,11 +339,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, /* dump everything */ events = ~0UL; group = NFNLGRP_CONNTRACK_NEW; - } else if (events & (IPCT_STATUS | - IPCT_PROTOINFO | - IPCT_HELPER | - IPCT_HELPINFO | - IPCT_NATINFO)) { + } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) { type = IPCTNL_MSG_CT_NEW; group = NFNLGRP_CONNTRACK_UPDATE; } else From 1158ba27bec6d1a20999099a938908cf85f47640 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 22 Aug 2006 00:32:47 -0700 Subject: [PATCH 0510/1063] [NETFILTER]: nfnetlink_queue: fix typo in error message Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nfnetlink_queue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index eddfbe4441a2..8eb2473d83e1 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -584,7 +584,7 @@ nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info, queue->queue_dropped++; status = -ENOSPC; if (net_ratelimit()) - printk(KERN_WARNING "ip_queue: full at %d entries, " + printk(KERN_WARNING "nf_queue: full at %d entries, " "dropping packets(s). Dropped: %d\n", queue->queue_total, queue->queue_dropped); goto err_out_free_nskb; @@ -635,7 +635,7 @@ nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e) diff, GFP_ATOMIC); if (newskb == NULL) { - printk(KERN_WARNING "ip_queue: OOM " + printk(KERN_WARNING "nf_queue: OOM " "in mangle, dropping packet\n"); return -ENOMEM; } From da878c8e5aae3eeceeee7af8d52633d7bc125edf Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 22 Aug 2006 00:33:09 -0700 Subject: [PATCH 0511/1063] [NETFILTER]: replace open coded checksum updates Replace open coded checksum update by nf_csum_update calls and clean up the surrounding code a bit. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_ECN.c | 22 +++++++++------------- net/ipv4/netfilter/ipt_TOS.c | 22 ++++++++-------------- net/ipv4/netfilter/ipt_TTL.c | 9 +++------ 3 files changed, 20 insertions(+), 33 deletions(-) diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 35916c74fe4e..7e30e6d2b5da 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -27,22 +27,18 @@ MODULE_DESCRIPTION("iptables ECN modification module"); static inline int set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) { - if (((*pskb)->nh.iph->tos & IPT_ECN_IP_MASK) - != (einfo->ip_ect & IPT_ECN_IP_MASK)) { - u_int16_t diffs[2]; + struct iphdr *iph = (*pskb)->nh.iph; + u_int16_t oldtos; + if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) { if (!skb_make_writable(pskb, sizeof(struct iphdr))) return 0; - - diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; - (*pskb)->nh.iph->tos &= ~IPT_ECN_IP_MASK; - (*pskb)->nh.iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); - diffs[1] = htons((*pskb)->nh.iph->tos); - (*pskb)->nh.iph->check - = csum_fold(csum_partial((char *)diffs, - sizeof(diffs), - (*pskb)->nh.iph->check - ^0xFFFF)); + iph = (*pskb)->nh.iph; + oldtos = iph->tos; + iph->tos &= ~IPT_ECN_IP_MASK; + iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); + iph->check = nf_csum_update(oldtos ^ 0xFFFF, iph->tos, + iph->check); } return 1; } diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index 1c7a5ca399b3..52e9d705d48e 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -30,23 +30,17 @@ target(struct sk_buff **pskb, void *userinfo) { const struct ipt_tos_target_info *tosinfo = targinfo; + struct iphdr *iph = (*pskb)->nh.iph; + u_int16_t oldtos; - if (((*pskb)->nh.iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { - u_int16_t diffs[2]; - + if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { if (!skb_make_writable(pskb, sizeof(struct iphdr))) return NF_DROP; - - diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; - (*pskb)->nh.iph->tos - = ((*pskb)->nh.iph->tos & IPTOS_PREC_MASK) - | tosinfo->tos; - diffs[1] = htons((*pskb)->nh.iph->tos); - (*pskb)->nh.iph->check - = csum_fold(csum_partial((char *)diffs, - sizeof(diffs), - (*pskb)->nh.iph->check - ^0xFFFF)); + iph = (*pskb)->nh.iph; + oldtos = iph->tos; + iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos; + iph->check = nf_csum_update(oldtos ^ 0xFFFF, iph->tos, + iph->check); } return IPT_CONTINUE; } diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index f48892ae0be5..2afb2a8aa8c5 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -27,7 +27,6 @@ ipt_ttl_target(struct sk_buff **pskb, { struct iphdr *iph; const struct ipt_TTL_info *info = targinfo; - u_int16_t diffs[2]; int new_ttl; if (!skb_make_writable(pskb, (*pskb)->len)) @@ -55,12 +54,10 @@ ipt_ttl_target(struct sk_buff **pskb, } if (new_ttl != iph->ttl) { - diffs[0] = htons(((unsigned)iph->ttl) << 8) ^ 0xFFFF; + iph->check = nf_csum_update((iph->ttl << 8) ^ 0xFFFF, + new_ttl << 8, + iph->check); iph->ttl = new_ttl; - diffs[1] = htons(((unsigned)iph->ttl) << 8); - iph->check = csum_fold(csum_partial((char *)diffs, - sizeof(diffs), - iph->check^0xFFFF)); } return IPT_CONTINUE; From 90528e6fe92ee1a353d6a639930e7d70d85b5c85 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 22 Aug 2006 00:33:26 -0700 Subject: [PATCH 0512/1063] [NETFILTER]: xt_CONNMARK: use tabs for indentation Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_CONNMARK.c | 57 +++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index 784482b74e58..19989a915433 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -49,36 +49,37 @@ target(struct sk_buff **pskb, u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo); if (ctmark) { - switch(markinfo->mode) { - case XT_CONNMARK_SET: - newmark = (*ctmark & ~markinfo->mask) | markinfo->mark; - if (newmark != *ctmark) { - *ctmark = newmark; + switch(markinfo->mode) { + case XT_CONNMARK_SET: + newmark = (*ctmark & ~markinfo->mask) | markinfo->mark; + if (newmark != *ctmark) { + *ctmark = newmark; #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS - ip_conntrack_event_cache(IPCT_MARK, *pskb); + ip_conntrack_event_cache(IPCT_MARK, *pskb); #else - nf_conntrack_event_cache(IPCT_MARK, *pskb); + nf_conntrack_event_cache(IPCT_MARK, *pskb); #endif } - break; - case XT_CONNMARK_SAVE: - newmark = (*ctmark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask); - if (*ctmark != newmark) { - *ctmark = newmark; + break; + case XT_CONNMARK_SAVE: + newmark = (*ctmark & ~markinfo->mask) | + ((*pskb)->nfmark & markinfo->mask); + if (*ctmark != newmark) { + *ctmark = newmark; #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS - ip_conntrack_event_cache(IPCT_MARK, *pskb); + ip_conntrack_event_cache(IPCT_MARK, *pskb); #else - nf_conntrack_event_cache(IPCT_MARK, *pskb); + nf_conntrack_event_cache(IPCT_MARK, *pskb); #endif + } + break; + case XT_CONNMARK_RESTORE: + nfmark = (*pskb)->nfmark; + diff = (*ctmark ^ nfmark) & markinfo->mask; + if (diff != 0) + (*pskb)->nfmark = nfmark ^ diff; + break; } - break; - case XT_CONNMARK_RESTORE: - nfmark = (*pskb)->nfmark; - diff = (*ctmark ^ nfmark) & markinfo->mask; - if (diff != 0) - (*pskb)->nfmark = nfmark ^ diff; - break; - } } return XT_CONTINUE; @@ -95,17 +96,17 @@ checkentry(const char *tablename, struct xt_connmark_target_info *matchinfo = targinfo; if (matchinfo->mode == XT_CONNMARK_RESTORE) { - if (strcmp(tablename, "mangle") != 0) { - printk(KERN_WARNING "CONNMARK: restore can only be called from \"mangle\" table, not \"%s\"\n", tablename); - return 0; - } + if (strcmp(tablename, "mangle") != 0) { + printk(KERN_WARNING "CONNMARK: restore can only be " + "called from \"mangle\" table, not \"%s\"\n", + tablename); + return 0; + } } - if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) { printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n"); return 0; } - return 1; } From 52d9c42ef2563d2c420eb23b96bf5a4cae9e167b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 22 Aug 2006 00:33:45 -0700 Subject: [PATCH 0513/1063] [NETFILTER]: x_tables: add helpers for mass match/target registration Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 5 +++ net/netfilter/x_tables.c | 60 ++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 48cc32d83f77..9a9912430e3a 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -290,8 +290,13 @@ struct xt_table_info extern int xt_register_target(struct xt_target *target); extern void xt_unregister_target(struct xt_target *target); +extern int xt_register_targets(struct xt_target *target, unsigned int n); +extern void xt_unregister_targets(struct xt_target *target, unsigned int n); + extern int xt_register_match(struct xt_match *target); extern void xt_unregister_match(struct xt_match *target); +extern int xt_register_matches(struct xt_match *match, unsigned int n); +extern void xt_unregister_matches(struct xt_match *match, unsigned int n); extern int xt_check_match(const struct xt_match *match, unsigned short family, unsigned int size, const char *table, unsigned int hook, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 174e8f970095..8037ba63d587 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -86,6 +86,36 @@ xt_unregister_target(struct xt_target *target) } EXPORT_SYMBOL(xt_unregister_target); +int +xt_register_targets(struct xt_target *target, unsigned int n) +{ + unsigned int i; + int err = 0; + + for (i = 0; i < n; i++) { + err = xt_register_target(&target[i]); + if (err) + goto err; + } + return err; + +err: + if (i > 0) + xt_unregister_targets(target, i); + return err; +} +EXPORT_SYMBOL(xt_register_targets); + +void +xt_unregister_targets(struct xt_target *target, unsigned int n) +{ + unsigned int i; + + for (i = 0; i < n; i++) + xt_unregister_target(&target[i]); +} +EXPORT_SYMBOL(xt_unregister_targets); + int xt_register_match(struct xt_match *match) { @@ -113,6 +143,36 @@ xt_unregister_match(struct xt_match *match) } EXPORT_SYMBOL(xt_unregister_match); +int +xt_register_matches(struct xt_match *match, unsigned int n) +{ + unsigned int i; + int err = 0; + + for (i = 0; i < n; i++) { + err = xt_register_match(&match[i]); + if (err) + goto err; + } + return err; + +err: + if (i > 0) + xt_unregister_matches(match, i); + return err; +} +EXPORT_SYMBOL(xt_register_matches); + +void +xt_unregister_matches(struct xt_match *match, unsigned int n) +{ + unsigned int i; + + for (i = 0; i < n; i++) + xt_unregister_match(&match[i]); +} +EXPORT_SYMBOL(xt_unregister_matches); + /* * These are weird, but module loading must not be done with mutex From 4470bbc749e5551cce914529309456f631e25120 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 22 Aug 2006 00:34:04 -0700 Subject: [PATCH 0514/1063] [NETFILTER]: x_tables: make use of mass registation helpers Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/ip6t_REJECT.c | 4 +- net/netfilter/xt_CLASSIFY.c | 60 ++++++++--------- net/netfilter/xt_CONNMARK.c | 51 ++++++-------- net/netfilter/xt_CONNSECMARK.c | 57 +++++++--------- net/netfilter/xt_DSCP.c | 51 ++++++-------- net/netfilter/xt_MARK.c | 84 +++++++++-------------- net/netfilter/xt_NFQUEUE.c | 68 +++++++------------ net/netfilter/xt_NOTRACK.c | 47 +++++-------- net/netfilter/xt_SECMARK.c | 55 ++++++--------- net/netfilter/xt_comment.c | 45 +++++-------- net/netfilter/xt_connbytes.c | 47 ++++++------- net/netfilter/xt_connmark.c | 53 ++++++--------- net/netfilter/xt_conntrack.c | 5 +- net/netfilter/xt_dccp.c | 51 ++++++-------- net/netfilter/xt_dscp.c | 47 ++++++------- net/netfilter/xt_esp.c | 51 ++++++-------- net/netfilter/xt_helper.c | 52 ++++++--------- net/netfilter/xt_length.c | 43 +++++------- net/netfilter/xt_limit.c | 47 ++++++------- net/netfilter/xt_mac.c | 52 +++++++-------- net/netfilter/xt_mark.c | 47 ++++++------- net/netfilter/xt_multiport.c | 111 ++++++++++++------------------- net/netfilter/xt_physdev.c | 49 ++++++-------- net/netfilter/xt_pkttype.c | 44 +++++------- net/netfilter/xt_policy.c | 51 ++++++-------- net/netfilter/xt_quota.c | 51 ++++++-------- net/netfilter/xt_sctp.c | 51 ++++++-------- net/netfilter/xt_state.c | 53 ++++++--------- net/netfilter/xt_statistic.c | 53 ++++++--------- net/netfilter/xt_string.c | 50 ++++++-------- net/netfilter/xt_tcpmss.c | 49 ++++++-------- net/netfilter/xt_tcpudp.c | 107 +++++++++++------------------ 32 files changed, 679 insertions(+), 1007 deletions(-) diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index c4eba1aeb323..7929ff402166 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -257,9 +257,7 @@ static struct ip6t_target ip6t_reject_reg = { static int __init ip6t_reject_init(void) { - if (ip6t_register_target(&ip6t_reject_reg)) - return -EINVAL; - return 0; + return ip6t_register_target(&ip6t_reject_reg); } static void __exit ip6t_reject_fini(void) diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c index e54e57730012..1f92edd05933 100644 --- a/net/netfilter/xt_CLASSIFY.c +++ b/net/netfilter/xt_CLASSIFY.c @@ -40,47 +40,41 @@ target(struct sk_buff **pskb, return XT_CONTINUE; } -static struct xt_target classify_reg = { - .name = "CLASSIFY", - .target = target, - .targetsize = sizeof(struct xt_classify_target_info), - .table = "mangle", - .hooks = (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) | - (1 << NF_IP_POST_ROUTING), - .family = AF_INET, - .me = THIS_MODULE, +static struct xt_target xt_classify_target[] = { + { + .family = AF_INET, + .name = "CLASSIFY", + .target = target, + .targetsize = sizeof(struct xt_classify_target_info), + .table = "mangle", + .hooks = (1 << NF_IP_LOCAL_OUT) | + (1 << NF_IP_FORWARD) | + (1 << NF_IP_POST_ROUTING), + .me = THIS_MODULE, + }, + { + .name = "CLASSIFY", + .family = AF_INET6, + .target = target, + .targetsize = sizeof(struct xt_classify_target_info), + .table = "mangle", + .hooks = (1 << NF_IP_LOCAL_OUT) | + (1 << NF_IP_FORWARD) | + (1 << NF_IP_POST_ROUTING), + .me = THIS_MODULE, + }, }; -static struct xt_target classify6_reg = { - .name = "CLASSIFY", - .target = target, - .targetsize = sizeof(struct xt_classify_target_info), - .table = "mangle", - .hooks = (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) | - (1 << NF_IP_POST_ROUTING), - .family = AF_INET6, - .me = THIS_MODULE, -}; - static int __init xt_classify_init(void) { - int ret; - - ret = xt_register_target(&classify_reg); - if (ret) - return ret; - - ret = xt_register_target(&classify6_reg); - if (ret) - xt_unregister_target(&classify_reg); - - return ret; + return xt_register_targets(xt_classify_target, + ARRAY_SIZE(xt_classify_target)); } static void __exit xt_classify_fini(void) { - xt_unregister_target(&classify_reg); - xt_unregister_target(&classify6_reg); + xt_unregister_targets(xt_classify_target, + ARRAY_SIZE(xt_classify_target)); } module_init(xt_classify_init); diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index 19989a915433..e577356b5c71 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -110,45 +110,36 @@ checkentry(const char *tablename, return 1; } -static struct xt_target connmark_reg = { - .name = "CONNMARK", - .target = target, - .targetsize = sizeof(struct xt_connmark_target_info), - .checkentry = checkentry, - .family = AF_INET, - .me = THIS_MODULE -}; - -static struct xt_target connmark6_reg = { - .name = "CONNMARK", - .target = target, - .targetsize = sizeof(struct xt_connmark_target_info), - .checkentry = checkentry, - .family = AF_INET6, - .me = THIS_MODULE +static struct xt_target xt_connmark_target[] = { + { + .name = "CONNMARK", + .family = AF_INET, + .checkentry = checkentry, + .target = target, + .targetsize = sizeof(struct xt_connmark_target_info), + .me = THIS_MODULE + }, + { + .name = "CONNMARK", + .family = AF_INET6, + .checkentry = checkentry, + .target = target, + .targetsize = sizeof(struct xt_connmark_target_info), + .me = THIS_MODULE + }, }; static int __init xt_connmark_init(void) { - int ret; - need_conntrack(); - - ret = xt_register_target(&connmark_reg); - if (ret) - return ret; - - ret = xt_register_target(&connmark6_reg); - if (ret) - xt_unregister_target(&connmark_reg); - - return ret; + return xt_register_targets(xt_connmark_target, + ARRAY_SIZE(xt_connmark_target)); } static void __exit xt_connmark_fini(void) { - xt_unregister_target(&connmark_reg); - xt_unregister_target(&connmark6_reg); + xt_unregister_targets(xt_connmark_target, + ARRAY_SIZE(xt_connmark_target)); } module_init(xt_connmark_init); diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 8c011e020769..48f7fc3c85cd 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -106,49 +106,38 @@ static int checkentry(const char *tablename, const void *entry, return 1; } -static struct xt_target ipt_connsecmark_reg = { - .name = "CONNSECMARK", - .target = target, - .targetsize = sizeof(struct xt_connsecmark_target_info), - .table = "mangle", - .checkentry = checkentry, - .me = THIS_MODULE, - .family = AF_INET, - .revision = 0, -}; - -static struct xt_target ip6t_connsecmark_reg = { - .name = "CONNSECMARK", - .target = target, - .targetsize = sizeof(struct xt_connsecmark_target_info), - .table = "mangle", - .checkentry = checkentry, - .me = THIS_MODULE, - .family = AF_INET6, - .revision = 0, +static struct xt_target xt_connsecmark_target[] = { + { + .name = "CONNSECMARK", + .family = AF_INET, + .checkentry = checkentry, + .target = target, + .targetsize = sizeof(struct xt_connsecmark_target_info), + .table = "mangle", + .me = THIS_MODULE, + }, + { + .name = "CONNSECMARK", + .family = AF_INET6, + .checkentry = checkentry, + .target = target, + .targetsize = sizeof(struct xt_connsecmark_target_info), + .table = "mangle", + .me = THIS_MODULE, + }, }; static int __init xt_connsecmark_init(void) { - int err; - need_conntrack(); - - err = xt_register_target(&ipt_connsecmark_reg); - if (err) - return err; - - err = xt_register_target(&ip6t_connsecmark_reg); - if (err) - xt_unregister_target(&ipt_connsecmark_reg); - - return err; + return xt_register_targets(xt_connsecmark_targets, + ARRAY_SIZE(xt_connsecmark_targets)); } static void __exit xt_connsecmark_fini(void) { - xt_unregister_target(&ip6t_connsecmark_reg); - xt_unregister_target(&ipt_connsecmark_reg); + xt_unregister_targets(xt_connsecmark_targets, + ARRAY_SIZE(xt_connsecmark_targets)); } module_init(xt_connsecmark_init); diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index 79df8165cd79..a1cd9723644f 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -86,44 +86,35 @@ static int checkentry(const char *tablename, return 1; } -static struct xt_target xt_dscp_reg = { - .name = "DSCP", - .target = target, - .targetsize = sizeof(struct xt_DSCP_info), - .table = "mangle", - .checkentry = checkentry, - .family = AF_INET, - .me = THIS_MODULE, -}; - -static struct xt_target xt_dscp6_reg = { - .name = "DSCP", - .target = target6, - .targetsize = sizeof(struct xt_DSCP_info), - .table = "mangle", - .checkentry = checkentry, - .family = AF_INET6, - .me = THIS_MODULE, +static struct xt_target xt_dscp_target[] = { + { + .name = "DSCP", + .family = AF_INET, + .checkentry = checkentry, + .target = target, + .targetsize = sizeof(struct xt_DSCP_info), + .table = "mangle", + .me = THIS_MODULE, + }, + { + .name = "DSCP", + .family = AF_INET6, + .checkentry = checkentry, + .target = target6, + .targetsize = sizeof(struct xt_DSCP_info), + .table = "mangle", + .me = THIS_MODULE, + }, }; static int __init xt_dscp_target_init(void) { - int ret; - ret = xt_register_target(&xt_dscp_reg); - if (ret) - return ret; - - ret = xt_register_target(&xt_dscp6_reg); - if (ret) - xt_unregister_target(&xt_dscp_reg); - - return ret; + return xt_register_targets(xt_dscp_target, ARRAY_SIZE(xt_dscp_target)); } static void __exit xt_dscp_target_fini(void) { - xt_unregister_target(&xt_dscp_reg); - xt_unregister_target(&xt_dscp6_reg); + xt_unregister_targets(xt_dscp_target, ARRAY_SIZE(xt_dscp_target)); } module_init(xt_dscp_target_init); diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c index ee9c34edc76c..0a6127219467 100644 --- a/net/netfilter/xt_MARK.c +++ b/net/netfilter/xt_MARK.c @@ -112,65 +112,47 @@ checkentry_v1(const char *tablename, return 1; } -static struct xt_target ipt_mark_reg_v0 = { - .name = "MARK", - .target = target_v0, - .targetsize = sizeof(struct xt_mark_target_info), - .table = "mangle", - .checkentry = checkentry_v0, - .me = THIS_MODULE, - .family = AF_INET, - .revision = 0, -}; - -static struct xt_target ipt_mark_reg_v1 = { - .name = "MARK", - .target = target_v1, - .targetsize = sizeof(struct xt_mark_target_info_v1), - .table = "mangle", - .checkentry = checkentry_v1, - .me = THIS_MODULE, - .family = AF_INET, - .revision = 1, -}; - -static struct xt_target ip6t_mark_reg_v0 = { - .name = "MARK", - .target = target_v0, - .targetsize = sizeof(struct xt_mark_target_info), - .table = "mangle", - .checkentry = checkentry_v0, - .me = THIS_MODULE, - .family = AF_INET6, - .revision = 0, +static struct xt_target xt_mark_target[] = { + { + .name = "MARK", + .family = AF_INET, + .revision = 0, + .checkentry = checkentry_v0, + .target = target_v0, + .targetsize = sizeof(struct xt_mark_target_info), + .table = "mangle", + .me = THIS_MODULE, + }, + { + .name = "MARK", + .family = AF_INET, + .revision = 1, + .checkentry = checkentry_v1, + .target = target_v1, + .targetsize = sizeof(struct xt_mark_target_info_v1), + .table = "mangle", + .me = THIS_MODULE, + }, + { + .name = "MARK", + .family = AF_INET6, + .revision = 0, + .checkentry = checkentry_v0, + .target = target_v0, + .targetsize = sizeof(struct xt_mark_target_info), + .table = "mangle", + .me = THIS_MODULE, + }, }; static int __init xt_mark_init(void) { - int err; - - err = xt_register_target(&ipt_mark_reg_v0); - if (err) - return err; - - err = xt_register_target(&ipt_mark_reg_v1); - if (err) - xt_unregister_target(&ipt_mark_reg_v0); - - err = xt_register_target(&ip6t_mark_reg_v0); - if (err) { - xt_unregister_target(&ipt_mark_reg_v0); - xt_unregister_target(&ipt_mark_reg_v1); - } - - return err; + return xt_register_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target)); } static void __exit xt_mark_fini(void) { - xt_unregister_target(&ipt_mark_reg_v0); - xt_unregister_target(&ipt_mark_reg_v1); - xt_unregister_target(&ip6t_mark_reg_v0); + xt_unregister_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target)); } module_init(xt_mark_init); diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 86ccceb61fdd..7b982283abdb 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -37,57 +37,39 @@ target(struct sk_buff **pskb, return NF_QUEUE_NR(tinfo->queuenum); } -static struct xt_target ipt_NFQ_reg = { - .name = "NFQUEUE", - .target = target, - .targetsize = sizeof(struct xt_NFQ_info), - .family = AF_INET, - .me = THIS_MODULE, -}; - -static struct xt_target ip6t_NFQ_reg = { - .name = "NFQUEUE", - .target = target, - .targetsize = sizeof(struct xt_NFQ_info), - .family = AF_INET6, - .me = THIS_MODULE, -}; - -static struct xt_target arpt_NFQ_reg = { - .name = "NFQUEUE", - .target = target, - .targetsize = sizeof(struct xt_NFQ_info), - .family = NF_ARP, - .me = THIS_MODULE, +static struct xt_target xt_nfqueue_target[] = { + { + .name = "NFQUEUE", + .family = AF_INET, + .target = target, + .targetsize = sizeof(struct xt_NFQ_info), + .me = THIS_MODULE, + }, + { + .name = "NFQUEUE", + .family = AF_INET6, + .target = target, + .targetsize = sizeof(struct xt_NFQ_info), + .me = THIS_MODULE, + }, + { + .name = "NFQUEUE", + .family = NF_ARP, + .target = target, + .targetsize = sizeof(struct xt_NFQ_info), + .me = THIS_MODULE, + }, }; static int __init xt_nfqueue_init(void) { - int ret; - ret = xt_register_target(&ipt_NFQ_reg); - if (ret) - return ret; - ret = xt_register_target(&ip6t_NFQ_reg); - if (ret) - goto out_ip; - ret = xt_register_target(&arpt_NFQ_reg); - if (ret) - goto out_ip6; - - return ret; -out_ip6: - xt_unregister_target(&ip6t_NFQ_reg); -out_ip: - xt_unregister_target(&ipt_NFQ_reg); - - return ret; + return xt_register_targets(xt_nfqueue_target, + ARRAY_SIZE(xt_nfqueue_target)); } static void __exit xt_nfqueue_fini(void) { - xt_unregister_target(&arpt_NFQ_reg); - xt_unregister_target(&ip6t_NFQ_reg); - xt_unregister_target(&ipt_NFQ_reg); + xt_register_targets(xt_nfqueue_target, ARRAY_SIZE(xt_nfqueue_target)); } module_init(xt_nfqueue_init); diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c index 98f4b5363ce8..cab881d4424c 100644 --- a/net/netfilter/xt_NOTRACK.c +++ b/net/netfilter/xt_NOTRACK.c @@ -34,43 +34,32 @@ target(struct sk_buff **pskb, return XT_CONTINUE; } -static struct xt_target notrack_reg = { - .name = "NOTRACK", - .target = target, - .targetsize = 0, - .table = "raw", - .family = AF_INET, - .me = THIS_MODULE, -}; - -static struct xt_target notrack6_reg = { - .name = "NOTRACK", - .target = target, - .targetsize = 0, - .table = "raw", - .family = AF_INET6, - .me = THIS_MODULE, +static struct xt_target xt_notrack_target[] = { + { + .name = "NOTRACK", + .family = AF_INET, + .target = target, + .table = "raw", + .me = THIS_MODULE, + }, + { + .name = "NOTRACK", + .family = AF_INET6, + .target = target, + .table = "raw", + .me = THIS_MODULE, + }, }; static int __init xt_notrack_init(void) { - int ret; - - ret = xt_register_target(¬rack_reg); - if (ret) - return ret; - - ret = xt_register_target(¬rack6_reg); - if (ret) - xt_unregister_target(¬rack_reg); - - return ret; + return xt_register_targets(xt_notrack_target, + ARRAY_SIZE(xt_notrack_target)); } static void __exit xt_notrack_fini(void) { - xt_unregister_target(¬rack6_reg); - xt_unregister_target(¬rack_reg); + xt_unregister_targets(xt_notrack_target, ARRAY_SIZE(xt_notrack_target)); } module_init(xt_notrack_init); diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index de9537ad9a7c..4300988786c9 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -111,47 +111,36 @@ static int checkentry(const char *tablename, const void *entry, return 1; } -static struct xt_target ipt_secmark_reg = { - .name = "SECMARK", - .target = target, - .targetsize = sizeof(struct xt_secmark_target_info), - .table = "mangle", - .checkentry = checkentry, - .me = THIS_MODULE, - .family = AF_INET, - .revision = 0, -}; - -static struct xt_target ip6t_secmark_reg = { - .name = "SECMARK", - .target = target, - .targetsize = sizeof(struct xt_secmark_target_info), - .table = "mangle", - .checkentry = checkentry, - .me = THIS_MODULE, - .family = AF_INET6, - .revision = 0, +static struct xt_target xt_secmark_target = { + { + .name = "SECMARK", + .family = AF_INET, + .checkentry = checkentry, + .target = target, + .targetsize = sizeof(struct xt_secmark_target_info), + .table = "mangle", + .me = THIS_MODULE, + }, + { + .name = "SECMARK", + .family = AF_INET6, + .checkentry = checkentry, + .target = target, + .targetsize = sizeof(struct xt_secmark_target_info), + .table = "mangle", + .me = THIS_MODULE, + }, }; static int __init xt_secmark_init(void) { - int err; - - err = xt_register_target(&ipt_secmark_reg); - if (err) - return err; - - err = xt_register_target(&ip6t_secmark_reg); - if (err) - xt_unregister_target(&ipt_secmark_reg); - - return err; + return xt_register_targets(xt_secmark_target, + ARRAY_SIZE(xt_secmark_target)); } static void __exit xt_secmark_fini(void) { - xt_unregister_target(&ip6t_secmark_reg); - xt_unregister_target(&ipt_secmark_reg); + xt_unregister_targets(xt_secmark_target, ARRAY_SIZE(xt_secmark_target)); } module_init(xt_secmark_init); diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c index 197609cb06d7..7db492d65220 100644 --- a/net/netfilter/xt_comment.c +++ b/net/netfilter/xt_comment.c @@ -29,41 +29,32 @@ match(const struct sk_buff *skb, return 1; } -static struct xt_match comment_match = { - .name = "comment", - .match = match, - .matchsize = sizeof(struct xt_comment_info), - .family = AF_INET, - .me = THIS_MODULE -}; - -static struct xt_match comment6_match = { - .name = "comment", - .match = match, - .matchsize = sizeof(struct xt_comment_info), - .family = AF_INET6, - .me = THIS_MODULE +static struct xt_match xt_comment_match[] = { + { + .name = "comment", + .family = AF_INET, + .match = match, + .matchsize = sizeof(struct xt_comment_info), + .me = THIS_MODULE + }, + { + .name = "comment", + .family = AF_INET6, + .match = match, + .matchsize = sizeof(struct xt_comment_info), + .me = THIS_MODULE + }, }; static int __init xt_comment_init(void) { - int ret; - - ret = xt_register_match(&comment_match); - if (ret) - return ret; - - ret = xt_register_match(&comment6_match); - if (ret) - xt_unregister_match(&comment_match); - - return ret; + return xt_register_matches(xt_comment_match, + ARRAY_SIZE(xt_comment_match)); } static void __exit xt_comment_fini(void) { - xt_unregister_match(&comment_match); - xt_unregister_match(&comment6_match); + xt_unregister_matches(xt_comment_match, ARRAY_SIZE(xt_comment_match)); } module_init(xt_comment_init); diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 1396fe2d07c1..2d49948d3c38 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -143,40 +143,35 @@ static int check(const char *tablename, return 1; } -static struct xt_match connbytes_match = { - .name = "connbytes", - .match = match, - .checkentry = check, - .matchsize = sizeof(struct xt_connbytes_info), - .family = AF_INET, - .me = THIS_MODULE -}; -static struct xt_match connbytes6_match = { - .name = "connbytes", - .match = match, - .checkentry = check, - .matchsize = sizeof(struct xt_connbytes_info), - .family = AF_INET6, - .me = THIS_MODULE +static struct xt_match xt_connbytes_match = { + { + .name = "connbytes", + .family = AF_INET, + .checkentry = check, + .match = match, + .matchsize = sizeof(struct xt_connbytes_info), + .me = THIS_MODULE + }, + { + .name = "connbytes", + .family = AF_INET6, + .checkentry = check, + .match = match, + .matchsize = sizeof(struct xt_connbytes_info), + .me = THIS_MODULE + }, }; static int __init xt_connbytes_init(void) { - int ret; - ret = xt_register_match(&connbytes_match); - if (ret) - return ret; - - ret = xt_register_match(&connbytes6_match); - if (ret) - xt_unregister_match(&connbytes_match); - return ret; + return xt_register_matches(xt_connbytes_match, + ARRAY_SIZE(xt_connbytes_match)); } static void __exit xt_connbytes_fini(void) { - xt_unregister_match(&connbytes_match); - xt_unregister_match(&connbytes6_match); + xt_unregister_matches(xt_connbytes_match, + ARRAY_SIZE(xt_connbytes_match)); } module_init(xt_connbytes_init); diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 56324c8aff0a..a97b2d455b79 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -82,46 +82,37 @@ destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) #endif } -static struct xt_match connmark_match = { - .name = "connmark", - .match = match, - .matchsize = sizeof(struct xt_connmark_info), - .checkentry = checkentry, - .destroy = destroy, - .family = AF_INET, - .me = THIS_MODULE -}; - -static struct xt_match connmark6_match = { - .name = "connmark", - .match = match, - .matchsize = sizeof(struct xt_connmark_info), - .checkentry = checkentry, - .destroy = destroy, - .family = AF_INET6, - .me = THIS_MODULE +static struct xt_match xt_connmark_match[] = { + { + .name = "connmark", + .family = AF_INET, + .checkentry = checkentry, + .match = match, + .destroy = destroy, + .matchsize = sizeof(struct xt_connmark_info), + .me = THIS_MODULE + }, + { + .name = "connmark", + .family = AF_INET6, + .checkentry = checkentry, + .match = match, + .destroy = destroy, + .matchsize = sizeof(struct xt_connmark_info), + .me = THIS_MODULE + }, }; static int __init xt_connmark_init(void) { - int ret; - need_conntrack(); - - ret = xt_register_match(&connmark_match); - if (ret) - return ret; - - ret = xt_register_match(&connmark6_match); - if (ret) - xt_unregister_match(&connmark_match); - return ret; + return xt_register_matches(xt_connmark_match, + ARRAY_SIZE(xt_connmark_match)); } static void __exit xt_connmark_fini(void) { - xt_unregister_match(&connmark6_match); - xt_unregister_match(&connmark_match); + xt_register_matches(xt_connmark_match, ARRAY_SIZE(xt_connmark_match)); } module_init(xt_connmark_init); diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 145489a4c3f2..1540885174ee 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -241,11 +241,8 @@ static struct xt_match conntrack_match = { static int __init xt_conntrack_init(void) { - int ret; need_conntrack(); - ret = xt_register_match(&conntrack_match); - - return ret; + return xt_register_match(&conntrack_match); } static void __exit xt_conntrack_fini(void) diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index 2e2f825dad4c..5ca6f5288f46 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -141,27 +141,26 @@ checkentry(const char *tablename, && !(info->invflags & ~info->flags); } -static struct xt_match dccp_match = -{ - .name = "dccp", - .match = match, - .matchsize = sizeof(struct xt_dccp_info), - .proto = IPPROTO_DCCP, - .checkentry = checkentry, - .family = AF_INET, - .me = THIS_MODULE, +static struct xt_match xt_dccp_match[] = { + { + .name = "dccp", + .family = AF_INET, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_dccp_info), + .proto = IPPROTO_DCCP, + .me = THIS_MODULE, + }, + { + .name = "dccp", + .family = AF_INET6, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_dccp_info), + .proto = IPPROTO_DCCP, + .me = THIS_MODULE, + }, }; -static struct xt_match dccp6_match = -{ - .name = "dccp", - .match = match, - .matchsize = sizeof(struct xt_dccp_info), - .proto = IPPROTO_DCCP, - .checkentry = checkentry, - .family = AF_INET6, - .me = THIS_MODULE, -}; - static int __init xt_dccp_init(void) { @@ -173,27 +172,19 @@ static int __init xt_dccp_init(void) dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL); if (!dccp_optbuf) return -ENOMEM; - ret = xt_register_match(&dccp_match); + ret = xt_register_matches(xt_dccp_match, ARRAY_SIZE(xt_dccp_match)); if (ret) goto out_kfree; - ret = xt_register_match(&dccp6_match); - if (ret) - goto out_unreg; - return ret; -out_unreg: - xt_unregister_match(&dccp_match); out_kfree: kfree(dccp_optbuf); - return ret; } static void __exit xt_dccp_fini(void) { - xt_unregister_match(&dccp6_match); - xt_unregister_match(&dccp_match); + xt_unregister_matches(xt_dccp_match, ARRAY_SIZE(xt_dccp_match)); kfree(dccp_optbuf); } diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index 82e250d1f007..d84075c30159 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -71,42 +71,33 @@ static int checkentry(const char *tablename, return 1; } -static struct xt_match dscp_match = { - .name = "dscp", - .match = match, - .checkentry = checkentry, - .matchsize = sizeof(struct xt_dscp_info), - .family = AF_INET, - .me = THIS_MODULE, -}; - -static struct xt_match dscp6_match = { - .name = "dscp", - .match = match6, - .checkentry = checkentry, - .matchsize = sizeof(struct xt_dscp_info), - .family = AF_INET6, - .me = THIS_MODULE, +static struct xt_match xt_dscp_match[] = { + { + .name = "dscp", + .family = AF_INET, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_dscp_info), + .me = THIS_MODULE, + }, + { + .name = "dscp", + .family = AF_INET6, + .checkentry = checkentry, + .match = match6, + .matchsize = sizeof(struct xt_dscp_info), + .me = THIS_MODULE, + }, }; static int __init xt_dscp_match_init(void) { - int ret; - ret = xt_register_match(&dscp_match); - if (ret) - return ret; - - ret = xt_register_match(&dscp6_match); - if (ret) - xt_unregister_match(&dscp_match); - - return ret; + return xt_register_matches(xt_dscp_match, ARRAY_SIZE(xt_dscp_match)); } static void __exit xt_dscp_match_fini(void) { - xt_unregister_match(&dscp_match); - xt_unregister_match(&dscp6_match); + xt_unregister_matches(xt_dscp_match, ARRAY_SIZE(xt_dscp_match)); } module_init(xt_dscp_match_init); diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c index 9dad6281e0c1..7b19bc9ea205 100644 --- a/net/netfilter/xt_esp.c +++ b/net/netfilter/xt_esp.c @@ -92,44 +92,35 @@ checkentry(const char *tablename, return 1; } -static struct xt_match esp_match = { - .name = "esp", - .family = AF_INET, - .proto = IPPROTO_ESP, - .match = &match, - .matchsize = sizeof(struct xt_esp), - .checkentry = &checkentry, - .me = THIS_MODULE, -}; - -static struct xt_match esp6_match = { - .name = "esp", - .family = AF_INET6, - .proto = IPPROTO_ESP, - .match = &match, - .matchsize = sizeof(struct xt_esp), - .checkentry = &checkentry, - .me = THIS_MODULE, +static struct xt_match xt_esp_match[] = { + { + .name = "esp", + .family = AF_INET, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_esp), + .proto = IPPROTO_ESP, + .me = THIS_MODULE, + }, + { + .name = "esp", + .family = AF_INET6, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_esp), + .proto = IPPROTO_ESP, + .me = THIS_MODULE, + }, }; static int __init xt_esp_init(void) { - int ret; - ret = xt_register_match(&esp_match); - if (ret) - return ret; - - ret = xt_register_match(&esp6_match); - if (ret) - xt_unregister_match(&esp_match); - - return ret; + return xt_register_matches(xt_esp_match, ARRAY_SIZE(xt_esp_match)); } static void __exit xt_esp_cleanup(void) { - xt_unregister_match(&esp_match); - xt_unregister_match(&esp6_match); + xt_unregister_matches(xt_esp_match, ARRAY_SIZE(xt_esp_match)); } module_init(xt_esp_init); diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index 799c2a43e3b9..db453a7a154e 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -163,45 +163,37 @@ destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) #endif } -static struct xt_match helper_match = { - .name = "helper", - .match = match, - .matchsize = sizeof(struct xt_helper_info), - .checkentry = check, - .destroy = destroy, - .family = AF_INET, - .me = THIS_MODULE, -}; -static struct xt_match helper6_match = { - .name = "helper", - .match = match, - .matchsize = sizeof(struct xt_helper_info), - .checkentry = check, - .destroy = destroy, - .family = AF_INET6, - .me = THIS_MODULE, +static struct xt_match xt_helper_match[] = { + { + .name = "helper", + .family = AF_INET, + .checkentry = check, + .match = match, + .destroy = destroy, + .matchsize = sizeof(struct xt_helper_info), + .me = THIS_MODULE, + }, + { + .name = "helper", + .family = AF_INET6, + .checkentry = check, + .match = match, + .destroy = destroy, + .matchsize = sizeof(struct xt_helper_info), + .me = THIS_MODULE, + }, }; static int __init xt_helper_init(void) { - int ret; need_conntrack(); - - ret = xt_register_match(&helper_match); - if (ret < 0) - return ret; - - ret = xt_register_match(&helper6_match); - if (ret < 0) - xt_unregister_match(&helper_match); - - return ret; + return xt_register_matches(xt_helper_match, + ARRAY_SIZE(xt_helper_match)); } static void __exit xt_helper_fini(void) { - xt_unregister_match(&helper_match); - xt_unregister_match(&helper6_match); + xt_unregister_matches(xt_helper_match, ARRAY_SIZE(xt_helper_match)); } module_init(xt_helper_init); diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c index 109132c9a146..67fd30d9f303 100644 --- a/net/netfilter/xt_length.c +++ b/net/netfilter/xt_length.c @@ -52,39 +52,32 @@ match6(const struct sk_buff *skb, return (pktlen >= info->min && pktlen <= info->max) ^ info->invert; } -static struct xt_match length_match = { - .name = "length", - .match = match, - .matchsize = sizeof(struct xt_length_info), - .family = AF_INET, - .me = THIS_MODULE, -}; - -static struct xt_match length6_match = { - .name = "length", - .match = match6, - .matchsize = sizeof(struct xt_length_info), - .family = AF_INET6, - .me = THIS_MODULE, +static struct xt_match xt_length_match[] = { + { + .name = "length", + .family = AF_INET, + .match = match, + .matchsize = sizeof(struct xt_length_info), + .me = THIS_MODULE, + }, + { + .name = "length", + .family = AF_INET6, + .match = match6, + .matchsize = sizeof(struct xt_length_info), + .me = THIS_MODULE, + }, }; static int __init xt_length_init(void) { - int ret; - ret = xt_register_match(&length_match); - if (ret) - return ret; - ret = xt_register_match(&length6_match); - if (ret) - xt_unregister_match(&length_match); - - return ret; + return xt_register_matches(xt_length_match, + ARRAY_SIZE(xt_length_match)); } static void __exit xt_length_fini(void) { - xt_unregister_match(&length_match); - xt_unregister_match(&length6_match); + xt_unregister_matches(xt_length_match, ARRAY_SIZE(xt_length_match)); } module_init(xt_length_init); diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index ce7fdb7e4e07..e8d5e7ac695a 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -136,42 +136,33 @@ ipt_limit_checkentry(const char *tablename, return 1; } -static struct xt_match ipt_limit_reg = { - .name = "limit", - .match = ipt_limit_match, - .matchsize = sizeof(struct xt_rateinfo), - .checkentry = ipt_limit_checkentry, - .family = AF_INET, - .me = THIS_MODULE, -}; -static struct xt_match limit6_reg = { - .name = "limit", - .match = ipt_limit_match, - .matchsize = sizeof(struct xt_rateinfo), - .checkentry = ipt_limit_checkentry, - .family = AF_INET6, - .me = THIS_MODULE, +static struct xt_match xt_limit_match[] = { + { + .name = "limit", + .family = AF_INET, + .checkentry = ipt_limit_checkentry, + .match = ipt_limit_match, + .matchsize = sizeof(struct xt_rateinfo), + .me = THIS_MODULE, + }, + { + .name = "limit", + .family = AF_INET6, + .checkentry = ipt_limit_checkentry, + .match = ipt_limit_match, + .matchsize = sizeof(struct xt_rateinfo), + .me = THIS_MODULE, + }, }; static int __init xt_limit_init(void) { - int ret; - - ret = xt_register_match(&ipt_limit_reg); - if (ret) - return ret; - - ret = xt_register_match(&limit6_reg); - if (ret) - xt_unregister_match(&ipt_limit_reg); - - return ret; + return xt_register_matches(xt_limit_match, ARRAY_SIZE(xt_limit_match)); } static void __exit xt_limit_fini(void) { - xt_unregister_match(&ipt_limit_reg); - xt_unregister_match(&limit6_reg); + xt_unregister_matches(xt_limit_match, ARRAY_SIZE(xt_limit_match)); } module_init(xt_limit_init); diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c index 356290ffe386..425fc21e31f5 100644 --- a/net/netfilter/xt_mac.c +++ b/net/netfilter/xt_mac.c @@ -43,43 +43,37 @@ match(const struct sk_buff *skb, ^ info->invert)); } -static struct xt_match mac_match = { - .name = "mac", - .match = match, - .matchsize = sizeof(struct xt_mac_info), - .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN) | - (1 << NF_IP_FORWARD), - .family = AF_INET, - .me = THIS_MODULE, -}; -static struct xt_match mac6_match = { - .name = "mac", - .match = match, - .matchsize = sizeof(struct xt_mac_info), - .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN) | - (1 << NF_IP_FORWARD), - .family = AF_INET6, - .me = THIS_MODULE, +static struct xt_match xt_mac_match[] = { + { + .name = "mac", + .family = AF_INET, + .match = match, + .matchsize = sizeof(struct xt_mac_info), + .hooks = (1 << NF_IP_PRE_ROUTING) | + (1 << NF_IP_LOCAL_IN) | + (1 << NF_IP_FORWARD), + .me = THIS_MODULE, + }, + { + .name = "mac", + .family = AF_INET6, + .match = match, + .matchsize = sizeof(struct xt_mac_info), + .hooks = (1 << NF_IP_PRE_ROUTING) | + (1 << NF_IP_LOCAL_IN) | + (1 << NF_IP_FORWARD), + .me = THIS_MODULE, + }, }; static int __init xt_mac_init(void) { - int ret; - ret = xt_register_match(&mac_match); - if (ret) - return ret; - - ret = xt_register_match(&mac6_match); - if (ret) - xt_unregister_match(&mac_match); - - return ret; + return xt_register_matches(xt_mac_match, ARRAY_SIZE(xt_mac_match)); } static void __exit xt_mac_fini(void) { - xt_unregister_match(&mac_match); - xt_unregister_match(&mac6_match); + xt_unregister_matches(xt_mac_match, ARRAY_SIZE(xt_mac_match)); } module_init(xt_mac_init); diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index 876bc5797738..39f9b079f5d4 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -51,42 +51,33 @@ checkentry(const char *tablename, return 1; } -static struct xt_match mark_match = { - .name = "mark", - .match = match, - .matchsize = sizeof(struct xt_mark_info), - .checkentry = checkentry, - .family = AF_INET, - .me = THIS_MODULE, -}; - -static struct xt_match mark6_match = { - .name = "mark", - .match = match, - .matchsize = sizeof(struct xt_mark_info), - .checkentry = checkentry, - .family = AF_INET6, - .me = THIS_MODULE, +static struct xt_match xt_mark_match[] = { + { + .name = "mark", + .family = AF_INET, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_mark_info), + .me = THIS_MODULE, + }, + { + .name = "mark", + .family = AF_INET6, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_mark_info), + .me = THIS_MODULE, + }, }; static int __init xt_mark_init(void) { - int ret; - ret = xt_register_match(&mark_match); - if (ret) - return ret; - - ret = xt_register_match(&mark6_match); - if (ret) - xt_unregister_match(&mark_match); - - return ret; + return xt_register_matches(xt_mark_match, ARRAY_SIZE(xt_mark_match)); } static void __exit xt_mark_fini(void) { - xt_unregister_match(&mark_match); - xt_unregister_match(&mark6_match); + xt_unregister_matches(xt_mark_match, ARRAY_SIZE(xt_mark_match)); } module_init(xt_mark_init); diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index 1ff0a25396e7..e74f9bb98b3c 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -231,84 +231,55 @@ checkentry6_v1(const char *tablename, multiinfo->count); } -static struct xt_match multiport_match = { - .name = "multiport", - .revision = 0, - .matchsize = sizeof(struct xt_multiport), - .match = &match, - .checkentry = &checkentry, - .family = AF_INET, - .me = THIS_MODULE, -}; - -static struct xt_match multiport_match_v1 = { - .name = "multiport", - .revision = 1, - .matchsize = sizeof(struct xt_multiport_v1), - .match = &match_v1, - .checkentry = &checkentry_v1, - .family = AF_INET, - .me = THIS_MODULE, -}; - -static struct xt_match multiport6_match = { - .name = "multiport", - .revision = 0, - .matchsize = sizeof(struct xt_multiport), - .match = &match, - .checkentry = &checkentry6, - .family = AF_INET6, - .me = THIS_MODULE, -}; - -static struct xt_match multiport6_match_v1 = { - .name = "multiport", - .revision = 1, - .matchsize = sizeof(struct xt_multiport_v1), - .match = &match_v1, - .checkentry = &checkentry6_v1, - .family = AF_INET6, - .me = THIS_MODULE, +static struct xt_match xt_multiport_match[] = { + { + .name = "multiport", + .family = AF_INET, + .revision = 0, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_multiport), + .me = THIS_MODULE, + }, + { + .name = "multiport", + .family = AF_INET, + .revision = 1, + .checkentry = checkentry_v1, + .match = match_v1, + .matchsize = sizeof(struct xt_multiport_v1), + .me = THIS_MODULE, + }, + { + .name = "multiport", + .family = AF_INET6, + .revision = 0, + .checkentry = checkentry6, + .match = match, + .matchsize = sizeof(struct xt_multiport), + .me = THIS_MODULE, + }, + { + .name = "multiport", + .family = AF_INET6, + .revision = 1, + .checkentry = checkentry6_v1, + .match = match_v1, + .matchsize = sizeof(struct xt_multiport_v1), + .me = THIS_MODULE, + }, }; static int __init xt_multiport_init(void) { - int ret; - - ret = xt_register_match(&multiport_match); - if (ret) - goto out; - - ret = xt_register_match(&multiport_match_v1); - if (ret) - goto out_unreg_multi_v0; - - ret = xt_register_match(&multiport6_match); - if (ret) - goto out_unreg_multi_v1; - - ret = xt_register_match(&multiport6_match_v1); - if (ret) - goto out_unreg_multi6_v0; - - return ret; - -out_unreg_multi6_v0: - xt_unregister_match(&multiport6_match); -out_unreg_multi_v1: - xt_unregister_match(&multiport_match_v1); -out_unreg_multi_v0: - xt_unregister_match(&multiport_match); -out: - return ret; + return xt_register_matches(xt_multiport_match, + ARRAY_SIZE(xt_multiport_match)); } static void __exit xt_multiport_fini(void) { - xt_unregister_match(&multiport_match); - xt_unregister_match(&multiport_match_v1); - xt_unregister_match(&multiport6_match); - xt_unregister_match(&multiport6_match_v1); + xt_unregister_matches(xt_multiport_match, + ARRAY_SIZE(xt_multiport_match)); } module_init(xt_multiport_init); diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 63a965467465..af3d70f96ecd 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -132,43 +132,34 @@ checkentry(const char *tablename, return 1; } -static struct xt_match physdev_match = { - .name = "physdev", - .match = match, - .matchsize = sizeof(struct xt_physdev_info), - .checkentry = checkentry, - .family = AF_INET, - .me = THIS_MODULE, -}; - -static struct xt_match physdev6_match = { - .name = "physdev", - .match = match, - .matchsize = sizeof(struct xt_physdev_info), - .checkentry = checkentry, - .family = AF_INET6, - .me = THIS_MODULE, +static struct xt_match xt_physdev_match[] = { + { + .name = "physdev", + .family = AF_INET, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_physdev_info), + .me = THIS_MODULE, + }, + { + .name = "physdev", + .family = AF_INET6, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_physdev_info), + .me = THIS_MODULE, + }, }; static int __init xt_physdev_init(void) { - int ret; - - ret = xt_register_match(&physdev_match); - if (ret < 0) - return ret; - - ret = xt_register_match(&physdev6_match); - if (ret < 0) - xt_unregister_match(&physdev_match); - - return ret; + return xt_register_matches(xt_physdev_match, + ARRAY_SIZE(xt_physdev_match)); } static void __exit xt_physdev_fini(void) { - xt_unregister_match(&physdev_match); - xt_unregister_match(&physdev6_match); + xt_unregister_matches(xt_physdev_match, ARRAY_SIZE(xt_physdev_match)); } module_init(xt_physdev_init); diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c index d2f5320a80bf..16e7b0804287 100644 --- a/net/netfilter/xt_pkttype.c +++ b/net/netfilter/xt_pkttype.c @@ -43,40 +43,32 @@ static int match(const struct sk_buff *skb, return (type == info->pkttype) ^ info->invert; } -static struct xt_match pkttype_match = { - .name = "pkttype", - .match = match, - .matchsize = sizeof(struct xt_pkttype_info), - .family = AF_INET, - .me = THIS_MODULE, -}; - -static struct xt_match pkttype6_match = { - .name = "pkttype", - .match = match, - .matchsize = sizeof(struct xt_pkttype_info), - .family = AF_INET6, - .me = THIS_MODULE, +static struct xt_match xt_pkttype_match[] = { + { + .name = "pkttype", + .family = AF_INET, + .match = match, + .matchsize = sizeof(struct xt_pkttype_info), + .me = THIS_MODULE, + }, + { + .name = "pkttype", + .family = AF_INET6, + .match = match, + .matchsize = sizeof(struct xt_pkttype_info), + .me = THIS_MODULE, + }, }; static int __init xt_pkttype_init(void) { - int ret; - ret = xt_register_match(&pkttype_match); - if (ret) - return ret; - - ret = xt_register_match(&pkttype6_match); - if (ret) - xt_unregister_match(&pkttype_match); - - return ret; + return xt_register_matches(xt_pkttype_match, + ARRAY_SIZE(xt_pkttype_match)); } static void __exit xt_pkttype_fini(void) { - xt_unregister_match(&pkttype_match); - xt_unregister_match(&pkttype6_match); + xt_unregister_matches(xt_pkttype_match, ARRAY_SIZE(xt_pkttype_match)); } module_init(xt_pkttype_init); diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index ba1ca03abad3..f5639c451112 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -165,43 +165,36 @@ static int checkentry(const char *tablename, const void *ip_void, return 1; } -static struct xt_match policy_match = { - .name = "policy", - .family = AF_INET, - .match = match, - .matchsize = sizeof(struct xt_policy_info), - .checkentry = checkentry, - .family = AF_INET, - .me = THIS_MODULE, -}; - -static struct xt_match policy6_match = { - .name = "policy", - .family = AF_INET6, - .match = match, - .matchsize = sizeof(struct xt_policy_info), - .checkentry = checkentry, - .family = AF_INET6, - .me = THIS_MODULE, +static struct xt_match xt_policy_match[] = { + { + .name = "policy", + .family = AF_INET, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_policy_info), + .family = AF_INET, + .me = THIS_MODULE, + }, + { + .name = "policy", + .family = AF_INET6, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_policy_info), + .family = AF_INET6, + .me = THIS_MODULE, + }, }; static int __init init(void) { - int ret; - - ret = xt_register_match(&policy_match); - if (ret) - return ret; - ret = xt_register_match(&policy6_match); - if (ret) - xt_unregister_match(&policy_match); - return ret; + return xt_register_matches(xt_policy_match, + ARRAY_SIZE(xt_policy_match)); } static void __exit fini(void) { - xt_unregister_match(&policy6_match); - xt_unregister_match(&policy_match); + xt_unregister_matches(xt_policy_match, ARRAY_SIZE(xt_policy_match)); } module_init(init); diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index be8d3c26b568..cc44f87cb8e6 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -52,46 +52,33 @@ checkentry(const char *tablename, const void *entry, return 1; } -static struct xt_match quota_match = { - .name = "quota", - .family = AF_INET, - .match = match, - .matchsize = sizeof(struct xt_quota_info), - .checkentry = checkentry, - .me = THIS_MODULE -}; - -static struct xt_match quota_match6 = { - .name = "quota", - .family = AF_INET6, - .match = match, - .matchsize = sizeof(struct xt_quota_info), - .checkentry = checkentry, - .me = THIS_MODULE +static struct xt_match xt_quota_match[] = { + { + .name = "quota", + .family = AF_INET, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_quota_info), + .me = THIS_MODULE + }, + { + .name = "quota", + .family = AF_INET6, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_quota_info), + .me = THIS_MODULE + }, }; static int __init xt_quota_init(void) { - int ret; - - ret = xt_register_match("a_match); - if (ret) - goto err1; - ret = xt_register_match("a_match6); - if (ret) - goto err2; - return ret; - -err2: - xt_unregister_match("a_match); -err1: - return ret; + return xt_register_matches(xt_quota_match, ARRAY_SIZE(xt_quota_match)); } static void __exit xt_quota_fini(void) { - xt_unregister_match("a_match6); - xt_unregister_match("a_match); + xt_unregister_matches(xt_quota_match, ARRAY_SIZE(xt_quota_match)); } module_init(xt_quota_init); diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index 843383e01d41..5628621170e6 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -178,44 +178,35 @@ checkentry(const char *tablename, | SCTP_CHUNK_MATCH_ONLY))); } -static struct xt_match sctp_match = { - .name = "sctp", - .match = match, - .matchsize = sizeof(struct xt_sctp_info), - .proto = IPPROTO_SCTP, - .checkentry = checkentry, - .family = AF_INET, - .me = THIS_MODULE -}; - -static struct xt_match sctp6_match = { - .name = "sctp", - .match = match, - .matchsize = sizeof(struct xt_sctp_info), - .proto = IPPROTO_SCTP, - .checkentry = checkentry, - .family = AF_INET6, - .me = THIS_MODULE +static struct xt_match xt_sctp_match[] = { + { + .name = "sctp", + .family = AF_INET, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_sctp_info), + .proto = IPPROTO_SCTP, + .me = THIS_MODULE + }, + { + .name = "sctp", + .family = AF_INET6, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_sctp_info), + .proto = IPPROTO_SCTP, + .me = THIS_MODULE + }, }; static int __init xt_sctp_init(void) { - int ret; - ret = xt_register_match(&sctp_match); - if (ret) - return ret; - - ret = xt_register_match(&sctp6_match); - if (ret) - xt_unregister_match(&sctp_match); - - return ret; + return xt_register_matches(xt_sctp_match, ARRAY_SIZE(xt_sctp_match)); } static void __exit xt_sctp_fini(void) { - xt_unregister_match(&sctp6_match); - xt_unregister_match(&sctp_match); + xt_unregister_matches(xt_sctp_match, ARRAY_SIZE(xt_sctp_match)); } module_init(xt_sctp_init); diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index f9e304dc4504..5f9492e3b2b1 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -69,47 +69,36 @@ destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) #endif } -static struct xt_match state_match = { - .name = "state", - .match = match, - .checkentry = check, - .destroy = destroy, - .matchsize = sizeof(struct xt_state_info), - .family = AF_INET, - .me = THIS_MODULE, -}; - -static struct xt_match state6_match = { - .name = "state", - .match = match, - .checkentry = check, - .destroy = destroy, - .matchsize = sizeof(struct xt_state_info), - .family = AF_INET6, - .me = THIS_MODULE, +static struct xt_match xt_state_match[] = { + { + .name = "state", + .family = AF_INET, + .checkentry = check, + .match = match, + .destroy = destroy, + .matchsize = sizeof(struct xt_state_info), + .me = THIS_MODULE, + }, + { + .name = "state", + .family = AF_INET6, + .checkentry = check, + .match = match, + .destroy = destroy, + .matchsize = sizeof(struct xt_state_info), + .me = THIS_MODULE, + }, }; static int __init xt_state_init(void) { - int ret; - need_conntrack(); - - ret = xt_register_match(&state_match); - if (ret < 0) - return ret; - - ret = xt_register_match(&state6_match); - if (ret < 0) - xt_unregister_match(&state_match); - - return ret; + return xt_register_matches(xt_state_match, ARRAY_SIZE(xt_state_match)); } static void __exit xt_state_fini(void) { - xt_unregister_match(&state_match); - xt_unregister_match(&state6_match); + xt_unregister_matches(xt_state_match, ARRAY_SIZE(xt_state_match)); } module_init(xt_state_init); diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index de1037f58596..5181630a87fc 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -66,46 +66,35 @@ checkentry(const char *tablename, const void *entry, return 1; } -static struct xt_match statistic_match = { - .name = "statistic", - .match = match, - .matchsize = sizeof(struct xt_statistic_info), - .checkentry = checkentry, - .family = AF_INET, - .me = THIS_MODULE, -}; - -static struct xt_match statistic_match6 = { - .name = "statistic", - .match = match, - .matchsize = sizeof(struct xt_statistic_info), - .checkentry = checkentry, - .family = AF_INET6, - .me = THIS_MODULE, +static struct xt_match xt_statistic_match[] = { + { + .name = "statistic", + .family = AF_INET, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_statistic_info), + .me = THIS_MODULE, + }, + { + .name = "statistic", + .family = AF_INET6, + .checkentry = checkentry, + .match = match, + .matchsize = sizeof(struct xt_statistic_info), + .me = THIS_MODULE, + }, }; static int __init xt_statistic_init(void) { - int ret; - - ret = xt_register_match(&statistic_match); - if (ret) - goto err1; - - ret = xt_register_match(&statistic_match6); - if (ret) - goto err2; - return ret; -err2: - xt_unregister_match(&statistic_match); -err1: - return ret; + return xt_register_matches(xt_statistic_match, + ARRAY_SIZE(xt_statistic_match)); } static void __exit xt_statistic_fini(void) { - xt_unregister_match(&statistic_match6); - xt_unregister_match(&statistic_match); + xt_unregister_matches(xt_statistic_match, + ARRAY_SIZE(xt_statistic_match)); } module_init(xt_statistic_init); diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index 275330fcdaaa..1a1c1d17d85e 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -75,43 +75,35 @@ static void destroy(const struct xt_match *match, void *matchinfo, textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config); } -static struct xt_match string_match = { - .name = "string", - .match = match, - .matchsize = sizeof(struct xt_string_info), - .checkentry = checkentry, - .destroy = destroy, - .family = AF_INET, - .me = THIS_MODULE -}; -static struct xt_match string6_match = { - .name = "string", - .match = match, - .matchsize = sizeof(struct xt_string_info), - .checkentry = checkentry, - .destroy = destroy, - .family = AF_INET6, - .me = THIS_MODULE +static struct xt_match xt_string_match[] = { + { + .name = "string", + .family = AF_INET, + .checkentry = checkentry, + .match = match, + .destroy = destroy, + .matchsize = sizeof(struct xt_string_info), + .me = THIS_MODULE + }, + { + .name = "string", + .family = AF_INET6, + .checkentry = checkentry, + .match = match, + .destroy = destroy, + .matchsize = sizeof(struct xt_string_info), + .me = THIS_MODULE + }, }; static int __init xt_string_init(void) { - int ret; - - ret = xt_register_match(&string_match); - if (ret) - return ret; - ret = xt_register_match(&string6_match); - if (ret) - xt_unregister_match(&string_match); - - return ret; + return xt_register_matches(xt_string_match, ARRAY_SIZE(xt_string_match)); } static void __exit xt_string_fini(void) { - xt_unregister_match(&string_match); - xt_unregister_match(&string6_match); + xt_unregister_matches(xt_string_match, ARRAY_SIZE(xt_string_match)); } module_init(xt_string_init); diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c index cf7d335cadcd..7baa9ebc46c1 100644 --- a/net/netfilter/xt_tcpmss.c +++ b/net/netfilter/xt_tcpmss.c @@ -93,43 +93,34 @@ match(const struct sk_buff *skb, info->invert, hotdrop); } -static struct xt_match tcpmss_match = { - .name = "tcpmss", - .match = match, - .matchsize = sizeof(struct xt_tcpmss_match_info), - .proto = IPPROTO_TCP, - .family = AF_INET, - .me = THIS_MODULE, +static struct xt_match xt_tcpmss_match[] = { + { + .name = "tcpmss", + .family = AF_INET, + .match = match, + .matchsize = sizeof(struct xt_tcpmss_match_info), + .proto = IPPROTO_TCP, + .me = THIS_MODULE, + }, + { + .name = "tcpmss", + .family = AF_INET6, + .match = match, + .matchsize = sizeof(struct xt_tcpmss_match_info), + .proto = IPPROTO_TCP, + .me = THIS_MODULE, + }, }; -static struct xt_match tcpmss6_match = { - .name = "tcpmss", - .match = match, - .matchsize = sizeof(struct xt_tcpmss_match_info), - .proto = IPPROTO_TCP, - .family = AF_INET6, - .me = THIS_MODULE, -}; - - static int __init xt_tcpmss_init(void) { - int ret; - ret = xt_register_match(&tcpmss_match); - if (ret) - return ret; - - ret = xt_register_match(&tcpmss6_match); - if (ret) - xt_unregister_match(&tcpmss_match); - - return ret; + return xt_register_matches(xt_tcpmss_match, + ARRAY_SIZE(xt_tcpmss_match)); } static void __exit xt_tcpmss_fini(void) { - xt_unregister_match(&tcpmss6_match); - xt_unregister_match(&tcpmss_match); + xt_unregister_matches(xt_tcpmss_match, ARRAY_SIZE(xt_tcpmss_match)); } module_init(xt_tcpmss_init); diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index a9a63aa68936..54aab051af86 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -199,81 +199,54 @@ udp_checkentry(const char *tablename, return !(udpinfo->invflags & ~XT_UDP_INV_MASK); } -static struct xt_match tcp_matchstruct = { - .name = "tcp", - .match = tcp_match, - .matchsize = sizeof(struct xt_tcp), - .proto = IPPROTO_TCP, - .family = AF_INET, - .checkentry = tcp_checkentry, - .me = THIS_MODULE, -}; - -static struct xt_match tcp6_matchstruct = { - .name = "tcp", - .match = tcp_match, - .matchsize = sizeof(struct xt_tcp), - .proto = IPPROTO_TCP, - .family = AF_INET6, - .checkentry = tcp_checkentry, - .me = THIS_MODULE, -}; - -static struct xt_match udp_matchstruct = { - .name = "udp", - .match = udp_match, - .matchsize = sizeof(struct xt_udp), - .proto = IPPROTO_UDP, - .family = AF_INET, - .checkentry = udp_checkentry, - .me = THIS_MODULE, -}; -static struct xt_match udp6_matchstruct = { - .name = "udp", - .match = udp_match, - .matchsize = sizeof(struct xt_udp), - .proto = IPPROTO_UDP, - .family = AF_INET6, - .checkentry = udp_checkentry, - .me = THIS_MODULE, +static struct xt_match xt_tcpudp_match[] = { + { + .name = "tcp", + .family = AF_INET, + .checkentry = tcp_checkentry, + .match = tcp_match, + .matchsize = sizeof(struct xt_tcp), + .proto = IPPROTO_TCP, + .me = THIS_MODULE, + }, + { + .name = "tcp", + .family = AF_INET6, + .checkentry = tcp_checkentry, + .match = tcp_match, + .matchsize = sizeof(struct xt_tcp), + .proto = IPPROTO_TCP, + .me = THIS_MODULE, + }, + { + .name = "udp", + .family = AF_INET, + .checkentry = udp_checkentry, + .match = udp_match, + .matchsize = sizeof(struct xt_udp), + .proto = IPPROTO_UDP, + .me = THIS_MODULE, + }, + { + .name = "udp", + .family = AF_INET6, + .checkentry = udp_checkentry, + .match = udp_match, + .matchsize = sizeof(struct xt_udp), + .proto = IPPROTO_UDP, + .me = THIS_MODULE, + }, }; static int __init xt_tcpudp_init(void) { - int ret; - ret = xt_register_match(&tcp_matchstruct); - if (ret) - return ret; - - ret = xt_register_match(&tcp6_matchstruct); - if (ret) - goto out_unreg_tcp; - - ret = xt_register_match(&udp_matchstruct); - if (ret) - goto out_unreg_tcp6; - - ret = xt_register_match(&udp6_matchstruct); - if (ret) - goto out_unreg_udp; - - return ret; - -out_unreg_udp: - xt_unregister_match(&udp_matchstruct); -out_unreg_tcp6: - xt_unregister_match(&tcp6_matchstruct); -out_unreg_tcp: - xt_unregister_match(&tcp_matchstruct); - return ret; + return xt_register_matches(xt_tcpudp_match, + ARRAY_SIZE(xt_tcpudp_match)); } static void __exit xt_tcpudp_fini(void) { - xt_unregister_match(&udp6_matchstruct); - xt_unregister_match(&udp_matchstruct); - xt_unregister_match(&tcp6_matchstruct); - xt_unregister_match(&tcp_matchstruct); + xt_unregister_matches(xt_tcpudp_match, ARRAY_SIZE(xt_tcpudp_match)); } module_init(xt_tcpudp_init); From fe1cb10873b44cf89082465823ee6d4d4ac63ad7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 22 Aug 2006 00:35:47 -0700 Subject: [PATCH 0515/1063] [NETFILTER]: x_tables: remove unused argument to target functions Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 3 +-- include/linux/netfilter_arp/arp_tables.h | 3 +-- include/linux/netfilter_ipv4/ip_tables.h | 3 +-- include/linux/netfilter_ipv6/ip6_tables.h | 3 +-- net/ipv4/netfilter/arp_tables.c | 9 +++------ net/ipv4/netfilter/arpt_mangle.c | 2 +- net/ipv4/netfilter/arptable_filter.c | 2 +- net/ipv4/netfilter/ip_nat_rule.c | 8 +++----- net/ipv4/netfilter/ip_tables.c | 9 +++------ net/ipv4/netfilter/ipt_CLUSTERIP.c | 3 +-- net/ipv4/netfilter/ipt_ECN.c | 3 +-- net/ipv4/netfilter/ipt_LOG.c | 3 +-- net/ipv4/netfilter/ipt_MASQUERADE.c | 3 +-- net/ipv4/netfilter/ipt_NETMAP.c | 3 +-- net/ipv4/netfilter/ipt_REDIRECT.c | 3 +-- net/ipv4/netfilter/ipt_REJECT.c | 3 +-- net/ipv4/netfilter/ipt_SAME.c | 3 +-- net/ipv4/netfilter/ipt_TCPMSS.c | 3 +-- net/ipv4/netfilter/ipt_TOS.c | 3 +-- net/ipv4/netfilter/ipt_TTL.c | 2 +- net/ipv4/netfilter/ipt_ULOG.c | 2 +- net/ipv4/netfilter/iptable_filter.c | 4 ++-- net/ipv4/netfilter/iptable_mangle.c | 4 ++-- net/ipv4/netfilter/iptable_raw.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 9 +++------ net/ipv6/netfilter/ip6t_HL.c | 2 +- net/ipv6/netfilter/ip6t_LOG.c | 3 +-- net/ipv6/netfilter/ip6t_REJECT.c | 3 +-- net/ipv6/netfilter/ip6table_filter.c | 4 ++-- net/ipv6/netfilter/ip6table_mangle.c | 4 ++-- net/ipv6/netfilter/ip6table_raw.c | 2 +- net/netfilter/xt_CLASSIFY.c | 3 +-- net/netfilter/xt_CONNMARK.c | 3 +-- net/netfilter/xt_CONNSECMARK.c | 2 +- net/netfilter/xt_DSCP.c | 6 ++---- net/netfilter/xt_MARK.c | 6 ++---- net/netfilter/xt_NFQUEUE.c | 3 +-- net/netfilter/xt_NOTRACK.c | 3 +-- net/netfilter/xt_SECMARK.c | 2 +- net/netfilter/xt_connbytes.c | 2 +- net/sched/act_ipt.c | 2 +- 41 files changed, 55 insertions(+), 90 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 9a9912430e3a..9cef0e91542b 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -211,8 +211,7 @@ struct xt_target const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userdata); + const void *targinfo); /* Called when user tries to insert an entry of this type: hook_mask is a bitmask of hooks from which it can be diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 62cc27daca4e..149e87c9ab13 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -248,8 +248,7 @@ extern unsigned int arpt_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct arpt_table *table, - void *userdata); + struct arpt_table *table); #define ARPT_ALIGN(s) (((s) + (__alignof__(struct arpt_entry)-1)) & ~(__alignof__(struct arpt_entry)-1)) #endif /*__KERNEL__*/ diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index c0dac16e1902..a536bbdef145 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -312,8 +312,7 @@ extern unsigned int ipt_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct ipt_table *table, - void *userdata); + struct ipt_table *table); #define IPT_ALIGN(s) XT_ALIGN(s) diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index d0d5d1ee4be3..d7a8e9c0dad0 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -300,8 +300,7 @@ extern unsigned int ip6t_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct ip6t_table *table, - void *userdata); + struct ip6t_table *table); /* Check for an extension */ extern int ip6t_ext_hdr(u8 nexthdr); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 8d1d7a6e72a5..c6bd270bf46a 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -208,8 +208,7 @@ static unsigned int arpt_error(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { if (net_ratelimit()) printk("arp_tables: error: '%s'\n", (char *)targinfo); @@ -226,8 +225,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct arpt_table *table, - void *userdata) + struct arpt_table *table) { static const char nulldevname[IFNAMSIZ]; unsigned int verdict = NF_DROP; @@ -302,8 +300,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb, in, out, hook, t->u.kernel.target, - t->data, - userdata); + t->data); /* Target might have changed stuff. */ arp = (*pskb)->nh.arph; diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index a58325c1ceb9..05fb2421bb26 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -11,7 +11,7 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, void *userinfo) + const void *targinfo) { const struct arpt_mangle *mangle = targinfo; struct arphdr *arp; diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index d7c472faa53b..7edea2a1696c 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -155,7 +155,7 @@ static unsigned int arpt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return arpt_do_table(pskb, hook, in, out, &packet_filter, NULL); + return arpt_do_table(pskb, hook, in, out, &packet_filter); } static struct nf_hook_ops arpt_ops[] = { diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c index 1aba926c1cb0..1aa0e4f462a5 100644 --- a/net/ipv4/netfilter/ip_nat_rule.c +++ b/net/ipv4/netfilter/ip_nat_rule.c @@ -104,8 +104,7 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct ipt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; @@ -147,8 +146,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct ipt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; @@ -255,7 +253,7 @@ int ip_nat_rule_find(struct sk_buff **pskb, { int ret; - ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL); + ret = ipt_do_table(pskb, hooknum, in, out, &nat_table); if (ret == NF_ACCEPT) { if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum))) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 048514f15f2f..8ce5b6f76447 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -180,8 +180,7 @@ ipt_error(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { if (net_ratelimit()) printk("ip_tables: error: `%s'\n", (char *)targinfo); @@ -217,8 +216,7 @@ ipt_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct ipt_table *table, - void *userdata) + struct ipt_table *table) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); u_int16_t offset; @@ -308,8 +306,7 @@ ipt_do_table(struct sk_buff **pskb, in, out, hook, t->u.kernel.target, - t->data, - userdata); + t->data); #ifdef CONFIG_NETFILTER_DEBUG if (((struct ipt_entry *)table_base)->comefrom diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index d994c5f5744c..a08383cf9e7a 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -302,8 +302,7 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ipt_clusterip_tgt_info *cipinfo = targinfo; enum ip_conntrack_info ctinfo; diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 7e30e6d2b5da..1c3da4a48e5f 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -85,8 +85,7 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ipt_ECN_info *einfo = targinfo; diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index b98f7b08b084..a8d356c6191f 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -416,8 +416,7 @@ ipt_log_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ipt_log_info *loginfo = targinfo; struct nf_loginfo li; diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index ebd94f2abf0d..9659793c66c0 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -64,8 +64,7 @@ masquerade_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 736c4b5a86a7..fd5e74a19fb5 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -55,8 +55,7 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index f290463232de..839fe99f71d4 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -58,8 +58,7 @@ redirect_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 95c6662b663c..1dfd8e56be8b 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -228,8 +228,7 @@ static unsigned int reject(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ipt_reject_info *reject = targinfo; diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c index 7169b09b5a67..cf801749490f 100644 --- a/net/ipv4/netfilter/ipt_SAME.c +++ b/net/ipv4/netfilter/ipt_SAME.c @@ -133,8 +133,7 @@ same_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c index 0fce85e05507..6d668dcfc22a 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c @@ -41,8 +41,7 @@ ipt_tcpmss_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ipt_tcpmss_info *tcpmssinfo = targinfo; struct tcphdr *tcph; diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index 52e9d705d48e..043df0137084 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -26,8 +26,7 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ipt_tos_target_info *tosinfo = targinfo; struct iphdr *iph = (*pskb)->nh.iph; diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index 2afb2a8aa8c5..164007107b5e 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -23,7 +23,7 @@ static unsigned int ipt_ttl_target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, void *userinfo) + const void *targinfo) { struct iphdr *iph; const struct ipt_TTL_info *info = targinfo; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index d46fd677fa11..4c5f0a117862 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -308,7 +308,7 @@ static unsigned int ipt_ulog_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, void *userinfo) + const void *targinfo) { struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 7f417484bfbf..e2e7dd8d7903 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -90,7 +90,7 @@ ipt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); + return ipt_do_table(pskb, hook, in, out, &packet_filter); } static unsigned int @@ -108,7 +108,7 @@ ipt_local_out_hook(unsigned int hook, return NF_ACCEPT; } - return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); + return ipt_do_table(pskb, hook, in, out, &packet_filter); } static struct nf_hook_ops ipt_ops[] = { diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 4e7998beda63..79336cb42527 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -119,7 +119,7 @@ ipt_route_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); + return ipt_do_table(pskb, hook, in, out, &packet_mangler); } static unsigned int @@ -148,7 +148,7 @@ ipt_local_hook(unsigned int hook, daddr = (*pskb)->nh.iph->daddr; tos = (*pskb)->nh.iph->tos; - ret = ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); + ret = ipt_do_table(pskb, hook, in, out, &packet_mangler); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE && ((*pskb)->nh.iph->saddr != saddr diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 7912cce1e1b8..bcbeb4aeacd9 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -95,7 +95,7 @@ ipt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_raw, NULL); + return ipt_do_table(pskb, hook, in, out, &packet_raw); } /* 'raw' is the very first table. */ diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index c9d6b23cd3f7..38cd7ffda9a0 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -220,8 +220,7 @@ ip6t_error(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { if (net_ratelimit()) printk("ip6_tables: error: `%s'\n", (char *)targinfo); @@ -258,8 +257,7 @@ ip6t_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct xt_table *table, - void *userdata) + struct xt_table *table) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); int offset = 0; @@ -349,8 +347,7 @@ ip6t_do_table(struct sk_buff **pskb, in, out, hook, t->u.kernel.target, - t->data, - userdata); + t->data); #ifdef CONFIG_NETFILTER_DEBUG if (((struct ip6t_entry *)table_base)->comefrom diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c index b8eff8ee69b1..c85d124f9a3d 100644 --- a/net/ipv6/netfilter/ip6t_HL.c +++ b/net/ipv6/netfilter/ip6t_HL.c @@ -22,7 +22,7 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, void *userinfo) + const void *targinfo) { struct ipv6hdr *ip6h; const struct ip6t_HL_info *info = targinfo; diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 73c6300109d6..acb91733e1fd 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -427,8 +427,7 @@ ip6t_log_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ip6t_log_info *loginfo = targinfo; struct nf_loginfo li; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 7929ff402166..343acd3cbf5e 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -180,8 +180,7 @@ static unsigned int reject6_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ip6t_reject_info *reject = targinfo; diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 60976c0c58e8..2fc07c74decf 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -108,7 +108,7 @@ ip6t_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL); + return ip6t_do_table(pskb, hook, in, out, &packet_filter); } static unsigned int @@ -128,7 +128,7 @@ ip6t_local_out_hook(unsigned int hook, } #endif - return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL); + return ip6t_do_table(pskb, hook, in, out, &packet_filter); } static struct nf_hook_ops ip6t_ops[] = { diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 03a13eab1dae..32db04fd8310 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -138,7 +138,7 @@ ip6t_route_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL); + return ip6t_do_table(pskb, hook, in, out, &packet_mangler); } static unsigned int @@ -174,7 +174,7 @@ ip6t_local_hook(unsigned int hook, /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u_int32_t *) (*pskb)->nh.ipv6h); - ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL); + ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler); if (ret != NF_DROP && ret != NF_STOLEN && (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr)) diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 61a7c58e99f8..b4154da575c0 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -122,7 +122,7 @@ ip6t_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(pskb, hook, in, out, &packet_raw, NULL); + return ip6t_do_table(pskb, hook, in, out, &packet_raw); } static struct nf_hook_ops ip6t_ops[] = { diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c index 1f92edd05933..50de965bb104 100644 --- a/net/netfilter/xt_CLASSIFY.c +++ b/net/netfilter/xt_CLASSIFY.c @@ -29,8 +29,7 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct xt_classify_target_info *clinfo = targinfo; diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index e577356b5c71..c2125f6ee128 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -38,8 +38,7 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct xt_connmark_target_info *markinfo = targinfo; u_int32_t diff; diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 48f7fc3c85cd..4b9cc65bb82b 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -66,7 +66,7 @@ static void secmark_restore(struct sk_buff *skb) static unsigned int target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, void *userinfo) + const void *targinfo) { struct sk_buff *skb = *pskb; const struct xt_connsecmark_target_info *info = targinfo; diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index a1cd9723644f..9d23c9580d80 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -32,8 +32,7 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct xt_DSCP_info *dinfo = targinfo; u_int8_t dscp = ipv4_get_dsfield((*pskb)->nh.iph) >> XT_DSCP_SHIFT; @@ -54,8 +53,7 @@ static unsigned int target6(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct xt_DSCP_info *dinfo = targinfo; u_int8_t dscp = ipv6_get_dsfield((*pskb)->nh.ipv6h) >> XT_DSCP_SHIFT; diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c index 0a6127219467..95a171c87994 100644 --- a/net/netfilter/xt_MARK.c +++ b/net/netfilter/xt_MARK.c @@ -27,8 +27,7 @@ target_v0(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct xt_mark_target_info *markinfo = targinfo; @@ -44,8 +43,7 @@ target_v1(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct xt_mark_target_info_v1 *markinfo = targinfo; int mark = 0; diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 7b982283abdb..db9b896e57c8 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -29,8 +29,7 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct xt_NFQ_info *tinfo = targinfo; diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c index cab881d4424c..6d00dcaed238 100644 --- a/net/netfilter/xt_NOTRACK.c +++ b/net/netfilter/xt_NOTRACK.c @@ -16,8 +16,7 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { /* Previously seen (loopback)? Ignore. */ if ((*pskb)->nfct != NULL) diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index 4300988786c9..8a04dcf2611e 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -31,7 +31,7 @@ static u8 mode; static unsigned int target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, void *userinfo) + const void *targinfo) { u32 secmark = 0; const struct xt_secmark_target_info *info = targinfo; diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 2d49948d3c38..d725e8b84503 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -143,7 +143,7 @@ static int check(const char *tablename, return 1; } -static struct xt_match xt_connbytes_match = { +static struct xt_match xt_connbytes_match[] = { { .name = "connbytes", .family = AF_INET, diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 224c078a398e..45a3143b8629 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -222,7 +222,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, ret = ipt->tcfi_t->u.kernel.target->target(&skb, skb->dev, NULL, ipt->tcfi_hook, ipt->tcfi_t->u.kernel.target, - ipt->tcfi_t->data, NULL); + ipt->tcfi_t->data); switch (ret) { case NF_ACCEPT: result = TC_ACT_OK; From efa741656e9ebf5fd6e0432b0d1b3c7f156392d3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 22 Aug 2006 00:36:37 -0700 Subject: [PATCH 0516/1063] [NETFILTER]: x_tables: remove unused size argument to check/destroy functions The size is verified by x_tables and isn't needed by the modules anymore. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 8 ++------ net/ipv4/netfilter/arp_tables.c | 5 +---- net/ipv4/netfilter/arpt_mangle.c | 2 +- net/ipv4/netfilter/ip_nat_rule.c | 2 -- net/ipv4/netfilter/ip_tables.c | 14 +++----------- net/ipv4/netfilter/ipt_CLUSTERIP.c | 4 +--- net/ipv4/netfilter/ipt_ECN.c | 1 - net/ipv4/netfilter/ipt_LOG.c | 1 - net/ipv4/netfilter/ipt_MASQUERADE.c | 1 - net/ipv4/netfilter/ipt_NETMAP.c | 1 - net/ipv4/netfilter/ipt_REDIRECT.c | 1 - net/ipv4/netfilter/ipt_REJECT.c | 1 - net/ipv4/netfilter/ipt_SAME.c | 4 +--- net/ipv4/netfilter/ipt_TCPMSS.c | 1 - net/ipv4/netfilter/ipt_TOS.c | 1 - net/ipv4/netfilter/ipt_TTL.c | 1 - net/ipv4/netfilter/ipt_ULOG.c | 1 - net/ipv4/netfilter/ipt_ah.c | 1 - net/ipv4/netfilter/ipt_ecn.c | 3 +-- net/ipv4/netfilter/ipt_hashlimit.c | 4 +--- net/ipv4/netfilter/ipt_owner.c | 1 - net/ipv4/netfilter/ipt_recent.c | 5 ++--- net/ipv6/netfilter/ip6_tables.c | 10 ++-------- net/ipv6/netfilter/ip6t_HL.c | 1 - net/ipv6/netfilter/ip6t_LOG.c | 1 - net/ipv6/netfilter/ip6t_REJECT.c | 1 - net/ipv6/netfilter/ip6t_ah.c | 1 - net/ipv6/netfilter/ip6t_dst.c | 1 - net/ipv6/netfilter/ip6t_frag.c | 1 - net/ipv6/netfilter/ip6t_hbh.c | 1 - net/ipv6/netfilter/ip6t_ipv6header.c | 1 - net/ipv6/netfilter/ip6t_owner.c | 1 - net/ipv6/netfilter/ip6t_rt.c | 1 - net/netfilter/xt_CONNMARK.c | 1 - net/netfilter/xt_CONNSECMARK.c | 2 +- net/netfilter/xt_DSCP.c | 1 - net/netfilter/xt_MARK.c | 2 -- net/netfilter/xt_SECMARK.c | 2 +- net/netfilter/xt_connbytes.c | 1 - net/netfilter/xt_connmark.c | 3 +-- net/netfilter/xt_conntrack.c | 3 +-- net/netfilter/xt_dccp.c | 1 - net/netfilter/xt_dscp.c | 1 - net/netfilter/xt_esp.c | 1 - net/netfilter/xt_helper.c | 3 +-- net/netfilter/xt_limit.c | 1 - net/netfilter/xt_mark.c | 1 - net/netfilter/xt_multiport.c | 4 ---- net/netfilter/xt_physdev.c | 1 - net/netfilter/xt_policy.c | 3 +-- net/netfilter/xt_quota.c | 2 +- net/netfilter/xt_sctp.c | 1 - net/netfilter/xt_state.c | 3 +-- net/netfilter/xt_statistic.c | 2 +- net/netfilter/xt_string.c | 4 +--- net/netfilter/xt_tcpudp.c | 2 -- net/sched/act_ipt.c | 4 +--- 57 files changed, 26 insertions(+), 106 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 9cef0e91542b..9d97102a9347 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -174,12 +174,10 @@ struct xt_match const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize, unsigned int hook_mask); /* Called when entry of this type deleted. */ - void (*destroy)(const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize); + void (*destroy)(const struct xt_match *match, void *matchinfo); /* Called when userspace align differs from kernel space one */ int (*compat)(void *match, void **dstptr, int *size, int convert); @@ -221,12 +219,10 @@ struct xt_target const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask); /* Called when entry of this type deleted. */ - void (*destroy)(const struct xt_target *target, void *targinfo, - unsigned int targinfosize); + void (*destroy)(const struct xt_target *target, void *targinfo); /* Called when userspace align differs from kernel space one */ int (*compat)(void *target, void **dstptr, int *size, int convert); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index c6bd270bf46a..4f10b06413a1 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -491,8 +491,6 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i } } else if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, e, target, t->data, - t->u.target_size - - sizeof(*t), e->comefrom)) { duprintf("arp_tables: check failed for `%s'.\n", t->u.kernel.target->name); @@ -559,8 +557,7 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) t = arpt_get_target(e); if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data, - t->u.target_size - sizeof(*t)); + t->u.kernel.target->destroy(t->u.kernel.target, t->data); module_put(t->u.kernel.target->me); return 0; } diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index 05fb2421bb26..d12b1df252a1 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -67,7 +67,7 @@ target(struct sk_buff **pskb, static int checkentry(const char *tablename, const void *e, const struct xt_target *target, - void *targinfo, unsigned int targinfosize, unsigned int hook_mask) + void *targinfo, unsigned int hook_mask) { const struct arpt_mangle *mangle = targinfo; diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c index 1aa0e4f462a5..e59f5a8ecb6b 100644 --- a/net/ipv4/netfilter/ip_nat_rule.c +++ b/net/ipv4/netfilter/ip_nat_rule.c @@ -172,7 +172,6 @@ static int ipt_snat_checkentry(const char *tablename, const void *entry, const struct ipt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct ip_nat_multi_range_compat *mr = targinfo; @@ -189,7 +188,6 @@ static int ipt_dnat_checkentry(const char *tablename, const void *entry, const struct ipt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct ip_nat_multi_range_compat *mr = targinfo; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 8ce5b6f76447..a0f36806998c 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -464,8 +464,7 @@ cleanup_match(struct ipt_entry_match *m, unsigned int *i) return 1; if (m->u.kernel.match->destroy) - m->u.kernel.match->destroy(m->u.kernel.match, m->data, - m->u.match_size - sizeof(*m)); + m->u.kernel.match->destroy(m->u.kernel.match, m->data); module_put(m->u.kernel.match->me); return 0; } @@ -518,7 +517,6 @@ check_match(struct ipt_entry_match *m, if (m->u.kernel.match->checkentry && !m->u.kernel.match->checkentry(name, ip, match, m->data, - m->u.match_size - sizeof(*m), hookmask)) { duprintf("ip_tables: check failed for `%s'.\n", m->u.kernel.match->name); @@ -579,8 +577,6 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size, } } else if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, e, target, t->data, - t->u.target_size - - sizeof(*t), e->comefrom)) { duprintf("ip_tables: check failed for `%s'.\n", t->u.kernel.target->name); @@ -652,8 +648,7 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i) IPT_MATCH_ITERATE(e, cleanup_match, NULL); t = ipt_get_target(e); if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data, - t->u.target_size - sizeof(*t)); + t->u.kernel.target->destroy(t->u.kernel.target, t->data); module_put(t->u.kernel.target->me); return 0; } @@ -1599,7 +1594,6 @@ static inline int compat_copy_match_from_user(struct ipt_entry_match *m, if (m->u.kernel.match->checkentry && !m->u.kernel.match->checkentry(name, ip, match, dm->data, - dm->u.match_size - sizeof(*dm), hookmask)) { duprintf("ip_tables: check failed for `%s'.\n", m->u.kernel.match->name); @@ -1658,8 +1652,7 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, goto out; } else if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, de, target, - t->data, t->u.target_size - sizeof(*t), - de->comefrom)) { + t->data, de->comefrom)) { duprintf("ip_tables: compat: check failed for `%s'.\n", t->u.kernel.target->name); goto out; @@ -2182,7 +2175,6 @@ icmp_checkentry(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ipt_icmp *icmpinfo = matchinfo; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index a08383cf9e7a..41589665fc5d 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -372,7 +372,6 @@ checkentry(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct ipt_clusterip_tgt_info *cipinfo = targinfo; @@ -449,8 +448,7 @@ checkentry(const char *tablename, } /* drop reference count of cluster config when rule is deleted */ -static void destroy(const struct xt_target *target, void *targinfo, - unsigned int targinfosize) +static void destroy(const struct xt_target *target, void *targinfo) { struct ipt_clusterip_tgt_info *cipinfo = targinfo; diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 1c3da4a48e5f..23f9c7ebe7eb 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -106,7 +106,6 @@ checkentry(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo; diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index a8d356c6191f..7dc820df8bc5 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -439,7 +439,6 @@ static int ipt_log_checkentry(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ipt_log_info *loginfo = targinfo; diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 9659793c66c0..bc65168a3437 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -42,7 +42,6 @@ masquerade_check(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ip_nat_multi_range_compat *mr = targinfo; diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index fd5e74a19fb5..beb2914225ff 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -33,7 +33,6 @@ check(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ip_nat_multi_range_compat *mr = targinfo; diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 839fe99f71d4..f03d43671c6d 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -36,7 +36,6 @@ redirect_check(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ip_nat_multi_range_compat *mr = targinfo; diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 1dfd8e56be8b..b81821edd893 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -276,7 +276,6 @@ static int check(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ipt_reject_info *rejinfo = targinfo; diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c index cf801749490f..efbcb1198832 100644 --- a/net/ipv4/netfilter/ipt_SAME.c +++ b/net/ipv4/netfilter/ipt_SAME.c @@ -52,7 +52,6 @@ same_check(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { unsigned int count, countess, rangeip, index = 0; @@ -116,8 +115,7 @@ same_check(const char *tablename, } static void -same_destroy(const struct xt_target *target, void *targinfo, - unsigned int targinfosize) +same_destroy(const struct xt_target *target, void *targinfo) { struct ipt_same_info *mr = targinfo; diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c index 6d668dcfc22a..ac8a35eeea3f 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c @@ -207,7 +207,6 @@ ipt_tcpmss_checkentry(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ipt_tcpmss_info *tcpmssinfo = targinfo; diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index 043df0137084..471a4c438b0a 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -49,7 +49,6 @@ checkentry(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos; diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index 164007107b5e..214d9d9c428f 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -67,7 +67,6 @@ static int ipt_ttl_checkentry(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct ipt_TTL_info *info = targinfo; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 4c5f0a117862..2b104ea54f48 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -346,7 +346,6 @@ static int ipt_ulog_checkentry(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hookmask) { struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index 2927135873d7..1798f86bc534 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -74,7 +74,6 @@ checkentry(const char *tablename, const void *ip_void, const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize, unsigned int hook_mask) { const struct ipt_ah *ahinfo = matchinfo; diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index b28250414933..dafbdec0efc0 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -88,8 +88,7 @@ static int match(const struct sk_buff *skb, static int checkentry(const char *tablename, const void *ip_void, const struct xt_match *match, - void *matchinfo, unsigned int matchsize, - unsigned int hook_mask) + void *matchinfo, unsigned int hook_mask) { const struct ipt_ecn_info *info = matchinfo; const struct ipt_ip *ip = ip_void; diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c index 3bd2368e1fc9..b5b74b07370c 100644 --- a/net/ipv4/netfilter/ipt_hashlimit.c +++ b/net/ipv4/netfilter/ipt_hashlimit.c @@ -478,7 +478,6 @@ hashlimit_checkentry(const char *tablename, const void *inf, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { struct ipt_hashlimit_info *r = matchinfo; @@ -529,8 +528,7 @@ hashlimit_checkentry(const char *tablename, } static void -hashlimit_destroy(const struct xt_match *match, void *matchinfo, - unsigned int matchsize) +hashlimit_destroy(const struct xt_match *match, void *matchinfo) { struct ipt_hashlimit_info *r = matchinfo; diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c index 5ac6ac023b5e..78c336f12a9e 100644 --- a/net/ipv4/netfilter/ipt_owner.c +++ b/net/ipv4/netfilter/ipt_owner.c @@ -56,7 +56,6 @@ checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ipt_owner_info *info = matchinfo; diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 682c0946201e..32ae8d7ac506 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -238,7 +238,7 @@ out: static int ipt_recent_checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) + unsigned int hook_mask) { const struct ipt_recent_info *info = matchinfo; struct recent_table *t; @@ -294,8 +294,7 @@ out: } static void -ipt_recent_destroy(const struct xt_match *match, void *matchinfo, - unsigned int matchsize) +ipt_recent_destroy(const struct xt_match *match, void *matchinfo) { const struct ipt_recent_info *info = matchinfo; struct recent_table *t; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 38cd7ffda9a0..d1c315364ee7 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -504,8 +504,7 @@ cleanup_match(struct ip6t_entry_match *m, unsigned int *i) return 1; if (m->u.kernel.match->destroy) - m->u.kernel.match->destroy(m->u.kernel.match, m->data, - m->u.match_size - sizeof(*m)); + m->u.kernel.match->destroy(m->u.kernel.match, m->data); module_put(m->u.kernel.match->me); return 0; } @@ -558,7 +557,6 @@ check_match(struct ip6t_entry_match *m, if (m->u.kernel.match->checkentry && !m->u.kernel.match->checkentry(name, ipv6, match, m->data, - m->u.match_size - sizeof(*m), hookmask)) { duprintf("ip_tables: check failed for `%s'.\n", m->u.kernel.match->name); @@ -619,8 +617,6 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size, } } else if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, e, target, t->data, - t->u.target_size - - sizeof(*t), e->comefrom)) { duprintf("ip_tables: check failed for `%s'.\n", t->u.kernel.target->name); @@ -692,8 +688,7 @@ cleanup_entry(struct ip6t_entry *e, unsigned int *i) IP6T_MATCH_ITERATE(e, cleanup_match, NULL); t = ip6t_get_target(e); if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data, - t->u.target_size - sizeof(*t)); + t->u.kernel.target->destroy(t->u.kernel.target, t->data); module_put(t->u.kernel.target->me); return 0; } @@ -1349,7 +1344,6 @@ icmp6_checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ip6t_icmp *icmpinfo = matchinfo; diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c index c85d124f9a3d..e54ea92d107b 100644 --- a/net/ipv6/netfilter/ip6t_HL.c +++ b/net/ipv6/netfilter/ip6t_HL.c @@ -66,7 +66,6 @@ static int ip6t_hl_checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct ip6t_HL_info *info = targinfo; diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index acb91733e1fd..0cf537d30185 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -451,7 +451,6 @@ static int ip6t_log_checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ip6t_log_info *loginfo = targinfo; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 343acd3cbf5e..311eae82feb3 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -223,7 +223,6 @@ static int check(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ip6t_reject_info *rejinfo = targinfo; diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index 2f7bb20c758b..ec1b1608156c 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -102,7 +102,6 @@ checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize, unsigned int hook_mask) { const struct ip6t_ah *ahinfo = matchinfo; diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c index 9422413d0571..223c335467cc 100644 --- a/net/ipv6/netfilter/ip6t_dst.c +++ b/net/ipv6/netfilter/ip6t_dst.c @@ -182,7 +182,6 @@ checkentry(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize, unsigned int hook_mask) { const struct ip6t_opts *optsinfo = matchinfo; diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 06768c84bd31..78d9c8b9e28a 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -119,7 +119,6 @@ checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize, unsigned int hook_mask) { const struct ip6t_frag *fraginfo = matchinfo; diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 374f1be85c0d..72defc816563 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -182,7 +182,6 @@ checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize, unsigned int hook_mask) { const struct ip6t_opts *optsinfo = matchinfo; diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 9375eeb1369f..3093c398002f 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -128,7 +128,6 @@ ipv6header_checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ip6t_ipv6header_info *info = matchinfo; diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c index 5d047990cd44..4eb9bbc4ebc3 100644 --- a/net/ipv6/netfilter/ip6t_owner.c +++ b/net/ipv6/netfilter/ip6t_owner.c @@ -57,7 +57,6 @@ checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ip6t_owner_info *info = matchinfo; diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index fbb0184a41d8..bcb2e168a5bc 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -197,7 +197,6 @@ checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize, unsigned int hook_mask) { const struct ip6t_rt *rtinfo = matchinfo; diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index c2125f6ee128..0e4249ddc17b 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -89,7 +89,6 @@ checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct xt_connmark_target_info *matchinfo = targinfo; diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 4b9cc65bb82b..4b0e14bb1726 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -89,7 +89,7 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in, static int checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) + unsigned int hook_mask) { struct xt_connsecmark_target_info *info = targinfo; diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index 9d23c9580d80..a7cc75aeb38d 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -72,7 +72,6 @@ static int checkentry(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp; diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c index 95a171c87994..782f8d8c3edf 100644 --- a/net/netfilter/xt_MARK.c +++ b/net/netfilter/xt_MARK.c @@ -74,7 +74,6 @@ checkentry_v0(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct xt_mark_target_info *markinfo = targinfo; @@ -91,7 +90,6 @@ checkentry_v1(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct xt_mark_target_info_v1 *markinfo = targinfo; diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index 8a04dcf2611e..451b67c4bb53 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -85,7 +85,7 @@ static int checkentry_selinux(struct xt_secmark_target_info *info) static int checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) + unsigned int hook_mask) { struct xt_secmark_target_info *info = targinfo; diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index d725e8b84503..dcc497ea8183 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -125,7 +125,6 @@ static int check(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct xt_connbytes_info *sinfo = matchinfo; diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index a97b2d455b79..c9104d05a19c 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -55,7 +55,6 @@ checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { struct xt_connmark_info *cm = matchinfo; @@ -75,7 +74,7 @@ checkentry(const char *tablename, } static void -destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) +destroy(const struct xt_match *match, void *matchinfo) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 1540885174ee..39c57e9f7563 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -208,7 +208,6 @@ checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) @@ -222,7 +221,7 @@ checkentry(const char *tablename, } static void -destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) +destroy(const struct xt_match *match, void *matchinfo) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index 5ca6f5288f46..3e6cf430e518 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -131,7 +131,6 @@ checkentry(const char *tablename, const void *inf, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct xt_dccp_info *info = matchinfo; diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index d84075c30159..26c7f4ad102a 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -58,7 +58,6 @@ static int checkentry(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const u_int8_t dscp = ((struct xt_dscp_info *)matchinfo)->dscp; diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c index 7b19bc9ea205..7c95f149d942 100644 --- a/net/netfilter/xt_esp.c +++ b/net/netfilter/xt_esp.c @@ -79,7 +79,6 @@ checkentry(const char *tablename, const void *ip_void, const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize, unsigned int hook_mask) { const struct xt_esp *espinfo = matchinfo; diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index db453a7a154e..5d7818b73e3a 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -139,7 +139,6 @@ static int check(const char *tablename, const void *inf, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { struct xt_helper_info *info = matchinfo; @@ -156,7 +155,7 @@ static int check(const char *tablename, } static void -destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) +destroy(const struct xt_match *match, void *matchinfo) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index e8d5e7ac695a..b9c9ff3a06ea 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -110,7 +110,6 @@ ipt_limit_checkentry(const char *tablename, const void *inf, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { struct xt_rateinfo *r = matchinfo; diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index 39f9b079f5d4..e8059cd17275 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -39,7 +39,6 @@ checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct xt_mark_info *minfo = matchinfo; diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index e74f9bb98b3c..d3aefd380930 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -176,7 +176,6 @@ checkentry(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ipt_ip *ip = info; @@ -191,7 +190,6 @@ checkentry_v1(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ipt_ip *ip = info; @@ -206,7 +204,6 @@ checkentry6(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ip6t_ip6 *ip = info; @@ -221,7 +218,6 @@ checkentry6_v1(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ip6t_ip6 *ip = info; diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index af3d70f96ecd..fd8f954cded5 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -106,7 +106,6 @@ checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct xt_physdev_info *info = matchinfo; diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index f5639c451112..e9d81378d653 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -135,8 +135,7 @@ static int match(const struct sk_buff *skb, static int checkentry(const char *tablename, const void *ip_void, const struct xt_match *match, - void *matchinfo, unsigned int matchsize, - unsigned int hook_mask) + void *matchinfo, unsigned int hook_mask) { struct xt_policy_info *info = matchinfo; diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index cc44f87cb8e6..b75fa2c70e66 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -41,7 +41,7 @@ match(const struct sk_buff *skb, static int checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) + unsigned int hook_mask) { struct xt_quota_info *q = (struct xt_quota_info *)matchinfo; diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index 5628621170e6..7956acaaa24b 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -163,7 +163,6 @@ checkentry(const char *tablename, const void *inf, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct xt_sctp_info *info = matchinfo; diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index 5f9492e3b2b1..d9010b16a1f9 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -48,7 +48,6 @@ static int check(const char *tablename, const void *inf, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) @@ -62,7 +61,7 @@ static int check(const char *tablename, } static void -destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) +destroy(const struct xt_match *match, void *matchinfo) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index 5181630a87fc..091a9f89f5d5 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -55,7 +55,7 @@ match(const struct sk_buff *skb, static int checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) + unsigned int hook_mask) { struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo; diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index 1a1c1d17d85e..4453252400aa 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -46,7 +46,6 @@ static int checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { struct xt_string_info *conf = matchinfo; @@ -69,8 +68,7 @@ static int checkentry(const char *tablename, return 1; } -static void destroy(const struct xt_match *match, void *matchinfo, - unsigned int matchsize) +static void destroy(const struct xt_match *match, void *matchinfo) { textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config); } diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index 54aab051af86..e76a68e0bc66 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -141,7 +141,6 @@ tcp_checkentry(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct xt_tcp *tcpinfo = matchinfo; @@ -190,7 +189,6 @@ udp_checkentry(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct xt_tcp *udpinfo = matchinfo; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 45a3143b8629..d8c9310da6e5 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -69,7 +69,6 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(table, NULL, t->u.kernel.target, t->data, - t->u.target_size - sizeof(*t), hook)) { module_put(t->u.kernel.target->me); ret = -EINVAL; @@ -81,8 +80,7 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int static void ipt_destroy_target(struct ipt_entry_target *t) { if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data, - t->u.target_size - sizeof(*t)); + t->u.kernel.target->destroy(t->u.kernel.target, t->data); module_put(t->u.kernel.target->me); } From 53e26658282373b84ba85a0c9807cb762f7738a6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 22 Aug 2006 00:43:20 -0700 Subject: [PATCH 0517/1063] [NETFILTER]: nfnetlink: remove unnecessary packed attributes Remove unnecessary packed attributes in nfnetlink structures. Unfortunately in a few cases they have to stay to avoid changing structure sizes. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink.h | 4 ++-- include/linux/netfilter/nfnetlink_log.h | 6 +++--- include/linux/netfilter/nfnetlink_queue.h | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 9f5b12cf489b..6d8e3e5a80e9 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -43,7 +43,7 @@ struct nfattr u_int16_t nfa_len; u_int16_t nfa_type; /* we use 15 bits for the type, and the highest * bit to indicate whether the payload is nested */ -} __attribute__ ((packed)); +}; /* FIXME: Apart from NFNL_NFA_NESTED shamelessly copy and pasted from * rtnetlink.h, it's time to put this in a generic file */ @@ -79,7 +79,7 @@ struct nfgenmsg { u_int8_t nfgen_family; /* AF_xxx */ u_int8_t version; /* nfnetlink version */ u_int16_t res_id; /* resource id */ -} __attribute__ ((packed)); +}; #define NFNETLINK_V0 0 diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h index a7497c7436df..87b92f8b988f 100644 --- a/include/linux/netfilter/nfnetlink_log.h +++ b/include/linux/netfilter/nfnetlink_log.h @@ -19,18 +19,18 @@ struct nfulnl_msg_packet_hdr { u_int16_t hw_protocol; /* hw protocol (network order) */ u_int8_t hook; /* netfilter hook */ u_int8_t _pad; -} __attribute__ ((packed)); +}; struct nfulnl_msg_packet_hw { u_int16_t hw_addrlen; u_int16_t _pad; u_int8_t hw_addr[8]; -} __attribute__ ((packed)); +}; struct nfulnl_msg_packet_timestamp { aligned_u64 sec; aligned_u64 usec; -} __attribute__ ((packed)); +}; #define NFULNL_PREFIXLEN 30 /* just like old log target */ diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h index 9e774373244c..36af0360b56d 100644 --- a/include/linux/netfilter/nfnetlink_queue.h +++ b/include/linux/netfilter/nfnetlink_queue.h @@ -22,12 +22,12 @@ struct nfqnl_msg_packet_hw { u_int16_t hw_addrlen; u_int16_t _pad; u_int8_t hw_addr[8]; -} __attribute__ ((packed)); +}; struct nfqnl_msg_packet_timestamp { aligned_u64 sec; aligned_u64 usec; -} __attribute__ ((packed)); +}; enum nfqnl_attr_type { NFQA_UNSPEC, @@ -49,7 +49,7 @@ enum nfqnl_attr_type { struct nfqnl_msg_verdict_hdr { u_int32_t verdict; u_int32_t id; -} __attribute__ ((packed)); +}; enum nfqnl_msg_config_cmds { @@ -64,7 +64,7 @@ struct nfqnl_msg_config_cmd { u_int8_t command; /* nfqnl_msg_config_cmds */ u_int8_t _pad; u_int16_t pf; /* AF_xxx for PF_[UN]BIND */ -} __attribute__ ((packed)); +}; enum nfqnl_config_mode { NFQNL_COPY_NONE, From 91270cf81765152f6e77953440beb4d3b34a71b5 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 22 Aug 2006 00:43:38 -0700 Subject: [PATCH 0518/1063] [NETFILTER]: x_tables: add data member to struct xt_match Shared match functions can use this to make runtime decisions basen on the used match. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 9d97102a9347..03d1027fb0e8 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -185,6 +185,9 @@ struct xt_match /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; + /* Free to use by each match */ + unsigned long data; + char *table; unsigned int matchsize; unsigned int hooks; From 5fa2a7601f994bdd034e871b7ea1abd6969fbb6c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 22 Aug 2006 00:43:55 -0700 Subject: [PATCH 0519/1063] [NETFILTER]: ip6_tables: consolidate dst and hbh matches The matches are identical besides one looking for NEXTHDR_HOP, the other for NEXTHDR_DEST. Remove ip6t_dst.c and handle both in ip6t_hbh.c. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/Makefile | 2 +- net/ipv6/netfilter/ip6t_dst.c | 219 ---------------------------------- net/ipv6/netfilter/ip6t_hbh.c | 48 ++++---- 3 files changed, 25 insertions(+), 244 deletions(-) delete mode 100644 net/ipv6/netfilter/ip6t_dst.c diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index eeeb57d4c9c5..ac1dfebde175 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -5,7 +5,7 @@ # Link order matters here. obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o -obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o ip6t_dst.o +obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c deleted file mode 100644 index 223c335467cc..000000000000 --- a/net/ipv6/netfilter/ip6t_dst.c +++ /dev/null @@ -1,219 +0,0 @@ -/* Kernel module to match Hop-by-Hop and Destination parameters. */ - -/* (C) 2001-2002 Andras Kis-Szabo - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -#define HOPBYHOP 0 - -MODULE_LICENSE("GPL"); -#if HOPBYHOP -MODULE_DESCRIPTION("IPv6 HbH match"); -#else -MODULE_DESCRIPTION("IPv6 DST match"); -#endif -MODULE_AUTHOR("Andras Kis-Szabo "); - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - -/* - * (Type & 0xC0) >> 6 - * 0 -> ignorable - * 1 -> must drop the packet - * 2 -> send ICMP PARM PROB regardless and drop packet - * 3 -> Send ICMP if not a multicast address and drop packet - * (Type & 0x20) >> 5 - * 0 -> invariant - * 1 -> can change the routing - * (Type & 0x1F) Type - * 0 -> Pad1 (only 1 byte!) - * 1 -> PadN LENGTH info (total length = length + 2) - * C0 | 2 -> JUMBO 4 x x x x ( xxxx > 64k ) - * 5 -> RTALERT 2 x x - */ - -static int -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) -{ - struct ipv6_opt_hdr _optsh, *oh; - const struct ip6t_opts *optinfo = matchinfo; - unsigned int temp; - unsigned int ptr; - unsigned int hdrlen = 0; - unsigned int ret = 0; - u8 _opttype, *tp = NULL; - u8 _optlen, *lp = NULL; - unsigned int optlen; - -#if HOPBYHOP - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0) -#else - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0) -#endif - return 0; - - oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); - if (oh == NULL) { - *hotdrop = 1; - return 0; - } - - hdrlen = ipv6_optlen(oh); - if (skb->len - ptr < hdrlen) { - /* Packet smaller than it's length field */ - return 0; - } - - DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); - - DEBUGP("len %02X %04X %02X ", - optinfo->hdrlen, hdrlen, - (!(optinfo->flags & IP6T_OPTS_LEN) || - ((optinfo->hdrlen == hdrlen) ^ - !!(optinfo->invflags & IP6T_OPTS_INV_LEN)))); - - ret = (oh != NULL) && - (!(optinfo->flags & IP6T_OPTS_LEN) || - ((optinfo->hdrlen == hdrlen) ^ - !!(optinfo->invflags & IP6T_OPTS_INV_LEN))); - - ptr += 2; - hdrlen -= 2; - if (!(optinfo->flags & IP6T_OPTS_OPTS)) { - return ret; - } else if (optinfo->flags & IP6T_OPTS_NSTRICT) { - DEBUGP("Not strict - not implemented"); - } else { - DEBUGP("Strict "); - DEBUGP("#%d ", optinfo->optsnr); - for (temp = 0; temp < optinfo->optsnr; temp++) { - /* type field exists ? */ - if (hdrlen < 1) - break; - tp = skb_header_pointer(skb, ptr, sizeof(_opttype), - &_opttype); - if (tp == NULL) - break; - - /* Type check */ - if (*tp != (optinfo->opts[temp] & 0xFF00) >> 8) { - DEBUGP("Tbad %02X %02X\n", - *tp, - (optinfo->opts[temp] & 0xFF00) >> 8); - return 0; - } else { - DEBUGP("Tok "); - } - /* Length check */ - if (*tp) { - u16 spec_len; - - /* length field exists ? */ - if (hdrlen < 2) - break; - lp = skb_header_pointer(skb, ptr + 1, - sizeof(_optlen), - &_optlen); - if (lp == NULL) - break; - spec_len = optinfo->opts[temp] & 0x00FF; - - if (spec_len != 0x00FF && spec_len != *lp) { - DEBUGP("Lbad %02X %04X\n", *lp, - spec_len); - return 0; - } - DEBUGP("Lok "); - optlen = *lp + 2; - } else { - DEBUGP("Pad1\n"); - optlen = 1; - } - - /* Step to the next */ - DEBUGP("len%04X \n", optlen); - - if ((ptr > skb->len - optlen || hdrlen < optlen) && - (temp < optinfo->optsnr - 1)) { - DEBUGP("new pointer is too large! \n"); - break; - } - ptr += optlen; - hdrlen -= optlen; - } - if (temp == optinfo->optsnr) - return ret; - else - return 0; - } - - return 0; -} - -/* Called when user tries to insert an entry of this type. */ -static int -checkentry(const char *tablename, - const void *info, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) -{ - const struct ip6t_opts *optsinfo = matchinfo; - - if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) { - DEBUGP("ip6t_opts: unknown flags %X\n", optsinfo->invflags); - return 0; - } - return 1; -} - -static struct ip6t_match opts_match = { -#if HOPBYHOP - .name = "hbh", -#else - .name = "dst", -#endif - .match = match, - .matchsize = sizeof(struct ip6t_opts), - .checkentry = checkentry, - .me = THIS_MODULE, -}; - -static int __init ip6t_dst_init(void) -{ - return ip6t_register_match(&opts_match); -} - -static void __exit ip6t_dst_fini(void) -{ - ip6t_unregister_match(&opts_match); -} - -module_init(ip6t_dst_init); -module_exit(ip6t_dst_fini); diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 72defc816563..d32a205e3af2 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -19,15 +19,10 @@ #include #include -#define HOPBYHOP 1 - MODULE_LICENSE("GPL"); -#if HOPBYHOP -MODULE_DESCRIPTION("IPv6 HbH match"); -#else -MODULE_DESCRIPTION("IPv6 DST match"); -#endif +MODULE_DESCRIPTION("IPv6 opts match"); MODULE_AUTHOR("Andras Kis-Szabo "); +MODULE_ALIAS("ip6t_dst"); #if 0 #define DEBUGP printk @@ -71,11 +66,7 @@ match(const struct sk_buff *skb, u8 _optlen, *lp = NULL; unsigned int optlen; -#if HOPBYHOP - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0) -#else - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0) -#endif + if (ipv6_find_hdr(skb, &ptr, match->data, NULL) < 0) return 0; oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); @@ -193,26 +184,35 @@ checkentry(const char *tablename, return 1; } -static struct ip6t_match opts_match = { -#if HOPBYHOP - .name = "hbh", -#else - .name = "dst", -#endif - .match = match, - .matchsize = sizeof(struct ip6t_opts), - .checkentry = checkentry, - .me = THIS_MODULE, +static struct xt_match opts_match[] = { + { + .name = "hbh", + .family = AF_INET6, + .match = match, + .matchsize = sizeof(struct ip6t_opts), + .checkentry = checkentry, + .me = THIS_MODULE, + .data = NEXTHDR_HOP, + }, + { + .name = "dst", + .family = AF_INET6, + .match = match, + .matchsize = sizeof(struct ip6t_opts), + .checkentry = checkentry, + .me = THIS_MODULE, + .data = NEXTHDR_DEST, + }, }; static int __init ip6t_hbh_init(void) { - return ip6t_register_match(&opts_match); + return xt_register_matches(opts_match, ARRAY_SIZE(opts_match)); } static void __exit ip6t_hbh_fini(void) { - ip6t_unregister_match(&opts_match); + xt_unregister_matches(opts_match, ARRAY_SIZE(opts_match)); } module_init(ip6t_hbh_init); From ce556b3a591fff3bebf8c5590a86aa98e1b2f153 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 22 Aug 2006 00:44:14 -0700 Subject: [PATCH 0520/1063] [NETFILTER]: xt_tcpmss: minor cleanups - remove unused define - remove useless wrapper function - use new line for expression after condition Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_tcpmss.c | 48 +++++++++++++++------------------------ 1 file changed, 18 insertions(+), 30 deletions(-) diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c index 7baa9ebc46c1..a3682fe2f192 100644 --- a/net/netfilter/xt_tcpmss.c +++ b/net/netfilter/xt_tcpmss.c @@ -18,21 +18,22 @@ #include #include -#define TH_SYN 0x02 - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher "); MODULE_DESCRIPTION("iptables TCP MSS match module"); MODULE_ALIAS("ipt_tcpmss"); -/* Returns 1 if the mss option is set and matched by the range, 0 otherwise */ -static inline int -mssoption_match(u_int16_t min, u_int16_t max, - const struct sk_buff *skb, - unsigned int protoff, - int invert, - int *hotdrop) +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, + int offset, + unsigned int protoff, + int *hotdrop) { + const struct xt_tcpmss_match_info *info = matchinfo; struct tcphdr _tcph, *th; /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */ u8 _opt[15 * 4 - sizeof(_tcph)], *op; @@ -64,35 +65,22 @@ mssoption_match(u_int16_t min, u_int16_t max, mssval = (op[i+2] << 8) | op[i+3]; - return (mssval >= min && mssval <= max) ^ invert; + return (mssval >= info->mss_min && + mssval <= info->mss_max) ^ info->invert; } - if (op[i] < 2) i++; - else i += op[i+1]?:1; + if (op[i] < 2) + i++; + else + i += op[i+1] ? : 1; } out: - return invert; + return info->invert; - dropit: +dropit: *hotdrop = 1; return 0; } -static int -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) -{ - const struct xt_tcpmss_match_info *info = matchinfo; - - return mssoption_match(info->mss_min, info->mss_max, skb, protoff, - info->invert, hotdrop); -} - static struct xt_match xt_tcpmss_match[] = { { .name = "tcpmss", From 3fd091e73b81f131e1567c4d4a1ec042940bf2f7 Mon Sep 17 00:00:00 2001 From: Vladislav Yasevich Date: Tue, 22 Aug 2006 13:29:17 -0700 Subject: [PATCH 0521/1063] [SCTP]: Remove multiple levels of msecs to jiffies conversions. The SCTP sysctl entries are displayed in milliseconds, but stored internally in jiffies. This results in multiple levels of msecs to jiffies conversion and as a result produces a truncation error. This patch makes things consistent in that we store and display defaults in milliseconds and only convert once for use by association. This patch also adds some sane min/max values so that we don't go off the deep end. Signed-off-by: Vladislav Yasevich Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 13 ++-- include/net/sctp/structs.h | 12 +-- net/sctp/protocol.c | 2 +- net/sctp/socket.c | 15 ++-- net/sctp/sysctl.c | 140 ++++++++++++++++------------------- net/sctp/transport.c | 2 +- 6 files changed, 84 insertions(+), 100 deletions(-) diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 57166bfdf8eb..6c632e26f72d 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -264,10 +264,10 @@ enum { SCTP_MAX_DUP_TSNS = 16 }; enum { SCTP_MAX_GABS = 16 }; /* Heartbeat interval - 30 secs */ -#define SCTP_DEFAULT_TIMEOUT_HEARTBEAT (30 * HZ) +#define SCTP_DEFAULT_TIMEOUT_HEARTBEAT (30*1000) /* Delayed sack timer - 200ms */ -#define SCTP_DEFAULT_TIMEOUT_SACK ((200 * HZ) / 1000) +#define SCTP_DEFAULT_TIMEOUT_SACK (200) /* RTO.Initial - 3 seconds * RTO.Min - 1 second @@ -275,9 +275,9 @@ enum { SCTP_MAX_GABS = 16 }; * RTO.Alpha - 1/8 * RTO.Beta - 1/4 */ -#define SCTP_RTO_INITIAL (3 * HZ) -#define SCTP_RTO_MIN (1 * HZ) -#define SCTP_RTO_MAX (60 * HZ) +#define SCTP_RTO_INITIAL (3 * 1000) +#define SCTP_RTO_MIN (1 * 1000) +#define SCTP_RTO_MAX (60 * 1000) #define SCTP_RTO_ALPHA 3 /* 1/8 when converted to right shifts. */ #define SCTP_RTO_BETA 2 /* 1/4 when converted to right shifts. */ @@ -290,8 +290,7 @@ enum { SCTP_MAX_GABS = 16 }; #define SCTP_DEF_MAX_INIT 6 #define SCTP_DEF_MAX_SEND 10 -#define SCTP_DEFAULT_COOKIE_LIFE_SEC 60 /* seconds */ -#define SCTP_DEFAULT_COOKIE_LIFE_USEC 0 /* microseconds */ +#define SCTP_DEFAULT_COOKIE_LIFE (60 * 1000) /* 60 seconds */ #define SCTP_DEFAULT_MINWINDOW 1500 /* default minimum rwnd size */ #define SCTP_DEFAULT_MAXWINDOW 65535 /* default rwnd size */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 0412e730c765..c6d93bb0dcd2 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -128,9 +128,9 @@ extern struct sctp_globals { * RTO.Alpha - 1/8 (3 when converted to right shifts.) * RTO.Beta - 1/4 (2 when converted to right shifts.) */ - unsigned long rto_initial; - unsigned long rto_min; - unsigned long rto_max; + unsigned int rto_initial; + unsigned int rto_min; + unsigned int rto_max; /* Note: rto_alpha and rto_beta are really defined as inverse * powers of two to facilitate integer operations. @@ -145,13 +145,13 @@ extern struct sctp_globals { int cookie_preserve_enable; /* Valid.Cookie.Life - 60 seconds */ - unsigned long valid_cookie_life; + unsigned int valid_cookie_life; /* Delayed SACK timeout 200ms default*/ - unsigned long sack_timeout; + unsigned int sack_timeout; /* HB.interval - 30 seconds */ - unsigned long hb_interval; + unsigned int hb_interval; /* Association.Max.Retrans - 10 attempts * Path.Max.Retrans - 5 attempts (per destination address) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 1ab03a27a76e..5692ef5485d3 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1049,7 +1049,7 @@ SCTP_STATIC __init int sctp_init(void) sctp_rto_beta = SCTP_RTO_BETA; /* Valid.Cookie.Life - 60 seconds */ - sctp_valid_cookie_life = 60 * HZ; + sctp_valid_cookie_life = SCTP_DEFAULT_COOKIE_LIFE; /* Whether Cookie Preservative is enabled(1) or not(0) */ sctp_cookie_preserve_enable = 1; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3b6e82cb372f..7c1dbb1d10df 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3045,14 +3045,14 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->initmsg.sinit_num_ostreams = sctp_max_outstreams; sp->initmsg.sinit_max_instreams = sctp_max_instreams; sp->initmsg.sinit_max_attempts = sctp_max_retrans_init; - sp->initmsg.sinit_max_init_timeo = jiffies_to_msecs(sctp_rto_max); + sp->initmsg.sinit_max_init_timeo = sctp_rto_max; /* Initialize default RTO related parameters. These parameters can * be modified for with the SCTP_RTOINFO socket option. */ - sp->rtoinfo.srto_initial = jiffies_to_msecs(sctp_rto_initial); - sp->rtoinfo.srto_max = jiffies_to_msecs(sctp_rto_max); - sp->rtoinfo.srto_min = jiffies_to_msecs(sctp_rto_min); + sp->rtoinfo.srto_initial = sctp_rto_initial; + sp->rtoinfo.srto_max = sctp_rto_max; + sp->rtoinfo.srto_min = sctp_rto_min; /* Initialize default association related parameters. These parameters * can be modified with the SCTP_ASSOCINFO socket option. @@ -3061,8 +3061,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->assocparams.sasoc_number_peer_destinations = 0; sp->assocparams.sasoc_peer_rwnd = 0; sp->assocparams.sasoc_local_rwnd = 0; - sp->assocparams.sasoc_cookie_life = - jiffies_to_msecs(sctp_valid_cookie_life); + sp->assocparams.sasoc_cookie_life = sctp_valid_cookie_life; /* Initialize default event subscriptions. By default, all the * options are off. @@ -3072,10 +3071,10 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) /* Default Peer Address Parameters. These defaults can * be modified via SCTP_PEER_ADDR_PARAMS */ - sp->hbinterval = jiffies_to_msecs(sctp_hb_interval); + sp->hbinterval = sctp_hb_interval; sp->pathmaxrxt = sctp_max_retrans_path; sp->pathmtu = 0; // allow default discovery - sp->sackdelay = jiffies_to_msecs(sctp_sack_timeout); + sp->sackdelay = sctp_sack_timeout; sp->param_flags = SPP_HB_ENABLE | SPP_PMTUD_ENABLE | SPP_SACKDELAY_ENABLE; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index dc6f3ff32358..633cd178654b 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -45,9 +45,10 @@ #include #include -static ctl_handler sctp_sysctl_jiffies_ms; -static long rto_timer_min = 1; -static long rto_timer_max = 86400000; /* One day */ +static int zero = 0; +static int one = 1; +static int timer_max = 86400000; /* ms in one day */ +static int int_max = INT_MAX; static long sack_timer_min = 1; static long sack_timer_max = 500; @@ -56,45 +57,45 @@ static ctl_table sctp_table[] = { .ctl_name = NET_SCTP_RTO_INITIAL, .procname = "rto_initial", .data = &sctp_rto_initial, - .maxlen = sizeof(long), + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_doulongvec_ms_jiffies_minmax, - .strategy = &sctp_sysctl_jiffies_ms, - .extra1 = &rto_timer_min, - .extra2 = &rto_timer_max + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &timer_max }, { .ctl_name = NET_SCTP_RTO_MIN, .procname = "rto_min", .data = &sctp_rto_min, - .maxlen = sizeof(long), + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_doulongvec_ms_jiffies_minmax, - .strategy = &sctp_sysctl_jiffies_ms, - .extra1 = &rto_timer_min, - .extra2 = &rto_timer_max + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &timer_max }, { .ctl_name = NET_SCTP_RTO_MAX, .procname = "rto_max", .data = &sctp_rto_max, - .maxlen = sizeof(long), + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_doulongvec_ms_jiffies_minmax, - .strategy = &sctp_sysctl_jiffies_ms, - .extra1 = &rto_timer_min, - .extra2 = &rto_timer_max + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &timer_max }, { .ctl_name = NET_SCTP_VALID_COOKIE_LIFE, .procname = "valid_cookie_life", .data = &sctp_valid_cookie_life, - .maxlen = sizeof(long), + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_doulongvec_ms_jiffies_minmax, - .strategy = &sctp_sysctl_jiffies_ms, - .extra1 = &rto_timer_min, - .extra2 = &rto_timer_max + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &timer_max }, { .ctl_name = NET_SCTP_MAX_BURST, @@ -102,7 +103,10 @@ static ctl_table sctp_table[] = { .data = &sctp_max_burst, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &int_max }, { .ctl_name = NET_SCTP_ASSOCIATION_MAX_RETRANS, @@ -110,7 +114,10 @@ static ctl_table sctp_table[] = { .data = &sctp_max_retrans_association, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &int_max }, { .ctl_name = NET_SCTP_SNDBUF_POLICY, @@ -118,7 +125,8 @@ static ctl_table sctp_table[] = { .data = &sctp_sndbuf_policy, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec }, { .ctl_name = NET_SCTP_RCVBUF_POLICY, @@ -126,7 +134,8 @@ static ctl_table sctp_table[] = { .data = &sctp_rcvbuf_policy, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec }, { .ctl_name = NET_SCTP_PATH_MAX_RETRANS, @@ -134,7 +143,10 @@ static ctl_table sctp_table[] = { .data = &sctp_max_retrans_path, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &int_max }, { .ctl_name = NET_SCTP_MAX_INIT_RETRANSMITS, @@ -142,18 +154,21 @@ static ctl_table sctp_table[] = { .data = &sctp_max_retrans_init, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &int_max }, { .ctl_name = NET_SCTP_HB_INTERVAL, .procname = "hb_interval", .data = &sctp_hb_interval, - .maxlen = sizeof(long), + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_doulongvec_ms_jiffies_minmax, - .strategy = &sctp_sysctl_jiffies_ms, - .extra1 = &rto_timer_min, - .extra2 = &rto_timer_max + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &timer_max }, { .ctl_name = NET_SCTP_PRESERVE_ENABLE, @@ -161,23 +176,26 @@ static ctl_table sctp_table[] = { .data = &sctp_cookie_preserve_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec }, { .ctl_name = NET_SCTP_RTO_ALPHA, .procname = "rto_alpha_exp_divisor", .data = &sctp_rto_alpha, .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec + .mode = 0444, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec }, { .ctl_name = NET_SCTP_RTO_BETA, .procname = "rto_beta_exp_divisor", .data = &sctp_rto_beta, .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec + .mode = 0444, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec }, { .ctl_name = NET_SCTP_ADDIP_ENABLE, @@ -185,7 +203,8 @@ static ctl_table sctp_table[] = { .data = &sctp_addip_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec }, { .ctl_name = NET_SCTP_PRSCTP_ENABLE, @@ -193,7 +212,8 @@ static ctl_table sctp_table[] = { .data = &sctp_prsctp_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec }, { .ctl_name = NET_SCTP_SACK_TIMEOUT, @@ -201,8 +221,8 @@ static ctl_table sctp_table[] = { .data = &sctp_sack_timeout, .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_doulongvec_ms_jiffies_minmax, - .strategy = &sctp_sysctl_jiffies_ms, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, .extra1 = &sack_timer_min, .extra2 = &sack_timer_max, }, @@ -242,37 +262,3 @@ void sctp_sysctl_unregister(void) { unregister_sysctl_table(sctp_sysctl_header); } - -/* Strategy function to convert jiffies to milliseconds. */ -static int sctp_sysctl_jiffies_ms(ctl_table *table, int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) { - - if (oldval) { - size_t olen; - - if (oldlenp) { - if (get_user(olen, oldlenp)) - return -EFAULT; - - if (olen != sizeof (int)) - return -EINVAL; - } - if (put_user((*(int *)(table->data) * 1000) / HZ, - (int __user *)oldval) || - (oldlenp && put_user(sizeof (int), oldlenp))) - return -EFAULT; - } - if (newval && newlen) { - int new; - - if (newlen != sizeof (int)) - return -EINVAL; - - if (get_user(new, (int __user *)newval)) - return -EFAULT; - - *(int *)(table->data) = (new * HZ) / 1000; - } - return 1; -} diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 2763aa93de1a..3e5936a5f671 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -75,7 +75,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, * parameter 'RTO.Initial'. */ peer->rtt = 0; - peer->rto = sctp_rto_initial; + peer->rto = msecs_to_jiffies(sctp_rto_initial); peer->rttvar = 0; peer->srtt = 0; peer->rto_pending = 0; From 2809486424df58043b380aeb9d7f402c031c46f6 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 22 Aug 2006 13:52:17 -0700 Subject: [PATCH 0522/1063] [NETFILTER]: x_tables: Fix typos after conversion to use mass registation helper Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/netfilter/xt_CONNSECMARK.c | 8 ++++---- net/netfilter/xt_SECMARK.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 4b0e14bb1726..467386266674 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -130,14 +130,14 @@ static struct xt_target xt_connsecmark_target[] = { static int __init xt_connsecmark_init(void) { need_conntrack(); - return xt_register_targets(xt_connsecmark_targets, - ARRAY_SIZE(xt_connsecmark_targets)); + return xt_register_targets(xt_connsecmark_target, + ARRAY_SIZE(xt_connsecmark_target)); } static void __exit xt_connsecmark_fini(void) { - xt_unregister_targets(xt_connsecmark_targets, - ARRAY_SIZE(xt_connsecmark_targets)); + xt_unregister_targets(xt_connsecmark_target, + ARRAY_SIZE(xt_connsecmark_target)); } module_init(xt_connsecmark_init); diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index 451b67c4bb53..add752196290 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -111,7 +111,7 @@ static int checkentry(const char *tablename, const void *entry, return 1; } -static struct xt_target xt_secmark_target = { +static struct xt_target xt_secmark_target[] = { { .name = "SECMARK", .family = AF_INET, From a57d27fc7107ddcc655ba2812cfebfce3163fd62 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 22 Aug 2006 22:20:14 -0700 Subject: [PATCH 0523/1063] [RTNETLINK]: Don't return error on no-metrics. Instead just cancel the nested attribute and return 0. Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index eeff0b23e944..8f225499e32e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -202,8 +202,10 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) } } - if (!valid) - goto nla_put_failure; + if (!valid) { + nla_nest_cancel(skb, mx); + return 0; + } return nla_nest_end(skb, mx); From 5e032e32ecc2e6cb0385dc115ca9bfe5e19a9539 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 23 Aug 2006 17:12:24 -0700 Subject: [PATCH 0524/1063] [IPV6] NDISC: Take source address into account for redirects. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: Ville Nuorvala Signed-off-by: David S. Miller --- include/net/ip6_route.h | 1 + net/ipv6/ndisc.c | 3 ++- net/ipv6/route.c | 5 +++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 3f170f667c7b..249ce4545ef0 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -110,6 +110,7 @@ extern int rt6_route_rcv(struct net_device *dev, struct in6_addr *gwaddr); extern void rt6_redirect(struct in6_addr *dest, + struct in6_addr *src, struct in6_addr *saddr, struct neighbour *neigh, u8 *lladdr, diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 419d65163819..32f28dec399e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1346,7 +1346,8 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1); if (neigh) { - rt6_redirect(dest, &skb->nh.ipv6h->saddr, neigh, lladdr, + rt6_redirect(dest, &skb->nh.ipv6h->daddr, + &skb->nh.ipv6h->saddr, neigh, lladdr, on_link); neigh_release(neigh); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5d6e9083ca2c..a9b08a2422e0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1279,7 +1279,8 @@ static int ip6_route_del(struct fib6_config *cfg) /* * Handle redirects */ -void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr, +void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, + struct in6_addr *saddr, struct neighbour *neigh, u8 *lladdr, int on_link) { struct rt6_info *rt, *nrt = NULL; @@ -1304,7 +1305,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr, */ read_lock_bh(&table->tb6_lock); - fn = fib6_lookup(&table->tb6_root, dest, NULL); + fn = fib6_lookup(&table->tb6_root, dest, src); restart: for (rt = fn->leaf; rt; rt = rt->u.next) { /* From a6279458c534d01ccc39498aba61c93083ee0372 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 23 Aug 2006 17:18:26 -0700 Subject: [PATCH 0525/1063] [IPV6] NDISC: Search over all possible rules on receipt of redirect. Split up function for finding routes for redirects. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/route.c | 91 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 64 insertions(+), 27 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a9b08a2422e0..8d00a9d77f01 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1279,19 +1279,18 @@ static int ip6_route_del(struct fib6_config *cfg) /* * Handle redirects */ -void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, - struct in6_addr *saddr, - struct neighbour *neigh, u8 *lladdr, int on_link) -{ - struct rt6_info *rt, *nrt = NULL; - struct fib6_node *fn; - struct fib6_table *table; - struct netevent_redirect netevent; +struct ip6rd_flowi { + struct flowi fl; + struct in6_addr gateway; +}; - /* TODO: Very lazy, might need to check all tables */ - table = fib6_get_table(RT6_TABLE_MAIN); - if (table == NULL) - return; +static struct rt6_info *__ip6_route_redirect(struct fib6_table *table, + struct flowi *fl, + int flags) +{ + struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl; + struct rt6_info *rt; + struct fib6_node *fn; /* * Get the "current" route for this destination and @@ -1305,7 +1304,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, */ read_lock_bh(&table->tb6_lock); - fn = fib6_lookup(&table->tb6_root, dest, src); + fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: for (rt = fn->leaf; rt; rt = rt->u.next) { /* @@ -1320,29 +1319,67 @@ restart: continue; if (!(rt->rt6i_flags & RTF_GATEWAY)) continue; - if (neigh->dev != rt->rt6i_dev) + if (fl->oif != rt->rt6i_dev->ifindex) continue; - if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) + if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) continue; break; } - if (rt) - dst_hold(&rt->u.dst); - else if (rt6_need_strict(dest)) { - while ((fn = fn->parent) != NULL) { - if (fn->fn_flags & RTN_ROOT) - break; - if (fn->fn_flags & RTN_RTINFO) - goto restart; - } - } - read_unlock_bh(&table->tb6_lock); if (!rt) { + if (rt6_need_strict(&fl->fl6_dst)) { + while ((fn = fn->parent) != NULL) { + if (fn->fn_flags & RTN_ROOT) + break; + if (fn->fn_flags & RTN_RTINFO) + goto restart; + } + } + rt = &ip6_null_entry; + } + dst_hold(&rt->u.dst); + + read_unlock_bh(&table->tb6_lock); + + return rt; +}; + +static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, + struct in6_addr *src, + struct in6_addr *gateway, + struct net_device *dev) +{ + struct ip6rd_flowi rdfl = { + .fl = { + .oif = dev->ifindex, + .nl_u = { + .ip6_u = { + .daddr = *dest, + .saddr = *src, + }, + }, + }, + .gateway = *gateway, + }; + int flags = rt6_need_strict(dest) ? RT6_F_STRICT : 0; + + return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect); +} + +void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, + struct in6_addr *saddr, + struct neighbour *neigh, u8 *lladdr, int on_link) +{ + struct rt6_info *rt, *nrt = NULL; + struct netevent_redirect netevent; + + rt = ip6_route_redirect(dest, src, saddr, neigh->dev); + + if (rt == &ip6_null_entry) { if (net_ratelimit()) printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " "for redirect target\n"); - return; + goto out; } /* From af184765848c280c7e6190f45c827c5ea3881126 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 23 Aug 2006 17:18:57 -0700 Subject: [PATCH 0526/1063] [IPV6] NDISC: Initialize fl with outbound interface to lookup rules properly. Based on MIPL2 kernel patch. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: Ville Nuorvala Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 32f28dec399e..ed01f9a330d6 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -412,7 +412,8 @@ static void pndisc_destructor(struct pneigh_entry *n) */ static inline void ndisc_flow_init(struct flowi *fl, u8 type, - struct in6_addr *saddr, struct in6_addr *daddr) + struct in6_addr *saddr, struct in6_addr *daddr, + int oif) { memset(fl, 0, sizeof(*fl)); ipv6_addr_copy(&fl->fl6_src, saddr); @@ -420,6 +421,7 @@ static inline void ndisc_flow_init(struct flowi *fl, u8 type, fl->proto = IPPROTO_ICMPV6; fl->fl_icmp_type = type; fl->fl_icmp_code = 0; + fl->oif = oif; security_sk_classify_flow(ndisc_socket->sk, fl); } @@ -452,7 +454,8 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, src_addr = &tmpaddr; } - ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr); + ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr, + dev->ifindex); dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output); if (!dst) @@ -542,7 +545,8 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, saddr = &addr_buf; } - ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr); + ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr, + dev->ifindex); dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output); if (!dst) @@ -617,7 +621,8 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr, int len; int err; - ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr); + ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr, + dev->ifindex); dst = ndisc_dst_alloc(dev, NULL, daddr, ip6_output); if (!dst) @@ -1383,7 +1388,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, return; } - ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr); + ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr, + dev->ifindex); dst = ip6_route_output(NULL, &fl); if (dst == NULL) From cf6b1982599cbb60f410adeda659b0b29cdf7ad7 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 23 Aug 2006 17:19:18 -0700 Subject: [PATCH 0527/1063] [IPV6] ROUTE: Introduce a helper to check route validity. Signed-off-by: YOSHIFUJI Hideaki Acked-by: Ville Nuorvala Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 65514f21c186..0a18cb6b1cbb 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -726,6 +726,14 @@ fail: return err; } +static inline int ip6_rt_check(struct rt6key *rt_key, + struct in6_addr *fl_addr, + struct in6_addr *addr_cache) +{ + return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && + (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache))); +} + static struct dst_entry *ip6_sk_dst_check(struct sock *sk, struct dst_entry *dst, struct flowi *fl) @@ -741,8 +749,8 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, * that we do not support routing by source, TOS, * and MSG_DONTROUTE --ANK (980726) * - * 1. If route was host route, check that - * cached destination is current. + * 1. ip6_rt_check(): If route was host route, + * check that cached destination is current. * If it is network route, we still may * check its validity using saved pointer * to the last used address: daddr_cache. @@ -753,11 +761,8 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, * sockets. * 2. oif also should be the same. */ - if (((rt->rt6i_dst.plen != 128 || - !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr)) - && (np->daddr_cache == NULL || - !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache))) - || (fl->oif && fl->oif != dst->dev->ifindex)) { + if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) || + (fl->oif && fl->oif != dst->dev->ifindex)) { dst_release(dst); dst = NULL; } From 8e1ef0a95b87e8b4292b2ba733e8cb854ea2d2fe Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 29 Aug 2006 17:15:09 -0700 Subject: [PATCH 0528/1063] [IPV6]: Cache source address as well in ipv6_pinfo{}. Based on MIPL2 kernel patch. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: Ville Nuorvala Signed-off-by: David S. Miller --- include/linux/ipv6.h | 3 +++ include/net/ip6_route.h | 9 ++++++--- net/dccp/ipv6.c | 4 ++-- net/ipv6/af_inet6.c | 2 +- net/ipv6/datagram.c | 7 ++++++- net/ipv6/inet6_connection_sock.c | 2 +- net/ipv6/ip6_output.c | 3 +++ net/ipv6/tcp_ipv6.c | 4 ++-- net/ipv6/udp.c | 7 ++++++- 9 files changed, 30 insertions(+), 11 deletions(-) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 297853c841b4..02d14a3ff2af 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -242,6 +242,9 @@ struct ipv6_pinfo { struct in6_addr rcv_saddr; struct in6_addr daddr; struct in6_addr *daddr_cache; +#ifdef CONFIG_IPV6_SUBTREES + struct in6_addr *saddr_cache; +#endif __u32 flow_label; __u32 frag_size; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 249ce4545ef0..0d40f84df21b 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -144,21 +144,24 @@ extern rwlock_t rt6_lock; * Store a destination cache entry in a socket */ static inline void __ip6_dst_store(struct sock *sk, struct dst_entry *dst, - struct in6_addr *daddr) + struct in6_addr *daddr, struct in6_addr *saddr) { struct ipv6_pinfo *np = inet6_sk(sk); struct rt6_info *rt = (struct rt6_info *) dst; sk_setup_caps(sk, dst); np->daddr_cache = daddr; +#ifdef CONFIG_IPV6_SUBTREES + np->saddr_cache = saddr; +#endif np->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; } static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, - struct in6_addr *daddr) + struct in6_addr *daddr, struct in6_addr *saddr) { write_lock(&sk->sk_dst_lock); - __ip6_dst_store(sk, dst, daddr); + __ip6_dst_store(sk, dst, daddr, saddr); write_unlock(&sk->sk_dst_lock); } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 231bc7c7e749..f9c5e12d7038 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -231,7 +231,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ipv6_addr_copy(&np->saddr, saddr); inet->rcv_saddr = LOOPBACK4_IPV6; - __ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL, NULL); icsk->icsk_ext_hdr_len = 0; if (np->opt != NULL) @@ -872,7 +872,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, * comment in that function for the gory details. -acme */ - __ip6_dst_store(newsk, dst, NULL); + __ip6_dst_store(newsk, dst, NULL, NULL); newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | NETIF_F_TSO); newdp6 = (struct dccp6_sock *)newsk; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 2ff600cfe3a4..57ee5ddea96f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -659,7 +659,7 @@ int inet6_sk_rebuild_header(struct sock *sk) return err; } - __ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL, NULL); } return 0; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index c73508e090a6..8561b9da6db6 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -193,7 +193,12 @@ ipv4_connected: ip6_dst_store(sk, dst, ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? - &np->daddr : NULL); + &np->daddr : NULL, +#ifdef CONFIG_IPV6_SUBTREES + ipv6_addr_equal(&fl.fl6_src, &np->saddr) ? + &np->saddr : +#endif + NULL); sk->sk_state = TCP_ESTABLISHED; out: diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 7a51a258615d..827f41d1478b 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -186,7 +186,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) return err; } - __ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL, NULL); } skb->dst = dst_clone(dst); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 0a18cb6b1cbb..2a376b7d91b4 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -762,6 +762,9 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, * 2. oif also should be the same. */ if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) || +#ifdef CONFIG_IPV6_SUBTREES + ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) || +#endif (fl->oif && fl->oif != dst->dev->ifindex)) { dst_release(dst); dst = NULL; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7f1b660493b7..2b18918f3011 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -272,7 +272,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, inet->rcv_saddr = LOOPBACK4_IPV6; sk->sk_gso_type = SKB_GSO_TCPV6; - __ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL, NULL); icsk->icsk_ext_hdr_len = 0; if (np->opt) @@ -954,7 +954,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, */ newsk->sk_gso_type = SKB_GSO_TCPV6; - __ip6_dst_store(newsk, dst, NULL); + __ip6_dst_store(newsk, dst, NULL, NULL); newtcp6sk = (struct tcp6_sock *)newsk; inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index eb9e1b39c8f8..b9cc55ccb000 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -847,7 +847,12 @@ do_append_data: if (connected) { ip6_dst_store(sk, dst, ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ? - &np->daddr : NULL); + &np->daddr : NULL, +#ifdef CONFIG_IPV6_SUBTREES + ipv6_addr_equal(&fl->fl6_src, &np->saddr) ? + &np->saddr : +#endif + NULL); } else { dst_release(dst); } From 66729e18df08ee20a9824148236b89f56371659e Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 23 Aug 2006 17:20:34 -0700 Subject: [PATCH 0529/1063] [IPV6] ROUTE: Make sure we have fn->leaf when adding a node on subtree. Based on MIPL2 kernel patch. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: Ville Nuorvala Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 667b1b1ea25d..11f9660a4796 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -80,6 +80,7 @@ static DEFINE_RWLOCK(fib6_walker_lock); #endif static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt); +static struct rt6_info * fib6_find_prefix(struct fib6_node *fn); static struct fib6_node * fib6_repair_tree(struct fib6_node *fn); static int fib6_walk(struct fib6_walker_t *w); static int fib6_walk_continue(struct fib6_walker_t *w); @@ -697,7 +698,7 @@ void fib6_force_start_gc(void) int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) { - struct fib6_node *fn; + struct fib6_node *fn, *pn = NULL; int err = -ENOMEM; fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr), @@ -706,6 +707,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) if (fn == NULL) goto out; + pn = fn; + #ifdef CONFIG_IPV6_SUBTREES if (rt->rt6i_src.plen) { struct fib6_node *sn; @@ -751,10 +754,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) /* Now link new subtree to main tree */ sfn->parent = fn; fn->subtree = sfn; - if (fn->leaf == NULL) { - fn->leaf = rt; - atomic_inc(&rt->rt6i_ref); - } } else { sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, sizeof(struct in6_addr), rt->rt6i_src.plen, @@ -764,6 +763,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) goto st_failure; } + if (fn->leaf == NULL) { + fn->leaf = rt; + atomic_inc(&rt->rt6i_ref); + } fn = sn; } #endif @@ -777,8 +780,25 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) } out: - if (err) + if (err) { +#ifdef CONFIG_IPV6_SUBTREES + /* + * If fib6_add_1 has cleared the old leaf pointer in the + * super-tree leaf node we have to find a new one for it. + */ + if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) { + pn->leaf = fib6_find_prefix(pn); +#if RT6_DEBUG >= 2 + if (!pn->leaf) { + BUG_TRAP(pn->leaf != NULL); + pn->leaf = &ip6_null_entry; + } +#endif + atomic_inc(&pn->leaf->rt6i_ref); + } +#endif dst_free(&rt->u.dst); + } return err; #ifdef CONFIG_IPV6_SUBTREES From 2285adc1e6c9f964f9625e7edcd233fccd7a7c92 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 23 Aug 2006 17:20:54 -0700 Subject: [PATCH 0530/1063] [IPV6] ROUTE: Prune clones from main tree as well. Based on MIPL2 kernel patch. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: Ville Nuorvala Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 11f9660a4796..35b91ff95db2 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -776,7 +776,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) if (err == 0) { fib6_start_gc(rt); if (!(rt->rt6i_flags&RTF_CACHE)) - fib6_prune_clones(fn, rt); + fib6_prune_clones(pn, rt); } out: From 3fc5e0440be7fab3abae4e801b0ef17e9b3b58c4 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 23 Aug 2006 17:21:12 -0700 Subject: [PATCH 0531/1063] [IPV6] ROUTE: Fix looking up a route on subtree. Even on RTN_ROOT node, we need to process its subtree first. Fix NULL pointer dereference in fib6_locate(). Based on MIPL2 kernel patch. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: Ville Nuorvala Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 39 +++++++++++++++------------------------ 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 35b91ff95db2..5408b64f3b5f 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -850,33 +850,26 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root, break; } - while ((fn->fn_flags & RTN_ROOT) == 0) { -#ifdef CONFIG_IPV6_SUBTREES - if (fn->subtree) { - struct fib6_node *st; - struct lookup_args *narg; - - narg = args + 1; - - if (narg->addr) { - st = fib6_lookup_1(fn->subtree, narg); - - if (st && !(st->fn_flags & RTN_ROOT)) - return st; - } - } -#endif - - if (fn->fn_flags & RTN_RTINFO) { + while(fn) { + if (SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) { struct rt6key *key; key = (struct rt6key *) ((u8 *) fn->leaf + args->offset); - if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) - return fn; + if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) { +#ifdef CONFIG_IPV6_SUBTREES + if (fn->subtree) + fn = fib6_lookup_1(fn->subtree, args + 1); +#endif + if (!fn || fn->fn_flags & RTN_RTINFO) + return fn; + } } + if (fn->fn_flags & RTN_ROOT) + break; + fn = fn->parent; } @@ -953,10 +946,8 @@ struct fib6_node * fib6_locate(struct fib6_node *root, #ifdef CONFIG_IPV6_SUBTREES if (src_len) { BUG_TRAP(saddr!=NULL); - if (fn == NULL) - fn = fn->subtree; - if (fn) - fn = fib6_locate_1(fn, saddr, src_len, + if (fn && fn->subtree) + fn = fib6_locate_1(fn->subtree, saddr, src_len, offsetof(struct rt6_info, rt6i_src)); } #endif From 825e288ef4c55a379a97e104c825eb9b74874099 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 23 Aug 2006 17:21:29 -0700 Subject: [PATCH 0532/1063] [IPV6] ROUTE: Make sure we do not exceed args in fib6_lookup_1(). Signed-off-by: YOSHIFUJI Hideaki Acked-by: Ville Nuorvala Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5408b64f3b5f..19ee7375daa9 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -829,6 +829,9 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root, struct fib6_node *fn; int dir; + if (unlikely(args->offset == 0)) + return NULL; + /* * Descend on a tree */ @@ -879,16 +882,22 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root, struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr, struct in6_addr *saddr) { - struct lookup_args args[2]; struct fib6_node *fn; - - args[0].offset = offsetof(struct rt6_info, rt6i_dst); - args[0].addr = daddr; - + struct lookup_args args[] = { + { + .offset = offsetof(struct rt6_info, rt6i_dst), + .addr = daddr, + }, #ifdef CONFIG_IPV6_SUBTREES - args[1].offset = offsetof(struct rt6_info, rt6i_src); - args[1].addr = saddr; + { + .offset = offsetof(struct rt6_info, rt6i_src), + .addr = saddr, + }, #endif + { + .offset = 0, /* sentinel */ + } + }; fn = fib6_lookup_1(root, args); From fefc2a6c201aeafc1d0329a140de502d49f69d04 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 23 Aug 2006 17:21:50 -0700 Subject: [PATCH 0533/1063] [IPV6] ROUTE: Allow searching subtree only. Signed-off-by: YOSHIFUJI Hideaki Acked-by: Ville Nuorvala Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 19ee7375daa9..b706424e70b8 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -899,7 +899,7 @@ struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr, } }; - fn = fib6_lookup_1(root, args); + fn = fib6_lookup_1(root, daddr ? args : args + 1); if (fn == NULL || fn->fn_flags & RTN_TL_ROOT) fn = root; From 7fc33165a74301b2c5c90b2f2a1f6907cbd5c6f1 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 23 Aug 2006 17:22:24 -0700 Subject: [PATCH 0534/1063] [IPV6] ROUTE: Put SUBTREE() as FIB6_SUBTREE() into ip6_fib.h for future use. Based on MIPL2 kernel patch. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: Ville Nuorvala Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 5 +++++ net/ipv6/ip6_fib.c | 20 +++++++++----------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 9610b887ffb5..6a3f26a04509 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -60,6 +60,11 @@ struct fib6_node __u32 fn_sernum; }; +#ifndef CONFIG_IPV6_SUBTREES +#define FIB6_SUBTREE(fn) NULL +#else +#define FIB6_SUBTREE(fn) ((fn)->subtree) +#endif /* * routing information diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index b706424e70b8..6536e33d8353 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -73,10 +73,8 @@ static DEFINE_RWLOCK(fib6_walker_lock); #ifdef CONFIG_IPV6_SUBTREES #define FWS_INIT FWS_S -#define SUBTREE(fn) ((fn)->subtree) #else #define FWS_INIT FWS_L -#define SUBTREE(fn) NULL #endif static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt); @@ -854,7 +852,7 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root, } while(fn) { - if (SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) { + if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) { struct rt6key *key; key = (struct rt6key *) ((u8 *) fn->leaf + @@ -985,7 +983,7 @@ static struct rt6_info * fib6_find_prefix(struct fib6_node *fn) if(fn->right) return fn->right->leaf; - fn = SUBTREE(fn); + fn = FIB6_SUBTREE(fn); } return NULL; } @@ -1016,7 +1014,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn) if (fn->right) child = fn->right, children |= 1; if (fn->left) child = fn->left, children |= 2; - if (children == 3 || SUBTREE(fn) + if (children == 3 || FIB6_SUBTREE(fn) #ifdef CONFIG_IPV6_SUBTREES /* Subtree root (i.e. fn) may have one child */ || (children && fn->fn_flags&RTN_ROOT) @@ -1035,9 +1033,9 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn) pn = fn->parent; #ifdef CONFIG_IPV6_SUBTREES - if (SUBTREE(pn) == fn) { + if (FIB6_SUBTREE(pn) == fn) { BUG_TRAP(fn->fn_flags&RTN_ROOT); - SUBTREE(pn) = NULL; + FIB6_SUBTREE(pn) = NULL; nstate = FWS_L; } else { BUG_TRAP(!(fn->fn_flags&RTN_ROOT)); @@ -1085,7 +1083,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn) read_unlock(&fib6_walker_lock); node_free(fn); - if (pn->fn_flags&RTN_RTINFO || SUBTREE(pn)) + if (pn->fn_flags&RTN_RTINFO || FIB6_SUBTREE(pn)) return pn; rt6_release(pn->leaf); @@ -1228,8 +1226,8 @@ static int fib6_walk_continue(struct fib6_walker_t *w) switch (w->state) { #ifdef CONFIG_IPV6_SUBTREES case FWS_S: - if (SUBTREE(fn)) { - w->node = SUBTREE(fn); + if (FIB6_SUBTREE(fn)) { + w->node = FIB6_SUBTREE(fn); continue; } w->state = FWS_L; @@ -1263,7 +1261,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w) pn = fn->parent; w->node = pn; #ifdef CONFIG_IPV6_SUBTREES - if (SUBTREE(pn) == fn) { + if (FIB6_SUBTREE(pn) == fn) { BUG_TRAP(fn->fn_flags&RTN_ROOT); w->state = FWS_L; continue; From 982f56f3a9be4651520c0fdd3d80a5d02e95a178 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 23 Aug 2006 17:22:39 -0700 Subject: [PATCH 0535/1063] [IPV6] ROUTE: Search subtree when backtracking. Based on MIPL2 kernel patch. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: Ville Nuorvala Signed-off-by: David S. Miller --- net/ipv6/route.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8d00a9d77f01..bd4cf175ff10 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -481,17 +481,23 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } #endif -#define BACKTRACK() \ -if (rt == &ip6_null_entry && flags & RT6_F_STRICT) { \ - while ((fn = fn->parent) != NULL) { \ - if (fn->fn_flags & RTN_TL_ROOT) { \ - dst_hold(&rt->u.dst); \ - goto out; \ +#define BACKTRACK(saddr) \ +do { \ + if (rt == &ip6_null_entry) { \ + struct fib6_node *pn; \ + while (fn) { \ + if (fn->fn_flags & RTN_TL_ROOT) \ + goto out; \ + pn = fn->parent; \ + if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \ + fn = fib6_lookup(pn->subtree, NULL, saddr); \ + else \ + fn = pn; \ + if (fn->fn_flags & RTN_RTINFO) \ + goto restart; \ } \ - if (fn->fn_flags & RTN_RTINFO) \ - goto restart; \ } \ -} +} while(0) static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table, struct flowi *fl, int flags) @@ -504,7 +510,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table, restart: rt = fn->leaf; rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT); - BACKTRACK(); + BACKTRACK(&fl->fl6_src); dst_hold(&rt->u.dst); out: read_unlock_bh(&table->tb6_lock); @@ -638,7 +644,7 @@ restart_2: restart: rt = rt6_select(&fn->leaf, fl->iif, strict | reachable); - BACKTRACK(); + BACKTRACK(&fl->fl6_src); if (rt == &ip6_null_entry || rt->rt6i_flags & RTF_CACHE) goto out; @@ -733,7 +739,7 @@ restart_2: restart: rt = rt6_select(&fn->leaf, fl->oif, strict | reachable); - BACKTRACK(); + BACKTRACK(&fl->fl6_src); if (rt == &ip6_null_entry || rt->rt6i_flags & RTF_CACHE) goto out; From 150730d5a53b1bbb486101b2a5fb82ff0d3f916e Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 23 Aug 2006 17:22:55 -0700 Subject: [PATCH 0536/1063] [IPV6] ROUTE: Purge clones on other trees when deleting a route. Based on MIPL2 kernel patch. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: Ville Nuorvala --- net/ipv6/ip6_fib.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 6536e33d8353..f0fdaf182b3f 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1169,8 +1169,18 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) BUG_TRAP(fn->fn_flags&RTN_RTINFO); - if (!(rt->rt6i_flags&RTF_CACHE)) - fib6_prune_clones(fn, rt); + if (!(rt->rt6i_flags&RTF_CACHE)) { + struct fib6_node *pn = fn; +#ifdef CONFIG_IPV6_SUBTREES + /* clones of this route might be in another subtree */ + if (rt->rt6i_src.plen) { + while (!(pn->fn_flags&RTN_ROOT)) + pn = pn->parent; + pn = pn->parent; + } +#endif + fib6_prune_clones(pn, rt); + } /* * Walk the leaf entries looking for ourself From cb15d9c224fcc03b32396c1c7416e777c2dcca34 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 23 Aug 2006 17:23:11 -0700 Subject: [PATCH 0537/1063] [IPV6] NDISC: Search subtrees when backtracking on receipt of redirects. Signed-off-by: YOSHIFUJI Hideaki Acked-by: Ville Nuorvala --- net/ipv6/route.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index bd4cf175ff10..fd626d420cd1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1332,17 +1332,10 @@ restart: break; } - if (!rt) { - if (rt6_need_strict(&fl->fl6_dst)) { - while ((fn = fn->parent) != NULL) { - if (fn->fn_flags & RTN_ROOT) - break; - if (fn->fn_flags & RTN_RTINFO) - goto restart; - } - } + if (!rt) rt = &ip6_null_entry; - } + BACKTRACK(&fl->fl6_src); +out: dst_hold(&rt->u.dst); read_unlock_bh(&table->tb6_lock); From c0bece9f2aec546c3750ae3972f80e024a923f34 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 23 Aug 2006 17:23:25 -0700 Subject: [PATCH 0538/1063] [IPV6] ROUTE: Add credits about subtree fixes. Based on MIPL2 kernel patch. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 1 + net/ipv6/route.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index f0fdaf182b3f..fbca60950b14 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -18,6 +18,7 @@ * Yuji SEKIYA @USAGI: Support default route on router node; * remove ip6_null_entry from the top of * routing table. + * Ville Nuorvala: Fixed routing subtrees. */ #include #include diff --git a/net/ipv6/route.c b/net/ipv6/route.c index fd626d420cd1..fd6f2ec4fa09 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -22,6 +22,8 @@ * routers in REACHABLE, STALE, DELAY or PROBE states). * - always select the same router if it is (probably) * reachable. otherwise, round-robin the list. + * Ville Nuorvala + * Fixed routing subtrees. */ #include From 4e96c2b4180aff4f080b77314712073c6ca430e7 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 23 Aug 2006 17:23:39 -0700 Subject: [PATCH 0539/1063] [IPV6] KCONFIG: Add subtrees support. This is for developers only. Based on MIPL2 kernel patch. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: Ville Nuorvala --- net/ipv6/Kconfig | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 36a6c2b79889..14f0b336519f 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -136,6 +136,20 @@ config IPV6_TUNNEL If unsure, say N. +config IPV6_SUBTREES + bool "IPv6: source address based routing" + depends on IPV6 && EXPERIMENTAL + ---help--- + Enable routing by source address or prefix. + + The destination address is still the primary routing key, so mixing + normal and source prefix specific routes in the same routing table + may sometimes lead to unintended routing behavior. This can be + avoided by defining different routing tables for the normal and + source prefix specific routes. + + If unsure, say N. + config IPV6_MULTIPLE_TABLES bool "IPv6: Multiple Routing Tables" depends on IPV6 && EXPERIMENTAL From 77d16f450ae0452d7d4b009f78debb1294fb435c Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 23 Aug 2006 17:25:05 -0700 Subject: [PATCH 0540/1063] [IPV6] ROUTE: Unify RT6_F_xxx and RT6_SELECT_F_xxx flags Unify RT6_F_xxx and RT6_SELECT_F_xxx flags into RT6_LOOKUP_F_xxx flags, and put them into ip6_route.h Signed-off-by: YOSHIFUJI Hideaki Acked-by: Ville Nuorvala --- include/net/ip6_fib.h | 3 --- include/net/ip6_route.h | 4 ++++ net/ipv6/fib6_rules.c | 2 +- net/ipv6/route.c | 32 ++++++++++++-------------------- 4 files changed, 17 insertions(+), 24 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 6a3f26a04509..e4438de3bd6b 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -173,9 +173,6 @@ struct fib6_table { #define RT6_TABLE_LOCAL RT6_TABLE_MAIN #endif -#define RT6_F_STRICT 1 -#define RT6_F_HAS_SADDR 2 - typedef struct rt6_info *(*pol_lookup_t)(struct fib6_table *, struct flowi *, int); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 0d40f84df21b..297909570041 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -32,6 +32,10 @@ struct route_info { #include #include +#define RT6_LOOKUP_F_IFACE 0x1 +#define RT6_LOOKUP_F_REACHABLE 0x2 +#define RT6_LOOKUP_F_HAS_SADDR 0x4 + struct pol_chain { int type; int priority; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 2c4fbc855e6c..7b4908cc52b3 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -117,7 +117,7 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) if (!ipv6_prefix_equal(&fl->fl6_dst, &r->dst.addr, r->dst.plen)) return 0; - if ((flags & RT6_F_HAS_SADDR) && + if ((flags & RT6_LOOKUP_F_HAS_SADDR) && !ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, r->src.plen)) return 0; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index fd6f2ec4fa09..20691285aee5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -76,9 +76,6 @@ #define CLONE_OFFLINK_ROUTE 0 -#define RT6_SELECT_F_IFACE 0x1 -#define RT6_SELECT_F_REACHABLE 0x2 - static int ip6_rt_max_size = 4096; static int ip6_rt_gc_min_interval = HZ / 2; static int ip6_rt_gc_timeout = 60*HZ; @@ -340,7 +337,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif, int m, n; m = rt6_check_dev(rt, oif); - if (!m && (strict & RT6_SELECT_F_IFACE)) + if (!m && (strict & RT6_LOOKUP_F_IFACE)) return -1; #ifdef CONFIG_IPV6_ROUTER_PREF m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; @@ -348,7 +345,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif, n = rt6_check_neigh(rt); if (n > 1) m |= 16; - else if (!n && strict & RT6_SELECT_F_REACHABLE) + else if (!n && strict & RT6_LOOKUP_F_REACHABLE) return -1; return m; } @@ -388,7 +385,7 @@ static struct rt6_info *rt6_select(struct rt6_info **head, int oif, } if (!match && - (strict & RT6_SELECT_F_REACHABLE) && + (strict & RT6_LOOKUP_F_REACHABLE) && last && last != rt0) { /* no entries matched; do round-robin */ static DEFINE_SPINLOCK(lock); @@ -511,7 +508,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table, fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: rt = fn->leaf; - rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT); + rt = rt6_device_match(rt, fl->oif, flags); BACKTRACK(&fl->fl6_src); dst_hold(&rt->u.dst); out: @@ -537,7 +534,7 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, }, }; struct dst_entry *dst; - int flags = strict ? RT6_F_STRICT : 0; + int flags = strict ? RT6_LOOKUP_F_IFACE : 0; dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup); if (dst->error == 0) @@ -633,10 +630,9 @@ static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, int strict = 0; int attempts = 3; int err; - int reachable = RT6_SELECT_F_REACHABLE; + int reachable = RT6_LOOKUP_F_REACHABLE; - if (flags & RT6_F_STRICT) - strict = RT6_SELECT_F_IFACE; + strict |= flags & RT6_LOOKUP_F_IFACE; relookup: read_lock_bh(&table->tb6_lock); @@ -712,10 +708,7 @@ void ip6_route_input(struct sk_buff *skb) }, .proto = iph->nexthdr, }; - int flags = 0; - - if (rt6_need_strict(&iph->daddr)) - flags |= RT6_F_STRICT; + int flags = rt6_need_strict(&iph->daddr) ? RT6_LOOKUP_F_IFACE : 0; skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input); } @@ -728,10 +721,9 @@ static struct rt6_info *ip6_pol_route_output(struct fib6_table *table, int strict = 0; int attempts = 3; int err; - int reachable = RT6_SELECT_F_REACHABLE; + int reachable = RT6_LOOKUP_F_REACHABLE; - if (flags & RT6_F_STRICT) - strict = RT6_SELECT_F_IFACE; + strict |= flags & RT6_LOOKUP_F_IFACE; relookup: read_lock_bh(&table->tb6_lock); @@ -797,7 +789,7 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) int flags = 0; if (rt6_need_strict(&fl->fl6_dst)) - flags |= RT6_F_STRICT; + flags |= RT6_LOOKUP_F_IFACE; return fib6_rule_lookup(fl, flags, ip6_pol_route_output); } @@ -1362,7 +1354,7 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, }, .gateway = *gateway, }; - int flags = rt6_need_strict(dest) ? RT6_F_STRICT : 0; + int flags = rt6_need_strict(dest) ? RT6_LOOKUP_F_IFACE : 0; return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect); } From 7e49e6de30efa716614e280d97963c570f3acf29 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Fri, 22 Sep 2006 15:05:15 -0700 Subject: [PATCH 0541/1063] [XFRM]: Add XFRM_MODE_xxx for future use. Transformation mode is used as either IPsec transport or tunnel. It is required to add two more items, route optimization and inbound trigger for Mobile IPv6. Based on MIPL2 kernel patch. This patch was also written by: Ville Nuorvala Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/xfrm.h | 6 ++++-- include/net/xfrm.h | 2 +- net/ipv4/ah4.c | 2 +- net/ipv4/esp4.c | 6 +++--- net/ipv4/ipcomp.c | 8 ++++---- net/ipv4/xfrm4_input.c | 2 +- net/ipv4/xfrm4_output.c | 4 ++-- net/ipv4/xfrm4_policy.c | 2 +- net/ipv4/xfrm4_state.c | 2 +- net/ipv4/xfrm4_tunnel.c | 2 +- net/ipv6/ah6.c | 2 +- net/ipv6/esp6.c | 4 ++-- net/ipv6/ipcomp6.c | 6 +++--- net/ipv6/xfrm6_input.c | 2 +- net/ipv6/xfrm6_output.c | 4 ++-- net/ipv6/xfrm6_policy.c | 2 +- net/ipv6/xfrm6_state.c | 2 +- net/ipv6/xfrm6_tunnel.c | 2 +- net/key/af_key.c | 6 +++--- net/xfrm/xfrm_policy.c | 11 ++++++----- net/xfrm/xfrm_user.c | 4 ++-- 21 files changed, 42 insertions(+), 39 deletions(-) diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 46a15c7a1a13..5154064b6d95 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -120,7 +120,9 @@ enum #define XFRM_MODE_TRANSPORT 0 #define XFRM_MODE_TUNNEL 1 -#define XFRM_MODE_MAX 2 +#define XFRM_MODE_ROUTEOPTIMIZATION 2 +#define XFRM_MODE_IN_TRIGGER 3 +#define XFRM_MODE_MAX 4 /* Netlink configuration messages. */ enum { @@ -247,7 +249,7 @@ struct xfrm_usersa_info { __u32 seq; __u32 reqid; __u16 family; - __u8 mode; /* 0=transport,1=tunnel */ + __u8 mode; /* XFRM_MODE_xxx */ __u8 replay_window; __u8 flags; #define XFRM_STATE_NOECN 1 diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 00bf86e6e82b..762795624b10 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -298,7 +298,7 @@ struct xfrm_tmpl __u32 reqid; -/* Mode: transport/tunnel */ +/* Mode: transport, tunnel etc. */ __u8 mode; /* Sharing mode: unique, this session only, this user only etc. */ diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 008e69d2e423..99542977e47e 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -265,7 +265,7 @@ static int ah_init_state(struct xfrm_state *x) goto error; x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len); - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); x->data = ahp; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index b428489f6ccd..e87377e1d6b6 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -248,7 +248,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) * as per draft-ietf-ipsec-udp-encaps-06, * section 3.1.2 */ - if (!x->props.mode) + if (x->props.mode == XFRM_MODE_TRANSPORT) skb->ip_summed = CHECKSUM_UNNECESSARY; } @@ -267,7 +267,7 @@ static u32 esp4_get_max_size(struct xfrm_state *x, int mtu) struct esp_data *esp = x->data; u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { mtu = ALIGN(mtu + 2, blksize); } else { /* The worst case. */ @@ -383,7 +383,7 @@ static int esp_init_state(struct xfrm_state *x) if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) goto error; x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); if (x->encap) { struct xfrm_encap_tmpl *encap = x->encap; diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 5bb9c9f03fb6..17342430a843 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -176,7 +176,7 @@ static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb) return 0; out_ok: - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) ip_send_check(iph); return 0; } @@ -216,7 +216,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) t->id.daddr.a4 = x->id.daddr.a4; memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET; - t->props.mode = 1; + t->props.mode = XFRM_MODE_TUNNEL; t->props.saddr.a4 = x->props.saddr.a4; t->props.flags = x->props.flags; @@ -416,7 +416,7 @@ static int ipcomp_init_state(struct xfrm_state *x) goto out; x->props.header_len = 0; - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); mutex_lock(&ipcomp_resource_mutex); @@ -428,7 +428,7 @@ static int ipcomp_init_state(struct xfrm_state *x) goto error; mutex_unlock(&ipcomp_resource_mutex); - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp_tunnel_attach(x); if (err) goto error_tunnel; diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 817ed84511a6..040e8475f295 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -106,7 +106,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) if (x->mode->input(x, skb)) goto drop; - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { decaps = 1; break; } diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 4a96a9e3ef3b..5fd115f0c547 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -54,7 +54,7 @@ static int xfrm4_output_one(struct sk_buff *skb) goto error_nolock; } - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { err = xfrm4_tunnel_check_size(skb); if (err) goto error_nolock; @@ -85,7 +85,7 @@ static int xfrm4_output_one(struct sk_buff *skb) } dst = skb->dst; x = dst->xfrm; - } while (x && !x->props.mode); + } while (x && (x->props.mode != XFRM_MODE_TUNNEL)); IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; err = 0; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 8f50eae47d03..a5bed741de2c 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -96,7 +96,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst1->next = dst_prev; dst_prev = dst1; - if (xfrm[i]->props.mode) { + if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { remote = xfrm[i]->id.daddr.a4; local = xfrm[i]->props.saddr.a4; tunnel = 1; diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 81e1751c966e..97b0c7589711 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -42,7 +42,7 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->props.saddr = tmpl->saddr; if (x->props.saddr.a4 == 0) x->props.saddr.a4 = saddr->a4; - if (tmpl->mode && x->props.saddr.a4 == 0) { + if (tmpl->mode == XFRM_MODE_TUNNEL && x->props.saddr.a4 == 0) { struct rtable *rt; struct flowi fl_tunnel = { .nl_u = { diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index f8ceaa127c83..f110af5b1319 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -28,7 +28,7 @@ static int ipip_xfrm_rcv(struct xfrm_state *x, struct sk_buff *skb) static int ipip_init_state(struct xfrm_state *x) { - if (!x->props.mode) + if (x->props.mode != XFRM_MODE_TUNNEL) return -EINVAL; if (x->encap) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 00ffa7bc6c9f..60954fc7eb36 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -398,7 +398,7 @@ static int ah6_init_state(struct xfrm_state *x) goto error; x->props.header_len = XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len); - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct ipv6hdr); x->data = ahp; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 2ebfd281e721..2b8e52e1d0ab 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -237,7 +237,7 @@ static u32 esp6_get_max_size(struct xfrm_state *x, int mtu) struct esp_data *esp = x->data; u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { mtu = ALIGN(mtu + 2, blksize); } else { /* The worst case. */ @@ -358,7 +358,7 @@ static int esp6_init_state(struct xfrm_state *x) if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) goto error; x->props.header_len = sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen; - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct ipv6hdr); x->data = esp; return 0; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index a81e9e9d93bd..19eba8d9f851 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -212,7 +212,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr)); memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET6; - t->props.mode = 1; + t->props.mode = XFRM_MODE_TUNNEL; memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); if (xfrm_init_state(t)) @@ -417,7 +417,7 @@ static int ipcomp6_init_state(struct xfrm_state *x) goto out; x->props.header_len = 0; - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct ipv6hdr); mutex_lock(&ipcomp6_resource_mutex); @@ -429,7 +429,7 @@ static int ipcomp6_init_state(struct xfrm_state *x) goto error; mutex_unlock(&ipcomp6_resource_mutex); - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp6_tunnel_attach(x); if (err) goto error_tunnel; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 0405d74ff910..ee2f6b3908b6 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -72,7 +72,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi) if (x->mode->input(x, skb)) goto drop; - if (x->props.mode) { /* XXX */ + if (x->props.mode == XFRM_MODE_TUNNEL) { /* XXX */ decaps = 1; break; } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 6d111743e508..26f18869f77b 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -47,7 +47,7 @@ static int xfrm6_output_one(struct sk_buff *skb) goto error_nolock; } - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { err = xfrm6_tunnel_check_size(skb); if (err) goto error_nolock; @@ -80,7 +80,7 @@ static int xfrm6_output_one(struct sk_buff *skb) } dst = skb->dst; x = dst->xfrm; - } while (x && !x->props.mode); + } while (x && (x->props.mode != XFRM_MODE_TUNNEL)); IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; err = 0; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 73cd250aecbb..81355bb50328 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -114,7 +114,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst1->next = dst_prev; dst_prev = dst1; - if (xfrm[i]->props.mode) { + if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { remote = (struct in6_addr*)&xfrm[i]->id.daddr; local = (struct in6_addr*)&xfrm[i]->props.saddr; tunnel = 1; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index b33296b3f6de..a1a1f5476442 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -42,7 +42,7 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr)); if (ipv6_addr_any((struct in6_addr*)&x->props.saddr)) memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr)); - if (tmpl->mode && ipv6_addr_any((struct in6_addr*)&x->props.saddr)) { + if (tmpl->mode == XFRM_MODE_TUNNEL && ipv6_addr_any((struct in6_addr*)&x->props.saddr)) { struct rt6_info *rt; struct flowi fl_tunnel = { .nl_u = { diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index c8f9369c2a87..59685ee8f700 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -307,7 +307,7 @@ static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, static int xfrm6_tunnel_init_state(struct xfrm_state *x) { - if (!x->props.mode) + if (x->props.mode != XFRM_MODE_TUNNEL) return -EINVAL; if (x->encap) diff --git a/net/key/af_key.c b/net/key/af_key.c index 797c744a8438..19e047b0e678 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1765,7 +1765,7 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) } /* addresses present only in tunnel mode */ - if (t->mode) { + if (t->mode == XFRM_MODE_TUNNEL) { switch (xp->family) { case AF_INET: sin = (void*)(rq+1); @@ -1997,7 +1997,7 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i int req_size; req_size = sizeof(struct sadb_x_ipsecrequest); - if (t->mode) + if (t->mode == XFRM_MODE_TUNNEL) req_size += 2*socklen; else size -= 2*socklen; @@ -2013,7 +2013,7 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i if (t->optional) rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_USE; rq->sadb_x_ipsecrequest_reqid = t->reqid; - if (t->mode) { + if (t->mode == XFRM_MODE_TUNNEL) { switch (xp->family) { case AF_INET: sin = (void*)(rq+1); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 32c963c90573..a0d58971391d 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -779,7 +779,7 @@ xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl, xfrm_address_t *local = saddr; struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; - if (tmpl->mode) { + if (tmpl->mode == XFRM_MODE_TUNNEL) { remote = &tmpl->id.daddr; local = &tmpl->saddr; } @@ -1005,7 +1005,8 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, (x->props.reqid == tmpl->reqid || !tmpl->reqid) && x->props.mode == tmpl->mode && (tmpl->aalgos & (1<props.aalgo)) && - !(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family)); + !(x->props.mode != XFRM_MODE_TRANSPORT && + xfrm_state_addr_cmp(tmpl, x, family)); } static inline int @@ -1015,14 +1016,14 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, int idx = start; if (tmpl->optional) { - if (!tmpl->mode) + if (tmpl->mode == XFRM_MODE_TRANSPORT) return start; } else start = -1; for (; idx < sp->len; idx++) { if (xfrm_state_ok(tmpl, sp->xvec[idx], family)) return ++idx; - if (sp->xvec[idx]->props.mode) + if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) break; } return start; @@ -1047,7 +1048,7 @@ EXPORT_SYMBOL(xfrm_decode_session); static inline int secpath_has_tunnel(struct sec_path *sp, int k) { for (; k < sp->len; k++) { - if (sp->xvec[k]->props.mode) + if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) return 1; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index f70e158874d2..0d580ac19771 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -174,8 +174,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, err = -EINVAL; switch (p->mode) { - case 0: - case 1: + case XFRM_MODE_TRANSPORT: + case XFRM_MODE_TUNNEL: break; default: From 5794708f11551b6d19b10673abf4b0202f66b44d Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Fri, 22 Sep 2006 15:06:24 -0700 Subject: [PATCH 0542/1063] [XFRM]: Introduce a helper to compare id protocol. Put the helper to header for future use. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/xfrm.h | 6 ++++++ net/xfrm/xfrm_state.c | 6 +++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 762795624b10..5b364b0a6a28 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -835,6 +836,11 @@ static inline int xfrm_state_kern(struct xfrm_state *x) return atomic_read(&x->tunnel_users); } +static inline int xfrm_id_proto_match(u8 proto, u8 userproto) +{ + return (userproto == IPSEC_PROTO_ANY || proto == userproto); +} + /* * xfrm algorithm information */ diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 1c796087ee78..34c038cbdf46 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -294,7 +294,7 @@ void xfrm_state_flush(u8 proto) restart: list_for_each_entry(x, xfrm_state_bydst+i, bydst) { if (!xfrm_state_kern(x) && - (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) { + xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); spin_unlock_bh(&xfrm_state_lock); @@ -772,7 +772,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), spin_lock_bh(&xfrm_state_lock); for (i = 0; i < XFRM_DST_HSIZE; i++) { list_for_each_entry(x, xfrm_state_bydst+i, bydst) { - if (proto == IPSEC_PROTO_ANY || x->id.proto == proto) + if (xfrm_id_proto_match(x->id.proto, proto)) count++; } } @@ -783,7 +783,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), for (i = 0; i < XFRM_DST_HSIZE; i++) { list_for_each_entry(x, xfrm_state_bydst+i, bydst) { - if (proto != IPSEC_PROTO_ANY && x->id.proto != proto) + if (!xfrm_id_proto_match(x->id.proto, proto)) continue; err = func(x, --count, data); if (err) From dc00a525603650a1471c823a1e48c6505c2f9765 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 17:49:52 -0700 Subject: [PATCH 0543/1063] [XFRM] STATE: Allow non IPsec protocol. It will be added two more transformation protocols (routing header and destination options header) for Mobile IPv6. xfrm_id_proto_match() can be handle zero as all, IPSEC_PROTO_ANY as all IPsec and otherwise as exact one. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/xfrm.h | 5 ++++- net/xfrm/xfrm_user.c | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 5b364b0a6a28..2a7d2132a1ae 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -838,7 +838,10 @@ static inline int xfrm_state_kern(struct xfrm_state *x) static inline int xfrm_id_proto_match(u8 proto, u8 userproto) { - return (userproto == IPSEC_PROTO_ANY || proto == userproto); + return (!userproto || proto == userproto || + (userproto == IPSEC_PROTO_ANY && (proto == IPPROTO_AH || + proto == IPPROTO_ESP || + proto == IPPROTO_COMP))); } /* diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 0d580ac19771..41f3d51ffc33 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -542,7 +542,7 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) info.nlmsg_flags = NLM_F_MULTI; info.this_idx = 0; info.start_idx = cb->args[0]; - (void) xfrm_state_walk(IPSEC_PROTO_ANY, dump_one_state, &info); + (void) xfrm_state_walk(0, dump_one_state, &info); cb->args[0] = info.this_idx; return skb->len; From 622dc8281a80374873686514e46f852093d91106 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 17:52:01 -0700 Subject: [PATCH 0544/1063] [XFRM]: Expand XFRM_MAX_DEPTH for route optimization. XFRM_MAX_DEPTH is a limit of transformation states to be applied to the same flow. Two more extension headers are used by Mobile IPv6 transformation. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 2a7d2132a1ae..aa3be68041be 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -314,7 +314,7 @@ struct xfrm_tmpl __u32 calgos; }; -#define XFRM_MAX_DEPTH 4 +#define XFRM_MAX_DEPTH 6 struct xfrm_policy { From 6c44e6b7ab500d7e3e3f406c83325671be51a752 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 17:53:57 -0700 Subject: [PATCH 0545/1063] [XFRM] STATE: Add source address list. Support source address based searching. Mobile IPv6 will use it. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/xfrm.h | 26 ++++++++++++++++++++++++++ net/ipv4/xfrm4_state.c | 3 +++ net/ipv6/xfrm6_state.c | 3 +++ net/xfrm/xfrm_state.c | 21 +++++++++++++++++++-- 4 files changed, 51 insertions(+), 2 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index aa3be68041be..88145e3348d0 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -95,6 +95,7 @@ struct xfrm_state { /* Note: bydst is re-used during gc */ struct list_head bydst; + struct list_head bysrc; struct list_head byspi; atomic_t refcnt; @@ -236,6 +237,7 @@ extern int __xfrm_state_delete(struct xfrm_state *x); struct xfrm_state_afinfo { unsigned short family; struct list_head *state_bydst; + struct list_head *state_bysrc; struct list_head *state_byspi; int (*init_flags)(struct xfrm_state *x); void (*init_tempsel)(struct xfrm_state *x, struct flowi *fl, @@ -420,6 +422,30 @@ unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) return 0; } +static __inline__ +unsigned __xfrm4_src_hash(xfrm_address_t *addr) +{ + return __xfrm4_dst_hash(addr); +} + +static __inline__ +unsigned __xfrm6_src_hash(xfrm_address_t *addr) +{ + return __xfrm6_dst_hash(addr); +} + +static __inline__ +unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) +{ + switch (family) { + case AF_INET: + return __xfrm4_src_hash(addr); + case AF_INET6: + return __xfrm6_src_hash(addr); + } + return 0; +} + static __inline__ unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) { diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 97b0c7589711..c56b258fad73 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -122,6 +122,9 @@ __xfrm4_find_acq(u8 mode, u32 reqid, u8 proto, add_timer(&x0->timer); xfrm_state_hold(x0); list_add_tail(&x0->bydst, xfrm4_state_afinfo.state_bydst+h); + h = __xfrm4_src_hash(saddr); + xfrm_state_hold(x0); + list_add_tail(&x0->bysrc, xfrm4_state_afinfo.state_bysrc+h); wake_up(&km_waitq); } if (x0) diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index a1a1f5476442..2fb07850449f 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -126,6 +126,9 @@ __xfrm6_find_acq(u8 mode, u32 reqid, u8 proto, add_timer(&x0->timer); xfrm_state_hold(x0); list_add_tail(&x0->bydst, xfrm6_state_afinfo.state_bydst+h); + h = __xfrm6_src_hash(saddr); + xfrm_state_hold(x0); + list_add_tail(&x0->bysrc, xfrm6_state_afinfo.state_bysrc+h); wake_up(&km_waitq); } if (x0) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 34c038cbdf46..2a9992894e69 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -45,6 +45,7 @@ static DEFINE_SPINLOCK(xfrm_state_lock); * Also, it can be used by ah/esp icmp error handler to find offending SA. */ static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE]; +static struct list_head xfrm_state_bysrc[XFRM_DST_HSIZE]; static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE]; DECLARE_WAIT_QUEUE_HEAD(km_waitq); @@ -200,6 +201,7 @@ struct xfrm_state *xfrm_state_alloc(void) atomic_set(&x->refcnt, 1); atomic_set(&x->tunnel_users, 0); INIT_LIST_HEAD(&x->bydst); + INIT_LIST_HEAD(&x->bysrc); INIT_LIST_HEAD(&x->byspi); init_timer(&x->timer); x->timer.function = xfrm_timer_handler; @@ -240,6 +242,8 @@ int __xfrm_state_delete(struct xfrm_state *x) spin_lock(&xfrm_state_lock); list_del(&x->bydst); __xfrm_state_put(x); + list_del(&x->bysrc); + __xfrm_state_put(x); if (x->id.spi) { list_del(&x->byspi); __xfrm_state_put(x); @@ -415,6 +419,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, x->km.state = XFRM_STATE_ACQ; list_add_tail(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); + list_add_tail(&x->bysrc, xfrm_state_bysrc+h); + xfrm_state_hold(x); if (x->id.spi) { h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); list_add(&x->byspi, xfrm_state_byspi+h); @@ -448,11 +454,19 @@ static void __xfrm_state_insert(struct xfrm_state *x) list_add(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); - h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); + h = xfrm_src_hash(&x->props.saddr, x->props.family); - list_add(&x->byspi, xfrm_state_byspi+h); + list_add(&x->bysrc, xfrm_state_bysrc+h); xfrm_state_hold(x); + if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) { + h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, + x->props.family); + + list_add(&x->byspi, xfrm_state_byspi+h); + xfrm_state_hold(x); + } + if (!mod_timer(&x->timer, jiffies + HZ)) xfrm_state_hold(x); @@ -1075,6 +1089,7 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo) err = -ENOBUFS; else { afinfo->state_bydst = xfrm_state_bydst; + afinfo->state_bysrc = xfrm_state_bysrc; afinfo->state_byspi = xfrm_state_byspi; xfrm_state_afinfo[afinfo->family] = afinfo; } @@ -1097,6 +1112,7 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) else { xfrm_state_afinfo[afinfo->family] = NULL; afinfo->state_byspi = NULL; + afinfo->state_bysrc = NULL; afinfo->state_bydst = NULL; } } @@ -1218,6 +1234,7 @@ void __init xfrm_state_init(void) for (i=0; i Date: Wed, 23 Aug 2006 17:56:04 -0700 Subject: [PATCH 0546/1063] [XFRM] STATE: Search by address using source address list. This is a support to search transformation states by its addresses by using source address list for Mobile IPv6 usage. To use it from user-space, it is also added a message type for source address as a xfrm state option. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/xfrm.h | 1 + include/net/xfrm.h | 2 ++ net/ipv4/xfrm4_state.c | 9 +++++++ net/ipv6/xfrm6_state.c | 21 +++++++++++++++ net/xfrm/xfrm_state.c | 37 +++++++++++++++++++++++--- net/xfrm/xfrm_user.c | 59 +++++++++++++++++++++++++++++++++++++----- 6 files changed, 119 insertions(+), 10 deletions(-) diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 5154064b6d95..66343d3d4b91 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -234,6 +234,7 @@ enum xfrm_attr_type_t { XFRMA_REPLAY_VAL, XFRMA_REPLAY_THRESH, XFRMA_ETIMER_THRESH, + XFRMA_SRCADDR, /* xfrm_address_t */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 88145e3348d0..d9c40e713184 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -244,6 +244,7 @@ struct xfrm_state_afinfo { struct xfrm_tmpl *tmpl, xfrm_address_t *daddr, xfrm_address_t *saddr); struct xfrm_state *(*state_lookup)(xfrm_address_t *daddr, u32 spi, u8 proto); + struct xfrm_state *(*state_lookup_byaddr)(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); struct xfrm_state *(*find_acq)(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create); @@ -937,6 +938,7 @@ extern void xfrm_state_insert(struct xfrm_state *x); extern int xfrm_state_add(struct xfrm_state *x); extern int xfrm_state_update(struct xfrm_state *x); extern struct xfrm_state *xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family); +extern struct xfrm_state *xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family); extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq); extern int xfrm_state_delete(struct xfrm_state *x); extern void xfrm_state_flush(u8 proto); diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index c56b258fad73..616be131b4e3 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -80,6 +80,14 @@ __xfrm4_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto) return NULL; } +/* placeholder until ipv4's code is written */ +static struct xfrm_state * +__xfrm4_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, + u8 proto) +{ + return NULL; +} + static struct xfrm_state * __xfrm4_find_acq(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, @@ -137,6 +145,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = { .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, .state_lookup = __xfrm4_state_lookup, + .state_lookup_byaddr = __xfrm4_state_lookup_byaddr, .find_acq = __xfrm4_find_acq, }; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 2fb07850449f..9c95b9d3e110 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -63,6 +63,26 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->props.family = AF_INET6; } +static struct xfrm_state * +__xfrm6_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, + u8 proto) +{ + struct xfrm_state *x = NULL; + unsigned h; + + h = __xfrm6_src_hash(saddr); + list_for_each_entry(x, xfrm6_state_afinfo.state_bysrc+h, bysrc) { + if (x->props.family == AF_INET6 && + ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) && + ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)x->props.saddr.a6) && + proto == x->id.proto) { + xfrm_state_hold(x); + return x; + } + } + return NULL; +} + static struct xfrm_state * __xfrm6_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto) { @@ -140,6 +160,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, .init_tempsel = __xfrm6_init_tempsel, .state_lookup = __xfrm6_state_lookup, + .state_lookup_byaddr = __xfrm6_state_lookup_byaddr, .find_acq = __xfrm6_find_acq, }; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 2a9992894e69..11f480b12952 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -487,6 +487,16 @@ void xfrm_state_insert(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_insert); +static inline struct xfrm_state * +__xfrm_state_locate(struct xfrm_state_afinfo *afinfo, struct xfrm_state *x, + int use_spi) +{ + if (use_spi) + return afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); + else + return afinfo->state_lookup_byaddr(&x->id.daddr, &x->props.saddr, x->id.proto); +} + static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq); int xfrm_state_add(struct xfrm_state *x) @@ -495,6 +505,7 @@ int xfrm_state_add(struct xfrm_state *x) struct xfrm_state *x1; int family; int err; + int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); family = x->props.family; afinfo = xfrm_state_get_afinfo(family); @@ -503,7 +514,7 @@ int xfrm_state_add(struct xfrm_state *x) spin_lock_bh(&xfrm_state_lock); - x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); + x1 = __xfrm_state_locate(afinfo, x, use_spi); if (x1) { xfrm_state_put(x1); x1 = NULL; @@ -511,7 +522,7 @@ int xfrm_state_add(struct xfrm_state *x) goto out; } - if (x->km.seq) { + if (use_spi && x->km.seq) { x1 = __xfrm_find_acq_byseq(x->km.seq); if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) { xfrm_state_put(x1); @@ -519,7 +530,7 @@ int xfrm_state_add(struct xfrm_state *x) } } - if (!x1) + if (use_spi && !x1) x1 = afinfo->find_acq( x->props.mode, x->props.reqid, x->id.proto, &x->id.daddr, &x->props.saddr, 0); @@ -548,13 +559,14 @@ int xfrm_state_update(struct xfrm_state *x) struct xfrm_state_afinfo *afinfo; struct xfrm_state *x1; int err; + int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); afinfo = xfrm_state_get_afinfo(x->props.family); if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; spin_lock_bh(&xfrm_state_lock); - x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); + x1 = __xfrm_state_locate(afinfo, x, use_spi); err = -ESRCH; if (!x1) @@ -674,6 +686,23 @@ xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, } EXPORT_SYMBOL(xfrm_state_lookup); +struct xfrm_state * +xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, + u8 proto, unsigned short family) +{ + struct xfrm_state *x; + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + if (!afinfo) + return NULL; + + spin_lock_bh(&xfrm_state_lock); + x = afinfo->state_lookup_byaddr(daddr, saddr, proto); + spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); + return x; +} +EXPORT_SYMBOL(xfrm_state_lookup_byaddr); + struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 41f3d51ffc33..b5f8ab71aa54 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -87,6 +87,22 @@ static int verify_encap_tmpl(struct rtattr **xfrma) return 0; } +static int verify_one_addr(struct rtattr **xfrma, enum xfrm_attr_type_t type, + xfrm_address_t **addrp) +{ + struct rtattr *rt = xfrma[type - 1]; + + if (!rt) + return 0; + + if ((rt->rta_len - sizeof(*rt)) < sizeof(**addrp)) + return -EINVAL; + + if (addrp) + *addrp = RTA_DATA(rt); + + return 0; +} static inline int verify_sec_ctx_len(struct rtattr **xfrma) { @@ -418,16 +434,48 @@ out: return err; } +static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, + struct rtattr **xfrma, + int *errp) +{ + struct xfrm_state *x = NULL; + int err; + + if (xfrm_id_proto_match(p->proto, IPSEC_PROTO_ANY)) { + err = -ESRCH; + x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); + } else { + xfrm_address_t *saddr = NULL; + + err = verify_one_addr(xfrma, XFRMA_SRCADDR, &saddr); + if (err) + goto out; + + if (!saddr) { + err = -EINVAL; + goto out; + } + + x = xfrm_state_lookup_byaddr(&p->daddr, saddr, p->proto, + p->family); + } + + out: + if (!x && errp) + *errp = err; + return x; +} + static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) { struct xfrm_state *x; - int err; + int err = -ESRCH; struct km_event c; struct xfrm_usersa_id *p = NLMSG_DATA(nlh); - x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); + x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err); if (x == NULL) - return -ESRCH; + return err; if ((err = security_xfrm_state_delete(x)) != 0) goto out; @@ -578,10 +626,9 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) struct xfrm_usersa_id *p = NLMSG_DATA(nlh); struct xfrm_state *x; struct sk_buff *resp_skb; - int err; + int err = -ESRCH; - x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); - err = -ESRCH; + x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err); if (x == NULL) goto out_noput; From aee5adb4307c4c63a4dc5f3b49984d76f8a71b5b Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 17:57:28 -0700 Subject: [PATCH 0547/1063] [XFRM] STATE: Add a hook to find offset to be inserted header in outbound. On current kernel, ip6_find_1stfragopt() is used by IPv6 IPsec to find offset to be inserted header in outbound for transport mode. (BTW, no usage may be needed for IPv4 case.) Mobile IPv6 requires another logic for routing header and destination options header respectively. This patch is common platform for the offset and adopts it to IPsec. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/xfrm.h | 3 +++ net/ipv6/ah6.c | 3 ++- net/ipv6/esp6.c | 3 ++- net/ipv6/ipcomp6.c | 1 + net/ipv6/ipv6_syms.c | 1 + net/ipv6/xfrm6_mode_transport.c | 2 +- net/ipv6/xfrm6_output.c | 6 ++++++ 7 files changed, 16 insertions(+), 3 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d9c40e713184..eed48f832ce1 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -265,6 +265,7 @@ struct xfrm_type void (*destructor)(struct xfrm_state *); int (*input)(struct xfrm_state *, struct sk_buff *skb); int (*output)(struct xfrm_state *, struct sk_buff *pskb); + int (*hdr_offset)(struct xfrm_state *, struct sk_buff *, u8 **); /* Estimate maximal size of result of transformation of a dgram */ u32 (*get_max_size)(struct xfrm_state *, int size); }; @@ -960,6 +961,8 @@ extern u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr); extern void xfrm6_tunnel_free_spi(xfrm_address_t *saddr); extern u32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr); extern int xfrm6_output(struct sk_buff *skb); +extern int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, + u8 **prevhdr); #ifdef CONFIG_XFRM extern int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 60954fc7eb36..6c0aa51319a5 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -435,7 +435,8 @@ static struct xfrm_type ah6_type = .init_state = ah6_init_state, .destructor = ah6_destroy, .input = ah6_input, - .output = ah6_output + .output = ah6_output, + .hdr_offset = xfrm6_find_1stfragopt, }; static struct inet6_protocol ah6_protocol = { diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 2b8e52e1d0ab..ae50b9511151 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -379,7 +379,8 @@ static struct xfrm_type esp6_type = .destructor = esp6_destroy, .get_max_size = esp6_get_max_size, .input = esp6_input, - .output = esp6_output + .output = esp6_output, + .hdr_offset = xfrm6_find_1stfragopt, }; static struct inet6_protocol esp6_protocol = { diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 19eba8d9f851..ad9c6e824e62 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -461,6 +461,7 @@ static struct xfrm_type ipcomp6_type = .destructor = ipcomp6_destroy, .input = ipcomp6_input, .output = ipcomp6_output, + .hdr_offset = xfrm6_find_1stfragopt, }; static struct inet6_protocol ipcomp6_protocol = diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c index dd4d1ce77769..e1a741612888 100644 --- a/net/ipv6/ipv6_syms.c +++ b/net/ipv6/ipv6_syms.c @@ -31,6 +31,7 @@ EXPORT_SYMBOL(ipv6_chk_addr); EXPORT_SYMBOL(in6_dev_finish_destroy); #ifdef CONFIG_XFRM EXPORT_SYMBOL(xfrm6_rcv); +EXPORT_SYMBOL(xfrm6_find_1stfragopt); #endif EXPORT_SYMBOL(rt6_lookup); EXPORT_SYMBOL(ipv6_push_nfrag_opts); diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c index 711d713e36d8..a5dce216024d 100644 --- a/net/ipv6/xfrm6_mode_transport.c +++ b/net/ipv6/xfrm6_mode_transport.c @@ -35,7 +35,7 @@ static int xfrm6_transport_output(struct sk_buff *skb) skb_push(skb, x->props.header_len); iph = skb->nh.ipv6h; - hdr_len = ip6_find_1stfragopt(skb, &prevhdr); + hdr_len = x->type->hdr_offset(x, skb, &prevhdr); skb->nh.raw = prevhdr - x->props.header_len; skb->h.raw = skb->data + hdr_len; memmove(skb->data, iph, hdr_len); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 26f18869f77b..b4628fbf8ff5 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -17,6 +17,12 @@ #include #include +int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, + u8 **prevhdr) +{ + return ip6_find_1stfragopt(skb, prevhdr); +} + static int xfrm6_tunnel_check_size(struct sk_buff *skb) { int mtu, ret = 0; From 1d71627d699eca831c1fbfb66ea67bb1fba41415 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 17:59:44 -0700 Subject: [PATCH 0548/1063] [XFRM] STATE: Introduce route optimization mode. Route optimization is used with routing header and destination options header for Mobile IPv6. At outbound it makes header space like IPsec transport. At inbound it does nothing because exhdrs.c functions have responsibility to update skbuff information for these headers. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/Kconfig | 7 +++ net/ipv6/Makefile | 1 + net/ipv6/xfrm6_mode_ro.c | 94 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 102 insertions(+) create mode 100644 net/ipv6/xfrm6_mode_ro.c diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 14f0b336519f..1188d9560242 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -127,6 +127,13 @@ config INET6_XFRM_MODE_TUNNEL If unsure, say Y. +config INET6_XFRM_MODE_ROUTEOPTIMIZATION + tristate "IPv6: MIPv6 route optimization mode (EXPERIMENTAL)" + depends on IPV6 && EXPERIMENTAL + select XFRM + ---help--- + Support for MIPv6 route optimization mode. + config IPV6_TUNNEL tristate "IPv6: IPv6-in-IPv6 tunnel" select INET6_TUNNEL diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 9eebf6091279..87e912e31922 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_INET6_XFRM_TUNNEL) += xfrm6_tunnel.o obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o +obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c new file mode 100644 index 000000000000..c11c335312f9 --- /dev/null +++ b/net/ipv6/xfrm6_mode_ro.c @@ -0,0 +1,94 @@ +/* + * xfrm6_mode_ro.c - Route optimization mode for IPv6. + * + * Copyright (C)2003-2006 Helsinki University of Technology + * Copyright (C)2003-2006 USAGI/WIDE Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * Authors: + * Noriaki TAKAMIYA @USAGI + * Masahide NAKAMURA @USAGI + */ + +#include +#include +#include +#include +#include +#include +#include + +/* Add route optimization header space. + * + * The IP header and mutable extension headers will be moved forward to make + * space for the route optimization header. + * + * On exit, skb->h will be set to the start of the encapsulation header to be + * filled in by x->type->output and skb->nh will be set to the nextheader field + * of the extension header directly preceding the encapsulation header, or in + * its absence, that of the top IP header. The value of skb->data will always + * point to the top IP header. + */ +static int xfrm6_ro_output(struct sk_buff *skb) +{ + struct xfrm_state *x = skb->dst->xfrm; + struct ipv6hdr *iph; + u8 *prevhdr; + int hdr_len; + + skb_push(skb, x->props.header_len); + iph = skb->nh.ipv6h; + + hdr_len = x->type->hdr_offset(x, skb, &prevhdr); + skb->nh.raw = prevhdr - x->props.header_len; + skb->h.raw = skb->data + hdr_len; + memmove(skb->data, iph, hdr_len); + return 0; +} + +/* + * Do nothing about routing optimization header unlike IPsec. + */ +static int xfrm6_ro_input(struct xfrm_state *x, struct sk_buff *skb) +{ + return 0; +} + +static struct xfrm_mode xfrm6_ro_mode = { + .input = xfrm6_ro_input, + .output = xfrm6_ro_output, + .owner = THIS_MODULE, + .encap = XFRM_MODE_ROUTEOPTIMIZATION, +}; + +static int __init xfrm6_ro_init(void) +{ + return xfrm_register_mode(&xfrm6_ro_mode, AF_INET6); +} + +static void __exit xfrm6_ro_exit(void) +{ + int err; + + err = xfrm_unregister_mode(&xfrm6_ro_mode, AF_INET6); + BUG_ON(err); +} + +module_init(xfrm6_ro_init); +module_exit(xfrm6_ro_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_ROUTEOPTIMIZATION); From f3bd484021d9486b826b422a017d75dd0bd258ad Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 18:00:48 -0700 Subject: [PATCH 0549/1063] [XFRM]: Restrict authentication algorithm only when inbound transformation protocol is IPsec. For Mobile IPv6 usage, routing header or destination options header is used and it doesn't require this comparison. It is checked only for IPsec template. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index a0d58971391d..f1cdcfb90959 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1004,7 +1004,8 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, (x->id.spi == tmpl->id.spi || !tmpl->id.spi) && (x->props.reqid == tmpl->reqid || !tmpl->reqid) && x->props.mode == tmpl->mode && - (tmpl->aalgos & (1<props.aalgo)) && + ((tmpl->aalgos & (1<props.aalgo)) || + !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) && !(x->props.mode != XFRM_MODE_TRANSPORT && xfrm_state_addr_cmp(tmpl, x, family)); } From fbd9a5b47ee9c319ff0cae584391241ce78ffd6b Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 18:08:21 -0700 Subject: [PATCH 0550/1063] [XFRM] STATE: Common receive function for route optimization extension headers. XFRM_STATE_WILDRECV flag is introduced; the last resort state is set it and receives packet which is not route optimized but uses such extension headers i.e. Mobile IPv6 signaling (binding update and acknowledgement). A node enabled Mobile IPv6 adds the state. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/xfrm.h | 1 + include/net/xfrm.h | 2 + net/ipv6/ipv6_syms.c | 1 + net/ipv6/xfrm6_input.c | 108 +++++++++++++++++++++++++++++++++++++++++ net/xfrm/xfrm_state.c | 1 + 5 files changed, 113 insertions(+) diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 66343d3d4b91..a7c9e4cfb15b 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -256,6 +256,7 @@ struct xfrm_usersa_info { #define XFRM_STATE_NOECN 1 #define XFRM_STATE_DECAP_DSCP 2 #define XFRM_STATE_NOPMTUDISC 4 +#define XFRM_STATE_WILDRECV 8 }; struct xfrm_usersa_id { diff --git a/include/net/xfrm.h b/include/net/xfrm.h index eed48f832ce1..0d735a5aba61 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -955,6 +955,8 @@ extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler); extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler); extern int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi); extern int xfrm6_rcv(struct sk_buff **pskb); +extern int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, + xfrm_address_t *saddr, u8 proto); extern int xfrm6_tunnel_register(struct xfrm6_tunnel *handler); extern int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler); extern u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr); diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c index e1a741612888..7b7b90d9c3d0 100644 --- a/net/ipv6/ipv6_syms.c +++ b/net/ipv6/ipv6_syms.c @@ -31,6 +31,7 @@ EXPORT_SYMBOL(ipv6_chk_addr); EXPORT_SYMBOL(in6_dev_finish_destroy); #ifdef CONFIG_XFRM EXPORT_SYMBOL(xfrm6_rcv); +EXPORT_SYMBOL(xfrm6_input_addr); EXPORT_SYMBOL(xfrm6_find_1stfragopt); #endif EXPORT_SYMBOL(rt6_lookup); diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index ee2f6b3908b6..a40a05789013 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -138,3 +138,111 @@ int xfrm6_rcv(struct sk_buff **pskb) { return xfrm6_rcv_spi(*pskb, 0); } + +int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, + xfrm_address_t *saddr, u8 proto) +{ + struct xfrm_state *x = NULL; + int wildcard = 0; + struct in6_addr any; + xfrm_address_t *xany; + struct xfrm_state *xfrm_vec_one = NULL; + int nh = 0; + int i = 0; + + ipv6_addr_set(&any, 0, 0, 0, 0); + xany = (xfrm_address_t *)&any; + + for (i = 0; i < 3; i++) { + xfrm_address_t *dst, *src; + switch (i) { + case 0: + dst = daddr; + src = saddr; + break; + case 1: + /* lookup state with wild-card source address */ + wildcard = 1; + dst = daddr; + src = xany; + break; + case 2: + default: + /* lookup state with wild-card addresses */ + wildcard = 1; /* XXX */ + dst = xany; + src = xany; + break; + } + + x = xfrm_state_lookup_byaddr(dst, src, proto, AF_INET6); + if (!x) + continue; + + spin_lock(&x->lock); + + if (wildcard) { + if ((x->props.flags & XFRM_STATE_WILDRECV) == 0) { + spin_unlock(&x->lock); + xfrm_state_put(x); + x = NULL; + continue; + } + } + + if (unlikely(x->km.state != XFRM_STATE_VALID)) { + spin_unlock(&x->lock); + xfrm_state_put(x); + x = NULL; + continue; + } + if (xfrm_state_check_expire(x)) { + spin_unlock(&x->lock); + xfrm_state_put(x); + x = NULL; + continue; + } + + nh = x->type->input(x, skb); + if (nh <= 0) { + spin_unlock(&x->lock); + xfrm_state_put(x); + x = NULL; + continue; + } + + x->curlft.bytes += skb->len; + x->curlft.packets++; + + spin_unlock(&x->lock); + + xfrm_vec_one = x; + break; + } + + if (!xfrm_vec_one) + goto drop; + + /* Allocate new secpath or COW existing one. */ + if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { + struct sec_path *sp; + sp = secpath_dup(skb->sp); + if (!sp) + goto drop; + if (skb->sp) + secpath_put(skb->sp); + skb->sp = sp; + } + + if (1 + skb->sp->len > XFRM_MAX_DEPTH) + goto drop; + + skb->sp->xvec[skb->sp->len] = xfrm_vec_one; + skb->sp->len ++; + + return 1; +drop: + if (xfrm_vec_one) + xfrm_state_put(xfrm_vec_one); + return -1; +} diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 11f480b12952..f05371556cce 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -352,6 +352,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, list_for_each_entry(x, xfrm_state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == tmpl->reqid && + !(x->props.flags & XFRM_STATE_WILDRECV) && xfrm_state_addr_check(x, daddr, saddr, family) && tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && From 9e51fd371a022318c5b64b831c43026e89bc4f75 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 18:09:09 -0700 Subject: [PATCH 0551/1063] [XFRM]: Rename secpath_has_tunnel to secpath_has_nontransport. On current kernel inbound transformation state is allowed transport and disallowed tunnel mode when mismatch is occurred between tempates and states. As the result of adding two more modes by Mobile IPv6, this function name is misleading. Inbound transformation can allow only transport mode when mismatch is occurred between template and secpath. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f1cdcfb90959..56abb5c057d4 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1046,7 +1046,7 @@ xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family } EXPORT_SYMBOL(xfrm_decode_session); -static inline int secpath_has_tunnel(struct sec_path *sp, int k) +static inline int secpath_has_nontransport(struct sec_path *sp, int k) { for (; k < sp->len; k++) { if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) @@ -1087,7 +1087,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, xfrm_policy_lookup); if (!pol) - return !skb->sp || !secpath_has_tunnel(skb->sp, 0); + return !skb->sp || !secpath_has_nontransport(skb->sp, 0); pol->curlft.use_time = (unsigned long)xtime.tv_sec; @@ -1111,7 +1111,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, goto reject; } - if (secpath_has_tunnel(sp, k)) + if (secpath_has_nontransport(sp, k)) goto reject; xfrm_pol_put(pol); From 99505a843673faeae962a8cde128c7c034ba6b5e Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 18:10:33 -0700 Subject: [PATCH 0552/1063] [XFRM] STATE: Add a hook to obtain local/remote outbound address. Outbound transformation replaces both source and destination address with state's end-point addresses at the same time when IPsec tunnel mode. It is also required to change them for Mobile IPv6 route optimization, but we should care about the following differences: - changing result is not end-point but care-of address - either source or destination is replaced for each state This hook is a common platform to change outbound address. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 ++ net/ipv6/xfrm6_policy.c | 20 ++++++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 0d735a5aba61..aa3ac994477b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -266,6 +266,8 @@ struct xfrm_type int (*input)(struct xfrm_state *, struct sk_buff *skb); int (*output)(struct xfrm_state *, struct sk_buff *pskb); int (*hdr_offset)(struct xfrm_state *, struct sk_buff *, u8 **); + xfrm_address_t *(*local_addr)(struct xfrm_state *, xfrm_address_t *); + xfrm_address_t *(*remote_addr)(struct xfrm_state *, xfrm_address_t *); /* Estimate maximal size of result of transformation of a dgram */ u32 (*get_max_size)(struct xfrm_state *, int size); }; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 81355bb50328..9328fc88708a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -59,6 +59,22 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) return dst; } +static inline struct in6_addr* +__xfrm6_bundle_addr_remote(struct xfrm_state *x, struct in6_addr *addr) +{ + return (x->type->remote_addr) ? + (struct in6_addr*)x->type->remote_addr(x, (xfrm_address_t *)addr) : + (struct in6_addr*)&x->id.daddr; +} + +static inline struct in6_addr* +__xfrm6_bundle_addr_local(struct xfrm_state *x, struct in6_addr *addr) +{ + return (x->type->local_addr) ? + (struct in6_addr*)x->type->local_addr(x, (xfrm_address_t *)addr) : + (struct in6_addr*)&x->props.saddr; +} + /* Allocate chain of dst_entry's, attach known xfrm's, calculate * all the metrics... Shortly, bundle a bundle. */ @@ -115,8 +131,8 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst1->next = dst_prev; dst_prev = dst1; if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { - remote = (struct in6_addr*)&xfrm[i]->id.daddr; - local = (struct in6_addr*)&xfrm[i]->props.saddr; + remote = __xfrm6_bundle_addr_remote(xfrm[i], remote); + local = __xfrm6_bundle_addr_local(xfrm[i], local); tunnel = 1; } header_len += xfrm[i]->props.header_len; From 1b5c229987dc4d0c92a38fac0cde2aeec08cd775 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 18:11:50 -0700 Subject: [PATCH 0553/1063] [XFRM] STATE: Support non-fragment outbound transformation headers. For originated outbound IPv6 packets which will fragment, ip6_append_data() should know length of extension headers before sending them and the length is carried by dst_entry. IPv6 IPsec headers fragment then transformation was designed to place all headers after fragment header. OTOH Mobile IPv6 extension headers do not fragment then it is a good idea to make dst_entry have non-fragment length to tell it to ip6_append_data(). Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/dst.h | 1 + include/net/xfrm.h | 2 ++ net/ipv4/xfrm4_policy.c | 1 + net/ipv6/ip6_output.c | 2 +- net/ipv6/xfrm6_policy.c | 24 ++++++++++++++++++++++-- 5 files changed, 27 insertions(+), 3 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index 36d54fc248b0..a8d825f90305 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -54,6 +54,7 @@ struct dst_entry unsigned long expires; unsigned short header_len; /* more space at head required */ + unsigned short nfheader_len; /* more non-fragment space at head required */ unsigned short trailer_len; /* space to reserve at tail */ u32 metrics[RTAX_MAX]; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index aa3ac994477b..aa93cc1f6299 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -260,6 +260,8 @@ struct xfrm_type char *description; struct module *owner; __u8 proto; + __u8 flags; +#define XFRM_TYPE_NON_FRAGMENT 1 int (*init_state)(struct xfrm_state *x); void (*destructor)(struct xfrm_state *); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index a5bed741de2c..e517981ceadd 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -135,6 +135,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst_prev->flags |= DST_HOST; dst_prev->lastuse = jiffies; dst_prev->header_len = header_len; + dst_prev->nfheader_len = 0; dst_prev->trailer_len = trailer_len; memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics)); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 2a376b7d91b4..258e3e45f5e0 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -971,7 +971,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); - fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0); + fragheaderlen = sizeof(struct ipv6hdr) + rt->u.dst.nfheader_len + (opt ? opt->opt_nflen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 9328fc88708a..a3f68c8b737e 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -75,6 +75,24 @@ __xfrm6_bundle_addr_local(struct xfrm_state *x, struct in6_addr *addr) (struct in6_addr*)&x->props.saddr; } +static inline void +__xfrm6_bundle_len_inc(int *len, int *nflen, struct xfrm_state *x) +{ + if (x->type->flags & XFRM_TYPE_NON_FRAGMENT) + *nflen += x->props.header_len; + else + *len += x->props.header_len; +} + +static inline void +__xfrm6_bundle_len_dec(int *len, int *nflen, struct xfrm_state *x) +{ + if (x->type->flags & XFRM_TYPE_NON_FRAGMENT) + *nflen -= x->props.header_len; + else + *len -= x->props.header_len; +} + /* Allocate chain of dst_entry's, attach known xfrm's, calculate * all the metrics... Shortly, bundle a bundle. */ @@ -99,6 +117,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int int i; int err = 0; int header_len = 0; + int nfheader_len = 0; int trailer_len = 0; dst = dst_prev = NULL; @@ -135,7 +154,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int local = __xfrm6_bundle_addr_local(xfrm[i], local); tunnel = 1; } - header_len += xfrm[i]->props.header_len; + __xfrm6_bundle_len_inc(&header_len, &nfheader_len, xfrm[i]); trailer_len += xfrm[i]->props.trailer_len; if (tunnel) { @@ -170,6 +189,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst_prev->flags |= DST_HOST; dst_prev->lastuse = jiffies; dst_prev->header_len = header_len; + dst_prev->nfheader_len = nfheader_len; dst_prev->trailer_len = trailer_len; memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics)); @@ -188,7 +208,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int x->u.rt6.rt6i_src = rt0->rt6i_src; x->u.rt6.rt6i_idev = rt0->rt6i_idev; in6_dev_hold(rt0->rt6i_idev); - header_len -= x->u.dst.xfrm->props.header_len; + __xfrm6_bundle_len_dec(&header_len, &nfheader_len, x->u.dst.xfrm); trailer_len -= x->u.dst.xfrm->props.trailer_len; } From 060f02a3bdd4d9ba8aa3c48e9b470672b1f3a585 Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Wed, 23 Aug 2006 18:18:55 -0700 Subject: [PATCH 0554/1063] [XFRM] STATE: Introduce care-of address. Care-of address is carried by state as a transformation option like IPsec encryption/authentication algorithm. Based on MIPL2 kernel patch. Signed-off-by: Noriaki TAKAMIYA Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki --- include/linux/xfrm.h | 1 + include/net/xfrm.h | 3 +++ net/xfrm/xfrm_state.c | 6 ++++++ net/xfrm/xfrm_user.c | 28 +++++++++++++++++++++++++++- 4 files changed, 37 insertions(+), 1 deletion(-) diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index a7c9e4cfb15b..b53f799189af 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -235,6 +235,7 @@ enum xfrm_attr_type_t { XFRMA_REPLAY_THRESH, XFRMA_ETIMER_THRESH, XFRMA_SRCADDR, /* xfrm_address_t */ + XFRMA_COADDR, /* xfrm_address_t */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index aa93cc1f6299..872a2a4022b2 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -134,6 +134,9 @@ struct xfrm_state /* Data for encapsulator */ struct xfrm_encap_tmpl *encap; + /* Data for care-of address */ + xfrm_address_t *coaddr; + /* IPComp needs an IPIP tunnel for handling uncompressed packets */ struct xfrm_state *tunnel; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index f05371556cce..3da89c01ea71 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -78,6 +78,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) kfree(x->ealg); kfree(x->calg); kfree(x->encap); + kfree(x->coaddr); if (x->mode) xfrm_put_mode(x->mode); if (x->type) { @@ -603,6 +604,11 @@ out: if (likely(x1->km.state == XFRM_STATE_VALID)) { if (x->encap && x1->encap) memcpy(x1->encap, x->encap, sizeof(*x1->encap)); + if (x->coaddr && x1->coaddr) { + memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr)); + } + if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel))) + memcpy(&x1->sel, &x->sel, sizeof(x1->sel)); memcpy(&x1->lft, &x->lft, sizeof(x1->lft)); x1->km.dying = 0; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b5f8ab71aa54..939808de9e20 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -187,11 +187,14 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, goto out; if ((err = verify_sec_ctx_len(xfrma))) goto out; + if ((err = verify_one_addr(xfrma, XFRMA_COADDR, NULL))) + goto out; err = -EINVAL; switch (p->mode) { case XFRM_MODE_TRANSPORT: case XFRM_MODE_TUNNEL: + case XFRM_MODE_ROUTEOPTIMIZATION: break; default: @@ -276,6 +279,24 @@ static int attach_sec_ctx(struct xfrm_state *x, struct rtattr *u_arg) return security_xfrm_state_alloc(x, uctx); } +static int attach_one_addr(xfrm_address_t **addrpp, struct rtattr *u_arg) +{ + struct rtattr *rta = u_arg; + xfrm_address_t *p, *uaddrp; + + if (!rta) + return 0; + + uaddrp = RTA_DATA(rta); + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return -ENOMEM; + + memcpy(p, uaddrp, sizeof(*p)); + *addrpp = p; + return 0; +} + static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) { memcpy(&x->id, &p->id, sizeof(x->id)); @@ -365,7 +386,8 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, goto error; if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1]))) goto error; - + if ((err = attach_one_addr(&x->coaddr, xfrma[XFRMA_COADDR-1]))) + goto error; err = xfrm_init_state(x); if (err) goto error; @@ -569,6 +591,10 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) uctx->ctx_len = x->security->ctx_len; memcpy(uctx + 1, x->security->ctx_str, x->security->ctx_len); } + + if (x->coaddr) + RTA_PUT(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); + nlh->nlmsg_len = skb->tail - b; out: sp->this_idx++; From 9afaca057980c02771f4657c455cc7592fcd7373 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 18:20:16 -0700 Subject: [PATCH 0555/1063] [XFRM] IPV6: Update outbound state timestamp for each sending. With this patch transformation state is updated last used time for each sending. Xtime is used for it like other state lifetime expiration. Mobile IPv6 enabled nodes will want to know traffic status of each binding (e.g. judgement to request binding refresh by correspondent node, or to keep home/care-of nonce alive by mobile node). The last used timestamp is an important hint about it. Based on MIPL2 kernel patch. This patch was also written by: Henrik Petander Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/xfrm.h | 1 + include/net/xfrm.h | 3 +++ net/ipv6/xfrm6_output.c | 2 ++ net/xfrm/xfrm_user.c | 3 +++ 4 files changed, 9 insertions(+) diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index b53f799189af..1d8c1f22c12d 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -236,6 +236,7 @@ enum xfrm_attr_type_t { XFRMA_ETIMER_THRESH, XFRMA_SRCADDR, /* xfrm_address_t */ XFRMA_COADDR, /* xfrm_address_t */ + XFRMA_LASTUSED, __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 872a2a4022b2..248874ecf8df 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -167,6 +167,9 @@ struct xfrm_state struct xfrm_lifetime_cur curlft; struct timer_list timer; + /* Last used time */ + u64 lastused; + /* Reference to data common to all the instances of this * transformer. */ struct xfrm_type *type; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index b4628fbf8ff5..db58104e710b 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -75,6 +75,8 @@ static int xfrm6_output_one(struct sk_buff *skb) x->curlft.bytes += skb->len; x->curlft.packets++; + if (x->props.mode == XFRM_MODE_ROUTEOPTIMIZATION) + x->lastused = (u64)xtime.tv_sec; spin_unlock_bh(&x->lock); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 939808de9e20..f643063a1cbd 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -595,6 +595,9 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) if (x->coaddr) RTA_PUT(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); + if (x->lastused) + RTA_PUT(skb, XFRMA_LASTUSED, sizeof(x->lastused), &x->lastused); + nlh->nlmsg_len = skb->tail - b; out: sp->this_idx++; From e53820de0f81da1429048634cadc6ef5f50c2f8b Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 19:12:01 -0700 Subject: [PATCH 0556/1063] [XFRM] IPV6: Restrict bundle reusing For outbound transformation, bundle is checked whether it is suitable for current flow to be reused or not. In such IPv6 case as below, transformation may apply incorrect bundle for the flow instead of creating another bundle: - The policy selector has destination prefix length < 128 (Two or more addresses can be matched it) - Its bundle holds dst entry of default route whose prefix length < 128 (Previous traffic was used such route as next hop) - The policy and the bundle were used a transport mode state and this time flow address is not matched the bundled state. This issue is found by Mobile IPv6 usage to protect mobility signaling by IPsec, but it is not a Mobile IPv6 specific. This patch adds strict check to xfrm_bundle_ok() for each state mode and address when prefix length is less than 128. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/xfrm.h | 19 ++++++++++++++++++- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/xfrm6_policy.c | 4 +++- net/xfrm/xfrm_policy.c | 8 ++++++-- 4 files changed, 28 insertions(+), 5 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 248874ecf8df..7f1630630dcf 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -869,6 +869,23 @@ xfrm_state_addr_check(struct xfrm_state *x, return 0; } +static __inline__ int +xfrm_state_addr_flow_check(struct xfrm_state *x, struct flowi *fl, + unsigned short family) +{ + switch (family) { + case AF_INET: + return __xfrm4_state_addr_check(x, + (xfrm_address_t *)&fl->fl4_dst, + (xfrm_address_t *)&fl->fl4_src); + case AF_INET6: + return __xfrm6_state_addr_check(x, + (xfrm_address_t *)&fl->fl6_dst, + (xfrm_address_t *)&fl->fl6_src); + } + return 0; +} + static inline int xfrm_state_kern(struct xfrm_state *x) { return atomic_read(&x->tunnel_users); @@ -1014,7 +1031,7 @@ extern void xfrm_policy_flush(void); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); extern int xfrm_flush_bundles(void); extern void xfrm_flush_all_bundles(void); -extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family); +extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family, int strict); extern void xfrm_init_pmtu(struct dst_entry *dst); extern wait_queue_head_t km_waitq; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index e517981ceadd..42d8ded0f96a 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -33,7 +33,7 @@ __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) xdst->u.rt.fl.fl4_dst == fl->fl4_dst && xdst->u.rt.fl.fl4_src == fl->fl4_src && xdst->u.rt.fl.fl4_tos == fl->fl4_tos && - xfrm_bundle_ok(xdst, fl, AF_INET)) { + xfrm_bundle_ok(xdst, fl, AF_INET, 0)) { dst_clone(dst); break; } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index a3f68c8b737e..729b4748d6d3 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -50,7 +50,9 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) xdst->u.rt6.rt6i_src.plen); if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) && ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) && - xfrm_bundle_ok(xdst, fl, AF_INET6)) { + xfrm_bundle_ok(xdst, fl, AF_INET6, + (xdst->u.rt6.rt6i_dst.plen != 128 || + xdst->u.rt6.rt6i_src.plen != 128))) { dst_clone(dst); break; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 56abb5c057d4..ad2a5cba1f5b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1167,7 +1167,7 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) static int stale_bundle(struct dst_entry *dst) { - return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC); + return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0); } void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) @@ -1282,7 +1282,7 @@ EXPORT_SYMBOL(xfrm_init_pmtu); * still valid. */ -int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) +int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family, int strict) { struct dst_entry *dst = &first->u.dst; struct xfrm_dst *last; @@ -1304,6 +1304,10 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; + if (strict && fl && dst->xfrm->props.mode != XFRM_MODE_TUNNEL && + !xfrm_state_addr_flow_check(dst->xfrm, fl, family)) + return 0; + mtu = dst_mtu(dst->child); if (xdst->child_mtu_cached != mtu) { last = xdst; From 654b32c6aad19d2fd363813cd8a1a1e64daf611b Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 19:12:56 -0700 Subject: [PATCH 0557/1063] [XFRM]: Fix message about transformation user interface. Transformation user interface is not only for IPsec. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/xfrm/Kconfig | 6 +++--- net/xfrm/xfrm_user.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 0c1c04322baf..43228f7fd3a0 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -6,11 +6,11 @@ config XFRM depends on NET config XFRM_USER - tristate "IPsec user configuration interface" + tristate "Transformation user configuration interface" depends on INET && XFRM ---help--- - Support for IPsec user configuration interface used - by native Linux tools. + Support for Transformation(XFRM) user configuration interface + like IPsec used by native Linux tools. If unsure, say Y. diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index f643063a1cbd..3a83c5987c26 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2054,7 +2054,7 @@ static int __init xfrm_user_init(void) { struct sock *nlsk; - printk(KERN_INFO "Initializing IPsec netlink socket\n"); + printk(KERN_INFO "Initializing XFRM netlink socket\n"); nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX, xfrm_netlink_rcv, THIS_MODULE); From ee53826801a8fa7a0e333895421ef6d0e5fbfbf0 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 19:13:46 -0700 Subject: [PATCH 0558/1063] [IPV6]: Add Kconfig to enable Mobile IPv6. Add Kconfig to enable Mobile IPv6. Based on MIPL2 kernel patch. Signed-off-by: Noriaki TAKAMIYA Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/Kconfig | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 1188d9560242..21e0cc808f44 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -98,6 +98,15 @@ config INET6_IPCOMP If unsure, say Y. +config IPV6_MIP6 + bool "IPv6: Mobility (EXPERIMENTAL)" + depends on IPV6 && EXPERIMENTAL + select XFRM + ---help--- + Support for IPv6 Mobility described in RFC 3775. + + If unsure, say N. + config INET6_XFRM_TUNNEL tristate select INET6_TUNNEL From 642ec62eee5bdc158e01029220c8a23c685778fb Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Wed, 23 Aug 2006 19:15:07 -0700 Subject: [PATCH 0559/1063] [IPV6] MIP6: Add routing header type 2 definition. Add routing header type 2 definition for Mobile IPv6. Based on MIPL2 kernel patch. Signed-off-by: Noriaki TAKAMIYA Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki --- include/linux/ipv6.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 02d14a3ff2af..d995662e94c4 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -29,6 +29,7 @@ struct in6_ifreq { #define IPV6_SRCRT_STRICT 0x01 /* this hop must be a neighbor */ #define IPV6_SRCRT_TYPE_0 0 /* IPv6 type 0 Routing Header */ +#define IPV6_SRCRT_TYPE_2 2 /* IPv6 type 2 Routing Header */ /* * routing header @@ -73,6 +74,18 @@ struct rt0_hdr { #define rt0_type rt_hdr.type }; +/* + * routing header type 2 + */ + +struct rt2_hdr { + struct ipv6_rt_hdr rt_hdr; + __u32 reserved; + struct in6_addr addr; + +#define rt2_type rt_hdr.type +}; + struct ipv6_auth_hdr { __u8 nexthdr; __u8 hdrlen; /* This one is measured in 32 bit units! */ From 65d4ed92219b28875efb52de5700da8c3dfa83e1 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 19:16:22 -0700 Subject: [PATCH 0560/1063] [IPV6] MIP6: Add inbound interface of routing header type 2. Add inbound interface of routing header type 2 for Mobile IPv6. Based on MIPL2 kernel patch. This patch was also written by: Ville Nuorvala Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/addrconf.h | 7 +++++ net/ipv6/exthdrs.c | 69 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 68 insertions(+), 8 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 3d71251b3eca..5fc8627435eb 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -61,6 +61,13 @@ extern int addrconf_set_dstaddr(void __user *arg); extern int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict); +/* XXX: this is a placeholder till addrconf supports */ +#ifdef CONFIG_IPV6_MIP6 +static inline int ipv6_chk_home_addr(struct in6_addr *addr) +{ + return 0; +} +#endif extern struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev, int strict); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 05afa6b1912b..8d3a0e17314d 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -43,6 +43,9 @@ #include #include #include +#ifdef CONFIG_IPV6_MIP6 +#include +#endif #include @@ -219,7 +222,7 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); - struct in6_addr *addr; + struct in6_addr *addr = NULL; struct in6_addr daddr; int n, i; @@ -244,6 +247,23 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp) looped_back: if (hdr->segments_left == 0) { + switch (hdr->type) { +#ifdef CONFIG_IPV6_MIP6 + case IPV6_SRCRT_TYPE_2: + /* Silently discard type 2 header unless it was + * processed by own + */ + if (!addr) { + IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); + kfree_skb(skb); + return -1; + } + break; +#endif + default: + break; + } + opt->lastopt = skb->h.raw - skb->nh.raw; opt->srcrt = skb->h.raw - skb->nh.raw; skb->h.raw += (hdr->hdrlen + 1) << 3; @@ -253,17 +273,29 @@ looped_back: return 1; } - if (hdr->type != IPV6_SRCRT_TYPE_0) { + switch (hdr->type) { + case IPV6_SRCRT_TYPE_0: + if (hdr->hdrlen & 0x01) { + IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw); + return -1; + } + break; +#ifdef CONFIG_IPV6_MIP6 + case IPV6_SRCRT_TYPE_2: + /* Silently discard invalid RTH type 2 */ + if (hdr->hdrlen != 2 || hdr->segments_left != 1) { + IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + kfree_skb(skb); + return -1; + } + break; +#endif + default: IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw); return -1; } - - if (hdr->hdrlen & 0x01) { - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw); - return -1; - } /* * This is the routing header forwarding algorithm from @@ -303,6 +335,27 @@ looped_back: addr = rthdr->addr; addr += i - 1; + switch (hdr->type) { +#ifdef CONFIG_IPV6_MIP6 + case IPV6_SRCRT_TYPE_2: + if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, + (xfrm_address_t *)&skb->nh.ipv6h->saddr, + IPPROTO_ROUTING) < 0) { + IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); + kfree_skb(skb); + return -1; + } + if (!ipv6_chk_home_addr(addr)) { + IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); + kfree_skb(skb); + return -1; + } + break; +#endif + default: + break; + } + if (ipv6_addr_is_multicast(addr)) { IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); From 280a9d340057ce1b3cca63084df22f4ef5b35fba Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 19:17:12 -0700 Subject: [PATCH 0561/1063] [IPV6] MIP6: Add socket option and ancillary data interface of routing header type 2. Add socket option and ancillary data interface of routing header type 2. Mobile IPv6 application will use this to send binding acknowledgement with the header without relation of confirmed route optimization (binding). Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/datagram.c | 11 +++++++---- net/ipv6/ipv6_sockglue.c | 10 +++++++++- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 8561b9da6db6..7206747022fc 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -648,10 +648,13 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg); - /* - * TYPE 0 - */ - if (rthdr->type) { + switch (rthdr->type) { + case IPV6_SRCRT_TYPE_0: +#ifdef CONFIG_IPV6_MIP6 + case IPV6_SRCRT_TYPE_2: +#endif + break; + default: err = -EINVAL; goto exit_f; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index a5eaaf693abf..4f3bb7fcc8b5 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -407,8 +407,16 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, /* routing header option needs extra check */ if (optname == IPV6_RTHDR && opt->srcrt) { struct ipv6_rt_hdr *rthdr = opt->srcrt; - if (rthdr->type) + switch (rthdr->type) { + case IPV6_SRCRT_TYPE_0: +#ifdef CONFIG_IPV6_MIP6 + case IPV6_SRCRT_TYPE_2: +#endif + break; + default: goto sticky_done; + } + if ((rthdr->hdrlen & 1) || (rthdr->hdrlen >> 1) != rthdr->segments_left) goto sticky_done; From c61a404325093250b676f40ad8f4dd00f3bcab5f Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 19:18:35 -0700 Subject: [PATCH 0562/1063] [IPV6]: Find option offset by type. This is a helper to search option offset from extension header which can carry TLV option like destination options header. Mobile IPv6 home address option will use it. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/ipv6.h | 2 ++ net/ipv6/exthdrs.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index ece7e8a84ffd..c4ea12710576 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -506,6 +506,8 @@ extern int ipv6_skip_exthdr(const struct sk_buff *, int start, extern int ipv6_ext_hdr(u8 nexthdr); +extern int ipv6_find_tlv(struct sk_buff *skb, int offset, int type); + extern struct ipv6_txoptions * ipv6_invert_rthdr(struct sock *sk, struct ipv6_rt_hdr *hdr); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 8d3a0e17314d..50ff49e518bc 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -49,6 +49,49 @@ #include +int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) +{ + int packet_len = skb->tail - skb->nh.raw; + struct ipv6_opt_hdr *hdr; + int len; + + if (offset + 2 > packet_len) + goto bad; + hdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); + len = ((hdr->hdrlen + 1) << 3); + + if (offset + len > packet_len) + goto bad; + + offset += 2; + len -= 2; + + while (len > 0) { + int opttype = skb->nh.raw[offset]; + int optlen; + + if (opttype == type) + return offset; + + switch (opttype) { + case IPV6_TLV_PAD0: + optlen = 1; + break; + default: + optlen = skb->nh.raw[offset + 1] + 2; + if (optlen > len) + goto bad; + break; + } + offset += optlen; + len -= optlen; + } + /* not_found */ + return -1; + bad: + return -1; +} + /* * Parsing tlv encoded headers. * From a80ff03e05e4343d647780c116b02ec86078fd24 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 19:19:50 -0700 Subject: [PATCH 0563/1063] [IPV6]: Allow to replace skbuff by TLV parser. In receiving Mobile IPv6 home address option which is a TLV carried by destination options header, kernel will try to mangle source adderss of packet. Think of cloned skbuff it is required to replace it by the parser just like routing header case. This is a framework to achieve that to allow TLV parser to replace inbound skbuff pointer. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/ipv6.h | 2 +- net/ipv6/exthdrs.c | 29 +++++++++++++++++++---------- net/ipv6/ip6_input.c | 2 +- 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index c4ea12710576..8e6ec6063f8c 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -229,7 +229,7 @@ extern int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *)); -extern int ipv6_parse_hopopts(struct sk_buff *skb); +extern int ipv6_parse_hopopts(struct sk_buff **skbp); extern struct ipv6_txoptions * ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt); extern struct ipv6_txoptions * ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 50ff49e518bc..1cdd0f0b5d34 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -102,7 +102,7 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) struct tlvtype_proc { int type; - int (*func)(struct sk_buff *skb, int offset); + int (*func)(struct sk_buff **skbp, int offset); }; /********************* @@ -111,8 +111,10 @@ struct tlvtype_proc { /* An unknown option is detected, decide what to do */ -static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) +static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff) { + struct sk_buff *skb = *skbp; + switch ((skb->nh.raw[optoff] & 0xC0) >> 6) { case 0: /* ignore */ return 1; @@ -137,8 +139,9 @@ static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) /* Parse tlv encoded option header (hop-by-hop or destination) */ -static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb) +static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp) { + struct sk_buff *skb = *skbp; struct tlvtype_proc *curr; int off = skb->h.raw - skb->nh.raw; int len = ((skb->h.raw[1]+1)<<3); @@ -168,13 +171,13 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb) /* type specific length/alignment checks will be performed in the func(). */ - if (curr->func(skb, off) == 0) + if (curr->func(skbp, off) == 0) return 0; break; } } if (curr->type < 0) { - if (ip6_tlvopt_unknown(skb, off) == 0) + if (ip6_tlvopt_unknown(skbp, off) == 0) return 0; } break; @@ -213,7 +216,8 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) opt->lastopt = skb->h.raw - skb->nh.raw; opt->dst1 = skb->h.raw - skb->nh.raw; - if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { + if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) { + skb = *skbp; skb->h.raw += ((skb->h.raw[1]+1)<<3); opt->nhoff = opt->dst1; return 1; @@ -517,8 +521,10 @@ EXPORT_SYMBOL_GPL(ipv6_invert_rthdr); /* Router Alert as of RFC 2711 */ -static int ipv6_hop_ra(struct sk_buff *skb, int optoff) +static int ipv6_hop_ra(struct sk_buff **skbp, int optoff) { + struct sk_buff *skb = *skbp; + if (skb->nh.raw[optoff+1] == 2) { IP6CB(skb)->ra = optoff; return 1; @@ -531,8 +537,9 @@ static int ipv6_hop_ra(struct sk_buff *skb, int optoff) /* Jumbo payload */ -static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff) +static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff) { + struct sk_buff *skb = *skbp; u32 pkt_len; if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) { @@ -581,8 +588,9 @@ static struct tlvtype_proc tlvprochopopt_lst[] = { { -1, } }; -int ipv6_parse_hopopts(struct sk_buff *skb) +int ipv6_parse_hopopts(struct sk_buff **skbp) { + struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); /* @@ -598,7 +606,8 @@ int ipv6_parse_hopopts(struct sk_buff *skb) } opt->hop = sizeof(struct ipv6hdr); - if (ip6_parse_tlv(tlvprochopopt_lst, skb)) { + if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) { + skb = *skbp; skb->h.raw += (skb->h.raw[1]+1)<<3; opt->nhoff = sizeof(struct ipv6hdr); return 1; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 25c2a9e03895..6b8e6d76a58b 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -111,7 +111,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt } if (hdr->nexthdr == NEXTHDR_HOP) { - if (ipv6_parse_hopopts(skb) < 0) { + if (ipv6_parse_hopopts(&skb) < 0) { IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); return 0; } From 842426e719f86cd5709617208efae93ff1a1e2d8 Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Wed, 23 Aug 2006 19:21:34 -0700 Subject: [PATCH 0564/1063] [IPV6] MIP6: Add home address option definition. Add home address option definition for Mobile IPv6. Based on MIPL2 kernel patch. Signed-off-by: Noriaki TAKAMIYA Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/in6.h | 1 + include/linux/ipv6.h | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/include/linux/in6.h b/include/linux/in6.h index 304aaedea305..086ec2ac8c5f 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -142,6 +142,7 @@ struct in6_flowlabel_req #define IPV6_TLV_PADN 1 #define IPV6_TLV_ROUTERALERT 5 #define IPV6_TLV_JUMBO 194 +#define IPV6_TLV_HAO 201 /* home address option */ /* * IPV6 socket options diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index d995662e94c4..5bf4406e26d4 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -86,6 +86,16 @@ struct rt2_hdr { #define rt2_type rt_hdr.type }; +/* + * home address option in destination options header + */ + +struct ipv6_destopt_hao { + __u8 type; + __u8 length; + struct in6_addr addr; +} __attribute__ ((__packed__)); + struct ipv6_auth_hdr { __u8 nexthdr; __u8 hdrlen; /* This one is measured in 32 bit units! */ From a831f5bbc89a9978795504be9e1ff412043f8f77 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 19:24:48 -0700 Subject: [PATCH 0565/1063] [IPV6] MIP6: Add inbound interface of home address option. Add inbound function of home address option by registering it to TLV table for destination options header. Based on MIPL2 kernel patch. This patch was also written by: Ville Nuorvala Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/ipv6.h | 3 ++ net/ipv6/exthdrs.c | 84 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 86 insertions(+), 1 deletion(-) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 5bf4406e26d4..db3b2ba0f4f8 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -226,6 +226,9 @@ struct inet6_skb_parm { __u16 dst0; __u16 srcrt; __u16 dst1; +#ifdef CONFIG_IPV6_MIP6 + __u16 dsthao; +#endif __u16 lastopt; __u32 nhoff; __u16 flags; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 1cdd0f0b5d34..6a6466bb5f26 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -196,8 +196,80 @@ bad: Destination options header. *****************************/ +#ifdef CONFIG_IPV6_MIP6 +static int ipv6_dest_hao(struct sk_buff **skbp, int optoff) +{ + struct sk_buff *skb = *skbp; + struct ipv6_destopt_hao *hao; + struct inet6_skb_parm *opt = IP6CB(skb); + struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->nh.raw; + struct in6_addr tmp_addr; + int ret; + + if (opt->dsthao) { + LIMIT_NETDEBUG(KERN_DEBUG "hao duplicated\n"); + goto discard; + } + opt->dsthao = opt->dst1; + opt->dst1 = 0; + + hao = (struct ipv6_destopt_hao *)(skb->nh.raw + optoff); + + if (hao->length != 16) { + LIMIT_NETDEBUG( + KERN_DEBUG "hao invalid option length = %d\n", hao->length); + goto discard; + } + + if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) { + LIMIT_NETDEBUG( + KERN_DEBUG "hao is not an unicast addr: " NIP6_FMT "\n", NIP6(hao->addr)); + goto discard; + } + + ret = xfrm6_input_addr(skb, (xfrm_address_t *)&ipv6h->daddr, + (xfrm_address_t *)&hao->addr, IPPROTO_DSTOPTS); + if (unlikely(ret < 0)) + goto discard; + + if (skb_cloned(skb)) { + struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC); + if (skb2 == NULL) + goto discard; + + kfree_skb(skb); + + /* update all variable using below by copied skbuff */ + *skbp = skb = skb2; + hao = (struct ipv6_destopt_hao *)(skb2->nh.raw + optoff); + ipv6h = (struct ipv6hdr *)skb2->nh.raw; + } + + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; + + ipv6_addr_copy(&tmp_addr, &ipv6h->saddr); + ipv6_addr_copy(&ipv6h->saddr, &hao->addr); + ipv6_addr_copy(&hao->addr, &tmp_addr); + + if (skb->tstamp.off_sec == 0) + __net_timestamp(skb); + + return 1; + + discard: + kfree_skb(skb); + return 0; +} +#endif + static struct tlvtype_proc tlvprocdestopt_lst[] = { - /* No destination options are defined now */ +#ifdef CONFIG_IPV6_MIP6 + { + .type = IPV6_TLV_HAO, + .func = ipv6_dest_hao, + }, +#endif {-1, NULL} }; @@ -205,6 +277,9 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); +#ifdef CONFIG_IPV6_MIP6 + __u16 dstbuf; +#endif if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) || !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) { @@ -215,11 +290,18 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) opt->lastopt = skb->h.raw - skb->nh.raw; opt->dst1 = skb->h.raw - skb->nh.raw; +#ifdef CONFIG_IPV6_MIP6 + dstbuf = opt->dst1; +#endif if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) { skb = *skbp; skb->h.raw += ((skb->h.raw[1]+1)<<3); +#ifdef CONFIG_IPV6_MIP6 + opt->nhoff = dstbuf; +#else opt->nhoff = opt->dst1; +#endif return 1; } From 8dd7368dd97def967bbb3aec67b882e8dfd1a528 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 23 Aug 2006 19:25:55 -0700 Subject: [PATCH 0566/1063] [IPV6]: Put dsthao after flags in order to pack inet6_skb_parm better. Signed-off-by: David S. Miller --- include/linux/ipv6.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index db3b2ba0f4f8..1d6d3ccc9413 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -226,12 +226,12 @@ struct inet6_skb_parm { __u16 dst0; __u16 srcrt; __u16 dst1; -#ifdef CONFIG_IPV6_MIP6 - __u16 dsthao; -#endif __u16 lastopt; __u32 nhoff; __u16 flags; +#ifdef CONFIG_IPV6_MIP6 + __u16 dsthao; +#endif #define IP6SKB_XFRM_TRANSFORMED 1 }; From 793832361fe7e9c3fcae2edd1d293c583a0a095c Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 19:27:25 -0700 Subject: [PATCH 0567/1063] [IPV6] MIP6: Revert address to send ICMPv6 error. IPv6 source address is replaced in receiving packet with home address option carried by destination options header. To send ICMPv6 error back, original address which is received one on wire should be used. This function checks such header is included and reverts them. Based on MIPL2 kernel patch. This patch was also written by: Ville Nuorvala Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index e3a8e27af950..4ec876066b3f 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -273,6 +273,29 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st return 0; } +#ifdef CONFIG_IPV6_MIP6 +static void mip6_addr_swap(struct sk_buff *skb) +{ + struct ipv6hdr *iph = skb->nh.ipv6h; + struct inet6_skb_parm *opt = IP6CB(skb); + struct ipv6_destopt_hao *hao; + struct in6_addr tmp; + int off; + + if (opt->dsthao) { + off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO); + if (likely(off >= 0)) { + hao = (struct ipv6_destopt_hao *)(skb->nh.raw + off); + ipv6_addr_copy(&tmp, &iph->saddr); + ipv6_addr_copy(&iph->saddr, &hao->addr); + ipv6_addr_copy(&hao->addr, &tmp); + } + } +} +#else +static inline void mip6_addr_swap(struct sk_buff *skb) {} +#endif + /* * Send an ICMP message in response to a packet in error */ @@ -350,6 +373,8 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, return; } + mip6_addr_swap(skb); + memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_ICMPV6; ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr); From 27637df92e25dfb45dd71a93a2f4bf9c080fa627 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 19:29:47 -0700 Subject: [PATCH 0568/1063] [IPV6] IPSEC: Support sending with Mobile IPv6 extension headers. Mobile IPv6 defines home address option as an option of destination options header. It is placed before fragment header then ip6_find_1stfragopt() is fixed to know about it. Home address option also carries final source address of the flow, then outbound AH calculation should take care of it like routing header case. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/ah6.c | 109 ++++++++++++++++++++++++++++++++++++++++++ net/ipv6/ip6_output.c | 18 +++++-- 2 files changed, 122 insertions(+), 5 deletions(-) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 6c0aa51319a5..0f2b4e330aa9 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -74,6 +74,68 @@ bad: return 0; } +#ifdef CONFIG_IPV6_MIP6 +/** + * ipv6_rearrange_destopt - rearrange IPv6 destination options header + * @iph: IPv6 header + * @destopt: destionation options header + */ +static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *destopt) +{ + u8 *opt = (u8 *)destopt; + int len = ipv6_optlen(destopt); + int off = 0; + int optlen = 0; + + off += 2; + len -= 2; + + while (len > 0) { + + switch (opt[off]) { + + case IPV6_TLV_PAD0: + optlen = 1; + break; + default: + if (len < 2) + goto bad; + optlen = opt[off+1]+2; + if (len < optlen) + goto bad; + + /* Rearrange the source address in @iph and the + * addresses in home address option for final source. + * See 11.3.2 of RFC 3775 for details. + */ + if (opt[off] == IPV6_TLV_HAO) { + struct in6_addr final_addr; + struct ipv6_destopt_hao *hao; + + hao = (struct ipv6_destopt_hao *)&opt[off]; + if (hao->length != sizeof(hao->addr)) { + if (net_ratelimit()) + printk(KERN_WARNING "destopt hao: invalid header length: %u\n", hao->length); + goto bad; + } + ipv6_addr_copy(&final_addr, &hao->addr); + ipv6_addr_copy(&hao->addr, &iph->saddr); + ipv6_addr_copy(&iph->saddr, &final_addr); + } + break; + } + + off += optlen; + len -= optlen; + } + if (len == 0) + return; + +bad: + return; +} +#endif + /** * ipv6_rearrange_rthdr - rearrange IPv6 routing header * @iph: IPv6 header @@ -113,7 +175,11 @@ static void ipv6_rearrange_rthdr(struct ipv6hdr *iph, struct ipv6_rt_hdr *rthdr) ipv6_addr_copy(&iph->daddr, &final_addr); } +#ifdef CONFIG_IPV6_MIP6 +static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir) +#else static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len) +#endif { union { struct ipv6hdr *iph; @@ -128,6 +194,28 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len) while (exthdr.raw < end) { switch (nexthdr) { +#ifdef CONFIG_IPV6_MIP6 + case NEXTHDR_HOP: + if (!zero_out_mutable_opts(exthdr.opth)) { + LIMIT_NETDEBUG( + KERN_WARNING "overrun %sopts\n", + nexthdr == NEXTHDR_HOP ? + "hop" : "dest"); + return -EINVAL; + } + break; + case NEXTHDR_DEST: + if (dir == XFRM_POLICY_OUT) + ipv6_rearrange_destopt(iph, exthdr.opth); + if (!zero_out_mutable_opts(exthdr.opth)) { + LIMIT_NETDEBUG( + KERN_WARNING "overrun %sopts\n", + nexthdr == NEXTHDR_HOP ? + "hop" : "dest"); + return -EINVAL; + } + break; +#else case NEXTHDR_HOP: case NEXTHDR_DEST: if (!zero_out_mutable_opts(exthdr.opth)) { @@ -138,6 +226,7 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len) return -EINVAL; } break; +#endif case NEXTHDR_ROUTING: ipv6_rearrange_rthdr(iph, exthdr.rth); @@ -164,6 +253,9 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) u8 nexthdr; char tmp_base[8]; struct { +#ifdef CONFIG_IPV6_MIP6 + struct in6_addr saddr; +#endif struct in6_addr daddr; char hdrs[0]; } *tmp_ext; @@ -188,10 +280,18 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) err = -ENOMEM; goto error; } +#ifdef CONFIG_IPV6_MIP6 + memcpy(tmp_ext, &top_iph->saddr, extlen); + err = ipv6_clear_mutable_options(top_iph, + extlen - sizeof(*tmp_ext) + + sizeof(*top_iph), + XFRM_POLICY_OUT); +#else memcpy(tmp_ext, &top_iph->daddr, extlen); err = ipv6_clear_mutable_options(top_iph, extlen - sizeof(*tmp_ext) + sizeof(*top_iph)); +#endif if (err) goto error_free_iph; } @@ -222,7 +322,11 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(top_iph, tmp_base, sizeof(tmp_base)); if (tmp_ext) { +#ifdef CONFIG_IPV6_MIP6 + memcpy(&top_iph->saddr, tmp_ext, extlen); +#else memcpy(&top_iph->daddr, tmp_ext, extlen); +#endif error_free_iph: kfree(tmp_ext); } @@ -282,8 +386,13 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) if (!tmp_hdr) goto out; memcpy(tmp_hdr, skb->nh.raw, hdr_len); +#ifdef CONFIG_IPV6_MIP6 + if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len, XFRM_POLICY_IN)) + goto free_out; +#else if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len)) goto free_out; +#endif skb->nh.ipv6h->priority = 0; skb->nh.ipv6h->flow_lbl[0] = 0; skb->nh.ipv6h->flow_lbl[1] = 0; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 258e3e45f5e0..c14ea1ecf379 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -475,17 +475,25 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) switch (**nexthdr) { case NEXTHDR_HOP: + break; case NEXTHDR_ROUTING: + found_rhdr = 1; + break; case NEXTHDR_DEST: - if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1; - if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset; - offset += ipv6_optlen(exthdr); - *nexthdr = &exthdr->nexthdr; - exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); +#ifdef CONFIG_IPV6_MIP6 + if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) + break; +#endif + if (found_rhdr) + return offset; break; default : return offset; } + + offset += ipv6_optlen(exthdr); + *nexthdr = &exthdr->nexthdr; + exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); } return offset; From 2c8d7ca0f76103855ad1f2a930e05683b64a00eb Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Wed, 23 Aug 2006 20:31:11 -0700 Subject: [PATCH 0569/1063] [IPV6] MIP6: Add routing header type 2 transformation. Add routing header type 2 transformation for Mobile IPv6. Based on MIPL2 kernel patch. Signed-off-by: Noriaki TAKAMIYA Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/mip6.h | 31 ++++++++ net/ipv6/Makefile | 2 + net/ipv6/af_inet6.c | 9 +++ net/ipv6/mip6.c | 181 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 223 insertions(+) create mode 100644 include/net/mip6.h create mode 100644 net/ipv6/mip6.c diff --git a/include/net/mip6.h b/include/net/mip6.h new file mode 100644 index 000000000000..644b8b673048 --- /dev/null +++ b/include/net/mip6.h @@ -0,0 +1,31 @@ +/* + * Copyright (C)2003-2006 Helsinki University of Technology + * Copyright (C)2003-2006 USAGI/WIDE Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * Authors: + * Noriaki TAKAMIYA @USAGI + * Masahide NAKAMURA @USAGI + * YOSHIFUJI Hideaki @USAGI + */ +#ifndef _NET_MIP6_H +#define _NET_MIP6_H + +extern int mip6_init(void); +extern void mip6_fini(void); + +#endif diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 87e912e31922..0213c6612b58 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -14,6 +14,8 @@ ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ xfrm6_output.o ipv6-$(CONFIG_NETFILTER) += netfilter.o ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o +ipv6-$(CONFIG_IPV6_MIP6) += mip6.o + ipv6-objs += $(ipv6-y) obj-$(CONFIG_INET6_AH) += ah6.o diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 57ee5ddea96f..fc9c8a99bea6 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -59,6 +59,9 @@ #ifdef CONFIG_IPV6_TUNNEL #include #endif +#ifdef CONFIG_IPV6_MIP6 +#include +#endif #include #include @@ -857,6 +860,9 @@ static int __init inet6_init(void) ipv6_frag_init(); ipv6_nodata_init(); ipv6_destopt_init(); +#ifdef CONFIG_IPV6_MIP6 + mip6_init(); +#endif /* Init v6 transport protocols. */ udpv6_init(); @@ -919,6 +925,9 @@ static void __exit inet6_exit(void) udp6_proc_exit(); tcp6_proc_exit(); raw6_proc_exit(); +#endif +#ifdef CONFIG_IPV6_MIP6 + mip6_fini(); #endif /* Cleanup code parts. */ sit_cleanup(); diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c new file mode 100644 index 000000000000..63e548b6f81e --- /dev/null +++ b/net/ipv6/mip6.c @@ -0,0 +1,181 @@ +/* + * Copyright (C)2003-2006 Helsinki University of Technology + * Copyright (C)2003-2006 USAGI/WIDE Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * Authors: + * Noriaki TAKAMIYA @USAGI + * Masahide NAKAMURA @USAGI + */ + +#include +#include +#include +#include +#include +#include +#include + +static xfrm_address_t *mip6_xfrm_addr(struct xfrm_state *x, xfrm_address_t *addr) +{ + return x->coaddr; +} + +static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb) +{ + struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data; + + if (!ipv6_addr_equal(&rt2->addr, (struct in6_addr *)x->coaddr) && + !ipv6_addr_any((struct in6_addr *)x->coaddr)) + return -ENOENT; + + return rt2->rt_hdr.nexthdr; +} + +/* Routing Header type 2 is inserted. + * IP Header's dst address is replaced with Routing Header's Home Address. + */ +static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipv6hdr *iph; + struct rt2_hdr *rt2; + u8 nexthdr; + + iph = (struct ipv6hdr *)skb->data; + iph->payload_len = htons(skb->len - sizeof(*iph)); + + nexthdr = *skb->nh.raw; + *skb->nh.raw = IPPROTO_ROUTING; + + rt2 = (struct rt2_hdr *)skb->h.raw; + rt2->rt_hdr.nexthdr = nexthdr; + rt2->rt_hdr.hdrlen = (x->props.header_len >> 3) - 1; + rt2->rt_hdr.type = IPV6_SRCRT_TYPE_2; + rt2->rt_hdr.segments_left = 1; + memset(&rt2->reserved, 0, sizeof(rt2->reserved)); + + BUG_TRAP(rt2->rt_hdr.hdrlen == 2); + + memcpy(&rt2->addr, &iph->daddr, sizeof(rt2->addr)); + memcpy(&iph->daddr, x->coaddr, sizeof(iph->daddr)); + + return 0; +} + +static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb, + u8 **nexthdr) +{ + u16 offset = sizeof(struct ipv6hdr); + struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1); + unsigned int packet_len = skb->tail - skb->nh.raw; + int found_rhdr = 0; + + *nexthdr = &skb->nh.ipv6h->nexthdr; + + while (offset + 1 <= packet_len) { + + switch (**nexthdr) { + case NEXTHDR_HOP: + break; + case NEXTHDR_ROUTING: + if (offset + 3 <= packet_len) { + struct ipv6_rt_hdr *rt; + rt = (struct ipv6_rt_hdr *)(skb->nh.raw + offset); + if (rt->type != 0) + return offset; + } + found_rhdr = 1; + break; + case NEXTHDR_DEST: + if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) + return offset; + + if (found_rhdr) + return offset; + + break; + default: + return offset; + } + + offset += ipv6_optlen(exthdr); + *nexthdr = &exthdr->nexthdr; + exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); + } + + return offset; +} + +static int mip6_rthdr_init_state(struct xfrm_state *x) +{ + if (x->id.spi) { + printk(KERN_INFO "%s: spi is not 0: %u\n", __FUNCTION__, + x->id.spi); + return -EINVAL; + } + if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) { + printk(KERN_INFO "%s: state's mode is not %u: %u\n", + __FUNCTION__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode); + return -EINVAL; + } + + x->props.header_len = sizeof(struct rt2_hdr); + + return 0; +} + +/* + * Do nothing about destroying since it has no specific operation for routing + * header type 2 unlike IPsec protocols. + */ +static void mip6_rthdr_destroy(struct xfrm_state *x) +{ +} + +static struct xfrm_type mip6_rthdr_type = +{ + .description = "MIP6RT", + .owner = THIS_MODULE, + .proto = IPPROTO_ROUTING, + .flags = XFRM_TYPE_NON_FRAGMENT, + .init_state = mip6_rthdr_init_state, + .destructor = mip6_rthdr_destroy, + .input = mip6_rthdr_input, + .output = mip6_rthdr_output, + .hdr_offset = mip6_rthdr_offset, + .remote_addr = mip6_xfrm_addr, +}; + +int __init mip6_init(void) +{ + printk(KERN_INFO "Mobile IPv6\n"); + + if (xfrm_register_type(&mip6_rthdr_type, AF_INET6) < 0) { + printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __FUNCTION__); + goto mip6_rthdr_xfrm_fail; + } + return 0; + + mip6_rthdr_xfrm_fail: + return -EAGAIN; +} + +void __exit mip6_fini(void) +{ + if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0) + printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __FUNCTION__); +} From 3d126890dd67beffec27c1b6f51c040fc8d0b526 Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Wed, 23 Aug 2006 20:32:34 -0700 Subject: [PATCH 0570/1063] [IPV6] MIP6: Add destination options header transformation. Add destination options header transformation for Mobile IPv6. Based on MIPL2 kernel patch. This patch was also written by: Ville Nuorvala Signed-off-by: Noriaki TAKAMIYA Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/mip6.h | 3 + net/ipv6/mip6.c | 167 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 170 insertions(+) diff --git a/include/net/mip6.h b/include/net/mip6.h index 644b8b673048..42b65bace122 100644 --- a/include/net/mip6.h +++ b/include/net/mip6.h @@ -25,6 +25,9 @@ #ifndef _NET_MIP6_H #define _NET_MIP6_H +#define MIP6_OPT_PAD_1 0 +#define MIP6_OPT_PAD_N 1 + extern int mip6_init(void); extern void mip6_fini(void); diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 63e548b6f81e..a8adf891fe0e 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -35,6 +35,165 @@ static xfrm_address_t *mip6_xfrm_addr(struct xfrm_state *x, xfrm_address_t *addr return x->coaddr; } +static inline unsigned int calc_padlen(unsigned int len, unsigned int n) +{ + return (n - len + 16) & 0x7; +} + +static inline void *mip6_padn(__u8 *data, __u8 padlen) +{ + if (!data) + return NULL; + if (padlen == 1) { + data[0] = MIP6_OPT_PAD_1; + } else if (padlen > 1) { + data[0] = MIP6_OPT_PAD_N; + data[1] = padlen - 2; + if (padlen > 2) + memset(data+2, 0, data[1]); + } + return data + padlen; +} + +static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipv6hdr *iph = skb->nh.ipv6h; + struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data; + + if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) && + !ipv6_addr_any((struct in6_addr *)x->coaddr)) + return -ENOENT; + + return destopt->nexthdr; +} + +/* Destination Option Header is inserted. + * IP Header's src address is replaced with Home Address Option in + * Destination Option Header. + */ +static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipv6hdr *iph; + struct ipv6_destopt_hdr *dstopt; + struct ipv6_destopt_hao *hao; + u8 nexthdr; + int len; + + iph = (struct ipv6hdr *)skb->data; + iph->payload_len = htons(skb->len - sizeof(*iph)); + + nexthdr = *skb->nh.raw; + *skb->nh.raw = IPPROTO_DSTOPTS; + + dstopt = (struct ipv6_destopt_hdr *)skb->h.raw; + dstopt->nexthdr = nexthdr; + + hao = mip6_padn((char *)(dstopt + 1), + calc_padlen(sizeof(*dstopt), 6)); + + hao->type = IPV6_TLV_HAO; + hao->length = sizeof(*hao) - 2; + BUG_TRAP(hao->length == 16); + + len = ((char *)hao - (char *)dstopt) + sizeof(*hao); + + memcpy(&hao->addr, &iph->saddr, sizeof(hao->addr)); + memcpy(&iph->saddr, x->coaddr, sizeof(iph->saddr)); + + BUG_TRAP(len == x->props.header_len); + dstopt->hdrlen = (x->props.header_len >> 3) - 1; + + return 0; +} + +static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb, + u8 **nexthdr) +{ + u16 offset = sizeof(struct ipv6hdr); + struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1); + unsigned int packet_len = skb->tail - skb->nh.raw; + int found_rhdr = 0; + + *nexthdr = &skb->nh.ipv6h->nexthdr; + + while (offset + 1 <= packet_len) { + + switch (**nexthdr) { + case NEXTHDR_HOP: + break; + case NEXTHDR_ROUTING: + found_rhdr = 1; + break; + case NEXTHDR_DEST: + /* + * HAO MUST NOT appear more than once. + * XXX: It is better to try to find by the end of + * XXX: packet if HAO exists. + */ + if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) { + LIMIT_NETDEBUG(KERN_WARNING "mip6: hao exists already, override\n"); + return offset; + } + + if (found_rhdr) + return offset; + + break; + default: + return offset; + } + + offset += ipv6_optlen(exthdr); + *nexthdr = &exthdr->nexthdr; + exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); + } + + return offset; +} + +static int mip6_destopt_init_state(struct xfrm_state *x) +{ + if (x->id.spi) { + printk(KERN_INFO "%s: spi is not 0: %u\n", __FUNCTION__, + x->id.spi); + return -EINVAL; + } + if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) { + printk(KERN_INFO "%s: state's mode is not %u: %u\n", + __FUNCTION__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode); + return -EINVAL; + } + + x->props.header_len = sizeof(struct ipv6_destopt_hdr) + + calc_padlen(sizeof(struct ipv6_destopt_hdr), 6) + + sizeof(struct ipv6_destopt_hao); + BUG_TRAP(x->props.header_len == 24); + + return 0; +} + +/* + * Do nothing about destroying since it has no specific operation for + * destination options header unlike IPsec protocols. + */ +static void mip6_destopt_destroy(struct xfrm_state *x) +{ +} + +static struct xfrm_type mip6_destopt_type = +{ + .description = "MIP6DESTOPT", + .owner = THIS_MODULE, + .proto = IPPROTO_DSTOPTS, + .flags = XFRM_TYPE_NON_FRAGMENT, + .init_state = mip6_destopt_init_state, + .destructor = mip6_destopt_destroy, + .input = mip6_destopt_input, + .output = mip6_destopt_output, + .hdr_offset = mip6_destopt_offset, + .local_addr = mip6_xfrm_addr, +}; + static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb) { struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data; @@ -164,6 +323,10 @@ int __init mip6_init(void) { printk(KERN_INFO "Mobile IPv6\n"); + if (xfrm_register_type(&mip6_destopt_type, AF_INET6) < 0) { + printk(KERN_INFO "%s: can't add xfrm type(destopt)\n", __FUNCTION__); + goto mip6_destopt_xfrm_fail; + } if (xfrm_register_type(&mip6_rthdr_type, AF_INET6) < 0) { printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __FUNCTION__); goto mip6_rthdr_xfrm_fail; @@ -171,6 +334,8 @@ int __init mip6_init(void) return 0; mip6_rthdr_xfrm_fail: + xfrm_unregister_type(&mip6_destopt_type, AF_INET6); + mip6_destopt_xfrm_fail: return -EAGAIN; } @@ -178,4 +343,6 @@ void __exit mip6_fini(void) { if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0) printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __FUNCTION__); + if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0) + printk(KERN_INFO "%s: can't remove xfrm type(destopt)\n", __FUNCTION__); } From e23c7194a8a21e96b99106bdabde94614c4b84d6 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 20:33:28 -0700 Subject: [PATCH 0571/1063] [XFRM] STATE: Add Mobile IPv6 route optimization protocols to netlink interface. Add Mobile IPv6 route optimization protocols to netlink interface. Route optimization states carry care-of address. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 3a83c5987c26..770bd2410749 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -28,6 +28,9 @@ #include #include #include +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#include +#endif static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type) { @@ -173,6 +176,19 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, goto out; break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case IPPROTO_DSTOPTS: + case IPPROTO_ROUTING: + if (xfrma[XFRMA_ALG_COMP-1] || + xfrma[XFRMA_ALG_AUTH-1] || + xfrma[XFRMA_ALG_CRYPT-1] || + xfrma[XFRMA_ENCAP-1] || + xfrma[XFRMA_SEC_CTX-1] || + !xfrma[XFRMA_COADDR-1]) + goto out; + break; +#endif + default: goto out; }; From 2b741653b6c824fe7520ee92b6795f11c5f24b24 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 20:34:26 -0700 Subject: [PATCH 0572/1063] [IPV6] MIP6: Add Mobility header definition. Add Mobility header definition for Mobile IPv6. Based on MIPL2 kernel patch. This patch was also written by: Antti Tuominen Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/in6.h | 1 + include/net/flow.h | 9 +++++++++ include/net/ipv6.h | 1 + include/net/mip6.h | 23 +++++++++++++++++++++++ 4 files changed, 34 insertions(+) diff --git a/include/linux/in6.h b/include/linux/in6.h index 086ec2ac8c5f..d776829b443f 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -134,6 +134,7 @@ struct in6_flowlabel_req #define IPPROTO_ICMPV6 58 /* ICMPv6 */ #define IPPROTO_NONE 59 /* IPv6 no next header */ #define IPPROTO_DSTOPTS 60 /* IPv6 destination options */ +#define IPPROTO_MH 135 /* IPv6 mobility header */ /* * IPv6 TLV options. diff --git a/include/net/flow.h b/include/net/flow.h index 21d988b2058a..e0522914316e 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -72,12 +72,21 @@ struct flowi { } dnports; __u32 spi; + +#ifdef CONFIG_IPV6_MIP6 + struct { + __u8 type; + } mht; +#endif } uli_u; #define fl_ip_sport uli_u.ports.sport #define fl_ip_dport uli_u.ports.dport #define fl_icmp_type uli_u.icmpt.type #define fl_icmp_code uli_u.icmpt.code #define fl_ipsec_spi uli_u.spi +#ifdef CONFIG_IPV6_MIP6 +#define fl_mh_type uli_u.mht.type +#endif __u32 secid; /* used by xfrm; see secid.txt */ } __attribute__((__aligned__(BITS_PER_LONG/8))); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 8e6ec6063f8c..72bf47b2a4e0 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -40,6 +40,7 @@ #define NEXTHDR_ICMP 58 /* ICMP for IPv6. */ #define NEXTHDR_NONE 59 /* No next header */ #define NEXTHDR_DEST 60 /* Destination options header. */ +#define NEXTHDR_MOBILITY 135 /* Mobility header. */ #define NEXTHDR_MAX 255 diff --git a/include/net/mip6.h b/include/net/mip6.h index 42b65bace122..fd43178faace 100644 --- a/include/net/mip6.h +++ b/include/net/mip6.h @@ -28,6 +28,29 @@ #define MIP6_OPT_PAD_1 0 #define MIP6_OPT_PAD_N 1 +/* + * Mobility Header + */ +struct ip6_mh { + __u8 ip6mh_proto; + __u8 ip6mh_hdrlen; + __u8 ip6mh_type; + __u8 ip6mh_reserved; + __u16 ip6mh_cksum; + /* Followed by type specific messages */ + __u8 data[0]; +} __attribute__ ((__packed__)); + +#define IP6_MH_TYPE_BRR 0 /* Binding Refresh Request */ +#define IP6_MH_TYPE_HOTI 1 /* HOTI Message */ +#define IP6_MH_TYPE_COTI 2 /* COTI Message */ +#define IP6_MH_TYPE_HOT 3 /* HOT Message */ +#define IP6_MH_TYPE_COT 4 /* COT Message */ +#define IP6_MH_TYPE_BU 5 /* Binding Update */ +#define IP6_MH_TYPE_BACK 6 /* Binding ACK */ +#define IP6_MH_TYPE_BERROR 7 /* Binding Error */ +#define IP6_MH_TYPE_MAX IP6_MH_TYPE_BERROR + extern int mip6_init(void); extern void mip6_fini(void); From 7be96f7628469e56f91d51f13b03e9bcff113c7f Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 20:35:31 -0700 Subject: [PATCH 0573/1063] [IPV6] MIP6: Add receiving mobility header functions through raw socket. Like ICMPv6, mobility header is handled through raw socket. In inbound case, check only whether ICMPv6 error should be sent as a reply or not by kernel. Based on MIPL2 kernel patch. This patch was also written by: Ville Nuorvala This patch was also written by: Antti Tuominen Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/mip6.h | 4 +++ net/ipv6/mip6.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/raw.c | 29 +++++++++++++++- 3 files changed, 115 insertions(+), 1 deletion(-) diff --git a/include/net/mip6.h b/include/net/mip6.h index fd43178faace..68263c6d9996 100644 --- a/include/net/mip6.h +++ b/include/net/mip6.h @@ -25,6 +25,9 @@ #ifndef _NET_MIP6_H #define _NET_MIP6_H +#include +#include + #define MIP6_OPT_PAD_1 0 #define MIP6_OPT_PAD_N 1 @@ -53,5 +56,6 @@ struct ip6_mh { extern int mip6_init(void); extern void mip6_fini(void); +extern int mip6_mh_filter(struct sock *sk, struct sk_buff *skb); #endif diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index a8adf891fe0e..7b5f89321482 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -26,7 +26,10 @@ #include #include #include +#include +#include #include +#include #include #include @@ -55,6 +58,86 @@ static inline void *mip6_padn(__u8 *data, __u8 padlen) return data + padlen; } +static inline void mip6_param_prob(struct sk_buff *skb, int code, int pos) +{ + icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev); +} + +static int mip6_mh_len(int type) +{ + int len = 0; + + switch (type) { + case IP6_MH_TYPE_BRR: + len = 0; + break; + case IP6_MH_TYPE_HOTI: + case IP6_MH_TYPE_COTI: + case IP6_MH_TYPE_BU: + case IP6_MH_TYPE_BACK: + len = 1; + break; + case IP6_MH_TYPE_HOT: + case IP6_MH_TYPE_COT: + case IP6_MH_TYPE_BERROR: + len = 2; + break; + } + return len; +} + +int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) +{ + struct ip6_mh *mh; + int mhlen; + + if (!pskb_may_pull(skb, (skb->h.raw - skb->data) + 8) || + !pskb_may_pull(skb, (skb->h.raw - skb->data) + ((skb->h.raw[1] + 1) << 3))) + return -1; + + mh = (struct ip6_mh *)skb->h.raw; + + if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) { + LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n", + mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type)); + mip6_param_prob(skb, 0, (&mh->ip6mh_hdrlen) - skb->nh.raw); + return -1; + } + mhlen = (mh->ip6mh_hdrlen + 1) << 3; + + if (skb->ip_summed == CHECKSUM_COMPLETE) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, + &skb->nh.ipv6h->daddr, + mhlen, IPPROTO_MH, + skb->csum)) { + LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH hw checksum failed\n"); + skb->ip_summed = CHECKSUM_NONE; + } + } + if (skb->ip_summed == CHECKSUM_NONE) { + if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, + &skb->nh.ipv6h->daddr, + mhlen, IPPROTO_MH, + skb_checksum(skb, 0, mhlen, 0))) { + LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n", + NIP6(skb->nh.ipv6h->saddr), + NIP6(skb->nh.ipv6h->daddr)); + return -1; + } + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + + if (mh->ip6mh_proto != IPPROTO_NONE) { + LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n", + mh->ip6mh_proto); + mip6_param_prob(skb, 0, (&mh->ip6mh_proto) - skb->nh.raw); + return -1; + } + + return 0; +} + static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb) { struct ipv6hdr *iph = skb->nh.ipv6h; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index d4af1cb5e19f..ecca8aae3c4b 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -50,6 +50,9 @@ #include #include #include +#ifdef CONFIG_IPV6_MIP6 +#include +#endif #include #include @@ -169,8 +172,32 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, IP6CB(skb)->iif); while (sk) { + int filtered; + delivered = 1; - if (nexthdr != IPPROTO_ICMPV6 || !icmpv6_filter(sk, skb)) { + switch (nexthdr) { + case IPPROTO_ICMPV6: + filtered = icmpv6_filter(sk, skb); + break; +#ifdef CONFIG_IPV6_MIP6 + case IPPROTO_MH: + /* XXX: To validate MH only once for each packet, + * this is placed here. It should be after checking + * xfrm policy, however it doesn't. The checking xfrm + * policy is placed in rawv6_rcv() because it is + * required for each socket. + */ + filtered = mip6_mh_filter(sk, skb); + break; +#endif + default: + filtered = 0; + break; + } + + if (filtered < 0) + break; + if (filtered == 0) { struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); /* Not releasing hash table! */ From 6e8f4d48b265225bdf437bbf3151b0d6700dda22 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 20:36:47 -0700 Subject: [PATCH 0574/1063] [IPV6] MIP6: Add sending mobility header functions through raw socket. Mobility header is built by user-space and sent through raw socket. Kernel just extracts its type to flow. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/raw.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ecca8aae3c4b..d09329ca3267 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -609,6 +609,9 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) struct iovec *iov; u8 __user *type = NULL; u8 __user *code = NULL; +#ifdef CONFIG_IPV6_MIP6 + u8 len = 0; +#endif int probed = 0; int i; @@ -640,6 +643,20 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) probed = 1; } break; +#ifdef CONFIG_IPV6_MIP6 + case IPPROTO_MH: + if (iov->iov_base && iov->iov_len < 1) + break; + /* check if type field is readable or not. */ + if (iov->iov_len > 2 - len) { + u8 __user *p = iov->iov_base; + get_user(fl->fl_mh_type, &p[2 - len]); + probed = 1; + } else + len += iov->iov_len; + + break; +#endif default: probed = 1; break; From 2ce4272a699c731b9736d76126dc742353e381db Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 20:39:03 -0700 Subject: [PATCH 0575/1063] [IPV6] MIP6: Transformation support mobility header. Transformation support mobility header. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/xfrm.h | 5 +++++ net/ipv6/xfrm6_policy.c | 15 +++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 7f1630630dcf..13488e7ba68c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -546,6 +546,11 @@ u16 xfrm_flowi_sport(struct flowi *fl) case IPPROTO_ICMPV6: port = htons(fl->fl_icmp_type); break; +#ifdef CONFIG_IPV6_MIP6 + case IPPROTO_MH: + port = htons(fl->fl_mh_type); + break; +#endif default: port = 0; /*XXX*/ } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 729b4748d6d3..98c2fe449b3f 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -18,6 +18,9 @@ #include #include #include +#ifdef CONFIG_IPV6_MIP6 +#include +#endif static struct dst_ops xfrm6_dst_ops; static struct xfrm_policy_afinfo xfrm6_policy_afinfo; @@ -270,6 +273,18 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl) fl->proto = nexthdr; return; +#ifdef CONFIG_IPV6_MIP6 + case IPPROTO_MH: + if (pskb_may_pull(skb, skb->nh.raw + offset + 3 - skb->data)) { + struct ip6_mh *mh; + mh = (struct ip6_mh *)exthdr; + + fl->fl_mh_type = mh->ip6mh_type; + } + fl->proto = nexthdr; + return; +#endif + /* XXX Why are there these headers? */ case IPPROTO_AH: case IPPROTO_ESP: From df0ba92a99ca757039dfa84a929281ea3f7a50e8 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 20:41:00 -0700 Subject: [PATCH 0576/1063] [XFRM]: Trace which secpath state is reject factor. For Mobile IPv6 usage, it is required to trace which secpath state is reject factor in order to notify it to user space (to know the address which cannot be used route optimized communication). Based on MIPL2 kernel patch. This patch was also written by: Henrik Petander Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/xfrm.h | 1 + net/xfrm/xfrm_policy.c | 55 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 49 insertions(+), 7 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 13488e7ba68c..9ebbdc1dd471 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -273,6 +273,7 @@ struct xfrm_type void (*destructor)(struct xfrm_state *); int (*input)(struct xfrm_state *, struct sk_buff *skb); int (*output)(struct xfrm_state *, struct sk_buff *pskb); + int (*reject)(struct xfrm_state *, struct sk_buff *, struct flowi *); int (*hdr_offset)(struct xfrm_state *, struct sk_buff *, u8 **); xfrm_address_t *(*local_addr)(struct xfrm_state *, xfrm_address_t *); xfrm_address_t *(*remote_addr)(struct xfrm_state *, xfrm_address_t *); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ad2a5cba1f5b..d125a2649037 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -988,6 +988,23 @@ error: } EXPORT_SYMBOL(xfrm_lookup); +static inline int +xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl) +{ + struct xfrm_state *x; + int err; + + if (!skb->sp || idx < 0 || idx >= skb->sp->len) + return 0; + x = skb->sp->xvec[idx]; + if (!x->type->reject) + return 0; + xfrm_state_hold(x); + err = x->type->reject(x, skb, fl); + xfrm_state_put(x); + return err; +} + /* When skb is transformed back to its "native" form, we have to * check policy restrictions. At the moment we make this in maximally * stupid way. Shame on me. :-) Of course, connected sockets must @@ -1010,6 +1027,13 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, xfrm_state_addr_cmp(tmpl, x, family)); } +/* + * 0 or more than 0 is returned when validation is succeeded (either bypass + * because of optional transport mode, or next index of the mathced secpath + * state with the template. + * -1 is returned when no matching template is found. + * Otherwise "-2 - errored_index" is returned. + */ static inline int xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, unsigned short family) @@ -1024,8 +1048,11 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, for (; idx < sp->len; idx++) { if (xfrm_state_ok(tmpl, sp->xvec[idx], family)) return ++idx; - if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) + if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) { + if (start == -1) + start = -2-idx; break; + } } return start; } @@ -1046,11 +1073,14 @@ xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family } EXPORT_SYMBOL(xfrm_decode_session); -static inline int secpath_has_nontransport(struct sec_path *sp, int k) +static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp) { for (; k < sp->len; k++) { - if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) + if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) { + if (idxp) + *idxp = k; return 1; + } } return 0; @@ -1062,6 +1092,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, struct xfrm_policy *pol; struct flowi fl; u8 fl_dir = policy_to_flow_dir(dir); + int xerr_idx = -1; + int *xerr_idxp = &xerr_idx; if (xfrm_decode_session(skb, &fl, family) < 0) return 0; @@ -1086,8 +1118,13 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol = flow_cache_lookup(&fl, family, fl_dir, xfrm_policy_lookup); - if (!pol) - return !skb->sp || !secpath_has_nontransport(skb->sp, 0); + if (!pol) { + if (skb->sp && secpath_has_nontransport(skb->sp, 0, xerr_idxp)) { + xfrm_secpath_reject(xerr_idx, skb, &fl); + return 0; + } + return 1; + } pol->curlft.use_time = (unsigned long)xtime.tv_sec; @@ -1107,11 +1144,14 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, */ for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) { k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family); - if (k < 0) + if (k < 0) { + if (k < -1 && xerr_idxp) + *xerr_idxp = -(2+k); goto reject; + } } - if (secpath_has_nontransport(sp, k)) + if (secpath_has_nontransport(sp, k, xerr_idxp)) goto reject; xfrm_pol_put(pol); @@ -1119,6 +1159,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } reject: + xfrm_secpath_reject(xerr_idx, skb, &fl); xfrm_pol_put(pol); return 0; } From 97a64b4577ae2bc5599dbd008a3cd9e25de9b9f5 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 20:44:06 -0700 Subject: [PATCH 0577/1063] [XFRM]: Introduce XFRM_MSG_REPORT. XFRM_MSG_REPORT is a message as notification of state protocol and selector from kernel to user-space. Mobile IPv6 will use it when inbound reject is occurred at route optimization to make user-space know a binding error requirement. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/xfrm.h | 12 +++++++++++ include/net/xfrm.h | 2 ++ net/xfrm/xfrm_state.c | 19 ++++++++++++++++++ net/xfrm/xfrm_user.c | 46 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+) diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 1d8c1f22c12d..4009f4445fa9 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -166,6 +166,10 @@ enum { #define XFRM_MSG_NEWAE XFRM_MSG_NEWAE XFRM_MSG_GETAE, #define XFRM_MSG_GETAE XFRM_MSG_GETAE + + XFRM_MSG_REPORT, +#define XFRM_MSG_REPORT XFRM_MSG_REPORT + __XFRM_MSG_MAX }; #define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1) @@ -325,12 +329,18 @@ struct xfrm_usersa_flush { __u8 proto; }; +struct xfrm_user_report { + __u8 proto; + struct xfrm_selector sel; +}; + #ifndef __KERNEL__ /* backwards compatibility for userspace */ #define XFRMGRP_ACQUIRE 1 #define XFRMGRP_EXPIRE 2 #define XFRMGRP_SA 4 #define XFRMGRP_POLICY 8 +#define XFRMGRP_REPORT 0x10 #endif enum xfrm_nlgroups { @@ -346,6 +356,8 @@ enum xfrm_nlgroups { #define XFRMNLGRP_POLICY XFRMNLGRP_POLICY XFRMNLGRP_AEVENTS, #define XFRMNLGRP_AEVENTS XFRMNLGRP_AEVENTS + XFRMNLGRP_REPORT, +#define XFRMNLGRP_REPORT XFRMNLGRP_REPORT __XFRMNLGRP_MAX }; #define XFRMNLGRP_MAX (__XFRMNLGRP_MAX - 1) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 9ebbdc1dd471..0b223eed4c9b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -381,6 +381,7 @@ struct xfrm_mgr struct xfrm_policy *(*compile_policy)(struct sock *sk, int opt, u8 *data, int len, int *dir); int (*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport); int (*notify_policy)(struct xfrm_policy *x, int dir, struct km_event *c); + int (*report)(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); }; extern int xfrm_register_km(struct xfrm_mgr *km); @@ -1043,6 +1044,7 @@ extern void xfrm_init_pmtu(struct dst_entry *dst); extern wait_queue_head_t km_waitq; extern int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport); extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid); +extern int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); extern void xfrm_input_init(void); extern int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 3da89c01ea71..a26ef6952c30 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1055,6 +1055,25 @@ void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid) } EXPORT_SYMBOL(km_policy_expired); +int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) +{ + int err = -EINVAL; + int ret; + struct xfrm_mgr *km; + + read_lock(&xfrm_km_lock); + list_for_each_entry(km, &xfrm_km_list, list) { + if (km->report) { + ret = km->report(proto, sel, addr); + if (!ret) + err = ret; + } + } + read_unlock(&xfrm_km_lock); + return err; +} +EXPORT_SYMBOL(km_report); + int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen) { int err; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 770bd2410749..7303b820bea4 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1491,6 +1491,7 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = NLMSG_LENGTH(0), [XFRM_MSG_NEWAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), [XFRM_MSG_GETAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), + [XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report), }; #undef XMSGSIZE @@ -2058,12 +2059,57 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_ev } +static int build_report(struct sk_buff *skb, u8 proto, + struct xfrm_selector *sel, xfrm_address_t *addr) +{ + struct xfrm_user_report *ur; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + + nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur)); + ur = NLMSG_DATA(nlh); + nlh->nlmsg_flags = 0; + + ur->proto = proto; + memcpy(&ur->sel, sel, sizeof(ur->sel)); + + if (addr) + RTA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr); + + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int xfrm_send_report(u8 proto, struct xfrm_selector *sel, + xfrm_address_t *addr) +{ + struct sk_buff *skb; + size_t len; + + len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(struct xfrm_user_report))); + skb = alloc_skb(len, GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + if (build_report(skb, proto, sel, addr) < 0) + BUG(); + + NETLINK_CB(skb).dst_group = XFRMNLGRP_REPORT; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC); +} + static struct xfrm_mgr netlink_mgr = { .id = "netlink", .notify = xfrm_send_state_notify, .acquire = xfrm_send_acquire, .compile_policy = xfrm_compile_policy, .notify_policy = xfrm_send_policy_notify, + .report = xfrm_send_report, }; static int __init xfrm_user_init(void) From 70182ed23d2559345aadb3cfb6a68a7c1cc0aa39 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 20:45:55 -0700 Subject: [PATCH 0578/1063] [IPV6] MIP6: Report to user-space when home address option is rejected. Report to user-space when home address option is rejected. In receiving this message user-space application will send Mobile IPv6 binding error. It is rate-limited by kernel. Based on MIPL2 kernel patch. This patch was also written by: Ville Nuorvala Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/mip6.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 7b5f89321482..31445d09261e 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -138,6 +139,18 @@ int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) return 0; } +struct mip6_report_rate_limiter { + spinlock_t lock; + struct timeval stamp; + int iif; + struct in6_addr src; + struct in6_addr dst; +}; + +static struct mip6_report_rate_limiter mip6_report_rl = { + .lock = SPIN_LOCK_UNLOCKED +}; + static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb) { struct ipv6hdr *iph = skb->nh.ipv6h; @@ -189,6 +202,75 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb) return 0; } +static inline int mip6_report_rl_allow(struct timeval *stamp, + struct in6_addr *dst, + struct in6_addr *src, int iif) +{ + int allow = 0; + + spin_lock_bh(&mip6_report_rl.lock); + if (mip6_report_rl.stamp.tv_sec != stamp->tv_sec || + mip6_report_rl.stamp.tv_usec != stamp->tv_usec || + mip6_report_rl.iif != iif || + !ipv6_addr_equal(&mip6_report_rl.src, src) || + !ipv6_addr_equal(&mip6_report_rl.dst, dst)) { + mip6_report_rl.stamp.tv_sec = stamp->tv_sec; + mip6_report_rl.stamp.tv_usec = stamp->tv_usec; + mip6_report_rl.iif = iif; + ipv6_addr_copy(&mip6_report_rl.src, src); + ipv6_addr_copy(&mip6_report_rl.dst, dst); + allow = 1; + } + spin_unlock_bh(&mip6_report_rl.lock); + return allow; +} + +static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct flowi *fl) +{ + struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb; + struct ipv6_destopt_hao *hao = NULL; + struct xfrm_selector sel; + int offset; + struct timeval stamp; + int err = 0; + + if (likely(opt->dsthao)) { + offset = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO); + if (likely(offset >= 0)) + hao = (struct ipv6_destopt_hao *)(skb->nh.raw + offset); + } + + skb_get_timestamp(skb, &stamp); + + if (!mip6_report_rl_allow(&stamp, &skb->nh.ipv6h->daddr, + hao ? &hao->addr : &skb->nh.ipv6h->saddr, + opt->iif)) + goto out; + + memset(&sel, 0, sizeof(sel)); + memcpy(&sel.daddr, (xfrm_address_t *)&skb->nh.ipv6h->daddr, + sizeof(sel.daddr)); + sel.prefixlen_d = 128; + memcpy(&sel.saddr, (xfrm_address_t *)&skb->nh.ipv6h->saddr, + sizeof(sel.saddr)); + sel.prefixlen_s = 128; + sel.family = AF_INET6; + sel.proto = fl->proto; + sel.dport = xfrm_flowi_dport(fl); + if (sel.dport) + sel.dport_mask = ~((__u16)0); + sel.sport = xfrm_flowi_sport(fl); + if (sel.sport) + sel.sport_mask = ~((__u16)0); + sel.ifindex = fl->oif; + + err = km_report(IPPROTO_DSTOPTS, &sel, + (hao ? (xfrm_address_t *)&hao->addr : NULL)); + + out: + return err; +} + static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb, u8 **nexthdr) { @@ -273,6 +355,7 @@ static struct xfrm_type mip6_destopt_type = .destructor = mip6_destopt_destroy, .input = mip6_destopt_input, .output = mip6_destopt_output, + .reject = mip6_destopt_reject, .hdr_offset = mip6_destopt_offset, .local_addr = mip6_xfrm_addr, }; From 01be8e5d59d7e6da5c425a31b43709c2a4a69b5d Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 20:47:44 -0700 Subject: [PATCH 0579/1063] [IPV6] MIP6: Ignore to report if mobility headers is rejected. Ignore to report user-space for known mobility headers rejected by destination options header transformation. Mobile IPv6 specification (RFC3775) says that mobility header is used with destination options header carrying home address option only for binding update message. Other type message cannot be used and node must drop it silently (and must not send binding error) if receving such packet. To achieve it, (1) application should use transformation policy and wild-card states to catch binding update message prior other packets (2) kernel doesn't report the reject to user-space not to send binding error message by application. This patch is for (2). Based on MIPL2 kernel patch. This patch was also written by: Ville Nuorvala Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/mip6.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 31445d09261e..70854035c131 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -234,6 +234,9 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct struct timeval stamp; int err = 0; + if (unlikely(fl->proto == IPPROTO_MH && fl->fl_mh_type <= IP6_MH_TYPE_MAX)) + goto out; + if (likely(opt->dsthao)) { offset = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO); if (likely(offset >= 0)) From c11f1a15c522ddd3bbd2c32b5ce3e0b1831b22f2 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 22:38:14 -0700 Subject: [PATCH 0580/1063] [XFRM] POLICY: Add Kconfig to support sub policy. Add Kconfig to support sub policy. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/xfrm/Kconfig | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 43228f7fd3a0..0faab6332586 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -14,6 +14,16 @@ config XFRM_USER If unsure, say Y. +config XFRM_SUB_POLICY + bool "Transformation sub policy support (EXPERIMENTAL)" + depends on XFRM && EXPERIMENTAL + ---help--- + Support sub policy for developers. By using sub policy with main + one, two policies can be applied to the same packet at once. + Policy which lives shorter time in kernel should be a sub. + + If unsure, say N. + config NET_KEY tristate "PF_KEY sockets" select XFRM From 4e81bb8336a0ac50289d4d4c7a55e559b994ee8f Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 22:43:30 -0700 Subject: [PATCH 0581/1063] [XFRM] POLICY: sub policy support. Sub policy is introduced. Main and sub policy are applied the same flow. (Policy that current kernel uses is named as main.) It is required another transformation policy management to keep IPsec and Mobile IPv6 lives separate. Policy which lives shorter time in kernel should be a sub i.e. normally main is for IPsec and sub is for Mobile IPv6. (Such usage as two IPsec policies on different database can be used, too.) Limitation or TODOs: - Sub policy is not supported for per socket one (it is always inserted as main). - Current kernel makes cached outbound with flowi to skip searching database. However this patch makes it disabled only when "two policies are used and the first matched one is bypass case" because neither flowi nor bundle information knows about transformation template size. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki --- include/linux/xfrm.h | 7 ++ include/net/xfrm.h | 45 ++++++-- net/xfrm/xfrm_policy.c | 252 +++++++++++++++++++++++++++++++++++------ 3 files changed, 260 insertions(+), 44 deletions(-) diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 4009f4445fa9..492fb9818747 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -102,6 +102,13 @@ struct xfrm_stats { __u32 integrity_failed; }; +enum +{ + XFRM_POLICY_TYPE_MAIN = 0, + XFRM_POLICY_TYPE_SUB = 1, + XFRM_POLICY_TYPE_MAX = 2 +}; + enum { XFRM_POLICY_IN = 0, diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 0b223eed4c9b..4655ca25f808 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -341,6 +341,7 @@ struct xfrm_policy atomic_t refcnt; struct timer_list timer; + u8 type; u32 priority; u32 index; struct xfrm_selector selector; @@ -389,6 +390,19 @@ extern int xfrm_unregister_km(struct xfrm_mgr *km); extern struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; +#ifdef CONFIG_XFRM_SUB_POLICY +extern struct xfrm_policy *xfrm_policy_list_sub[XFRM_POLICY_MAX*2]; + +static inline int xfrm_policy_lists_empty(int dir) +{ + return (!xfrm_policy_list[dir] && !xfrm_policy_list_sub[dir]); +} +#else +static inline int xfrm_policy_lists_empty(int dir) +{ + return (!xfrm_policy_list[dir]); +} +#endif static inline void xfrm_pol_hold(struct xfrm_policy *policy) { @@ -404,6 +418,20 @@ static inline void xfrm_pol_put(struct xfrm_policy *policy) __xfrm_policy_destroy(policy); } +#ifdef CONFIG_XFRM_SUB_POLICY +static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols) +{ + int i; + for (i = npols - 1; i >= 0; --i) + xfrm_pol_put(pols[i]); +} +#else +static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols) +{ + xfrm_pol_put(pols[0]); +} +#endif + #define XFRM_DST_HSIZE 1024 static __inline__ @@ -737,8 +765,8 @@ static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *sk { if (sk && sk->sk_policy[XFRM_POLICY_IN]) return __xfrm_policy_check(sk, dir, skb, family); - - return (!xfrm_policy_list[dir] && !skb->sp) || + + return (xfrm_policy_lists_empty(dir) && !skb->sp) || (skb->dst->flags & DST_NOPOLICY) || __xfrm_policy_check(sk, dir, skb, family); } @@ -758,7 +786,7 @@ extern int __xfrm_route_forward(struct sk_buff *skb, unsigned short family); static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) { - return !xfrm_policy_list[XFRM_POLICY_OUT] || + return xfrm_policy_lists_empty(XFRM_POLICY_OUT) || (skb->dst->flags & DST_NOXFRM) || __xfrm_route_forward(skb, family); } @@ -1023,18 +1051,19 @@ static inline int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsig #endif struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp); -extern int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), void *); +extern int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), void *); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); -struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel, +struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, + struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete); -struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete); -void xfrm_policy_flush(void); +struct xfrm_policy *xfrm_policy_byid(u8, int dir, u32 id, int delete); +void xfrm_policy_flush(u8 type); u32 xfrm_get_acqseq(void); void xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi); struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create, unsigned short family); -extern void xfrm_policy_flush(void); +extern void xfrm_policy_flush(u8 type); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); extern int xfrm_flush_bundles(void); extern void xfrm_flush_all_bundles(void); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d125a2649037..96de6c76ed57 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -32,6 +32,24 @@ static DEFINE_RWLOCK(xfrm_policy_lock); struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; EXPORT_SYMBOL(xfrm_policy_list); +#ifdef CONFIG_XFRM_SUB_POLICY +struct xfrm_policy *xfrm_policy_list_sub[XFRM_POLICY_MAX*2]; +EXPORT_SYMBOL(xfrm_policy_list_sub); + +#define XFRM_POLICY_LISTS(type) \ + ((type == XFRM_POLICY_TYPE_SUB) ? xfrm_policy_list_sub : \ + xfrm_policy_list) +#define XFRM_POLICY_LISTHEAD(type, dir) \ + ((type == XFRM_POLICY_TYPE_SUB) ? xfrm_policy_list_sub[dir] : \ + xfrm_policy_list[dir]) +#define XFRM_POLICY_LISTHEADP(type, dir) \ + ((type == XFRM_POLICY_TYPE_SUB) ? &xfrm_policy_list_sub[dir] : \ + &xfrm_policy_list[dir]) +#else +#define XFRM_POLICY_LISTS(type) xfrm_policy_list +#define XFRM_POLICY_LISTHEAD(type, dif) xfrm_policy_list[dir] +#define XFRM_POLICY_LISTHEADP(type, dif) &xfrm_policy_list[dir] +#endif static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; @@ -397,7 +415,7 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) /* Generate new index... KAME seems to generate them ordered by cost * of an absolute inpredictability of ordering of rules. This will not pass. */ -static u32 xfrm_gen_index(int dir) +static u32 xfrm_gen_index(u8 type, int dir) { u32 idx; struct xfrm_policy *p; @@ -408,7 +426,7 @@ static u32 xfrm_gen_index(int dir) idx_generator += 8; if (idx == 0) idx = 8; - for (p = xfrm_policy_list[dir]; p; p = p->next) { + for (p = XFRM_POLICY_LISTHEAD(type, dir); p; p = p->next) { if (p->index == idx) break; } @@ -425,7 +443,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) struct dst_entry *gc_list; write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) { + for (p = XFRM_POLICY_LISTHEADP(policy->type, dir); (pol=*p)!=NULL;) { if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 && xfrm_sec_ctx_match(pol->security, policy->security)) { if (excl) { @@ -452,7 +470,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) policy->next = *p; *p = policy; atomic_inc(&flow_cache_genid); - policy->index = delpol ? delpol->index : xfrm_gen_index(dir); + policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir); policy->curlft.add_time = (unsigned long)xtime.tv_sec; policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) @@ -493,13 +511,14 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) } EXPORT_SYMBOL(xfrm_policy_insert); -struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel, +struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, + struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete) { struct xfrm_policy *pol, **p; write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { + for (p = XFRM_POLICY_LISTHEADP(type, dir); (pol=*p)!=NULL; p = &pol->next) { if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) && (xfrm_sec_ctx_match(ctx, pol->security))) { xfrm_pol_hold(pol); @@ -518,12 +537,12 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel, } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); -struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete) +struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete) { struct xfrm_policy *pol, **p; write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { + for (p = XFRM_POLICY_LISTHEADP(type, dir); (pol=*p)!=NULL; p = &pol->next) { if (pol->index == id) { xfrm_pol_hold(pol); if (delete) @@ -541,15 +560,16 @@ struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete) } EXPORT_SYMBOL(xfrm_policy_byid); -void xfrm_policy_flush(void) +void xfrm_policy_flush(u8 type) { struct xfrm_policy *xp; + struct xfrm_policy **p_list = XFRM_POLICY_LISTS(type); int dir; write_lock_bh(&xfrm_policy_lock); for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { - while ((xp = xfrm_policy_list[dir]) != NULL) { - xfrm_policy_list[dir] = xp->next; + while ((xp = p_list[dir]) != NULL) { + p_list[dir] = xp->next; write_unlock_bh(&xfrm_policy_lock); xfrm_policy_kill(xp); @@ -562,7 +582,7 @@ void xfrm_policy_flush(void) } EXPORT_SYMBOL(xfrm_policy_flush); -int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), +int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), void *data) { struct xfrm_policy *xp; @@ -572,7 +592,7 @@ int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), read_lock_bh(&xfrm_policy_lock); for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) + for (xp = XFRM_POLICY_LISTHEAD(type, dir); xp; xp = xp->next) count++; } @@ -582,7 +602,7 @@ int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), } for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) { + for (xp = XFRM_POLICY_LISTHEAD(type, dir); xp; xp = xp->next) { error = func(xp, dir%XFRM_POLICY_MAX, --count, data); if (error) goto out; @@ -597,13 +617,13 @@ EXPORT_SYMBOL(xfrm_policy_walk); /* Find policy to apply to this flow. */ -static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, - void **objp, atomic_t **obj_refp) +static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, + u16 family, u8 dir) { struct xfrm_policy *pol; read_lock_bh(&xfrm_policy_lock); - for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) { + for (pol = XFRM_POLICY_LISTHEAD(type, dir); pol; pol = pol->next) { struct xfrm_selector *sel = &pol->selector; int match; @@ -620,6 +640,25 @@ static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, } } read_unlock_bh(&xfrm_policy_lock); + + return pol; +} + +static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, + void **objp, atomic_t **obj_refp) +{ + struct xfrm_policy *pol; + +#ifdef CONFIG_XFRM_SUB_POLICY + pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir); + if (pol) + goto end; +#endif + pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir); + +#ifdef CONFIG_XFRM_SUB_POLICY + end: +#endif if ((*objp = (void *) pol) != NULL) *obj_refp = &pol->refcnt; } @@ -665,8 +704,10 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) { - pol->next = xfrm_policy_list[dir]; - xfrm_policy_list[dir] = pol; + struct xfrm_policy **p_list = XFRM_POLICY_LISTS(pol->type); + + pol->next = p_list[dir]; + p_list[dir] = pol; xfrm_pol_hold(pol); } @@ -675,7 +716,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, { struct xfrm_policy **polp; - for (polp = &xfrm_policy_list[dir]; + for (polp = XFRM_POLICY_LISTHEADP(pol->type, dir); *polp != NULL; polp = &(*polp)->next) { if (*polp == pol) { *polp = pol->next; @@ -704,12 +745,17 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) { struct xfrm_policy *old_pol; +#ifdef CONFIG_XFRM_SUB_POLICY + if (pol && pol->type != XFRM_POLICY_TYPE_MAIN) + return -EINVAL; +#endif + write_lock_bh(&xfrm_policy_lock); old_pol = sk->sk_policy[dir]; sk->sk_policy[dir] = pol; if (pol) { pol->curlft.add_time = (unsigned long)xtime.tv_sec; - pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir); + pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir); __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); } if (old_pol) @@ -738,6 +784,7 @@ static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) newp->flags = old->flags; newp->xfrm_nr = old->xfrm_nr; newp->index = old->index; + newp->type = old->type; memcpy(newp->xfrm_vec, old->xfrm_vec, newp->xfrm_nr*sizeof(struct xfrm_tmpl)); write_lock_bh(&xfrm_policy_lock); @@ -764,9 +811,9 @@ int __xfrm_sk_clone_policy(struct sock *sk) /* Resolve list of templates for the flow, given policy. */ static int -xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl, - struct xfrm_state **xfrm, - unsigned short family) +xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, + struct xfrm_state **xfrm, + unsigned short family) { int nx; int i, error; @@ -809,6 +856,38 @@ fail: return error; } +static int +xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, + struct xfrm_state **xfrm, + unsigned short family) +{ + int cnx = 0; + int error; + int ret; + int i; + + for (i = 0; i < npols; i++) { + if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) { + error = -ENOBUFS; + goto fail; + } + ret = xfrm_tmpl_resolve_one(pols[i], fl, &xfrm[cnx], family); + if (ret < 0) { + error = ret; + goto fail; + } else + cnx += ret; + } + + return cnx; + + fail: + for (cnx--; cnx>=0; cnx--) + xfrm_state_put(xfrm[cnx]); + return error; + +} + /* Check that the bundle accepts the flow and its components are * still valid. */ @@ -855,6 +934,11 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, struct sock *sk, int flags) { struct xfrm_policy *policy; + struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; + int npols; + int pol_dead; + int xfrm_nr; + int pi; struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; struct dst_entry *dst, *dst_orig = *dst_p; int nx = 0; @@ -866,12 +950,18 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, restart: genid = atomic_read(&flow_cache_genid); policy = NULL; + for (pi = 0; pi < ARRAY_SIZE(pols); pi++) + pols[pi] = NULL; + npols = 0; + pol_dead = 0; + xfrm_nr = 0; + if (sk && sk->sk_policy[1]) policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); if (!policy) { /* To accelerate a bit... */ - if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT]) + if ((dst_orig->flags & DST_NOXFRM) || xfrm_policy_lists_empty(XFRM_POLICY_OUT)) return 0; policy = flow_cache_lookup(fl, dst_orig->ops->family, @@ -883,6 +973,9 @@ restart: family = dst_orig->ops->family; policy->curlft.use_time = (unsigned long)xtime.tv_sec; + pols[0] = policy; + npols ++; + xfrm_nr += pols[0]->xfrm_nr; switch (policy->action) { case XFRM_POLICY_BLOCK: @@ -891,11 +984,13 @@ restart: goto error; case XFRM_POLICY_ALLOW: +#ifndef CONFIG_XFRM_SUB_POLICY if (policy->xfrm_nr == 0) { /* Flow passes not transformed. */ xfrm_pol_put(policy); return 0; } +#endif /* Try to find matching bundle. * @@ -911,7 +1006,36 @@ restart: if (dst) break; - nx = xfrm_tmpl_resolve(policy, fl, xfrm, family); +#ifdef CONFIG_XFRM_SUB_POLICY + if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { + pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + fl, family, + XFRM_POLICY_OUT); + if (pols[1]) { + if (pols[1]->action == XFRM_POLICY_BLOCK) { + err = -EPERM; + goto error; + } + npols ++; + xfrm_nr += pols[1]->xfrm_nr; + } + } + + /* + * Because neither flowi nor bundle information knows about + * transformation template size. On more than one policy usage + * we can realize whether all of them is bypass or not after + * they are searched. See above not-transformed bypass + * is surrounded by non-sub policy configuration, too. + */ + if (xfrm_nr == 0) { + /* Flow passes not transformed. */ + xfrm_pols_put(pols, npols); + return 0; + } + +#endif + nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); if (unlikely(nx<0)) { err = nx; @@ -924,7 +1048,7 @@ restart: set_current_state(TASK_RUNNING); remove_wait_queue(&km_waitq, &wait); - nx = xfrm_tmpl_resolve(policy, fl, xfrm, family); + nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); if (nx == -EAGAIN && signal_pending(current)) { err = -ERESTART; @@ -932,7 +1056,7 @@ restart: } if (nx == -EAGAIN || genid != atomic_read(&flow_cache_genid)) { - xfrm_pol_put(policy); + xfrm_pols_put(pols, npols); goto restart; } err = nx; @@ -942,7 +1066,7 @@ restart: } if (nx == 0) { /* Flow passes not transformed. */ - xfrm_pol_put(policy); + xfrm_pols_put(pols, npols); return 0; } @@ -956,8 +1080,14 @@ restart: goto error; } + for (pi = 0; pi < npols; pi++) { + read_lock_bh(&pols[pi]->lock); + pol_dead |= pols[pi]->dead; + read_unlock_bh(&pols[pi]->lock); + } + write_lock_bh(&policy->lock); - if (unlikely(policy->dead || stale_bundle(dst))) { + if (unlikely(pol_dead || stale_bundle(dst))) { /* Wow! While we worked on resolving, this * policy has gone. Retry. It is not paranoia, * we just cannot enlist new bundle to dead object. @@ -977,12 +1107,12 @@ restart: } *dst_p = dst; dst_release(dst_orig); - xfrm_pol_put(policy); + xfrm_pols_put(pols, npols); return 0; error: dst_release(dst_orig); - xfrm_pol_put(policy); + xfrm_pols_put(pols, npols); *dst_p = NULL; return err; } @@ -1090,6 +1220,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) { struct xfrm_policy *pol; + struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; + int npols = 0; + int xfrm_nr; + int pi; struct flowi fl; u8 fl_dir = policy_to_flow_dir(dir); int xerr_idx = -1; @@ -1128,22 +1262,50 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol->curlft.use_time = (unsigned long)xtime.tv_sec; + pols[0] = pol; + npols ++; +#ifdef CONFIG_XFRM_SUB_POLICY + if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { + pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + &fl, family, + XFRM_POLICY_IN); + if (pols[1]) { + pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec; + npols ++; + } + } +#endif + if (pol->action == XFRM_POLICY_ALLOW) { struct sec_path *sp; static struct sec_path dummy; + struct xfrm_tmpl *tp[XFRM_MAX_DEPTH]; + struct xfrm_tmpl **tpp = tp; + int ti = 0; int i, k; if ((sp = skb->sp) == NULL) sp = &dummy; + for (pi = 0; pi < npols; pi++) { + if (pols[pi] != pol && + pols[pi]->action != XFRM_POLICY_ALLOW) + goto reject; + if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) + goto reject_error; + for (i = 0; i < pols[pi]->xfrm_nr; i++) + tpp[ti++] = &pols[pi]->xfrm_vec[i]; + } + xfrm_nr = ti; + /* For each tunnel xfrm, find the first matching tmpl. * For each tmpl before that, find corresponding xfrm. * Order is _important_. Later we will implement * some barriers, but at the moment barriers * are implied between each two transformations. */ - for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) { - k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family); + for (i = xfrm_nr-1, k = 0; i >= 0; i--) { + k = xfrm_policy_ok(tpp[i], sp, k, family); if (k < 0) { if (k < -1 && xerr_idxp) *xerr_idxp = -(2+k); @@ -1154,13 +1316,14 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (secpath_has_nontransport(sp, k, xerr_idxp)) goto reject; - xfrm_pol_put(pol); + xfrm_pols_put(pols, npols); return 1; } reject: xfrm_secpath_reject(xerr_idx, skb, &fl); - xfrm_pol_put(pol); +reject_error: + xfrm_pols_put(pols, npols); return 0; } EXPORT_SYMBOL(__xfrm_policy_check); @@ -1246,6 +1409,23 @@ static void xfrm_prune_bundles(int (*func)(struct dst_entry *)) read_lock_bh(&xfrm_policy_lock); for (i=0; i<2*XFRM_POLICY_MAX; i++) { +#ifdef CONFIG_XFRM_SUB_POLICY + for (pol = xfrm_policy_list_sub[i]; pol; pol = pol->next) { + write_lock(&pol->lock); + dstp = &pol->bundles; + while ((dst=*dstp) != NULL) { + if (func(dst)) { + *dstp = dst->next; + dst->next = gc_list; + gc_list = dst; + } else { + dstp = &dst->next; + } + } + write_unlock(&pol->lock); + } + +#endif for (pol = xfrm_policy_list[i]; pol; pol = pol->next) { write_lock(&pol->lock); dstp = &pol->bundles; From 41a49cc3c02ace59d4dddae91ea211c330970ee3 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 22:48:31 -0700 Subject: [PATCH 0582/1063] [XFRM]: Add sorting interface for state and template. Under two transformation policies it is required to merge them. This is a platform to sort state for outbound and templates for inbound respectively. It will be used when Mobile IPv6 and IPsec are used at the same time. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/xfrm.h | 20 ++++++++++++++++++++ net/xfrm/xfrm_policy.c | 16 ++++++++++++++-- net/xfrm/xfrm_state.c | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 2 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4655ca25f808..d341603e4ba8 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -254,6 +254,8 @@ struct xfrm_state_afinfo { struct xfrm_state *(*find_acq)(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create); + int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); + int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); }; extern int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo); @@ -1002,6 +1004,24 @@ extern int xfrm_state_add(struct xfrm_state *x); extern int xfrm_state_update(struct xfrm_state *x); extern struct xfrm_state *xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family); extern struct xfrm_state *xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family); +#ifdef CONFIG_XFRM_SUB_POLICY +extern int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, + int n, unsigned short family); +extern int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, + int n, unsigned short family); +#else +static inline int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, + int n, unsigned short family) +{ + return -ENOSYS; +} + +static inline int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, + int n, unsigned short family) +{ + return -ENOSYS; +} +#endif extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq); extern int xfrm_state_delete(struct xfrm_state *x); extern void xfrm_state_flush(u8 proto); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 96de6c76ed57..1732159ffd01 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -861,6 +861,8 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, struct xfrm_state **xfrm, unsigned short family) { + struct xfrm_state *tp[XFRM_MAX_DEPTH]; + struct xfrm_state **tpp = (npols > 1) ? tp : xfrm; int cnx = 0; int error; int ret; @@ -871,7 +873,8 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, error = -ENOBUFS; goto fail; } - ret = xfrm_tmpl_resolve_one(pols[i], fl, &xfrm[cnx], family); + + ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family); if (ret < 0) { error = ret; goto fail; @@ -879,11 +882,15 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, cnx += ret; } + /* found states are sorted for outbound processing */ + if (npols > 1) + xfrm_state_sort(xfrm, tpp, cnx, family); + return cnx; fail: for (cnx--; cnx>=0; cnx--) - xfrm_state_put(xfrm[cnx]); + xfrm_state_put(tpp[cnx]); return error; } @@ -1280,6 +1287,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, struct sec_path *sp; static struct sec_path dummy; struct xfrm_tmpl *tp[XFRM_MAX_DEPTH]; + struct xfrm_tmpl *stp[XFRM_MAX_DEPTH]; struct xfrm_tmpl **tpp = tp; int ti = 0; int i, k; @@ -1297,6 +1305,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, tpp[ti++] = &pols[pi]->xfrm_vec[i]; } xfrm_nr = ti; + if (npols > 1) { + xfrm_tmpl_sort(stp, tpp, xfrm_nr, family); + tpp = stp; + } /* For each tunnel xfrm, find the first matching tmpl. * For each tmpl before that, find corresponding xfrm. diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index a26ef6952c30..622e92a08d0b 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -728,6 +728,44 @@ xfrm_find_acq(u8 mode, u32 reqid, u8 proto, } EXPORT_SYMBOL(xfrm_find_acq); +#ifdef CONFIG_XFRM_SUB_POLICY +int +xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, + unsigned short family) +{ + int err = 0; + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + if (!afinfo) + return -EAFNOSUPPORT; + + spin_lock_bh(&xfrm_state_lock); + if (afinfo->tmpl_sort) + err = afinfo->tmpl_sort(dst, src, n); + spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_tmpl_sort); + +int +xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, + unsigned short family) +{ + int err = 0; + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + if (!afinfo) + return -EAFNOSUPPORT; + + spin_lock_bh(&xfrm_state_lock); + if (afinfo->state_sort) + err = afinfo->state_sort(dst, src, n); + spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_state_sort); +#endif + /* Silly enough, but I'm lazy to build resolution list */ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) From f7b6983f0feeefcd2a594138adcffe640593d8de Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 22:49:28 -0700 Subject: [PATCH 0583/1063] [XFRM] POLICY: Support netlink socket interface for sub policy. Sub policy can be used through netlink socket. PF_KEY uses main only and it is TODO to support sub. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/xfrm.h | 7 +++ include/net/xfrm.h | 1 + net/key/af_key.c | 18 ++++-- net/xfrm/xfrm_user.c | 134 ++++++++++++++++++++++++++++++++++++++----- 4 files changed, 142 insertions(+), 18 deletions(-) diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 492fb9818747..14ecd19f4cdc 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -230,6 +230,12 @@ enum xfrm_ae_ftype_t { #define XFRM_AE_MAX (__XFRM_AE_MAX - 1) }; +struct xfrm_userpolicy_type { + __u8 type; + __u16 reserved1; + __u8 reserved2; +}; + /* Netlink message attributes. */ enum xfrm_attr_type_t { XFRMA_UNSPEC, @@ -248,6 +254,7 @@ enum xfrm_attr_type_t { XFRMA_SRCADDR, /* xfrm_address_t */ XFRMA_COADDR, /* xfrm_address_t */ XFRMA_LASTUSED, + XFRMA_POLICY_TYPE, /* struct xfrm_userpolicy_type */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d341603e4ba8..c75b3287d8f8 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -203,6 +203,7 @@ struct km_event u32 proto; u32 byid; u32 aevent; + u32 type; } data; u32 seq; diff --git a/net/key/af_key.c b/net/key/af_key.c index 19e047b0e678..83b443ddc72f 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1731,7 +1731,8 @@ static u32 gen_reqid(void) ++reqid; if (reqid == 0) reqid = IPSEC_MANUAL_REQID_MAX+1; - if (xfrm_policy_walk(check_reqid, (void*)&reqid) != -EEXIST) + if (xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, check_reqid, + (void*)&reqid) != -EEXIST) return reqid; } while (reqid != start); return 0; @@ -2268,7 +2269,8 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg return err; } - xp = xfrm_policy_bysel_ctx(pol->sadb_x_policy_dir-1, &sel, tmp.security, 1); + xp = xfrm_policy_bysel_ctx(XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir-1, + &sel, tmp.security, 1); security_xfrm_policy_free(&tmp); if (xp == NULL) return -ENOENT; @@ -2330,7 +2332,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h if (dir >= XFRM_POLICY_MAX) return -EINVAL; - xp = xfrm_policy_byid(dir, pol->sadb_x_policy_id, + xp = xfrm_policy_byid(XFRM_POLICY_TYPE_MAIN, dir, pol->sadb_x_policy_id, hdr->sadb_msg_type == SADB_X_SPDDELETE2); if (xp == NULL) return -ENOENT; @@ -2378,7 +2380,7 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg * { struct pfkey_dump_data data = { .skb = skb, .hdr = hdr, .sk = sk }; - return xfrm_policy_walk(dump_sp, &data); + return xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, dump_sp, &data); } static int key_notify_policy_flush(struct km_event *c) @@ -2405,7 +2407,8 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg { struct km_event c; - xfrm_policy_flush(); + xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN); + c.data.type = XFRM_POLICY_TYPE_MAIN; c.event = XFRM_MSG_FLUSHPOLICY; c.pid = hdr->sadb_msg_pid; c.seq = hdr->sadb_msg_seq; @@ -2667,6 +2670,9 @@ static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c) static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) { + if (xp && xp->type != XFRM_POLICY_TYPE_MAIN) + return 0; + switch (c->event) { case XFRM_MSG_POLEXPIRE: return key_notify_policy_expire(xp, c); @@ -2675,6 +2681,8 @@ static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_e case XFRM_MSG_UPDPOLICY: return key_notify_policy(xp, dir, c); case XFRM_MSG_FLUSHPOLICY: + if (c->data.type != XFRM_POLICY_TYPE_MAIN) + break; return key_notify_policy_flush(c); default: printk("pfkey: Unknown policy event %d\n", c->event); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 7303b820bea4..c59a78d2923a 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -786,6 +786,22 @@ static int verify_policy_dir(__u8 dir) return 0; } +static int verify_policy_type(__u8 type) +{ + switch (type) { + case XFRM_POLICY_TYPE_MAIN: +#ifdef CONFIG_XFRM_SUB_POLICY + case XFRM_POLICY_TYPE_SUB: +#endif + break; + + default: + return -EINVAL; + }; + + return 0; +} + static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) { switch (p->share) { @@ -879,6 +895,29 @@ static int copy_from_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma) return 0; } +static int copy_from_user_policy_type(u8 *tp, struct rtattr **xfrma) +{ + struct rtattr *rt = xfrma[XFRMA_POLICY_TYPE-1]; + struct xfrm_userpolicy_type *upt; + __u8 type = XFRM_POLICY_TYPE_MAIN; + int err; + + if (rt) { + if (rt->rta_len < sizeof(*upt)) + return -EINVAL; + + upt = RTA_DATA(rt); + type = upt->type; + } + + err = verify_policy_type(type); + if (err) + return err; + + *tp = type; + return 0; +} + static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p) { xp->priority = p->priority; @@ -917,16 +956,20 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, copy_from_user_policy(xp, p); + err = copy_from_user_policy_type(&xp->type, xfrma); + if (err) + goto error; + if (!(err = copy_from_user_tmpl(xp, xfrma))) err = copy_from_user_sec_ctx(xp, xfrma); - - if (err) { - *errp = err; - kfree(xp); - xp = NULL; - } + if (err) + goto error; return xp; + error: + *errp = err; + kfree(xp); + return NULL; } static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) @@ -1037,6 +1080,29 @@ static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *s return 0; } +#ifdef CONFIG_XFRM_SUB_POLICY +static int copy_to_user_policy_type(struct xfrm_policy *xp, struct sk_buff *skb) +{ + struct xfrm_userpolicy_type upt; + + memset(&upt, 0, sizeof(upt)); + upt.type = xp->type; + + RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); + + return 0; + +rtattr_failure: + return -1; +} + +#else +static inline int copy_to_user_policy_type(struct xfrm_policy *xp, struct sk_buff *skb) +{ + return 0; +} +#endif + static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr) { struct xfrm_dump_info *sp = ptr; @@ -1060,6 +1126,8 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr goto nlmsg_failure; if (copy_to_user_sec_ctx(xp, skb)) goto nlmsg_failure; + if (copy_to_user_policy_type(xp, skb) < 0) + goto nlmsg_failure; nlh->nlmsg_len = skb->tail - b; out: @@ -1081,7 +1149,10 @@ static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) info.nlmsg_flags = NLM_F_MULTI; info.this_idx = 0; info.start_idx = cb->args[0]; - (void) xfrm_policy_walk(dump_one_policy, &info); + (void) xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, dump_one_policy, &info); +#ifdef CONFIG_XFRM_SUB_POLICY + (void) xfrm_policy_walk(XFRM_POLICY_TYPE_SUB, dump_one_policy, &info); +#endif cb->args[0] = info.this_idx; return skb->len; @@ -1117,6 +1188,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr { struct xfrm_policy *xp; struct xfrm_userpolicy_id *p; + __u8 type = XFRM_POLICY_TYPE_MAIN; int err; struct km_event c; int delete; @@ -1124,12 +1196,16 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr p = NLMSG_DATA(nlh); delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY; + err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); + if (err) + return err; + err = verify_policy_dir(p->dir); if (err) return err; if (p->index) - xp = xfrm_policy_byid(p->dir, p->index, delete); + xp = xfrm_policy_byid(type, p->dir, p->index, delete); else { struct rtattr **rtattrs = (struct rtattr **)xfrma; struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1]; @@ -1146,7 +1222,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr if ((err = security_xfrm_policy_alloc(&tmp, uctx))) return err; } - xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, delete); + xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, delete); security_xfrm_policy_free(&tmp); } if (xp == NULL) @@ -1329,9 +1405,16 @@ out: static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) { -struct km_event c; + struct km_event c; + __u8 type = XFRM_POLICY_TYPE_MAIN; + int err; - xfrm_policy_flush(); + err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); + if (err) + return err; + + xfrm_policy_flush(type); + c.data.type = type; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; c.pid = nlh->nlmsg_pid; @@ -1344,10 +1427,15 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void * struct xfrm_policy *xp; struct xfrm_user_polexpire *up = NLMSG_DATA(nlh); struct xfrm_userpolicy_info *p = &up->pol; + __u8 type = XFRM_POLICY_TYPE_MAIN; int err = -ENOENT; + err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); + if (err) + return err; + if (p->index) - xp = xfrm_policy_byid(p->dir, p->index, 0); + xp = xfrm_policy_byid(type, p->dir, p->index, 0); else { struct rtattr **rtattrs = (struct rtattr **)xfrma; struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1]; @@ -1364,7 +1452,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void * if ((err = security_xfrm_policy_alloc(&tmp, uctx))) return err; } - xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, 0); + xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, 0); security_xfrm_policy_free(&tmp); } @@ -1818,6 +1906,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, goto nlmsg_failure; if (copy_to_user_state_sec_ctx(x, skb)) goto nlmsg_failure; + if (copy_to_user_policy_type(xp, skb) < 0) + goto nlmsg_failure; nlh->nlmsg_len = skb->tail - b; return skb->len; @@ -1898,6 +1988,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, } copy_from_user_policy(xp, p); + xp->type = XFRM_POLICY_TYPE_MAIN; copy_templates(xp, ut, nr); if (!xp->security) { @@ -1931,6 +2022,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, goto nlmsg_failure; if (copy_to_user_sec_ctx(xp, skb)) goto nlmsg_failure; + if (copy_to_user_policy_type(xp, skb) < 0) + goto nlmsg_failure; upe->hard = !!hard; nlh->nlmsg_len = skb->tail - b; @@ -2002,6 +2095,8 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * copy_to_user_policy(xp, p, dir); if (copy_to_user_tmpl(xp, skb) < 0) goto nlmsg_failure; + if (copy_to_user_policy_type(xp, skb) < 0) + goto nlmsg_failure; nlh->nlmsg_len = skb->tail - b; @@ -2019,6 +2114,9 @@ static int xfrm_notify_policy_flush(struct km_event *c) struct nlmsghdr *nlh; struct sk_buff *skb; unsigned char *b; +#ifdef CONFIG_XFRM_SUB_POLICY + struct xfrm_userpolicy_type upt; +#endif int len = NLMSG_LENGTH(0); skb = alloc_skb(len, GFP_ATOMIC); @@ -2028,6 +2126,13 @@ static int xfrm_notify_policy_flush(struct km_event *c) nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0); + nlh->nlmsg_flags = 0; + +#ifdef CONFIG_XFRM_SUB_POLICY + memset(&upt, 0, sizeof(upt)); + upt.type = c->data.type; + RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); +#endif nlh->nlmsg_len = skb->tail - b; @@ -2035,6 +2140,9 @@ static int xfrm_notify_policy_flush(struct km_event *c) return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); nlmsg_failure: +#ifdef CONFIG_XFRM_SUB_POLICY +rtattr_failure: +#endif kfree_skb(skb); return -1; } From 58c949d1b9551f3e4ba9dde4aeda341ecf5e42b5 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 22:51:02 -0700 Subject: [PATCH 0584/1063] [XFRM] IPV6: Add sort functions to combine templates/states for IPsec. Add sort functions to combine templates/states for IPsec. Think of outbound transformation order we should be careful with transport AH which must be the last of all transport ones. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/xfrm6_state.c | 97 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 9c95b9d3e110..e0b8f3c5caa2 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -156,12 +156,109 @@ __xfrm6_find_acq(u8 mode, u32 reqid, u8 proto, return x0; } +static int +__xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n) +{ + int i; + int j = 0; + + /* Rule 1: select IPsec transport except AH */ + for (i = 0; i < n; i++) { + if (src[i]->props.mode == XFRM_MODE_TRANSPORT && + src[i]->id.proto != IPPROTO_AH) { + dst[j++] = src[i]; + src[i] = NULL; + } + } + if (j == n) + goto end; + + /* XXX: Rule 2: select MIPv6 RO or inbound trigger */ + + /* Rule 3: select IPsec transport AH */ + for (i = 0; i < n; i++) { + if (src[i] && + src[i]->props.mode == XFRM_MODE_TRANSPORT && + src[i]->id.proto == IPPROTO_AH) { + dst[j++] = src[i]; + src[i] = NULL; + } + } + if (j == n) + goto end; + + /* Rule 4: select IPsec tunnel */ + for (i = 0; i < n; i++) { + if (src[i] && + src[i]->props.mode == XFRM_MODE_TUNNEL) { + dst[j++] = src[i]; + src[i] = NULL; + } + } + if (likely(j == n)) + goto end; + + /* Final rule */ + for (i = 0; i < n; i++) { + if (src[i]) { + dst[j++] = src[i]; + src[i] = NULL; + } + } + + end: + return 0; +} + +static int +__xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) +{ + int i; + int j = 0; + + /* Rule 1: select IPsec transport */ + for (i = 0; i < n; i++) { + if (src[i]->mode == XFRM_MODE_TRANSPORT) { + dst[j++] = src[i]; + src[i] = NULL; + } + } + if (j == n) + goto end; + + /* XXX: Rule 2: select MIPv6 RO or inbound trigger */ + + /* Rule 3: select IPsec tunnel */ + for (i = 0; i < n; i++) { + if (src[i] && + src[i]->mode == XFRM_MODE_TUNNEL) { + dst[j++] = src[i]; + src[i] = NULL; + } + } + if (likely(j == n)) + goto end; + + /* Final rule */ + for (i = 0; i < n; i++) { + if (src[i]) { + dst[j++] = src[i]; + src[i] = NULL; + } + } + + end: + return 0; +} + static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, .init_tempsel = __xfrm6_init_tempsel, .state_lookup = __xfrm6_state_lookup, .state_lookup_byaddr = __xfrm6_state_lookup_byaddr, .find_acq = __xfrm6_find_acq, + .tmpl_sort = __xfrm6_tmpl_sort, + .state_sort = __xfrm6_state_sort, }; void __init xfrm6_state_init(void) From 64d9fdda8e1bdf416b2d9203c3ad9c249ea301be Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 22:54:07 -0700 Subject: [PATCH 0585/1063] [XFRM] IPV6: Support Mobile IPv6 extension headers sorting. Support Mobile IPv6 extension headers sorting for two transformation policies. Mobile IPv6 extension headers should be placed after IPsec transport mode, but before transport AH when outbound. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/xfrm6_state.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index e0b8f3c5caa2..6269584e610e 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -173,7 +173,19 @@ __xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n) if (j == n) goto end; - /* XXX: Rule 2: select MIPv6 RO or inbound trigger */ + /* Rule 2: select MIPv6 RO or inbound trigger */ +#ifdef CONFIG_IPV6_MIP6 + for (i = 0; i < n; i++) { + if (src[i] && + (src[i]->props.mode == XFRM_MODE_ROUTEOPTIMIZATION || + src[i]->props.mode == XFRM_MODE_IN_TRIGGER)) { + dst[j++] = src[i]; + src[i] = NULL; + } + } + if (j == n) + goto end; +#endif /* Rule 3: select IPsec transport AH */ for (i = 0; i < n; i++) { @@ -226,7 +238,19 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) if (j == n) goto end; - /* XXX: Rule 2: select MIPv6 RO or inbound trigger */ + /* Rule 2: select MIPv6 RO or inbound trigger */ +#ifdef CONFIG_IPV6_MIP6 + for (i = 0; i < n; i++) { + if (src[i] && + (src[i]->mode == XFRM_MODE_ROUTEOPTIMIZATION || + src[i]->mode == XFRM_MODE_IN_TRIGGER)) { + dst[j++] = src[i]; + src[i] = NULL; + } + } + if (j == n) + goto end; +#endif /* Rule 3: select IPsec tunnel */ for (i = 0; i < n; i++) { From 2770834c9f44afd1bfa13914c7285470775af657 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 00:13:10 -0700 Subject: [PATCH 0586/1063] [XFRM]: Pull xfrm_state_bydst hash table knowledge out of afinfo. Signed-off-by: David S. Miller --- include/net/xfrm.h | 16 ------ net/ipv4/xfrm4_state.c | 53 -------------------- net/ipv6/xfrm6_state.c | 56 --------------------- net/xfrm/xfrm_state.c | 110 +++++++++++++++++++++++++++++++++++++---- 4 files changed, 100 insertions(+), 135 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index c75b3287d8f8..cc83443f301e 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -243,7 +243,6 @@ extern int __xfrm_state_delete(struct xfrm_state *x); struct xfrm_state_afinfo { unsigned short family; - struct list_head *state_bydst; struct list_head *state_bysrc; struct list_head *state_byspi; int (*init_flags)(struct xfrm_state *x); @@ -252,9 +251,6 @@ struct xfrm_state_afinfo { xfrm_address_t *daddr, xfrm_address_t *saddr); struct xfrm_state *(*state_lookup)(xfrm_address_t *daddr, u32 spi, u8 proto); struct xfrm_state *(*state_lookup_byaddr)(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); - struct xfrm_state *(*find_acq)(u8 mode, u32 reqid, u8 proto, - xfrm_address_t *daddr, xfrm_address_t *saddr, - int create); int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); }; @@ -455,18 +451,6 @@ unsigned __xfrm6_dst_hash(xfrm_address_t *addr) return h; } -static __inline__ -unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) -{ - switch (family) { - case AF_INET: - return __xfrm4_dst_hash(addr); - case AF_INET6: - return __xfrm6_dst_hash(addr); - } - return 0; -} - static __inline__ unsigned __xfrm4_src_hash(xfrm_address_t *addr) { diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 616be131b4e3..9dc1afc17b6d 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -88,65 +88,12 @@ __xfrm4_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, return NULL; } -static struct xfrm_state * -__xfrm4_find_acq(u8 mode, u32 reqid, u8 proto, - xfrm_address_t *daddr, xfrm_address_t *saddr, - int create) -{ - struct xfrm_state *x, *x0; - unsigned h = __xfrm4_dst_hash(daddr); - - x0 = NULL; - - list_for_each_entry(x, xfrm4_state_afinfo.state_bydst+h, bydst) { - if (x->props.family == AF_INET && - daddr->a4 == x->id.daddr.a4 && - mode == x->props.mode && - proto == x->id.proto && - saddr->a4 == x->props.saddr.a4 && - reqid == x->props.reqid && - x->km.state == XFRM_STATE_ACQ && - !x->id.spi) { - x0 = x; - break; - } - } - if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) { - x0->sel.daddr.a4 = daddr->a4; - x0->sel.saddr.a4 = saddr->a4; - x0->sel.prefixlen_d = 32; - x0->sel.prefixlen_s = 32; - x0->props.saddr.a4 = saddr->a4; - x0->km.state = XFRM_STATE_ACQ; - x0->id.daddr.a4 = daddr->a4; - x0->id.proto = proto; - x0->props.family = AF_INET; - x0->props.mode = mode; - x0->props.reqid = reqid; - x0->props.family = AF_INET; - x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; - xfrm_state_hold(x0); - x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; - add_timer(&x0->timer); - xfrm_state_hold(x0); - list_add_tail(&x0->bydst, xfrm4_state_afinfo.state_bydst+h); - h = __xfrm4_src_hash(saddr); - xfrm_state_hold(x0); - list_add_tail(&x0->bysrc, xfrm4_state_afinfo.state_bysrc+h); - wake_up(&km_waitq); - } - if (x0) - xfrm_state_hold(x0); - return x0; -} - static struct xfrm_state_afinfo xfrm4_state_afinfo = { .family = AF_INET, .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, .state_lookup = __xfrm4_state_lookup, .state_lookup_byaddr = __xfrm4_state_lookup_byaddr, - .find_acq = __xfrm4_find_acq, }; void __init xfrm4_state_init(void) diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 6269584e610e..40fcaab7e028 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -101,61 +101,6 @@ __xfrm6_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto) return NULL; } -static struct xfrm_state * -__xfrm6_find_acq(u8 mode, u32 reqid, u8 proto, - xfrm_address_t *daddr, xfrm_address_t *saddr, - int create) -{ - struct xfrm_state *x, *x0; - unsigned h = __xfrm6_dst_hash(daddr); - - x0 = NULL; - - list_for_each_entry(x, xfrm6_state_afinfo.state_bydst+h, bydst) { - if (x->props.family == AF_INET6 && - ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) && - mode == x->props.mode && - proto == x->id.proto && - ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)x->props.saddr.a6) && - reqid == x->props.reqid && - x->km.state == XFRM_STATE_ACQ && - !x->id.spi) { - x0 = x; - break; - } - } - if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) { - ipv6_addr_copy((struct in6_addr *)x0->sel.daddr.a6, - (struct in6_addr *)daddr); - ipv6_addr_copy((struct in6_addr *)x0->sel.saddr.a6, - (struct in6_addr *)saddr); - x0->sel.prefixlen_d = 128; - x0->sel.prefixlen_s = 128; - ipv6_addr_copy((struct in6_addr *)x0->props.saddr.a6, - (struct in6_addr *)saddr); - x0->km.state = XFRM_STATE_ACQ; - ipv6_addr_copy((struct in6_addr *)x0->id.daddr.a6, - (struct in6_addr *)daddr); - x0->id.proto = proto; - x0->props.family = AF_INET6; - x0->props.mode = mode; - x0->props.reqid = reqid; - x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; - xfrm_state_hold(x0); - x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; - add_timer(&x0->timer); - xfrm_state_hold(x0); - list_add_tail(&x0->bydst, xfrm6_state_afinfo.state_bydst+h); - h = __xfrm6_src_hash(saddr); - xfrm_state_hold(x0); - list_add_tail(&x0->bysrc, xfrm6_state_afinfo.state_bysrc+h); - wake_up(&km_waitq); - } - if (x0) - xfrm_state_hold(x0); - return x0; -} - static int __xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n) { @@ -280,7 +225,6 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .init_tempsel = __xfrm6_init_tempsel, .state_lookup = __xfrm6_state_lookup, .state_lookup_byaddr = __xfrm6_state_lookup_byaddr, - .find_acq = __xfrm6_find_acq, .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, }; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 622e92a08d0b..80f5f9dc2b9e 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -48,6 +48,18 @@ static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE]; static struct list_head xfrm_state_bysrc[XFRM_DST_HSIZE]; static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE]; +static __inline__ +unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) +{ + switch (family) { + case AF_INET: + return __xfrm4_dst_hash(addr); + case AF_INET6: + return __xfrm6_dst_hash(addr); + } + return 0; +} + DECLARE_WAIT_QUEUE_HEAD(km_waitq); EXPORT_SYMBOL(km_waitq); @@ -489,6 +501,89 @@ void xfrm_state_insert(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_insert); +/* xfrm_state_lock is held */ +static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) +{ + unsigned int h = xfrm_dst_hash(daddr, family); + struct xfrm_state *x; + + list_for_each_entry(x, xfrm_state_bydst+h, bydst) { + if (x->props.reqid != reqid || + x->props.mode != mode || + x->props.family != family || + x->km.state != XFRM_STATE_ACQ || + x->id.spi != 0) + continue; + + switch (family) { + case AF_INET: + if (x->id.daddr.a4 != daddr->a4 || + x->props.saddr.a4 != saddr->a4) + continue; + break; + case AF_INET6: + if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6, + (struct in6_addr *)daddr) || + !ipv6_addr_equal((struct in6_addr *) + x->props.saddr.a6, + (struct in6_addr *)saddr)) + continue; + break; + }; + + xfrm_state_hold(x); + return x; + } + + if (!create) + return NULL; + + x = xfrm_state_alloc(); + if (likely(x)) { + switch (family) { + case AF_INET: + x->sel.daddr.a4 = daddr->a4; + x->sel.saddr.a4 = saddr->a4; + x->sel.prefixlen_d = 32; + x->sel.prefixlen_s = 32; + x->props.saddr.a4 = saddr->a4; + x->id.daddr.a4 = daddr->a4; + break; + + case AF_INET6: + ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6, + (struct in6_addr *)daddr); + ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6, + (struct in6_addr *)saddr); + x->sel.prefixlen_d = 128; + x->sel.prefixlen_s = 128; + ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6, + (struct in6_addr *)saddr); + ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6, + (struct in6_addr *)daddr); + break; + }; + + x->km.state = XFRM_STATE_ACQ; + x->id.proto = proto; + x->props.family = family; + x->props.mode = mode; + x->props.reqid = reqid; + x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; + xfrm_state_hold(x); + x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; + add_timer(&x->timer); + xfrm_state_hold(x); + list_add_tail(&x->bydst, xfrm_state_bydst+h); + h = xfrm_src_hash(saddr, family); + xfrm_state_hold(x); + list_add_tail(&x->bysrc, xfrm_state_bysrc+h); + wake_up(&km_waitq); + } + + return x; +} + static inline struct xfrm_state * __xfrm_state_locate(struct xfrm_state_afinfo *afinfo, struct xfrm_state *x, int use_spi) @@ -533,9 +628,9 @@ int xfrm_state_add(struct xfrm_state *x) } if (use_spi && !x1) - x1 = afinfo->find_acq( - x->props.mode, x->props.reqid, x->id.proto, - &x->id.daddr, &x->props.saddr, 0); + x1 = __find_acq_core(family, x->props.mode, x->props.reqid, + x->id.proto, + &x->id.daddr, &x->props.saddr, 0); __xfrm_state_insert(x); err = 0; @@ -716,14 +811,11 @@ xfrm_find_acq(u8 mode, u32 reqid, u8 proto, int create, unsigned short family) { struct xfrm_state *x; - struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - if (!afinfo) - return NULL; spin_lock_bh(&xfrm_state_lock); - x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create); + x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create); spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); + return x; } EXPORT_SYMBOL(xfrm_find_acq); @@ -1181,7 +1273,6 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo) if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL)) err = -ENOBUFS; else { - afinfo->state_bydst = xfrm_state_bydst; afinfo->state_bysrc = xfrm_state_bysrc; afinfo->state_byspi = xfrm_state_byspi; xfrm_state_afinfo[afinfo->family] = afinfo; @@ -1206,7 +1297,6 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) xfrm_state_afinfo[afinfo->family] = NULL; afinfo->state_byspi = NULL; afinfo->state_bysrc = NULL; - afinfo->state_bydst = NULL; } } write_unlock_bh(&xfrm_state_afinfo_lock); From edcd582152090bfb0ccb4ad444c151798a73eda8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 00:42:45 -0700 Subject: [PATCH 0587/1063] [XFRM]: Pull xfrm_state_by{spi,src} hash table knowledge out of afinfo. Signed-off-by: David S. Miller --- include/net/xfrm.h | 78 --------------- net/ipv4/xfrm4_state.c | 28 ------ net/ipv6/xfrm6_state.c | 40 -------- net/xfrm/xfrm_state.c | 210 +++++++++++++++++++++++++++++++---------- 4 files changed, 159 insertions(+), 197 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index cc83443f301e..dd3b84b9c04e 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -243,14 +243,10 @@ extern int __xfrm_state_delete(struct xfrm_state *x); struct xfrm_state_afinfo { unsigned short family; - struct list_head *state_bysrc; - struct list_head *state_byspi; int (*init_flags)(struct xfrm_state *x); void (*init_tempsel)(struct xfrm_state *x, struct flowi *fl, struct xfrm_tmpl *tmpl, xfrm_address_t *daddr, xfrm_address_t *saddr); - struct xfrm_state *(*state_lookup)(xfrm_address_t *daddr, u32 spi, u8 proto); - struct xfrm_state *(*state_lookup_byaddr)(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); }; @@ -431,80 +427,6 @@ static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols) } #endif -#define XFRM_DST_HSIZE 1024 - -static __inline__ -unsigned __xfrm4_dst_hash(xfrm_address_t *addr) -{ - unsigned h; - h = ntohl(addr->a4); - h = (h ^ (h>>16)) % XFRM_DST_HSIZE; - return h; -} - -static __inline__ -unsigned __xfrm6_dst_hash(xfrm_address_t *addr) -{ - unsigned h; - h = ntohl(addr->a6[2]^addr->a6[3]); - h = (h ^ (h>>16)) % XFRM_DST_HSIZE; - return h; -} - -static __inline__ -unsigned __xfrm4_src_hash(xfrm_address_t *addr) -{ - return __xfrm4_dst_hash(addr); -} - -static __inline__ -unsigned __xfrm6_src_hash(xfrm_address_t *addr) -{ - return __xfrm6_dst_hash(addr); -} - -static __inline__ -unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) -{ - switch (family) { - case AF_INET: - return __xfrm4_src_hash(addr); - case AF_INET6: - return __xfrm6_src_hash(addr); - } - return 0; -} - -static __inline__ -unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) -{ - unsigned h; - h = ntohl(addr->a4^spi^proto); - h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; - return h; -} - -static __inline__ -unsigned __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) -{ - unsigned h; - h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto); - h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; - return h; -} - -static __inline__ -unsigned xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family) -{ - switch (family) { - case AF_INET: - return __xfrm4_spi_hash(addr, spi, proto); - case AF_INET6: - return __xfrm6_spi_hash(addr, spi, proto); - } - return 0; /*XXX*/ -} - extern void __xfrm_state_destroy(struct xfrm_state *); static inline void __xfrm_state_put(struct xfrm_state *x) diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 9dc1afc17b6d..6a2a4ab42772 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -62,38 +62,10 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->props.family = AF_INET; } -static struct xfrm_state * -__xfrm4_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto) -{ - unsigned h = __xfrm4_spi_hash(daddr, spi, proto); - struct xfrm_state *x; - - list_for_each_entry(x, xfrm4_state_afinfo.state_byspi+h, byspi) { - if (x->props.family == AF_INET && - spi == x->id.spi && - daddr->a4 == x->id.daddr.a4 && - proto == x->id.proto) { - xfrm_state_hold(x); - return x; - } - } - return NULL; -} - -/* placeholder until ipv4's code is written */ -static struct xfrm_state * -__xfrm4_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, - u8 proto) -{ - return NULL; -} - static struct xfrm_state_afinfo xfrm4_state_afinfo = { .family = AF_INET, .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, - .state_lookup = __xfrm4_state_lookup, - .state_lookup_byaddr = __xfrm4_state_lookup_byaddr, }; void __init xfrm4_state_init(void) diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 40fcaab7e028..d88cd92c864e 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -63,44 +63,6 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->props.family = AF_INET6; } -static struct xfrm_state * -__xfrm6_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, - u8 proto) -{ - struct xfrm_state *x = NULL; - unsigned h; - - h = __xfrm6_src_hash(saddr); - list_for_each_entry(x, xfrm6_state_afinfo.state_bysrc+h, bysrc) { - if (x->props.family == AF_INET6 && - ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) && - ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)x->props.saddr.a6) && - proto == x->id.proto) { - xfrm_state_hold(x); - return x; - } - } - return NULL; -} - -static struct xfrm_state * -__xfrm6_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto) -{ - unsigned h = __xfrm6_spi_hash(daddr, spi, proto); - struct xfrm_state *x; - - list_for_each_entry(x, xfrm6_state_afinfo.state_byspi+h, byspi) { - if (x->props.family == AF_INET6 && - spi == x->id.spi && - ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) && - proto == x->id.proto) { - xfrm_state_hold(x); - return x; - } - } - return NULL; -} - static int __xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n) { @@ -223,8 +185,6 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, .init_tempsel = __xfrm6_init_tempsel, - .state_lookup = __xfrm6_state_lookup, - .state_lookup_byaddr = __xfrm6_state_lookup_byaddr, .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, }; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 80f5f9dc2b9e..4a3832f81c37 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -38,6 +38,8 @@ EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth); static DEFINE_SPINLOCK(xfrm_state_lock); +#define XFRM_DST_HSIZE 1024 + /* Hash table to find appropriate SA towards given target (endpoint * of tunnel or destination of transport mode) allowed by selector. * @@ -48,6 +50,48 @@ static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE]; static struct list_head xfrm_state_bysrc[XFRM_DST_HSIZE]; static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE]; +static __inline__ +unsigned __xfrm4_dst_hash(xfrm_address_t *addr) +{ + unsigned h; + h = ntohl(addr->a4); + h = (h ^ (h>>16)) % XFRM_DST_HSIZE; + return h; +} + +static __inline__ +unsigned __xfrm6_dst_hash(xfrm_address_t *addr) +{ + unsigned h; + h = ntohl(addr->a6[2]^addr->a6[3]); + h = (h ^ (h>>16)) % XFRM_DST_HSIZE; + return h; +} + +static __inline__ +unsigned __xfrm4_src_hash(xfrm_address_t *addr) +{ + return __xfrm4_dst_hash(addr); +} + +static __inline__ +unsigned __xfrm6_src_hash(xfrm_address_t *addr) +{ + return __xfrm6_dst_hash(addr); +} + +static __inline__ +unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) +{ + switch (family) { + case AF_INET: + return __xfrm4_src_hash(addr); + case AF_INET6: + return __xfrm6_src_hash(addr); + } + return 0; +} + static __inline__ unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) { @@ -60,6 +104,36 @@ unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) return 0; } +static __inline__ +unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) +{ + unsigned h; + h = ntohl(addr->a4^spi^proto); + h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; + return h; +} + +static __inline__ +unsigned __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) +{ + unsigned h; + h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto); + h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; + return h; +} + +static __inline__ +unsigned xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family) +{ + switch (family) { + case AF_INET: + return __xfrm4_spi_hash(addr, spi, proto); + case AF_INET6: + return __xfrm6_spi_hash(addr, spi, proto); + } + return 0; /*XXX*/ +} + DECLARE_WAIT_QUEUE_HEAD(km_waitq); EXPORT_SYMBOL(km_waitq); @@ -342,6 +416,83 @@ xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, return 0; } +static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family) +{ + unsigned int h = xfrm_spi_hash(daddr, spi, proto, family); + struct xfrm_state *x; + + list_for_each_entry(x, xfrm_state_byspi+h, byspi) { + if (x->props.family != family || + x->id.spi != spi || + x->id.proto != proto) + continue; + + switch (family) { + case AF_INET: + if (x->id.daddr.a4 != daddr->a4) + continue; + break; + case AF_INET6: + if (!ipv6_addr_equal((struct in6_addr *)daddr, + (struct in6_addr *) + x->id.daddr.a6)) + continue; + break; + }; + + xfrm_state_hold(x); + return x; + } + + return NULL; +} + +static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) +{ + unsigned int h = xfrm_src_hash(saddr, family); + struct xfrm_state *x; + + list_for_each_entry(x, xfrm_state_bysrc+h, bysrc) { + if (x->props.family != family || + x->id.proto != proto) + continue; + + switch (family) { + case AF_INET: + if (x->id.daddr.a4 != daddr->a4 || + x->props.saddr.a4 != saddr->a4) + continue; + break; + case AF_INET6: + if (!ipv6_addr_equal((struct in6_addr *)daddr, + (struct in6_addr *) + x->id.daddr.a6) || + !ipv6_addr_equal((struct in6_addr *)saddr, + (struct in6_addr *) + x->props.saddr.a6)) + continue; + break; + }; + + xfrm_state_hold(x); + return x; + } + + return NULL; +} + +static inline struct xfrm_state * +__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family) +{ + if (use_spi) + return __xfrm_state_lookup(&x->id.daddr, x->id.spi, + x->id.proto, family); + else + return __xfrm_state_lookup_byaddr(&x->id.daddr, + &x->props.saddr, + x->id.proto, family); +} + struct xfrm_state * xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct flowi *fl, struct xfrm_tmpl *tmpl, @@ -353,14 +504,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, int acquire_in_progress = 0; int error = 0; struct xfrm_state *best = NULL; - struct xfrm_state_afinfo *afinfo; - afinfo = xfrm_state_get_afinfo(family); - if (afinfo == NULL) { - *err = -EAFNOSUPPORT; - return NULL; - } - spin_lock_bh(&xfrm_state_lock); list_for_each_entry(x, xfrm_state_bydst+h, bydst) { if (x->props.family == family && @@ -406,8 +550,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, x = best; if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && - (x0 = afinfo->state_lookup(daddr, tmpl->id.spi, - tmpl->id.proto)) != NULL) { + (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi, + tmpl->id.proto, family)) != NULL) { xfrm_state_put(x0); error = -EEXIST; goto out; @@ -457,7 +601,6 @@ out: else *err = acquire_in_progress ? -EAGAIN : error; spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); return x; } @@ -584,34 +727,20 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re return x; } -static inline struct xfrm_state * -__xfrm_state_locate(struct xfrm_state_afinfo *afinfo, struct xfrm_state *x, - int use_spi) -{ - if (use_spi) - return afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); - else - return afinfo->state_lookup_byaddr(&x->id.daddr, &x->props.saddr, x->id.proto); -} - static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq); int xfrm_state_add(struct xfrm_state *x) { - struct xfrm_state_afinfo *afinfo; struct xfrm_state *x1; int family; int err; int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); family = x->props.family; - afinfo = xfrm_state_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; spin_lock_bh(&xfrm_state_lock); - x1 = __xfrm_state_locate(afinfo, x, use_spi); + x1 = __xfrm_state_locate(x, use_spi, family); if (x1) { xfrm_state_put(x1); x1 = NULL; @@ -637,7 +766,6 @@ int xfrm_state_add(struct xfrm_state *x) out: spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); if (!err) xfrm_flush_all_bundles(); @@ -653,17 +781,12 @@ EXPORT_SYMBOL(xfrm_state_add); int xfrm_state_update(struct xfrm_state *x) { - struct xfrm_state_afinfo *afinfo; struct xfrm_state *x1; int err; int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); - afinfo = xfrm_state_get_afinfo(x->props.family); - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - spin_lock_bh(&xfrm_state_lock); - x1 = __xfrm_state_locate(afinfo, x, use_spi); + x1 = __xfrm_state_locate(x, use_spi, x->props.family); err = -ESRCH; if (!x1) @@ -683,7 +806,6 @@ int xfrm_state_update(struct xfrm_state *x) out: spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); if (err) return err; @@ -776,14 +898,10 @@ xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family) { struct xfrm_state *x; - struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - if (!afinfo) - return NULL; spin_lock_bh(&xfrm_state_lock); - x = afinfo->state_lookup(daddr, spi, proto); + x = __xfrm_state_lookup(daddr, spi, proto, family); spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); return x; } EXPORT_SYMBOL(xfrm_state_lookup); @@ -793,14 +911,10 @@ xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) { struct xfrm_state *x; - struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - if (!afinfo) - return NULL; spin_lock_bh(&xfrm_state_lock); - x = afinfo->state_lookup_byaddr(daddr, saddr, proto); + x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family); spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); return x; } EXPORT_SYMBOL(xfrm_state_lookup_byaddr); @@ -1272,11 +1386,8 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo) write_lock_bh(&xfrm_state_afinfo_lock); if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL)) err = -ENOBUFS; - else { - afinfo->state_bysrc = xfrm_state_bysrc; - afinfo->state_byspi = xfrm_state_byspi; + else xfrm_state_afinfo[afinfo->family] = afinfo; - } write_unlock_bh(&xfrm_state_afinfo_lock); return err; } @@ -1293,11 +1404,8 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) { if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo)) err = -EINVAL; - else { + else xfrm_state_afinfo[afinfo->family] = NULL; - afinfo->state_byspi = NULL; - afinfo->state_bysrc = NULL; - } } write_unlock_bh(&xfrm_state_afinfo_lock); return err; From 8f126e37c0b250310a48a609bedf92a19a5559ec Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 02:45:07 -0700 Subject: [PATCH 0588/1063] [XFRM]: Convert xfrm_state hash linkage to hlists. Signed-off-by: David S. Miller --- include/net/xfrm.h | 6 +-- net/xfrm/xfrm_state.c | 92 ++++++++++++++++++++++++------------------- 2 files changed, 54 insertions(+), 44 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index dd3b84b9c04e..3405e5d9d51c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -94,9 +94,9 @@ extern struct mutex xfrm_cfg_mutex; struct xfrm_state { /* Note: bydst is re-used during gc */ - struct list_head bydst; - struct list_head bysrc; - struct list_head byspi; + struct hlist_node bydst; + struct hlist_node bysrc; + struct hlist_node byspi; atomic_t refcnt; spinlock_t lock; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 4a3832f81c37..fe3c8c38d5e1 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -46,9 +46,9 @@ static DEFINE_SPINLOCK(xfrm_state_lock); * Main use is finding SA after policy selected tunnel or transport mode. * Also, it can be used by ah/esp icmp error handler to find offending SA. */ -static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE]; -static struct list_head xfrm_state_bysrc[XFRM_DST_HSIZE]; -static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE]; +static struct hlist_head xfrm_state_bydst[XFRM_DST_HSIZE]; +static struct hlist_head xfrm_state_bysrc[XFRM_DST_HSIZE]; +static struct hlist_head xfrm_state_byspi[XFRM_DST_HSIZE]; static __inline__ unsigned __xfrm4_dst_hash(xfrm_address_t *addr) @@ -141,7 +141,7 @@ static DEFINE_RWLOCK(xfrm_state_afinfo_lock); static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; static struct work_struct xfrm_state_gc_work; -static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list); +static HLIST_HEAD(xfrm_state_gc_list); static DEFINE_SPINLOCK(xfrm_state_gc_lock); static int xfrm_state_gc_flush_bundles; @@ -178,8 +178,8 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) static void xfrm_state_gc_task(void *data) { struct xfrm_state *x; - struct list_head *entry, *tmp; - struct list_head gc_list = LIST_HEAD_INIT(gc_list); + struct hlist_node *entry, *tmp; + struct hlist_head gc_list; if (xfrm_state_gc_flush_bundles) { xfrm_state_gc_flush_bundles = 0; @@ -187,13 +187,13 @@ static void xfrm_state_gc_task(void *data) } spin_lock_bh(&xfrm_state_gc_lock); - list_splice_init(&xfrm_state_gc_list, &gc_list); + gc_list.first = xfrm_state_gc_list.first; + INIT_HLIST_HEAD(&xfrm_state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - list_for_each_safe(entry, tmp, &gc_list) { - x = list_entry(entry, struct xfrm_state, bydst); + hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst) xfrm_state_gc_destroy(x); - } + wake_up(&km_waitq); } @@ -287,9 +287,9 @@ struct xfrm_state *xfrm_state_alloc(void) if (x) { atomic_set(&x->refcnt, 1); atomic_set(&x->tunnel_users, 0); - INIT_LIST_HEAD(&x->bydst); - INIT_LIST_HEAD(&x->bysrc); - INIT_LIST_HEAD(&x->byspi); + INIT_HLIST_NODE(&x->bydst); + INIT_HLIST_NODE(&x->bysrc); + INIT_HLIST_NODE(&x->byspi); init_timer(&x->timer); x->timer.function = xfrm_timer_handler; x->timer.data = (unsigned long)x; @@ -314,7 +314,7 @@ void __xfrm_state_destroy(struct xfrm_state *x) BUG_TRAP(x->km.state == XFRM_STATE_DEAD); spin_lock_bh(&xfrm_state_gc_lock); - list_add(&x->bydst, &xfrm_state_gc_list); + hlist_add_head(&x->bydst, &xfrm_state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); schedule_work(&xfrm_state_gc_work); } @@ -327,12 +327,12 @@ int __xfrm_state_delete(struct xfrm_state *x) if (x->km.state != XFRM_STATE_DEAD) { x->km.state = XFRM_STATE_DEAD; spin_lock(&xfrm_state_lock); - list_del(&x->bydst); + hlist_del(&x->bydst); __xfrm_state_put(x); - list_del(&x->bysrc); + hlist_del(&x->bysrc); __xfrm_state_put(x); if (x->id.spi) { - list_del(&x->byspi); + hlist_del(&x->byspi); __xfrm_state_put(x); } spin_unlock(&xfrm_state_lock); @@ -378,12 +378,13 @@ EXPORT_SYMBOL(xfrm_state_delete); void xfrm_state_flush(u8 proto) { int i; - struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); for (i = 0; i < XFRM_DST_HSIZE; i++) { + struct hlist_node *entry; + struct xfrm_state *x; restart: - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { if (!xfrm_state_kern(x) && xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); @@ -420,8 +421,9 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 { unsigned int h = xfrm_spi_hash(daddr, spi, proto, family); struct xfrm_state *x; + struct hlist_node *entry; - list_for_each_entry(x, xfrm_state_byspi+h, byspi) { + hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) { if (x->props.family != family || x->id.spi != spi || x->id.proto != proto) @@ -451,8 +453,9 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm { unsigned int h = xfrm_src_hash(saddr, family); struct xfrm_state *x; + struct hlist_node *entry; - list_for_each_entry(x, xfrm_state_bysrc+h, bysrc) { + hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) { if (x->props.family != family || x->id.proto != proto) continue; @@ -499,14 +502,15 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct xfrm_policy *pol, int *err, unsigned short family) { - unsigned h = xfrm_dst_hash(daddr, family); + unsigned int h = xfrm_dst_hash(daddr, family); + struct hlist_node *entry; struct xfrm_state *x, *x0; int acquire_in_progress = 0; int error = 0; struct xfrm_state *best = NULL; spin_lock_bh(&xfrm_state_lock); - list_for_each_entry(x, xfrm_state_bydst+h, bydst) { + hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == tmpl->reqid && !(x->props.flags & XFRM_STATE_WILDRECV) && @@ -575,13 +579,14 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; - list_add_tail(&x->bydst, xfrm_state_bydst+h); + hlist_add_head(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); - list_add_tail(&x->bysrc, xfrm_state_bysrc+h); + h = xfrm_src_hash(saddr, family); + hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); xfrm_state_hold(x); if (x->id.spi) { h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); - list_add(&x->byspi, xfrm_state_byspi+h); + hlist_add_head(&x->byspi, xfrm_state_byspi+h); xfrm_state_hold(x); } x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; @@ -608,19 +613,19 @@ static void __xfrm_state_insert(struct xfrm_state *x) { unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family); - list_add(&x->bydst, xfrm_state_bydst+h); + hlist_add_head(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); h = xfrm_src_hash(&x->props.saddr, x->props.family); - list_add(&x->bysrc, xfrm_state_bysrc+h); + hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); xfrm_state_hold(x); if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) { h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); - list_add(&x->byspi, xfrm_state_byspi+h); + hlist_add_head(&x->byspi, xfrm_state_byspi+h); xfrm_state_hold(x); } @@ -648,9 +653,10 @@ EXPORT_SYMBOL(xfrm_state_insert); static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) { unsigned int h = xfrm_dst_hash(daddr, family); + struct hlist_node *entry; struct xfrm_state *x; - list_for_each_entry(x, xfrm_state_bydst+h, bydst) { + hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { if (x->props.reqid != reqid || x->props.mode != mode || x->props.family != family || @@ -717,10 +723,10 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; add_timer(&x->timer); xfrm_state_hold(x); - list_add_tail(&x->bydst, xfrm_state_bydst+h); + hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(saddr, family); xfrm_state_hold(x); - list_add_tail(&x->bysrc, xfrm_state_bysrc+h); + hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); wake_up(&km_waitq); } @@ -977,11 +983,14 @@ EXPORT_SYMBOL(xfrm_state_sort); static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) { int i; - struct xfrm_state *x; for (i = 0; i < XFRM_DST_HSIZE; i++) { - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { - if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) { + struct hlist_node *entry; + struct xfrm_state *x; + + hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { + if (x->km.seq == seq && + x->km.state == XFRM_STATE_ACQ) { xfrm_state_hold(x); return x; } @@ -1047,7 +1056,7 @@ xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) if (x->id.spi) { spin_lock_bh(&xfrm_state_lock); h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); - list_add(&x->byspi, xfrm_state_byspi+h); + hlist_add_head(&x->byspi, xfrm_state_byspi+h); xfrm_state_hold(x); spin_unlock_bh(&xfrm_state_lock); wake_up(&km_waitq); @@ -1060,12 +1069,13 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), { int i; struct xfrm_state *x; + struct hlist_node *entry; int count = 0; int err = 0; spin_lock_bh(&xfrm_state_lock); for (i = 0; i < XFRM_DST_HSIZE; i++) { - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto)) count++; } @@ -1076,7 +1086,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), } for (i = 0; i < XFRM_DST_HSIZE; i++) { - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { if (!xfrm_id_proto_match(x->id.proto, proto)) continue; err = func(x, --count, data); @@ -1524,9 +1534,9 @@ void __init xfrm_state_init(void) int i; for (i=0; i Date: Thu, 24 Aug 2006 03:08:07 -0700 Subject: [PATCH 0589/1063] [XFRM]: Dynamic xfrm_state hash table sizing. The grow algorithm is simple, we grow if: 1) we see a hash chain collision at insert, and 2) we haven't hit the hash size limit (currently 1*1024*1024 slots), and 3) the number of xfrm_state objects is > the current hash mask All of this needs some tweaking. Remove __initdata from "hashdist" so we can use it safely at run time. Signed-off-by: David S. Miller --- include/linux/bootmem.h | 2 +- mm/page_alloc.c | 2 +- net/xfrm/xfrm_state.c | 245 +++++++++++++++++++++++++++++++--------- 3 files changed, 196 insertions(+), 53 deletions(-) diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 1021f508d82c..e319c649e4fd 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -114,7 +114,7 @@ extern void *__init alloc_large_system_hash(const char *tablename, #else #define HASHDIST_DEFAULT 0 #endif -extern int __initdata hashdist; /* Distribute hashes across NUMA nodes? */ +extern int hashdist; /* Distribute hashes across NUMA nodes? */ #endif /* _LINUX_BOOTMEM_H */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 54a4f5375bba..3b5358a0561f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2363,7 +2363,7 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, return 0; } -__initdata int hashdist = HASHDIST_DEFAULT; +int hashdist = HASHDIST_DEFAULT; #ifdef CONFIG_NUMA static int __init set_hashdist(char *str) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index fe3c8c38d5e1..445263c54c94 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -18,6 +18,9 @@ #include #include #include +#include +#include +#include #include struct sock *xfrm_nl; @@ -38,102 +41,230 @@ EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth); static DEFINE_SPINLOCK(xfrm_state_lock); -#define XFRM_DST_HSIZE 1024 - /* Hash table to find appropriate SA towards given target (endpoint * of tunnel or destination of transport mode) allowed by selector. * * Main use is finding SA after policy selected tunnel or transport mode. * Also, it can be used by ah/esp icmp error handler to find offending SA. */ -static struct hlist_head xfrm_state_bydst[XFRM_DST_HSIZE]; -static struct hlist_head xfrm_state_bysrc[XFRM_DST_HSIZE]; -static struct hlist_head xfrm_state_byspi[XFRM_DST_HSIZE]; +static struct hlist_head *xfrm_state_bydst __read_mostly; +static struct hlist_head *xfrm_state_bysrc __read_mostly; +static struct hlist_head *xfrm_state_byspi __read_mostly; +static unsigned int xfrm_state_hmask __read_mostly; +static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; +static unsigned int xfrm_state_num; -static __inline__ -unsigned __xfrm4_dst_hash(xfrm_address_t *addr) +static inline unsigned int __xfrm4_dst_hash(xfrm_address_t *addr, unsigned int hmask) { - unsigned h; + unsigned int h; h = ntohl(addr->a4); - h = (h ^ (h>>16)) % XFRM_DST_HSIZE; + h = (h ^ (h>>16)) & hmask; return h; } -static __inline__ -unsigned __xfrm6_dst_hash(xfrm_address_t *addr) +static inline unsigned int __xfrm6_dst_hash(xfrm_address_t *addr, unsigned int hmask) { - unsigned h; + unsigned int h; h = ntohl(addr->a6[2]^addr->a6[3]); - h = (h ^ (h>>16)) % XFRM_DST_HSIZE; + h = (h ^ (h>>16)) & hmask; return h; } -static __inline__ -unsigned __xfrm4_src_hash(xfrm_address_t *addr) +static inline unsigned int __xfrm4_src_hash(xfrm_address_t *addr, unsigned int hmask) { - return __xfrm4_dst_hash(addr); + return __xfrm4_dst_hash(addr, hmask); } -static __inline__ -unsigned __xfrm6_src_hash(xfrm_address_t *addr) +static inline unsigned int __xfrm6_src_hash(xfrm_address_t *addr, unsigned int hmask) { - return __xfrm6_dst_hash(addr); + return __xfrm6_dst_hash(addr, hmask); } -static __inline__ -unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) +static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask) { switch (family) { case AF_INET: - return __xfrm4_src_hash(addr); + return __xfrm4_src_hash(addr, hmask); case AF_INET6: - return __xfrm6_src_hash(addr); + return __xfrm6_src_hash(addr, hmask); } return 0; } -static __inline__ -unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) +static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) +{ + return __xfrm_src_hash(addr, family, xfrm_state_hmask); +} + +static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask) { switch (family) { case AF_INET: - return __xfrm4_dst_hash(addr); + return __xfrm4_dst_hash(addr, hmask); case AF_INET6: - return __xfrm6_dst_hash(addr); + return __xfrm6_dst_hash(addr, hmask); } return 0; } -static __inline__ -unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) +static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) { - unsigned h; + return __xfrm_dst_hash(addr, family, xfrm_state_hmask); +} + +static inline unsigned int __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, + unsigned int hmask) +{ + unsigned int h; h = ntohl(addr->a4^spi^proto); - h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; + h = (h ^ (h>>10) ^ (h>>20)) & hmask; return h; } -static __inline__ -unsigned __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) +static inline unsigned int __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, + unsigned int hmask) { - unsigned h; + unsigned int h; h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto); - h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; + h = (h ^ (h>>10) ^ (h>>20)) & hmask; return h; } -static __inline__ -unsigned xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family) +static inline +unsigned __xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family, + unsigned int hmask) { switch (family) { case AF_INET: - return __xfrm4_spi_hash(addr, spi, proto); + return __xfrm4_spi_hash(addr, spi, proto, hmask); case AF_INET6: - return __xfrm6_spi_hash(addr, spi, proto); + return __xfrm6_spi_hash(addr, spi, proto, hmask); } return 0; /*XXX*/ } +static inline unsigned int +xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family) +{ + return __xfrm_spi_hash(addr, spi, proto, family, xfrm_state_hmask); +} + +static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz) +{ + struct hlist_head *n; + + if (sz <= PAGE_SIZE) + n = kmalloc(sz, GFP_KERNEL); + else if (hashdist) + n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL); + else + n = (struct hlist_head *) + __get_free_pages(GFP_KERNEL, get_order(sz)); + + if (n) + memset(n, 0, sz); + + return n; +} + +static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz) +{ + if (sz <= PAGE_SIZE) + kfree(n); + else if (hashdist) + vfree(n); + else + free_pages((unsigned long)n, get_order(sz)); +} + +static void xfrm_hash_transfer(struct hlist_head *list, + struct hlist_head *ndsttable, + struct hlist_head *nsrctable, + struct hlist_head *nspitable, + unsigned int nhashmask) +{ + struct hlist_node *entry, *tmp; + struct xfrm_state *x; + + hlist_for_each_entry_safe(x, entry, tmp, list, bydst) { + unsigned int h; + + h = __xfrm_dst_hash(&x->id.daddr, x->props.family, nhashmask); + hlist_add_head(&x->bydst, ndsttable+h); + + h = __xfrm_src_hash(&x->props.saddr, x->props.family, + nhashmask); + hlist_add_head(&x->bysrc, nsrctable+h); + + h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, + x->props.family, nhashmask); + hlist_add_head(&x->byspi, nspitable+h); + } +} + +static unsigned long xfrm_hash_new_size(void) +{ + return ((xfrm_state_hmask + 1) << 1) * + sizeof(struct hlist_head); +} + +static DEFINE_MUTEX(hash_resize_mutex); + +static void xfrm_hash_resize(void *__unused) +{ + struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi; + unsigned long nsize, osize; + unsigned int nhashmask, ohashmask; + int i; + + mutex_lock(&hash_resize_mutex); + + nsize = xfrm_hash_new_size(); + ndst = xfrm_state_hash_alloc(nsize); + if (!ndst) + goto out_unlock; + nsrc = xfrm_state_hash_alloc(nsize); + if (!nsrc) { + xfrm_state_hash_free(ndst, nsize); + goto out_unlock; + } + nspi = xfrm_state_hash_alloc(nsize); + if (!nspi) { + xfrm_state_hash_free(ndst, nsize); + xfrm_state_hash_free(nsrc, nsize); + goto out_unlock; + } + + spin_lock_bh(&xfrm_state_lock); + + nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; + for (i = xfrm_state_hmask; i >= 0; i--) + xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi, + nhashmask); + + odst = xfrm_state_bydst; + osrc = xfrm_state_bysrc; + ospi = xfrm_state_byspi; + ohashmask = xfrm_state_hmask; + + xfrm_state_bydst = ndst; + xfrm_state_bysrc = nsrc; + xfrm_state_byspi = nspi; + xfrm_state_hmask = nhashmask; + + spin_unlock_bh(&xfrm_state_lock); + + osize = (ohashmask + 1) * sizeof(struct hlist_head); + xfrm_state_hash_free(odst, osize); + xfrm_state_hash_free(osrc, osize); + xfrm_state_hash_free(ospi, osize); + +out_unlock: + mutex_unlock(&hash_resize_mutex); +} + +static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL); + DECLARE_WAIT_QUEUE_HEAD(km_waitq); EXPORT_SYMBOL(km_waitq); @@ -335,6 +466,7 @@ int __xfrm_state_delete(struct xfrm_state *x) hlist_del(&x->byspi); __xfrm_state_put(x); } + xfrm_state_num--; spin_unlock(&xfrm_state_lock); if (del_timer(&x->timer)) __xfrm_state_put(x); @@ -380,7 +512,7 @@ void xfrm_state_flush(u8 proto) int i; spin_lock_bh(&xfrm_state_lock); - for (i = 0; i < XFRM_DST_HSIZE; i++) { + for (i = 0; i < xfrm_state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; restart: @@ -611,7 +743,7 @@ out: static void __xfrm_state_insert(struct xfrm_state *x) { - unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family); + unsigned int h = xfrm_dst_hash(&x->id.daddr, x->props.family); hlist_add_head(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); @@ -637,6 +769,13 @@ static void __xfrm_state_insert(struct xfrm_state *x) xfrm_state_hold(x); wake_up(&km_waitq); + + xfrm_state_num++; + + if (x->bydst.next != NULL && + (xfrm_state_hmask + 1) < xfrm_state_hashmax && + xfrm_state_num > xfrm_state_hmask) + schedule_work(&xfrm_hash_work); } void xfrm_state_insert(struct xfrm_state *x) @@ -984,7 +1123,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) { int i; - for (i = 0; i < XFRM_DST_HSIZE; i++) { + for (i = 0; i <= xfrm_state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; @@ -1026,7 +1165,7 @@ EXPORT_SYMBOL(xfrm_get_acqseq); void xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) { - u32 h; + unsigned int h; struct xfrm_state *x0; if (x->id.spi) @@ -1074,7 +1213,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), int err = 0; spin_lock_bh(&xfrm_state_lock); - for (i = 0; i < XFRM_DST_HSIZE; i++) { + for (i = 0; i <= xfrm_state_hmask; i++) { hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto)) count++; @@ -1085,7 +1224,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), goto out; } - for (i = 0; i < XFRM_DST_HSIZE; i++) { + for (i = 0; i <= xfrm_state_hmask; i++) { hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { if (!xfrm_id_proto_match(x->id.proto, proto)) continue; @@ -1531,13 +1670,17 @@ EXPORT_SYMBOL(xfrm_init_state); void __init xfrm_state_init(void) { - int i; + unsigned int sz; + + sz = sizeof(struct hlist_head) * 8; + + xfrm_state_bydst = xfrm_state_hash_alloc(sz); + xfrm_state_bysrc = xfrm_state_hash_alloc(sz); + xfrm_state_byspi = xfrm_state_hash_alloc(sz); + if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi) + panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes."); + xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1); - for (i=0; i Date: Thu, 24 Aug 2006 03:18:09 -0700 Subject: [PATCH 0590/1063] [XFRM]: Add generation count to xfrm_state and xfrm_dst. Each xfrm_state inserted gets a new generation counter value. When a bundle is created, the xfrm_dst objects get the current generation counter of the xfrm_state they will attach to at dst->xfrm. xfrm_bundle_ok() will return false if it sees an xfrm_dst with a generation count different from the generation count of the xfrm_state that dst points to. This provides a facility by which to passively and cheaply invalidate cached IPSEC routes during SA database changes. Signed-off-by: David S. Miller --- include/net/xfrm.h | 3 +++ net/ipv4/xfrm4_policy.c | 1 + net/ipv6/xfrm6_policy.c | 1 + net/xfrm/xfrm_policy.c | 2 ++ net/xfrm/xfrm_state.c | 3 +++ 5 files changed, 10 insertions(+) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 3405e5d9d51c..fd4a300b5baf 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -104,6 +104,8 @@ struct xfrm_state struct xfrm_id id; struct xfrm_selector sel; + u32 genid; + /* Key manger bits */ struct { u8 state; @@ -590,6 +592,7 @@ struct xfrm_dst struct rt6_info rt6; } u; struct dst_entry *route; + u32 genid; u32 route_mtu_cached; u32 child_mtu_cached; u32 route_cookie; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 42d8ded0f96a..479598566f1d 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -93,6 +93,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int xdst = (struct xfrm_dst *)dst1; xdst->route = &rt->u.dst; + xdst->genid = xfrm[i]->genid; dst1->next = dst_prev; dst_prev = dst1; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 98c2fe449b3f..9391c4c94feb 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -149,6 +149,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int xdst = (struct xfrm_dst *)dst1; xdst->route = &rt->u.dst; + xdst->genid = xfrm[i]->genid; if (rt->rt6i_node) xdst->route_cookie = rt->rt6i_node->fn_sernum; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 1732159ffd01..7fc6944ee36f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1536,6 +1536,8 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family, int str return 0; if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; + if (xdst->genid != dst->xfrm->genid) + return 0; if (strict && fl && dst->xfrm->props.mode != XFRM_MODE_TUNNEL && !xfrm_state_addr_flow_check(dst->xfrm, fl, family)) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 445263c54c94..535d43c14720 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -53,6 +53,7 @@ static struct hlist_head *xfrm_state_byspi __read_mostly; static unsigned int xfrm_state_hmask __read_mostly; static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static unsigned int xfrm_state_num; +static unsigned int xfrm_state_genid; static inline unsigned int __xfrm4_dst_hash(xfrm_address_t *addr, unsigned int hmask) { @@ -745,6 +746,8 @@ static void __xfrm_state_insert(struct xfrm_state *x) { unsigned int h = xfrm_dst_hash(&x->id.daddr, x->props.family); + x->genid = ++xfrm_state_genid; + hlist_add_head(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); From a624c108e5595b5827796c253481436929cd5344 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 03:24:33 -0700 Subject: [PATCH 0591/1063] [XFRM]: Put more keys into destination hash function. Besides the daddr, key the hash on family and reqid too. Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 89 ++++++++++++++++++++----------------------- 1 file changed, 42 insertions(+), 47 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 535d43c14720..7e5daafc1863 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -35,7 +35,7 @@ EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth); /* Each xfrm_state may be linked to two tables: 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl) - 2. Hash table by daddr to find what SAs exist for given + 2. Hash table by (daddr,family,reqid) to find what SAs exist for given destination/tunnel endpoint. (output) */ @@ -55,41 +55,51 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static unsigned int xfrm_state_num; static unsigned int xfrm_state_genid; -static inline unsigned int __xfrm4_dst_hash(xfrm_address_t *addr, unsigned int hmask) +static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr) { - unsigned int h; - h = ntohl(addr->a4); - h = (h ^ (h>>16)) & hmask; - return h; + return ntohl(addr->a4); } -static inline unsigned int __xfrm6_dst_hash(xfrm_address_t *addr, unsigned int hmask) +static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) { - unsigned int h; - h = ntohl(addr->a6[2]^addr->a6[3]); - h = (h ^ (h>>16)) & hmask; - return h; + return ntohl(addr->a6[2]^addr->a6[3]); } -static inline unsigned int __xfrm4_src_hash(xfrm_address_t *addr, unsigned int hmask) -{ - return __xfrm4_dst_hash(addr, hmask); -} - -static inline unsigned int __xfrm6_src_hash(xfrm_address_t *addr, unsigned int hmask) -{ - return __xfrm6_dst_hash(addr, hmask); -} - -static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask) +static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr, + u32 reqid, unsigned short family, + unsigned int hmask) { + unsigned int h = family ^ reqid; switch (family) { case AF_INET: - return __xfrm4_src_hash(addr, hmask); + h ^= __xfrm4_addr_hash(addr); + break; case AF_INET6: - return __xfrm6_src_hash(addr, hmask); - } - return 0; + h ^= __xfrm6_addr_hash(addr); + break; + }; + return (h ^ (h >> 16)) & hmask; +} + +static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, u32 reqid, + unsigned short family) +{ + return __xfrm_dst_hash(addr, reqid, family, xfrm_state_hmask); +} + +static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family, + unsigned int hmask) +{ + unsigned int h = family; + switch (family) { + case AF_INET: + h ^= __xfrm4_addr_hash(addr); + break; + case AF_INET6: + h ^= __xfrm6_addr_hash(addr); + break; + }; + return (h ^ (h >> 16)) & hmask; } static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) @@ -97,22 +107,6 @@ static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family return __xfrm_src_hash(addr, family, xfrm_state_hmask); } -static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask) -{ - switch (family) { - case AF_INET: - return __xfrm4_dst_hash(addr, hmask); - case AF_INET6: - return __xfrm6_dst_hash(addr, hmask); - } - return 0; -} - -static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) -{ - return __xfrm_dst_hash(addr, family, xfrm_state_hmask); -} - static inline unsigned int __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned int hmask) { @@ -190,7 +184,8 @@ static void xfrm_hash_transfer(struct hlist_head *list, hlist_for_each_entry_safe(x, entry, tmp, list, bydst) { unsigned int h; - h = __xfrm_dst_hash(&x->id.daddr, x->props.family, nhashmask); + h = __xfrm_dst_hash(&x->id.daddr, x->props.reqid, + x->props.family, nhashmask); hlist_add_head(&x->bydst, ndsttable+h); h = __xfrm_src_hash(&x->props.saddr, x->props.family, @@ -635,7 +630,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct xfrm_policy *pol, int *err, unsigned short family) { - unsigned int h = xfrm_dst_hash(daddr, family); + unsigned int h = xfrm_dst_hash(daddr, tmpl->reqid, family); struct hlist_node *entry; struct xfrm_state *x, *x0; int acquire_in_progress = 0; @@ -744,15 +739,15 @@ out: static void __xfrm_state_insert(struct xfrm_state *x) { - unsigned int h = xfrm_dst_hash(&x->id.daddr, x->props.family); + unsigned int h; x->genid = ++xfrm_state_genid; + h = xfrm_dst_hash(&x->id.daddr, x->props.reqid, x->props.family); hlist_add_head(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); h = xfrm_src_hash(&x->props.saddr, x->props.family); - hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); xfrm_state_hold(x); @@ -794,7 +789,7 @@ EXPORT_SYMBOL(xfrm_state_insert); /* xfrm_state_lock is held */ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) { - unsigned int h = xfrm_dst_hash(daddr, family); + unsigned int h = xfrm_dst_hash(daddr, reqid, family); struct hlist_node *entry; struct xfrm_state *x; From 2575b65434d56559bd03854450b9b6aaf19b9c90 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 03:26:44 -0700 Subject: [PATCH 0592/1063] [XFRM]: Simplify xfrm_spi_hash It can use __xfrm{4,6}_addr_hash(). Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 33 +++++++++------------------------ 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 7e5daafc1863..98200397e098 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -107,35 +107,20 @@ static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family return __xfrm_src_hash(addr, family, xfrm_state_hmask); } -static inline unsigned int __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, - unsigned int hmask) -{ - unsigned int h; - h = ntohl(addr->a4^spi^proto); - h = (h ^ (h>>10) ^ (h>>20)) & hmask; - return h; -} - -static inline unsigned int __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, - unsigned int hmask) -{ - unsigned int h; - h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto); - h = (h ^ (h>>10) ^ (h>>20)) & hmask; - return h; -} - -static inline -unsigned __xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family, - unsigned int hmask) +static inline unsigned int +__xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family, + unsigned int hmask) { + unsigned int h = spi ^ proto; switch (family) { case AF_INET: - return __xfrm4_spi_hash(addr, spi, proto, hmask); + h ^= __xfrm4_addr_hash(addr); + break; case AF_INET6: - return __xfrm6_spi_hash(addr, spi, proto, hmask); + h ^= __xfrm6_addr_hash(addr); + break; } - return 0; /*XXX*/ + return (h ^ (h >> 10) ^ (h >> 20)) & hmask; } static inline unsigned int From c7f5ea3a4d1ae6b3b426e113358fdc57494bc754 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 03:29:04 -0700 Subject: [PATCH 0593/1063] [XFRM]: Do not flush all bundles on SA insert. Instead, simply set all potentially aliasing existing xfrm_state objects to have the current generation counter value. This will make routes get relooked up the next time an existing route mentioning these aliased xfrm_state objects gets used, via xfrm_dst_check(). Signed-off-by: David S. Miller --- include/net/xfrm.h | 1 - net/xfrm/xfrm_policy.c | 10 ---------- net/xfrm/xfrm_state.c | 25 ++++++++++++++++++++----- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index fd4a300b5baf..a620a43c9eeb 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -996,7 +996,6 @@ struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, extern void xfrm_policy_flush(u8 type); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); extern int xfrm_flush_bundles(void); -extern void xfrm_flush_all_bundles(void); extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family, int strict); extern void xfrm_init_pmtu(struct dst_entry *dst); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7fc6944ee36f..cfa5c692f2e8 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1478,16 +1478,6 @@ int xfrm_flush_bundles(void) return 0; } -static int always_true(struct dst_entry *dst) -{ - return 1; -} - -void xfrm_flush_all_bundles(void) -{ - xfrm_prune_bundles(always_true); -} - void xfrm_init_pmtu(struct dst_entry *dst) { do { diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 98200397e098..77ef796c9d0d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -761,13 +761,30 @@ static void __xfrm_state_insert(struct xfrm_state *x) schedule_work(&xfrm_hash_work); } +/* xfrm_state_lock is held */ +static void __xfrm_state_bump_genids(struct xfrm_state *xnew) +{ + unsigned short family = xnew->props.family; + u32 reqid = xnew->props.reqid; + struct xfrm_state *x; + struct hlist_node *entry; + unsigned int h; + + h = xfrm_dst_hash(&xnew->id.daddr, reqid, family); + hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + if (x->props.family == family && + x->props.reqid == reqid && + !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family)) + x->genid = xfrm_state_genid; + } +} + void xfrm_state_insert(struct xfrm_state *x) { spin_lock_bh(&xfrm_state_lock); + __xfrm_state_bump_genids(x); __xfrm_state_insert(x); spin_unlock_bh(&xfrm_state_lock); - - xfrm_flush_all_bundles(); } EXPORT_SYMBOL(xfrm_state_insert); @@ -889,15 +906,13 @@ int xfrm_state_add(struct xfrm_state *x) x->id.proto, &x->id.daddr, &x->props.saddr, 0); + __xfrm_state_bump_genids(x); __xfrm_state_insert(x); err = 0; out: spin_unlock_bh(&xfrm_state_lock); - if (!err) - xfrm_flush_all_bundles(); - if (x1) { xfrm_state_delete(x1); xfrm_state_put(x1); From 1c0953997567b22e32fdf85d3b4bc0f2461fd161 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 03:30:28 -0700 Subject: [PATCH 0594/1063] [XFRM]: Purge dst references to deleted SAs passively. Just let GC and other normal mechanisms take care of getting rid of DST cache references to deleted xfrm_state objects instead of walking all the policy bundles. Signed-off-by: David S. Miller --- include/net/xfrm.h | 1 - net/xfrm/xfrm_policy.c | 2 +- net/xfrm/xfrm_state.c | 17 ----------------- 3 files changed, 1 insertion(+), 19 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index a620a43c9eeb..c7870b6eae01 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -995,7 +995,6 @@ struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, int create, unsigned short family); extern void xfrm_policy_flush(u8 type); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); -extern int xfrm_flush_bundles(void); extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family, int strict); extern void xfrm_init_pmtu(struct dst_entry *dst); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index cfa5c692f2e8..1bcaae4adf3a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1472,7 +1472,7 @@ static void __xfrm_garbage_collect(void) xfrm_prune_bundles(unused_bundle); } -int xfrm_flush_bundles(void) +static int xfrm_flush_bundles(void) { xfrm_prune_bundles(stale_bundle); return 0; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 77ef796c9d0d..9ff00b7d6ad3 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -256,8 +256,6 @@ static struct work_struct xfrm_state_gc_work; static HLIST_HEAD(xfrm_state_gc_list); static DEFINE_SPINLOCK(xfrm_state_gc_lock); -static int xfrm_state_gc_flush_bundles; - int __xfrm_state_delete(struct xfrm_state *x); static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family); @@ -293,11 +291,6 @@ static void xfrm_state_gc_task(void *data) struct hlist_node *entry, *tmp; struct hlist_head gc_list; - if (xfrm_state_gc_flush_bundles) { - xfrm_state_gc_flush_bundles = 0; - xfrm_flush_bundles(); - } - spin_lock_bh(&xfrm_state_gc_lock); gc_list.first = xfrm_state_gc_list.first; INIT_HLIST_HEAD(&xfrm_state_gc_list); @@ -454,16 +447,6 @@ int __xfrm_state_delete(struct xfrm_state *x) if (del_timer(&x->rtimer)) __xfrm_state_put(x); - /* The number two in this test is the reference - * mentioned in the comment below plus the reference - * our caller holds. A larger value means that - * there are DSTs attached to this xfrm_state. - */ - if (atomic_read(&x->refcnt) > 2) { - xfrm_state_gc_flush_bundles = 1; - schedule_work(&xfrm_state_gc_work); - } - /* All xfrm_state objects are created by xfrm_state_alloc. * The xfrm_state_alloc call gives a reference, and that * is what we are dropping here. From a47f0ce05ae12ce9acad62896ff703175764104e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 03:54:22 -0700 Subject: [PATCH 0595/1063] [XFRM]: Kill excessive refcounting of xfrm_state objects. The refcounting done for timers and hash table insertions are just wasted cycles. We can eliminate all of this refcounting because: 1) The implicit refcount when the xfrm_state object is active will always be held while the object is in the hash tables. We never kfree() the xfrm_state until long after we've made sure that it has been unhashed. 2) Timers are even easier. Once we mark that x->km.state as anything other than XFRM_STATE_VALID (__xfrm_state_delete sets it to XFRM_STATE_DEAD), any timer that fires will do nothing and return without rearming the timer. Therefore we can defer the del_timer calls until when the object is about to be freed up during GC. We have to use del_timer_sync() and defer it to GC because we can't do a del_timer_sync() while holding x->lock which all callers of __xfrm_state_delete hold. This makes SA changes even more light-weight. Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 53 ++++++++++--------------------------------- 1 file changed, 12 insertions(+), 41 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 9ff00b7d6ad3..0bc6a4b1ceae 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -266,10 +266,8 @@ void km_state_expired(struct xfrm_state *x, int hard, u32 pid); static void xfrm_state_gc_destroy(struct xfrm_state *x) { - if (del_timer(&x->timer)) - BUG(); - if (del_timer(&x->rtimer)) - BUG(); + del_timer_sync(&x->timer); + del_timer_sync(&x->rtimer); kfree(x->aalg); kfree(x->ealg); kfree(x->calg); @@ -361,9 +359,9 @@ static void xfrm_timer_handler(unsigned long data) if (warn) km_state_expired(x, 0, 0); resched: - if (next != LONG_MAX && - !mod_timer(&x->timer, jiffies + make_jiffies(next))) - xfrm_state_hold(x); + if (next != LONG_MAX) + mod_timer(&x->timer, jiffies + make_jiffies(next)); + goto out; expired: @@ -378,7 +376,6 @@ expired: out: spin_unlock(&x->lock); - xfrm_state_put(x); } static void xfrm_replay_timer_handler(unsigned long data); @@ -433,19 +430,11 @@ int __xfrm_state_delete(struct xfrm_state *x) x->km.state = XFRM_STATE_DEAD; spin_lock(&xfrm_state_lock); hlist_del(&x->bydst); - __xfrm_state_put(x); hlist_del(&x->bysrc); - __xfrm_state_put(x); - if (x->id.spi) { + if (x->id.spi) hlist_del(&x->byspi); - __xfrm_state_put(x); - } xfrm_state_num--; spin_unlock(&xfrm_state_lock); - if (del_timer(&x->timer)) - __xfrm_state_put(x); - if (del_timer(&x->rtimer)) - __xfrm_state_put(x); /* All xfrm_state objects are created by xfrm_state_alloc. * The xfrm_state_alloc call gives a reference, and that @@ -676,17 +665,13 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; hlist_add_head(&x->bydst, xfrm_state_bydst+h); - xfrm_state_hold(x); h = xfrm_src_hash(saddr, family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); - xfrm_state_hold(x); if (x->id.spi) { h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); hlist_add_head(&x->byspi, xfrm_state_byspi+h); - xfrm_state_hold(x); } x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; - xfrm_state_hold(x); x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; add_timer(&x->timer); } else { @@ -713,26 +698,20 @@ static void __xfrm_state_insert(struct xfrm_state *x) h = xfrm_dst_hash(&x->id.daddr, x->props.reqid, x->props.family); hlist_add_head(&x->bydst, xfrm_state_bydst+h); - xfrm_state_hold(x); h = xfrm_src_hash(&x->props.saddr, x->props.family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); - xfrm_state_hold(x); if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) { h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); hlist_add_head(&x->byspi, xfrm_state_byspi+h); - xfrm_state_hold(x); } - if (!mod_timer(&x->timer, jiffies + HZ)) - xfrm_state_hold(x); - - if (x->replay_maxage && - !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) - xfrm_state_hold(x); + mod_timer(&x->timer, jiffies + HZ); + if (x->replay_maxage) + mod_timer(&x->rtimer, jiffies + x->replay_maxage); wake_up(&km_waitq); @@ -844,10 +823,8 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re xfrm_state_hold(x); x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; add_timer(&x->timer); - xfrm_state_hold(x); hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(saddr, family); - xfrm_state_hold(x); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); wake_up(&km_waitq); } @@ -955,8 +932,7 @@ out: memcpy(&x1->lft, &x->lft, sizeof(x1->lft)); x1->km.dying = 0; - if (!mod_timer(&x1->timer, jiffies + HZ)) - xfrm_state_hold(x1); + mod_timer(&x1->timer, jiffies + HZ); if (x1->curlft.use_time) xfrm_state_check_expire(x1); @@ -981,8 +957,7 @@ int xfrm_state_check_expire(struct xfrm_state *x) if (x->curlft.bytes >= x->lft.hard_byte_limit || x->curlft.packets >= x->lft.hard_packet_limit) { x->km.state = XFRM_STATE_EXPIRED; - if (!mod_timer(&x->timer, jiffies)) - xfrm_state_hold(x); + mod_timer(&x->timer, jiffies); return -EINVAL; } @@ -1177,7 +1152,6 @@ xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) spin_lock_bh(&xfrm_state_lock); h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); hlist_add_head(&x->byspi, xfrm_state_byspi+h); - xfrm_state_hold(x); spin_unlock_bh(&xfrm_state_lock); wake_up(&km_waitq); } @@ -1264,10 +1238,8 @@ void xfrm_replay_notify(struct xfrm_state *x, int event) km_state_notify(x, &c); if (x->replay_maxage && - !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) { - xfrm_state_hold(x); + !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) x->xflags &= ~XFRM_TIME_DEFER; - } } EXPORT_SYMBOL(xfrm_replay_notify); @@ -1285,7 +1257,6 @@ static void xfrm_replay_timer_handler(unsigned long data) } spin_unlock(&x->lock); - xfrm_state_put(x); } int xfrm_replay_check(struct xfrm_state *x, u32 seq) From c1969f294e624d5b642fc8e6ab9468b7c7791fa8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 04:00:03 -0700 Subject: [PATCH 0596/1063] [XFRM]: Hash xfrm_state objects by source address too. The source address is always non-prefixed so we should use it to help give entropy to the bydst hash. Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 53 ++++++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 0bc6a4b1ceae..37213f9f6a02 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -65,26 +65,40 @@ static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) return ntohl(addr->a6[2]^addr->a6[3]); } -static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr, +static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) +{ + return ntohl(daddr->a4 ^ saddr->a4); +} + +static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) +{ + return ntohl(daddr->a6[2] ^ daddr->a6[3] ^ + saddr->a6[2] ^ saddr->a6[3]); +} + +static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, + xfrm_address_t *saddr, u32 reqid, unsigned short family, unsigned int hmask) { unsigned int h = family ^ reqid; switch (family) { case AF_INET: - h ^= __xfrm4_addr_hash(addr); + h ^= __xfrm4_daddr_saddr_hash(daddr, saddr); break; case AF_INET6: - h ^= __xfrm6_addr_hash(addr); + h ^= __xfrm6_daddr_saddr_hash(daddr, saddr); break; }; return (h ^ (h >> 16)) & hmask; } -static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, u32 reqid, +static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr, + xfrm_address_t *saddr, + u32 reqid, unsigned short family) { - return __xfrm_dst_hash(addr, reqid, family, xfrm_state_hmask); + return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask); } static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family, @@ -108,25 +122,25 @@ static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family } static inline unsigned int -__xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family, - unsigned int hmask) +__xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, + unsigned short family, unsigned int hmask) { unsigned int h = spi ^ proto; switch (family) { case AF_INET: - h ^= __xfrm4_addr_hash(addr); + h ^= __xfrm4_addr_hash(daddr); break; case AF_INET6: - h ^= __xfrm6_addr_hash(addr); + h ^= __xfrm6_addr_hash(daddr); break; } return (h ^ (h >> 10) ^ (h >> 20)) & hmask; } static inline unsigned int -xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family) +xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family) { - return __xfrm_spi_hash(addr, spi, proto, family, xfrm_state_hmask); + return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask); } static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz) @@ -169,8 +183,9 @@ static void xfrm_hash_transfer(struct hlist_head *list, hlist_for_each_entry_safe(x, entry, tmp, list, bydst) { unsigned int h; - h = __xfrm_dst_hash(&x->id.daddr, x->props.reqid, - x->props.family, nhashmask); + h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr, + x->props.reqid, x->props.family, + nhashmask); hlist_add_head(&x->bydst, ndsttable+h); h = __xfrm_src_hash(&x->props.saddr, x->props.family, @@ -587,7 +602,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct xfrm_policy *pol, int *err, unsigned short family) { - unsigned int h = xfrm_dst_hash(daddr, tmpl->reqid, family); + unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family); struct hlist_node *entry; struct xfrm_state *x, *x0; int acquire_in_progress = 0; @@ -696,7 +711,8 @@ static void __xfrm_state_insert(struct xfrm_state *x) x->genid = ++xfrm_state_genid; - h = xfrm_dst_hash(&x->id.daddr, x->props.reqid, x->props.family); + h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr, + x->props.reqid, x->props.family); hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(&x->props.saddr, x->props.family); @@ -732,11 +748,12 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) struct hlist_node *entry; unsigned int h; - h = xfrm_dst_hash(&xnew->id.daddr, reqid, family); + h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family); hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && - !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family)) + !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) && + !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family)) x->genid = xfrm_state_genid; } } @@ -753,7 +770,7 @@ EXPORT_SYMBOL(xfrm_state_insert); /* xfrm_state_lock is held */ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) { - unsigned int h = xfrm_dst_hash(daddr, reqid, family); + unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family); struct hlist_node *entry; struct xfrm_state *x; From 2518c7c2b3d7f0a6b302b4efe17c911f8dd4049f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 04:45:07 -0700 Subject: [PATCH 0597/1063] [XFRM]: Hash policies when non-prefixed. This idea is from Alexey Kuznetsov. It is common for policies to be non-prefixed. And for that case we can optimize lookups, insert, etc. quite a bit. For each direction, we have a dynamically sized policy hash table for non-prefixed policies. We also have a hash table on policy->index. For prefixed policies, we have a list per-direction which we will consult on lookups when a non-prefix hashtable lookup fails. This still isn't as efficient as I would like it. There are four immediate problems: 1) Lots of excessive refcounting, which can be fixed just like xfrm_state was 2) We do 2 hash probes on insert, one to look for dups and one to allocate a unique policy->index. Althought I wonder how much this matters since xfrm_state inserts do up to 3 hash probes and that seems to perform fine. 3) xfrm_policy_insert() is very complex because of the priority ordering and entry replacement logic. 4) Lots of counter bumping, in addition to policy refcounts, in the form of xfrm_policy_count[]. This is merely used to let code path(s) know that some IPSEC rules exist. So this count is indexed per-direction, maybe that is overkill. Signed-off-by: David S. Miller --- include/net/xfrm.h | 23 +- net/xfrm/xfrm_policy.c | 681 ++++++++++++++++++++++++++++++++--------- 2 files changed, 546 insertions(+), 158 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index c7870b6eae01..0acabf2a0a8f 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -331,7 +331,8 @@ struct xfrm_tmpl struct xfrm_policy { struct xfrm_policy *next; - struct list_head list; + struct hlist_node bydst; + struct hlist_node byidx; /* This lock only affects elements except for entry. */ rwlock_t lock; @@ -385,21 +386,7 @@ struct xfrm_mgr extern int xfrm_register_km(struct xfrm_mgr *km); extern int xfrm_unregister_km(struct xfrm_mgr *km); - -extern struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; -#ifdef CONFIG_XFRM_SUB_POLICY -extern struct xfrm_policy *xfrm_policy_list_sub[XFRM_POLICY_MAX*2]; - -static inline int xfrm_policy_lists_empty(int dir) -{ - return (!xfrm_policy_list[dir] && !xfrm_policy_list_sub[dir]); -} -#else -static inline int xfrm_policy_lists_empty(int dir) -{ - return (!xfrm_policy_list[dir]); -} -#endif +extern unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; static inline void xfrm_pol_hold(struct xfrm_policy *policy) { @@ -678,7 +665,7 @@ static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *sk if (sk && sk->sk_policy[XFRM_POLICY_IN]) return __xfrm_policy_check(sk, dir, skb, family); - return (xfrm_policy_lists_empty(dir) && !skb->sp) || + return (!xfrm_policy_count[dir] && !skb->sp) || (skb->dst->flags & DST_NOPOLICY) || __xfrm_policy_check(sk, dir, skb, family); } @@ -698,7 +685,7 @@ extern int __xfrm_route_forward(struct sk_buff *skb, unsigned short family); static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) { - return xfrm_policy_lists_empty(XFRM_POLICY_OUT) || + return !xfrm_policy_count[XFRM_POLICY_OUT] || (skb->dst->flags & DST_NOXFRM) || __xfrm_route_forward(skb, family); } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 1bcaae4adf3a..087a5443b051 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -22,6 +22,9 @@ #include #include #include +#include +#include +#include #include #include @@ -30,26 +33,8 @@ EXPORT_SYMBOL(xfrm_cfg_mutex); static DEFINE_RWLOCK(xfrm_policy_lock); -struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; -EXPORT_SYMBOL(xfrm_policy_list); -#ifdef CONFIG_XFRM_SUB_POLICY -struct xfrm_policy *xfrm_policy_list_sub[XFRM_POLICY_MAX*2]; -EXPORT_SYMBOL(xfrm_policy_list_sub); - -#define XFRM_POLICY_LISTS(type) \ - ((type == XFRM_POLICY_TYPE_SUB) ? xfrm_policy_list_sub : \ - xfrm_policy_list) -#define XFRM_POLICY_LISTHEAD(type, dir) \ - ((type == XFRM_POLICY_TYPE_SUB) ? xfrm_policy_list_sub[dir] : \ - xfrm_policy_list[dir]) -#define XFRM_POLICY_LISTHEADP(type, dir) \ - ((type == XFRM_POLICY_TYPE_SUB) ? &xfrm_policy_list_sub[dir] : \ - &xfrm_policy_list[dir]) -#else -#define XFRM_POLICY_LISTS(type) xfrm_policy_list -#define XFRM_POLICY_LISTHEAD(type, dif) xfrm_policy_list[dir] -#define XFRM_POLICY_LISTHEADP(type, dif) &xfrm_policy_list[dir] -#endif +unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; +EXPORT_SYMBOL(xfrm_policy_count); static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; @@ -57,8 +42,7 @@ static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; static kmem_cache_t *xfrm_dst_cache __read_mostly; static struct work_struct xfrm_policy_gc_work; -static struct list_head xfrm_policy_gc_list = - LIST_HEAD_INIT(xfrm_policy_gc_list); +static HLIST_HEAD(xfrm_policy_gc_list); static DEFINE_SPINLOCK(xfrm_policy_gc_lock); static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); @@ -328,8 +312,10 @@ struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp) policy = kzalloc(sizeof(struct xfrm_policy), gfp); if (policy) { - atomic_set(&policy->refcnt, 1); + INIT_HLIST_NODE(&policy->bydst); + INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); + atomic_set(&policy->refcnt, 1); init_timer(&policy->timer); policy->timer.data = (unsigned long)policy; policy->timer.function = xfrm_policy_timer; @@ -375,17 +361,16 @@ static void xfrm_policy_gc_kill(struct xfrm_policy *policy) static void xfrm_policy_gc_task(void *data) { struct xfrm_policy *policy; - struct list_head *entry, *tmp; - struct list_head gc_list = LIST_HEAD_INIT(gc_list); + struct hlist_node *entry, *tmp; + struct hlist_head gc_list; spin_lock_bh(&xfrm_policy_gc_lock); - list_splice_init(&xfrm_policy_gc_list, &gc_list); + gc_list.first = xfrm_policy_gc_list.first; + INIT_HLIST_HEAD(&xfrm_policy_gc_list); spin_unlock_bh(&xfrm_policy_gc_lock); - list_for_each_safe(entry, tmp, &gc_list) { - policy = list_entry(entry, struct xfrm_policy, list); + hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst) xfrm_policy_gc_kill(policy); - } } /* Rule must be locked. Release descentant resources, announce @@ -407,70 +392,354 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) } spin_lock(&xfrm_policy_gc_lock); - list_add(&policy->list, &xfrm_policy_gc_list); + hlist_add_head(&policy->bydst, &xfrm_policy_gc_list); spin_unlock(&xfrm_policy_gc_lock); schedule_work(&xfrm_policy_gc_work); } +struct xfrm_policy_hash { + struct hlist_head *table; + unsigned int hmask; +}; + +static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2]; +static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly; +static struct hlist_head *xfrm_policy_byidx __read_mostly; +static unsigned int xfrm_idx_hmask __read_mostly; +static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; + +static inline unsigned int __idx_hash(u32 index, unsigned int hmask) +{ + return (index ^ (index >> 8)) & hmask; +} + +static inline unsigned int idx_hash(u32 index) +{ + return __idx_hash(index, xfrm_idx_hmask); +} + +static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask) +{ + xfrm_address_t *daddr = &sel->daddr; + xfrm_address_t *saddr = &sel->saddr; + unsigned int h = 0; + + switch (family) { + case AF_INET: + if (sel->prefixlen_d != 32 || + sel->prefixlen_s != 32) + return hmask + 1; + + h = ntohl(daddr->a4 ^ saddr->a4); + break; + + case AF_INET6: + if (sel->prefixlen_d != 128 || + sel->prefixlen_s != 128) + return hmask + 1; + + h = ntohl(daddr->a6[2] ^ daddr->a6[3] ^ + saddr->a6[2] ^ saddr->a6[3]); + break; + }; + h ^= (h >> 16); + return h & hmask; +} + +static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask) +{ + unsigned int h = 0; + + switch (family) { + case AF_INET: + h = ntohl(daddr->a4 ^ saddr->a4); + break; + + case AF_INET6: + h = ntohl(daddr->a6[2] ^ daddr->a6[3] ^ + saddr->a6[2] ^ saddr->a6[3]); + break; + }; + h ^= (h >> 16); + return h & hmask; +} + +static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir) +{ + unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hash = __sel_hash(sel, family, hmask); + + return (hash == hmask + 1 ? + &xfrm_policy_inexact[dir] : + xfrm_policy_bydst[dir].table + hash); +} + +static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir) +{ + unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hash = __addr_hash(daddr, saddr, family, hmask); + + return xfrm_policy_bydst[dir].table + hash; +} + +static struct hlist_head *xfrm_policy_hash_alloc(unsigned int sz) +{ + struct hlist_head *n; + + if (sz <= PAGE_SIZE) + n = kmalloc(sz, GFP_KERNEL); + else if (hashdist) + n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL); + else + n = (struct hlist_head *) + __get_free_pages(GFP_KERNEL, get_order(sz)); + + if (n) + memset(n, 0, sz); + + return n; +} + +static void xfrm_policy_hash_free(struct hlist_head *n, unsigned int sz) +{ + if (sz <= PAGE_SIZE) + kfree(n); + else if (hashdist) + vfree(n); + else + free_pages((unsigned long)n, get_order(sz)); +} + +static void xfrm_dst_hash_transfer(struct hlist_head *list, + struct hlist_head *ndsttable, + unsigned int nhashmask) +{ + struct hlist_node *entry, *tmp; + struct xfrm_policy *pol; + + hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) { + unsigned int h; + + h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr, + pol->family, nhashmask); + hlist_add_head(&pol->bydst, ndsttable+h); + } +} + +static void xfrm_idx_hash_transfer(struct hlist_head *list, + struct hlist_head *nidxtable, + unsigned int nhashmask) +{ + struct hlist_node *entry, *tmp; + struct xfrm_policy *pol; + + hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) { + unsigned int h; + + h = __idx_hash(pol->index, nhashmask); + hlist_add_head(&pol->byidx, nidxtable+h); + } +} + +static unsigned long xfrm_new_hash_mask(unsigned int old_hmask) +{ + return ((old_hmask + 1) << 1) - 1; +} + +static void xfrm_bydst_resize(int dir) +{ + unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int nhashmask = xfrm_new_hash_mask(hmask); + unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); + struct hlist_head *odst = xfrm_policy_bydst[dir].table; + struct hlist_head *ndst = xfrm_policy_hash_alloc(nsize); + int i; + + if (!ndst) + return; + + write_lock_bh(&xfrm_policy_lock); + + for (i = hmask; i >= 0; i--) + xfrm_dst_hash_transfer(odst + i, ndst, nhashmask); + + xfrm_policy_bydst[dir].table = ndst; + xfrm_policy_bydst[dir].hmask = nhashmask; + + write_unlock_bh(&xfrm_policy_lock); + + xfrm_policy_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); +} + +static void xfrm_byidx_resize(int total) +{ + unsigned int hmask = xfrm_idx_hmask; + unsigned int nhashmask = xfrm_new_hash_mask(hmask); + unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); + struct hlist_head *oidx = xfrm_policy_byidx; + struct hlist_head *nidx = xfrm_policy_hash_alloc(nsize); + int i; + + if (!nidx) + return; + + write_lock_bh(&xfrm_policy_lock); + + for (i = hmask; i >= 0; i--) + xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); + + xfrm_policy_byidx = nidx; + xfrm_idx_hmask = nhashmask; + + write_unlock_bh(&xfrm_policy_lock); + + xfrm_policy_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); +} + +static inline int xfrm_bydst_should_resize(int dir, int *total) +{ + unsigned int cnt = xfrm_policy_count[dir]; + unsigned int hmask = xfrm_policy_bydst[dir].hmask; + + if (total) + *total += cnt; + + if ((hmask + 1) < xfrm_policy_hashmax && + cnt > hmask) + return 1; + + return 0; +} + +static inline int xfrm_byidx_should_resize(int total) +{ + unsigned int hmask = xfrm_idx_hmask; + + if ((hmask + 1) < xfrm_policy_hashmax && + total > hmask) + return 1; + + return 0; +} + +static DEFINE_MUTEX(hash_resize_mutex); + +static void xfrm_hash_resize(void *__unused) +{ + int dir, total; + + mutex_lock(&hash_resize_mutex); + + total = 0; + for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + if (xfrm_bydst_should_resize(dir, &total)) + xfrm_bydst_resize(dir); + } + if (xfrm_byidx_should_resize(total)) + xfrm_byidx_resize(total); + + mutex_unlock(&hash_resize_mutex); +} + +static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL); + /* Generate new index... KAME seems to generate them ordered by cost * of an absolute inpredictability of ordering of rules. This will not pass. */ static u32 xfrm_gen_index(u8 type, int dir) { - u32 idx; - struct xfrm_policy *p; static u32 idx_generator; for (;;) { + struct hlist_node *entry; + struct hlist_head *list; + struct xfrm_policy *p; + u32 idx; + int found; + idx = (idx_generator | dir); idx_generator += 8; if (idx == 0) idx = 8; - for (p = XFRM_POLICY_LISTHEAD(type, dir); p; p = p->next) { - if (p->index == idx) + list = xfrm_policy_byidx + idx_hash(idx); + found = 0; + hlist_for_each_entry(p, entry, list, byidx) { + if (p->index == idx) { + found = 1; break; + } } - if (!p) + if (!found) return idx; } } +static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2) +{ + u32 *p1 = (u32 *) s1; + u32 *p2 = (u32 *) s2; + int len = sizeof(struct xfrm_selector) / sizeof(u32); + int i; + + for (i = 0; i < len; i++) { + if (p1[i] != p2[i]) + return 1; + } + + return 0; +} + int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) { - struct xfrm_policy *pol, **p; - struct xfrm_policy *delpol = NULL; - struct xfrm_policy **newpos = NULL; + struct xfrm_policy *pol; + struct xfrm_policy *delpol; + struct hlist_head *chain; + struct hlist_node *entry, *newpos, *last; struct dst_entry *gc_list; write_lock_bh(&xfrm_policy_lock); - for (p = XFRM_POLICY_LISTHEADP(policy->type, dir); (pol=*p)!=NULL;) { - if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 && + chain = policy_hash_bysel(&policy->selector, policy->family, dir); + delpol = NULL; + newpos = NULL; + last = NULL; + hlist_for_each_entry(pol, entry, chain, bydst) { + if (!delpol && + pol->type == policy->type && + !selector_cmp(&pol->selector, &policy->selector) && xfrm_sec_ctx_match(pol->security, policy->security)) { if (excl) { write_unlock_bh(&xfrm_policy_lock); return -EEXIST; } - *p = pol->next; delpol = pol; if (policy->priority > pol->priority) continue; } else if (policy->priority >= pol->priority) { - p = &pol->next; + last = &pol->bydst; continue; } if (!newpos) - newpos = p; + newpos = &pol->bydst; if (delpol) break; - p = &pol->next; + last = &pol->bydst; } + if (!newpos) + newpos = last; if (newpos) - p = newpos; + hlist_add_after(newpos, &policy->bydst); + else + hlist_add_head(&policy->bydst, chain); xfrm_pol_hold(policy); - policy->next = *p; - *p = policy; + xfrm_policy_count[dir]++; atomic_inc(&flow_cache_genid); + if (delpol) { + hlist_del(&delpol->bydst); + hlist_del(&delpol->byidx); + xfrm_policy_count[dir]--; + } policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir); + hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index)); policy->curlft.add_time = (unsigned long)xtime.tv_sec; policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) @@ -479,10 +748,13 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) if (delpol) xfrm_policy_kill(delpol); + else if (xfrm_bydst_should_resize(dir, NULL)) + schedule_work(&xfrm_hash_work); read_lock_bh(&xfrm_policy_lock); gc_list = NULL; - for (policy = policy->next; policy; policy = policy->next) { + entry = &policy->bydst; + hlist_for_each_entry_continue(policy, entry, bydst) { struct dst_entry *dst; write_lock(&policy->lock); @@ -515,67 +787,112 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete) { - struct xfrm_policy *pol, **p; + struct xfrm_policy *pol, *ret; + struct hlist_head *chain; + struct hlist_node *entry; write_lock_bh(&xfrm_policy_lock); - for (p = XFRM_POLICY_LISTHEADP(type, dir); (pol=*p)!=NULL; p = &pol->next) { - if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) && - (xfrm_sec_ctx_match(ctx, pol->security))) { + chain = policy_hash_bysel(sel, sel->family, dir); + ret = NULL; + hlist_for_each_entry(pol, entry, chain, bydst) { + if (pol->type == type && + !selector_cmp(sel, &pol->selector) && + xfrm_sec_ctx_match(ctx, pol->security)) { xfrm_pol_hold(pol); - if (delete) - *p = pol->next; + if (delete) { + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); + xfrm_policy_count[dir]--; + } + ret = pol; break; } } write_unlock_bh(&xfrm_policy_lock); - if (pol && delete) { + if (ret && delete) { atomic_inc(&flow_cache_genid); - xfrm_policy_kill(pol); + xfrm_policy_kill(ret); } - return pol; + return ret; } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete) { - struct xfrm_policy *pol, **p; + struct xfrm_policy *pol, *ret; + struct hlist_head *chain; + struct hlist_node *entry; write_lock_bh(&xfrm_policy_lock); - for (p = XFRM_POLICY_LISTHEADP(type, dir); (pol=*p)!=NULL; p = &pol->next) { - if (pol->index == id) { + chain = xfrm_policy_byidx + idx_hash(id); + ret = NULL; + hlist_for_each_entry(pol, entry, chain, byidx) { + if (pol->type == type && pol->index == id) { xfrm_pol_hold(pol); - if (delete) - *p = pol->next; + if (delete) { + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); + xfrm_policy_count[dir]--; + } + ret = pol; break; } } write_unlock_bh(&xfrm_policy_lock); - if (pol && delete) { + if (ret && delete) { atomic_inc(&flow_cache_genid); - xfrm_policy_kill(pol); + xfrm_policy_kill(ret); } - return pol; + return ret; } EXPORT_SYMBOL(xfrm_policy_byid); void xfrm_policy_flush(u8 type) { - struct xfrm_policy *xp; - struct xfrm_policy **p_list = XFRM_POLICY_LISTS(type); int dir; write_lock_bh(&xfrm_policy_lock); for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { - while ((xp = p_list[dir]) != NULL) { - p_list[dir] = xp->next; + struct xfrm_policy *pol; + struct hlist_node *entry; + int i; + + again1: + hlist_for_each_entry(pol, entry, + &xfrm_policy_inexact[dir], bydst) { + if (pol->type != type) + continue; + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); write_unlock_bh(&xfrm_policy_lock); - xfrm_policy_kill(xp); + xfrm_policy_kill(pol); write_lock_bh(&xfrm_policy_lock); + goto again1; } + + for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + again2: + hlist_for_each_entry(pol, entry, + xfrm_policy_bydst[dir].table + i, + bydst) { + if (pol->type != type) + continue; + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); + write_unlock_bh(&xfrm_policy_lock); + + xfrm_policy_kill(pol); + + write_lock_bh(&xfrm_policy_lock); + goto again2; + } + } + + xfrm_policy_count[dir] = 0; } atomic_inc(&flow_cache_genid); write_unlock_bh(&xfrm_policy_lock); @@ -585,15 +902,27 @@ EXPORT_SYMBOL(xfrm_policy_flush); int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), void *data) { - struct xfrm_policy *xp; - int dir; - int count = 0; - int error = 0; + struct xfrm_policy *pol; + struct hlist_node *entry; + int dir, count, error; read_lock_bh(&xfrm_policy_lock); + count = 0; for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - for (xp = XFRM_POLICY_LISTHEAD(type, dir); xp; xp = xp->next) - count++; + struct hlist_head *table = xfrm_policy_bydst[dir].table; + int i; + + hlist_for_each_entry(pol, entry, + &xfrm_policy_inexact[dir], bydst) { + if (pol->type == type) + count++; + } + for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + hlist_for_each_entry(pol, entry, table + i, bydst) { + if (pol->type == type) + count++; + } + } } if (count == 0) { @@ -602,13 +931,28 @@ int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*) } for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - for (xp = XFRM_POLICY_LISTHEAD(type, dir); xp; xp = xp->next) { - error = func(xp, dir%XFRM_POLICY_MAX, --count, data); + struct hlist_head *table = xfrm_policy_bydst[dir].table; + int i; + + hlist_for_each_entry(pol, entry, + &xfrm_policy_inexact[dir], bydst) { + if (pol->type != type) + continue; + error = func(pol, dir % XFRM_POLICY_MAX, --count, data); if (error) goto out; } + for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + hlist_for_each_entry(pol, entry, table + i, bydst) { + if (pol->type != type) + continue; + error = func(pol, dir % XFRM_POLICY_MAX, --count, data); + if (error) + goto out; + } + } } - + error = 0; out: read_unlock_bh(&xfrm_policy_lock); return error; @@ -617,31 +961,61 @@ EXPORT_SYMBOL(xfrm_policy_walk); /* Find policy to apply to this flow. */ +static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, + u8 type, u16 family, int dir) +{ + struct xfrm_selector *sel = &pol->selector; + int match; + + if (pol->family != family || + pol->type != type) + return 0; + + match = xfrm_selector_match(sel, fl, family); + if (match) { + if (!security_xfrm_policy_lookup(pol, fl->secid, dir)) + return 1; + } + + return 0; +} + static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, u16 family, u8 dir) { - struct xfrm_policy *pol; + struct xfrm_policy *pol, *ret; + xfrm_address_t *daddr, *saddr; + struct hlist_node *entry; + struct hlist_head *chain; + + daddr = xfrm_flowi_daddr(fl, family); + saddr = xfrm_flowi_saddr(fl, family); + if (unlikely(!daddr || !saddr)) + return NULL; read_lock_bh(&xfrm_policy_lock); - for (pol = XFRM_POLICY_LISTHEAD(type, dir); pol; pol = pol->next) { - struct xfrm_selector *sel = &pol->selector; - int match; - - if (pol->family != family) - continue; - - match = xfrm_selector_match(sel, fl, family); - - if (match) { - if (!security_xfrm_policy_lookup(pol, fl->secid, dir)) { + chain = policy_hash_direct(daddr, saddr, family, dir); + ret = NULL; + hlist_for_each_entry(pol, entry, chain, bydst) { + if (xfrm_policy_match(pol, fl, type, family, dir)) { + xfrm_pol_hold(pol); + ret = pol; + break; + } + } + if (!ret) { + chain = &xfrm_policy_inexact[dir]; + hlist_for_each_entry(pol, entry, chain, bydst) { + if (xfrm_policy_match(pol, fl, type, family, dir)) { xfrm_pol_hold(pol); + ret = pol; break; } } } read_unlock_bh(&xfrm_policy_lock); - return pol; + return ret; } static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, @@ -657,7 +1031,7 @@ static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir); #ifdef CONFIG_XFRM_SUB_POLICY - end: +end: #endif if ((*objp = (void *) pol) != NULL) *obj_refp = &pol->refcnt; @@ -704,26 +1078,29 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) { - struct xfrm_policy **p_list = XFRM_POLICY_LISTS(pol->type); + struct hlist_head *chain = policy_hash_bysel(&pol->selector, + pol->family, dir); - pol->next = p_list[dir]; - p_list[dir] = pol; + hlist_add_head(&pol->bydst, chain); + hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index)); + xfrm_policy_count[dir]++; xfrm_pol_hold(pol); + + if (xfrm_bydst_should_resize(dir, NULL)) + schedule_work(&xfrm_hash_work); } static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir) { - struct xfrm_policy **polp; + if (hlist_unhashed(&pol->bydst)) + return NULL; - for (polp = XFRM_POLICY_LISTHEADP(pol->type, dir); - *polp != NULL; polp = &(*polp)->next) { - if (*polp == pol) { - *polp = pol->next; - return pol; - } - } - return NULL; + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); + xfrm_policy_count[dir]--; + + return pol; } int xfrm_policy_delete(struct xfrm_policy *pol, int dir) @@ -968,7 +1345,8 @@ restart: if (!policy) { /* To accelerate a bit... */ - if ((dst_orig->flags & DST_NOXFRM) || xfrm_policy_lists_empty(XFRM_POLICY_OUT)) + if ((dst_orig->flags & DST_NOXFRM) || + !xfrm_policy_count[XFRM_POLICY_OUT]) return 0; policy = flow_cache_lookup(fl, dst_orig->ops->family, @@ -1413,50 +1791,50 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) return dst; } +static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p) +{ + struct dst_entry *dst, **dstp; + + write_lock(&pol->lock); + dstp = &pol->bundles; + while ((dst=*dstp) != NULL) { + if (func(dst)) { + *dstp = dst->next; + dst->next = *gc_list_p; + *gc_list_p = dst; + } else { + dstp = &dst->next; + } + } + write_unlock(&pol->lock); +} + static void xfrm_prune_bundles(int (*func)(struct dst_entry *)) { - int i; - struct xfrm_policy *pol; - struct dst_entry *dst, **dstp, *gc_list = NULL; + struct dst_entry *gc_list = NULL; + int dir; read_lock_bh(&xfrm_policy_lock); - for (i=0; i<2*XFRM_POLICY_MAX; i++) { -#ifdef CONFIG_XFRM_SUB_POLICY - for (pol = xfrm_policy_list_sub[i]; pol; pol = pol->next) { - write_lock(&pol->lock); - dstp = &pol->bundles; - while ((dst=*dstp) != NULL) { - if (func(dst)) { - *dstp = dst->next; - dst->next = gc_list; - gc_list = dst; - } else { - dstp = &dst->next; - } - } - write_unlock(&pol->lock); - } + for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + struct xfrm_policy *pol; + struct hlist_node *entry; + struct hlist_head *table; + int i; -#endif - for (pol = xfrm_policy_list[i]; pol; pol = pol->next) { - write_lock(&pol->lock); - dstp = &pol->bundles; - while ((dst=*dstp) != NULL) { - if (func(dst)) { - *dstp = dst->next; - dst->next = gc_list; - gc_list = dst; - } else { - dstp = &dst->next; - } - } - write_unlock(&pol->lock); + hlist_for_each_entry(pol, entry, + &xfrm_policy_inexact[dir], bydst) + prune_one_bundle(pol, func, &gc_list); + + table = xfrm_policy_bydst[dir].table; + for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + hlist_for_each_entry(pol, entry, table + i, bydst) + prune_one_bundle(pol, func, &gc_list); } } read_unlock_bh(&xfrm_policy_lock); while (gc_list) { - dst = gc_list; + struct dst_entry *dst = gc_list; gc_list = dst->next; dst_free(dst); } @@ -1680,6 +2058,9 @@ static struct notifier_block xfrm_dev_notifier = { static void __init xfrm_policy_init(void) { + unsigned int hmask, sz; + int dir; + xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", sizeof(struct xfrm_dst), 0, SLAB_HWCACHE_ALIGN, @@ -1687,6 +2068,26 @@ static void __init xfrm_policy_init(void) if (!xfrm_dst_cache) panic("XFRM: failed to allocate xfrm_dst_cache\n"); + hmask = 8 - 1; + sz = (hmask+1) * sizeof(struct hlist_head); + + xfrm_policy_byidx = xfrm_policy_hash_alloc(sz); + xfrm_idx_hmask = hmask; + if (!xfrm_policy_byidx) + panic("XFRM: failed to allocate byidx hash\n"); + + for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + struct xfrm_policy_hash *htab; + + INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]); + + htab = &xfrm_policy_bydst[dir]; + htab->table = xfrm_policy_hash_alloc(sz); + htab->hmask = hmask; + if (!htab->table) + panic("XFRM: failed to allocate bydst hash\n"); + } + INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL); register_netdevice_notifier(&xfrm_dev_notifier); } From 44e36b42a8378be1dcf7e6f8a1cb2710a8903387 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 04:50:50 -0700 Subject: [PATCH 0598/1063] [XFRM]: Extract common hashing code into xfrm_hash.[ch] Signed-off-by: David S. Miller --- net/xfrm/Makefile | 3 +- net/xfrm/xfrm_hash.c | 41 +++++++++++++ net/xfrm/xfrm_hash.h | 128 +++++++++++++++++++++++++++++++++++++++++ net/xfrm/xfrm_policy.c | 95 +++--------------------------- net/xfrm/xfrm_state.c | 128 ++++++----------------------------------- 5 files changed, 195 insertions(+), 200 deletions(-) create mode 100644 net/xfrm/xfrm_hash.c create mode 100644 net/xfrm/xfrm_hash.h diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 693aac1aa833..de3c1a625a46 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -2,6 +2,7 @@ # Makefile for the XFRM subsystem. # -obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_input.o xfrm_algo.o +obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ + xfrm_input.o xfrm_algo.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c new file mode 100644 index 000000000000..37643bb8768a --- /dev/null +++ b/net/xfrm/xfrm_hash.c @@ -0,0 +1,41 @@ +/* xfrm_hash.c: Common hash table code. + * + * Copyright (C) 2006 David S. Miller (davem@davemloft.net) + */ + +#include +#include +#include +#include +#include +#include + +#include "xfrm_hash.h" + +struct hlist_head *xfrm_hash_alloc(unsigned int sz) +{ + struct hlist_head *n; + + if (sz <= PAGE_SIZE) + n = kmalloc(sz, GFP_KERNEL); + else if (hashdist) + n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL); + else + n = (struct hlist_head *) + __get_free_pages(GFP_KERNEL, get_order(sz)); + + if (n) + memset(n, 0, sz); + + return n; +} + +void xfrm_hash_free(struct hlist_head *n, unsigned int sz) +{ + if (sz <= PAGE_SIZE) + kfree(n); + else if (hashdist) + vfree(n); + else + free_pages((unsigned long)n, get_order(sz)); +} diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h new file mode 100644 index 000000000000..d3abb0b7dc62 --- /dev/null +++ b/net/xfrm/xfrm_hash.h @@ -0,0 +1,128 @@ +#ifndef _XFRM_HASH_H +#define _XFRM_HASH_H + +#include +#include + +static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr) +{ + return ntohl(addr->a4); +} + +static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) +{ + return ntohl(addr->a6[2] ^ addr->a6[3]); +} + +static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) +{ + return ntohl(daddr->a4 ^ saddr->a4); +} + +static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) +{ + return ntohl(daddr->a6[2] ^ daddr->a6[3] ^ + saddr->a6[2] ^ saddr->a6[3]); +} + +static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, + u32 reqid, unsigned short family, + unsigned int hmask) +{ + unsigned int h = family ^ reqid; + switch (family) { + case AF_INET: + h ^= __xfrm4_daddr_saddr_hash(daddr, saddr); + break; + case AF_INET6: + h ^= __xfrm6_daddr_saddr_hash(daddr, saddr); + break; + } + return (h ^ (h >> 16)) & hmask; +} + +static inline unsigned __xfrm_src_hash(xfrm_address_t *saddr, + unsigned short family, + unsigned int hmask) +{ + unsigned int h = family; + switch (family) { + case AF_INET: + h ^= __xfrm4_addr_hash(saddr); + break; + case AF_INET6: + h ^= __xfrm6_addr_hash(saddr); + break; + }; + return (h ^ (h >> 16)) & hmask; +} + +static inline unsigned int +__xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family, + unsigned int hmask) +{ + unsigned int h = spi ^ proto; + switch (family) { + case AF_INET: + h ^= __xfrm4_addr_hash(daddr); + break; + case AF_INET6: + h ^= __xfrm6_addr_hash(daddr); + break; + } + return (h ^ (h >> 10) ^ (h >> 20)) & hmask; +} + +static inline unsigned int __idx_hash(u32 index, unsigned int hmask) +{ + return (index ^ (index >> 8)) & hmask; +} + +static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask) +{ + xfrm_address_t *daddr = &sel->daddr; + xfrm_address_t *saddr = &sel->saddr; + unsigned int h = 0; + + switch (family) { + case AF_INET: + if (sel->prefixlen_d != 32 || + sel->prefixlen_s != 32) + return hmask + 1; + + h = __xfrm4_daddr_saddr_hash(daddr, saddr); + break; + + case AF_INET6: + if (sel->prefixlen_d != 128 || + sel->prefixlen_s != 128) + return hmask + 1; + + h = __xfrm6_daddr_saddr_hash(daddr, saddr); + break; + }; + h ^= (h >> 16); + return h & hmask; +} + +static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask) +{ + unsigned int h = 0; + + switch (family) { + case AF_INET: + h = __xfrm4_daddr_saddr_hash(daddr, saddr); + break; + + case AF_INET6: + h = __xfrm6_daddr_saddr_hash(daddr, saddr); + break; + }; + h ^= (h >> 16); + return h & hmask; +} + +extern struct hlist_head *xfrm_hash_alloc(unsigned int sz); +extern void xfrm_hash_free(struct hlist_head *n, unsigned int sz); + +#endif /* _XFRM_HASH_H */ diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 087a5443b051..b446ca31fecc 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -22,12 +22,12 @@ #include #include #include -#include -#include #include #include #include +#include "xfrm_hash.h" + DEFINE_MUTEX(xfrm_cfg_mutex); EXPORT_SYMBOL(xfrm_cfg_mutex); @@ -409,62 +409,11 @@ static struct hlist_head *xfrm_policy_byidx __read_mostly; static unsigned int xfrm_idx_hmask __read_mostly; static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; -static inline unsigned int __idx_hash(u32 index, unsigned int hmask) -{ - return (index ^ (index >> 8)) & hmask; -} - static inline unsigned int idx_hash(u32 index) { return __idx_hash(index, xfrm_idx_hmask); } -static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask) -{ - xfrm_address_t *daddr = &sel->daddr; - xfrm_address_t *saddr = &sel->saddr; - unsigned int h = 0; - - switch (family) { - case AF_INET: - if (sel->prefixlen_d != 32 || - sel->prefixlen_s != 32) - return hmask + 1; - - h = ntohl(daddr->a4 ^ saddr->a4); - break; - - case AF_INET6: - if (sel->prefixlen_d != 128 || - sel->prefixlen_s != 128) - return hmask + 1; - - h = ntohl(daddr->a6[2] ^ daddr->a6[3] ^ - saddr->a6[2] ^ saddr->a6[3]); - break; - }; - h ^= (h >> 16); - return h & hmask; -} - -static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask) -{ - unsigned int h = 0; - - switch (family) { - case AF_INET: - h = ntohl(daddr->a4 ^ saddr->a4); - break; - - case AF_INET6: - h = ntohl(daddr->a6[2] ^ daddr->a6[3] ^ - saddr->a6[2] ^ saddr->a6[3]); - break; - }; - h ^= (h >> 16); - return h & hmask; -} - static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir) { unsigned int hmask = xfrm_policy_bydst[dir].hmask; @@ -483,34 +432,6 @@ static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address return xfrm_policy_bydst[dir].table + hash; } -static struct hlist_head *xfrm_policy_hash_alloc(unsigned int sz) -{ - struct hlist_head *n; - - if (sz <= PAGE_SIZE) - n = kmalloc(sz, GFP_KERNEL); - else if (hashdist) - n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL); - else - n = (struct hlist_head *) - __get_free_pages(GFP_KERNEL, get_order(sz)); - - if (n) - memset(n, 0, sz); - - return n; -} - -static void xfrm_policy_hash_free(struct hlist_head *n, unsigned int sz) -{ - if (sz <= PAGE_SIZE) - kfree(n); - else if (hashdist) - vfree(n); - else - free_pages((unsigned long)n, get_order(sz)); -} - static void xfrm_dst_hash_transfer(struct hlist_head *list, struct hlist_head *ndsttable, unsigned int nhashmask) @@ -553,7 +474,7 @@ static void xfrm_bydst_resize(int dir) unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); struct hlist_head *odst = xfrm_policy_bydst[dir].table; - struct hlist_head *ndst = xfrm_policy_hash_alloc(nsize); + struct hlist_head *ndst = xfrm_hash_alloc(nsize); int i; if (!ndst) @@ -569,7 +490,7 @@ static void xfrm_bydst_resize(int dir) write_unlock_bh(&xfrm_policy_lock); - xfrm_policy_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); + xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); } static void xfrm_byidx_resize(int total) @@ -578,7 +499,7 @@ static void xfrm_byidx_resize(int total) unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); struct hlist_head *oidx = xfrm_policy_byidx; - struct hlist_head *nidx = xfrm_policy_hash_alloc(nsize); + struct hlist_head *nidx = xfrm_hash_alloc(nsize); int i; if (!nidx) @@ -594,7 +515,7 @@ static void xfrm_byidx_resize(int total) write_unlock_bh(&xfrm_policy_lock); - xfrm_policy_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); + xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); } static inline int xfrm_bydst_should_resize(int dir, int *total) @@ -2071,7 +1992,7 @@ static void __init xfrm_policy_init(void) hmask = 8 - 1; sz = (hmask+1) * sizeof(struct hlist_head); - xfrm_policy_byidx = xfrm_policy_hash_alloc(sz); + xfrm_policy_byidx = xfrm_hash_alloc(sz); xfrm_idx_hmask = hmask; if (!xfrm_policy_byidx) panic("XFRM: failed to allocate byidx hash\n"); @@ -2082,7 +2003,7 @@ static void __init xfrm_policy_init(void) INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]); htab = &xfrm_policy_bydst[dir]; - htab->table = xfrm_policy_hash_alloc(sz); + htab->table = xfrm_hash_alloc(sz); htab->hmask = hmask; if (!htab->table) panic("XFRM: failed to allocate bydst hash\n"); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 37213f9f6a02..4341795eb244 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -18,11 +18,11 @@ #include #include #include -#include -#include #include #include +#include "xfrm_hash.h" + struct sock *xfrm_nl; EXPORT_SYMBOL(xfrm_nl); @@ -55,44 +55,6 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static unsigned int xfrm_state_num; static unsigned int xfrm_state_genid; -static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr) -{ - return ntohl(addr->a4); -} - -static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) -{ - return ntohl(addr->a6[2]^addr->a6[3]); -} - -static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) -{ - return ntohl(daddr->a4 ^ saddr->a4); -} - -static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) -{ - return ntohl(daddr->a6[2] ^ daddr->a6[3] ^ - saddr->a6[2] ^ saddr->a6[3]); -} - -static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, - xfrm_address_t *saddr, - u32 reqid, unsigned short family, - unsigned int hmask) -{ - unsigned int h = family ^ reqid; - switch (family) { - case AF_INET: - h ^= __xfrm4_daddr_saddr_hash(daddr, saddr); - break; - case AF_INET6: - h ^= __xfrm6_daddr_saddr_hash(daddr, saddr); - break; - }; - return (h ^ (h >> 16)) & hmask; -} - static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, u32 reqid, @@ -101,76 +63,18 @@ static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr, return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask); } -static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family, - unsigned int hmask) -{ - unsigned int h = family; - switch (family) { - case AF_INET: - h ^= __xfrm4_addr_hash(addr); - break; - case AF_INET6: - h ^= __xfrm6_addr_hash(addr); - break; - }; - return (h ^ (h >> 16)) & hmask; -} - -static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) +static inline unsigned int xfrm_src_hash(xfrm_address_t *addr, + unsigned short family) { return __xfrm_src_hash(addr, family, xfrm_state_hmask); } -static inline unsigned int -__xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, - unsigned short family, unsigned int hmask) -{ - unsigned int h = spi ^ proto; - switch (family) { - case AF_INET: - h ^= __xfrm4_addr_hash(daddr); - break; - case AF_INET6: - h ^= __xfrm6_addr_hash(daddr); - break; - } - return (h ^ (h >> 10) ^ (h >> 20)) & hmask; -} - static inline unsigned int xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family) { return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask); } -static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz) -{ - struct hlist_head *n; - - if (sz <= PAGE_SIZE) - n = kmalloc(sz, GFP_KERNEL); - else if (hashdist) - n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL); - else - n = (struct hlist_head *) - __get_free_pages(GFP_KERNEL, get_order(sz)); - - if (n) - memset(n, 0, sz); - - return n; -} - -static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz) -{ - if (sz <= PAGE_SIZE) - kfree(n); - else if (hashdist) - vfree(n); - else - free_pages((unsigned long)n, get_order(sz)); -} - static void xfrm_hash_transfer(struct hlist_head *list, struct hlist_head *ndsttable, struct hlist_head *nsrctable, @@ -216,18 +120,18 @@ static void xfrm_hash_resize(void *__unused) mutex_lock(&hash_resize_mutex); nsize = xfrm_hash_new_size(); - ndst = xfrm_state_hash_alloc(nsize); + ndst = xfrm_hash_alloc(nsize); if (!ndst) goto out_unlock; - nsrc = xfrm_state_hash_alloc(nsize); + nsrc = xfrm_hash_alloc(nsize); if (!nsrc) { - xfrm_state_hash_free(ndst, nsize); + xfrm_hash_free(ndst, nsize); goto out_unlock; } - nspi = xfrm_state_hash_alloc(nsize); + nspi = xfrm_hash_alloc(nsize); if (!nspi) { - xfrm_state_hash_free(ndst, nsize); - xfrm_state_hash_free(nsrc, nsize); + xfrm_hash_free(ndst, nsize); + xfrm_hash_free(nsrc, nsize); goto out_unlock; } @@ -251,9 +155,9 @@ static void xfrm_hash_resize(void *__unused) spin_unlock_bh(&xfrm_state_lock); osize = (ohashmask + 1) * sizeof(struct hlist_head); - xfrm_state_hash_free(odst, osize); - xfrm_state_hash_free(osrc, osize); - xfrm_state_hash_free(ospi, osize); + xfrm_hash_free(odst, osize); + xfrm_hash_free(osrc, osize); + xfrm_hash_free(ospi, osize); out_unlock: mutex_unlock(&hash_resize_mutex); @@ -1643,9 +1547,9 @@ void __init xfrm_state_init(void) sz = sizeof(struct hlist_head) * 8; - xfrm_state_bydst = xfrm_state_hash_alloc(sz); - xfrm_state_bysrc = xfrm_state_hash_alloc(sz); - xfrm_state_byspi = xfrm_state_hash_alloc(sz); + xfrm_state_bydst = xfrm_hash_alloc(sz); + xfrm_state_bysrc = xfrm_hash_alloc(sz); + xfrm_state_byspi = xfrm_hash_alloc(sz); if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi) panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes."); xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1); From b27075735e371f481940a4920b329202334d2259 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 19 Sep 2006 11:13:24 +0300 Subject: [PATCH 0599/1063] IB/mthca: Fix lid used for sending traps The SM LID used to send traps to is incorrectly set to port LID. This is a regression from 2.6.17 -- after a PortInfo MAD is received, no traps are sent to the SM LID. The traps go to the loopback interface instead, and are dropped there. The SM LID should be taken from the sm_lid of the PortInfo response. The bug was introduced by commit 12bbb2b7be7f5564952ebe0196623e97464b8ac5: IB/mthca: Add client reregister event generation Signed-off-by: Jack Morgenstein Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_mad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index d9bc030bcccc..45e106f14807 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -119,7 +119,7 @@ static void smp_snoop(struct ib_device *ibdev, mthca_update_rate(to_mdev(ibdev), port_num); update_sm_ah(to_mdev(ibdev), port_num, - be16_to_cpu(pinfo->lid), + be16_to_cpu(pinfo->sm_lid), pinfo->neighbormtu_mastersmsl & 0xf); event.device = ibdev; From 9fd558f454b666aca218a990d44f9e1ffac6ed4d Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 22 Sep 2006 15:17:18 -0700 Subject: [PATCH 0600/1063] IB/mthca: Don't use privileged UAR for kernel access Make kernel use UAR2 instead of UAR1 for hardware access: this adds sanity checking from the hardware side, without any performance cost. Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_uar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mthca/mthca_uar.c b/drivers/infiniband/hw/mthca/mthca_uar.c index 8e9219842be4..8b728486410d 100644 --- a/drivers/infiniband/hw/mthca/mthca_uar.c +++ b/drivers/infiniband/hw/mthca/mthca_uar.c @@ -60,7 +60,7 @@ int mthca_init_uar_table(struct mthca_dev *dev) ret = mthca_alloc_init(&dev->uar_table.alloc, dev->limits.num_uars, dev->limits.num_uars - 1, - dev->limits.reserved_uars); + dev->limits.reserved_uars + 1); if (ret) return ret; From 9217b27b12eb5ab910d14b3376c2b6cd13d87711 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 3 Aug 2006 22:16:06 +0300 Subject: [PATCH 0601/1063] IB/ipoib: Fix flush/start xmit race (from code review) Prevent flush task from freeing the ipoib_neigh pointer, while ipoib_start_xmit() is accessing the ipoib_neigh through the pointer it has loaded from the skb's hardware address. Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index cf71d2a5515c..36d76987a481 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -336,7 +336,8 @@ void ipoib_flush_paths(struct net_device *dev) struct ipoib_path *path, *tp; LIST_HEAD(remove_list); - spin_lock_irq(&priv->lock); + spin_lock_irq(&priv->tx_lock); + spin_lock(&priv->lock); list_splice(&priv->path_list, &remove_list); INIT_LIST_HEAD(&priv->path_list); @@ -347,12 +348,15 @@ void ipoib_flush_paths(struct net_device *dev) list_for_each_entry_safe(path, tp, &remove_list, list) { if (path->query) ib_sa_cancel_query(path->query_id, path->query); - spin_unlock_irq(&priv->lock); + spin_unlock(&priv->lock); + spin_unlock_irq(&priv->tx_lock); wait_for_completion(&path->done); path_free(dev, path); - spin_lock_irq(&priv->lock); + spin_lock_irq(&priv->tx_lock); + spin_lock(&priv->lock); } - spin_unlock_irq(&priv->lock); + spin_unlock(&priv->lock); + spin_unlock_irq(&priv->tx_lock); } static void path_rec_completion(int status, From ab10867621a96230757eb4a2a19d560b85f62ce9 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 22 Sep 2006 15:17:19 -0700 Subject: [PATCH 0602/1063] IB/uverbs: Use idr_read_cq() where appropriate There were two functions that open-coded idr_read_cq() in terms of idr_read_uobj() rather than using the helper. Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_cmd.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 30923eb68ec7..b81307b625a6 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -894,7 +894,6 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, { struct ib_uverbs_poll_cq cmd; struct ib_uverbs_poll_cq_resp *resp; - struct ib_uobject *uobj; struct ib_cq *cq; struct ib_wc *wc; int ret = 0; @@ -915,16 +914,15 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, goto out_wc; } - uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext); - if (!uobj) { + cq = idr_read_cq(cmd.cq_handle, file->ucontext); + if (!cq) { ret = -EINVAL; goto out; } - cq = uobj->object; resp->count = ib_poll_cq(cq, cmd.ne, wc); - put_uobj_read(uobj); + put_cq_read(cq); for (i = 0; i < resp->count; i++) { resp->wc[i].wr_id = wc[i].wr_id; @@ -959,21 +957,19 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_req_notify_cq cmd; - struct ib_uobject *uobj; struct ib_cq *cq; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext); - if (!uobj) + cq = idr_read_cq(cmd.cq_handle, file->ucontext); + if (!cq) return -EINVAL; - cq = uobj->object; ib_req_notify_cq(cq, cmd.solicited_only ? IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); - put_uobj_read(uobj); + put_cq_read(cq); return in_len; } From 1ccf6aa19aabc11587d6d7818a5729adfed0e3de Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 22 Sep 2006 15:17:20 -0700 Subject: [PATCH 0603/1063] IB/uverbs: Fix lockdep warning when QP is created with 2 CQs Lockdep warns when userspace creates a QP that uses different CQs for send completions and receive completions, because both CQs are locked and their mutexes belong to the same lock class. However, we know that the mutexes are distinct and the nesting is safe (there is no possibility of AB-BA deadlock because the mutexes are locked with down_read()), so annotate the situation with SINGLE_DEPTH_NESTING to get rid of the lockdep warning. Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_cmd.c | 34 ++++++++++++++++------------ 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index b81307b625a6..8b6df7cec0bf 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -155,7 +155,7 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id, } static struct ib_uobject *idr_read_uobj(struct idr *idr, int id, - struct ib_ucontext *context) + struct ib_ucontext *context, int nested) { struct ib_uobject *uobj; @@ -163,7 +163,10 @@ static struct ib_uobject *idr_read_uobj(struct idr *idr, int id, if (!uobj) return NULL; - down_read(&uobj->mutex); + if (nested) + down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING); + else + down_read(&uobj->mutex); if (!uobj->live) { put_uobj_read(uobj); return NULL; @@ -190,17 +193,18 @@ static struct ib_uobject *idr_write_uobj(struct idr *idr, int id, return uobj; } -static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context) +static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context, + int nested) { struct ib_uobject *uobj; - uobj = idr_read_uobj(idr, id, context); + uobj = idr_read_uobj(idr, id, context, nested); return uobj ? uobj->object : NULL; } static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context) { - return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context); + return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0); } static void put_pd_read(struct ib_pd *pd) @@ -208,9 +212,9 @@ static void put_pd_read(struct ib_pd *pd) put_uobj_read(pd->uobject); } -static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context) +static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested) { - return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context); + return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested); } static void put_cq_read(struct ib_cq *cq) @@ -220,7 +224,7 @@ static void put_cq_read(struct ib_cq *cq) static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context) { - return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context); + return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0); } static void put_ah_read(struct ib_ah *ah) @@ -230,7 +234,7 @@ static void put_ah_read(struct ib_ah *ah) static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context) { - return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context); + return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0); } static void put_qp_read(struct ib_qp *qp) @@ -240,7 +244,7 @@ static void put_qp_read(struct ib_qp *qp) static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context) { - return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context); + return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0); } static void put_srq_read(struct ib_srq *srq) @@ -867,7 +871,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - cq = idr_read_cq(cmd.cq_handle, file->ucontext); + cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); if (!cq) return -EINVAL; @@ -914,7 +918,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, goto out_wc; } - cq = idr_read_cq(cmd.cq_handle, file->ucontext); + cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); if (!cq) { ret = -EINVAL; goto out; @@ -962,7 +966,7 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - cq = idr_read_cq(cmd.cq_handle, file->ucontext); + cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); if (!cq) return -EINVAL; @@ -1060,9 +1064,9 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL; pd = idr_read_pd(cmd.pd_handle, file->ucontext); - scq = idr_read_cq(cmd.send_cq_handle, file->ucontext); + scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0); rcq = cmd.recv_cq_handle == cmd.send_cq_handle ? - scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext); + scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1); if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) { ret = -EINVAL; From 2a214182d23594915d6ae517b6368ba2eae055d2 Mon Sep 17 00:00:00 2001 From: James Lentini Date: Fri, 22 Sep 2006 15:17:20 -0700 Subject: [PATCH 0604/1063] IB/mthca: Include the header we really want Signed-off-by: James Lentini Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index deabc14b4ea4..99a94d710935 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -34,7 +34,7 @@ * $Id: mthca_cmd.c 1349 2004-12-16 21:09:43Z roland $ */ -#include +#include #include #include #include From ded7f1a16d50527359be02f8b04f9ba56bc923e6 Mon Sep 17 00:00:00 2001 From: Ishai Rabinovitz Date: Tue, 15 Aug 2006 17:34:52 +0300 Subject: [PATCH 0605/1063] IB/srp: Add port/device attributes Add local_ib_device and local_ib_port attributes to srp scsi_host. These are needed when we want to connect to the same target through multiple distinct ports. Signed-off-by: Ishai Rabinovitz Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 30 +++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index fd8344cdc0db..249a98c06aeb 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1449,12 +1449,28 @@ static ssize_t show_zero_req_lim(struct class_device *cdev, char *buf) return sprintf(buf, "%d\n", target->zero_req_lim); } -static CLASS_DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL); -static CLASS_DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL); -static CLASS_DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL); -static CLASS_DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); -static CLASS_DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL); -static CLASS_DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL); +static ssize_t show_local_ib_port(struct class_device *cdev, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(cdev)); + + return sprintf(buf, "%d\n", target->srp_host->port); +} + +static ssize_t show_local_ib_device(struct class_device *cdev, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(cdev)); + + return sprintf(buf, "%s\n", target->srp_host->dev->dev->name); +} + +static CLASS_DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL); +static CLASS_DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL); +static CLASS_DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL); +static CLASS_DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); +static CLASS_DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL); +static CLASS_DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL); +static CLASS_DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL); +static CLASS_DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL); static struct class_device_attribute *srp_host_attrs[] = { &class_device_attr_id_ext, @@ -1463,6 +1479,8 @@ static struct class_device_attribute *srp_host_attrs[] = { &class_device_attr_pkey, &class_device_attr_dgid, &class_device_attr_zero_req_lim, + &class_device_attr_local_ib_port, + &class_device_attr_local_ib_device, NULL }; From e4bec827feda76d5e7417a2696a75424834d564f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 22 Sep 2006 15:17:35 -0700 Subject: [PATCH 0606/1063] [IPSEC] esp: Defer output IV initialization to first use. First of all, if the xfrm_state only gets used for input packets this entropy is a complete waste. Secondly, it is often the case that a configuration loads many rules (perhaps even dynamically) and they don't all necessarily ever get used. This get_random_bytes() call was showing up in the profiles for xfrm_state inserts which is how I noticed this. Signed-off-by: David S. Miller --- include/net/esp.h | 5 +++-- net/ipv4/esp4.c | 9 +++++++-- net/ipv6/esp6.c | 9 +++++++-- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/include/net/esp.h b/include/net/esp.h index 064366d66eea..713d039f4af7 100644 --- a/include/net/esp.h +++ b/include/net/esp.h @@ -15,13 +15,14 @@ struct esp_data struct { u8 *key; /* Key */ int key_len; /* Key length */ - u8 *ivec; /* ivec buffer */ + int padlen; /* 0..255 */ /* ivlen is offset from enc_data, where encrypted data start. * It is logically different of crypto_tfm_alg_ivsize(tfm). * We assume that it is either zero (no ivec), or * >= crypto_tfm_alg_ivsize(tfm). */ int ivlen; - int padlen; /* 0..255 */ + int ivinitted; + u8 *ivec; /* ivec buffer */ struct crypto_blkcipher *tfm; /* crypto handle */ } conf; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index e87377e1d6b6..13b29360d102 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -95,8 +95,13 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) esph->seq_no = htonl(++x->replay.oseq); xfrm_aevent_doreplay(x); - if (esp->conf.ivlen) + if (esp->conf.ivlen) { + if (unlikely(!esp->conf.ivinitted)) { + get_random_bytes(esp->conf.ivec, esp->conf.ivlen); + esp->conf.ivinitted = 1; + } crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen); + } do { struct scatterlist *sg = &esp->sgbuf[0]; @@ -378,7 +383,7 @@ static int esp_init_state(struct xfrm_state *x) esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL); if (unlikely(esp->conf.ivec == NULL)) goto error; - get_random_bytes(esp->conf.ivec, esp->conf.ivlen); + esp->conf.ivinitted = 0; } if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) goto error; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index ae50b9511151..e78680a9985b 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -99,8 +99,13 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) esph->seq_no = htonl(++x->replay.oseq); xfrm_aevent_doreplay(x); - if (esp->conf.ivlen) + if (esp->conf.ivlen) { + if (unlikely(!esp->conf.ivinitted)) { + get_random_bytes(esp->conf.ivec, esp->conf.ivlen); + esp->conf.ivinitted = 1; + } crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen); + } do { struct scatterlist *sg = &esp->sgbuf[0]; @@ -353,7 +358,7 @@ static int esp6_init_state(struct xfrm_state *x) esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL); if (unlikely(esp->conf.ivec == NULL)) goto error; - get_random_bytes(esp->conf.ivec, esp->conf.ivlen); + esp->conf.ivinitted = 0; } if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) goto error; From e731c248ba9e8c7025ae8b4a3fa48e4236b82e52 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Thu, 24 Aug 2006 23:18:12 +0900 Subject: [PATCH 0607/1063] [IPV6] MIP6: Several obvious clean-ups. - Remove redundant code. Pointed out by Brian Haley . - Unify code paths with/without CONFIG_IPV6_MIP. - Use NIP6_FMT for IPv6 address textual presentation. - Fold long line. Pointed out by David Miller . Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/ah6.c | 61 +++++++++++----------------------------------- net/ipv6/exthdrs.c | 1 - net/ipv6/mip6.c | 6 +++-- 3 files changed, 18 insertions(+), 50 deletions(-) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 0f2b4e330aa9..b0d83e8e4252 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -128,9 +128,7 @@ static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *des off += optlen; len -= optlen; } - if (len == 0) - return; - + /* Note: ok if len == 0 */ bad: return; } @@ -175,11 +173,7 @@ static void ipv6_rearrange_rthdr(struct ipv6hdr *iph, struct ipv6_rt_hdr *rthdr) ipv6_addr_copy(&iph->daddr, &final_addr); } -#ifdef CONFIG_IPV6_MIP6 static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir) -#else -static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len) -#endif { union { struct ipv6hdr *iph; @@ -194,39 +188,20 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len) while (exthdr.raw < end) { switch (nexthdr) { -#ifdef CONFIG_IPV6_MIP6 - case NEXTHDR_HOP: - if (!zero_out_mutable_opts(exthdr.opth)) { - LIMIT_NETDEBUG( - KERN_WARNING "overrun %sopts\n", - nexthdr == NEXTHDR_HOP ? - "hop" : "dest"); - return -EINVAL; - } - break; case NEXTHDR_DEST: +#ifdef CONFIG_IPV6_MIP6 if (dir == XFRM_POLICY_OUT) ipv6_rearrange_destopt(iph, exthdr.opth); - if (!zero_out_mutable_opts(exthdr.opth)) { - LIMIT_NETDEBUG( - KERN_WARNING "overrun %sopts\n", - nexthdr == NEXTHDR_HOP ? - "hop" : "dest"); - return -EINVAL; - } - break; -#else - case NEXTHDR_HOP: - case NEXTHDR_DEST: - if (!zero_out_mutable_opts(exthdr.opth)) { - LIMIT_NETDEBUG( - KERN_WARNING "overrun %sopts\n", - nexthdr == NEXTHDR_HOP ? - "hop" : "dest"); - return -EINVAL; - } - break; #endif + case NEXTHDR_HOP: + if (!zero_out_mutable_opts(exthdr.opth)) { + LIMIT_NETDEBUG( + KERN_WARNING "overrun %sopts\n", + nexthdr == NEXTHDR_HOP ? + "hop" : "dest"); + return -EINVAL; + } + break; case NEXTHDR_ROUTING: ipv6_rearrange_rthdr(iph, exthdr.rth); @@ -282,16 +257,13 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) } #ifdef CONFIG_IPV6_MIP6 memcpy(tmp_ext, &top_iph->saddr, extlen); +#else + memcpy(tmp_ext, &top_iph->daddr, extlen); +#endif err = ipv6_clear_mutable_options(top_iph, extlen - sizeof(*tmp_ext) + sizeof(*top_iph), XFRM_POLICY_OUT); -#else - memcpy(tmp_ext, &top_iph->daddr, extlen); - err = ipv6_clear_mutable_options(top_iph, - extlen - sizeof(*tmp_ext) + - sizeof(*top_iph)); -#endif if (err) goto error_free_iph; } @@ -386,13 +358,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) if (!tmp_hdr) goto out; memcpy(tmp_hdr, skb->nh.raw, hdr_len); -#ifdef CONFIG_IPV6_MIP6 if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len, XFRM_POLICY_IN)) goto free_out; -#else - if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len)) - goto free_out; -#endif skb->nh.ipv6h->priority = 0; skb->nh.ipv6h->flow_lbl[0] = 0; skb->nh.ipv6h->flow_lbl[1] = 0; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 6a6466bb5f26..084f78c3479b 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -87,7 +87,6 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) len -= optlen; } /* not_found */ - return -1; bad: return -1; } diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 70854035c131..99d116caecda 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -121,7 +121,8 @@ int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) &skb->nh.ipv6h->daddr, mhlen, IPPROTO_MH, skb_checksum(skb, 0, mhlen, 0))) { - LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n", + LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH checksum failed " + "[" NIP6_FMT " > " NIP6_FMT "]\n", NIP6(skb->nh.ipv6h->saddr), NIP6(skb->nh.ipv6h->daddr)); return -1; @@ -234,7 +235,8 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct struct timeval stamp; int err = 0; - if (unlikely(fl->proto == IPPROTO_MH && fl->fl_mh_type <= IP6_MH_TYPE_MAX)) + if (unlikely(fl->proto == IPPROTO_MH && + fl->fl_mh_type <= IP6_MH_TYPE_MAX)) goto out; if (likely(opt->dsthao)) { From 2cc67cc731d9b693a08e781e98fec0e3a6d6ba44 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Mon, 21 Aug 2006 19:18:57 +0900 Subject: [PATCH 0608/1063] [IPV6] ROUTE: Routing by Traffic Class. Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/fib6_rules.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 7b4908cc52b3..91f6233d8efd 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -121,6 +121,9 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) !ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, r->src.plen)) return 0; + if (r->tclass && r->tclass != ((ntohl(fl->fl6_flowlabel) >> 20) & 0xff)) + return 0; + return 1; } From 75bff8f023e02b045a8f68f36fa7da98dca124b8 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Mon, 21 Aug 2006 19:22:01 +0900 Subject: [PATCH 0609/1063] [IPV6] ROUTE: Routing by FWMARK. Based on patch by Jean Lorchat . Signed-off-by: YOSHIFUJI Hideaki --- include/linux/fib_rules.h | 2 +- include/net/flow.h | 2 ++ net/ipv6/Kconfig | 7 +++++++ net/ipv6/fib6_rules.c | 23 +++++++++++++++++++++++ net/ipv6/route.c | 1 + 5 files changed, 34 insertions(+), 1 deletion(-) diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h index 19a82b6c1c1f..2987549d6044 100644 --- a/include/linux/fib_rules.h +++ b/include/linux/fib_rules.h @@ -34,7 +34,7 @@ enum FRA_UNUSED3, FRA_UNUSED4, FRA_UNUSED5, - FRA_FWMARK, /* netfilter mark (IPv4) */ + FRA_FWMARK, /* netfilter mark (IPv4/IPv6) */ FRA_FLOW, /* flow/class id */ FRA_UNUSED6, FRA_UNUSED7, diff --git a/include/net/flow.h b/include/net/flow.h index e0522914316e..3ca210ec1379 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -26,6 +26,7 @@ struct flowi { struct { struct in6_addr daddr; struct in6_addr saddr; + __u32 fwmark; __u32 flowlabel; } ip6_u; @@ -42,6 +43,7 @@ struct flowi { #define fld_scope nl_u.dn_u.scope #define fl6_dst nl_u.ip6_u.daddr #define fl6_src nl_u.ip6_u.saddr +#define fl6_fwmark nl_u.ip6_u.fwmark #define fl6_flowlabel nl_u.ip6_u.flowlabel #define fl4_dst nl_u.ip4_u.daddr #define fl4_src nl_u.ip4_u.saddr diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 21e0cc808f44..a2d211da2aba 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -173,3 +173,10 @@ config IPV6_MULTIPLE_TABLES ---help--- Support multiple routing tables. +config IPV6_ROUTE_FWMARK + bool "IPv6: use netfilter MARK value as routing key" + depends on IPV6_MULTIPLE_TABLES && NETFILTER + ---help--- + If you say Y here, you will be able to specify different routes for + packets with different mark values (see iptables(8), MARK target). + diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 91f6233d8efd..aebd9e2b85a8 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -26,6 +26,9 @@ struct fib6_rule struct fib_rule common; struct rt6key src; struct rt6key dst; +#ifdef CONFIG_IPV6_ROUTE_FWMARK + u8 fwmark; +#endif u8 tclass; }; @@ -124,6 +127,11 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) if (r->tclass && r->tclass != ((ntohl(fl->fl6_flowlabel) >> 20) & 0xff)) return 0; +#ifdef CONFIG_IPV6_ROUTE_FWMARK + if (r->fwmark && (r->fwmark != fl->fl6_fwmark)) + return 0; +#endif + return 1; } @@ -164,6 +172,11 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, nla_memcpy(&rule6->dst.addr, tb[FRA_DST], sizeof(struct in6_addr)); +#ifdef CONFIG_IPV6_ROUTE_FWMARK + if (tb[FRA_FWMARK]) + rule6->fwmark = nla_get_u32(tb[FRA_FWMARK]); +#endif + rule6->src.plen = frh->src_len; rule6->dst.plen = frh->dst_len; rule6->tclass = frh->tos; @@ -195,6 +208,11 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, nla_memcmp(tb[FRA_DST], &rule6->dst.addr, sizeof(struct in6_addr))) return 0; +#ifdef CONFIG_IPV6_ROUTE_FWMARK + if (tb[FRA_FWMARK] && (rule6->fwmark != nla_get_u32(tb[FRA_FWMARK]))) + return 0; +#endif + return 1; } @@ -216,6 +234,11 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, NLA_PUT(skb, FRA_SRC, sizeof(struct in6_addr), &rule6->src.addr); +#ifdef CONFIG_IPV6_ROUTE_FWMARK + if (rule6->fwmark) + NLA_PUT_U32(skb, FRA_FWMARK, rule6->fwmark); +#endif + return 0; nla_put_failure: diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 20691285aee5..649350bd9299 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -703,6 +703,7 @@ void ip6_route_input(struct sk_buff *skb) .ip6_u = { .daddr = iph->daddr, .saddr = iph->saddr, + .fwmark = skb->nfmark, .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK, }, }, From 1aaec67f9335a17856dfacdd3e5cc6f4c18faeec Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Sun, 25 Jun 2006 23:54:55 +0900 Subject: [PATCH 0610/1063] [NET]: Add common helper functions to convert IPv6/IPv4 address string to network address structure. These helpers can be used in netfilter, cifs etc. Signed-off-by: YOSHIFUJI Hideaki --- include/linux/inet.h | 2 + net/core/utils.c | 215 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 217 insertions(+) diff --git a/include/linux/inet.h b/include/linux/inet.h index 6c5587af118d..b7c6da7d6d32 100644 --- a/include/linux/inet.h +++ b/include/linux/inet.h @@ -46,5 +46,7 @@ #include extern __be32 in_aton(const char *str); +extern int in4_pton(const char *src, int srclen, u8 *dst, char delim, const char **end); +extern int in6_pton(const char *src, int srclen, u8 *dst, char delim, const char **end); #endif #endif /* _LINUX_INET_H */ diff --git a/net/core/utils.c b/net/core/utils.c index e31c90e05594..5a06e8a72c17 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -4,6 +4,7 @@ * Authors: * net_random Alan Cox * net_ratelimit Andy Kleen + * in{4,6}_pton YOSHIFUJI Hideaki, Copyright (C)2006 USAGI/WIDE Project * * Created by Alexey Kuznetsov * @@ -191,3 +192,217 @@ __be32 in_aton(const char *str) } EXPORT_SYMBOL(in_aton); + +#define IN6PTON_XDIGIT 0x00010000 +#define IN6PTON_DIGIT 0x00020000 +#define IN6PTON_COLON_MASK 0x00700000 +#define IN6PTON_COLON_1 0x00100000 /* single : requested */ +#define IN6PTON_COLON_2 0x00200000 /* second : requested */ +#define IN6PTON_COLON_1_2 0x00400000 /* :: requested */ +#define IN6PTON_DOT 0x00800000 /* . */ +#define IN6PTON_DELIM 0x10000000 +#define IN6PTON_NULL 0x20000000 /* first/tail */ +#define IN6PTON_UNKNOWN 0x40000000 + +static inline int digit2bin(char c, char delim) +{ + if (c == delim || c == '\0') + return IN6PTON_DELIM; + if (c == '.') + return IN6PTON_DOT; + if (c >= '0' && c <= '9') + return (IN6PTON_DIGIT | (c - '0')); + return IN6PTON_UNKNOWN; +} + +static inline int xdigit2bin(char c, char delim) +{ + if (c == delim || c == '\0') + return IN6PTON_DELIM; + if (c == ':') + return IN6PTON_COLON_MASK; + if (c == '.') + return IN6PTON_DOT; + if (c >= '0' && c <= '9') + return (IN6PTON_XDIGIT | IN6PTON_DIGIT| (c - '0')); + if (c >= 'a' && c <= 'f') + return (IN6PTON_XDIGIT | (c - 'a' + 10)); + if (c >= 'A' && c <= 'F') + return (IN6PTON_XDIGIT | (c - 'A' + 10)); + return IN6PTON_UNKNOWN; +} + +int in4_pton(const char *src, int srclen, + u8 *dst, + char delim, const char **end) +{ + const char *s; + u8 *d; + u8 dbuf[4]; + int ret = 0; + int i; + int w = 0; + + if (srclen < 0) + srclen = strlen(src); + s = src; + d = dbuf; + i = 0; + while(1) { + int c; + c = xdigit2bin(srclen > 0 ? *s : '\0', delim); + if (!(c & (IN6PTON_DIGIT | IN6PTON_DOT | IN6PTON_DELIM))) { + goto out; + } + if (c & (IN6PTON_DOT | IN6PTON_DELIM)) { + if (w == 0) + goto out; + *d++ = w & 0xff; + w = 0; + i++; + if (c & IN6PTON_DELIM) { + if (i != 4) + goto out; + break; + } + goto cont; + } + w = (w * 10) + c; + if ((w & 0xffff) > 255) { + goto out; + } +cont: + if (i >= 4) + goto out; + s++; + srclen--; + } + ret = 1; + memcpy(dst, dbuf, sizeof(dbuf)); +out: + if (end) + *end = s; + return ret; +} + +EXPORT_SYMBOL(in4_pton); + +int in6_pton(const char *src, int srclen, + u8 *dst, + char delim, const char **end) +{ + const char *s, *tok = NULL; + u8 *d, *dc = NULL; + u8 dbuf[16]; + int ret = 0; + int i; + int state = IN6PTON_COLON_1_2 | IN6PTON_XDIGIT | IN6PTON_NULL; + int w = 0; + + memset(dbuf, 0, sizeof(dbuf)); + + s = src; + d = dbuf; + if (srclen < 0) + srclen = strlen(src); + + printf("srclen=%d\n", srclen); + + while (1) { + int c; + + c = xdigit2bin(srclen > 0 ? *s : '\0', delim); + if (!(c & state)) + goto out; + if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) { + /* process one 16-bit word */ + if (!(state & IN6PTON_NULL)) { + *d++ = (w >> 8) & 0xff; + *d++ = w & 0xff; + } + w = 0; + if (c & IN6PTON_DELIM) { + /* We've processed last word */ + break; + } + /* + * COLON_1 => XDIGIT + * COLON_2 => XDIGIT|DELIM + * COLON_1_2 => COLON_2 + */ + switch (state & IN6PTON_COLON_MASK) { + case IN6PTON_COLON_2: + dc = d; + state = IN6PTON_XDIGIT | IN6PTON_DELIM; + if (dc - dbuf >= sizeof(dbuf)) + state |= IN6PTON_NULL; + break; + case IN6PTON_COLON_1|IN6PTON_COLON_1_2: + state = IN6PTON_XDIGIT | IN6PTON_COLON_2; + break; + case IN6PTON_COLON_1: + state = IN6PTON_XDIGIT; + break; + case IN6PTON_COLON_1_2: + state = IN6PTON_COLON_2; + break; + default: + state = 0; + } + tok = s + 1; + goto cont; + } + + if (c & IN6PTON_DOT) { + ret = in4_pton(tok ? tok : s, srclen + (int)(s - tok), d, delim, &s); + if (ret > 0) { + d += 4; + break; + } + goto out; + } + + w = (w << 4) | (0xff & c); + state = IN6PTON_COLON_1 | IN6PTON_DELIM; + if (!(w & 0xf000)) { + state |= IN6PTON_XDIGIT; + } + if (!dc && d + 2 < dbuf + sizeof(dbuf)) { + state |= IN6PTON_COLON_1_2; + state &= ~IN6PTON_DELIM; + } + if (d + 2 >= dbuf + sizeof(dbuf)) { + state &= ~(IN6PTON_COLON_1|IN6PTON_COLON_1_2); + } +cont: + if ((dc && d + 4 < dbuf + sizeof(dbuf)) || + d + 4 == dbuf + sizeof(dbuf)) { + state |= IN6PTON_DOT; + } + if (d >= dbuf + sizeof(dbuf)) { + state &= ~(IN6PTON_XDIGIT|IN6PTON_COLON_MASK); + } + s++; + srclen--; + } + + i = 15; d--; + + if (dc) { + while(d >= dc) + dst[i--] = *d--; + while(i >= dc - dbuf) + dst[i--] = 0; + while(i >= 0) + dst[i--] = *d--; + } else + memcpy(dst, dbuf, sizeof(dbuf)); + + ret = 1; +out: + if (end) + *end = s; + return ret; +} + +EXPORT_SYMBOL(in6_pton); From 1884f78c7a8b456c654338e3eb2874a99688ea10 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Mon, 19 Jun 2006 03:20:32 +0900 Subject: [PATCH 0611/1063] [NETFILTER] NF_CONNTRACK_FTP: Use in6_pton() to convert address string. Signed-off-by: YOSHIFUJI Hideaki --- net/netfilter/nf_conntrack_ftp.c | 96 ++------------------------------ 1 file changed, 4 insertions(+), 92 deletions(-) diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 960972d225f9..9dccb4039889 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -111,101 +111,13 @@ static struct ftp_search { }, }; -/* This code is based on inet_pton() in glibc-2.2.4 */ static int get_ipv6_addr(const char *src, size_t dlen, struct in6_addr *dst, u_int8_t term) { - static const char xdigits[] = "0123456789abcdef"; - u_int8_t tmp[16], *tp, *endp, *colonp; - int ch, saw_xdigit; - u_int32_t val; - size_t clen = 0; - - tp = memset(tmp, '\0', sizeof(tmp)); - endp = tp + sizeof(tmp); - colonp = NULL; - - /* Leading :: requires some special handling. */ - if (*src == ':'){ - if (*++src != ':') { - DEBUGP("invalid \":\" at the head of addr\n"); - return 0; - } - clen++; - } - - saw_xdigit = 0; - val = 0; - while ((clen < dlen) && (*src != term)) { - const char *pch; - - ch = tolower(*src++); - clen++; - - pch = strchr(xdigits, ch); - if (pch != NULL) { - val <<= 4; - val |= (pch - xdigits); - if (val > 0xffff) - return 0; - - saw_xdigit = 1; - continue; - } - if (ch != ':') { - DEBUGP("get_ipv6_addr: invalid char. \'%c\'\n", ch); - return 0; - } - - if (!saw_xdigit) { - if (colonp) { - DEBUGP("invalid location of \"::\".\n"); - return 0; - } - colonp = tp; - continue; - } else if (*src == term) { - DEBUGP("trancated IPv6 addr\n"); - return 0; - } - - if (tp + 2 > endp) - return 0; - *tp++ = (u_int8_t) (val >> 8) & 0xff; - *tp++ = (u_int8_t) val & 0xff; - - saw_xdigit = 0; - val = 0; - continue; - } - if (saw_xdigit) { - if (tp + 2 > endp) - return 0; - *tp++ = (u_int8_t) (val >> 8) & 0xff; - *tp++ = (u_int8_t) val & 0xff; - } - if (colonp != NULL) { - /* - * Since some memmove()'s erroneously fail to handle - * overlapping regions, we'll do the shift by hand. - */ - const int n = tp - colonp; - int i; - - if (tp == endp) - return 0; - - for (i = 1; i <= n; i++) { - endp[- i] = colonp[n - i]; - colonp[n - i] = 0; - } - tp = endp; - } - if (tp != endp || (*src != term)) - return 0; - - memcpy(dst->s6_addr, tmp, sizeof(dst->s6_addr)); - return clen; + int ret = in6_pton(src, min_t(size_t, dlen, 0xffff), dst, term, &end); + if (ret > 0) + return (int)(end - src); + return 0; } static int try_number(const char *data, size_t dlen, u_int32_t array[], From d4f3e9b735c72823aab597bfa4860d184658a609 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 25 Aug 2006 00:27:09 -0700 Subject: [PATCH 0612/1063] [NET] in6_pton: Kill errant printf statement. Signed-off-by: David S. Miller --- net/core/utils.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/core/utils.c b/net/core/utils.c index 5a06e8a72c17..2682490777de 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -306,8 +306,6 @@ int in6_pton(const char *src, int srclen, if (srclen < 0) srclen = strlen(src); - printf("srclen=%d\n", srclen); - while (1) { int c; From 298969727e7b855d53f3becfa92c055914082ec4 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 25 Aug 2006 00:37:24 -0700 Subject: [PATCH 0613/1063] [TCP] tcp_lp: use BUILD_BUG_ON Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv4/tcp_lp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index 649ebaed1df1..308fb7e071c5 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -327,7 +327,7 @@ static struct tcp_congestion_ops tcp_lp = { static int __init tcp_lp_register(void) { - BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_lp); } From 65e3d72654d9a33cdccd5c19777a5515ae9dd37d Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 25 Aug 2006 00:38:03 -0700 Subject: [PATCH 0614/1063] [TCP] tcp_bic: use BUILD_BUG_ON Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv4/tcp_bic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index b0134ab08379..5730333cd0ac 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -231,7 +231,7 @@ static struct tcp_congestion_ops bictcp = { static int __init bictcp_register(void) { - BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&bictcp); } From acba48e1a3c95082af1e12c5efaaca3506103a92 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 25 Aug 2006 15:46:46 -0700 Subject: [PATCH 0615/1063] [XFRM]: Respect priority in policy lookups. Even if we find an exact match in the hash table, we must inspect the inexact list to look for a match with a better priority. Noticed by Masahide NAKAMURA . Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b446ca31fecc..1cf3209cdf4b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -908,6 +908,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, xfrm_address_t *daddr, *saddr; struct hlist_node *entry; struct hlist_head *chain; + u32 priority = ~0U; daddr = xfrm_flowi_daddr(fl, family); saddr = xfrm_flowi_saddr(fl, family); @@ -919,21 +920,21 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, ret = NULL; hlist_for_each_entry(pol, entry, chain, bydst) { if (xfrm_policy_match(pol, fl, type, family, dir)) { - xfrm_pol_hold(pol); + ret = pol; + priority = ret->priority; + break; + } + } + chain = &xfrm_policy_inexact[dir]; + hlist_for_each_entry(pol, entry, chain, bydst) { + if (xfrm_policy_match(pol, fl, type, family, dir) && + pol->priority < priority) { ret = pol; break; } } - if (!ret) { - chain = &xfrm_policy_inexact[dir]; - hlist_for_each_entry(pol, entry, chain, bydst) { - if (xfrm_policy_match(pol, fl, type, family, dir)) { - xfrm_pol_hold(pol); - ret = pol; - break; - } - } - } + if (ret) + xfrm_pol_hold(ret); read_unlock_bh(&xfrm_policy_lock); return ret; From 6c5eb6a50741b882fd99fbb8178942ca2f74b724 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Fri, 25 Aug 2006 16:04:29 -0700 Subject: [PATCH 0616/1063] [IPV6] ROUTE: Fix FWMARK support. - Add missing nla_policy entry. - type of fwmark is u32, not u8. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/fib6_rules.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index aebd9e2b85a8..b4cd5c03b0b6 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -27,7 +27,7 @@ struct fib6_rule struct rt6key src; struct rt6key dst; #ifdef CONFIG_IPV6_ROUTE_FWMARK - u8 fwmark; + u32 fwmark; #endif u8 tclass; }; @@ -140,6 +140,7 @@ static struct nla_policy fib6_rule_policy[RTA_MAX+1] __read_mostly = { [FRA_PRIORITY] = { .type = NLA_U32 }, [FRA_SRC] = { .minlen = sizeof(struct in6_addr) }, [FRA_DST] = { .minlen = sizeof(struct in6_addr) }, + [FRA_FWMARK] = { .type = NLA_U32 }, [FRA_TABLE] = { .type = NLA_U32 }, }; From 2613aad5ab28579687519918cdc353af0eed5a3f Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Fri, 25 Aug 2006 16:05:00 -0700 Subject: [PATCH 0617/1063] [IPV6] ROUTE: Fix size of fib6_rule_policy. It should not be RTA_MAX+1 but FRA_MAX+1. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/fib6_rules.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index b4cd5c03b0b6..3d64c71f52de 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -135,7 +135,7 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) return 1; } -static struct nla_policy fib6_rule_policy[RTA_MAX+1] __read_mostly = { +static struct nla_policy fib6_rule_policy[FRA_MAX+1] __read_mostly = { [FRA_IFNAME] = { .type = NLA_STRING }, [FRA_PRIORITY] = { .type = NLA_U32 }, [FRA_SRC] = { .minlen = sizeof(struct in6_addr) }, From cd9d742622fbc2190221e0b2aca80596bfd17733 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Fri, 25 Aug 2006 16:05:43 -0700 Subject: [PATCH 0618/1063] [IPV6] ROUTE: Add support for fwmask in routing rules. Add support for fwmark masks. A mask of 0xFFFFFFFF is used when a mark value != 0 is sent without a mask. Based on patch for net/ipv4/fib_rules.c by Patrick McHardy . Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/fib6_rules.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 3d64c71f52de..ee4aa43ad973 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -28,6 +28,7 @@ struct fib6_rule struct rt6key dst; #ifdef CONFIG_IPV6_ROUTE_FWMARK u32 fwmark; + u32 fwmask; #endif u8 tclass; }; @@ -128,7 +129,7 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) return 0; #ifdef CONFIG_IPV6_ROUTE_FWMARK - if (r->fwmark && (r->fwmark != fl->fl6_fwmark)) + if ((r->fwmark ^ fl->fl6_fwmark) / r->fwmask) return 0; #endif @@ -141,6 +142,7 @@ static struct nla_policy fib6_rule_policy[FRA_MAX+1] __read_mostly = { [FRA_SRC] = { .minlen = sizeof(struct in6_addr) }, [FRA_DST] = { .minlen = sizeof(struct in6_addr) }, [FRA_FWMARK] = { .type = NLA_U32 }, + [FRA_FWMASK] = { .type = NLA_U32 }, [FRA_TABLE] = { .type = NLA_U32 }, }; @@ -174,8 +176,20 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, sizeof(struct in6_addr)); #ifdef CONFIG_IPV6_ROUTE_FWMARK - if (tb[FRA_FWMARK]) + if (tb[FRA_FWMARK]) { rule6->fwmark = nla_get_u32(tb[FRA_FWMARK]); + if (rule6->fwmark) { + /* + * if the mark value is non-zero, + * all bits are compared by default + * unless a mask is explicitly specified. + */ + rule6->fwmask = 0xFFFFFFFF; + } + } + + if (tb[FRA_FWMASK]) + rule6->fwmask = nla_get_u32(tb[FRA_FWMASK]); #endif rule6->src.plen = frh->src_len; @@ -212,6 +226,9 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, #ifdef CONFIG_IPV6_ROUTE_FWMARK if (tb[FRA_FWMARK] && (rule6->fwmark != nla_get_u32(tb[FRA_FWMARK]))) return 0; + + if (tb[FRA_FWMASK] && (rule6->fwmask != nla_get_u32(tb[FRA_FWMASK]))) + return 0; #endif return 1; @@ -238,6 +255,9 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, #ifdef CONFIG_IPV6_ROUTE_FWMARK if (rule6->fwmark) NLA_PUT_U32(skb, FRA_FWMARK, rule6->fwmark); + + if (rule6->fwmask) + NLA_PUT_U32(skb, FRA_FWMASK, rule6->fwmask); #endif return 0; From 267935b197d2a6e6924f9de2841f0470bfe63acd Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 25 Aug 2006 16:07:48 -0700 Subject: [PATCH 0619/1063] [IPV6]: Fix build with fwmark disabled. Based upon a patch by Brian Haley. Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 649350bd9299..d83844d9499b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -703,7 +703,9 @@ void ip6_route_input(struct sk_buff *skb) .ip6_u = { .daddr = iph->daddr, .saddr = iph->saddr, +#ifdef CONFIG_IPV6_ROUTE_FWMARK .fwmark = skb->nfmark, +#endif .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK, }, }, From bbfb39cbf63829d1db607aa90cbdca557a3a131d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 25 Aug 2006 16:10:14 -0700 Subject: [PATCH 0620/1063] [IPV4]: Add support for fwmark masks in routing rules Add a FRA_FWMASK attributes for fwmark masks. For compatibility a mask of 0xFFFFFFFF is used when a mark value != 0 is sent without a mask. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/fib_rules.h | 3 ++- net/ipv4/fib_rules.c | 21 +++++++++++++++++++-- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h index 2987549d6044..4418c8d9d479 100644 --- a/include/linux/fib_rules.h +++ b/include/linux/fib_rules.h @@ -34,12 +34,13 @@ enum FRA_UNUSED3, FRA_UNUSED4, FRA_UNUSED5, - FRA_FWMARK, /* netfilter mark (IPv4/IPv6) */ + FRA_FWMARK, /* netfilter mark */ FRA_FLOW, /* flow/class id */ FRA_UNUSED6, FRA_UNUSED7, FRA_UNUSED8, FRA_TABLE, /* Extended table id */ + FRA_FWMASK, /* mask for netfilter mark */ __FRA_MAX }; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index ce185ac6f260..280f424ca9c9 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -46,6 +46,7 @@ struct fib4_rule u32 dstmask; #ifdef CONFIG_IP_ROUTE_FWMARK u32 fwmark; + u32 fwmask; #endif #ifdef CONFIG_NET_CLS_ROUTE u32 tclassid; @@ -160,7 +161,7 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) return 0; #ifdef CONFIG_IP_ROUTE_FWMARK - if (r->fwmark && (r->fwmark != fl->fl4_fwmark)) + if ((r->fwmark ^ fl->fl4_fwmark) & r->fwmask) return 0; #endif @@ -183,6 +184,7 @@ static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = { [FRA_SRC] = { .type = NLA_U32 }, [FRA_DST] = { .type = NLA_U32 }, [FRA_FWMARK] = { .type = NLA_U32 }, + [FRA_FWMASK] = { .type = NLA_U32 }, [FRA_FLOW] = { .type = NLA_U32 }, [FRA_TABLE] = { .type = NLA_U32 }, }; @@ -219,8 +221,17 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule4->dst = nla_get_u32(tb[FRA_DST]); #ifdef CONFIG_IP_ROUTE_FWMARK - if (tb[FRA_FWMARK]) + if (tb[FRA_FWMARK]) { rule4->fwmark = nla_get_u32(tb[FRA_FWMARK]); + if (rule4->fwmark) + /* compatibility: if the mark value is non-zero all bits + * are compared unless a mask is explicitly specified. + */ + rule4->fwmask = 0xFFFFFFFF; + } + + if (tb[FRA_FWMASK]) + rule4->fwmask = nla_get_u32(tb[FRA_FWMASK]); #endif #ifdef CONFIG_NET_CLS_ROUTE @@ -256,6 +267,9 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, #ifdef CONFIG_IP_ROUTE_FWMARK if (tb[FRA_FWMARK] && (rule4->fwmark != nla_get_u32(tb[FRA_FWMARK]))) return 0; + + if (tb[FRA_FWMASK] && (rule4->fwmask != nla_get_u32(tb[FRA_FWMASK]))) + return 0; #endif #ifdef CONFIG_NET_CLS_ROUTE @@ -285,6 +299,9 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, #ifdef CONFIG_IP_ROUTE_FWMARK if (rule4->fwmark) NLA_PUT_U32(skb, FRA_FWMARK, rule4->fwmark); + + if (rule4->fwmask || rule4->fwmark) + NLA_PUT_U32(skb, FRA_FWMASK, rule4->fwmask); #endif if (rule4->dst_len) From 88e91f290307d22ae88302e3a24f0c36905e8a6c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 25 Aug 2006 16:11:08 -0700 Subject: [PATCH 0621/1063] [DECNET]: Add support for fwmark masks in routing rules Add support for fwmark masks. For compatibility a mask of 0xFFFFFFFF is used when a mark value != 0 is sent without a mask. Signed-off-by: Patrick McHardy Acked-by: Steven Whitehouse Signed-off-by: David S. Miller --- net/decnet/dn_rules.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 50e819edf8c7..63ad63dfd252 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -47,6 +47,7 @@ struct dn_fib_rule u8 flags; #ifdef CONFIG_DECNET_ROUTE_FWMARK u32 fwmark; + u32 fwmask; #endif }; @@ -116,6 +117,7 @@ static struct nla_policy dn_fib_rule_policy[FRA_MAX+1] __read_mostly = { [FRA_SRC] = { .type = NLA_U16 }, [FRA_DST] = { .type = NLA_U16 }, [FRA_FWMARK] = { .type = NLA_U32 }, + [FRA_FWMASK] = { .type = NLA_U32 }, [FRA_TABLE] = { .type = NLA_U32 }, }; @@ -130,7 +132,7 @@ static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) return 0; #ifdef CONFIG_DECNET_ROUTE_FWMARK - if (r->fwmark && (r->fwmark != fl->fld_fwmark)) + if ((r->fwmark ^ fl->fld_fwmark) & r->fwmask) return 0; #endif @@ -168,8 +170,17 @@ static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb, r->dst = nla_get_u16(tb[FRA_DST]); #ifdef CONFIG_DECNET_ROUTE_FWMARK - if (tb[FRA_FWMARK]) + if (tb[FRA_FWMARK]) { r->fwmark = nla_get_u32(tb[FRA_FWMARK]); + if (r->fwmark) + /* compatibility: if the mark value is non-zero all bits + * are compared unless a mask is explicitly specified. + */ + r->fwmask = 0xFFFFFFFF; + } + + if (tb[FRA_FWMASK]) + r->fwmask = nla_get_u32(tb[FRA_FWMASK]); #endif r->src_len = frh->src_len; @@ -195,6 +206,9 @@ static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, #ifdef CONFIG_DECNET_ROUTE_FWMARK if (tb[FRA_FWMARK] && (r->fwmark != nla_get_u32(tb[FRA_FWMARK]))) return 0; + + if (tb[FRA_FWMASK] && (r->fwmask != nla_get_u32(tb[FRA_FWMASK]))) + return 0; #endif if (tb[FRA_SRC] && (r->src != nla_get_u16(tb[FRA_SRC]))) @@ -237,6 +251,8 @@ static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb, #ifdef CONFIG_DECNET_ROUTE_FWMARK if (r->fwmark) NLA_PUT_U32(skb, FRA_FWMARK, r->fwmark); + if (r->fwmask || r->fwmark) + NLA_PUT_U32(skb, FRA_FWMASK, r->fwmask); #endif if (r->dst_len) NLA_PUT_U16(skb, FRA_DST, r->dst); From b4e9b520ca5d07a37ea59648e7f50f478e7487a3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 25 Aug 2006 16:11:42 -0700 Subject: [PATCH 0622/1063] [NET_SCHED]: Add mask support to fwmark classifier Support masking the nfmark value before the search. The mask value is global for all filters contained in one instance. It can only be set when a new instance is created, all filters must specify the same mask. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/pkt_cls.h | 1 + net/sched/cls_fw.c | 25 ++++++++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index bd2c5a2bbbf5..c3f01b3085a4 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -305,6 +305,7 @@ enum TCA_FW_POLICE, TCA_FW_INDEV, /* used by CONFIG_NET_CLS_IND */ TCA_FW_ACT, /* used by CONFIG_NET_CLS_ACT */ + TCA_FW_MASK, __TCA_FW_MAX }; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index e6973d9b686d..e54acc6bcccd 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -50,6 +50,7 @@ struct fw_head { struct fw_filter *ht[HTSIZE]; + u32 mask; }; struct fw_filter @@ -101,7 +102,7 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, struct fw_filter *f; int r; #ifdef CONFIG_NETFILTER - u32 id = skb->nfmark; + u32 id = skb->nfmark & head->mask; #else u32 id = 0; #endif @@ -209,7 +210,9 @@ static int fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f, struct rtattr **tb, struct rtattr **tca, unsigned long base) { + struct fw_head *head = (struct fw_head *)tp->root; struct tcf_exts e; + u32 mask; int err; err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &fw_ext_map); @@ -232,6 +235,15 @@ fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f, } #endif /* CONFIG_NET_CLS_IND */ + if (tb[TCA_FW_MASK-1]) { + if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32)) + goto errout; + mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]); + if (mask != head->mask) + goto errout; + } else if (head->mask != 0xFFFFFFFF) + goto errout; + tcf_exts_change(tp, &f->exts, &e); return 0; @@ -267,9 +279,17 @@ static int fw_change(struct tcf_proto *tp, unsigned long base, return -EINVAL; if (head == NULL) { + u32 mask = 0xFFFFFFFF; + if (tb[TCA_FW_MASK-1]) { + if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32)) + return -EINVAL; + mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]); + } + head = kzalloc(sizeof(struct fw_head), GFP_KERNEL); if (head == NULL) return -ENOBUFS; + head->mask = mask; tcf_tree_lock(tp); tp->root = head; @@ -330,6 +350,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) static int fw_dump(struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { + struct fw_head *head = (struct fw_head *)tp->root; struct fw_filter *f = (struct fw_filter*)fh; unsigned char *b = skb->tail; struct rtattr *rta; @@ -351,6 +372,8 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh, if (strlen(f->indev)) RTA_PUT(skb, TCA_FW_INDEV, IFNAMSIZ, f->indev); #endif /* CONFIG_NET_CLS_IND */ + if (head->mask != 0xFFFFFFFF) + RTA_PUT(skb, TCA_FW_MASK, 4, &head->mask); if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0) goto rtattr_failure; From 74975d40b16fd4bad24a2e2630dc7957d8cba013 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 25 Aug 2006 17:10:33 -0700 Subject: [PATCH 0623/1063] [TCP] Congestion control (modulo lp, bic): use BUILD_BUG_ON Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv4/tcp_cubic.c | 2 +- net/ipv4/tcp_highspeed.c | 2 +- net/ipv4/tcp_htcp.c | 2 +- net/ipv4/tcp_hybla.c | 2 +- net/ipv4/tcp_vegas.c | 2 +- net/ipv4/tcp_veno.c | 2 +- net/ipv4/tcp_westwood.c | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 2be27980ca78..a60ef38d75c6 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -358,7 +358,7 @@ static struct tcp_congestion_ops cubictcp = { static int __init cubictcp_register(void) { - BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); /* Precompute a bunch of the scaling factors that are used per-packet * based on SRTT of 100ms diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index fa3e1aad660c..c4fc811bf377 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -189,7 +189,7 @@ static struct tcp_congestion_ops tcp_highspeed = { static int __init hstcp_register(void) { - BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_highspeed); } diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 6edfe5e4510e..682e7d5b6f2f 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -286,7 +286,7 @@ static struct tcp_congestion_ops htcp = { static int __init htcp_register(void) { - BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE); BUILD_BUG_ON(BETA_MIN >= BETA_MAX); return tcp_register_congestion_control(&htcp); } diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index 7406e0c5fb8e..59e691d26f64 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -170,7 +170,7 @@ static struct tcp_congestion_ops tcp_hybla = { static int __init hybla_register(void) { - BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_hybla); } diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 490360b5b4bf..a3b7aa015a2f 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -370,7 +370,7 @@ static struct tcp_congestion_ops tcp_vegas = { static int __init tcp_vegas_register(void) { - BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE); tcp_register_congestion_control(&tcp_vegas); return 0; } diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 5b2fe6d2aba9..ce57bf302f6c 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -212,7 +212,7 @@ static struct tcp_congestion_ops tcp_veno = { static int __init tcp_veno_register(void) { - BUG_ON(sizeof(struct veno) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct veno) > ICSK_CA_PRIV_SIZE); tcp_register_congestion_control(&tcp_veno); return 0; } diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index 5446312ffd2a..4f42a86c77f3 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -289,7 +289,7 @@ static struct tcp_congestion_ops tcp_westwood = { static int __init tcp_westwood_register(void) { - BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_westwood); } From 366e4adc0f9ef33f56c62f980a7d83775e64abd0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 26 Aug 2006 16:50:20 -0700 Subject: [PATCH 0624/1063] [IPV6]: Fix routing by fwmark Fix mark comparison, also dump the mask to userspace when the mask is zero, but the mark is not (in which case the mark is dumped, so the mask is needed to make sense of it). Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/fib6_rules.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index ee4aa43ad973..2fbc71d90187 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -129,7 +129,7 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) return 0; #ifdef CONFIG_IPV6_ROUTE_FWMARK - if ((r->fwmark ^ fl->fl6_fwmark) / r->fwmask) + if ((r->fwmark ^ fl->fl6_fwmark) & r->fwmask) return 0; #endif @@ -256,7 +256,7 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, if (rule6->fwmark) NLA_PUT_U32(skb, FRA_FWMARK, rule6->fwmark); - if (rule6->fwmask) + if (rule6->fwmask || rule6->fwmark) NLA_PUT_U32(skb, FRA_FWMASK, rule6->fwmask); #endif From ef047f5e1085d6393748d1ee27d6327905f098dc Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Fri, 1 Sep 2006 00:29:06 -0700 Subject: [PATCH 0625/1063] [NET]: Use BUILD_BUG_ON() for checking size of skb->cb. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 5 +---- net/ipv6/af_inet6.c | 7 ++----- net/netlink/af_netlink.c | 5 +---- net/unix/af_unix.c | 5 +---- 4 files changed, 5 insertions(+), 17 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f2e8927f4596..fdd89e37b9aa 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1254,10 +1254,7 @@ static int __init inet_init(void) struct list_head *r; int rc = -EINVAL; - if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)) { - printk(KERN_CRIT "%s: panic\n", __FUNCTION__); - goto out; - } + BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)); rc = proto_register(&tcp_prot, 1); if (rc) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index fc9c8a99bea6..bf6e8aff19d4 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -761,6 +761,8 @@ static int __init inet6_init(void) struct list_head *r; int err; + BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)); + #ifdef MODULE #if 0 /* FIXME --RR */ if (!mod_member_present(&__this_module, can_unload)) @@ -770,11 +772,6 @@ static int __init inet6_init(void) #endif #endif - if (sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)) { - printk(KERN_CRIT "inet6_proto_init: size fault\n"); - return -EINVAL; - } - err = proto_register(&tcpv6_prot, 1); if (err) goto out; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a80e4456e204..d56e0d21f919 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1762,8 +1762,6 @@ static struct net_proto_family netlink_family_ops = { .owner = THIS_MODULE, /* for consistency 8) */ }; -extern void netlink_skb_parms_too_large(void); - static int __init netlink_proto_init(void) { struct sk_buff *dummy_skb; @@ -1775,8 +1773,7 @@ static int __init netlink_proto_init(void) if (err != 0) goto out; - if (sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb)) - netlink_skb_parms_too_large(); + BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb)); nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL); if (!nl_table) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index de6ec519272e..7c91c2024d49 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2060,10 +2060,7 @@ static int __init af_unix_init(void) int rc = -1; struct sk_buff *dummy_skb; - if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) { - printk(KERN_CRIT "%s: panic\n", __FUNCTION__); - goto out; - } + BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)); rc = proto_register(&unix_proto, 1); if (rc != 0) { From 2a0109a707d2b0ae48f124d3be0fdf1715c0107a Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Sat, 26 Aug 2006 19:15:35 -0700 Subject: [PATCH 0626/1063] [DCCP]: Shift sysctls into feat.h This shifts further sysctls into feat.h. No change in functionality - shifting code only. Signed off by: Ian McDonald Signed-off-by: David S. Miller --- net/dccp/feat.h | 5 +++++ net/dccp/sysctl.c | 8 +------- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/net/dccp/feat.h b/net/dccp/feat.h index b44c45504fb6..cee553d416ca 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h @@ -27,5 +27,10 @@ extern int dccp_feat_clone(struct sock *oldsk, struct sock *newsk); extern int dccp_feat_init(struct dccp_minisock *dmsk); extern int dccp_feat_default_sequence_window; +extern int dccp_feat_default_rx_ccid; +extern int dccp_feat_default_tx_ccid; +extern int dccp_feat_default_ack_ratio; +extern int dccp_feat_default_send_ack_vector; +extern int dccp_feat_default_send_ndp_count; #endif /* _DCCP_FEAT_H */ diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index c1ba9451bc3d..38bc157876f3 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -11,18 +11,12 @@ #include #include +#include "feat.h" #ifndef CONFIG_SYSCTL #error This file should not be compiled without CONFIG_SYSCTL defined #endif -extern int dccp_feat_default_sequence_window; -extern int dccp_feat_default_rx_ccid; -extern int dccp_feat_default_tx_ccid; -extern int dccp_feat_default_ack_ratio; -extern int dccp_feat_default_send_ack_vector; -extern int dccp_feat_default_send_ndp_count; - static struct ctl_table dccp_default_table[] = { { .ctl_name = NET_DCCP_DEFAULT_SEQ_WINDOW, From 97e5848dd39e7e76bd6077735ebb5473763ab9c5 Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Sat, 26 Aug 2006 19:16:45 -0700 Subject: [PATCH 0627/1063] [DCCP]: Introduce tx buffering This adds transmit buffering to DCCP. I have tested with CCID2/3 and with loss and rate limiting. Signed off by: Ian McDonald Signed-off-by: David S. Miller --- include/linux/dccp.h | 2 + net/dccp/dccp.h | 2 +- net/dccp/output.c | 88 ++++++++++++++++++++++++++++++++------------ net/dccp/proto.c | 16 +++----- 4 files changed, 72 insertions(+), 36 deletions(-) diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 676333b9fad0..2d7671c92c0b 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -438,6 +438,7 @@ struct dccp_ackvec; * @dccps_role - Role of this sock, one of %dccp_role * @dccps_ndp_count - number of Non Data Packets since last data packet * @dccps_hc_rx_ackvec - rx half connection ack vector + * @dccps_xmit_timer - timer for when CCID is not ready to send */ struct dccp_sock { /* inet_connection_sock has to be the first member of dccp_sock */ @@ -470,6 +471,7 @@ struct dccp_sock { enum dccp_role dccps_role:2; __u8 dccps_hc_rx_insert_options:1; __u8 dccps_hc_tx_insert_options:1; + struct timer_list dccps_xmit_timer; }; static inline struct dccp_sock *dccp_sk(const struct sock *sk) diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index a5c5475724c0..0a21be437ed3 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -130,7 +130,7 @@ extern void dccp_send_delayed_ack(struct sock *sk); extern void dccp_send_sync(struct sock *sk, const u64 seq, const enum dccp_pkt_type pkt_type); -extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo); +extern void dccp_write_xmit(struct sock *sk, int block); extern void dccp_write_space(struct sock *sk); extern void dccp_init_xmit_timers(struct sock *sk); diff --git a/net/dccp/output.c b/net/dccp/output.c index 58669beee132..7102e3aed4ca 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -198,7 +198,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, while (1) { prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); - if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) + if (sk->sk_err) goto do_error; if (!*timeo) goto do_nonblock; @@ -234,37 +234,72 @@ do_interrupted: goto out; } -int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) +static void dccp_write_xmit_timer(unsigned long data) { + struct sock *sk = (struct sock *)data; + struct dccp_sock *dp = dccp_sk(sk); + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) + sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1); + else + dccp_write_xmit(sk, 0); + bh_unlock_sock(sk); + sock_put(sk); +} + +void dccp_write_xmit(struct sock *sk, int block) { - const struct dccp_sock *dp = dccp_sk(sk); - int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, + struct dccp_sock *dp = dccp_sk(sk); + struct sk_buff *skb; + long timeo = 30000; /* If a packet is taking longer than 2 secs + we have other issues */ + + while ((skb = skb_peek(&sk->sk_write_queue))) { + int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, skb->len); - if (err > 0) - err = dccp_wait_for_ccid(sk, skb, timeo); + if (err > 0) { + if (!block) { + sk_reset_timer(sk, &dp->dccps_xmit_timer, + msecs_to_jiffies(err)+jiffies); + break; + } else + err = dccp_wait_for_ccid(sk, skb, &timeo); + if (err) { + printk(KERN_CRIT "%s:err at dccp_wait_for_ccid" + " %d\n", __FUNCTION__, err); + dump_stack(); + } + } - if (err == 0) { - struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - const int len = skb->len; + skb_dequeue(&sk->sk_write_queue); + if (err == 0) { + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + const int len = skb->len; - if (sk->sk_state == DCCP_PARTOPEN) { - /* See 8.1.5. Handshake Completion */ - inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + if (sk->sk_state == DCCP_PARTOPEN) { + /* See 8.1.5. Handshake Completion */ + inet_csk_schedule_ack(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, inet_csk(sk)->icsk_rto, DCCP_RTO_MAX); - dcb->dccpd_type = DCCP_PKT_DATAACK; - } else if (dccp_ack_pending(sk)) - dcb->dccpd_type = DCCP_PKT_DATAACK; - else - dcb->dccpd_type = DCCP_PKT_DATA; + dcb->dccpd_type = DCCP_PKT_DATAACK; + } else if (dccp_ack_pending(sk)) + dcb->dccpd_type = DCCP_PKT_DATAACK; + else + dcb->dccpd_type = DCCP_PKT_DATA; - err = dccp_transmit_skb(sk, skb); - ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); - } else - kfree_skb(skb); - - return err; + err = dccp_transmit_skb(sk, skb); + ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); + if (err) { + printk(KERN_CRIT "%s:err from " + "ccid_hc_tx_packet_sent %d\n", + __FUNCTION__, err); + dump_stack(); + } + } else + kfree(skb); + } } int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb) @@ -426,6 +461,9 @@ static inline void dccp_connect_init(struct sock *sk) dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss)); icsk->icsk_retransmits = 0; + init_timer(&dp->dccps_xmit_timer); + dp->dccps_xmit_timer.data = (unsigned long)sk; + dp->dccps_xmit_timer.function = dccp_write_xmit_timer; } int dccp_connect(struct sock *sk) @@ -560,8 +598,10 @@ void dccp_send_close(struct sock *sk, const int active) DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; if (active) { + dccp_write_xmit(sk, 1); dccp_skb_entail(sk, skb); dccp_transmit_skb(sk, skb_clone(skb, prio)); + /* FIXME do we need a retransmit timer here? */ } else dccp_transmit_skb(sk, skb); } diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 6f14bb5a28d4..962df0ea31aa 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -662,17 +662,8 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (rc != 0) goto out_discard; - rc = dccp_write_xmit(sk, skb, &timeo); - /* - * XXX we don't use sk_write_queue, so just discard the packet. - * Current plan however is to _use_ sk_write_queue with - * an algorith similar to tcp_sendmsg, where the main difference - * is that in DCCP we have to respect packet boundaries, so - * no coalescing of skbs. - * - * This bug was _quickly_ found & fixed by just looking at an OSTRA - * generated callgraph 8) -acme - */ + skb_queue_tail(&sk->sk_write_queue, skb); + dccp_write_xmit(sk,0); out_release: release_sock(sk); return rc ? : len; @@ -846,6 +837,7 @@ static int dccp_close_state(struct sock *sk) void dccp_close(struct sock *sk, long timeout) { + struct dccp_sock *dp = dccp_sk(sk); struct sk_buff *skb; int state; @@ -862,6 +854,8 @@ void dccp_close(struct sock *sk, long timeout) goto adjudge_to_death; } + sk_stop_timer(sk, &dp->dccps_xmit_timer); + /* * We need to flush the recv. buffs. We do this only on the * descriptor close, not protocol-sourced closes, because the From ff5dfe736dd9f6c74b206aa77c0465dfd503bdb9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 26 Aug 2006 19:17:53 -0700 Subject: [PATCH 0628/1063] [NETLINK]: remove third bogus argument from NLA_PUT_FLAG This patch removes the 'value' argument from NLA_PUT_FLAG which is unused anyway. The documentation comment was already correct so it doesn't need an update :) Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/net/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/netlink.h b/include/net/netlink.h index 47044da167c5..bcb27e3a312e 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -828,7 +828,7 @@ static inline int nla_put_msecs(struct sk_buff *skb, int attrtype, #define NLA_PUT_STRING(skb, attrtype, value) \ NLA_PUT(skb, attrtype, strlen(value) + 1, value) -#define NLA_PUT_FLAG(skb, attrtype, value) \ +#define NLA_PUT_FLAG(skb, attrtype) \ NLA_PUT(skb, attrtype, 0, NULL) #define NLA_PUT_MSECS(skb, attrtype, jiffies) \ From e5d679f33900c71d1a76ba07c5b04055abd34480 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 26 Aug 2006 19:25:52 -0700 Subject: [PATCH 0629/1063] [NET]: Use SLAB_PANIC Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/core/flow.c | 6 +----- net/core/neighbour.c | 12 ++++-------- net/core/skbuff.c | 9 ++------- net/decnet/dn_route.c | 11 +++-------- net/ipv4/inetpeer.c | 5 +---- net/ipv4/ipmr.c | 5 +---- net/ipv4/route.c | 10 +++------- net/ipv4/tcp.c | 4 +--- net/ipv6/ip6_fib.c | 4 +--- net/ipv6/route.c | 10 +++------- net/xfrm/xfrm_input.c | 4 +--- net/xfrm/xfrm_policy.c | 4 +--- 12 files changed, 22 insertions(+), 62 deletions(-) diff --git a/net/core/flow.c b/net/core/flow.c index 645241165e6c..f23e7e386543 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -343,12 +343,8 @@ static int __init flow_cache_init(void) flow_cachep = kmem_cache_create("flow_cache", sizeof(struct flow_cache_entry), - 0, SLAB_HWCACHE_ALIGN, + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - - if (!flow_cachep) - panic("NET: failed to allocate flow cache slab\n"); - flow_hash_shift = 10; flow_lwm = 2 * flow_hash_size; flow_hwm = 4 * flow_hash_size; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index c0a27407f445..a45bd2124d6b 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1339,14 +1339,10 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) neigh_rand_reach_time(tbl->parms.base_reachable_time); if (!tbl->kmem_cachep) - tbl->kmem_cachep = kmem_cache_create(tbl->id, - tbl->entry_size, - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - - if (!tbl->kmem_cachep) - panic("cannot create neighbour cache"); - + tbl->kmem_cachep = + kmem_cache_create(tbl->id, tbl->entry_size, 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, + NULL, NULL); tbl->stats = alloc_percpu(struct neigh_statistics); if (!tbl->stats) panic("cannot create neighbour cache statistics"); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8a476f1956e5..c448c7f6fde2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2046,19 +2046,14 @@ void __init skb_init(void) skbuff_head_cache = kmem_cache_create("skbuff_head_cache", sizeof(struct sk_buff), 0, - SLAB_HWCACHE_ALIGN, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - if (!skbuff_head_cache) - panic("cannot create skbuff cache"); - skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", (2*sizeof(struct sk_buff)) + sizeof(atomic_t), 0, - SLAB_HWCACHE_ALIGN, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - if (!skbuff_fclone_cache) - panic("cannot create skbuff cache"); } EXPORT_SYMBOL(___pskb_trim); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index c5daf3557c1f..dd0761e3d280 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1781,14 +1781,9 @@ void __init dn_route_init(void) { int i, goal, order; - dn_dst_ops.kmem_cachep = kmem_cache_create("dn_dst_cache", - sizeof(struct dn_route), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - - if (!dn_dst_ops.kmem_cachep) - panic("DECnet: Failed to allocate dn_dst_cache\n"); - + dn_dst_ops.kmem_cachep = + kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); init_timer(&dn_route_timer); dn_route_timer.function = dn_dst_check_expire; dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 03ff62ebcfeb..a675602ef295 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -126,12 +126,9 @@ void __init inet_initpeers(void) peer_cachep = kmem_cache_create("inet_peer_cache", sizeof(struct inet_peer), - 0, SLAB_HWCACHE_ALIGN, + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - if (!peer_cachep) - panic("cannot create inet_peer_cache"); - /* All the timers, started at system startup tend to synchronize. Perturb it a bit. */ diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 98f0aa0d4216..ba49588da242 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1900,11 +1900,8 @@ void __init ip_mr_init(void) { mrt_cachep = kmem_cache_create("ip_mrt_cache", sizeof(struct mfc_cache), - 0, SLAB_HWCACHE_ALIGN, + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - if (!mrt_cachep) - panic("cannot allocate ip_mrt_cache"); - init_timer(&ipmr_expire_timer); ipmr_expire_timer.function=ipmr_expire_process; register_netdevice_notifier(&ip_mr_notifier); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a4d4cb85a16c..20ffe8e88c0f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3147,13 +3147,9 @@ int __init ip_rt_init(void) } #endif - ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache", - sizeof(struct rtable), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - - if (!ipv4_dst_ops.kmem_cachep) - panic("IP: failed to allocate ip_dst_cache\n"); + ipv4_dst_ops.kmem_cachep = + kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); rt_hash_table = (struct rt_hash_bucket *) alloc_large_system_hash("IP route cache", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e570db4d33c8..29e3d606db78 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2254,9 +2254,7 @@ void __init tcp_init(void) tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (!tcp_hashinfo.bind_bucket_cachep) - panic("tcp_init: Cannot alloc tcp_bind_bucket cache."); + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); /* Size and allocate the main established and bind bucket * hash tables. diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index fbca60950b14..8fcae7a6510b 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1472,10 +1472,8 @@ void __init fib6_init(void) { fib6_node_kmem = kmem_cache_create("fib6_nodes", sizeof(struct fib6_node), - 0, SLAB_HWCACHE_ALIGN, + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - if (!fib6_node_kmem) - panic("cannot create fib6_nodes cache"); fib6_tables_init(); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d83844d9499b..ba1b3d11865e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2419,13 +2419,9 @@ void __init ip6_route_init(void) { struct proc_dir_entry *p; - ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache", - sizeof(struct rt6_info), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (!ip6_dst_ops.kmem_cachep) - panic("cannot create ip6_dst_cache"); - + ip6_dst_ops.kmem_cachep = + kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); fib6_init(); #ifdef CONFIG_PROC_FS p = proc_net_create("ipv6_route", 0, rt6_proc_info); diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 891a6090cc09..dfc90bb1cf1f 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -82,8 +82,6 @@ void __init xfrm_input_init(void) { secpath_cachep = kmem_cache_create("secpath_cache", sizeof(struct sec_path), - 0, SLAB_HWCACHE_ALIGN, + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - if (!secpath_cachep) - panic("XFRM: failed to allocate secpath_cache\n"); } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 1cf3209cdf4b..7db1c48537f0 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1985,10 +1985,8 @@ static void __init xfrm_policy_init(void) xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", sizeof(struct xfrm_dst), - 0, SLAB_HWCACHE_ALIGN, + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - if (!xfrm_dst_cache) - panic("XFRM: failed to allocate xfrm_dst_cache\n"); hmask = 8 - 1; sz = (hmask+1) * sizeof(struct hlist_head); From 6a28ec8cd0c6993a4ac0d52f4347f7ed077b5cac Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 26 Aug 2006 19:48:49 -0700 Subject: [PATCH 0630/1063] [NETFILTER]: Fix nf_conntrack_ftp.c build. Noticed by Adrian Bunk. Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_ftp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 9dccb4039889..0c17a5bd112b 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -114,7 +115,8 @@ static struct ftp_search { static int get_ipv6_addr(const char *src, size_t dlen, struct in6_addr *dst, u_int8_t term) { - int ret = in6_pton(src, min_t(size_t, dlen, 0xffff), dst, term, &end); + const char *end; + int ret = in6_pton(src, min_t(size_t, dlen, 0xffff), (u8 *)dst, term, &end); if (ret > 0) return (int)(end - src); return 0; From 25030a7f9eeab2dcefff036469e0e2b4f956198f Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sat, 26 Aug 2006 20:06:05 -0700 Subject: [PATCH 0631/1063] [UDP]: Unify UDPv4 and UDPv6 ->get_port() This patch creates one common function which is called by udp_v4_get_port() and udp_v6_get_port(). As a result, * duplicated code is removed * udp_port_rover and local port lookup can now be removed from udp.h * further savings follow since the same function will be used by UDP-Litev4 and UDP-Litev6 In contrast to the patch sent in response to Yoshifujis comments (fixed by this variant), the code below also removes the EXPORT_SYMBOL(udp_port_rover), since udp_port_rover can now remain local to net/ipv4/udp.c. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/net/udp.h | 18 +-------- net/ipv4/udp.c | 94 +++++++++++++++++++++++++++++------------------ net/ipv6/udp.c | 76 +------------------------------------- 3 files changed, 63 insertions(+), 125 deletions(-) diff --git a/include/net/udp.h b/include/net/udp.h index 766fba1369ce..c490a0f662ac 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -30,25 +30,9 @@ #define UDP_HTABLE_SIZE 128 -/* udp.c: This needs to be shared by v4 and v6 because the lookup - * and hashing code needs to work with different AF's yet - * the port space is shared. - */ extern struct hlist_head udp_hash[UDP_HTABLE_SIZE]; extern rwlock_t udp_hash_lock; -extern int udp_port_rover; - -static inline int udp_lport_inuse(u16 num) -{ - struct sock *sk; - struct hlist_node *node; - - sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)]) - if (inet_sk(sk)->num == num) - return 1; - return 0; -} /* Note: this must match 'valbool' in sock_setsockopt */ #define UDP_CSUM_NOXMIT 1 @@ -63,6 +47,8 @@ extern struct proto udp_prot; struct sk_buff; +extern int udp_get_port(struct sock *sk, unsigned short snum, + int (*saddr_cmp)(struct sock *, struct sock *)); extern void udp_err(struct sk_buff *, u32); extern int udp_sendmsg(struct kiocb *iocb, struct sock *sk, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 514c1e9ae810..7552b50bcd84 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -118,14 +118,34 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly; struct hlist_head udp_hash[UDP_HTABLE_SIZE]; DEFINE_RWLOCK(udp_hash_lock); -/* Shared by v4/v6 udp. */ +/* Shared by v4/v6 udp_get_port */ int udp_port_rover; -static int udp_v4_get_port(struct sock *sk, unsigned short snum) +static inline int udp_lport_inuse(u16 num) +{ + struct sock *sk; + struct hlist_node *node; + + sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)]) + if (inet_sk(sk)->num == num) + return 1; + return 0; +} + +/** + * udp_get_port - common port lookup for IPv4 and IPv6 + * + * @sk: socket struct in question + * @snum: port number to look up + * @saddr_comp: AF-dependent comparison of bound local IP addresses + */ +int udp_get_port(struct sock *sk, unsigned short snum, + int (*saddr_cmp)(struct sock *sk1, struct sock *sk2)) { struct hlist_node *node; + struct hlist_head *head; struct sock *sk2; - struct inet_sock *inet = inet_sk(sk); + int error = 1; write_lock_bh(&udp_hash_lock); if (snum == 0) { @@ -137,11 +157,10 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum) best_size_so_far = 32767; best = result = udp_port_rover; for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { - struct hlist_head *list; int size; - list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; - if (hlist_empty(list)) { + head = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; + if (hlist_empty(head)) { if (result > sysctl_local_port_range[1]) result = sysctl_local_port_range[0] + ((result - sysctl_local_port_range[0]) & @@ -149,12 +168,11 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum) goto gotit; } size = 0; - sk_for_each(sk2, node, list) - if (++size >= best_size_so_far) - goto next; - best_size_so_far = size; - best = result; - next:; + sk_for_each(sk2, node, head) + if (++size < best_size_so_far) { + best_size_so_far = size; + best = result; + } } result = best; for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) { @@ -170,38 +188,44 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum) gotit: udp_port_rover = snum = result; } else { - sk_for_each(sk2, node, - &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) { - struct inet_sock *inet2 = inet_sk(sk2); + head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; - if (inet2->num == snum && - sk2 != sk && - !ipv6_only_sock(sk2) && - (!sk2->sk_bound_dev_if || - !sk->sk_bound_dev_if || - sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (!inet2->rcv_saddr || - !inet->rcv_saddr || - inet2->rcv_saddr == inet->rcv_saddr) && - (!sk2->sk_reuse || !sk->sk_reuse)) + sk_for_each(sk2, node, head) + if (inet_sk(sk2)->num == snum && + sk2 != sk && + (!sk2->sk_reuse || !sk->sk_reuse) && + (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if + || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + (*saddr_cmp)(sk, sk2) ) goto fail; - } } - inet->num = snum; + inet_sk(sk)->num = snum; if (sk_unhashed(sk)) { - struct hlist_head *h = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; - - sk_add_node(sk, h); + head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; + sk_add_node(sk, head); sock_prot_inc_use(sk->sk_prot); } - write_unlock_bh(&udp_hash_lock); - return 0; - + error = 0; fail: write_unlock_bh(&udp_hash_lock); - return 1; + return error; } +static inline int ipv4_rcv_saddr_equal(struct sock *sk1, struct sock *sk2) +{ + struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); + + return ( !ipv6_only_sock(sk2) && + (!inet1->rcv_saddr || !inet2->rcv_saddr || + inet1->rcv_saddr == inet2->rcv_saddr )); +} + +static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) +{ + return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); +} + + static void udp_v4_hash(struct sock *sk) { BUG(); @@ -1596,7 +1620,7 @@ EXPORT_SYMBOL(udp_disconnect); EXPORT_SYMBOL(udp_hash); EXPORT_SYMBOL(udp_hash_lock); EXPORT_SYMBOL(udp_ioctl); -EXPORT_SYMBOL(udp_port_rover); +EXPORT_SYMBOL(udp_get_port); EXPORT_SYMBOL(udp_prot); EXPORT_SYMBOL(udp_sendmsg); EXPORT_SYMBOL(udp_poll); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b9cc55ccb000..9662561701d1 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -61,81 +61,9 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly; -/* Grrr, addr_type already calculated by caller, but I don't want - * to add some silly "cookie" argument to this method just for that. - */ -static int udp_v6_get_port(struct sock *sk, unsigned short snum) +static inline int udp_v6_get_port(struct sock *sk, unsigned short snum) { - struct sock *sk2; - struct hlist_node *node; - - write_lock_bh(&udp_hash_lock); - if (snum == 0) { - int best_size_so_far, best, result, i; - - if (udp_port_rover > sysctl_local_port_range[1] || - udp_port_rover < sysctl_local_port_range[0]) - udp_port_rover = sysctl_local_port_range[0]; - best_size_so_far = 32767; - best = result = udp_port_rover; - for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { - int size; - struct hlist_head *list; - - list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; - if (hlist_empty(list)) { - if (result > sysctl_local_port_range[1]) - result = sysctl_local_port_range[0] + - ((result - sysctl_local_port_range[0]) & - (UDP_HTABLE_SIZE - 1)); - goto gotit; - } - size = 0; - sk_for_each(sk2, node, list) - if (++size >= best_size_so_far) - goto next; - best_size_so_far = size; - best = result; - next:; - } - result = best; - for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) { - if (result > sysctl_local_port_range[1]) - result = sysctl_local_port_range[0] - + ((result - sysctl_local_port_range[0]) & - (UDP_HTABLE_SIZE - 1)); - if (!udp_lport_inuse(result)) - break; - } - if (i >= (1 << 16) / UDP_HTABLE_SIZE) - goto fail; -gotit: - udp_port_rover = snum = result; - } else { - sk_for_each(sk2, node, - &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) { - if (inet_sk(sk2)->num == snum && - sk2 != sk && - (!sk2->sk_bound_dev_if || - !sk->sk_bound_dev_if || - sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (!sk2->sk_reuse || !sk->sk_reuse) && - ipv6_rcv_saddr_equal(sk, sk2)) - goto fail; - } - } - - inet_sk(sk)->num = snum; - if (sk_unhashed(sk)) { - sk_add_node(sk, &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]); - sock_prot_inc_use(sk->sk_prot); - } - write_unlock_bh(&udp_hash_lock); - return 0; - -fail: - write_unlock_bh(&udp_hash_lock); - return 1; + return udp_get_port(sk, snum, ipv6_rcv_saddr_equal); } static void udp_v6_hash(struct sock *sk) From bed53ea7fef37820b7c92ad74feff1b817c6aae3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 26 Aug 2006 20:06:49 -0700 Subject: [PATCH 0632/1063] [UDP]: Mark udp_port_rover static. It is not referenced outside of net/ipv4/udp.c any longer. Signed-off-by: David S. Miller --- net/ipv4/udp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7552b50bcd84..aa1823050b00 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -118,8 +118,7 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly; struct hlist_head udp_hash[UDP_HTABLE_SIZE]; DEFINE_RWLOCK(udp_hash_lock); -/* Shared by v4/v6 udp_get_port */ -int udp_port_rover; +static int udp_port_rover; static inline int udp_lport_inuse(u16 num) { From e3b4eadbea77ecb3c3a74d1bc81b392f454c7f2e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 26 Aug 2006 20:10:15 -0700 Subject: [PATCH 0633/1063] [UDP]: saddr_cmp function should take const socket pointers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This also kills a warning while building ipv6: net/ipv6/udp.c: In function ‘udp_v6_get_port’: net/ipv6/udp.c:66: warning: passing argument 3 of ‘udp_get_port’ from incompatible pointer type Signed-off-by: David S. Miller --- include/net/udp.h | 2 +- net/ipv4/udp.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/udp.h b/include/net/udp.h index c490a0f662ac..db0c05f67546 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -48,7 +48,7 @@ extern struct proto udp_prot; struct sk_buff; extern int udp_get_port(struct sock *sk, unsigned short snum, - int (*saddr_cmp)(struct sock *, struct sock *)); + int (*saddr_cmp)(const struct sock *, const struct sock *)); extern void udp_err(struct sk_buff *, u32); extern int udp_sendmsg(struct kiocb *iocb, struct sock *sk, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index aa1823050b00..77e265d7bb8f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -139,7 +139,7 @@ static inline int udp_lport_inuse(u16 num) * @saddr_comp: AF-dependent comparison of bound local IP addresses */ int udp_get_port(struct sock *sk, unsigned short snum, - int (*saddr_cmp)(struct sock *sk1, struct sock *sk2)) + int (*saddr_cmp)(const struct sock *sk1, const struct sock *sk2)) { struct hlist_node *node; struct hlist_head *head; @@ -210,7 +210,7 @@ fail: return error; } -static inline int ipv4_rcv_saddr_equal(struct sock *sk1, struct sock *sk2) +static inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) { struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); From a5531a5d852008be40811496029012f4ad3093d1 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 26 Aug 2006 20:11:47 -0700 Subject: [PATCH 0634/1063] [NETLINK]: Improve string attribute validation Introduces a new attribute type NLA_NUL_STRING to support NUL terminated strings. Attributes of this kind require to carry a terminating NUL within the maximum specified in the policy. The `old' NLA_STRING which is not required to be NUL terminated is extended to provide means to specify a maximum length of the string. Aims at easing the pain with using nla_strlcpy() on temporary buffers. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/netlink.h | 15 +++++++++---- net/netlink/attr.c | 49 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 50 insertions(+), 14 deletions(-) diff --git a/include/net/netlink.h b/include/net/netlink.h index bcb27e3a312e..11dc2e7f679a 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -167,6 +167,7 @@ enum { NLA_FLAG, NLA_MSECS, NLA_NESTED, + NLA_NUL_STRING, __NLA_TYPE_MAX, }; @@ -175,21 +176,27 @@ enum { /** * struct nla_policy - attribute validation policy * @type: Type of attribute or NLA_UNSPEC - * @minlen: Minimal length of payload required to be available + * @len: Type specific length of payload * * Policies are defined as arrays of this struct, the array must be * accessible by attribute type up to the highest identifier to be expected. * + * Meaning of `len' field: + * NLA_STRING Maximum length of string + * NLA_NUL_STRING Maximum length of string (excluding NUL) + * NLA_FLAG Unused + * All other Exact length of attribute payload + * * Example: * static struct nla_policy my_policy[ATTR_MAX+1] __read_mostly = { * [ATTR_FOO] = { .type = NLA_U16 }, - * [ATTR_BAR] = { .type = NLA_STRING }, - * [ATTR_BAZ] = { .minlen = sizeof(struct mystruct) }, + * [ATTR_BAR] = { .type = NLA_STRING, len = BARSIZ }, + * [ATTR_BAZ] = { .len = sizeof(struct mystruct) }, * }; */ struct nla_policy { u16 type; - u16 minlen; + u16 len; }; /** diff --git a/net/netlink/attr.c b/net/netlink/attr.c index 136e529e5780..004139557e09 100644 --- a/net/netlink/attr.c +++ b/net/netlink/attr.c @@ -20,7 +20,6 @@ static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = { [NLA_U16] = sizeof(u16), [NLA_U32] = sizeof(u32), [NLA_U64] = sizeof(u64), - [NLA_STRING] = 1, [NLA_NESTED] = NLA_HDRLEN, }; @@ -28,7 +27,7 @@ static int validate_nla(struct nlattr *nla, int maxtype, struct nla_policy *policy) { struct nla_policy *pt; - int minlen = 0; + int minlen = 0, attrlen = nla_len(nla); if (nla->nla_type <= 0 || nla->nla_type > maxtype) return 0; @@ -37,16 +36,46 @@ static int validate_nla(struct nlattr *nla, int maxtype, BUG_ON(pt->type > NLA_TYPE_MAX); - if (pt->minlen) - minlen = pt->minlen; - else if (pt->type != NLA_UNSPEC) - minlen = nla_attr_minlen[pt->type]; + switch (pt->type) { + case NLA_FLAG: + if (attrlen > 0) + return -ERANGE; + break; - if (pt->type == NLA_FLAG && nla_len(nla) > 0) - return -ERANGE; + case NLA_NUL_STRING: + if (pt->len) + minlen = min_t(int, attrlen, pt->len + 1); + else + minlen = attrlen; - if (nla_len(nla) < minlen) - return -ERANGE; + if (!minlen || memchr(nla_data(nla), '\0', minlen) == NULL) + return -EINVAL; + /* fall through */ + + case NLA_STRING: + if (attrlen < 1) + return -ERANGE; + + if (pt->len) { + char *buf = nla_data(nla); + + if (buf[attrlen - 1] == '\0') + attrlen--; + + if (attrlen > pt->len) + return -ERANGE; + } + break; + + default: + if (pt->len) + minlen = pt->len; + else if (pt->type != NLA_UNSPEC) + minlen = nla_attr_minlen[pt->type]; + + if (attrlen < minlen) + return -ERANGE; + } return 0; } From 5176f91ea83f1a59eba4dba88634a4729d51d1ac Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 26 Aug 2006 20:13:18 -0700 Subject: [PATCH 0635/1063] [NETLINK]: Make use of NLA_STRING/NLA_NUL_STRING attribute validation Converts existing NLA_STRING attributes to use the new validation features, saving a couple of temporary buffers. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/fib_rules.c | 8 +------- net/core/rtnetlink.c | 9 ++++----- net/decnet/dn_rules.c | 2 +- net/ipv4/devinet.c | 2 +- net/ipv4/fib_frontend.c | 2 +- net/ipv4/fib_rules.c | 2 +- net/ipv6/fib6_rules.c | 6 +++--- net/ipv6/route.c | 2 +- net/netlink/genetlink.c | 10 ++++------ 9 files changed, 17 insertions(+), 26 deletions(-) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 7b2e9bb1a605..a99d87d82b7f 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -161,9 +161,6 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (err < 0) goto errout; - if (tb[FRA_IFNAME] && nla_len(tb[FRA_IFNAME]) > IFNAMSIZ) - goto errout; - rule = kzalloc(ops->rule_size, GFP_KERNEL); if (rule == NULL) { err = -ENOMEM; @@ -177,10 +174,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) struct net_device *dev; rule->ifindex = -1; - if (nla_strlcpy(rule->ifname, tb[FRA_IFNAME], - IFNAMSIZ) >= IFNAMSIZ) - goto errout_free; - + nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ); dev = __dev_get_by_name(rule->ifname); if (dev) rule->ifindex = dev->ifindex; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8f225499e32e..0ebcf8488e99 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -371,8 +371,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) } static struct nla_policy ifla_policy[IFLA_MAX+1] __read_mostly = { - [IFLA_IFNAME] = { .type = NLA_STRING }, - [IFLA_MAP] = { .minlen = sizeof(struct rtnl_link_ifmap) }, + [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 }, + [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) }, [IFLA_MTU] = { .type = NLA_U32 }, [IFLA_TXQLEN] = { .type = NLA_U32 }, [IFLA_WEIGHT] = { .type = NLA_U32 }, @@ -392,9 +392,8 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (err < 0) goto errout; - if (tb[IFLA_IFNAME] && - nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ) >= IFNAMSIZ) - return -EINVAL; + if (tb[IFLA_IFNAME]) + nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); err = -EINVAL; ifm = nlmsg_data(nlh); diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 63ad63dfd252..3e0c882c90bf 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -112,7 +112,7 @@ errout: } static struct nla_policy dn_fib_rule_policy[FRA_MAX+1] __read_mostly = { - [FRA_IFNAME] = { .type = NLA_STRING }, + [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, [FRA_PRIORITY] = { .type = NLA_U32 }, [FRA_SRC] = { .type = NLA_U16 }, [FRA_DST] = { .type = NLA_U16 }, diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 0487677729cf..8e8d1f17d77a 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -85,7 +85,7 @@ static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = { [IFA_ADDRESS] = { .type = NLA_U32 }, [IFA_BROADCAST] = { .type = NLA_U32 }, [IFA_ANYCAST] = { .type = NLA_U32 }, - [IFA_LABEL] = { .type = NLA_STRING }, + [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, }; static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index d0abeab16e66..cfb527c060e4 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -462,7 +462,7 @@ struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = { [RTA_PRIORITY] = { .type = NLA_U32 }, [RTA_PREFSRC] = { .type = NLA_U32 }, [RTA_METRICS] = { .type = NLA_NESTED }, - [RTA_MULTIPATH] = { .minlen = sizeof(struct rtnexthop) }, + [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, [RTA_PROTOINFO] = { .type = NLA_U32 }, [RTA_FLOW] = { .type = NLA_U32 }, [RTA_MP_ALGO] = { .type = NLA_U32 }, diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 280f424ca9c9..52b2adae4f22 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -179,7 +179,7 @@ static struct fib_table *fib_empty_table(void) } static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = { - [FRA_IFNAME] = { .type = NLA_STRING }, + [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, [FRA_PRIORITY] = { .type = NLA_U32 }, [FRA_SRC] = { .type = NLA_U32 }, [FRA_DST] = { .type = NLA_U32 }, diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 2fbc71d90187..34f5bfaddfc2 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -137,10 +137,10 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) } static struct nla_policy fib6_rule_policy[FRA_MAX+1] __read_mostly = { - [FRA_IFNAME] = { .type = NLA_STRING }, + [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, [FRA_PRIORITY] = { .type = NLA_U32 }, - [FRA_SRC] = { .minlen = sizeof(struct in6_addr) }, - [FRA_DST] = { .minlen = sizeof(struct in6_addr) }, + [FRA_SRC] = { .len = sizeof(struct in6_addr) }, + [FRA_DST] = { .len = sizeof(struct in6_addr) }, [FRA_FWMARK] = { .type = NLA_U32 }, [FRA_FWMASK] = { .type = NLA_U32 }, [FRA_TABLE] = { .type = NLA_U32 }, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ba1b3d11865e..75f4bb9611ce 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1865,7 +1865,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned mtu) } static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = { - [RTA_GATEWAY] = { .minlen = sizeof(struct in6_addr) }, + [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, [RTA_OIF] = { .type = NLA_U32 }, [RTA_IIF] = { .type = NLA_U32 }, [RTA_PRIORITY] = { .type = NLA_U32 }, diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index d32599116c56..3ac942cdb677 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -455,7 +455,8 @@ static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid, static struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] __read_mostly = { [CTRL_ATTR_FAMILY_ID] = { .type = NLA_U16 }, - [CTRL_ATTR_FAMILY_NAME] = { .type = NLA_STRING }, + [CTRL_ATTR_FAMILY_NAME] = { .type = NLA_NUL_STRING, + .len = GENL_NAMSIZ - 1 }, }; static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) @@ -470,12 +471,9 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[CTRL_ATTR_FAMILY_NAME]) { - char name[GENL_NAMSIZ]; - - if (nla_strlcpy(name, info->attrs[CTRL_ATTR_FAMILY_NAME], - GENL_NAMSIZ) >= GENL_NAMSIZ) - goto errout; + char *name; + name = nla_data(info->attrs[CTRL_ATTR_FAMILY_NAME]); res = genl_family_find_byname(name); } From 33cc48966827165e49de1cb8ff4fb57c127d4be0 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Mon, 28 Aug 2006 13:19:30 -0700 Subject: [PATCH 0636/1063] [IPV6] ROUTE: Fix dst reference counting in ip6_pol_route_lookup(). In ip6_pol_route_lookup(), when we finish backtracking at the top-level root entry, we need to hold it. Bug noticed by Mitsuru Chinen . Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 75f4bb9611ce..d6b4b4f48d18 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -510,8 +510,8 @@ restart: rt = fn->leaf; rt = rt6_device_match(rt, fl->oif, flags); BACKTRACK(&fl->fl6_src); - dst_hold(&rt->u.dst); out: + dst_hold(&rt->u.dst); read_unlock_bh(&table->tb6_lock); rt->u.dst.lastuse = jiffies; From 0719bdf1b5e7eb0d9c3c73ebbd9c9d5d382bb9e1 Mon Sep 17 00:00:00 2001 From: Benoit Boissinot Date: Mon, 28 Aug 2006 17:50:37 -0700 Subject: [PATCH 0637/1063] [NETFILTER]: xt_CONNMARK.c build fix net/netfilter/xt_CONNMARK.c: In function 'target': net/netfilter/xt_CONNMARK.c:59: warning: implicit declaration of function 'nf_conntrack_event_cache' The warning is due to the following .config: CONFIG_IP_NF_CONNTRACK=m CONFIG_IP_NF_CONNTRACK_MARK=y # CONFIG_IP_NF_CONNTRACK_EVENTS is not set CONFIG_IP_NF_CONNTRACK_NETLINK=m This change was introduced by: http://www.kernel.org/git/?p=linux/kernel/git/davem/net-2.6.19.git;a=commit;h=76e4b41009b8a2e9dd246135cf43c7fe39553aa5 Proposed solution (based on the define in include/net/netfilter/nf_conntrack_compat.h: Signed-off-by: Benoit Boissinot Acked-by: Pablo Neira Ayuso Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/netfilter/xt_CONNMARK.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index 0e4249ddc17b..6ccb45ee0880 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -53,7 +53,7 @@ target(struct sk_buff **pskb, newmark = (*ctmark & ~markinfo->mask) | markinfo->mark; if (newmark != *ctmark) { *ctmark = newmark; -#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE) ip_conntrack_event_cache(IPCT_MARK, *pskb); #else nf_conntrack_event_cache(IPCT_MARK, *pskb); @@ -65,7 +65,7 @@ target(struct sk_buff **pskb, ((*pskb)->nfmark & markinfo->mask); if (*ctmark != newmark) { *ctmark = newmark; -#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE) ip_conntrack_event_cache(IPCT_MARK, *pskb); #else nf_conntrack_event_cache(IPCT_MARK, *pskb); From def42ff4dd6f54ebcf78192579a8ff1f81d8e2e8 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 28 Aug 2006 23:57:56 -0700 Subject: [PATCH 0638/1063] [IPV4]: Make struct in_addr::s_addr __be32 There will be relatively small increase in sparse endian warnings, but this (and sin_port) patch is a first step to make networking code endian clean. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/in.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/in.h b/include/linux/in.h index 94f557fa4636..9a9d5dd32e73 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -52,7 +52,7 @@ enum { /* Internet address. */ struct in_addr { - __u32 s_addr; + __be32 s_addr; }; #define IP_TOS 1 From cd360007a0eb8cbf17c006cca42aa884d33f96be Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 28 Aug 2006 23:58:32 -0700 Subject: [PATCH 0639/1063] [IPV4]: Make struct sockaddr_in::sin_port __be16 Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/in.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/in.h b/include/linux/in.h index 9a9d5dd32e73..bcaca8399aed 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -177,7 +177,7 @@ struct in_pktinfo #define __SOCK_SIZE__ 16 /* sizeof(struct sockaddr) */ struct sockaddr_in { sa_family_t sin_family; /* Address family */ - unsigned short int sin_port; /* Port number */ + __be16 sin_port; /* Port number */ struct in_addr sin_addr; /* Internet address */ /* Pad to size of `struct sockaddr'. */ From 07317621d004e8e6967f2dac8562825267e56135 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 29 Aug 2006 17:48:17 -0700 Subject: [PATCH 0640/1063] [NETFILTER] bridge: code rearrangement for clarity Cleanup and rearrangement for better style and clarity: Split the function nf_bridge_maybe_copy_header into two pieces Move copy portion out of line. Use Ethernet header size macros. Use header file to handle CONFIG_NETFILTER_BRIDGE differences Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netfilter_bridge.h | 26 +++++++------------------- net/bridge/br_forward.c | 5 +---- net/bridge/br_netfilter.c | 27 +++++++++++++++++++++++++-- 3 files changed, 33 insertions(+), 25 deletions(-) diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index 427c67ff89e9..274fe4b33155 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -47,26 +47,12 @@ enum nf_br_hook_priorities { /* Only used in br_forward.c */ -static inline -int nf_bridge_maybe_copy_header(struct sk_buff *skb) +extern int nf_bridge_copy_header(struct sk_buff *skb); +static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb) { - int err; - - if (skb->nf_bridge) { - if (skb->protocol == __constant_htons(ETH_P_8021Q)) { - err = skb_cow(skb, 18); - if (err) - return err; - memcpy(skb->data - 18, skb->nf_bridge->data, 18); - skb_push(skb, 4); - } else { - err = skb_cow(skb, 16); - if (err) - return err; - memcpy(skb->data - 16, skb->nf_bridge->data, 16); - } - } - return 0; + if (skb->nf_bridge) + return nf_bridge_copy_header(skb); + return 0; } /* This is called by the IP fragmenting code and it ensures there is @@ -90,6 +76,8 @@ struct bridge_skb_cb { }; extern int brnf_deferred_hooks; +#else +#define nf_bridge_maybe_copy_header(skb) (0) #endif /* CONFIG_BRIDGE_NETFILTER */ #endif /* __KERNEL__ */ diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 864fbbc7b24d..191b861e5e53 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -38,13 +38,10 @@ int br_dev_queue_push_xmit(struct sk_buff *skb) if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) kfree_skb(skb); else { -#ifdef CONFIG_BRIDGE_NETFILTER /* ip_refrag calls ip_fragment, doesn't copy the MAC header. */ if (nf_bridge_maybe_copy_header(skb)) kfree_skb(skb); - else -#endif - { + else { skb_push(skb, ETH_HLEN); dev_queue_xmit(skb); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 05b3de888243..b498efcfe451 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -127,14 +127,37 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) static inline void nf_bridge_save_header(struct sk_buff *skb) { - int header_size = 16; + int header_size = ETH_HLEN; if (skb->protocol == htons(ETH_P_8021Q)) - header_size = 18; + header_size += VLAN_HLEN; memcpy(skb->nf_bridge->data, skb->data - header_size, header_size); } +/* + * When forwarding bridge frames, we save a copy of the original + * header before processing. + */ +int nf_bridge_copy_header(struct sk_buff *skb) +{ + int err; + int header_size = ETH_HLEN; + + if (skb->protocol == htons(ETH_P_8021Q)) + header_size += VLAN_HLEN; + + err = skb_cow(skb, header_size); + if (err) + return err; + + memcpy(skb->data - header_size, skb->nf_bridge->data, header_size); + + if (skb->protocol == htons(ETH_P_8021Q)) + __skb_push(skb, VLAN_HLEN); + return 0; +} + /* PF_BRIDGE/PRE_ROUTING *********************************************/ /* Undo the changes made for ip6tables PREROUTING and continue the * bridge PRE_ROUTING hook. */ From 9bcfcaf5e9cc887eb39236e43bdbe4b4b2572229 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 29 Aug 2006 17:48:57 -0700 Subject: [PATCH 0641/1063] [NETFILTER] bridge: simplify nf_bridge_pad Do some simple optimization on the nf_bridge_pad() function and don't use magic constants. Eliminate a double call and the #ifdef'd code for CONFIG_BRIDGE_NETFILTER. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netfilter_bridge.h | 16 +++++----------- net/ipv4/ip_output.c | 15 +++++++-------- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index 274fe4b33155..9a4dd11af86e 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -5,9 +5,8 @@ */ #include -#if defined(__KERNEL__) && defined(CONFIG_BRIDGE_NETFILTER) #include -#endif +#include /* Bridge Hooks */ /* After promisc drops, checksum checks. */ @@ -57,16 +56,10 @@ static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb) /* This is called by the IP fragmenting code and it ensures there is * enough room for the encapsulating header (if there is one). */ -static inline -int nf_bridge_pad(struct sk_buff *skb) +static inline int nf_bridge_pad(const struct sk_buff *skb) { - if (skb->protocol == __constant_htons(ETH_P_IP)) - return 0; - if (skb->nf_bridge) { - if (skb->protocol == __constant_htons(ETH_P_8021Q)) - return 4; - } - return 0; + return (skb->nf_bridge && skb->protocol == htons(ETH_P_8021Q)) + ? VLAN_HLEN : 0; } struct bridge_skb_cb { @@ -78,6 +71,7 @@ struct bridge_skb_cb { extern int brnf_deferred_hooks; #else #define nf_bridge_maybe_copy_header(skb) (0) +#define nf_bridge_pad(skb) (0) #endif /* CONFIG_BRIDGE_NETFILTER */ #endif /* __KERNEL__ */ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 81b2795a4c20..97aee76fb746 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -426,7 +426,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) int ptr; struct net_device *dev; struct sk_buff *skb2; - unsigned int mtu, hlen, left, len, ll_rs; + unsigned int mtu, hlen, left, len, ll_rs, pad; int offset; __be16 not_last_frag; struct rtable *rt = (struct rtable*)skb->dst; @@ -556,14 +556,13 @@ slow_path: left = skb->len - hlen; /* Space per frame */ ptr = raw + hlen; /* Where to start from */ -#ifdef CONFIG_BRIDGE_NETFILTER /* for bridged IP traffic encapsulated inside f.e. a vlan header, - * we need to make room for the encapsulating header */ - ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, nf_bridge_pad(skb)); - mtu -= nf_bridge_pad(skb); -#else - ll_rs = LL_RESERVED_SPACE(rt->u.dst.dev); -#endif + * we need to make room for the encapsulating header + */ + pad = nf_bridge_pad(skb); + ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad); + mtu -= pad; + /* * Fragment the datagram. */ From 8394e9b2faf539f82470b36c86f0485cab5278bd Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 29 Aug 2006 17:49:31 -0700 Subject: [PATCH 0642/1063] [NETFILTER] bridge: debug message fixes If CONFIG_NETFILTER_DEBUG is enabled, it shouldn't change the actions of the filtering. The message about skb->dst being NULL is commonly triggered by dhclient, so it is useless. Make sure all messages end in newline. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_netfilter.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index b498efcfe451..cf80dd0e896d 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -718,16 +718,6 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb, else pf = PF_INET6; -#ifdef CONFIG_NETFILTER_DEBUG - /* Sometimes we get packets with NULL ->dst here (for example, - * running a dhcp client daemon triggers this). This should now - * be fixed, but let's keep the check around. */ - if (skb->dst == NULL) { - printk(KERN_CRIT "br_netfilter: skb->dst == NULL."); - return NF_ACCEPT; - } -#endif - nf_bridge = skb->nf_bridge; nf_bridge->physoutdev = skb->dev; realindev = nf_bridge->physindev; @@ -809,7 +799,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, * keep the check just to be sure... */ if (skb->mac.raw < skb->head || skb->mac.raw + ETH_HLEN > skb->data) { printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: " - "bad mac.raw pointer."); + "bad mac.raw pointer.\n"); goto print_error; } #endif @@ -827,7 +817,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, #ifdef CONFIG_NETFILTER_DEBUG if (skb->dst == NULL) { - printk(KERN_CRIT "br_netfilter: skb->dst == NULL."); + printk(KERN_INFO "br_netfilter post_routing: skb->dst == NULL\n"); goto print_error; } #endif @@ -864,6 +854,7 @@ print_error: } printk(" head:%p, raw:%p, data:%p\n", skb->head, skb->mac.raw, skb->data); + dump_stack(); return NF_ACCEPT; #endif } From fc747e82b40ea50a62eb2aef55bedd4465607cb0 Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Tue, 29 Aug 2006 17:50:19 -0700 Subject: [PATCH 0643/1063] [DCCP]: Tidyup CCID3 list handling As Arnaldo Carvalho de Melo points out I should be using list_entry in case the structure changes in future. Current code functions but is reliant on position and requires type cast. Noticed when doing this that I have one more variable than I needed so removing that also. Signed off by: Ian McDonald Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 090bc39e8199..195aa9566228 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -900,7 +900,7 @@ found: static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); - struct dccp_li_hist_entry *next, *head; + struct dccp_li_hist_entry *head; u64 seq_temp; if (list_empty(&hcrx->ccid3hcrx_li_hist)) { @@ -908,15 +908,15 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss)) return; - next = (struct dccp_li_hist_entry *) - hcrx->ccid3hcrx_li_hist.next; - next->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); + head = list_entry(hcrx->ccid3hcrx_li_hist.next, + struct dccp_li_hist_entry, dccplih_node); + head->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); } else { struct dccp_li_hist_entry *entry; struct list_head *tail; - head = (struct dccp_li_hist_entry *) - hcrx->ccid3hcrx_li_hist.next; + head = list_entry(hcrx->ccid3hcrx_li_hist.next, + struct dccp_li_hist_entry, dccplih_node); /* FIXME win count check removed as was wrong */ /* should make this check with receive history */ /* and compare there as per section 10.2 of RFC4342 */ From 99f59ed073d3c1b890690064ab285a201dea2e35 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Aug 2006 17:53:48 -0700 Subject: [PATCH 0644/1063] [NetLabel]: Correctly initialize the NetLabel fields. Fix a problem where the NetLabel specific fields of the sk_security_struct structure were not being initialized early enough in some cases. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- security/selinux/hooks.c | 6 +++ security/selinux/include/selinux_netlabel.h | 18 +++++++++ security/selinux/ss/services.c | 45 ++++++++++++++++++++- 3 files changed, 67 insertions(+), 2 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 180b26b97d2d..5a66c4c09f7a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -281,6 +281,8 @@ static int sk_alloc_security(struct sock *sk, int family, gfp_t priority) ssec->sid = SECINITSID_UNLABELED; sk->sk_security = ssec; + selinux_netlbl_sk_security_init(ssec, family); + return 0; } @@ -3585,6 +3587,8 @@ static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk) newssec->sid = ssec->sid; newssec->peer_sid = ssec->peer_sid; + + selinux_netlbl_sk_clone_security(ssec, newssec); } static void selinux_sk_getsecid(struct sock *sk, u32 *secid) @@ -3648,6 +3652,8 @@ static void selinux_inet_csk_clone(struct sock *newsk, new socket in sync, but we don't have the isec available yet. So we will wait until sock_graft to do it, by which time it will have been created and available. */ + + selinux_netlbl_sk_security_init(newsksec, req->rsk_ops->family); } static void selinux_req_classify_flow(const struct request_sock *req, diff --git a/security/selinux/include/selinux_netlabel.h b/security/selinux/include/selinux_netlabel.h index 88c463eef1e1..d885d880540e 100644 --- a/security/selinux/include/selinux_netlabel.h +++ b/security/selinux/include/selinux_netlabel.h @@ -39,6 +39,10 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, struct avc_audit_data *ad); u32 selinux_netlbl_socket_getpeersec_stream(struct socket *sock); u32 selinux_netlbl_socket_getpeersec_dgram(struct sk_buff *skb); +void selinux_netlbl_sk_security_init(struct sk_security_struct *ssec, + int family); +void selinux_netlbl_sk_clone_security(struct sk_security_struct *ssec, + struct sk_security_struct *newssec); int __selinux_netlbl_inode_permission(struct inode *inode, int mask); /** @@ -115,6 +119,20 @@ static inline u32 selinux_netlbl_socket_getpeersec_dgram(struct sk_buff *skb) return SECSID_NULL; } +static inline void selinux_netlbl_sk_security_init( + struct sk_security_struct *ssec, + int family) +{ + return; +} + +static inline void selinux_netlbl_sk_clone_security( + struct sk_security_struct *ssec, + struct sk_security_struct *newssec) +{ + return; +} + static inline int selinux_netlbl_inode_permission(struct inode *inode, int mask) { diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 910afa1ffc31..835b485b2afd 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2422,6 +2422,45 @@ netlbl_socket_setsid_return: return rc; } +/** + * selinux_netlbl_sk_security_init - Setup the NetLabel fields + * @ssec: the sk_security_struct + * @family: the socket family + * + * Description: + * Called when a new sk_security_struct is allocated to initialize the NetLabel + * fields. + * + */ +void selinux_netlbl_sk_security_init(struct sk_security_struct *ssec, + int family) +{ + if (family == PF_INET) + ssec->nlbl_state = NLBL_REQUIRE; + else + ssec->nlbl_state = NLBL_UNSET; +} + +/** + * selinux_netlbl_sk_clone_security - Copy the NetLabel fields + * @ssec: the original sk_security_struct + * @newssec: the cloned sk_security_struct + * + * Description: + * Clone the NetLabel specific sk_security_struct fields from @ssec to + * @newssec. + * + */ +void selinux_netlbl_sk_clone_security(struct sk_security_struct *ssec, + struct sk_security_struct *newssec) +{ + newssec->sclass = ssec->sclass; + if (ssec->nlbl_state != NLBL_UNSET) + newssec->nlbl_state = NLBL_REQUIRE; + else + newssec->nlbl_state = NLBL_UNSET; +} + /** * selinux_netlbl_socket_post_create - Label a socket using NetLabel * @sock: the socket to label @@ -2440,10 +2479,11 @@ int selinux_netlbl_socket_post_create(struct socket *sock, struct inode_security_struct *isec = SOCK_INODE(sock)->i_security; struct sk_security_struct *sksec = sock->sk->sk_security; + sksec->sclass = isec->sclass; + if (sock_family != PF_INET) return 0; - sksec->sclass = isec->sclass; sksec->nlbl_state = NLBL_REQUIRE; return selinux_netlbl_socket_setsid(sock, sid); } @@ -2463,12 +2503,13 @@ void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock) struct inode_security_struct *isec = SOCK_INODE(sock)->i_security; struct sk_security_struct *sksec = sk->sk_security; + sksec->sclass = isec->sclass; + if (sk->sk_family != PF_INET) return; sksec->nlbl_state = NLBL_REQUIRE; sksec->peer_sid = sksec->sid; - sksec->sclass = isec->sclass; /* Try to set the NetLabel on the socket to save time later, if we fail * here we will pick up the pieces in later calls to From 1b7f775209bbee6b993587bae69acb9fc12ceb17 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Aug 2006 17:54:17 -0700 Subject: [PATCH 0645/1063] [NetLabel]: remove unused function prototypes Removed some older function prototypes for functions that no longer exist. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- include/net/cipso_ipv4.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index c7175e725804..5aed72ab652b 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -200,15 +200,9 @@ static inline int cipso_v4_cache_add(const struct sk_buff *skb, #ifdef CONFIG_NETLABEL void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway); -int cipso_v4_socket_setopt(struct socket *sock, - unsigned char *opt, - u32 opt_len); int cipso_v4_socket_setattr(const struct socket *sock, const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr); -int cipso_v4_socket_getopt(const struct socket *sock, - unsigned char **opt, - u32 *opt_len); int cipso_v4_socket_getattr(const struct socket *sock, struct netlbl_lsm_secattr *secattr); int cipso_v4_skbuff_getattr(const struct sk_buff *skb, From c1b14c0a46232246f61d3157bac1201e1e102227 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Aug 2006 17:54:41 -0700 Subject: [PATCH 0646/1063] [NetLabel]: Comment corrections. Fix some incorrect comments. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- security/selinux/ss/services.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 835b485b2afd..4f7642c7337e 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2617,7 +2617,7 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, } /** - * selinux_netlbl_socket_peersid - Return the peer SID of a connected socket + * selinux_netlbl_socket_getpeersec_stream - Return the connected peer's SID * @sock: the socket * * Description: From 7b3bbb926f4b3dd3a007dcf8dfa00203f52cb58d Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Aug 2006 17:55:11 -0700 Subject: [PATCH 0647/1063] [NetLabel]: Cleanup ebitmap_import() Rewrite ebitmap_import() so it is a bit cleaner and easier to read. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- security/selinux/ss/ebitmap.c | 36 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c index 4b915eb60c45..cfed1d30fa6a 100644 --- a/security/selinux/ss/ebitmap.c +++ b/security/selinux/ss/ebitmap.c @@ -145,29 +145,28 @@ int ebitmap_import(const unsigned char *src, struct ebitmap *dst) { size_t src_off = 0; + size_t node_limit; struct ebitmap_node *node_new; struct ebitmap_node *node_last = NULL; - size_t iter; - size_t iter_bit; - size_t iter_limit; + u32 i_byte; + u32 i_bit; unsigned char src_byte; - do { - iter_limit = src_len - src_off; - if (iter_limit >= sizeof(MAPTYPE)) { + while (src_off < src_len) { + if (src_len - src_off >= sizeof(MAPTYPE)) { if (*(MAPTYPE *)&src[src_off] == 0) { src_off += sizeof(MAPTYPE); continue; } - iter_limit = sizeof(MAPTYPE); + node_limit = sizeof(MAPTYPE); } else { - iter = src_off; - src_byte = 0; - do { - src_byte |= src[iter++]; - } while (iter < src_len && src_byte == 0); + for (src_byte = 0, i_byte = src_off; + i_byte < src_len && src_byte == 0; + i_byte++) + src_byte |= src[i_byte]; if (src_byte == 0) break; + node_limit = src_len - src_off; } node_new = kzalloc(sizeof(*node_new), GFP_ATOMIC); @@ -176,24 +175,21 @@ int ebitmap_import(const unsigned char *src, return -ENOMEM; } node_new->startbit = src_off * 8; - iter = 0; - do { + for (i_byte = 0; i_byte < node_limit; i_byte++) { src_byte = src[src_off++]; - iter_bit = iter++ * 8; - while (src_byte != 0) { + for (i_bit = i_byte * 8; src_byte != 0; i_bit++) { if (src_byte & 0x80) - node_new->map |= MAPBIT << iter_bit; - iter_bit++; + node_new->map |= MAPBIT << i_bit; src_byte <<= 1; } - } while (iter < iter_limit); + } if (node_last != NULL) node_last->next = node_new; else dst->node = node_new; node_last = node_new; - } while (src_off < src_len); + } if (likely(node_last != NULL)) dst->highbit = node_last->startbit + MAPSIZE; From e448e931309e703f51d71a557973c620ff12fbda Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Aug 2006 17:55:38 -0700 Subject: [PATCH 0648/1063] [NetLabel]: uninline selinux_netlbl_inode_permission() Uninline the selinux_netlbl_inode_permission() at the request of Andrew Morton. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- security/selinux/include/selinux_netlabel.h | 35 +-------------------- security/selinux/ss/services.c | 33 +++++++++++++------ 2 files changed, 25 insertions(+), 43 deletions(-) diff --git a/security/selinux/include/selinux_netlabel.h b/security/selinux/include/selinux_netlabel.h index d885d880540e..d69ec650cdbe 100644 --- a/security/selinux/include/selinux_netlabel.h +++ b/security/selinux/include/selinux_netlabel.h @@ -43,40 +43,7 @@ void selinux_netlbl_sk_security_init(struct sk_security_struct *ssec, int family); void selinux_netlbl_sk_clone_security(struct sk_security_struct *ssec, struct sk_security_struct *newssec); - -int __selinux_netlbl_inode_permission(struct inode *inode, int mask); -/** - * selinux_netlbl_inode_permission - Verify the socket is NetLabel labeled - * @inode: the file descriptor's inode - * @mask: the permission mask - * - * Description: - * Looks at a file's inode and if it is marked as a socket protected by - * NetLabel then verify that the socket has been labeled, if not try to label - * the socket now with the inode's SID. Returns zero on success, negative - * values on failure. - * - */ -static inline int selinux_netlbl_inode_permission(struct inode *inode, - int mask) -{ - int rc = 0; - struct inode_security_struct *isec; - struct sk_security_struct *sksec; - - if (!S_ISSOCK(inode->i_mode)) - return 0; - - isec = inode->i_security; - sksec = SOCKET_I(inode)->sk->sk_security; - down(&isec->sem); - if (unlikely(sksec->nlbl_state == NLBL_REQUIRE && - (mask & (MAY_WRITE | MAY_APPEND)))) - rc = __selinux_netlbl_inode_permission(inode, mask); - up(&isec->sem); - - return rc; -} +int selinux_netlbl_inode_permission(struct inode *inode, int mask); #else static inline void selinux_netlbl_cache_invalidate(void) { diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 4f7642c7337e..27ee28ccf266 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2544,24 +2544,39 @@ u32 selinux_netlbl_inet_conn_request(struct sk_buff *skb, u32 sock_sid) } /** - * __selinux_netlbl_inode_permission - Label a socket using NetLabel + * selinux_netlbl_inode_permission - Verify the socket is NetLabel labeled * @inode: the file descriptor's inode * @mask: the permission mask * * Description: - * Try to label a socket with the inode's SID using NetLabel. Returns zero on - * success, negative values on failure. + * Looks at a file's inode and if it is marked as a socket protected by + * NetLabel then verify that the socket has been labeled, if not try to label + * the socket now with the inode's SID. Returns zero on success, negative + * values on failure. * */ -int __selinux_netlbl_inode_permission(struct inode *inode, int mask) +int selinux_netlbl_inode_permission(struct inode *inode, int mask) { int rc; - struct socket *sock = SOCKET_I(inode); - struct sk_security_struct *sksec = sock->sk->sk_security; + struct inode_security_struct *isec; + struct sk_security_struct *sksec; + struct socket *sock; - lock_sock(sock->sk); - rc = selinux_netlbl_socket_setsid(sock, sksec->sid); - release_sock(sock->sk); + if (!S_ISSOCK(inode->i_mode)) + return 0; + + sock = SOCKET_I(inode); + isec = inode->i_security; + sksec = sock->sk->sk_security; + down(&isec->sem); + if (unlikely(sksec->nlbl_state == NLBL_REQUIRE && + (mask & (MAY_WRITE | MAY_APPEND)))) { + lock_sock(sock->sk); + rc = selinux_netlbl_socket_setsid(sock, sksec->sid); + release_sock(sock->sk); + } else + rc = 0; + up(&isec->sem); return rc; } From 7a0e1d602288370801c353221c6a938eab925053 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Aug 2006 17:56:04 -0700 Subject: [PATCH 0649/1063] [NetLabel]: add some missing #includes to various header files Add some missing include files to the NetLabel related header files. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- include/net/cipso_ipv4.h | 2 ++ include/net/netlabel.h | 1 + net/netlabel/netlabel_domainhash.h | 4 ++++ net/netlabel/netlabel_user.h | 5 +++-- security/selinux/include/selinux_netlabel.h | 9 +++++++++ 5 files changed, 19 insertions(+), 2 deletions(-) diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index 5aed72ab652b..59406e0dc5b2 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -37,6 +37,8 @@ #include #include #include +#include +#include #include /* known doi values */ diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 7cae730832c7..fc2b72fc7e07 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -31,6 +31,7 @@ #define _NETLABEL_H #include +#include #include #include diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index 9217863ce0d3..99a2287de246 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -32,6 +32,10 @@ #ifndef _NETLABEL_DOMAINHASH_H #define _NETLABEL_DOMAINHASH_H +#include +#include +#include + /* Domain hash table size */ /* XXX - currently this number is an uneducated guess */ #define NETLBL_DOMHSH_BITSIZE 7 diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h index ccf237b3a128..385a6c7488c6 100644 --- a/net/netlabel/netlabel_user.h +++ b/net/netlabel/netlabel_user.h @@ -31,11 +31,12 @@ #ifndef _NETLABEL_USER_H #define _NETLABEL_USER_H +#include #include #include -#include -#include +#include #include +#include /* NetLabel NETLINK helper functions */ diff --git a/security/selinux/include/selinux_netlabel.h b/security/selinux/include/selinux_netlabel.h index d69ec650cdbe..ecab4bddaaf4 100644 --- a/security/selinux/include/selinux_netlabel.h +++ b/security/selinux/include/selinux_netlabel.h @@ -27,6 +27,15 @@ #ifndef _SELINUX_NETLABEL_H_ #define _SELINUX_NETLABEL_H_ +#include +#include +#include +#include +#include + +#include "avc.h" +#include "objsec.h" + #ifdef CONFIG_NETLABEL void selinux_netlbl_cache_invalidate(void); int selinux_netlbl_socket_post_create(struct socket *sock, From 28a7b327b8cc8ea35662d360d3d11d60195debc9 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 30 Aug 2006 15:03:07 -0700 Subject: [PATCH 0650/1063] [PKT_SCHED] act_simple.c: make struct simp_hash_info static This patch makes the needlessly global struct simp_hash_info static. Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- net/sched/act_simple.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 8c1ab8ad8fa6..901571a67707 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -28,7 +28,7 @@ static struct tcf_common *tcf_simp_ht[SIMP_TAB_MASK + 1]; static u32 simp_idx_gen; static DEFINE_RWLOCK(simp_lock); -struct tcf_hashinfo simp_hash_info = { +static struct tcf_hashinfo simp_hash_info = { .htab = tcf_simp_ht, .hmask = SIMP_TAB_MASK, .lock = &simp_lock, From 7a42c2175703f54a3640f25dc078c8190a4f904e Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Thu, 31 Aug 2006 15:03:02 -0700 Subject: [PATCH 0651/1063] [NET]: Change somaxconn sysctl to __read_mostly Signed-off-by: Brian Haley Signed-off-by: David S. Miller --- net/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/socket.c b/net/socket.c index d6f27ed9ba6c..1bc4167e0da8 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1337,7 +1337,7 @@ asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) * ready for listening. */ -int sysctl_somaxconn = SOMAXCONN; +int sysctl_somaxconn __read_mostly = SOMAXCONN; asmlinkage long sys_listen(int fd, int backlog) { From 18adaf067cf013fc2690d3830eba99ff800795b4 Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Thu, 31 Aug 2006 15:03:36 -0700 Subject: [PATCH 0652/1063] [AF_UNIX]: Change max_dgram_qlen sysctl to __read_mostly Signed-off-by: Brian Haley Signed-off-by: David S. Miller --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 7c91c2024d49..b43a27828df5 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -117,7 +117,7 @@ #include #include -int sysctl_unix_max_dgram_qlen = 10; +int sysctl_unix_max_dgram_qlen __read_mostly = 10; struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; DEFINE_SPINLOCK(unix_table_lock); From 3015d5d4e5b15eddea272a697e83391100581932 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 31 Aug 2006 15:04:30 -0700 Subject: [PATCH 0653/1063] [RTNETLINK]: Fix typo causing wrong skb to be freed A typo introduced by myself which leads to freeing the skb containing the netlink message when it should free the newly allocated skb for the reply. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 0ebcf8488e99..63b882ac288a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -596,7 +596,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) err = rtnl_fill_ifinfo(nskb, dev, iw, iw_buf_len, RTM_NEWLINK, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 0); if (err <= 0) { - kfree_skb(skb); + kfree_skb(nskb); goto errout; } From ff9b5e0f08cb650d113eef0c654f931c0a7ae730 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 31 Aug 2006 15:11:02 -0700 Subject: [PATCH 0654/1063] [TCP]: Fix rcv mss estimate for LRO By passing a Linux-generated TSO packet straight back into Linux, Xen becomes our first LRO user :) Unfortunately, there is at least one spot in our stack that needs to be changed to cope with this. The receive MSS estimate is computed from the raw packet size. This is broken if the packet is GSO/LRO. Fortunately the real MSS can be found in gso_size so we simply need to use that if it is non-zero. Real LRO NICs should of course set the gso_size field in future. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index caf3c41dcc8c..511b738f118a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -127,7 +127,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, /* skb->len may jitter because of SACKs, even if peer * sends good full-sized frames. */ - len = skb->len; + len = skb_shinfo(skb)->gso_size ?: skb->len; if (len >= icsk->icsk_ack.rcv_mss) { icsk->icsk_ack.rcv_mss = len; } else { From a9917c06652165fe4eeb9ab7a5d1e0674e90e508 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Thu, 31 Aug 2006 15:14:32 -0700 Subject: [PATCH 0655/1063] [XFRM] STATE: Fix flusing with hash mask. This is a minor fix about transformation state flushing for net-2.6.19. Please apply it. Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 4341795eb244..9f63edd39346 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -384,7 +384,7 @@ void xfrm_state_flush(u8 proto) int i; spin_lock_bh(&xfrm_state_lock); - for (i = 0; i < xfrm_state_hmask; i++) { + for (i = 0; i <= xfrm_state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; restart: From dc435e6dac1439340eaeceef84022c4e4749796d Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Thu, 31 Aug 2006 15:18:49 -0700 Subject: [PATCH 0656/1063] [IPV6] MIP6: Fix to update IP6CB when cloned skbuff is received at HAO. Signed-off-by: Masahide NAKAMURA Signed-off-by: David S. Miller --- net/ipv6/exthdrs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 084f78c3479b..88c96b10684c 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -233,9 +233,14 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff) if (skb_cloned(skb)) { struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC); + struct inet6_skb_parm *opt2; + if (skb2 == NULL) goto discard; + opt2 = IP6CB(skb2); + memcpy(opt2, opt, sizeof(*opt2)); + kfree_skb(skb); /* update all variable using below by copied skbuff */ @@ -296,6 +301,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) { skb = *skbp; skb->h.raw += ((skb->h.raw[1]+1)<<3); + opt = IP6CB(skb); #ifdef CONFIG_IPV6_MIP6 opt->nhoff = dstbuf; #else @@ -690,6 +696,7 @@ int ipv6_parse_hopopts(struct sk_buff **skbp) if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) { skb = *skbp; skb->h.raw += (skb->h.raw[1]+1)<<3; + opt = IP6CB(skb); opt->nhoff = sizeof(struct ipv6hdr); return 1; } From fda9ef5d679b07c9d9097aaf6ef7f069d794a8f9 Mon Sep 17 00:00:00 2001 From: Dmitry Mishin Date: Thu, 31 Aug 2006 15:28:39 -0700 Subject: [PATCH 0657/1063] [NET]: Fix sk->sk_filter field access Function sk_filter() is called from tcp_v{4,6}_rcv() functions with arg needlock = 0, while socket is not locked at that moment. In order to avoid this and similar issues in the future, use rcu for sk->sk_filter field read protection. Signed-off-by: Dmitry Mishin Signed-off-by: Alexey Kuznetsov Signed-off-by: Kirill Korotaev --- include/linux/filter.h | 13 +++++++------ include/net/sock.h | 34 ++++++++++++++++----------------- net/core/filter.c | 8 ++++---- net/core/sock.c | 22 +++++++++------------ net/dccp/ipv6.c | 2 +- net/decnet/dn_nsp_in.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 4 ++-- net/packet/af_packet.c | 43 ++++++++++++++++++------------------------ net/sctp/input.c | 2 +- 10 files changed, 61 insertions(+), 71 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index c6cb8f095088..91b2e3b9251e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -25,10 +25,10 @@ struct sock_filter /* Filter block */ { - __u16 code; /* Actual filter code */ - __u8 jt; /* Jump true */ - __u8 jf; /* Jump false */ - __u32 k; /* Generic multiuse field */ + __u16 code; /* Actual filter code */ + __u8 jt; /* Jump true */ + __u8 jf; /* Jump false */ + __u32 k; /* Generic multiuse field */ }; struct sock_fprog /* Required for SO_ATTACH_FILTER. */ @@ -41,8 +41,9 @@ struct sock_fprog /* Required for SO_ATTACH_FILTER. */ struct sk_filter { atomic_t refcnt; - unsigned int len; /* Number of filter blocks */ - struct sock_filter insns[0]; + unsigned int len; /* Number of filter blocks */ + struct rcu_head rcu; + struct sock_filter insns[0]; }; static inline unsigned int sk_filter_len(struct sk_filter *fp) diff --git a/include/net/sock.h b/include/net/sock.h index 337ebec84c70..edd4d73ce7f5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -862,30 +862,24 @@ extern void sock_init_data(struct socket *sock, struct sock *sk); * */ -static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock) +static inline int sk_filter(struct sock *sk, struct sk_buff *skb) { int err; + struct sk_filter *filter; err = security_sock_rcv_skb(sk, skb); if (err) return err; - if (sk->sk_filter) { - struct sk_filter *filter; - - if (needlock) - bh_lock_sock(sk); - - filter = sk->sk_filter; - if (filter) { - unsigned int pkt_len = sk_run_filter(skb, filter->insns, - filter->len); - err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; - } - - if (needlock) - bh_unlock_sock(sk); + rcu_read_lock_bh(); + filter = sk->sk_filter; + if (filter) { + unsigned int pkt_len = sk_run_filter(skb, filter->insns, + filter->len); + err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; } + rcu_read_unlock_bh(); + return err; } @@ -897,6 +891,12 @@ static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock) * Remove a filter from a socket and release its resources. */ +static inline void sk_filter_rcu_free(struct rcu_head *rcu) +{ + struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); + kfree(fp); +} + static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp) { unsigned int size = sk_filter_len(fp); @@ -904,7 +904,7 @@ static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp) atomic_sub(size, &sk->sk_omem_alloc); if (atomic_dec_and_test(&fp->refcnt)) - kfree(fp); + call_rcu_bh(&fp->rcu, sk_filter_rcu_free); } static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp) diff --git a/net/core/filter.c b/net/core/filter.c index 5b4486a60cf6..6732782a5a40 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -422,10 +422,10 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (!err) { struct sk_filter *old_fp; - spin_lock_bh(&sk->sk_lock.slock); - old_fp = sk->sk_filter; - sk->sk_filter = fp; - spin_unlock_bh(&sk->sk_lock.slock); + rcu_read_lock_bh(); + old_fp = rcu_dereference(sk->sk_filter); + rcu_assign_pointer(sk->sk_filter, fp); + rcu_read_unlock_bh(); fp = old_fp; } diff --git a/net/core/sock.c b/net/core/sock.c index cfaf09039b02..b77e155cbe6c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -247,11 +247,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) goto out; } - /* It would be deadlock, if sock_queue_rcv_skb is used - with socket lock! We assume that users of this - function are lock free. - */ - err = sk_filter(sk, skb, 1); + err = sk_filter(sk, skb); if (err) goto out; @@ -278,7 +274,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb) { int rc = NET_RX_SUCCESS; - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard_and_relse; skb->dev = NULL; @@ -606,15 +602,15 @@ set_rcvbuf: break; case SO_DETACH_FILTER: - spin_lock_bh(&sk->sk_lock.slock); - filter = sk->sk_filter; + rcu_read_lock_bh(); + filter = rcu_dereference(sk->sk_filter); if (filter) { - sk->sk_filter = NULL; - spin_unlock_bh(&sk->sk_lock.slock); + rcu_assign_pointer(sk->sk_filter, NULL); sk_filter_release(sk, filter); + rcu_read_unlock_bh(); break; } - spin_unlock_bh(&sk->sk_lock.slock); + rcu_read_unlock_bh(); ret = -ENONET; break; @@ -884,10 +880,10 @@ void sk_free(struct sock *sk) if (sk->sk_destruct) sk->sk_destruct(sk); - filter = sk->sk_filter; + filter = rcu_dereference(sk->sk_filter); if (filter) { sk_filter_release(sk, filter); - sk->sk_filter = NULL; + rcu_assign_pointer(sk->sk_filter, NULL); } sock_disable_timestamp(sk); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index f9c5e12d7038..7a47399cf31f 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -970,7 +970,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) return dccp_v4_do_rcv(sk, skb); - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard; /* diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index 86f7f3b28e70..72ecc6e62ec4 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -586,7 +586,7 @@ static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig goto out; } - err = sk_filter(sk, skb, 0); + err = sk_filter(sk, skb); if (err) goto out; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 23b46e36b147..39b179856082 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1104,7 +1104,7 @@ process: goto discard_and_relse; nf_reset(skb); - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard_and_relse; skb->dev = NULL; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2b18918f3011..2546fc9f0a78 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1075,7 +1075,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_do_rcv(sk, skb); - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard; /* @@ -1232,7 +1232,7 @@ process: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard_and_relse; skb->dev = NULL; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 300215bdbf46..f4ccb90e6739 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -427,21 +427,24 @@ out_unlock: } #endif -static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned res) +static inline int run_filter(struct sk_buff *skb, struct sock *sk, + unsigned *snaplen) { struct sk_filter *filter; + int err = 0; - bh_lock_sock(sk); - filter = sk->sk_filter; - /* - * Our caller already checked that filter != NULL but we need to - * verify that under bh_lock_sock() to be safe - */ - if (likely(filter != NULL)) - res = sk_run_filter(skb, filter->insns, filter->len); - bh_unlock_sock(sk); + rcu_read_lock_bh(); + filter = rcu_dereference(sk->sk_filter); + if (filter != NULL) { + err = sk_run_filter(skb, filter->insns, filter->len); + if (!err) + err = -EPERM; + else if (*snaplen > err) + *snaplen = err; + } + rcu_read_unlock_bh(); - return res; + return err; } /* @@ -491,13 +494,8 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet snaplen = skb->len; - if (sk->sk_filter) { - unsigned res = run_filter(skb, sk, snaplen); - if (res == 0) - goto drop_n_restore; - if (snaplen > res) - snaplen = res; - } + if (run_filter(skb, sk, &snaplen) < 0) + goto drop_n_restore; if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= (unsigned)sk->sk_rcvbuf) @@ -593,13 +591,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe snaplen = skb->len; - if (sk->sk_filter) { - unsigned res = run_filter(skb, sk, snaplen); - if (res == 0) - goto drop_n_restore; - if (snaplen > res) - snaplen = res; - } + if (run_filter(skb, sk, &snaplen) < 0) + goto drop_n_restore; if (sk->sk_type == SOCK_DGRAM) { macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16; diff --git a/net/sctp/input.c b/net/sctp/input.c index 8a34d95602ce..03f65de75d88 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -228,7 +228,7 @@ int sctp_rcv(struct sk_buff *skb) goto discard_release; nf_reset(skb); - if (sk_filter(sk, skb, 1)) + if (sk_filter(sk, skb)) goto discard_release; /* Create an SCTP packet structure. */ From eb878e84575fbce21d2edb079eada78bfa27023d Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Thu, 31 Aug 2006 17:42:59 -0700 Subject: [PATCH 0658/1063] [IPSEC]: output mode to take an xfrm state as input param Expose IPSEC modes output path to take an xfrm state as input param. This makes it consistent with the input mode processing (which already takes the xfrm state as a param). Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- net/ipv4/xfrm4_mode_transport.c | 4 +--- net/ipv4/xfrm4_mode_tunnel.c | 3 +-- net/ipv4/xfrm4_output.c | 2 +- net/ipv6/xfrm6_mode_ro.c | 3 +-- net/ipv6/xfrm6_mode_transport.c | 3 +-- net/ipv6/xfrm6_mode_tunnel.c | 3 +-- net/ipv6/xfrm6_output.c | 2 +- 8 files changed, 8 insertions(+), 14 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 0acabf2a0a8f..4d6dc627df9b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -285,7 +285,7 @@ extern void xfrm_put_type(struct xfrm_type *type); struct xfrm_mode { int (*input)(struct xfrm_state *x, struct sk_buff *skb); - int (*output)(struct sk_buff *skb); + int (*output)(struct xfrm_state *x,struct sk_buff *skb); struct module *owner; unsigned int encap; diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c index a9e6b3dd19c9..92676b7e4034 100644 --- a/net/ipv4/xfrm4_mode_transport.c +++ b/net/ipv4/xfrm4_mode_transport.c @@ -21,9 +21,8 @@ * On exit, skb->h will be set to the start of the payload to be processed * by x->type->output and skb->nh will be set to the top IP header. */ -static int xfrm4_transport_output(struct sk_buff *skb) +static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) { - struct xfrm_state *x; struct iphdr *iph; int ihl; @@ -33,7 +32,6 @@ static int xfrm4_transport_output(struct sk_buff *skb) ihl = iph->ihl * 4; skb->h.raw += ihl; - x = skb->dst->xfrm; skb->nh.raw = memmove(skb_push(skb, x->props.header_len), iph, ihl); return 0; } diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 13cafbe56ce3..e23c21d31a53 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -33,10 +33,9 @@ static inline void ipip_ecn_decapsulate(struct sk_buff *skb) * On exit, skb->h will be set to the start of the payload to be processed * by x->type->output and skb->nh will be set to the top IP header. */ -static int xfrm4_tunnel_output(struct sk_buff *skb) +static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { struct dst_entry *dst = skb->dst; - struct xfrm_state *x = dst->xfrm; struct iphdr *iph, *top_iph; int flags; diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 5fd115f0c547..04403fb01a58 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -66,7 +66,7 @@ static int xfrm4_output_one(struct sk_buff *skb) if (err) goto error; - err = x->mode->output(skb); + err = x->mode->output(x, skb); if (err) goto error; diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c index c11c335312f9..6031c16d46ca 100644 --- a/net/ipv6/xfrm6_mode_ro.c +++ b/net/ipv6/xfrm6_mode_ro.c @@ -43,9 +43,8 @@ * its absence, that of the top IP header. The value of skb->data will always * point to the top IP header. */ -static int xfrm6_ro_output(struct sk_buff *skb) +static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb) { - struct xfrm_state *x = skb->dst->xfrm; struct ipv6hdr *iph; u8 *prevhdr; int hdr_len; diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c index a5dce216024d..3a4b39b12bad 100644 --- a/net/ipv6/xfrm6_mode_transport.c +++ b/net/ipv6/xfrm6_mode_transport.c @@ -25,9 +25,8 @@ * its absence, that of the top IP header. The value of skb->data will always * point to the top IP header. */ -static int xfrm6_transport_output(struct sk_buff *skb) +static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) { - struct xfrm_state *x = skb->dst->xfrm; struct ipv6hdr *iph; u8 *prevhdr; int hdr_len; diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 8af79be2edca..5e7d8a7d6414 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -37,10 +37,9 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) * its absence, that of the top IP header. The value of skb->data will always * point to the top IP header. */ -static int xfrm6_tunnel_output(struct sk_buff *skb) +static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { struct dst_entry *dst = skb->dst; - struct xfrm_state *x = dst->xfrm; struct ipv6hdr *iph, *top_iph; int dsfield; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index db58104e710b..c260ea104c52 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -65,7 +65,7 @@ static int xfrm6_output_one(struct sk_buff *skb) if (err) goto error; - err = x->mode->output(skb); + err = x->mode->output(x, skb); if (err) goto error; From d1d9facfd1b326e0df587c96f0ee55de2ae9f946 Mon Sep 17 00:00:00 2001 From: James Morris Date: Fri, 1 Sep 2006 00:32:12 -0700 Subject: [PATCH 0659/1063] [XFRM]: remove xerr_idxp from __xfrm_policy_check() It seems that during the MIPv6 respin, some code which was originally conditionally compiled around CONFIG_XFRM_ADVANCED was accidently left in after the config option was removed. This patch removes an extraneous pointer (xerr_idxp) which is no longer needed. Signed-off-by: James Morris Acked-by: Masahide NAKAMURA Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7db1c48537f0..537854fe47ca 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1514,8 +1514,7 @@ static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp { for (; k < sp->len; k++) { if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) { - if (idxp) - *idxp = k; + *idxp = k; return 1; } } @@ -1534,7 +1533,6 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, struct flowi fl; u8 fl_dir = policy_to_flow_dir(dir); int xerr_idx = -1; - int *xerr_idxp = &xerr_idx; if (xfrm_decode_session(skb, &fl, family) < 0) return 0; @@ -1560,7 +1558,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, xfrm_policy_lookup); if (!pol) { - if (skb->sp && secpath_has_nontransport(skb->sp, 0, xerr_idxp)) { + if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) { xfrm_secpath_reject(xerr_idx, skb, &fl); return 0; } @@ -1619,13 +1617,14 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, for (i = xfrm_nr-1, k = 0; i >= 0; i--) { k = xfrm_policy_ok(tpp[i], sp, k, family); if (k < 0) { - if (k < -1 && xerr_idxp) - *xerr_idxp = -(2+k); + if (k < -1) + /* "-2 - errored_index" returned */ + xerr_idx = -(2+k); goto reject; } } - if (secpath_has_nontransport(sp, k, xerr_idxp)) + if (secpath_has_nontransport(sp, k, &xerr_idx)) goto reject; xfrm_pols_put(pols, npols); From 78e5b8916e7db119850f57ce8548fbb9767078fc Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 13 Sep 2006 20:35:36 -0700 Subject: [PATCH 0660/1063] [RTNETLINK]: Fix netdevice name corruption When changing a device by ifindex without including a IFLA_IFNAME attribute, the ifname variable contains random garbage and is used to change the device name. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 63b882ac288a..d8e25e08cb7e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -394,6 +394,8 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (tb[IFLA_IFNAME]) nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + else + ifname[0] = '\0'; err = -EINVAL; ifm = nlmsg_data(nlh); From eb328111efde7bca782f340fe805756039ec6a0c Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 18 Sep 2006 00:01:59 -0700 Subject: [PATCH 0661/1063] [GENL]: Provide more information to userspace about registered genl families Additionaly exports the following information when providing the list of registered generic netlink families: - protocol version - header size - maximum number of attributes - list of available operations including - id - flags - avaiability of policy and doit/dumpit function libnl HEAD provides a utility to read this new information: 0x0010 nlctrl version 1 hdrsize 0 maxattr 6 op GETFAMILY (0x03) [POLICY,DOIT,DUMPIT] 0x0011 NLBL_MGMT version 1 hdrsize 0 maxattr 0 op unknown (0x02) [DOIT] op unknown (0x03) [DOIT] .... Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/genetlink.h | 18 ++++++++++++++++++ include/net/genetlink.h | 2 -- net/netlink/genetlink.c | 40 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h index 84f12a41dc01..9049dc65ae51 100644 --- a/include/linux/genetlink.h +++ b/include/linux/genetlink.h @@ -16,6 +16,8 @@ struct genlmsghdr { #define GENL_HDRLEN NLMSG_ALIGN(sizeof(struct genlmsghdr)) +#define GENL_ADMIN_PERM 0x01 + /* * List of reserved static generic netlink identifiers: */ @@ -43,9 +45,25 @@ enum { CTRL_ATTR_UNSPEC, CTRL_ATTR_FAMILY_ID, CTRL_ATTR_FAMILY_NAME, + CTRL_ATTR_VERSION, + CTRL_ATTR_HDRSIZE, + CTRL_ATTR_MAXATTR, + CTRL_ATTR_OPS, __CTRL_ATTR_MAX, }; #define CTRL_ATTR_MAX (__CTRL_ATTR_MAX - 1) +enum { + CTRL_ATTR_OP_UNSPEC, + CTRL_ATTR_OP_ID, + CTRL_ATTR_OP_FLAGS, + CTRL_ATTR_OP_POLICY, + CTRL_ATTR_OP_DOIT, + CTRL_ATTR_OP_DUMPIT, + __CTRL_ATTR_OP_MAX, +}; + +#define CTRL_ATTR_OP_MAX (__CTRL_ATTR_OP_MAX - 1) + #endif /* __LINUX_GENERIC_NETLINK_H */ diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 97d6d3aba9d2..4a38d85e4e25 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -27,8 +27,6 @@ struct genl_family struct list_head family_list; /* private */ }; -#define GENL_ADMIN_PERM 0x01 - /** * struct genl_info - receiving information * @snd_seq: sending sequence number diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 3ac942cdb677..49bc2db7982b 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -387,7 +387,10 @@ static void genl_rcv(struct sock *sk, int len) static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { + struct nlattr *nla_ops; + struct genl_ops *ops; void *hdr; + int idx = 1; hdr = genlmsg_put(skb, pid, seq, GENL_ID_CTRL, 0, flags, cmd, family->version); @@ -396,6 +399,37 @@ static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, NLA_PUT_STRING(skb, CTRL_ATTR_FAMILY_NAME, family->name); NLA_PUT_U16(skb, CTRL_ATTR_FAMILY_ID, family->id); + NLA_PUT_U32(skb, CTRL_ATTR_VERSION, family->version); + NLA_PUT_U32(skb, CTRL_ATTR_HDRSIZE, family->hdrsize); + NLA_PUT_U32(skb, CTRL_ATTR_MAXATTR, family->maxattr); + + nla_ops = nla_nest_start(skb, CTRL_ATTR_OPS); + if (nla_ops == NULL) + goto nla_put_failure; + + list_for_each_entry(ops, &family->ops_list, ops_list) { + struct nlattr *nest; + + nest = nla_nest_start(skb, idx++); + if (nest == NULL) + goto nla_put_failure; + + NLA_PUT_U32(skb, CTRL_ATTR_OP_ID, ops->cmd); + NLA_PUT_U32(skb, CTRL_ATTR_OP_FLAGS, ops->flags); + + if (ops->policy) + NLA_PUT_FLAG(skb, CTRL_ATTR_OP_POLICY); + + if (ops->doit) + NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DOIT); + + if (ops->dumpit) + NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DUMPIT); + + nla_nest_end(skb, nest); + } + + nla_nest_end(skb, nla_ops); return genlmsg_end(skb, hdr); @@ -411,6 +445,9 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) int chains_to_skip = cb->args[0]; int fams_to_skip = cb->args[1]; + if (chains_to_skip != 0) + genl_lock(); + for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { if (i < chains_to_skip) continue; @@ -428,6 +465,9 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) } errout: + if (chains_to_skip != 0) + genl_unlock(); + cb->args[0] = i; cb->args[1] = n; From 9c1ea148ad8bb06538b43908891afedebeaf361b Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Mon, 18 Sep 2006 00:03:41 -0700 Subject: [PATCH 0662/1063] [BRIDGE]: Change sysctl tunables to __read_mostly Change some bridge sysctl tunables to __read_mostly. Signed-off-by: Brian Haley Signed-off-by: David S. Miller --- net/bridge/br_netfilter.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index cf80dd0e896d..ac181be13d83 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -53,10 +53,10 @@ #ifdef CONFIG_SYSCTL static struct ctl_table_header *brnf_sysctl_header; -static int brnf_call_iptables = 1; -static int brnf_call_ip6tables = 1; -static int brnf_call_arptables = 1; -static int brnf_filter_vlan_tagged = 1; +static int brnf_call_iptables __read_mostly = 1; +static int brnf_call_ip6tables __read_mostly = 1; +static int brnf_call_arptables __read_mostly = 1; +static int brnf_filter_vlan_tagged __read_mostly = 1; #else #define brnf_filter_vlan_tagged 1 #endif From 4cbf1cae9f08c76ed92700090a69a5b1f1f6a982 Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Mon, 18 Sep 2006 00:04:22 -0700 Subject: [PATCH 0663/1063] [SCTP]: Change globals to __read_mostly Change sctp globals to __read_mostly. Signed-off-by: Brian Haley Signed-off-by: David S. Miller --- net/sctp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 5692ef5485d3..d9dd4c47bc29 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -61,7 +61,7 @@ #include /* Global data structures. */ -struct sctp_globals sctp_globals; +struct sctp_globals sctp_globals __read_mostly; struct proc_dir_entry *proc_net_sctp; DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly; From 94aec08ea426903a3fb3cafd4d8b900cd50df702 Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Mon, 18 Sep 2006 00:05:22 -0700 Subject: [PATCH 0664/1063] [NETFILTER]: Change tunables to __read_mostly Change some netfilter tunables to __read_mostly. Also fixed some incorrect file reference comments while I was in there. (this will be my last __read_mostly patch unless someone points out something else that needs it) Signed-off-by: Brian Haley Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_core.c | 6 ++--- .../netfilter/ip_conntrack_proto_generic.c | 2 +- net/ipv4/netfilter/ip_conntrack_proto_icmp.c | 2 +- net/ipv4/netfilter/ip_conntrack_proto_sctp.c | 14 +++++------ net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 24 +++++++++---------- net/ipv4/netfilter/ip_conntrack_proto_udp.c | 4 ++-- net/ipv4/netfilter/ip_conntrack_standalone.c | 4 ++-- net/ipv4/netfilter/ip_queue.c | 2 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2 +- net/ipv6/netfilter/ip6_queue.c | 2 +- .../netfilter/nf_conntrack_l3proto_ipv6.c | 2 +- .../netfilter/nf_conntrack_proto_icmpv6.c | 2 +- net/ipv6/netfilter/nf_conntrack_reasm.c | 6 ++--- net/netfilter/nf_conntrack_core.c | 6 ++--- net/netfilter/nf_conntrack_proto_generic.c | 2 +- net/netfilter/nf_conntrack_proto_sctp.c | 14 +++++------ net/netfilter/nf_conntrack_proto_tcp.c | 24 +++++++++---------- net/netfilter/nf_conntrack_proto_udp.c | 4 ++-- net/netfilter/nf_conntrack_standalone.c | 2 +- 19 files changed, 62 insertions(+), 62 deletions(-) diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index aa459177c3f8..5da25ad50309 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -66,13 +66,13 @@ void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL; LIST_HEAD(ip_conntrack_expect_list); struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO]; static LIST_HEAD(helpers); -unsigned int ip_conntrack_htable_size = 0; -int ip_conntrack_max; +unsigned int ip_conntrack_htable_size __read_mostly = 0; +int ip_conntrack_max __read_mostly; struct list_head *ip_conntrack_hash; static kmem_cache_t *ip_conntrack_cachep __read_mostly; static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly; struct ip_conntrack ip_conntrack_untracked; -unsigned int ip_ct_log_invalid; +unsigned int ip_ct_log_invalid __read_mostly; static LIST_HEAD(unconfirmed); static int ip_conntrack_vmalloc; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c index f891308b5e4c..36f2b5e5d80a 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_generic.c @@ -12,7 +12,7 @@ #include #include -unsigned int ip_ct_generic_timeout = 600*HZ; +unsigned int ip_ct_generic_timeout __read_mostly = 600*HZ; static int generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c index 23f1c504586d..09c40ebe3345 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c @@ -21,7 +21,7 @@ #include #include -unsigned int ip_ct_icmp_timeout = 30*HZ; +unsigned int ip_ct_icmp_timeout __read_mostly = 30*HZ; #if 0 #define DEBUGP printk diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c index 2d3612cd5f18..b908a4842e18 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c @@ -58,13 +58,13 @@ static const char *sctp_conntrack_names[] = { #define HOURS * 60 MINS #define DAYS * 24 HOURS -static unsigned int ip_ct_sctp_timeout_closed = 10 SECS; -static unsigned int ip_ct_sctp_timeout_cookie_wait = 3 SECS; -static unsigned int ip_ct_sctp_timeout_cookie_echoed = 3 SECS; -static unsigned int ip_ct_sctp_timeout_established = 5 DAYS; -static unsigned int ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000; -static unsigned int ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000; -static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS; +static unsigned int ip_ct_sctp_timeout_closed __read_mostly = 10 SECS; +static unsigned int ip_ct_sctp_timeout_cookie_wait __read_mostly = 3 SECS; +static unsigned int ip_ct_sctp_timeout_cookie_echoed __read_mostly = 3 SECS; +static unsigned int ip_ct_sctp_timeout_established __read_mostly = 5 DAYS; +static unsigned int ip_ct_sctp_timeout_shutdown_sent __read_mostly = 300 SECS / 1000; +static unsigned int ip_ct_sctp_timeout_shutdown_recd __read_mostly = 300 SECS / 1000; +static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS; static const unsigned int * sctp_timeouts[] = { NULL, /* SCTP_CONNTRACK_NONE */ diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 9de81ff645d5..75a7237eb8c1 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -48,19 +48,19 @@ static DEFINE_RWLOCK(tcp_lock); /* "Be conservative in what you do, be liberal in what you accept from others." If it's non-zero, we mark only out of window RST segments as INVALID. */ -int ip_ct_tcp_be_liberal = 0; +int ip_ct_tcp_be_liberal __read_mostly = 0; /* When connection is picked up from the middle, how many packets are required to pass in each direction when we assume we are in sync - if any side uses window scaling, we lost the game. If it is set to zero, we disable picking up already established connections. */ -int ip_ct_tcp_loose = 3; +int ip_ct_tcp_loose __read_mostly = 3; /* Max number of the retransmitted packets without receiving an (acceptable) ACK from the destination. If this number is reached, a shorter timer will be started. */ -int ip_ct_tcp_max_retrans = 3; +int ip_ct_tcp_max_retrans __read_mostly = 3; /* FIXME: Examine ipfilter's timeouts and conntrack transitions more closely. They're more complex. --RR */ @@ -83,19 +83,19 @@ static const char *tcp_conntrack_names[] = { #define HOURS * 60 MINS #define DAYS * 24 HOURS -unsigned int ip_ct_tcp_timeout_syn_sent = 2 MINS; -unsigned int ip_ct_tcp_timeout_syn_recv = 60 SECS; -unsigned int ip_ct_tcp_timeout_established = 5 DAYS; -unsigned int ip_ct_tcp_timeout_fin_wait = 2 MINS; -unsigned int ip_ct_tcp_timeout_close_wait = 60 SECS; -unsigned int ip_ct_tcp_timeout_last_ack = 30 SECS; -unsigned int ip_ct_tcp_timeout_time_wait = 2 MINS; -unsigned int ip_ct_tcp_timeout_close = 10 SECS; +unsigned int ip_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS; +unsigned int ip_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS; +unsigned int ip_ct_tcp_timeout_established __read_mostly = 5 DAYS; +unsigned int ip_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS; +unsigned int ip_ct_tcp_timeout_close_wait __read_mostly = 60 SECS; +unsigned int ip_ct_tcp_timeout_last_ack __read_mostly = 30 SECS; +unsigned int ip_ct_tcp_timeout_time_wait __read_mostly = 2 MINS; +unsigned int ip_ct_tcp_timeout_close __read_mostly = 10 SECS; /* RFC1122 says the R2 limit should be at least 100 seconds. Linux uses 15 packets as limit, which corresponds to ~13-30min depending on RTO. */ -unsigned int ip_ct_tcp_timeout_max_retrans = 5 MINS; +unsigned int ip_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS; static const unsigned int * tcp_timeouts[] = { NULL, /* TCP_CONNTRACK_NONE */ diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c index e58e52f14553..d0e8a16970ec 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c @@ -18,8 +18,8 @@ #include #include -unsigned int ip_ct_udp_timeout = 30*HZ; -unsigned int ip_ct_udp_timeout_stream = 180*HZ; +unsigned int ip_ct_udp_timeout __read_mostly = 30*HZ; +unsigned int ip_ct_udp_timeout_stream __read_mostly = 180*HZ; static int udp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index 7a9fa04a467a..3f5d495b853b 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -534,7 +534,7 @@ static struct nf_hook_ops ip_conntrack_ops[] = { /* Sysctl support */ -int ip_conntrack_checksum = 1; +int ip_conntrack_checksum __read_mostly = 1; #ifdef CONFIG_SYSCTL @@ -563,7 +563,7 @@ extern unsigned int ip_ct_udp_timeout_stream; /* From ip_conntrack_proto_icmp.c */ extern unsigned int ip_ct_icmp_timeout; -/* From ip_conntrack_proto_icmp.c */ +/* From ip_conntrack_proto_generic.c */ extern unsigned int ip_ct_generic_timeout; /* Log invalid packets of a given protocol */ diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 276a964ee6cf..80060cbe4a07 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -53,7 +53,7 @@ struct ipq_queue_entry { typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); static unsigned char copy_mode = IPQ_COPY_NONE; -static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT; +static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; static DEFINE_RWLOCK(queue_lock); static int peer_pid; static unsigned int copy_range; diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 663a73ee3f2f..790f00d500c3 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -25,7 +25,7 @@ #include #include -unsigned long nf_ct_icmp_timeout = 30*HZ; +unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ; #if 0 #define DEBUGP printk diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index c01c126224e2..d322e8395794 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -57,7 +57,7 @@ struct ipq_queue_entry { typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); static unsigned char copy_mode = IPQ_COPY_NONE; -static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT; +static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; static DEFINE_RWLOCK(queue_lock); static int peer_pid; static unsigned int copy_range; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index c2ab38ff46af..e5e53fff9e38 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -335,7 +335,7 @@ static struct nf_hook_ops ipv6_conntrack_ops[] = { /* From nf_conntrack_proto_icmpv6.c */ extern unsigned int nf_ct_icmpv6_timeout; -/* From nf_conntrack_frag6.c */ +/* From nf_conntrack_reasm.c */ extern unsigned int nf_ct_frag6_timeout; extern unsigned int nf_ct_frag6_low_thresh; extern unsigned int nf_ct_frag6_high_thresh; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index ef18a7b7014b..34d447208ffd 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -33,7 +33,7 @@ #include #include -unsigned long nf_ct_icmpv6_timeout = 30*HZ; +unsigned long nf_ct_icmpv6_timeout __read_mostly = 30*HZ; #if 0 #define DEBUGP printk diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 7a4e4c2e3197..bf93c1ea6be9 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -54,9 +54,9 @@ #define NF_CT_FRAG6_LOW_THRESH 196608 /* == 192*1024 */ #define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT -unsigned int nf_ct_frag6_high_thresh = 256*1024; -unsigned int nf_ct_frag6_low_thresh = 192*1024; -unsigned long nf_ct_frag6_timeout = IPV6_FRAG_TIMEOUT; +unsigned int nf_ct_frag6_high_thresh __read_mostly = 256*1024; +unsigned int nf_ct_frag6_low_thresh __read_mostly = 192*1024; +unsigned long nf_ct_frag6_timeout __read_mostly = IPV6_FRAG_TIMEOUT; struct nf_ct_frag6_skb_cb { diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 8f2261965a68..3b64dbee6620 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -77,12 +77,12 @@ LIST_HEAD(nf_conntrack_expect_list); struct nf_conntrack_protocol **nf_ct_protos[PF_MAX]; struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX]; static LIST_HEAD(helpers); -unsigned int nf_conntrack_htable_size = 0; -int nf_conntrack_max; +unsigned int nf_conntrack_htable_size __read_mostly = 0; +int nf_conntrack_max __read_mostly; struct list_head *nf_conntrack_hash; static kmem_cache_t *nf_conntrack_expect_cachep; struct nf_conn nf_conntrack_untracked; -unsigned int nf_ct_log_invalid; +unsigned int nf_ct_log_invalid __read_mostly; static LIST_HEAD(unconfirmed); static int nf_conntrack_vmalloc; diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 46bc27e2756d..26408bb0955b 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -17,7 +17,7 @@ #include #include -unsigned int nf_ct_generic_timeout = 600*HZ; +unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ; static int generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 9bd8a7877fd5..af568777372b 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -64,13 +64,13 @@ static const char *sctp_conntrack_names[] = { #define HOURS * 60 MINS #define DAYS * 24 HOURS -static unsigned int nf_ct_sctp_timeout_closed = 10 SECS; -static unsigned int nf_ct_sctp_timeout_cookie_wait = 3 SECS; -static unsigned int nf_ct_sctp_timeout_cookie_echoed = 3 SECS; -static unsigned int nf_ct_sctp_timeout_established = 5 DAYS; -static unsigned int nf_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000; -static unsigned int nf_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000; -static unsigned int nf_ct_sctp_timeout_shutdown_ack_sent = 3 SECS; +static unsigned int nf_ct_sctp_timeout_closed __read_mostly = 10 SECS; +static unsigned int nf_ct_sctp_timeout_cookie_wait __read_mostly = 3 SECS; +static unsigned int nf_ct_sctp_timeout_cookie_echoed __read_mostly = 3 SECS; +static unsigned int nf_ct_sctp_timeout_established __read_mostly = 5 DAYS; +static unsigned int nf_ct_sctp_timeout_shutdown_sent __read_mostly = 300 SECS / 1000; +static unsigned int nf_ct_sctp_timeout_shutdown_recd __read_mostly = 300 SECS / 1000; +static unsigned int nf_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS; static unsigned int * sctp_timeouts[] = { NULL, /* SCTP_CONNTRACK_NONE */ diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 308d2abd7ee5..9fc0ee61f92a 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -57,19 +57,19 @@ static DEFINE_RWLOCK(tcp_lock); /* "Be conservative in what you do, be liberal in what you accept from others." If it's non-zero, we mark only out of window RST segments as INVALID. */ -int nf_ct_tcp_be_liberal = 0; +int nf_ct_tcp_be_liberal __read_mostly = 0; /* When connection is picked up from the middle, how many packets are required to pass in each direction when we assume we are in sync - if any side uses window scaling, we lost the game. If it is set to zero, we disable picking up already established connections. */ -int nf_ct_tcp_loose = 3; +int nf_ct_tcp_loose __read_mostly = 3; /* Max number of the retransmitted packets without receiving an (acceptable) ACK from the destination. If this number is reached, a shorter timer will be started. */ -int nf_ct_tcp_max_retrans = 3; +int nf_ct_tcp_max_retrans __read_mostly = 3; /* FIXME: Examine ipfilter's timeouts and conntrack transitions more closely. They're more complex. --RR */ @@ -92,19 +92,19 @@ static const char *tcp_conntrack_names[] = { #define HOURS * 60 MINS #define DAYS * 24 HOURS -unsigned int nf_ct_tcp_timeout_syn_sent = 2 MINS; -unsigned int nf_ct_tcp_timeout_syn_recv = 60 SECS; -unsigned int nf_ct_tcp_timeout_established = 5 DAYS; -unsigned int nf_ct_tcp_timeout_fin_wait = 2 MINS; -unsigned int nf_ct_tcp_timeout_close_wait = 60 SECS; -unsigned int nf_ct_tcp_timeout_last_ack = 30 SECS; -unsigned int nf_ct_tcp_timeout_time_wait = 2 MINS; -unsigned int nf_ct_tcp_timeout_close = 10 SECS; +unsigned int nf_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS; +unsigned int nf_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS; +unsigned int nf_ct_tcp_timeout_established __read_mostly = 5 DAYS; +unsigned int nf_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS; +unsigned int nf_ct_tcp_timeout_close_wait __read_mostly = 60 SECS; +unsigned int nf_ct_tcp_timeout_last_ack __read_mostly = 30 SECS; +unsigned int nf_ct_tcp_timeout_time_wait __read_mostly = 2 MINS; +unsigned int nf_ct_tcp_timeout_close __read_mostly = 10 SECS; /* RFC1122 says the R2 limit should be at least 100 seconds. Linux uses 15 packets as limit, which corresponds to ~13-30min depending on RTO. */ -unsigned int nf_ct_tcp_timeout_max_retrans = 5 MINS; +unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS; static unsigned int * tcp_timeouts[] = { NULL, /* TCP_CONNTRACK_NONE */ diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index d36e03139e8b..d28981cf9af5 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -27,8 +27,8 @@ #include #include -unsigned int nf_ct_udp_timeout = 30*HZ; -unsigned int nf_ct_udp_timeout_stream = 180*HZ; +unsigned int nf_ct_udp_timeout __read_mostly = 30*HZ; +unsigned int nf_ct_udp_timeout_stream __read_mostly = 180*HZ; static int udp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 4ef836699962..9a1de0ca475b 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -428,7 +428,7 @@ static struct file_operations ct_cpu_seq_fops = { /* Sysctl support */ -int nf_conntrack_checksum = 1; +int nf_conntrack_checksum __read_mostly = 1; #ifdef CONFIG_SYSCTL From 461d8837faac141f4676bf451b3339d0e48656d1 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 18 Sep 2006 00:09:49 -0700 Subject: [PATCH 0665/1063] [IPV6] address: Convert address addition to new netlink api Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 72 ++++++++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 27 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index fc9cff3426c4..52ba96a64a1f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2868,6 +2868,29 @@ restart: spin_unlock_bh(&addrconf_verify_lock); } +static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local) +{ + struct in6_addr *pfx = NULL; + + if (addr) + pfx = nla_data(addr); + + if (local) { + if (pfx && nla_memcmp(local, pfx, sizeof(*pfx))) + pfx = NULL; + else + pfx = nla_data(local); + } + + return pfx; +} + +static struct nla_policy ifa_ipv6_policy[IFA_MAX+1] __read_mostly = { + [IFA_ADDRESS] = { .len = sizeof(struct in6_addr) }, + [IFA_LOCAL] = { .len = sizeof(struct in6_addr) }, + [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, +}; + static int inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { @@ -2945,46 +2968,41 @@ inet6_addr_modify(int ifindex, struct in6_addr *pfx, static int inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct rtattr **rta = arg; - struct ifaddrmsg *ifm = NLMSG_DATA(nlh); + struct ifaddrmsg *ifm; + struct nlattr *tb[IFA_MAX+1]; struct in6_addr *pfx; - __u32 valid_lft = INFINITY_LIFE_TIME, prefered_lft = INFINITY_LIFE_TIME; + u32 valid_lft, preferred_lft; + int err; - pfx = NULL; - if (rta[IFA_ADDRESS-1]) { - if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx)) - return -EINVAL; - pfx = RTA_DATA(rta[IFA_ADDRESS-1]); - } - if (rta[IFA_LOCAL-1]) { - if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*pfx) || - (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))) - return -EINVAL; - pfx = RTA_DATA(rta[IFA_LOCAL-1]); - } + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); + if (err < 0) + return err; + + ifm = nlmsg_data(nlh); + pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); if (pfx == NULL) return -EINVAL; - if (rta[IFA_CACHEINFO-1]) { + if (tb[IFA_CACHEINFO]) { struct ifa_cacheinfo *ci; - if (RTA_PAYLOAD(rta[IFA_CACHEINFO-1]) < sizeof(*ci)) - return -EINVAL; - ci = RTA_DATA(rta[IFA_CACHEINFO-1]); + + ci = nla_data(tb[IFA_CACHEINFO]); valid_lft = ci->ifa_valid; - prefered_lft = ci->ifa_prefered; + preferred_lft = ci->ifa_prefered; + } else { + preferred_lft = INFINITY_LIFE_TIME; + valid_lft = INFINITY_LIFE_TIME; } if (nlh->nlmsg_flags & NLM_F_REPLACE) { - int ret; - ret = inet6_addr_modify(ifm->ifa_index, pfx, - prefered_lft, valid_lft); - if (ret == 0 || !(nlh->nlmsg_flags & NLM_F_CREATE)) - return ret; + err = inet6_addr_modify(ifm->ifa_index, pfx, + preferred_lft, valid_lft); + if (err == 0 || !(nlh->nlmsg_flags & NLM_F_CREATE)) + return err; } return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen, - prefered_lft, valid_lft); - + preferred_lft, valid_lft); } /* Maximum length of ifa_cacheinfo attributes */ From b933f7166ba376967f88a598ff04256f6d1b0b21 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 18 Sep 2006 00:10:19 -0700 Subject: [PATCH 0666/1063] [IPV6] address: Convert address deletion to new netlink api Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 52ba96a64a1f..61627036eb2b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2894,22 +2894,17 @@ static struct nla_policy ifa_ipv6_policy[IFA_MAX+1] __read_mostly = { static int inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct rtattr **rta = arg; - struct ifaddrmsg *ifm = NLMSG_DATA(nlh); + struct ifaddrmsg *ifm; + struct nlattr *tb[IFA_MAX+1]; struct in6_addr *pfx; + int err; - pfx = NULL; - if (rta[IFA_ADDRESS-1]) { - if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx)) - return -EINVAL; - pfx = RTA_DATA(rta[IFA_ADDRESS-1]); - } - if (rta[IFA_LOCAL-1]) { - if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*pfx) || - (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))) - return -EINVAL; - pfx = RTA_DATA(rta[IFA_LOCAL-1]); - } + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); + if (err < 0) + return err; + + ifm = nlmsg_data(nlh); + pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); if (pfx == NULL) return -EINVAL; From 1b29fc2c8bf42d8fc5310f3770d7fd7ddf4386c0 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 18 Sep 2006 00:10:50 -0700 Subject: [PATCH 0667/1063] [IPV6] address: Convert address lookup to new netlink api Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 58 +++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 61627036eb2b..b2c38b3edb39 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3234,58 +3234,54 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) return inet6_dump_addr(skb, cb, type); } -static int inet6_rtm_getaddr(struct sk_buff *in_skb, - struct nlmsghdr* nlh, void *arg) +static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, + void *arg) { - struct rtattr **rta = arg; - struct ifaddrmsg *ifm = NLMSG_DATA(nlh); + struct ifaddrmsg *ifm; + struct nlattr *tb[IFA_MAX+1]; struct in6_addr *addr = NULL; struct net_device *dev = NULL; struct inet6_ifaddr *ifa; struct sk_buff *skb; - int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE); + int payload = sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE; int err; - if (rta[IFA_ADDRESS-1]) { - if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*addr)) - return -EINVAL; - addr = RTA_DATA(rta[IFA_ADDRESS-1]); - } - if (rta[IFA_LOCAL-1]) { - if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*addr) || - (addr && memcmp(addr, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*addr)))) - return -EINVAL; - addr = RTA_DATA(rta[IFA_LOCAL-1]); - } - if (addr == NULL) - return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); + if (err < 0) + goto errout; + addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); + if (addr == NULL) { + err = -EINVAL; + goto errout; + } + + ifm = nlmsg_data(nlh); if (ifm->ifa_index) dev = __dev_get_by_index(ifm->ifa_index); - if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) - return -EADDRNOTAVAIL; - - if ((skb = alloc_skb(size, GFP_KERNEL)) == NULL) { - err = -ENOBUFS; - goto out; + if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) { + err = -EADDRNOTAVAIL; + goto errout; + } + + if ((skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL)) == NULL) { + err = -ENOBUFS; + goto errout_ifa; } - NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWADDR, 0); if (err < 0) { - err = -EMSGSIZE; - goto out_free; + kfree_skb(skb); + goto errout_ifa; } err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); -out: +errout_ifa: in6_ifa_put(ifa); +errout: return err; -out_free: - kfree_skb(skb); - goto out; } static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) From 85486af00b620ebe26fe0fa72172c115667a2fba Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 18 Sep 2006 00:11:24 -0700 Subject: [PATCH 0668/1063] [IPV6] address: Add put_cacheinfo() to dump struct cacheinfo Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 72 ++++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 34 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b2c38b3edb39..d546f0e74530 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3000,6 +3000,21 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) preferred_lft, valid_lft); } +static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, + unsigned long tstamp, u32 preferred, u32 valid) +{ + struct ifa_cacheinfo ci; + + ci.cstamp = (u32)(TIME_DELTA(cstamp, INITIAL_JIFFIES) / HZ * 100 + + TIME_DELTA(cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); + ci.tstamp = (u32)(TIME_DELTA(tstamp, INITIAL_JIFFIES) / HZ * 100 + + TIME_DELTA(tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); + ci.ifa_prefered = preferred; + ci.ifa_valid = valid; + + return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); +} + /* Maximum length of ifa_cacheinfo attributes */ #define INET6_IFADDR_RTA_SPACE \ RTA_SPACE(16) /* IFA_ADDRESS */ + \ @@ -3010,8 +3025,8 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - struct ifa_cacheinfo ci; unsigned char *b = skb->tail; + u32 preferred, valid; nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); ifm = NLMSG_DATA(nlh); @@ -3028,23 +3043,22 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, ifm->ifa_index = ifa->idev->dev->ifindex; RTA_PUT(skb, IFA_ADDRESS, 16, &ifa->addr); if (!(ifa->flags&IFA_F_PERMANENT)) { - ci.ifa_prefered = ifa->prefered_lft; - ci.ifa_valid = ifa->valid_lft; - if (ci.ifa_prefered != INFINITY_LIFE_TIME) { + preferred = ifa->prefered_lft; + valid = ifa->valid_lft; + if (preferred != INFINITY_LIFE_TIME) { long tval = (jiffies - ifa->tstamp)/HZ; - ci.ifa_prefered -= tval; - if (ci.ifa_valid != INFINITY_LIFE_TIME) - ci.ifa_valid -= tval; + preferred -= tval; + if (valid != INFINITY_LIFE_TIME) + valid -= tval; } } else { - ci.ifa_prefered = INFINITY_LIFE_TIME; - ci.ifa_valid = INFINITY_LIFE_TIME; + preferred = INFINITY_LIFE_TIME; + valid = INFINITY_LIFE_TIME; } - ci.cstamp = (__u32)(TIME_DELTA(ifa->cstamp, INITIAL_JIFFIES) / HZ * 100 - + TIME_DELTA(ifa->cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); - ci.tstamp = (__u32)(TIME_DELTA(ifa->tstamp, INITIAL_JIFFIES) / HZ * 100 - + TIME_DELTA(ifa->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); - RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci); + + if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) + goto rtattr_failure; + nlh->nlmsg_len = skb->tail - b; return skb->len; @@ -3059,7 +3073,6 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - struct ifa_cacheinfo ci; unsigned char *b = skb->tail; nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); @@ -3072,15 +3085,11 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, ifm->ifa_scope = RT_SCOPE_SITE; ifm->ifa_index = ifmca->idev->dev->ifindex; RTA_PUT(skb, IFA_MULTICAST, 16, &ifmca->mca_addr); - ci.cstamp = (__u32)(TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) / HZ - * 100 + TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) % HZ - * 100 / HZ); - ci.tstamp = (__u32)(TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) / HZ - * 100 + TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) % HZ - * 100 / HZ); - ci.ifa_prefered = INFINITY_LIFE_TIME; - ci.ifa_valid = INFINITY_LIFE_TIME; - RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci); + + if (put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp, + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) + goto rtattr_failure; + nlh->nlmsg_len = skb->tail - b; return skb->len; @@ -3095,7 +3104,6 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - struct ifa_cacheinfo ci; unsigned char *b = skb->tail; nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); @@ -3108,15 +3116,11 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, ifm->ifa_scope = RT_SCOPE_SITE; ifm->ifa_index = ifaca->aca_idev->dev->ifindex; RTA_PUT(skb, IFA_ANYCAST, 16, &ifaca->aca_addr); - ci.cstamp = (__u32)(TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) / HZ - * 100 + TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) % HZ - * 100 / HZ); - ci.tstamp = (__u32)(TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) / HZ - * 100 + TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) % HZ - * 100 / HZ); - ci.ifa_prefered = INFINITY_LIFE_TIME; - ci.ifa_valid = INFINITY_LIFE_TIME; - RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci); + + if (put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp, + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) + goto rtattr_failure; + nlh->nlmsg_len = skb->tail - b; return skb->len; From 101bb229691c438bce4d7f13006494dd4de6910a Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 18 Sep 2006 00:11:52 -0700 Subject: [PATCH 0669/1063] [IPV6] address: Add put_ifaddrmsg() and rt_scope() Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 77 +++++++++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 34 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d546f0e74530..ca7ecf2f3e82 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3000,6 +3000,19 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) preferred_lft, valid_lft); } +static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u8 flags, + u8 scope, int ifindex) +{ + struct ifaddrmsg *ifm; + + ifm = nlmsg_data(nlh); + ifm->ifa_family = AF_INET6; + ifm->ifa_prefixlen = prefixlen; + ifm->ifa_flags = flags; + ifm->ifa_scope = scope; + ifm->ifa_index = ifindex; +} + static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, unsigned long tstamp, u32 preferred, u32 valid) { @@ -3015,6 +3028,18 @@ static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); } +static inline int rt_scope(int ifa_scope) +{ + if (ifa_scope & IFA_HOST) + return RT_SCOPE_HOST; + else if (ifa_scope & IFA_LINK) + return RT_SCOPE_LINK; + else if (ifa_scope & IFA_SITE) + return RT_SCOPE_SITE; + else + return RT_SCOPE_UNIVERSE; +} + /* Maximum length of ifa_cacheinfo attributes */ #define INET6_IFADDR_RTA_SPACE \ RTA_SPACE(16) /* IFA_ADDRESS */ + \ @@ -3023,24 +3048,14 @@ static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, u32 pid, u32 seq, int event, unsigned int flags) { - struct ifaddrmsg *ifm; struct nlmsghdr *nlh; unsigned char *b = skb->tail; u32 preferred, valid; - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); - ifm = NLMSG_DATA(nlh); - ifm->ifa_family = AF_INET6; - ifm->ifa_prefixlen = ifa->prefix_len; - ifm->ifa_flags = ifa->flags; - ifm->ifa_scope = RT_SCOPE_UNIVERSE; - if (ifa->scope&IFA_HOST) - ifm->ifa_scope = RT_SCOPE_HOST; - else if (ifa->scope&IFA_LINK) - ifm->ifa_scope = RT_SCOPE_LINK; - else if (ifa->scope&IFA_SITE) - ifm->ifa_scope = RT_SCOPE_SITE; - ifm->ifa_index = ifa->idev->dev->ifindex; + nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope), + ifa->idev->dev->ifindex); + RTA_PUT(skb, IFA_ADDRESS, 16, &ifa->addr); if (!(ifa->flags&IFA_F_PERMANENT)) { preferred = ifa->prefered_lft; @@ -3071,19 +3086,16 @@ rtattr_failure: static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, u32 pid, u32 seq, int event, u16 flags) { - struct ifaddrmsg *ifm; struct nlmsghdr *nlh; unsigned char *b = skb->tail; + u8 scope = RT_SCOPE_UNIVERSE; + int ifindex = ifmca->idev->dev->ifindex; - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); - ifm = NLMSG_DATA(nlh); - ifm->ifa_family = AF_INET6; - ifm->ifa_prefixlen = 128; - ifm->ifa_flags = IFA_F_PERMANENT; - ifm->ifa_scope = RT_SCOPE_UNIVERSE; - if (ipv6_addr_scope(&ifmca->mca_addr)&IFA_SITE) - ifm->ifa_scope = RT_SCOPE_SITE; - ifm->ifa_index = ifmca->idev->dev->ifindex; + if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) + scope = RT_SCOPE_SITE; + + nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); RTA_PUT(skb, IFA_MULTICAST, 16, &ifmca->mca_addr); if (put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp, @@ -3102,19 +3114,16 @@ rtattr_failure: static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, u32 pid, u32 seq, int event, unsigned int flags) { - struct ifaddrmsg *ifm; struct nlmsghdr *nlh; unsigned char *b = skb->tail; + u8 scope = RT_SCOPE_UNIVERSE; + int ifindex = ifaca->aca_idev->dev->ifindex; - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); - ifm = NLMSG_DATA(nlh); - ifm->ifa_family = AF_INET6; - ifm->ifa_prefixlen = 128; - ifm->ifa_flags = IFA_F_PERMANENT; - ifm->ifa_scope = RT_SCOPE_UNIVERSE; - if (ipv6_addr_scope(&ifaca->aca_addr)&IFA_SITE) - ifm->ifa_scope = RT_SCOPE_SITE; - ifm->ifa_index = ifaca->aca_idev->dev->ifindex; + if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE) + scope = RT_SCOPE_SITE; + + nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); RTA_PUT(skb, IFA_ANYCAST, 16, &ifaca->aca_addr); if (put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp, From 0ab6803bc90a8ee5c860ef09334b30007d1746be Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 18 Sep 2006 00:12:35 -0700 Subject: [PATCH 0670/1063] [IPV6] address: Convert address dumping to new netlink api Replaces INET6_IFADDR_RTA_SPACE with a new function calculating the total required message size for all address messages. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 79 +++++++++++++++++++-------------------------- 1 file changed, 33 insertions(+), 46 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ca7ecf2f3e82..75a69bac82a8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3040,23 +3040,27 @@ static inline int rt_scope(int ifa_scope) return RT_SCOPE_UNIVERSE; } -/* Maximum length of ifa_cacheinfo attributes */ -#define INET6_IFADDR_RTA_SPACE \ - RTA_SPACE(16) /* IFA_ADDRESS */ + \ - RTA_SPACE(sizeof(struct ifa_cacheinfo)) /* CACHEINFO */ +static inline int inet6_ifaddr_msgsize(void) +{ + return nlmsg_total_size(sizeof(struct ifaddrmsg) + + nla_total_size(16) + + nla_total_size(sizeof(struct ifa_cacheinfo)) + + 128); +} static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, u32 pid, u32 seq, int event, unsigned int flags) { struct nlmsghdr *nlh; - unsigned char *b = skb->tail; u32 preferred, valid; - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + if (nlh == NULL) + return -ENOBUFS; + put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope), ifa->idev->dev->ifindex); - RTA_PUT(skb, IFA_ADDRESS, 16, &ifa->addr); if (!(ifa->flags&IFA_F_PERMANENT)) { preferred = ifa->prefered_lft; valid = ifa->valid_lft; @@ -3071,72 +3075,57 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, valid = INFINITY_LIFE_TIME; } - if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) - goto rtattr_failure; + if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 || + put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) + return nlmsg_cancel(skb, nlh); - nlh->nlmsg_len = skb->tail - b; - return skb->len; - -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; + return nlmsg_end(skb, nlh); } static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, u32 pid, u32 seq, int event, u16 flags) { struct nlmsghdr *nlh; - unsigned char *b = skb->tail; u8 scope = RT_SCOPE_UNIVERSE; int ifindex = ifmca->idev->dev->ifindex; if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) scope = RT_SCOPE_SITE; - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + if (nlh == NULL) + return -ENOBUFS; + put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); - RTA_PUT(skb, IFA_MULTICAST, 16, &ifmca->mca_addr); - - if (put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp, + if (nla_put(skb, IFA_MULTICAST, 16, &ifmca->mca_addr) < 0 || + put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) - goto rtattr_failure; + return nlmsg_cancel(skb, nlh); - nlh->nlmsg_len = skb->tail - b; - return skb->len; - -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; + return nlmsg_end(skb, nlh); } static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, u32 pid, u32 seq, int event, unsigned int flags) { struct nlmsghdr *nlh; - unsigned char *b = skb->tail; u8 scope = RT_SCOPE_UNIVERSE; int ifindex = ifaca->aca_idev->dev->ifindex; if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE) scope = RT_SCOPE_SITE; - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + if (nlh == NULL) + return -ENOBUFS; + put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); - RTA_PUT(skb, IFA_ANYCAST, 16, &ifaca->aca_addr); - - if (put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp, + if (nla_put(skb, IFA_ANYCAST, 16, &ifaca->aca_addr) < 0 || + put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) - goto rtattr_failure; + return nlmsg_cancel(skb, nlh); - nlh->nlmsg_len = skb->tail - b; - return skb->len; - -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; + return nlmsg_end(skb, nlh); } enum addr_type_t @@ -3256,7 +3245,6 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, struct net_device *dev = NULL; struct inet6_ifaddr *ifa; struct sk_buff *skb; - int payload = sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE; int err; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); @@ -3278,7 +3266,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, goto errout; } - if ((skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL)) == NULL) { + if ((skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL)) == NULL) { err = -ENOBUFS; goto errout_ifa; } @@ -3300,10 +3288,9 @@ errout: static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) { struct sk_buff *skb; - int payload = sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE; int err = -ENOBUFS; - skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC); + skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC); if (skb == NULL) goto errout; From 680a27a23af45307095ae432dd0bc859e1fbe219 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 18 Sep 2006 00:13:07 -0700 Subject: [PATCH 0671/1063] [IPV6] address: Allow address changes while device is administrative down Same behaviour as IPv4, using IFF_UP is a no-no anyway. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 75a69bac82a8..bb18b9c3a5cb 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1886,9 +1886,6 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, if ((dev = __dev_get_by_index(ifindex)) == NULL) return -ENODEV; - if (!(dev->flags&IFF_UP)) - return -ENETDOWN; - if ((idev = addrconf_add_dev(dev)) == NULL) return -ENOBUFS; @@ -2922,9 +2919,6 @@ inet6_addr_modify(int ifindex, struct in6_addr *pfx, if ((dev = __dev_get_by_index(ifindex)) == NULL) return -ENODEV; - if (!(dev->flags&IFF_UP)) - return -ENETDOWN; - if (!valid_lft || (prefered_lft > valid_lft)) return -EINVAL; From 7198f8cec12ccec6d6f2e18c08ecc8c66c8aaf93 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 18 Sep 2006 00:13:46 -0700 Subject: [PATCH 0672/1063] [IPV6] address: Support NLM_F_EXCL when adding addresses iproute2 doesn't provide the NLM_F_CREATE flag when adding addresses, it is assumed to be implied. The existing code issues a check on said flag when the modify operation fails (likely due to ENOENT) before continueing to create it, this leads to a hard to predict result, therefore the NLM_F_CREATE check is removed. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 45 +++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index bb18b9c3a5cb..1e5a296d0a82 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2908,24 +2908,14 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen); } -static int -inet6_addr_modify(int ifindex, struct in6_addr *pfx, - __u32 prefered_lft, __u32 valid_lft) +static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 prefered_lft, + u32 valid_lft) { - struct inet6_ifaddr *ifp = NULL; - struct net_device *dev; int ifa_flags = 0; - if ((dev = __dev_get_by_index(ifindex)) == NULL) - return -ENODEV; - if (!valid_lft || (prefered_lft > valid_lft)) return -EINVAL; - ifp = ipv6_get_ifaddr(pfx, dev, 1); - if (ifp == NULL) - return -ENOENT; - if (valid_lft == INFINITY_LIFE_TIME) ifa_flags = IFA_F_PERMANENT; else if (valid_lft >= 0x7FFFFFFF/HZ) @@ -2947,7 +2937,6 @@ inet6_addr_modify(int ifindex, struct in6_addr *pfx, spin_unlock_bh(&ifp->lock); if (!(ifp->flags&IFA_F_TENTATIVE)) ipv6_ifa_notify(0, ifp); - in6_ifa_put(ifp); addrconf_verify(0); @@ -2960,6 +2949,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *pfx; + struct inet6_ifaddr *ifa; + struct net_device *dev; u32 valid_lft, preferred_lft; int err; @@ -2983,15 +2974,29 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) valid_lft = INFINITY_LIFE_TIME; } - if (nlh->nlmsg_flags & NLM_F_REPLACE) { - err = inet6_addr_modify(ifm->ifa_index, pfx, - preferred_lft, valid_lft); - if (err == 0 || !(nlh->nlmsg_flags & NLM_F_CREATE)) - return err; + dev = __dev_get_by_index(ifm->ifa_index); + if (dev == NULL) + return -ENODEV; + + ifa = ipv6_get_ifaddr(pfx, dev, 1); + if (ifa == NULL) { + /* + * It would be best to check for !NLM_F_CREATE here but + * userspace alreay relies on not having to provide this. + */ + return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen, + preferred_lft, valid_lft); } - return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen, - preferred_lft, valid_lft); + if (nlh->nlmsg_flags & NLM_F_EXCL || + !(nlh->nlmsg_flags & NLM_F_REPLACE)) + err = -EEXIST; + else + err = inet6_addr_modify(ifa, preferred_lft, valid_lft); + + in6_ifa_put(ifa); + + return err; } static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u8 flags, From 161643660129dd7d98f0b12418c0a2710ffa7db6 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 18 Sep 2006 00:40:38 -0700 Subject: [PATCH 0673/1063] [SCTP]: Cleanups This patch contains the following cleanups: - make the following needlessly global function static: - socket.c: sctp_apply_peer_addr_params() - add proper prototypes for the several global functions in include/net/sctp/sctp.h Note that this fixes wrong prototypes for the following functions: - sctp_snmp_proc_exit() - sctp_eps_proc_exit() - sctp_assocs_proc_exit() The latter was spotted by the GNU C compiler and reported by David Woodhouse. Signed-off-by: Adrian Bunk Acked-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 13 +++++++++++++ net/sctp/ipv6.c | 1 - net/sctp/protocol.c | 7 ------- net/sctp/socket.c | 14 +++++++------- 4 files changed, 20 insertions(+), 15 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index e274fd479990..ee68a3124076 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -128,6 +128,8 @@ extern int sctp_copy_local_addr_list(struct sctp_bind_addr *, int flags); extern struct sctp_pf *sctp_get_pf_specific(sa_family_t family); extern int sctp_register_pf(struct sctp_pf *, sa_family_t); +int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, + void *ptr); /* * sctp/socket.c @@ -177,6 +179,17 @@ void sctp_icmp_proto_unreachable(struct sock *sk, void sctp_backlog_migrate(struct sctp_association *assoc, struct sock *oldsk, struct sock *newsk); +/* + * sctp/proc.c + */ +int sctp_snmp_proc_init(void); +void sctp_snmp_proc_exit(void); +int sctp_eps_proc_init(void); +void sctp_eps_proc_exit(void); +int sctp_assocs_proc_init(void); +void sctp_assocs_proc_exit(void); + + /* * Section: Macros, externs, and inlines */ diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 99c0cefc04e0..fd87e3ceb56e 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -78,7 +78,6 @@ #include -extern int sctp_inetaddr_event(struct notifier_block *, unsigned long, void *); static struct notifier_block sctp_inet6addr_notifier = { .notifier_call = sctp_inetaddr_event, }; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index d9dd4c47bc29..fac7674438a4 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -82,13 +82,6 @@ static struct sctp_af *sctp_af_v6_specific; kmem_cache_t *sctp_chunk_cachep __read_mostly; kmem_cache_t *sctp_bucket_cachep __read_mostly; -extern int sctp_snmp_proc_init(void); -extern int sctp_snmp_proc_exit(void); -extern int sctp_eps_proc_init(void); -extern int sctp_eps_proc_exit(void); -extern int sctp_assocs_proc_init(void); -extern int sctp_assocs_proc_exit(void); - /* Return the address of the control sock. */ struct sock *sctp_get_ctl_sock(void) { diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 7c1dbb1d10df..79c3e072cf28 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2081,13 +2081,13 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, * SPP_SACKDELAY_ENABLE, setting both will have undefined * results. */ -int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, - struct sctp_transport *trans, - struct sctp_association *asoc, - struct sctp_sock *sp, - int hb_change, - int pmtud_change, - int sackdelay_change) +static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, + struct sctp_transport *trans, + struct sctp_association *asoc, + struct sctp_sock *sp, + int hb_change, + int pmtud_change, + int sackdelay_change) { int error; From 4eb327b517cf85f6cb7dcd5691e7b748cbe8c343 Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Tue, 19 Sep 2006 10:24:19 -0700 Subject: [PATCH 0674/1063] [SELINUX]: Fix bug in security_sid_mls_copy The following fixes a bug where random mem is being tampered with in the non-mls case; encountered by Jashua Brindle on a gentoo box. Signed-off-by: Venkat Yekkirala Acked-by: Stephen Smalley Signed-off-by: James Morris --- security/selinux/ss/services.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 27ee28ccf266..7eb69a602d8f 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1841,7 +1841,7 @@ int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid) u32 len; int rc = 0; - if (!ss_initialized) { + if (!ss_initialized || !selinux_mls_enabled) { *new_sid = sid; goto out; } From 1ef9696c909060ccdae3ade245ca88692b49285b Mon Sep 17 00:00:00 2001 From: Alexey Kuznetsov Date: Tue, 19 Sep 2006 12:52:50 -0700 Subject: [PATCH 0675/1063] [TCP]: Send ACKs each 2nd received segment. It does not affect either mss-sized connections (obviously) or connections controlled by Nagle (because there is only one small segment in flight). The idea is to record the fact that a small segment arrives on a connection, where one small segment has already been received and still not-ACKed. In this case ACK is forced after tcp_recvmsg() drains receive buffer. In other words, it is a "soft" each-2nd-segment ACK, which is enough to preserve ACK clock even when ABC is enabled. Signed-off-by: Alexey Kuznetsov Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 3 ++- net/ipv4/tcp.c | 7 +++++-- net/ipv4/tcp_input.c | 2 ++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 9bf73fe50948..de4e83b6da4b 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -147,7 +147,8 @@ extern struct sock *inet_csk_clone(struct sock *sk, enum inet_csk_ack_state_t { ICSK_ACK_SCHED = 1, ICSK_ACK_TIMER = 2, - ICSK_ACK_PUSHED = 4 + ICSK_ACK_PUSHED = 4, + ICSK_ACK_PUSHED2 = 8 }; extern void inet_csk_init_xmit_timers(struct sock *sk, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 29e3d606db78..66e9a729f6df 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -955,8 +955,11 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) * receive buffer and there was a small segment * in queue. */ - (copied > 0 && (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && - !icsk->icsk_ack.pingpong && !atomic_read(&sk->sk_rmem_alloc))) + (copied > 0 && + ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) || + ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && + !icsk->icsk_ack.pingpong)) && + !atomic_read(&sk->sk_rmem_alloc))) time_to_ack = 1; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 511b738f118a..b3def0df14fb 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -156,6 +156,8 @@ static void tcp_measure_rcv_mss(struct sock *sk, return; } } + if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) + icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2; icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; } } From a1e59abf824969554b90facd44a4ab16e265afa4 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 19 Sep 2006 12:57:34 -0700 Subject: [PATCH 0676/1063] [XFRM]: Fix wildcard as tunnel source Hashing SAs by source address breaks templates with wildcards as tunnel source since the source address used for hashing/lookup is still 0/0. Move source address lookup to xfrm_tmpl_resolve_one() so we can use the real address in the lookup. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/xfrm.h | 13 +++++++++++++ net/ipv4/xfrm4_policy.c | 20 ++++++++++++++++++++ net/ipv4/xfrm4_state.c | 15 --------------- net/ipv6/xfrm6_policy.c | 21 +++++++++++++++++++++ net/ipv6/xfrm6_state.c | 16 ---------------- net/xfrm/xfrm_policy.c | 21 +++++++++++++++++++++ 6 files changed, 75 insertions(+), 31 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4d6dc627df9b..11e0b1d6bd47 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -222,6 +222,7 @@ struct xfrm_policy_afinfo { struct dst_ops *dst_ops; void (*garbage_collect)(void); int (*dst_lookup)(struct xfrm_dst **dst, struct flowi *fl); + int (*get_saddr)(xfrm_address_t *saddr, xfrm_address_t *daddr); struct dst_entry *(*find_bundle)(struct flowi *fl, struct xfrm_policy *policy); int (*bundle_create)(struct xfrm_policy *policy, struct xfrm_state **xfrm, @@ -630,6 +631,18 @@ secpath_reset(struct sk_buff *skb) #endif } +static inline int +xfrm_addr_any(xfrm_address_t *addr, unsigned short family) +{ + switch (family) { + case AF_INET: + return addr->a4 == 0; + case AF_INET6: + return ipv6_addr_any((struct in6_addr *)&addr->a6); + } + return 0; +} + static inline int __xfrm4_state_addr_cmp(struct xfrm_tmpl *tmpl, struct xfrm_state *x) { diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 479598566f1d..eabcd27b1767 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -21,6 +21,25 @@ static int xfrm4_dst_lookup(struct xfrm_dst **dst, struct flowi *fl) return __ip_route_output_key((struct rtable**)dst, fl); } +static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) +{ + struct rtable *rt; + struct flowi fl_tunnel = { + .nl_u = { + .ip4_u = { + .daddr = daddr->a4, + }, + }, + }; + + if (!xfrm4_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) { + saddr->a4 = rt->rt_src; + dst_release(&rt->u.dst); + return 0; + } + return -EHOSTUNREACH; +} + static struct dst_entry * __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) { @@ -298,6 +317,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { .family = AF_INET, .dst_ops = &xfrm4_dst_ops, .dst_lookup = xfrm4_dst_lookup, + .get_saddr = xfrm4_get_saddr, .find_bundle = __xfrm4_find_bundle, .bundle_create = __xfrm4_bundle_create, .decode_session = _decode_session4, diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 6a2a4ab42772..fe2034494d08 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -42,21 +42,6 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->props.saddr = tmpl->saddr; if (x->props.saddr.a4 == 0) x->props.saddr.a4 = saddr->a4; - if (tmpl->mode == XFRM_MODE_TUNNEL && x->props.saddr.a4 == 0) { - struct rtable *rt; - struct flowi fl_tunnel = { - .nl_u = { - .ip4_u = { - .daddr = x->id.daddr.a4, - } - } - }; - if (!xfrm_dst_lookup((struct xfrm_dst **)&rt, - &fl_tunnel, AF_INET)) { - x->props.saddr.a4 = rt->rt_src; - dst_release(&rt->u.dst); - } - } x->props.mode = tmpl->mode; x->props.reqid = tmpl->reqid; x->props.family = AF_INET; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 9391c4c94feb..6a252e2134d1 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -34,6 +34,26 @@ static int xfrm6_dst_lookup(struct xfrm_dst **dst, struct flowi *fl) return err; } +static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) +{ + struct rt6_info *rt; + struct flowi fl_tunnel = { + .nl_u = { + .ip6_u = { + .daddr = *(struct in6_addr *)&daddr->a6, + }, + }, + }; + + if (!xfrm6_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) { + ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)&daddr->a6, + (struct in6_addr *)&saddr->a6); + dst_release(&rt->u.dst); + return 0; + } + return -EHOSTUNREACH; +} + static struct dst_entry * __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) { @@ -362,6 +382,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .family = AF_INET6, .dst_ops = &xfrm6_dst_ops, .dst_lookup = xfrm6_dst_lookup, + .get_saddr = xfrm6_get_saddr, .find_bundle = __xfrm6_find_bundle, .bundle_create = __xfrm6_bundle_create, .decode_session = _decode_session6, diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index d88cd92c864e..711bfafb2472 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -42,22 +42,6 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr)); if (ipv6_addr_any((struct in6_addr*)&x->props.saddr)) memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr)); - if (tmpl->mode == XFRM_MODE_TUNNEL && ipv6_addr_any((struct in6_addr*)&x->props.saddr)) { - struct rt6_info *rt; - struct flowi fl_tunnel = { - .nl_u = { - .ip6_u = { - .daddr = *(struct in6_addr *)daddr, - } - } - }; - if (!xfrm_dst_lookup((struct xfrm_dst **)&rt, - &fl_tunnel, AF_INET6)) { - ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)daddr, - (struct in6_addr *)&x->props.saddr); - dst_release(&rt->u.dst); - } - } x->props.mode = tmpl->mode; x->props.reqid = tmpl->reqid; x->props.family = AF_INET6; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 537854fe47ca..b6e2e79d7261 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1107,6 +1107,20 @@ int __xfrm_sk_clone_policy(struct sock *sk) return 0; } +static int +xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote, + unsigned short family) +{ + int err; + struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + + if (unlikely(afinfo == NULL)) + return -EINVAL; + err = afinfo->get_saddr(local, remote); + xfrm_policy_put_afinfo(afinfo); + return err; +} + /* Resolve list of templates for the flow, given policy. */ static int @@ -1118,6 +1132,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, int i, error; xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family); + xfrm_address_t tmp; for (nx=0, i = 0; i < policy->xfrm_nr; i++) { struct xfrm_state *x; @@ -1128,6 +1143,12 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, if (tmpl->mode == XFRM_MODE_TUNNEL) { remote = &tmpl->id.daddr; local = &tmpl->saddr; + if (xfrm_addr_any(local, family)) { + error = xfrm_get_saddr(&tmp, remote, family); + if (error) + goto fail; + local = &tmp; + } } x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family); From 23d06e3b986677ec57007a24891fa9deb09ac973 Mon Sep 17 00:00:00 2001 From: Andrea Bittau Date: Tue, 19 Sep 2006 13:04:54 -0700 Subject: [PATCH 0677/1063] [DCCP] ACKVEC: fix ackvector length calculation Fix ackvector length calculation upon receiving an "ack-of-ack". This patch avoids the ackvector from growing too large which causes it to not be inserted into packets. Signed-off-by: Andrea Bittau Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ackvec.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 8c211c58893b..8dab723cc704 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -353,11 +353,13 @@ static void dccp_ackvec_throw_record(struct dccp_ackvec *av, { struct dccp_ackvec_record *next; - av->dccpav_buf_tail = avr->dccpavr_ack_ptr - 1; - if (av->dccpav_buf_tail == 0) - av->dccpav_buf_tail = DCCP_MAX_ACKVEC_LEN - 1; - - av->dccpav_vec_len -= avr->dccpavr_sent_len; + /* sort out vector length */ + if (av->dccpav_buf_head <= avr->dccpavr_ack_ptr) + av->dccpav_vec_len = avr->dccpavr_ack_ptr - av->dccpav_buf_head; + else + av->dccpav_vec_len = DCCP_MAX_ACKVEC_LEN - 1 + - av->dccpav_buf_head + + avr->dccpavr_ack_ptr; /* free records */ list_for_each_entry_safe_from(avr, next, &av->dccpav_records, From 8e27e4650cb7e73aa4dd97d860539e7605ac7e39 Mon Sep 17 00:00:00 2001 From: Andrea Bittau Date: Tue, 19 Sep 2006 13:05:35 -0700 Subject: [PATCH 0678/1063] [DCCP] ackvec: Fix how DCCP_ACKVEC_STATE_NOT_RECEIVED is used Fix the way state is masked out. DCCP_ACKVEC_STATE_NOT_RECEIVED is defined as appears in the packet, therefore bit shifting is not required. This fix allows CCID2 to correctly detect losses. Signed-off-by: Andrea Bittau Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ackvec.c | 3 +-- net/dccp/ccids/ccid2.c | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 8dab723cc704..bc5ff1212418 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -436,8 +436,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, break; found: if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, ackno)) { - const u8 state = (*vector & - DCCP_ACKVEC_STATE_MASK) >> 6; + const u8 state = *vector & DCCP_ACKVEC_STATE_MASK; if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) { #ifdef CONFIG_IP_DCCP_DEBUG struct dccp_sock *dp = dccp_sk(sk); diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index e9615627dcd6..b1d90c07535e 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -582,8 +582,8 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) * run length */ while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) { - const u8 state = (*vector & - DCCP_ACKVEC_STATE_MASK) >> 6; + const u8 state = *vector & + DCCP_ACKVEC_STATE_MASK; /* new packet received or marked */ if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED && From 4a0a50fb43912b4a593d2416c507a198fe607a6d Mon Sep 17 00:00:00 2001 From: Andrea Bittau Date: Tue, 19 Sep 2006 13:06:16 -0700 Subject: [PATCH 0679/1063] [DCCP] ackvec: Remove unused variables Get rid of unused variables in ackvector state. Signed-off-by: Andrea Bittau Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ackvec.c | 5 ++--- net/dccp/ackvec.h | 4 +--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index bc5ff1212418..4d176d33983f 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -142,14 +142,13 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority) struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority); if (av != NULL) { - av->dccpav_buf_head = - av->dccpav_buf_tail = DCCP_MAX_ACKVEC_LEN - 1; + av->dccpav_buf_head = DCCP_MAX_ACKVEC_LEN - 1; av->dccpav_buf_ackno = DCCP_MAX_SEQNO + 1; av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0; av->dccpav_ack_ptr = 0; av->dccpav_time.tv_sec = 0; av->dccpav_time.tv_usec = 0; - av->dccpav_sent_len = av->dccpav_vec_len = 0; + av->dccpav_vec_len = 0; INIT_LIST_HEAD(&av->dccpav_records); } diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index 0adf4b56c34c..2424effac7f6 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -54,9 +54,7 @@ struct dccp_ackvec { struct list_head dccpav_records; struct timeval dccpav_time; u8 dccpav_buf_head; - u8 dccpav_buf_tail; u8 dccpav_ack_ptr; - u8 dccpav_sent_len; u8 dccpav_vec_len; u8 dccpav_buf_nonce; u8 dccpav_ack_nonce; @@ -107,7 +105,7 @@ extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb); static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) { - return av->dccpav_sent_len != av->dccpav_vec_len; + return av->dccpav_vec_len; } #else /* CONFIG_IP_DCCP_ACKVEC */ static inline int dccp_ackvec_init(void) From 29651cda97b0a9e4ac0fbeb5ea731a9909f0f128 Mon Sep 17 00:00:00 2001 From: Andrea Bittau Date: Tue, 19 Sep 2006 13:06:46 -0700 Subject: [PATCH 0680/1063] [DCCP] CCID2: Fix jiffie wrap issues Jiffies are now handled correctly (I hope) in CCID2. If they wrap, no problem. Signed-off-by: Andrea Bittau Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid2.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index b1d90c07535e..54a6b7ef3b7b 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -27,7 +27,6 @@ * * BUGS: * - sequence number wrapping - * - jiffies wrapping */ #include "../ccid.h" @@ -71,7 +70,8 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) /* packets are sent sequentially */ BUG_ON(seqp->ccid2s_seq <= prev->ccid2s_seq); - BUG_ON(seqp->ccid2s_sent < prev->ccid2s_sent); + BUG_ON(time_before(seqp->ccid2s_sent, + prev->ccid2s_sent)); BUG_ON(len > ccid2_seq_len); seqp = prev; @@ -418,8 +418,8 @@ static inline void ccid2_new_ack(struct sock *sk, /* update RTO */ if (hctx->ccid2hctx_srtt == -1 || - (jiffies - hctx->ccid2hctx_lastrtt) >= hctx->ccid2hctx_srtt) { - unsigned long r = jiffies - seqp->ccid2s_sent; + time_after(jiffies, hctx->ccid2hctx_lastrtt + hctx->ccid2hctx_srtt)) { + unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent; int s; /* first measurement */ From d458c25ce24ce00ea547e9976e293e7835416253 Mon Sep 17 00:00:00 2001 From: Andrea Bittau Date: Tue, 19 Sep 2006 13:07:20 -0700 Subject: [PATCH 0681/1063] [DCCP] CCID2: Initialize ssthresh to infinity Initialize the slow-start threshold to infinity. This way, upon connection initiation, slow-start will be exited only upon a packet loss. This patch will allow connections to quickly gain speed. Signed-off-by: Andrea Bittau Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid2.c | 7 +++++-- net/dccp/ccids/ccid2.h | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 54a6b7ef3b7b..699a56674659 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -678,9 +678,12 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) int seqcount = ccid2_seq_len; int i; - /* XXX init variables with proper values */ hctx->ccid2hctx_cwnd = 1; - hctx->ccid2hctx_ssthresh = 10; + /* Initialize ssthresh to infinity. This means that we will exit the + * initial slow-start after the first packet loss. This is what we + * want. + */ + hctx->ccid2hctx_ssthresh = ~0; hctx->ccid2hctx_numdupack = 3; /* XXX init ~ to window size... */ diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index 451a87464fa5..b4cc6c0bf020 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -50,7 +50,7 @@ struct ccid2_hc_tx_sock { int ccid2hctx_cwnd; int ccid2hctx_ssacks; int ccid2hctx_acks; - int ccid2hctx_ssthresh; + unsigned int ccid2hctx_ssthresh; int ccid2hctx_pipe; int ccid2hctx_numdupack; struct ccid2_seq *ccid2hctx_seqbuf; From 69263bcfb5016bc3bdd099607a4232cba06f8491 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 22 Sep 2006 14:28:11 -0700 Subject: [PATCH 0682/1063] [ATM]: proper prototypes in net/atm/mpc.h (and reduce ifdef clutter) Signed-off-by: Adrian Bunk Signed-off-by: Chas Williams Signed-off-by: David S. Miller --- net/atm/mpc.c | 11 ----------- net/atm/mpc.h | 8 ++++++++ 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 00704661e83f..b87c2a88bdce 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -98,11 +98,6 @@ static struct notifier_block mpoa_notifier = { 0 }; -#ifdef CONFIG_PROC_FS -extern int mpc_proc_init(void); -extern void mpc_proc_clean(void); -#endif - struct mpoa_client *mpcs = NULL; /* FIXME */ static struct atm_mpoa_qos *qos_head = NULL; static DEFINE_TIMER(mpc_timer, NULL, 0, 0); @@ -1439,12 +1434,8 @@ static __init int atm_mpoa_init(void) { register_atm_ioctl(&atm_ioctl_ops); -#ifdef CONFIG_PROC_FS if (mpc_proc_init() != 0) printk(KERN_INFO "mpoa: failed to initialize /proc/mpoa\n"); - else - printk(KERN_INFO "mpoa: /proc/mpoa initialized\n"); -#endif printk("mpc.c: " __DATE__ " " __TIME__ " initialized\n"); @@ -1457,9 +1448,7 @@ static void __exit atm_mpoa_cleanup(void) struct atm_mpoa_qos *qos, *nextqos; struct lec_priv *priv; -#ifdef CONFIG_PROC_FS mpc_proc_clean(); -#endif del_timer(&mpc_timer); unregister_netdevice_notifier(&mpoa_notifier); diff --git a/net/atm/mpc.h b/net/atm/mpc.h index 863ddf6079e1..3c7981a229e8 100644 --- a/net/atm/mpc.h +++ b/net/atm/mpc.h @@ -50,4 +50,12 @@ int atm_mpoa_delete_qos(struct atm_mpoa_qos *qos); struct seq_file; void atm_mpoa_disp_qos(struct seq_file *m); +#ifdef CONFIG_PROC_FS +int mpc_proc_init(void); +void mpc_proc_clean(void); +#else +#define mpc_proc_init() (0) +#define mpc_proc_clean() do { } while(0) +#endif + #endif /* _MPC_H_ */ From 446dec30c7f305ed1bb0092b0a8d9367d842a33f Mon Sep 17 00:00:00 2001 From: Andrea Bittau Date: Tue, 19 Sep 2006 13:10:11 -0700 Subject: [PATCH 0683/1063] [DCCP] CCID2: Tell DCCP to quickly check whether cwnd is available If not enough cwnd is available, tell the sender to check again as soon as possible. This will increase CPU utilization (polling frequently for cwnd) but will improve network performance. That is, the sender will need to wait less before detecting the increase of cwnd. A better architecture would be for the CCID to call-back (or dequeue) from DCCP when it is able to transmit traffic -- not the other way around as it currently occurs. Signed-off-by: Andrea Bittau Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 699a56674659..e0acd1ba4e88 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -122,7 +122,7 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, } } - return 100; /* XXX */ + return 1; /* XXX CCID should dequeue when ready instead of polling */ } static void ccid2_change_l_ack_ratio(struct sock *sk, int val) From 8d424f6ca2d02026dadff409770639d720375afb Mon Sep 17 00:00:00 2001 From: Andrea Bittau Date: Tue, 19 Sep 2006 13:12:44 -0700 Subject: [PATCH 0684/1063] [DCCP] CCID2: Add Kconfig option for CCID2 debug Allow the user to choose whether or not to enable CCID2 debugging via Kconfig. Signed-off-by: Andrea Bittau Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/Kconfig | 8 ++++++++ net/dccp/ccids/ccid2.c | 7 +++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig index ca00191628f7..32752f750447 100644 --- a/net/dccp/ccids/Kconfig +++ b/net/dccp/ccids/Kconfig @@ -30,6 +30,14 @@ config IP_DCCP_CCID2 If in doubt, say M. +config IP_DCCP_CCID2_DEBUG + bool "CCID2 debug" + depends on IP_DCCP_CCID2 + ---help--- + Enable CCID2 debug messages. + + If in doubt, say N. + config IP_DCCP_CCID3 tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)" depends on IP_DCCP diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index e0acd1ba4e88..dbcda7e868b7 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -35,8 +35,7 @@ static int ccid2_debug; -#undef CCID2_DEBUG -#ifdef CCID2_DEBUG +#ifdef CONFIG_IP_DCCP_CCID2_DEBUG #define ccid2_pr_debug(format, a...) \ do { if (ccid2_debug) \ printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \ @@ -47,7 +46,7 @@ static int ccid2_debug; static const int ccid2_seq_len = 128; -#ifdef CCID2_DEBUG +#ifdef CONFIG_IP_DCCP_CCID2_DEBUG static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) { int len = 0; @@ -295,7 +294,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len) if (!timer_pending(&hctx->ccid2hctx_rtotimer)) ccid2_start_rto_timer(sk); -#ifdef CCID2_DEBUG +#ifdef CONFIG_IP_DCCP_CCID2_DEBUG ccid2_pr_debug("pipe=%d\n", hctx->ccid2hctx_pipe); ccid2_pr_debug("Sent: seq=%llu\n", seq); do { From 07978aabd52ce67f59971872c80f76d6e3ca18ae Mon Sep 17 00:00:00 2001 From: Andrea Bittau Date: Tue, 19 Sep 2006 13:13:37 -0700 Subject: [PATCH 0685/1063] [DCCP] CCID2: Allocate seq records on demand Allocate more sequence state on demand. Each time a packet is sent out by CCID2, a record of it needs to be kept. This list of records grows proportionally to cwnd. Previously, the length of this list was hardcored and therefore the cwnd could only grow to this value (of 128). Now, records are allocated on demand as necessary---cwnd may grow as it wishes. The exceptional case of when memory is not available is not handled gracefully. Perhaps, cwnd should be capped at that point. Signed-off-by: Andrea Bittau Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid2.c | 96 ++++++++++++++++++++++++++++-------------- net/dccp/ccids/ccid2.h | 6 ++- 2 files changed, 70 insertions(+), 32 deletions(-) diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index dbcda7e868b7..93a30ae8d07a 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -44,8 +44,6 @@ static int ccid2_debug; #define ccid2_pr_debug(format, a...) #endif -static const int ccid2_seq_len = 128; - #ifdef CONFIG_IP_DCCP_CCID2_DEBUG static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) { @@ -71,7 +69,6 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) BUG_ON(seqp->ccid2s_seq <= prev->ccid2s_seq); BUG_ON(time_before(seqp->ccid2s_sent, prev->ccid2s_sent)); - BUG_ON(len > ccid2_seq_len); seqp = prev; } @@ -83,16 +80,57 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) do { seqp = seqp->ccid2s_prev; len++; - BUG_ON(len > ccid2_seq_len); } while (seqp != hctx->ccid2hctx_seqh); - BUG_ON(len != ccid2_seq_len); ccid2_pr_debug("total len=%d\n", len); + BUG_ON(len != hctx->ccid2hctx_seqbufc * CCID2_SEQBUF_LEN); } #else #define ccid2_hc_tx_check_sanity(hctx) do {} while (0) #endif +static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num, + gfp_t gfp) +{ + struct ccid2_seq *seqp; + int i; + + /* check if we have space to preserve the pointer to the buffer */ + if (hctx->ccid2hctx_seqbufc >= (sizeof(hctx->ccid2hctx_seqbuf) / + sizeof(struct ccid2_seq*))) + return -ENOMEM; + + /* allocate buffer and initialize linked list */ + seqp = kmalloc(sizeof(*seqp) * num, gfp); + if (seqp == NULL) + return -ENOMEM; + + for (i = 0; i < (num - 1); i++) { + seqp[i].ccid2s_next = &seqp[i + 1]; + seqp[i + 1].ccid2s_prev = &seqp[i]; + } + seqp[num - 1].ccid2s_next = seqp; + seqp->ccid2s_prev = &seqp[num - 1]; + + /* This is the first allocation. Initiate the head and tail. */ + if (hctx->ccid2hctx_seqbufc == 0) + hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqt = seqp; + else { + /* link the existing list with the one we just created */ + hctx->ccid2hctx_seqh->ccid2s_next = seqp; + seqp->ccid2s_prev = hctx->ccid2hctx_seqh; + + hctx->ccid2hctx_seqt->ccid2s_prev = &seqp[num - 1]; + seqp[num - 1].ccid2s_next = hctx->ccid2hctx_seqt; + } + + /* store the original pointer to the buffer so we can free it */ + hctx->ccid2hctx_seqbuf[hctx->ccid2hctx_seqbufc] = seqp; + hctx->ccid2hctx_seqbufc++; + + return 0; +} + static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb, int len) { @@ -231,6 +269,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len) { struct dccp_sock *dp = dccp_sk(sk); struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + struct ccid2_seq *next; u64 seq; ccid2_hc_tx_check_sanity(hctx); @@ -250,16 +289,24 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len) hctx->ccid2hctx_seqh->ccid2s_seq = seq; hctx->ccid2hctx_seqh->ccid2s_acked = 0; hctx->ccid2hctx_seqh->ccid2s_sent = jiffies; - hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqh->ccid2s_next; + + next = hctx->ccid2hctx_seqh->ccid2s_next; + /* check if we need to alloc more space */ + if (next == hctx->ccid2hctx_seqt) { + int rc; + + ccid2_pr_debug("allocating more space in history\n"); + rc = ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, GFP_KERNEL); + BUG_ON(rc); /* XXX what do we do? */ + + next = hctx->ccid2hctx_seqh->ccid2s_next; + BUG_ON(next == hctx->ccid2hctx_seqt); + } + hctx->ccid2hctx_seqh = next; ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd, hctx->ccid2hctx_pipe); - if (hctx->ccid2hctx_seqh == hctx->ccid2hctx_seqt) { - /* XXX allocate more space */ - WARN_ON(1); - } - hctx->ccid2hctx_sent++; /* Ack Ratio. Need to maintain a concept of how many windows we sent */ @@ -674,8 +721,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) { struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid); - int seqcount = ccid2_seq_len; - int i; hctx->ccid2hctx_cwnd = 1; /* Initialize ssthresh to infinity. This means that we will exit the @@ -684,26 +729,12 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) */ hctx->ccid2hctx_ssthresh = ~0; hctx->ccid2hctx_numdupack = 3; + hctx->ccid2hctx_seqbufc = 0; /* XXX init ~ to window size... */ - hctx->ccid2hctx_seqbuf = kmalloc(sizeof(*hctx->ccid2hctx_seqbuf) * - seqcount, gfp_any()); - if (hctx->ccid2hctx_seqbuf == NULL) + if (ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, GFP_ATOMIC) != 0) return -ENOMEM; - for (i = 0; i < (seqcount - 1); i++) { - hctx->ccid2hctx_seqbuf[i].ccid2s_next = - &hctx->ccid2hctx_seqbuf[i + 1]; - hctx->ccid2hctx_seqbuf[i + 1].ccid2s_prev = - &hctx->ccid2hctx_seqbuf[i]; - } - hctx->ccid2hctx_seqbuf[seqcount - 1].ccid2s_next = - hctx->ccid2hctx_seqbuf; - hctx->ccid2hctx_seqbuf->ccid2s_prev = - &hctx->ccid2hctx_seqbuf[seqcount - 1]; - - hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqbuf; - hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh; hctx->ccid2hctx_sent = 0; hctx->ccid2hctx_rto = 3 * HZ; hctx->ccid2hctx_srtt = -1; @@ -722,10 +753,13 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) static void ccid2_hc_tx_exit(struct sock *sk) { struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + int i; ccid2_hc_tx_kill_rto_timer(sk); - kfree(hctx->ccid2hctx_seqbuf); - hctx->ccid2hctx_seqbuf = NULL; + + for (i = 0; i < hctx->ccid2hctx_seqbufc; i++) + kfree(hctx->ccid2hctx_seqbuf[i]); + hctx->ccid2hctx_seqbufc = 0; } static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index b4cc6c0bf020..2a02ce04ba85 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -35,6 +35,9 @@ struct ccid2_seq { struct ccid2_seq *ccid2s_next; }; +#define CCID2_SEQBUF_LEN 256 +#define CCID2_SEQBUF_MAX 128 + /** struct ccid2_hc_tx_sock - CCID2 TX half connection * * @ccid2hctx_ssacks - ACKs recv in slow start @@ -53,7 +56,8 @@ struct ccid2_hc_tx_sock { unsigned int ccid2hctx_ssthresh; int ccid2hctx_pipe; int ccid2hctx_numdupack; - struct ccid2_seq *ccid2hctx_seqbuf; + struct ccid2_seq *ccid2hctx_seqbuf[CCID2_SEQBUF_MAX]; + int ccid2hctx_seqbufc; struct ccid2_seq *ccid2hctx_seqh; struct ccid2_seq *ccid2hctx_seqt; long ccid2hctx_rto; From 374bcf32c86e1b56eab832bbb6b21e636707eab6 Mon Sep 17 00:00:00 2001 From: Andrea Bittau Date: Tue, 19 Sep 2006 13:14:43 -0700 Subject: [PATCH 0686/1063] [DCCP] CCID2: Halve cwnd once upon multiple losses in a single RTT When multiple losses occur in one RTT, the window should be halved only once [a single "congestion event"]. This is now implemented, although not perfectly. Slightly changed the interface for changing the cwnd: pass hctx instead of dp. This is required in order to allow for change_cwnd to be called from _init(). Signed-off-by: Andrea Bittau Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid2.c | 49 ++++++++++++++++++++++++++---------------- net/dccp/ccids/ccid2.h | 1 + 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 93a30ae8d07a..b88da035865f 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -187,10 +187,8 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, int val) dp->dccps_l_ack_ratio = val; } -static void ccid2_change_cwnd(struct sock *sk, int val) +static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, int val) { - struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); - if (val == 0) val = 1; @@ -234,7 +232,7 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd >> 1; if (hctx->ccid2hctx_ssthresh < 2) hctx->ccid2hctx_ssthresh = 2; - ccid2_change_cwnd(sk, 1); + ccid2_change_cwnd(hctx, 1); /* clear state about stuff we sent */ hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh; @@ -444,7 +442,7 @@ static inline void ccid2_new_ack(struct sock *sk, /* increase every 2 acks */ hctx->ccid2hctx_ssacks++; if (hctx->ccid2hctx_ssacks == 2) { - ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd + 1); + ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd+1); hctx->ccid2hctx_ssacks = 0; *maxincr = *maxincr - 1; } @@ -457,7 +455,7 @@ static inline void ccid2_new_ack(struct sock *sk, hctx->ccid2hctx_acks++; if (hctx->ccid2hctx_acks >= hctx->ccid2hctx_cwnd) { - ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd + 1); + ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd + 1); hctx->ccid2hctx_acks = 0; } } @@ -532,6 +530,22 @@ static void ccid2_hc_tx_dec_pipe(struct sock *sk) ccid2_hc_tx_kill_rto_timer(sk); } +static void ccid2_congestion_event(struct ccid2_hc_tx_sock *hctx, + struct ccid2_seq *seqp) +{ + if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) { + ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); + return; + } + + hctx->ccid2hctx_last_cong = jiffies; + + ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd >> 1); + hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd; + if (hctx->ccid2hctx_ssthresh < 2) + hctx->ccid2hctx_ssthresh = 2; +} + static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); @@ -542,7 +556,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) unsigned char veclen; int offset = 0; int done = 0; - int loss = 0; unsigned int maxincr = 0; ccid2_hc_tx_check_sanity(hctx); @@ -636,7 +649,8 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) !seqp->ccid2s_acked) { if (state == DCCP_ACKVEC_STATE_ECN_MARKED) { - loss = 1; + ccid2_congestion_event(hctx, + seqp); } else ccid2_new_ack(sk, seqp, &maxincr); @@ -688,7 +702,13 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* check for lost packets */ while (1) { if (!seqp->ccid2s_acked) { - loss = 1; + ccid2_pr_debug("Packet lost: %llu\n", + seqp->ccid2s_seq); + /* XXX need to traverse from tail -> head in + * order to detect multiple congestion events in + * one ack vector. + */ + ccid2_congestion_event(hctx, seqp); ccid2_hc_tx_dec_pipe(sk); } if (seqp == hctx->ccid2hctx_seqt) @@ -707,14 +727,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqt->ccid2s_next; } - if (loss) { - /* XXX do bit shifts guarantee a 0 as the new bit? */ - ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd >> 1); - hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd; - if (hctx->ccid2hctx_ssthresh < 2) - hctx->ccid2hctx_ssthresh = 2; - } - ccid2_hc_tx_check_sanity(hctx); } @@ -722,7 +734,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) { struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid); - hctx->ccid2hctx_cwnd = 1; + ccid2_change_cwnd(hctx, 1); /* Initialize ssthresh to infinity. This means that we will exit the * initial slow-start after the first packet loss. This is what we * want. @@ -741,6 +753,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) hctx->ccid2hctx_rttvar = -1; hctx->ccid2hctx_lastrtt = 0; hctx->ccid2hctx_rpdupack = -1; + hctx->ccid2hctx_last_cong = jiffies; hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire; hctx->ccid2hctx_rtotimer.data = (unsigned long)sk; diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index 2a02ce04ba85..5b2ef4acb300 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -71,6 +71,7 @@ struct ccid2_hc_tx_sock { u64 ccid2hctx_rpseq; int ccid2hctx_rpdupack; int ccid2hctx_sendwait; + unsigned long ccid2hctx_last_cong; }; struct ccid2_hc_rx_sock { From 593f16aa627d61da447c76ee5a159450174627f6 Mon Sep 17 00:00:00 2001 From: Andrea Bittau Date: Tue, 19 Sep 2006 13:15:33 -0700 Subject: [PATCH 0687/1063] [DCCP] CCID2: Add helper functions for changing important CCID2 state Introduce methods which manipulate interesting congestion control state such as pipe and rtt estimate. This is useful for people wishing to monitor the variables of CCID and instrument the code [perhaps using Kprobes]. Personally, I am a fan of encapsulation---that justifies this change =D. Signed-off-by: Andrea Bittau Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ccids/ccid2.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index b88da035865f..457dd3db7f41 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -199,6 +199,17 @@ static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, int val) hctx->ccid2hctx_cwnd = val; } +static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val) +{ + ccid2_pr_debug("change SRTT to %ld\n", val); + hctx->ccid2hctx_srtt = val; +} + +static void ccid2_change_pipe(struct ccid2_hc_tx_sock *hctx, long val) +{ + hctx->ccid2hctx_pipe = val; +} + static void ccid2_start_rto_timer(struct sock *sk); static void ccid2_hc_tx_rto_expire(unsigned long data) @@ -228,7 +239,7 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) ccid2_start_rto_timer(sk); /* adjust pipe, cwnd etc */ - hctx->ccid2hctx_pipe = 0; + ccid2_change_pipe(hctx, 0); hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd >> 1; if (hctx->ccid2hctx_ssthresh < 2) hctx->ccid2hctx_ssthresh = 2; @@ -274,7 +285,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len) BUG_ON(!hctx->ccid2hctx_sendwait); hctx->ccid2hctx_sendwait = 0; - hctx->ccid2hctx_pipe++; + ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe + 1); BUG_ON(hctx->ccid2hctx_pipe < 0); /* There is an issue. What if another packet is sent between @@ -470,11 +481,13 @@ static inline void ccid2_new_ack(struct sock *sk, if (hctx->ccid2hctx_srtt == -1) { ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n", r, jiffies, seqp->ccid2s_seq); - hctx->ccid2hctx_srtt = r; + ccid2_change_srtt(hctx, r); hctx->ccid2hctx_rttvar = r >> 1; } else { /* RTTVAR */ long tmp = hctx->ccid2hctx_srtt - r; + long srtt; + if (tmp < 0) tmp *= -1; @@ -484,10 +497,12 @@ static inline void ccid2_new_ack(struct sock *sk, hctx->ccid2hctx_rttvar += tmp; /* SRTT */ - hctx->ccid2hctx_srtt *= 7; - hctx->ccid2hctx_srtt >>= 3; + srtt = hctx->ccid2hctx_srtt; + srtt *= 7; + srtt >>= 3; tmp = r >> 3; - hctx->ccid2hctx_srtt += tmp; + srtt += tmp; + ccid2_change_srtt(hctx, srtt); } s = hctx->ccid2hctx_rttvar << 2; /* clock granularity is 1 when based on jiffies */ @@ -523,7 +538,7 @@ static void ccid2_hc_tx_dec_pipe(struct sock *sk) { struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); - hctx->ccid2hctx_pipe--; + ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe-1); BUG_ON(hctx->ccid2hctx_pipe < 0); if (hctx->ccid2hctx_pipe == 0) @@ -749,7 +764,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) hctx->ccid2hctx_sent = 0; hctx->ccid2hctx_rto = 3 * HZ; - hctx->ccid2hctx_srtt = -1; + ccid2_change_srtt(hctx, -1); hctx->ccid2hctx_rttvar = -1; hctx->ccid2hctx_lastrtt = 0; hctx->ccid2hctx_rpdupack = -1; From c55e2f4997a104d66b59bdf1aa8ab125d09ae00a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 19 Sep 2006 13:23:19 -0700 Subject: [PATCH 0688/1063] [IPV4]: ipip and ip_gre encapsulation bugs Handling of ipip and ip_gre ICMP error relaying is b0rken; it accesses 8bit field + 3 reserved octets as host-endian 32bit, does comparison, subtraction and stuffs the result back. That breaks on big-endian. Fixed, made endian-clean. [ Note that this effected code is permanently commented out with and ifdef, so this error couldn't actually cause problems for anyone. -DaveM ] Signed-off-by: Al Viro Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 23 +++++++++++++---------- net/ipv4/ipip.c | 22 ++++++++++++---------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index e66f6ff2e198..f5fba051df3d 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -393,7 +393,8 @@ out: int code = skb->h.icmph->code; int rel_type = 0; int rel_code = 0; - int rel_info = 0; + __be32 rel_info = 0; + __u32 n = 0; u16 flags; int grehlen = (iph->ihl<<2) + 4; struct sk_buff *skb2; @@ -422,14 +423,16 @@ out: default: return; case ICMP_PARAMETERPROB: - if (skb->h.icmph->un.gateway < (iph->ihl<<2)) + n = ntohl(skb->h.icmph->un.gateway) >> 24; + if (n < (iph->ihl<<2)) return; /* So... This guy found something strange INSIDE encapsulated packet. Well, he is fool, but what can we do ? */ rel_type = ICMP_PARAMETERPROB; - rel_info = skb->h.icmph->un.gateway - grehlen; + n -= grehlen; + rel_info = htonl(n << 24); break; case ICMP_DEST_UNREACH: @@ -440,13 +443,14 @@ out: return; case ICMP_FRAG_NEEDED: /* And it is the only really necessary thing :-) */ - rel_info = ntohs(skb->h.icmph->un.frag.mtu); - if (rel_info < grehlen+68) + n = ntohs(skb->h.icmph->un.frag.mtu); + if (n < grehlen+68) return; - rel_info -= grehlen; + n -= grehlen; /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ - if (rel_info > ntohs(eiph->tot_len)) + if (n > ntohs(eiph->tot_len)) return; + rel_info = htonl(n); break; default: /* All others are translated to HOST_UNREACH. @@ -508,12 +512,11 @@ out: /* change mtu on this route */ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - if (rel_info > dst_mtu(skb2->dst)) { + if (n > dst_mtu(skb2->dst)) { kfree_skb(skb2); return; } - skb2->dst->ops->update_pmtu(skb2->dst, rel_info); - rel_info = htonl(rel_info); + skb2->dst->ops->update_pmtu(skb2->dst, n); } else if (type == ICMP_TIME_EXCEEDED) { struct ip_tunnel *t = netdev_priv(skb2->dev); if (t->parms.iph.ttl) { diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 76ab50b0d6ef..0c4556529228 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -341,7 +341,8 @@ out: int code = skb->h.icmph->code; int rel_type = 0; int rel_code = 0; - int rel_info = 0; + __be32 rel_info = 0; + __u32 n = 0; struct sk_buff *skb2; struct flowi fl; struct rtable *rt; @@ -354,14 +355,15 @@ out: default: return 0; case ICMP_PARAMETERPROB: - if (skb->h.icmph->un.gateway < hlen) + n = ntohl(skb->h.icmph->un.gateway) >> 24; + if (n < hlen) return 0; /* So... This guy found something strange INSIDE encapsulated packet. Well, he is fool, but what can we do ? */ rel_type = ICMP_PARAMETERPROB; - rel_info = skb->h.icmph->un.gateway - hlen; + rel_info = htonl((n - hlen) << 24); break; case ICMP_DEST_UNREACH: @@ -372,13 +374,14 @@ out: return 0; case ICMP_FRAG_NEEDED: /* And it is the only really necessary thing :-) */ - rel_info = ntohs(skb->h.icmph->un.frag.mtu); - if (rel_info < hlen+68) + n = ntohs(skb->h.icmph->un.frag.mtu); + if (n < hlen+68) return 0; - rel_info -= hlen; + n -= hlen; /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ - if (rel_info > ntohs(eiph->tot_len)) + if (n > ntohs(eiph->tot_len)) return 0; + rel_info = htonl(n); break; default: /* All others are translated to HOST_UNREACH. @@ -440,12 +443,11 @@ out: /* change mtu on this route */ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - if (rel_info > dst_mtu(skb2->dst)) { + if (n > dst_mtu(skb2->dst)) { kfree_skb(skb2); return 0; } - skb2->dst->ops->update_pmtu(skb2->dst, rel_info); - rel_info = htonl(rel_info); + skb2->dst->ops->update_pmtu(skb2->dst, n); } else if (type == ICMP_TIME_EXCEEDED) { struct ip_tunnel *t = netdev_priv(skb2->dev); if (t->parms.iph.ttl) { From 1bf38a36b6a0e810dafae048fdbb999e587f0f2f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 11:57:09 -0700 Subject: [PATCH 0689/1063] [NETFILTER]: remove unused include file Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_logging.h | 33 ------------------------------- 1 file changed, 33 deletions(-) delete mode 100644 include/linux/netfilter_logging.h diff --git a/include/linux/netfilter_logging.h b/include/linux/netfilter_logging.h deleted file mode 100644 index 562bb6aad4e1..000000000000 --- a/include/linux/netfilter_logging.h +++ /dev/null @@ -1,33 +0,0 @@ -/* Internal logging interface, which relies on the real - LOG target modules */ -#ifndef __LINUX_NETFILTER_LOGGING_H -#define __LINUX_NETFILTER_LOGGING_H - -#ifdef __KERNEL__ -#include - -struct nf_logging_t { - void (*nf_log_packet)(struct sk_buff **pskb, - unsigned int hooknum, - const struct net_device *in, - const struct net_device *out, - const char *prefix); - void (*nf_log)(char *pfh, size_t len, - const char *prefix); -}; - -extern void nf_log_register(int pf, const struct nf_logging_t *logging); -extern void nf_log_unregister(int pf, const struct nf_logging_t *logging); - -extern void nf_log_packet(int pf, - struct sk_buff **pskb, - unsigned int hooknum, - const struct net_device *in, - const struct net_device *out, - const char *fmt, ...); -extern void nf_log(int pf, - char *pfh, size_t len, - const char *fmt, ...); -#endif /*__KERNEL__*/ - -#endif /*__LINUX_NETFILTER_LOGGING_H*/ From df0933dcb027e156cb5253570ad694b81bd52b69 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 11:57:53 -0700 Subject: [PATCH 0690/1063] [NETFILTER]: kill listhelp.h Kill listhelp.h and use the list.h functions instead. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 4 - include/linux/netfilter_ipv4/listhelp.h | 123 ------------ net/bridge/netfilter/ebtables.c | 76 +++++--- net/ipv4/netfilter/arp_tables.c | 2 - net/ipv4/netfilter/ip_conntrack_core.c | 189 +++++++++---------- net/ipv4/netfilter/ip_conntrack_proto_gre.c | 24 ++- net/ipv4/netfilter/ip_conntrack_standalone.c | 1 - net/ipv4/netfilter/ip_nat_core.c | 4 - net/ipv4/netfilter/ip_nat_helper.c | 4 - net/ipv4/netfilter/ip_nat_rule.c | 4 - net/ipv4/netfilter/ip_nat_standalone.c | 4 - net/ipv6/netfilter/ip6_tables.c | 3 - net/netfilter/nf_conntrack_core.c | 185 +++++++++--------- net/netfilter/nf_conntrack_standalone.c | 1 - net/netfilter/x_tables.c | 17 +- 15 files changed, 237 insertions(+), 404 deletions(-) delete mode 100644 include/linux/netfilter_ipv4/listhelp.h diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 03d1027fb0e8..c832295dbf61 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -138,10 +138,6 @@ struct xt_counters_info #include -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) -#include - #ifdef CONFIG_COMPAT #define COMPAT_TO_USER 1 #define COMPAT_FROM_USER -1 diff --git a/include/linux/netfilter_ipv4/listhelp.h b/include/linux/netfilter_ipv4/listhelp.h deleted file mode 100644 index 5d92cf044d91..000000000000 --- a/include/linux/netfilter_ipv4/listhelp.h +++ /dev/null @@ -1,123 +0,0 @@ -#ifndef _LISTHELP_H -#define _LISTHELP_H -#include - -/* Header to do more comprehensive job than linux/list.h; assume list - is first entry in structure. */ - -/* Return pointer to first true entry, if any, or NULL. A macro - required to allow inlining of cmpfn. */ -#define LIST_FIND(head, cmpfn, type, args...) \ -({ \ - const struct list_head *__i, *__j = NULL; \ - \ - ASSERT_READ_LOCK(head); \ - list_for_each(__i, (head)) \ - if (cmpfn((const type)__i , ## args)) { \ - __j = __i; \ - break; \ - } \ - (type)__j; \ -}) - -#define LIST_FIND_W(head, cmpfn, type, args...) \ -({ \ - const struct list_head *__i, *__j = NULL; \ - \ - ASSERT_WRITE_LOCK(head); \ - list_for_each(__i, (head)) \ - if (cmpfn((type)__i , ## args)) { \ - __j = __i; \ - break; \ - } \ - (type)__j; \ -}) - -/* Just like LIST_FIND but we search backwards */ -#define LIST_FIND_B(head, cmpfn, type, args...) \ -({ \ - const struct list_head *__i, *__j = NULL; \ - \ - ASSERT_READ_LOCK(head); \ - list_for_each_prev(__i, (head)) \ - if (cmpfn((const type)__i , ## args)) { \ - __j = __i; \ - break; \ - } \ - (type)__j; \ -}) - -static inline int -__list_cmp_same(const void *p1, const void *p2) { return p1 == p2; } - -/* Is this entry in the list? */ -static inline int -list_inlist(struct list_head *head, const void *entry) -{ - return LIST_FIND(head, __list_cmp_same, void *, entry) != NULL; -} - -/* Delete from list. */ -#ifdef CONFIG_NETFILTER_DEBUG -#define LIST_DELETE(head, oldentry) \ -do { \ - ASSERT_WRITE_LOCK(head); \ - if (!list_inlist(head, oldentry)) \ - printk("LIST_DELETE: %s:%u `%s'(%p) not in %s.\n", \ - __FILE__, __LINE__, #oldentry, oldentry, #head); \ - else list_del((struct list_head *)oldentry); \ -} while(0) -#else -#define LIST_DELETE(head, oldentry) list_del((struct list_head *)oldentry) -#endif - -/* Append. */ -static inline void -list_append(struct list_head *head, void *new) -{ - ASSERT_WRITE_LOCK(head); - list_add((new), (head)->prev); -} - -/* Prepend. */ -static inline void -list_prepend(struct list_head *head, void *new) -{ - ASSERT_WRITE_LOCK(head); - list_add(new, head); -} - -/* Insert according to ordering function; insert before first true. */ -#define LIST_INSERT(head, new, cmpfn) \ -do { \ - struct list_head *__i; \ - ASSERT_WRITE_LOCK(head); \ - list_for_each(__i, (head)) \ - if ((new), (typeof (new))__i) \ - break; \ - list_add((struct list_head *)(new), __i->prev); \ -} while(0) - -/* If the field after the list_head is a nul-terminated string, you - can use these functions. */ -static inline int __list_cmp_name(const void *i, const char *name) -{ - return strcmp(name, i+sizeof(struct list_head)) == 0; -} - -/* Returns false if same name already in list, otherwise does insert. */ -static inline int -list_named_insert(struct list_head *head, void *new) -{ - if (LIST_FIND(head, __list_cmp_name, void *, - new + sizeof(struct list_head))) - return 0; - list_prepend(head, new); - return 1; -} - -/* Find this named element in the list. */ -#define list_named_find(head, name) \ -LIST_FIND(head, __list_cmp_name, void *, name) - -#endif /*_LISTHELP_H*/ diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index d06a5075b5f6..3df55b2bd91d 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -31,12 +32,6 @@ /* needed for logical [in,out]-dev filtering */ #include "../br_private.h" -/* list_named_find */ -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) -#include -#include - #define BUGPRINT(format, args...) printk("kernel msg: ebtables bug: please "\ "report to author: "format, ## args) /* #define BUGPRINT(format, args...) */ @@ -278,18 +273,22 @@ static inline void * find_inlist_lock_noload(struct list_head *head, const char *name, int *error, struct mutex *mutex) { - void *ret; + struct { + struct list_head list; + char name[EBT_FUNCTION_MAXNAMELEN]; + } *e; *error = mutex_lock_interruptible(mutex); if (*error != 0) return NULL; - ret = list_named_find(head, name); - if (!ret) { - *error = -ENOENT; - mutex_unlock(mutex); + list_for_each_entry(e, head, list) { + if (strcmp(e->name, name) == 0) + return e; } - return ret; + *error = -ENOENT; + mutex_unlock(mutex); + return NULL; } #ifndef CONFIG_KMOD @@ -1043,15 +1042,19 @@ free_newinfo: int ebt_register_target(struct ebt_target *target) { + struct ebt_target *t; int ret; ret = mutex_lock_interruptible(&ebt_mutex); if (ret != 0) return ret; - if (!list_named_insert(&ebt_targets, target)) { - mutex_unlock(&ebt_mutex); - return -EEXIST; + list_for_each_entry(t, &ebt_targets, list) { + if (strcmp(t->name, target->name) == 0) { + mutex_unlock(&ebt_mutex); + return -EEXIST; + } } + list_add(&target->list, &ebt_targets); mutex_unlock(&ebt_mutex); return 0; @@ -1060,21 +1063,25 @@ int ebt_register_target(struct ebt_target *target) void ebt_unregister_target(struct ebt_target *target) { mutex_lock(&ebt_mutex); - LIST_DELETE(&ebt_targets, target); + list_del(&target->list); mutex_unlock(&ebt_mutex); } int ebt_register_match(struct ebt_match *match) { + struct ebt_match *m; int ret; ret = mutex_lock_interruptible(&ebt_mutex); if (ret != 0) return ret; - if (!list_named_insert(&ebt_matches, match)) { - mutex_unlock(&ebt_mutex); - return -EEXIST; + list_for_each_entry(m, &ebt_matches, list) { + if (strcmp(m->name, match->name) == 0) { + mutex_unlock(&ebt_mutex); + return -EEXIST; + } } + list_add(&match->list, &ebt_matches); mutex_unlock(&ebt_mutex); return 0; @@ -1083,21 +1090,25 @@ int ebt_register_match(struct ebt_match *match) void ebt_unregister_match(struct ebt_match *match) { mutex_lock(&ebt_mutex); - LIST_DELETE(&ebt_matches, match); + list_del(&match->list); mutex_unlock(&ebt_mutex); } int ebt_register_watcher(struct ebt_watcher *watcher) { + struct ebt_watcher *w; int ret; ret = mutex_lock_interruptible(&ebt_mutex); if (ret != 0) return ret; - if (!list_named_insert(&ebt_watchers, watcher)) { - mutex_unlock(&ebt_mutex); - return -EEXIST; + list_for_each_entry(w, &ebt_watchers, list) { + if (strcmp(w->name, watcher->name) == 0) { + mutex_unlock(&ebt_mutex); + return -EEXIST; + } } + list_add(&watcher->list, &ebt_watchers); mutex_unlock(&ebt_mutex); return 0; @@ -1106,13 +1117,14 @@ int ebt_register_watcher(struct ebt_watcher *watcher) void ebt_unregister_watcher(struct ebt_watcher *watcher) { mutex_lock(&ebt_mutex); - LIST_DELETE(&ebt_watchers, watcher); + list_del(&watcher->list); mutex_unlock(&ebt_mutex); } int ebt_register_table(struct ebt_table *table) { struct ebt_table_info *newinfo; + struct ebt_table *t; int ret, i, countersize; if (!table || !table->table ||!table->table->entries || @@ -1158,10 +1170,12 @@ int ebt_register_table(struct ebt_table *table) if (ret != 0) goto free_chainstack; - if (list_named_find(&ebt_tables, table->name)) { - ret = -EEXIST; - BUGPRINT("Table name already exists\n"); - goto free_unlock; + list_for_each_entry(t, &ebt_tables, list) { + if (strcmp(t->name, table->name) == 0) { + ret = -EEXIST; + BUGPRINT("Table name already exists\n"); + goto free_unlock; + } } /* Hold a reference count if the chains aren't empty */ @@ -1169,7 +1183,7 @@ int ebt_register_table(struct ebt_table *table) ret = -ENOENT; goto free_unlock; } - list_prepend(&ebt_tables, table); + list_add(&table->list, &ebt_tables); mutex_unlock(&ebt_mutex); return 0; free_unlock: @@ -1195,7 +1209,7 @@ void ebt_unregister_table(struct ebt_table *table) return; } mutex_lock(&ebt_mutex); - LIST_DELETE(&ebt_tables, table); + list_del(&table->list); mutex_unlock(&ebt_mutex); vfree(table->private->entries); if (table->private->chainstack) { @@ -1465,7 +1479,7 @@ static int __init ebtables_init(void) int ret; mutex_lock(&ebt_mutex); - list_named_insert(&ebt_targets, &ebt_standard_target); + list_add(&ebt_standard_target.list, &ebt_targets); mutex_unlock(&ebt_mutex); if ((ret = nf_register_sockopt(&ebt_sockopts)) < 0) return ret; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 4f10b06413a1..aaeaa9ce0f28 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -56,8 +56,6 @@ do { \ #define ARP_NF_ASSERT(x) #endif -#include - static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, char *hdr_addr, int len) { diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 5da25ad50309..2568d480e9a9 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -47,7 +47,6 @@ #include #include #include -#include #define IP_CONNTRACK_VERSION "2.4" @@ -294,15 +293,10 @@ void ip_ct_remove_expectations(struct ip_conntrack *ct) static void clean_from_lists(struct ip_conntrack *ct) { - unsigned int ho, hr; - DEBUGP("clean_from_lists(%p)\n", ct); ASSERT_WRITE_LOCK(&ip_conntrack_lock); - - ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); - LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]); - LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); + list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list); /* Destroy all pending expectations */ ip_ct_remove_expectations(ct); @@ -367,16 +361,6 @@ static void death_by_timeout(unsigned long ul_conntrack) ip_conntrack_put(ct); } -static inline int -conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i, - const struct ip_conntrack_tuple *tuple, - const struct ip_conntrack *ignored_conntrack) -{ - ASSERT_READ_LOCK(&ip_conntrack_lock); - return tuplehash_to_ctrack(i) != ignored_conntrack - && ip_ct_tuple_equal(tuple, &i->tuple); -} - struct ip_conntrack_tuple_hash * __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, const struct ip_conntrack *ignored_conntrack) @@ -386,7 +370,8 @@ __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, ASSERT_READ_LOCK(&ip_conntrack_lock); list_for_each_entry(h, &ip_conntrack_hash[hash], list) { - if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) { + if (tuplehash_to_ctrack(h) != ignored_conntrack && + ip_ct_tuple_equal(tuple, &h->tuple)) { CONNTRACK_STAT_INC(found); return h; } @@ -417,10 +402,10 @@ static void __ip_conntrack_hash_insert(struct ip_conntrack *ct, unsigned int repl_hash) { ct->id = ++ip_conntrack_next_id; - list_prepend(&ip_conntrack_hash[hash], - &ct->tuplehash[IP_CT_DIR_ORIGINAL].list); - list_prepend(&ip_conntrack_hash[repl_hash], - &ct->tuplehash[IP_CT_DIR_REPLY].list); + list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list, + &ip_conntrack_hash[hash]); + list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list, + &ip_conntrack_hash[repl_hash]); } void ip_conntrack_hash_insert(struct ip_conntrack *ct) @@ -440,6 +425,7 @@ int __ip_conntrack_confirm(struct sk_buff **pskb) { unsigned int hash, repl_hash; + struct ip_conntrack_tuple_hash *h; struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; @@ -470,43 +456,43 @@ __ip_conntrack_confirm(struct sk_buff **pskb) /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. */ - if (!LIST_FIND(&ip_conntrack_hash[hash], - conntrack_tuple_cmp, - struct ip_conntrack_tuple_hash *, - &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL) - && !LIST_FIND(&ip_conntrack_hash[repl_hash], - conntrack_tuple_cmp, - struct ip_conntrack_tuple_hash *, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) { - /* Remove from unconfirmed list */ - list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + list_for_each_entry(h, &ip_conntrack_hash[hash], list) + if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &h->tuple)) + goto out; + list_for_each_entry(h, &ip_conntrack_hash[repl_hash], list) + if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, + &h->tuple)) + goto out; - __ip_conntrack_hash_insert(ct, hash, repl_hash); - /* Timer relative to confirmation time, not original - setting time, otherwise we'd get timer wrap in - weird delay cases. */ - ct->timeout.expires += jiffies; - add_timer(&ct->timeout); - atomic_inc(&ct->ct_general.use); - set_bit(IPS_CONFIRMED_BIT, &ct->status); - CONNTRACK_STAT_INC(insert); - write_unlock_bh(&ip_conntrack_lock); - if (ct->helper) - ip_conntrack_event_cache(IPCT_HELPER, *pskb); + /* Remove from unconfirmed list */ + list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + + __ip_conntrack_hash_insert(ct, hash, repl_hash); + /* Timer relative to confirmation time, not original + setting time, otherwise we'd get timer wrap in + weird delay cases. */ + ct->timeout.expires += jiffies; + add_timer(&ct->timeout); + atomic_inc(&ct->ct_general.use); + set_bit(IPS_CONFIRMED_BIT, &ct->status); + CONNTRACK_STAT_INC(insert); + write_unlock_bh(&ip_conntrack_lock); + if (ct->helper) + ip_conntrack_event_cache(IPCT_HELPER, *pskb); #ifdef CONFIG_IP_NF_NAT_NEEDED - if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || - test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) - ip_conntrack_event_cache(IPCT_NATINFO, *pskb); + if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || + test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) + ip_conntrack_event_cache(IPCT_NATINFO, *pskb); #endif - ip_conntrack_event_cache(master_ct(ct) ? - IPCT_RELATED : IPCT_NEW, *pskb); + ip_conntrack_event_cache(master_ct(ct) ? + IPCT_RELATED : IPCT_NEW, *pskb); - return NF_ACCEPT; - } + return NF_ACCEPT; +out: CONNTRACK_STAT_INC(insert_failed); write_unlock_bh(&ip_conntrack_lock); - return NF_DROP; } @@ -527,23 +513,21 @@ ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple, /* There's a small race here where we may free a just-assured connection. Too bad: we're in trouble anyway. */ -static inline int unreplied(const struct ip_conntrack_tuple_hash *i) -{ - return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status)); -} - static int early_drop(struct list_head *chain) { /* Traverse backwards: gives us oldest, which is roughly LRU */ struct ip_conntrack_tuple_hash *h; - struct ip_conntrack *ct = NULL; + struct ip_conntrack *ct = NULL, *tmp; int dropped = 0; read_lock_bh(&ip_conntrack_lock); - h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *); - if (h) { - ct = tuplehash_to_ctrack(h); - atomic_inc(&ct->ct_general.use); + list_for_each_entry_reverse(h, chain, list) { + tmp = tuplehash_to_ctrack(h); + if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) { + ct = tmp; + atomic_inc(&ct->ct_general.use); + break; + } } read_unlock_bh(&ip_conntrack_lock); @@ -559,18 +543,16 @@ static int early_drop(struct list_head *chain) return dropped; } -static inline int helper_cmp(const struct ip_conntrack_helper *i, - const struct ip_conntrack_tuple *rtuple) -{ - return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); -} - static struct ip_conntrack_helper * __ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple) { - return LIST_FIND(&helpers, helper_cmp, - struct ip_conntrack_helper *, - tuple); + struct ip_conntrack_helper *h; + + list_for_each_entry(h, &helpers, list) { + if (ip_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask)) + return h; + } + return NULL; } struct ip_conntrack_helper * @@ -1062,7 +1044,7 @@ int ip_conntrack_helper_register(struct ip_conntrack_helper *me) { BUG_ON(me->timeout == 0); write_lock_bh(&ip_conntrack_lock); - list_prepend(&helpers, me); + list_add(&me->list, &helpers); write_unlock_bh(&ip_conntrack_lock); return 0; @@ -1081,24 +1063,24 @@ __ip_conntrack_helper_find_byname(const char *name) return NULL; } -static inline int unhelp(struct ip_conntrack_tuple_hash *i, - const struct ip_conntrack_helper *me) +static inline void unhelp(struct ip_conntrack_tuple_hash *i, + const struct ip_conntrack_helper *me) { if (tuplehash_to_ctrack(i)->helper == me) { ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i)); tuplehash_to_ctrack(i)->helper = NULL; } - return 0; } void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) { unsigned int i; + struct ip_conntrack_tuple_hash *h; struct ip_conntrack_expect *exp, *tmp; /* Need write lock here, to delete helper. */ write_lock_bh(&ip_conntrack_lock); - LIST_DELETE(&helpers, me); + list_del(&me->list); /* Get rid of expectations */ list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) { @@ -1108,10 +1090,12 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) } } /* Get rid of expecteds, set helpers to NULL. */ - LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me); - for (i = 0; i < ip_conntrack_htable_size; i++) - LIST_FIND_W(&ip_conntrack_hash[i], unhelp, - struct ip_conntrack_tuple_hash *, me); + list_for_each_entry(h, &unconfirmed, list) + unhelp(h, me); + for (i = 0; i < ip_conntrack_htable_size; i++) { + list_for_each_entry(h, &ip_conntrack_hash[i], list) + unhelp(h, me); + } write_unlock_bh(&ip_conntrack_lock); /* Someone could be still looking at the helper in a bh. */ @@ -1237,46 +1221,43 @@ static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) nf_conntrack_get(nskb->nfct); } -static inline int -do_iter(const struct ip_conntrack_tuple_hash *i, - int (*iter)(struct ip_conntrack *i, void *data), - void *data) -{ - return iter(tuplehash_to_ctrack(i), data); -} - /* Bring out ya dead! */ -static struct ip_conntrack_tuple_hash * +static struct ip_conntrack * get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data), void *data, unsigned int *bucket) { - struct ip_conntrack_tuple_hash *h = NULL; + struct ip_conntrack_tuple_hash *h; + struct ip_conntrack *ct; write_lock_bh(&ip_conntrack_lock); for (; *bucket < ip_conntrack_htable_size; (*bucket)++) { - h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter, - struct ip_conntrack_tuple_hash *, iter, data); - if (h) - break; + list_for_each_entry(h, &ip_conntrack_hash[*bucket], list) { + ct = tuplehash_to_ctrack(h); + if (iter(ct, data)) + goto found; + } + } + list_for_each_entry(h, &unconfirmed, list) { + ct = tuplehash_to_ctrack(h); + if (iter(ct, data)) + goto found; } - if (!h) - h = LIST_FIND_W(&unconfirmed, do_iter, - struct ip_conntrack_tuple_hash *, iter, data); - if (h) - atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use); write_unlock_bh(&ip_conntrack_lock); + return NULL; - return h; +found: + atomic_inc(&ct->ct_general.use); + write_unlock_bh(&ip_conntrack_lock); + return ct; } void ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data) { - struct ip_conntrack_tuple_hash *h; + struct ip_conntrack *ct; unsigned int bucket = 0; - while ((h = get_next_corpse(iter, data, &bucket)) != NULL) { - struct ip_conntrack *ct = tuplehash_to_ctrack(h); + while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { /* Time to push up daises... */ if (del_timer(&ct->timeout)) death_by_timeout((unsigned long)ct); diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c index 4ee016c427b4..92c6d8b178c9 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c @@ -37,7 +37,6 @@ static DEFINE_RWLOCK(ip_ct_gre_lock); #define ASSERT_READ_LOCK(x) #define ASSERT_WRITE_LOCK(x) -#include #include #include #include @@ -82,10 +81,12 @@ static __be16 gre_keymap_lookup(struct ip_conntrack_tuple *t) __be16 key = 0; read_lock_bh(&ip_ct_gre_lock); - km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, - struct ip_ct_gre_keymap *, t); - if (km) - key = km->tuple.src.u.gre.key; + list_for_each_entry(km, &gre_keymap_list, list) { + if (gre_key_cmpfn(km, t)) { + key = km->tuple.src.u.gre.key; + break; + } + } read_unlock_bh(&ip_ct_gre_lock); DEBUGP("lookup src key 0x%x up key for ", key); @@ -99,7 +100,7 @@ int ip_ct_gre_keymap_add(struct ip_conntrack *ct, struct ip_conntrack_tuple *t, int reply) { - struct ip_ct_gre_keymap **exist_km, *km, *old; + struct ip_ct_gre_keymap **exist_km, *km; if (!ct->helper || strcmp(ct->helper->name, "pptp")) { DEBUGP("refusing to add GRE keymap to non-pptp session\n"); @@ -113,13 +114,10 @@ ip_ct_gre_keymap_add(struct ip_conntrack *ct, if (*exist_km) { /* check whether it's a retransmission */ - old = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, - struct ip_ct_gre_keymap *, t); - if (old == *exist_km) { - DEBUGP("retransmission\n"); - return 0; + list_for_each_entry(km, &gre_keymap_list, list) { + if (gre_key_cmpfn(km, t) && km == *exist_km) + return 0; } - DEBUGP("trying to override keymap_%s for ct %p\n", reply? "reply":"orig", ct); return -EEXIST; @@ -136,7 +134,7 @@ ip_ct_gre_keymap_add(struct ip_conntrack *ct, DUMP_TUPLE_GRE(&km->tuple); write_lock_bh(&ip_ct_gre_lock); - list_append(&gre_keymap_list, km); + list_add_tail(&km->list, &gre_keymap_list); write_unlock_bh(&ip_ct_gre_lock); return 0; diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index 3f5d495b853b..02135756562e 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -35,7 +35,6 @@ #include #include #include -#include #if 0 #define DEBUGP printk diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 4c540d03d48e..71f3e09cbc84 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -22,9 +22,6 @@ #include #include -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - #include #include #include @@ -33,7 +30,6 @@ #include #include #include -#include #if 0 #define DEBUGP printk diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c index 021c3daae3ed..7f6a75984f6c 100644 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c @@ -27,16 +27,12 @@ #include #include -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - #include #include #include #include #include #include -#include #if 0 #define DEBUGP printk diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c index e59f5a8ecb6b..7b703839aa58 100644 --- a/net/ipv4/netfilter/ip_nat_rule.c +++ b/net/ipv4/netfilter/ip_nat_rule.c @@ -19,14 +19,10 @@ #include #include -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - #include #include #include #include -#include #if 0 #define DEBUGP printk diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index f3b778355432..9c577db62047 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -30,9 +30,6 @@ #include #include -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - #include #include #include @@ -40,7 +37,6 @@ #include #include #include -#include #if 0 #define DEBUGP printk diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index d1c315364ee7..73d477ce216b 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -70,9 +70,6 @@ do { \ #define IP_NF_ASSERT(x) #endif - -#include - #if 0 /* All the better to debug you with... */ #define static diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 3b64dbee6620..927137b8b3b5 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -57,7 +57,6 @@ #include #include #include -#include #define NF_CONNTRACK_VERSION "0.5.0" @@ -539,15 +538,10 @@ void nf_ct_remove_expectations(struct nf_conn *ct) static void clean_from_lists(struct nf_conn *ct) { - unsigned int ho, hr; - DEBUGP("clean_from_lists(%p)\n", ct); ASSERT_WRITE_LOCK(&nf_conntrack_lock); - - ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); - LIST_DELETE(&nf_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]); - LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); + list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list); /* Destroy all pending expectations */ nf_ct_remove_expectations(ct); @@ -617,16 +611,6 @@ static void death_by_timeout(unsigned long ul_conntrack) nf_ct_put(ct); } -static inline int -conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i, - const struct nf_conntrack_tuple *tuple, - const struct nf_conn *ignored_conntrack) -{ - ASSERT_READ_LOCK(&nf_conntrack_lock); - return nf_ct_tuplehash_to_ctrack(i) != ignored_conntrack - && nf_ct_tuple_equal(tuple, &i->tuple); -} - struct nf_conntrack_tuple_hash * __nf_conntrack_find(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack) @@ -636,7 +620,8 @@ __nf_conntrack_find(const struct nf_conntrack_tuple *tuple, ASSERT_READ_LOCK(&nf_conntrack_lock); list_for_each_entry(h, &nf_conntrack_hash[hash], list) { - if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) { + if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && + nf_ct_tuple_equal(tuple, &h->tuple)) { NF_CT_STAT_INC(found); return h; } @@ -667,10 +652,10 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, unsigned int repl_hash) { ct->id = ++nf_conntrack_next_id; - list_prepend(&nf_conntrack_hash[hash], - &ct->tuplehash[IP_CT_DIR_ORIGINAL].list); - list_prepend(&nf_conntrack_hash[repl_hash], - &ct->tuplehash[IP_CT_DIR_REPLY].list); + list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list, + &nf_conntrack_hash[hash]); + list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list, + &nf_conntrack_hash[repl_hash]); } void nf_conntrack_hash_insert(struct nf_conn *ct) @@ -690,7 +675,9 @@ int __nf_conntrack_confirm(struct sk_buff **pskb) { unsigned int hash, repl_hash; + struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; + struct nf_conn_help *help; enum ip_conntrack_info ctinfo; ct = nf_ct_get(*pskb, &ctinfo); @@ -720,41 +707,41 @@ __nf_conntrack_confirm(struct sk_buff **pskb) /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. */ - if (!LIST_FIND(&nf_conntrack_hash[hash], - conntrack_tuple_cmp, - struct nf_conntrack_tuple_hash *, - &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL) - && !LIST_FIND(&nf_conntrack_hash[repl_hash], - conntrack_tuple_cmp, - struct nf_conntrack_tuple_hash *, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) { - struct nf_conn_help *help; - /* Remove from unconfirmed list */ - list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + list_for_each_entry(h, &nf_conntrack_hash[hash], list) + if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &h->tuple)) + goto out; + list_for_each_entry(h, &nf_conntrack_hash[repl_hash], list) + if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, + &h->tuple)) + goto out; - __nf_conntrack_hash_insert(ct, hash, repl_hash); - /* Timer relative to confirmation time, not original - setting time, otherwise we'd get timer wrap in - weird delay cases. */ - ct->timeout.expires += jiffies; - add_timer(&ct->timeout); - atomic_inc(&ct->ct_general.use); - set_bit(IPS_CONFIRMED_BIT, &ct->status); - NF_CT_STAT_INC(insert); - write_unlock_bh(&nf_conntrack_lock); - help = nfct_help(ct); - if (help && help->helper) - nf_conntrack_event_cache(IPCT_HELPER, *pskb); + /* Remove from unconfirmed list */ + list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + + __nf_conntrack_hash_insert(ct, hash, repl_hash); + /* Timer relative to confirmation time, not original + setting time, otherwise we'd get timer wrap in + weird delay cases. */ + ct->timeout.expires += jiffies; + add_timer(&ct->timeout); + atomic_inc(&ct->ct_general.use); + set_bit(IPS_CONFIRMED_BIT, &ct->status); + NF_CT_STAT_INC(insert); + write_unlock_bh(&nf_conntrack_lock); + help = nfct_help(ct); + if (help && help->helper) + nf_conntrack_event_cache(IPCT_HELPER, *pskb); #ifdef CONFIG_NF_NAT_NEEDED - if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || - test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_NATINFO, *pskb); + if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || + test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) + nf_conntrack_event_cache(IPCT_NATINFO, *pskb); #endif - nf_conntrack_event_cache(master_ct(ct) ? - IPCT_RELATED : IPCT_NEW, *pskb); - return NF_ACCEPT; - } + nf_conntrack_event_cache(master_ct(ct) ? + IPCT_RELATED : IPCT_NEW, *pskb); + return NF_ACCEPT; +out: NF_CT_STAT_INC(insert_failed); write_unlock_bh(&nf_conntrack_lock); return NF_DROP; @@ -777,24 +764,21 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, /* There's a small race here where we may free a just-assured connection. Too bad: we're in trouble anyway. */ -static inline int unreplied(const struct nf_conntrack_tuple_hash *i) -{ - return !(test_bit(IPS_ASSURED_BIT, - &nf_ct_tuplehash_to_ctrack(i)->status)); -} - static int early_drop(struct list_head *chain) { /* Traverse backwards: gives us oldest, which is roughly LRU */ struct nf_conntrack_tuple_hash *h; - struct nf_conn *ct = NULL; + struct nf_conn *ct = NULL, *tmp; int dropped = 0; read_lock_bh(&nf_conntrack_lock); - h = LIST_FIND_B(chain, unreplied, struct nf_conntrack_tuple_hash *); - if (h) { - ct = nf_ct_tuplehash_to_ctrack(h); - atomic_inc(&ct->ct_general.use); + list_for_each_entry_reverse(h, chain, list) { + tmp = nf_ct_tuplehash_to_ctrack(h); + if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) { + ct = tmp; + atomic_inc(&ct->ct_general.use); + break; + } } read_unlock_bh(&nf_conntrack_lock); @@ -810,18 +794,16 @@ static int early_drop(struct list_head *chain) return dropped; } -static inline int helper_cmp(const struct nf_conntrack_helper *i, - const struct nf_conntrack_tuple *rtuple) -{ - return nf_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); -} - static struct nf_conntrack_helper * __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) { - return LIST_FIND(&helpers, helper_cmp, - struct nf_conntrack_helper *, - tuple); + struct nf_conntrack_helper *h; + + list_for_each_entry(h, &helpers, list) { + if (nf_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask)) + return h; + } + return NULL; } struct nf_conntrack_helper * @@ -1323,7 +1305,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) return ret; } write_lock_bh(&nf_conntrack_lock); - list_prepend(&helpers, me); + list_add(&me->list, &helpers); write_unlock_bh(&nf_conntrack_lock); return 0; @@ -1342,8 +1324,8 @@ __nf_conntrack_helper_find_byname(const char *name) return NULL; } -static inline int unhelp(struct nf_conntrack_tuple_hash *i, - const struct nf_conntrack_helper *me) +static inline void unhelp(struct nf_conntrack_tuple_hash *i, + const struct nf_conntrack_helper *me) { struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); struct nf_conn_help *help = nfct_help(ct); @@ -1352,17 +1334,17 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i, nf_conntrack_event(IPCT_HELPER, ct); help->helper = NULL; } - return 0; } void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) { unsigned int i; + struct nf_conntrack_tuple_hash *h; struct nf_conntrack_expect *exp, *tmp; /* Need write lock here, to delete helper. */ write_lock_bh(&nf_conntrack_lock); - LIST_DELETE(&helpers, me); + list_del(&me->list); /* Get rid of expectations */ list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) { @@ -1374,10 +1356,12 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) } /* Get rid of expecteds, set helpers to NULL. */ - LIST_FIND_W(&unconfirmed, unhelp, struct nf_conntrack_tuple_hash*, me); - for (i = 0; i < nf_conntrack_htable_size; i++) - LIST_FIND_W(&nf_conntrack_hash[i], unhelp, - struct nf_conntrack_tuple_hash *, me); + list_for_each_entry(h, &unconfirmed, list) + unhelp(h, me); + for (i = 0; i < nf_conntrack_htable_size; i++) { + list_for_each_entry(h, &nf_conntrack_hash[i], list) + unhelp(h, me); + } write_unlock_bh(&nf_conntrack_lock); /* Someone could be still looking at the helper in a bh. */ @@ -1510,37 +1494,40 @@ do_iter(const struct nf_conntrack_tuple_hash *i, } /* Bring out ya dead! */ -static struct nf_conntrack_tuple_hash * +static struct nf_conn * get_next_corpse(int (*iter)(struct nf_conn *i, void *data), void *data, unsigned int *bucket) { - struct nf_conntrack_tuple_hash *h = NULL; + struct nf_conntrack_tuple_hash *h; + struct nf_conn *ct; write_lock_bh(&nf_conntrack_lock); for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { - h = LIST_FIND_W(&nf_conntrack_hash[*bucket], do_iter, - struct nf_conntrack_tuple_hash *, iter, data); - if (h) - break; + list_for_each_entry(h, &nf_conntrack_hash[*bucket], list) { + ct = nf_ct_tuplehash_to_ctrack(h); + if (iter(ct, data)) + goto found; + } } - if (!h) - h = LIST_FIND_W(&unconfirmed, do_iter, - struct nf_conntrack_tuple_hash *, iter, data); - if (h) - atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use); + list_for_each_entry(h, &unconfirmed, list) { + ct = nf_ct_tuplehash_to_ctrack(h); + if (iter(ct, data)) + goto found; + } + return NULL; +found: + atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use); write_unlock_bh(&nf_conntrack_lock); - - return h; + return ct; } void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data) { - struct nf_conntrack_tuple_hash *h; + struct nf_conn *ct; unsigned int bucket = 0; - while ((h = get_next_corpse(iter, data, &bucket)) != NULL) { - struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { /* Time to push up daises... */ if (del_timer(&ct->timeout)) death_by_timeout((unsigned long)ct); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 9a1de0ca475b..5954f6773810 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -37,7 +37,6 @@ #include #include #include -#include #if 0 #define DEBUGP printk diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 8037ba63d587..be7baf4f6846 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -81,7 +81,7 @@ xt_unregister_target(struct xt_target *target) int af = target->family; mutex_lock(&xt[af].mutex); - LIST_DELETE(&xt[af].target, target); + list_del(&target->list); mutex_unlock(&xt[af].mutex); } EXPORT_SYMBOL(xt_unregister_target); @@ -138,7 +138,7 @@ xt_unregister_match(struct xt_match *match) int af = match->family; mutex_lock(&xt[af].mutex); - LIST_DELETE(&xt[af].match, match); + list_del(&match->list); mutex_unlock(&xt[af].mutex); } EXPORT_SYMBOL(xt_unregister_match); @@ -575,15 +575,18 @@ int xt_register_table(struct xt_table *table, { int ret; struct xt_table_info *private; + struct xt_table *t; ret = mutex_lock_interruptible(&xt[table->af].mutex); if (ret != 0) return ret; /* Don't autoload: we'd eat our tail... */ - if (list_named_find(&xt[table->af].tables, table->name)) { - ret = -EEXIST; - goto unlock; + list_for_each_entry(t, &xt[table->af].tables, list) { + if (strcmp(t->name, table->name) == 0) { + ret = -EEXIST; + goto unlock; + } } /* Simplifies replace_table code. */ @@ -598,7 +601,7 @@ int xt_register_table(struct xt_table *table, /* save number of initial entries */ private->initial_entries = private->number; - list_prepend(&xt[table->af].tables, table); + list_add(&table->list, &xt[table->af].tables); ret = 0; unlock: @@ -613,7 +616,7 @@ void *xt_unregister_table(struct xt_table *table) mutex_lock(&xt[table->af].mutex); private = table->private; - LIST_DELETE(&xt[table->af].tables, table); + list_del(&table->list); mutex_unlock(&xt[table->af].mutex); return private; From 50b9f1d509eb998db73cd769c9511186474f566e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 11:58:17 -0700 Subject: [PATCH 0691/1063] [NETFILTER]: xt_conntrack: clean up overly long lines Also fix some whitespace errors and use the NAT bits instead of deriving the state manually. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_conntrack.c | 193 +++++++++++++++++++---------------- 1 file changed, 105 insertions(+), 88 deletions(-) diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 39c57e9f7563..0ea501a2fda5 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -45,7 +45,7 @@ match(const struct sk_buff *skb, ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo); -#define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg)) +#define FWINV(bool, invflg) ((bool) ^ !!(sinfo->invflags & invflg)) if (ct == &ip_conntrack_untracked) statebit = XT_CONNTRACK_STATE_UNTRACKED; @@ -54,63 +54,72 @@ match(const struct sk_buff *skb, else statebit = XT_CONNTRACK_STATE_INVALID; - if(sinfo->flags & XT_CONNTRACK_STATE) { + if (sinfo->flags & XT_CONNTRACK_STATE) { if (ct) { - if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip != - ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip) + if (test_bit(IPS_SRC_NAT_BIT, &ct->status)) statebit |= XT_CONNTRACK_STATE_SNAT; - - if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip != - ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip) + if (test_bit(IPS_DST_NAT_BIT, &ct->status)) statebit |= XT_CONNTRACK_STATE_DNAT; } - - if (FWINV((statebit & sinfo->statemask) == 0, XT_CONNTRACK_STATE)) + if (FWINV((statebit & sinfo->statemask) == 0, + XT_CONNTRACK_STATE)) return 0; } - if(sinfo->flags & XT_CONNTRACK_PROTO) { - if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, XT_CONNTRACK_PROTO)) - return 0; + if (ct == NULL) { + if (sinfo->flags & ~XT_CONNTRACK_STATE) + return 0; + return 1; } - if(sinfo->flags & XT_CONNTRACK_ORIGSRC) { - if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, XT_CONNTRACK_ORIGSRC)) + if (sinfo->flags & XT_CONNTRACK_PROTO && + FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != + sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, + XT_CONNTRACK_PROTO)) + return 0; + + if (sinfo->flags & XT_CONNTRACK_ORIGSRC && + FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip & + sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != + sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, + XT_CONNTRACK_ORIGSRC)) + return 0; + + if (sinfo->flags & XT_CONNTRACK_ORIGDST && + FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip & + sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != + sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, + XT_CONNTRACK_ORIGDST)) + return 0; + + if (sinfo->flags & XT_CONNTRACK_REPLSRC && + FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip & + sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != + sinfo->tuple[IP_CT_DIR_REPLY].src.ip, + XT_CONNTRACK_REPLSRC)) + return 0; + + if (sinfo->flags & XT_CONNTRACK_REPLDST && + FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip & + sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != + sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, + XT_CONNTRACK_REPLDST)) + return 0; + + if (sinfo->flags & XT_CONNTRACK_STATUS && + FWINV((ct->status & sinfo->statusmask) == 0, + XT_CONNTRACK_STATUS)) + return 0; + + if (sinfo->flags & XT_CONNTRACK_EXPIRES) { + unsigned long expires = timer_pending(&ct->timeout) ? + (ct->timeout.expires - jiffies)/HZ : 0; + + if (FWINV(!(expires >= sinfo->expires_min && + expires <= sinfo->expires_max), + XT_CONNTRACK_EXPIRES)) return 0; } - - if(sinfo->flags & XT_CONNTRACK_ORIGDST) { - if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, XT_CONNTRACK_ORIGDST)) - return 0; - } - - if(sinfo->flags & XT_CONNTRACK_REPLSRC) { - if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, XT_CONNTRACK_REPLSRC)) - return 0; - } - - if(sinfo->flags & XT_CONNTRACK_REPLDST) { - if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, XT_CONNTRACK_REPLDST)) - return 0; - } - - if(sinfo->flags & XT_CONNTRACK_STATUS) { - if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, XT_CONNTRACK_STATUS)) - return 0; - } - - if(sinfo->flags & XT_CONNTRACK_EXPIRES) { - unsigned long expires; - - if(!ct) - return 0; - - expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0; - - if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), XT_CONNTRACK_EXPIRES)) - return 0; - } - return 1; } @@ -141,63 +150,72 @@ match(const struct sk_buff *skb, else statebit = XT_CONNTRACK_STATE_INVALID; - if(sinfo->flags & XT_CONNTRACK_STATE) { + if (sinfo->flags & XT_CONNTRACK_STATE) { if (ct) { - if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip != - ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip) + if (test_bit(IPS_SRC_NAT_BIT, &ct->status)) statebit |= XT_CONNTRACK_STATE_SNAT; - - if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip != - ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip) + if (test_bit(IPS_DST_NAT_BIT, &ct->status)) statebit |= XT_CONNTRACK_STATE_DNAT; } - - if (FWINV((statebit & sinfo->statemask) == 0, XT_CONNTRACK_STATE)) + if (FWINV((statebit & sinfo->statemask) == 0, + XT_CONNTRACK_STATE)) return 0; } - if(sinfo->flags & XT_CONNTRACK_PROTO) { - if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, XT_CONNTRACK_PROTO)) - return 0; + if (ct == NULL) { + if (sinfo->flags & ~XT_CONNTRACK_STATE) + return 0; + return 1; } - if(sinfo->flags & XT_CONNTRACK_ORIGSRC) { - if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, XT_CONNTRACK_ORIGSRC)) - return 0; - } + if (sinfo->flags & XT_CONNTRACK_PROTO && + FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != + sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, + XT_CONNTRACK_PROTO)) + return 0; - if(sinfo->flags & XT_CONNTRACK_ORIGDST) { - if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, XT_CONNTRACK_ORIGDST)) - return 0; - } + if (sinfo->flags & XT_CONNTRACK_ORIGSRC && + FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip & + sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != + sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, + XT_CONNTRACK_ORIGSRC)) + return 0; - if(sinfo->flags & XT_CONNTRACK_REPLSRC) { - if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, XT_CONNTRACK_REPLSRC)) - return 0; - } + if (sinfo->flags & XT_CONNTRACK_ORIGDST && + FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip & + sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != + sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, + XT_CONNTRACK_ORIGDST)) + return 0; - if(sinfo->flags & XT_CONNTRACK_REPLDST) { - if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, XT_CONNTRACK_REPLDST)) - return 0; - } + if (sinfo->flags & XT_CONNTRACK_REPLSRC && + FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip & + sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != + sinfo->tuple[IP_CT_DIR_REPLY].src.ip, + XT_CONNTRACK_REPLSRC)) + return 0; - if(sinfo->flags & XT_CONNTRACK_STATUS) { - if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, XT_CONNTRACK_STATUS)) - return 0; - } + if (sinfo->flags & XT_CONNTRACK_REPLDST && + FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip & + sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != + sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, + XT_CONNTRACK_REPLDST)) + return 0; + + if (sinfo->flags & XT_CONNTRACK_STATUS && + FWINV((ct->status & sinfo->statusmask) == 0, + XT_CONNTRACK_STATUS)) + return 0; if(sinfo->flags & XT_CONNTRACK_EXPIRES) { - unsigned long expires; + unsigned long expires = timer_pending(&ct->timeout) ? + (ct->timeout.expires - jiffies)/HZ : 0; - if(!ct) - return 0; - - expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0; - - if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), XT_CONNTRACK_EXPIRES)) + if (FWINV(!(expires >= sinfo->expires_min && + expires <= sinfo->expires_max), + XT_CONNTRACK_EXPIRES)) return 0; } - return 1; } @@ -220,8 +238,7 @@ checkentry(const char *tablename, return 1; } -static void -destroy(const struct xt_match *match, void *matchinfo) +static void destroy(const struct xt_match *match, void *matchinfo) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); From 68e1f188de535865d4543bae92d168c007857e7b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 11:58:35 -0700 Subject: [PATCH 0692/1063] [NETFILTER]: ipt_TCPMSS: reformat - fix whitespace error - break lines at 80 characters - reformat some expressions to be more readable Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_TCPMSS.c | 58 ++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c index ac8a35eeea3f..bfc8d9c7d020 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c @@ -31,8 +31,10 @@ static inline unsigned int optlen(const u_int8_t *opt, unsigned int offset) { /* Beware zero-length options: make finite progress */ - if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) return 1; - else return opt[offset+1]; + if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) + return 1; + else + return opt[offset+1]; } static unsigned int @@ -55,7 +57,6 @@ ipt_tcpmss_target(struct sk_buff **pskb, iph = (*pskb)->nh.iph; tcplen = (*pskb)->len - iph->ihl*4; - tcph = (void *)iph + iph->ihl*4; /* Since it passed flags test in tcp match, we know it is is @@ -71,37 +72,39 @@ ipt_tcpmss_target(struct sk_buff **pskb, return NF_DROP; } - if(tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) { - if(!(*pskb)->dst) { + if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) { + if (!(*pskb)->dst) { if (net_ratelimit()) - printk(KERN_ERR - "ipt_tcpmss_target: no dst?! can't determine path-MTU\n"); + printk(KERN_ERR "ipt_tcpmss_target: " + "no dst?! can't determine path-MTU\n"); return NF_DROP; /* or IPT_CONTINUE ?? */ } - if(dst_mtu((*pskb)->dst) <= (sizeof(struct iphdr) + sizeof(struct tcphdr))) { + if (dst_mtu((*pskb)->dst) <= sizeof(struct iphdr) + + sizeof(struct tcphdr)) { if (net_ratelimit()) - printk(KERN_ERR - "ipt_tcpmss_target: unknown or invalid path-MTU (%d)\n", dst_mtu((*pskb)->dst)); + printk(KERN_ERR "ipt_tcpmss_target: " + "unknown or invalid path-MTU (%d)\n", + dst_mtu((*pskb)->dst)); return NF_DROP; /* or IPT_CONTINUE ?? */ } - newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) - sizeof(struct tcphdr); + newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) - + sizeof(struct tcphdr); } else newmss = tcpmssinfo->mss; opt = (u_int8_t *)tcph; - for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)){ - if ((opt[i] == TCPOPT_MSS) && - ((tcph->doff*4 - i) >= TCPOLEN_MSS) && - (opt[i+1] == TCPOLEN_MSS)) { + for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) { + if (opt[i] == TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS && + opt[i+1] == TCPOLEN_MSS) { u_int16_t oldmss; oldmss = (opt[i+2] << 8) | opt[i+3]; - if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) && - (oldmss <= newmss)) - return IPT_CONTINUE; + if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU && + oldmss <= newmss) + return IPT_CONTINUE; opt[i+2] = (newmss & 0xff00) >> 8; opt[i+3] = (newmss & 0x00ff); @@ -113,7 +116,7 @@ ipt_tcpmss_target(struct sk_buff **pskb, DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu" "->%u.%u.%u.%u:%hu changed TCP MSS option" - " (from %u to %u)\n", + " (from %u to %u)\n", NIPQUAD((*pskb)->nh.iph->saddr), ntohs(tcph->source), NIPQUAD((*pskb)->nh.iph->daddr), @@ -193,9 +196,9 @@ static inline int find_syn_match(const struct ipt_entry_match *m) { const struct ipt_tcp *tcpinfo = (const struct ipt_tcp *)m->data; - if (strcmp(m->u.kernel.match->name, "tcp") == 0 - && (tcpinfo->flg_cmp & TH_SYN) - && !(tcpinfo->invflags & IPT_TCP_INV_FLAGS)) + if (strcmp(m->u.kernel.match->name, "tcp") == 0 && + tcpinfo->flg_cmp & TH_SYN && + !(tcpinfo->invflags & IPT_TCP_INV_FLAGS)) return 1; return 0; @@ -212,11 +215,12 @@ ipt_tcpmss_checkentry(const char *tablename, const struct ipt_tcpmss_info *tcpmssinfo = targinfo; const struct ipt_entry *e = e_void; - if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) && - ((hook_mask & ~((1 << NF_IP_FORWARD) - | (1 << NF_IP_LOCAL_OUT) - | (1 << NF_IP_POST_ROUTING))) != 0)) { - printk("TCPMSS: path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); + if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU && + (hook_mask & ~((1 << NF_IP_FORWARD) | + (1 << NF_IP_LOCAL_OUT) | + (1 << NF_IP_POST_ROUTING))) != 0) { + printk("TCPMSS: path-MTU clamping only supported in " + "FORWARD, OUTPUT and POSTROUTING hooks\n"); return 0; } From 2be344c4461d29b99113f62fa91c5ceab9997329 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 11:58:50 -0700 Subject: [PATCH 0693/1063] [NETFILTER]: ipt_TCPMSS: remove impossible condition Every skb must have a dst_entry at this point. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_TCPMSS.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c index bfc8d9c7d020..b2d3c4f992d1 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c @@ -73,13 +73,6 @@ ipt_tcpmss_target(struct sk_buff **pskb, } if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) { - if (!(*pskb)->dst) { - if (net_ratelimit()) - printk(KERN_ERR "ipt_tcpmss_target: " - "no dst?! can't determine path-MTU\n"); - return NF_DROP; /* or IPT_CONTINUE ?? */ - } - if (dst_mtu((*pskb)->dst) <= sizeof(struct iphdr) + sizeof(struct tcphdr)) { if (net_ratelimit()) From ecb70c95c45ece0935b076295388267f6d8db65c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 11:59:06 -0700 Subject: [PATCH 0694/1063] [NETFILTER]: ipt_TCPMSS: misc cleanup - remove debugging cruft - remove printk for reallocation failures - remove unused addition Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_TCPMSS.c | 36 ++------------------------------- 1 file changed, 2 insertions(+), 34 deletions(-) diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c index b2d3c4f992d1..4246c4321e5b 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c @@ -21,12 +21,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher "); MODULE_DESCRIPTION("iptables TCP MSS modification module"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - static inline unsigned int optlen(const u_int8_t *opt, unsigned int offset) { @@ -106,16 +100,7 @@ ipt_tcpmss_target(struct sk_buff **pskb, htons(oldmss)^0xFFFF, htons(newmss), tcph->check, 0); - - DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu" - "->%u.%u.%u.%u:%hu changed TCP MSS option" - " (from %u to %u)\n", - NIPQUAD((*pskb)->nh.iph->saddr), - ntohs(tcph->source), - NIPQUAD((*pskb)->nh.iph->daddr), - ntohs(tcph->dest), - oldmss, newmss); - goto retmodified; + return IPT_CONTINUE; } } @@ -127,13 +112,8 @@ ipt_tcpmss_target(struct sk_buff **pskb, newskb = skb_copy_expand(*pskb, skb_headroom(*pskb), TCPOLEN_MSS, GFP_ATOMIC); - if (!newskb) { - if (net_ratelimit()) - printk(KERN_ERR "ipt_tcpmss_target:" - " unable to allocate larger skb\n"); + if (!newskb) return NF_DROP; - } - kfree_skb(*pskb); *pskb = newskb; iph = (*pskb)->nh.iph; @@ -149,8 +129,6 @@ ipt_tcpmss_target(struct sk_buff **pskb, htons(tcplen) ^ 0xFFFF, htons(tcplen + TCPOLEN_MSS), tcph->check, 1); - tcplen += TCPOLEN_MSS; - opt[0] = TCPOPT_MSS; opt[1] = TCPOLEN_MSS; opt[2] = (newmss & 0xff00) >> 8; @@ -170,16 +148,6 @@ ipt_tcpmss_target(struct sk_buff **pskb, iph->check = nf_csum_update(iph->tot_len ^ 0xFFFF, newtotlen, iph->check); iph->tot_len = newtotlen; - - DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu" - "->%u.%u.%u.%u:%hu added TCP MSS option (%u)\n", - NIPQUAD((*pskb)->nh.iph->saddr), - ntohs(tcph->source), - NIPQUAD((*pskb)->nh.iph->daddr), - ntohs(tcph->dest), - newmss); - - retmodified: return IPT_CONTINUE; } From 57dab5d0bfee21663ed20222b4cedeb0655ba1f3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 11:59:25 -0700 Subject: [PATCH 0695/1063] [NETFILTER]: xt_limit: don't reset state on unrelated rule updates The limit match reinitializes its state whenever the ruleset changes, which means it will forget about previously used credits. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_limit.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index b9c9ff3a06ea..8bfcbdfa8783 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -122,16 +122,16 @@ ipt_limit_checkentry(const char *tablename, return 0; } - /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * - 128. */ - r->prev = jiffies; - r->credit = user2credits(r->avg * r->burst); /* Credits full. */ - r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */ - r->cost = user2credits(r->avg); - /* For SMP, we only want to use one set of counters. */ r->master = r; - + if (r->cost == 0) { + /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * + 128. */ + r->prev = jiffies; + r->credit = user2credits(r->avg * r->burst); /* Credits full. */ + r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */ + r->cost = user2credits(r->avg); + } return 1; } From 9123de2c043996050bacf77031cad845f5976f5d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 11:59:42 -0700 Subject: [PATCH 0696/1063] [NETFILTER]: ip6table_mangle: reroute when nfmark changes in NF_IP6_LOCAL_OUT Now that IPv6 supports policy routing we need to reroute in NF_IP6_LOCAL_OUT when the mark value changes. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv6.h | 1 + include/net/ip6_route.h | 2 -- net/ipv6/netfilter/ip6table_mangle.c | 8 ++------ 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 52a7b9e76428..d97e268cdfe5 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -73,6 +73,7 @@ enum nf_ip6_hook_priorities { }; #ifdef CONFIG_NETFILTER +extern int ip6_route_me_harder(struct sk_buff *skb); extern unsigned int nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 297909570041..6ca6b71dfe0f 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -57,8 +57,6 @@ extern void ip6_route_input(struct sk_buff *skb); extern struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl); -extern int ip6_route_me_harder(struct sk_buff *skb); - extern void ip6_route_init(void); extern void ip6_route_cleanup(void); diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 32db04fd8310..386ea260e767 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -180,12 +180,8 @@ ip6t_local_hook(unsigned int hook, && (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr)) || memcmp(&(*pskb)->nh.ipv6h->daddr, &daddr, sizeof(daddr)) || (*pskb)->nfmark != nfmark - || (*pskb)->nh.ipv6h->hop_limit != hop_limit)) { - - /* something which could affect routing has changed */ - - DEBUGP("ip6table_mangle: we'd need to re-route a packet\n"); - } + || (*pskb)->nh.ipv6h->hop_limit != hop_limit)) + return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP; return ret; } From 90d47db4a06f93f7339618b2a4f0cb032ef8d6d5 Mon Sep 17 00:00:00 2001 From: Dmitry Mishin Date: Wed, 20 Sep 2006 12:00:21 -0700 Subject: [PATCH 0697/1063] [NETFILTER]: x_tables: small check_entry & module_refcount cleanup While standard_target has target->me == NULL, module_put() should be called for it as for others, because there were try_module_get() before. Signed-off-by: Dmitry Mishin Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/arp_tables.c | 2 +- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index aaeaa9ce0f28..85f0d73ebfb4 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -485,7 +485,7 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i if (t->u.kernel.target == &arpt_standard_target) { if (!standard_check(t, size)) { ret = -EINVAL; - goto out; + goto err; } } else if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, e, target, t->data, diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index a0f36806998c..38e1e4fba0db 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -573,7 +573,7 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size, if (t->u.kernel.target == &ipt_standard_target) { if (!standard_check(t, size)) { ret = -EINVAL; - goto cleanup_matches; + goto err; } } else if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, e, target, t->data, diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 73d477ce216b..4ab368fa0b8f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -610,7 +610,7 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size, if (t->u.kernel.target == &ip6t_standard_target) { if (!standard_check(t, size)) { ret = -EINVAL; - goto cleanup_matches; + goto err; } } else if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, e, target, t->data, From 01f348484dd8509254d045e3ad49029716eca6a1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 20 Sep 2006 12:00:45 -0700 Subject: [PATCH 0698/1063] [NETFILTER]: ctnetlink: simplify the code to dump the conntrack table Merge the bits to dump the conntrack table and the ones to dump and zero counters in a single piece of code. This patch does not change the default behaviour if accounting is not enabled. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_netlink.c | 63 ++++----------------- net/netfilter/nf_conntrack_netlink.c | 67 ++++------------------- 2 files changed, 20 insertions(+), 110 deletions(-) diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index a20b0e385f1b..52eddea27e93 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -436,6 +436,11 @@ restart: cb->args[1] = (unsigned long)ct; goto out; } +#ifdef CONFIG_NF_CT_ACCT + if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == + IPCTNL_MSG_CT_GET_CTRZERO) + memset(&ct->counters, 0, sizeof(ct->counters)); +#endif } if (cb->args[1]) { cb->args[1] = 0; @@ -451,46 +456,6 @@ out: return skb->len; } -#ifdef CONFIG_IP_NF_CT_ACCT -static int -ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct ip_conntrack *ct = NULL; - struct ip_conntrack_tuple_hash *h; - struct list_head *i; - u_int32_t *id = (u_int32_t *) &cb->args[1]; - - DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__, - cb->args[0], *id); - - write_lock_bh(&ip_conntrack_lock); - for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) { - list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) { - h = (struct ip_conntrack_tuple_hash *) i; - if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) - continue; - ct = tuplehash_to_ctrack(h); - if (ct->id <= *id) - continue; - if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - IPCTNL_MSG_CT_NEW, - 1, ct) < 0) - goto out; - *id = ct->id; - - memset(&ct->counters, 0, sizeof(ct->counters)); - } - } -out: - write_unlock_bh(&ip_conntrack_lock); - - DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); - - return skb->len; -} -#endif - static const size_t cta_min_ip[CTA_IP_MAX] = { [CTA_IP_V4_SRC-1] = sizeof(u_int32_t), [CTA_IP_V4_DST-1] = sizeof(u_int32_t), @@ -775,22 +740,14 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, if (msg->nfgen_family != AF_INET) return -EAFNOSUPPORT; - if (NFNL_MSG_TYPE(nlh->nlmsg_type) == - IPCTNL_MSG_CT_GET_CTRZERO) { -#ifdef CONFIG_IP_NF_CT_ACCT - if ((*errp = netlink_dump_start(ctnl, skb, nlh, - ctnetlink_dump_table_w, - ctnetlink_done)) != 0) - return -EINVAL; -#else +#ifndef CONFIG_IP_NF_CT_ACCT + if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO) return -ENOTSUPP; #endif - } else { - if ((*errp = netlink_dump_start(ctnl, skb, nlh, - ctnetlink_dump_table, - ctnetlink_done)) != 0) + if ((*errp = netlink_dump_start(ctnl, skb, nlh, + ctnetlink_dump_table, + ctnetlink_done)) != 0) return -EINVAL; - } rlen = NLMSG_ALIGN(nlh->nlmsg_len); if (rlen > skb->len) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 8cd85cfd9a02..1721f7c78c77 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -455,6 +455,11 @@ restart: cb->args[1] = (unsigned long)ct; goto out; } +#ifdef CONFIG_NF_CT_ACCT + if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == + IPCTNL_MSG_CT_GET_CTRZERO) + memset(&ct->counters, 0, sizeof(ct->counters)); +#endif } if (cb->args[1]) { cb->args[1] = 0; @@ -470,50 +475,6 @@ out: return skb->len; } -#ifdef CONFIG_NF_CT_ACCT -static int -ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct nf_conn *ct = NULL; - struct nf_conntrack_tuple_hash *h; - struct list_head *i; - u_int32_t *id = (u_int32_t *) &cb->args[1]; - struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); - u_int8_t l3proto = nfmsg->nfgen_family; - - DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__, - cb->args[0], *id); - - write_lock_bh(&nf_conntrack_lock); - for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++, *id = 0) { - list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) { - h = (struct nf_conntrack_tuple_hash *) i; - if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) - continue; - ct = nf_ct_tuplehash_to_ctrack(h); - if (l3proto && L3PROTO(ct) != l3proto) - continue; - if (ct->id <= *id) - continue; - if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - IPCTNL_MSG_CT_NEW, - 1, ct) < 0) - goto out; - *id = ct->id; - - memset(&ct->counters, 0, sizeof(ct->counters)); - } - } -out: - write_unlock_bh(&nf_conntrack_lock); - - DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); - - return skb->len; -} -#endif - static inline int ctnetlink_parse_tuple_ip(struct nfattr *attr, struct nf_conntrack_tuple *tuple) { @@ -788,22 +749,14 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_DUMP) { u32 rlen; - if (NFNL_MSG_TYPE(nlh->nlmsg_type) == - IPCTNL_MSG_CT_GET_CTRZERO) { -#ifdef CONFIG_NF_CT_ACCT - if ((*errp = netlink_dump_start(ctnl, skb, nlh, - ctnetlink_dump_table_w, - ctnetlink_done)) != 0) - return -EINVAL; -#else +#ifndef CONFIG_NF_CT_ACCT + if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO) return -ENOTSUPP; #endif - } else { - if ((*errp = netlink_dump_start(ctnl, skb, nlh, - ctnetlink_dump_table, - ctnetlink_done)) != 0) + if ((*errp = netlink_dump_start(ctnl, skb, nlh, + ctnetlink_dump_table, + ctnetlink_done)) != 0) return -EINVAL; - } rlen = NLMSG_ALIGN(nlh->nlmsg_len); if (rlen > skb->len) From 5251e2d2125407bbff0c39394a4011be9ed8b5d0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 20 Sep 2006 12:01:06 -0700 Subject: [PATCH 0699/1063] [NETFILTER]: conntrack: fix race condition in early_drop On SMP environments the maximum number of conntracks can be overpassed under heavy stress situations due to an existing race condition. CPU A CPU B atomic_read() ... early_drop() ... ... atomic_read() allocate conntrack allocate conntrack atomic_inc() atomic_inc() This patch moves the counter incrementation before the early drop stage. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_core.c | 9 ++++++--- net/netfilter/nf_conntrack_core.c | 10 ++++++++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 2568d480e9a9..422a662194cc 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -622,11 +622,15 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, ip_conntrack_hash_rnd_initted = 1; } + /* We don't want any race condition at early drop stage */ + atomic_inc(&ip_conntrack_count); + if (ip_conntrack_max - && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) { + && atomic_read(&ip_conntrack_count) > ip_conntrack_max) { unsigned int hash = hash_conntrack(orig); /* Try dropping from this hash chain. */ if (!early_drop(&ip_conntrack_hash[hash])) { + atomic_dec(&ip_conntrack_count); if (net_ratelimit()) printk(KERN_WARNING "ip_conntrack: table full, dropping" @@ -638,6 +642,7 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC); if (!conntrack) { DEBUGP("Can't allocate conntrack.\n"); + atomic_dec(&ip_conntrack_count); return ERR_PTR(-ENOMEM); } @@ -651,8 +656,6 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, conntrack->timeout.data = (unsigned long)conntrack; conntrack->timeout.function = death_by_timeout; - atomic_inc(&ip_conntrack_count); - return conntrack; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 927137b8b3b5..adeafa2cc339 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -848,11 +848,15 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, nf_conntrack_hash_rnd_initted = 1; } + /* We don't want any race condition at early drop stage */ + atomic_inc(&nf_conntrack_count); + if (nf_conntrack_max - && atomic_read(&nf_conntrack_count) >= nf_conntrack_max) { + && atomic_read(&nf_conntrack_count) > nf_conntrack_max) { unsigned int hash = hash_conntrack(orig); /* Try dropping from this hash chain. */ if (!early_drop(&nf_conntrack_hash[hash])) { + atomic_dec(&nf_conntrack_count); if (net_ratelimit()) printk(KERN_WARNING "nf_conntrack: table full, dropping" @@ -903,10 +907,12 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, init_timer(&conntrack->timeout); conntrack->timeout.data = (unsigned long)conntrack; conntrack->timeout.function = death_by_timeout; + read_unlock_bh(&nf_ct_cache_lock); - atomic_inc(&nf_conntrack_count); + return conntrack; out: read_unlock_bh(&nf_ct_cache_lock); + atomic_dec(&nf_conntrack_count); return conntrack; } From ca39df6cdfbe2ea210e31117f5d469576cfe9008 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:01:34 -0700 Subject: [PATCH 0700/1063] [NETFILTER]: ipt_TTL: fix checksum update bug Fix regression introduced by the incremental checksum patches. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_TTL.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index 214d9d9c428f..96e79cc6d0f2 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -54,8 +54,8 @@ ipt_ttl_target(struct sk_buff **pskb, } if (new_ttl != iph->ttl) { - iph->check = nf_csum_update((iph->ttl << 8) ^ 0xFFFF, - new_ttl << 8, + iph->check = nf_csum_update(ntohs((iph->ttl << 8)) ^ 0xFFFF, + ntohs(new_ttl << 8), iph->check); iph->ttl = new_ttl; } From 7cf73936fe6bb9b027b75fd8fa3c634fe74843d3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:02:21 -0700 Subject: [PATCH 0701/1063] [NETFILTER]: ip6t_HL: remove write-only variable Noticed by Alexey Dobriyan Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/ip6t_HL.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c index e54ea92d107b..435750f664dd 100644 --- a/net/ipv6/netfilter/ip6t_HL.c +++ b/net/ipv6/netfilter/ip6t_HL.c @@ -26,7 +26,6 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb, { struct ipv6hdr *ip6h; const struct ip6t_HL_info *info = targinfo; - u_int16_t diffs[2]; int new_hl; if (!skb_make_writable(pskb, (*pskb)->len)) @@ -53,11 +52,8 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb, break; } - if (new_hl != ip6h->hop_limit) { - diffs[0] = htons(((unsigned)ip6h->hop_limit) << 8) ^ 0xFFFF; + if (new_hl != ip6h->hop_limit) ip6h->hop_limit = new_hl; - diffs[1] = htons(((unsigned)ip6h->hop_limit) << 8); - } return IP6T_CONTINUE; } From 71cd83a8bde61612b277fd5bf91503ac1ad61e23 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 20 Sep 2006 12:02:44 -0700 Subject: [PATCH 0702/1063] [NETFILTER]: xt_policy: remove dups in .family sparse "defined twice" warning Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_policy.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index e9d81378d653..46bde2b1e1e0 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -171,7 +171,6 @@ static struct xt_match xt_policy_match[] = { .checkentry = checkentry, .match = match, .matchsize = sizeof(struct xt_policy_info), - .family = AF_INET, .me = THIS_MODULE, }, { @@ -180,7 +179,6 @@ static struct xt_match xt_policy_match[] = { .checkentry = checkentry, .match = match, .matchsize = sizeof(struct xt_policy_info), - .family = AF_INET6, .me = THIS_MODULE, }, }; From c1fe3ca5106d9568791433fa6c7f27e71ac69e1b Mon Sep 17 00:00:00 2001 From: George Hansper Date: Wed, 20 Sep 2006 12:03:23 -0700 Subject: [PATCH 0703/1063] [NETFILTER]: TCP conntrack: improve dead connection detection Don't count window updates as retransmissions. Signed-off-by: George Hansper Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_tcp.h | 1 + net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 4 +++- net/netfilter/nf_conntrack_proto_tcp.c | 4 +++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h index b2feeffde384..6b01ba297727 100644 --- a/include/linux/netfilter/nf_conntrack_tcp.h +++ b/include/linux/netfilter/nf_conntrack_tcp.h @@ -49,6 +49,7 @@ struct ip_ct_tcp u_int32_t last_seq; /* Last sequence number seen in dir */ u_int32_t last_ack; /* Last sequence number seen in opposite dir */ u_int32_t last_end; /* Last seq + len */ + u_int16_t last_win; /* Last window advertisement seen in dir */ }; #endif /* __KERNEL__ */ diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 75a7237eb8c1..03ae9a04cb37 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -731,13 +731,15 @@ static int tcp_in_window(struct ip_ct_tcp *state, if (state->last_dir == dir && state->last_seq == seq && state->last_ack == ack - && state->last_end == end) + && state->last_end == end + && state->last_win == win) state->retrans++; else { state->last_dir = dir; state->last_seq = seq; state->last_ack = ack; state->last_end = end; + state->last_win = win; state->retrans = 0; } } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 9fc0ee61f92a..238bbb5b72ef 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -688,13 +688,15 @@ static int tcp_in_window(struct ip_ct_tcp *state, if (state->last_dir == dir && state->last_seq == seq && state->last_ack == ack - && state->last_end == end) + && state->last_end == end + && state->last_win == win) state->retrans++; else { state->last_dir = dir; state->last_seq = seq; state->last_ack = ack; state->last_end = end; + state->last_win = win; state->retrans = 0; } } From 1192e403e9ea2dc23bbbe2b4fe9bdbc47e8c6056 Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Wed, 20 Sep 2006 12:03:46 -0700 Subject: [PATCH 0704/1063] [NETFILTER]: make some netfilter globals __read_mostly Signed-off-by: Brian Haley Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_core.c | 6 +++--- net/ipv4/netfilter/ip_queue.c | 8 ++++---- net/ipv6/netfilter/ip6_queue.c | 8 ++++---- net/netfilter/nf_conntrack_core.c | 10 +++++----- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 422a662194cc..2b6f24fc727e 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -63,17 +63,17 @@ atomic_t ip_conntrack_count = ATOMIC_INIT(0); void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL; LIST_HEAD(ip_conntrack_expect_list); -struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO]; +struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO] __read_mostly; static LIST_HEAD(helpers); unsigned int ip_conntrack_htable_size __read_mostly = 0; int ip_conntrack_max __read_mostly; -struct list_head *ip_conntrack_hash; +struct list_head *ip_conntrack_hash __read_mostly; static kmem_cache_t *ip_conntrack_cachep __read_mostly; static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly; struct ip_conntrack ip_conntrack_untracked; unsigned int ip_ct_log_invalid __read_mostly; static LIST_HEAD(unconfirmed); -static int ip_conntrack_vmalloc; +static int ip_conntrack_vmalloc __read_mostly; static unsigned int ip_conntrack_next_id; static unsigned int ip_conntrack_expect_next_id; diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 80060cbe4a07..7edad790478a 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -52,15 +52,15 @@ struct ipq_queue_entry { typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); -static unsigned char copy_mode = IPQ_COPY_NONE; +static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; static DEFINE_RWLOCK(queue_lock); -static int peer_pid; -static unsigned int copy_range; +static int peer_pid __read_mostly; +static unsigned int copy_range __read_mostly; static unsigned int queue_total; static unsigned int queue_dropped = 0; static unsigned int queue_user_dropped = 0; -static struct sock *ipqnl; +static struct sock *ipqnl __read_mostly; static LIST_HEAD(queue_list); static DEFINE_MUTEX(ipqnl_mutex); diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index d322e8395794..9510c24ca8d2 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -56,15 +56,15 @@ struct ipq_queue_entry { typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); -static unsigned char copy_mode = IPQ_COPY_NONE; +static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; static DEFINE_RWLOCK(queue_lock); -static int peer_pid; -static unsigned int copy_range; +static int peer_pid __read_mostly; +static unsigned int copy_range __read_mostly; static unsigned int queue_total; static unsigned int queue_dropped = 0; static unsigned int queue_user_dropped = 0; -static struct sock *ipqnl; +static struct sock *ipqnl __read_mostly; static LIST_HEAD(queue_list); static DEFINE_MUTEX(ipqnl_mutex); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index adeafa2cc339..093b3ddc513c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -73,17 +73,17 @@ atomic_t nf_conntrack_count = ATOMIC_INIT(0); void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL; LIST_HEAD(nf_conntrack_expect_list); -struct nf_conntrack_protocol **nf_ct_protos[PF_MAX]; -struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX]; +struct nf_conntrack_protocol **nf_ct_protos[PF_MAX] __read_mostly; +struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX] __read_mostly; static LIST_HEAD(helpers); unsigned int nf_conntrack_htable_size __read_mostly = 0; int nf_conntrack_max __read_mostly; -struct list_head *nf_conntrack_hash; -static kmem_cache_t *nf_conntrack_expect_cachep; +struct list_head *nf_conntrack_hash __read_mostly; +static kmem_cache_t *nf_conntrack_expect_cachep __read_mostly; struct nf_conn nf_conntrack_untracked; unsigned int nf_ct_log_invalid __read_mostly; static LIST_HEAD(unconfirmed); -static int nf_conntrack_vmalloc; +static int nf_conntrack_vmalloc __read_mostly; static unsigned int nf_conntrack_next_id; static unsigned int nf_conntrack_expect_next_id; From bec71b162747708d4b45b0cd399b484f52f2901a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:04:08 -0700 Subject: [PATCH 0705/1063] [NETFILTER]: ip_tables: fix module refcount leaks in compat error paths Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_tables.c | 39 +++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 38e1e4fba0db..3d5d4a4640c3 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1529,7 +1529,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, ret = IPT_MATCH_ITERATE(e, compat_check_calc_match, name, &e->ip, e->comefrom, &off, &j); if (ret != 0) - goto out; + goto cleanup_matches; t = ipt_get_target(e); target = try_then_request_module(xt_find_target(AF_INET, @@ -1539,7 +1539,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, if (IS_ERR(target) || !target) { duprintf("check_entry: `%s' not found\n", t->u.user.name); ret = target ? PTR_ERR(target) : -ENOENT; - goto out; + goto cleanup_matches; } t->u.kernel.target = target; @@ -1566,14 +1566,17 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, (*i)++; return 0; + out: + module_put(t->u.kernel.target->me); +cleanup_matches: IPT_MATCH_ITERATE(e, cleanup_match, &j); return ret; } static inline int compat_copy_match_from_user(struct ipt_entry_match *m, void **dstptr, compat_uint_t *size, const char *name, - const struct ipt_ip *ip, unsigned int hookmask) + const struct ipt_ip *ip, unsigned int hookmask, int *i) { struct ipt_entry_match *dm; struct ipt_match *match; @@ -1590,16 +1593,22 @@ static inline int compat_copy_match_from_user(struct ipt_entry_match *m, name, hookmask, ip->proto, ip->invflags & IPT_INV_PROTO); if (ret) - return ret; + goto err; if (m->u.kernel.match->checkentry && !m->u.kernel.match->checkentry(name, ip, match, dm->data, hookmask)) { duprintf("ip_tables: check failed for `%s'.\n", m->u.kernel.match->name); - return -EINVAL; + ret = -EINVAL; + goto err; } + (*i)++; return 0; + +err: + module_put(m->u.kernel.match->me); + return ret; } static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, @@ -1610,18 +1619,19 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, struct ipt_target *target; struct ipt_entry *de; unsigned int origsize; - int ret, h; + int ret, h, j; ret = 0; origsize = *size; de = (struct ipt_entry *)*dstptr; memcpy(de, e, sizeof(struct ipt_entry)); + j = 0; *dstptr += sizeof(struct compat_ipt_entry); ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size, - name, &de->ip, de->comefrom); + name, &de->ip, de->comefrom, &j); if (ret) - goto out; + goto cleanup_matches; de->target_offset = e->target_offset - (origsize - *size); t = ipt_get_target(e); target = t->u.kernel.target; @@ -1644,21 +1654,26 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, name, e->comefrom, e->ip.proto, e->ip.invflags & IPT_INV_PROTO); if (ret) - goto out; + goto err; ret = -EINVAL; if (t->u.kernel.target == &ipt_standard_target) { if (!standard_check(t, *size)) - goto out; + goto err; } else if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, de, target, t->data, de->comefrom)) { duprintf("ip_tables: compat: check failed for `%s'.\n", t->u.kernel.target->name); - goto out; + goto err; } ret = 0; -out: + return ret; + +err: + module_put(t->u.kernel.target->me); +cleanup_matches: + IPT_MATCH_ITERATE(e, cleanup_match, &j); return ret; } From 79030ed07de673e8451a03aecb9ada9f4d75d491 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:05:08 -0700 Subject: [PATCH 0706/1063] [NETFILTER]: ip_tables: revision support for compat code Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_tables.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 3d5d4a4640c3..673581db986e 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1994,6 +1994,8 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len) return ret; } +static int do_ipt_get_ctl(struct sock *, int, void __user *, int *); + static int compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { @@ -2007,8 +2009,7 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = compat_get_entries(user, len); break; default: - duprintf("compat_do_ipt_get_ctl: unknown request %i\n", cmd); - ret = -EINVAL; + ret = do_ipt_get_ctl(sk, cmd, user, len); } return ret; } From 9fa492cdc160cd27ce1046cb36f47d3b2b1efa21 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:05:37 -0700 Subject: [PATCH 0707/1063] [NETFILTER]: x_tables: simplify compat API Split the xt_compat_match/xt_compat_target into smaller type-safe functions performing just one operation. Handle all alignment and size-related conversions centrally in these function instead of requiring each module to implement a full-blown conversion function. Replace ->compat callback by ->compat_from_user and ->compat_to_user callbacks, responsible for converting just a single private structure. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 29 +++-- net/ipv4/netfilter/ip_tables.c | 117 ++++------------- net/netfilter/x_tables.c | 200 ++++++++++++++++------------- 3 files changed, 156 insertions(+), 190 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index c832295dbf61..739a98eebe2c 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -138,12 +138,6 @@ struct xt_counters_info #include -#ifdef CONFIG_COMPAT -#define COMPAT_TO_USER 1 -#define COMPAT_FROM_USER -1 -#define COMPAT_CALC_SIZE 0 -#endif - struct xt_match { struct list_head list; @@ -176,7 +170,8 @@ struct xt_match void (*destroy)(const struct xt_match *match, void *matchinfo); /* Called when userspace align differs from kernel space one */ - int (*compat)(void *match, void **dstptr, int *size, int convert); + void (*compat_from_user)(void *dst, void *src); + int (*compat_to_user)(void __user *dst, void *src); /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; @@ -186,6 +181,7 @@ struct xt_match char *table; unsigned int matchsize; + unsigned int compatsize; unsigned int hooks; unsigned short proto; @@ -224,13 +220,15 @@ struct xt_target void (*destroy)(const struct xt_target *target, void *targinfo); /* Called when userspace align differs from kernel space one */ - int (*compat)(void *target, void **dstptr, int *size, int convert); + void (*compat_from_user)(void *dst, void *src); + int (*compat_to_user)(void __user *dst, void *src); /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; char *table; unsigned int targetsize; + unsigned int compatsize; unsigned int hooks; unsigned short proto; @@ -387,9 +385,18 @@ struct compat_xt_counters_info extern void xt_compat_lock(int af); extern void xt_compat_unlock(int af); -extern int xt_compat_match(void *match, void **dstptr, int *size, int convert); -extern int xt_compat_target(void *target, void **dstptr, int *size, - int convert); + +extern int xt_compat_match_offset(struct xt_match *match); +extern void xt_compat_match_from_user(struct xt_entry_match *m, + void **dstptr, int *size); +extern int xt_compat_match_to_user(struct xt_entry_match *m, + void * __user *dstptr, int *size); + +extern int xt_compat_target_offset(struct xt_target *target); +extern void xt_compat_target_from_user(struct xt_entry_target *t, + void **dstptr, int *size); +extern int xt_compat_target_to_user(struct xt_entry_target *t, + void * __user *dstptr, int *size); #endif /* CONFIG_COMPAT */ #endif /* __KERNEL__ */ diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 673581db986e..800067d69a9a 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -942,73 +942,28 @@ static short compat_calc_jump(u_int16_t offset) return delta; } -struct compat_ipt_standard_target +static void compat_standard_from_user(void *dst, void *src) { - struct compat_xt_entry_target target; - compat_int_t verdict; -}; + int v = *(compat_int_t *)src; -struct compat_ipt_standard + if (v > 0) + v += compat_calc_jump(v); + memcpy(dst, &v, sizeof(v)); +} + +static int compat_standard_to_user(void __user *dst, void *src) { - struct compat_ipt_entry entry; - struct compat_ipt_standard_target target; -}; + compat_int_t cv = *(int *)src; -#define IPT_ST_LEN XT_ALIGN(sizeof(struct ipt_standard_target)) -#define IPT_ST_COMPAT_LEN COMPAT_XT_ALIGN(sizeof(struct compat_ipt_standard_target)) -#define IPT_ST_OFFSET (IPT_ST_LEN - IPT_ST_COMPAT_LEN) - -static int compat_ipt_standard_fn(void *target, - void **dstptr, int *size, int convert) -{ - struct compat_ipt_standard_target compat_st, *pcompat_st; - struct ipt_standard_target st, *pst; - int ret; - - ret = 0; - switch (convert) { - case COMPAT_TO_USER: - pst = target; - memcpy(&compat_st.target, &pst->target, - sizeof(compat_st.target)); - compat_st.verdict = pst->verdict; - if (compat_st.verdict > 0) - compat_st.verdict -= - compat_calc_jump(compat_st.verdict); - compat_st.target.u.user.target_size = IPT_ST_COMPAT_LEN; - if (copy_to_user(*dstptr, &compat_st, IPT_ST_COMPAT_LEN)) - ret = -EFAULT; - *size -= IPT_ST_OFFSET; - *dstptr += IPT_ST_COMPAT_LEN; - break; - case COMPAT_FROM_USER: - pcompat_st = target; - memcpy(&st.target, &pcompat_st->target, IPT_ST_COMPAT_LEN); - st.verdict = pcompat_st->verdict; - if (st.verdict > 0) - st.verdict += compat_calc_jump(st.verdict); - st.target.u.user.target_size = IPT_ST_LEN; - memcpy(*dstptr, &st, IPT_ST_LEN); - *size += IPT_ST_OFFSET; - *dstptr += IPT_ST_LEN; - break; - case COMPAT_CALC_SIZE: - *size += IPT_ST_OFFSET; - break; - default: - ret = -ENOPROTOOPT; - break; - } - return ret; + if (cv > 0) + cv -= compat_calc_jump(cv); + return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; } static inline int compat_calc_match(struct ipt_entry_match *m, int * size) { - if (m->u.kernel.match->compat) - m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE); - else - xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE); + *size += xt_compat_match_offset(m->u.kernel.match); return 0; } @@ -1023,10 +978,7 @@ static int compat_calc_entry(struct ipt_entry *e, struct xt_table_info *info, entry_offset = (void *)e - base; IPT_MATCH_ITERATE(e, compat_calc_match, &off); t = ipt_get_target(e); - if (t->u.kernel.target->compat) - t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE); - else - xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE); + off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; ret = compat_add_offset(entry_offset, off); if (ret) @@ -1412,17 +1364,13 @@ struct compat_ipt_replace { }; static inline int compat_copy_match_to_user(struct ipt_entry_match *m, - void __user **dstptr, compat_uint_t *size) + void * __user *dstptr, compat_uint_t *size) { - if (m->u.kernel.match->compat) - return m->u.kernel.match->compat(m, dstptr, size, - COMPAT_TO_USER); - else - return xt_compat_match(m, dstptr, size, COMPAT_TO_USER); + return xt_compat_match_to_user(m, dstptr, size); } static int compat_copy_entry_to_user(struct ipt_entry *e, - void __user **dstptr, compat_uint_t *size) + void * __user *dstptr, compat_uint_t *size) { struct ipt_entry_target __user *t; struct compat_ipt_entry __user *ce; @@ -1442,11 +1390,7 @@ static int compat_copy_entry_to_user(struct ipt_entry *e, if (ret) goto out; t = ipt_get_target(e); - if (t->u.kernel.target->compat) - ret = t->u.kernel.target->compat(t, dstptr, size, - COMPAT_TO_USER); - else - ret = xt_compat_target(t, dstptr, size, COMPAT_TO_USER); + ret = xt_compat_target_to_user(t, dstptr, size); if (ret) goto out; ret = -EFAULT; @@ -1478,11 +1422,7 @@ compat_check_calc_match(struct ipt_entry_match *m, return match ? PTR_ERR(match) : -ENOENT; } m->u.kernel.match = match; - - if (m->u.kernel.match->compat) - m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE); - else - xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE); + *size += xt_compat_match_offset(match); (*i)++; return 0; @@ -1543,10 +1483,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, } t->u.kernel.target = target; - if (t->u.kernel.target->compat) - t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE); - else - xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE); + off += xt_compat_target_offset(target); *size += off; ret = compat_add_offset(entry_offset, off); if (ret) @@ -1584,10 +1521,7 @@ static inline int compat_copy_match_from_user(struct ipt_entry_match *m, dm = (struct ipt_entry_match *)*dstptr; match = m->u.kernel.match; - if (match->compat) - match->compat(m, dstptr, size, COMPAT_FROM_USER); - else - xt_compat_match(m, dstptr, size, COMPAT_FROM_USER); + xt_compat_match_from_user(m, dstptr, size); ret = xt_check_match(match, AF_INET, dm->u.match_size - sizeof(*dm), name, hookmask, ip->proto, @@ -1635,10 +1569,7 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, de->target_offset = e->target_offset - (origsize - *size); t = ipt_get_target(e); target = t->u.kernel.target; - if (target->compat) - target->compat(t, dstptr, size, COMPAT_FROM_USER); - else - xt_compat_target(t, dstptr, size, COMPAT_FROM_USER); + xt_compat_target_from_user(t, dstptr, size); de->next_offset = e->next_offset - (origsize - *size); for (h = 0; h < NF_IP_NUMHOOKS; h++) { @@ -2205,7 +2136,9 @@ static struct ipt_target ipt_standard_target = { .targetsize = sizeof(int), .family = AF_INET, #ifdef CONFIG_COMPAT - .compat = &compat_ipt_standard_fn, + .compatsize = sizeof(compat_int_t), + .compat_from_user = compat_standard_from_user, + .compat_to_user = compat_standard_to_user, #endif }; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index be7baf4f6846..58522fc65d33 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -333,52 +333,65 @@ int xt_check_match(const struct xt_match *match, unsigned short family, EXPORT_SYMBOL_GPL(xt_check_match); #ifdef CONFIG_COMPAT -int xt_compat_match(void *match, void **dstptr, int *size, int convert) +int xt_compat_match_offset(struct xt_match *match) { - struct xt_match *m; - struct compat_xt_entry_match *pcompat_m; - struct xt_entry_match *pm; - u_int16_t msize; - int off, ret; - - ret = 0; - m = ((struct xt_entry_match *)match)->u.kernel.match; - off = XT_ALIGN(m->matchsize) - COMPAT_XT_ALIGN(m->matchsize); - switch (convert) { - case COMPAT_TO_USER: - pm = (struct xt_entry_match *)match; - msize = pm->u.user.match_size; - if (copy_to_user(*dstptr, pm, msize)) { - ret = -EFAULT; - break; - } - msize -= off; - if (put_user(msize, (u_int16_t *)*dstptr)) - ret = -EFAULT; - *size -= off; - *dstptr += msize; - break; - case COMPAT_FROM_USER: - pcompat_m = (struct compat_xt_entry_match *)match; - pm = (struct xt_entry_match *)*dstptr; - msize = pcompat_m->u.user.match_size; - memcpy(pm, pcompat_m, msize); - msize += off; - pm->u.user.match_size = msize; - *size += off; - *dstptr += msize; - break; - case COMPAT_CALC_SIZE: - *size += off; - break; - default: - ret = -ENOPROTOOPT; - break; - } - return ret; + u_int16_t csize = match->compatsize ? : match->matchsize; + return XT_ALIGN(match->matchsize) - COMPAT_XT_ALIGN(csize); } -EXPORT_SYMBOL_GPL(xt_compat_match); -#endif +EXPORT_SYMBOL_GPL(xt_compat_match_offset); + +void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, + int *size) +{ + struct xt_match *match = m->u.kernel.match; + struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m; + int pad, off = xt_compat_match_offset(match); + u_int16_t msize = cm->u.user.match_size; + + m = *dstptr; + memcpy(m, cm, sizeof(*cm)); + if (match->compat_from_user) + match->compat_from_user(m->data, cm->data); + else + memcpy(m->data, cm->data, msize - sizeof(*cm)); + pad = XT_ALIGN(match->matchsize) - match->matchsize; + if (pad > 0) + memset(m->data + match->matchsize, 0, pad); + + msize += off; + m->u.user.match_size = msize; + + *size += off; + *dstptr += msize; +} +EXPORT_SYMBOL_GPL(xt_compat_match_from_user); + +int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr, + int *size) +{ + struct xt_match *match = m->u.kernel.match; + struct compat_xt_entry_match __user *cm = *dstptr; + int off = xt_compat_match_offset(match); + u_int16_t msize = m->u.user.match_size - off; + + if (copy_to_user(cm, m, sizeof(*cm)) || + put_user(msize, &cm->u.user.match_size)) + return -EFAULT; + + if (match->compat_to_user) { + if (match->compat_to_user((void __user *)cm->data, m->data)) + return -EFAULT; + } else { + if (copy_to_user(cm->data, m->data, msize - sizeof(*cm))) + return -EFAULT; + } + + *size -= off; + *dstptr += msize; + return 0; +} +EXPORT_SYMBOL_GPL(xt_compat_match_to_user); +#endif /* CONFIG_COMPAT */ int xt_check_target(const struct xt_target *target, unsigned short family, unsigned int size, const char *table, unsigned int hook_mask, @@ -410,51 +423,64 @@ int xt_check_target(const struct xt_target *target, unsigned short family, EXPORT_SYMBOL_GPL(xt_check_target); #ifdef CONFIG_COMPAT -int xt_compat_target(void *target, void **dstptr, int *size, int convert) +int xt_compat_target_offset(struct xt_target *target) { - struct xt_target *t; - struct compat_xt_entry_target *pcompat; - struct xt_entry_target *pt; - u_int16_t tsize; - int off, ret; - - ret = 0; - t = ((struct xt_entry_target *)target)->u.kernel.target; - off = XT_ALIGN(t->targetsize) - COMPAT_XT_ALIGN(t->targetsize); - switch (convert) { - case COMPAT_TO_USER: - pt = (struct xt_entry_target *)target; - tsize = pt->u.user.target_size; - if (copy_to_user(*dstptr, pt, tsize)) { - ret = -EFAULT; - break; - } - tsize -= off; - if (put_user(tsize, (u_int16_t *)*dstptr)) - ret = -EFAULT; - *size -= off; - *dstptr += tsize; - break; - case COMPAT_FROM_USER: - pcompat = (struct compat_xt_entry_target *)target; - pt = (struct xt_entry_target *)*dstptr; - tsize = pcompat->u.user.target_size; - memcpy(pt, pcompat, tsize); - tsize += off; - pt->u.user.target_size = tsize; - *size += off; - *dstptr += tsize; - break; - case COMPAT_CALC_SIZE: - *size += off; - break; - default: - ret = -ENOPROTOOPT; - break; - } - return ret; + u_int16_t csize = target->compatsize ? : target->targetsize; + return XT_ALIGN(target->targetsize) - COMPAT_XT_ALIGN(csize); } -EXPORT_SYMBOL_GPL(xt_compat_target); +EXPORT_SYMBOL_GPL(xt_compat_target_offset); + +void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, + int *size) +{ + struct xt_target *target = t->u.kernel.target; + struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t; + int pad, off = xt_compat_target_offset(target); + u_int16_t tsize = ct->u.user.target_size; + + t = *dstptr; + memcpy(t, ct, sizeof(*ct)); + if (target->compat_from_user) + target->compat_from_user(t->data, ct->data); + else + memcpy(t->data, ct->data, tsize - sizeof(*ct)); + pad = XT_ALIGN(target->targetsize) - target->targetsize; + if (pad > 0) + memset(t->data + target->targetsize, 0, pad); + + tsize += off; + t->u.user.target_size = tsize; + + *size += off; + *dstptr += tsize; +} +EXPORT_SYMBOL_GPL(xt_compat_target_from_user); + +int xt_compat_target_to_user(struct xt_entry_target *t, void __user **dstptr, + int *size) +{ + struct xt_target *target = t->u.kernel.target; + struct compat_xt_entry_target __user *ct = *dstptr; + int off = xt_compat_target_offset(target); + u_int16_t tsize = t->u.user.target_size - off; + + if (copy_to_user(ct, t, sizeof(*ct)) || + put_user(tsize, &ct->u.user.target_size)) + return -EFAULT; + + if (target->compat_to_user) { + if (target->compat_to_user((void __user *)ct->data, t->data)) + return -EFAULT; + } else { + if (copy_to_user(ct->data, t->data, tsize - sizeof(*ct))) + return -EFAULT; + } + + *size -= off; + *dstptr += tsize; + return 0; +} +EXPORT_SYMBOL_GPL(xt_compat_target_to_user); #endif struct xt_table_info *xt_alloc_table_info(unsigned int size) From bc80b656657251fc936d2d93fc70d5566c1c7d29 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:05:54 -0700 Subject: [PATCH 0708/1063] [NETFILTER]: xt_mark: add compat conversion functions Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_mark.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index e8059cd17275..934dddfbcd23 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -50,6 +50,37 @@ checkentry(const char *tablename, return 1; } +#ifdef CONFIG_COMPAT +struct compat_xt_mark_info { + compat_ulong_t mark, mask; + u_int8_t invert; + u_int8_t __pad1; + u_int16_t __pad2; +}; + +static void compat_from_user(void *dst, void *src) +{ + struct compat_xt_mark_info *cm = src; + struct xt_mark_info m = { + .mark = cm->mark, + .mask = cm->mask, + .invert = cm->invert, + }; + memcpy(dst, &m, sizeof(m)); +} + +static int compat_to_user(void __user *dst, void *src) +{ + struct xt_mark_info *m = src; + struct compat_xt_mark_info cm = { + .mark = m->mark, + .mask = m->mask, + .invert = m->invert, + }; + return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; +} +#endif /* CONFIG_COMPAT */ + static struct xt_match xt_mark_match[] = { { .name = "mark", @@ -57,6 +88,11 @@ static struct xt_match xt_mark_match[] = { .checkentry = checkentry, .match = match, .matchsize = sizeof(struct xt_mark_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_xt_mark_info), + .compat_from_user = compat_from_user, + .compat_to_user = compat_to_user, +#endif .me = THIS_MODULE, }, { From be7263b7b72ed9d5d25958f2b71e77e889e4845a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:06:10 -0700 Subject: [PATCH 0709/1063] [NETFILTER]: xt_MARK: add compat conversion functions Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_MARK.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c index 782f8d8c3edf..c6e860a7114f 100644 --- a/net/netfilter/xt_MARK.c +++ b/net/netfilter/xt_MARK.c @@ -108,6 +108,35 @@ checkentry_v1(const char *tablename, return 1; } +#ifdef CONFIG_COMPAT +struct compat_xt_mark_target_info_v1 { + compat_ulong_t mark; + u_int8_t mode; + u_int8_t __pad1; + u_int16_t __pad2; +}; + +static void compat_from_user_v1(void *dst, void *src) +{ + struct compat_xt_mark_target_info_v1 *cm = src; + struct xt_mark_target_info_v1 m = { + .mark = cm->mark, + .mode = cm->mode, + }; + memcpy(dst, &m, sizeof(m)); +} + +static int compat_to_user_v1(void __user *dst, void *src) +{ + struct xt_mark_target_info_v1 *m = src; + struct compat_xt_mark_target_info_v1 cm = { + .mark = m->mark, + .mode = m->mode, + }; + return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; +} +#endif /* CONFIG_COMPAT */ + static struct xt_target xt_mark_target[] = { { .name = "MARK", @@ -126,6 +155,11 @@ static struct xt_target xt_mark_target[] = { .checkentry = checkentry_v1, .target = target_v1, .targetsize = sizeof(struct xt_mark_target_info_v1), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_xt_mark_target_info_v1), + .compat_from_user = compat_from_user_v1, + .compat_to_user = compat_to_user_v1, +#endif .table = "mangle", .me = THIS_MODULE, }, From f1eda05386ade8dad4e8e9b48ecbd9432b6739d9 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:06:25 -0700 Subject: [PATCH 0710/1063] [NETFILTER]: xt_connmark: add compat conversion functions Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_connmark.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index c9104d05a19c..92a5726ef237 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -81,6 +81,37 @@ destroy(const struct xt_match *match, void *matchinfo) #endif } +#ifdef CONFIG_COMPAT +struct compat_xt_connmark_info { + compat_ulong_t mark, mask; + u_int8_t invert; + u_int8_t __pad1; + u_int16_t __pad2; +}; + +static void compat_from_user(void *dst, void *src) +{ + struct compat_xt_connmark_info *cm = src; + struct xt_connmark_info m = { + .mark = cm->mark, + .mask = cm->mask, + .invert = cm->invert, + }; + memcpy(dst, &m, sizeof(m)); +} + +static int compat_to_user(void __user *dst, void *src) +{ + struct xt_connmark_info *m = src; + struct compat_xt_connmark_info cm = { + .mark = m->mark, + .mask = m->mask, + .invert = m->invert, + }; + return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; +} +#endif /* CONFIG_COMPAT */ + static struct xt_match xt_connmark_match[] = { { .name = "connmark", @@ -89,6 +120,11 @@ static struct xt_match xt_connmark_match[] = { .match = match, .destroy = destroy, .matchsize = sizeof(struct xt_connmark_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_xt_connmark_info), + .compat_from_user = compat_from_user, + .compat_to_user = compat_to_user, +#endif .me = THIS_MODULE }, { From 7ce975b9da93b46dbf6ba70a1b4751bec211d079 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:06:40 -0700 Subject: [PATCH 0711/1063] [NETFILTER]: xt_CONNMARK: add compat conversion functions Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_CONNMARK.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index 6ccb45ee0880..c01524f817f0 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -108,6 +108,37 @@ checkentry(const char *tablename, return 1; } +#ifdef CONFIG_COMPAT +struct compat_xt_connmark_target_info { + compat_ulong_t mark, mask; + u_int8_t mode; + u_int8_t __pad1; + u_int16_t __pad2; +}; + +static void compat_from_user(void *dst, void *src) +{ + struct compat_xt_connmark_target_info *cm = src; + struct xt_connmark_target_info m = { + .mark = cm->mark, + .mask = cm->mask, + .mode = cm->mode, + }; + memcpy(dst, &m, sizeof(m)); +} + +static int compat_to_user(void __user *dst, void *src) +{ + struct xt_connmark_target_info *m = src; + struct compat_xt_connmark_target_info cm = { + .mark = m->mark, + .mask = m->mask, + .mode = m->mode, + }; + return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; +} +#endif /* CONFIG_COMPAT */ + static struct xt_target xt_connmark_target[] = { { .name = "CONNMARK", @@ -115,6 +146,11 @@ static struct xt_target xt_connmark_target[] = { .checkentry = checkentry, .target = target, .targetsize = sizeof(struct xt_connmark_target_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_xt_connmark_target_info), + .compat_from_user = compat_from_user, + .compat_to_user = compat_to_user, +#endif .me = THIS_MODULE }, { From 02c63cf777c331121bfb6e9c1440a9835ad2f2a8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:07:06 -0700 Subject: [PATCH 0712/1063] [NETFILTER]: xt_limit: add compat conversion functions Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_limit.c | 49 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 8bfcbdfa8783..fda7b7dec27d 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -135,6 +135,50 @@ ipt_limit_checkentry(const char *tablename, return 1; } +#ifdef CONFIG_COMPAT +struct compat_xt_rateinfo { + u_int32_t avg; + u_int32_t burst; + + compat_ulong_t prev; + u_int32_t credit; + u_int32_t credit_cap, cost; + + u_int32_t master; +}; + +/* To keep the full "prev" timestamp, the upper 32 bits are stored in the + * master pointer, which does not need to be preserved. */ +static void compat_from_user(void *dst, void *src) +{ + struct compat_xt_rateinfo *cm = src; + struct xt_rateinfo m = { + .avg = cm->avg, + .burst = cm->burst, + .prev = cm->prev | (unsigned long)cm->master << 32, + .credit = cm->credit, + .credit_cap = cm->credit_cap, + .cost = cm->cost, + }; + memcpy(dst, &m, sizeof(m)); +} + +static int compat_to_user(void __user *dst, void *src) +{ + struct xt_rateinfo *m = src; + struct compat_xt_rateinfo cm = { + .avg = m->avg, + .burst = m->burst, + .prev = m->prev, + .credit = m->credit, + .credit_cap = m->credit_cap, + .cost = m->cost, + .master = m->prev >> 32, + }; + return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; +} +#endif /* CONFIG_COMPAT */ + static struct xt_match xt_limit_match[] = { { .name = "limit", @@ -142,6 +186,11 @@ static struct xt_match xt_limit_match[] = { .checkentry = ipt_limit_checkentry, .match = ipt_limit_match, .matchsize = sizeof(struct xt_rateinfo), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_xt_rateinfo), + .compat_from_user = compat_from_user, + .compat_to_user = compat_to_user, +#endif .me = THIS_MODULE, }, { From 127f15dd659b20e722561ff8c86dc058e1a72323 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:07:23 -0700 Subject: [PATCH 0713/1063] [NETFILTER]: ipt_hashlimit: add compat conversion functions Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_hashlimit.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c index b5b74b07370c..4f73a61aa3dd 100644 --- a/net/ipv4/netfilter/ipt_hashlimit.c +++ b/net/ipv4/netfilter/ipt_hashlimit.c @@ -535,10 +535,39 @@ hashlimit_destroy(const struct xt_match *match, void *matchinfo) htable_put(r->hinfo); } +#ifdef CONFIG_COMPAT +struct compat_ipt_hashlimit_info { + char name[IFNAMSIZ]; + struct hashlimit_cfg cfg; + compat_uptr_t hinfo; + compat_uptr_t master; +}; + +static void compat_from_user(void *dst, void *src) +{ + int off = offsetof(struct compat_ipt_hashlimit_info, hinfo); + + memcpy(dst, src, off); + memset(dst + off, 0, sizeof(struct compat_ipt_hashlimit_info) - off); +} + +static int compat_to_user(void __user *dst, void *src) +{ + int off = offsetof(struct compat_ipt_hashlimit_info, hinfo); + + return copy_to_user(dst, src, off) ? -EFAULT : 0; +} +#endif + static struct ipt_match ipt_hashlimit = { .name = "hashlimit", .match = hashlimit_match, .matchsize = sizeof(struct ipt_hashlimit_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_ipt_hashlimit_info), + .compat_from_user = compat_from_user, + .compat_to_user = compat_to_user, +#endif .checkentry = hashlimit_checkentry, .destroy = hashlimit_destroy, .me = THIS_MODULE From edd5a329cf69c112882e03c8ab55e985062a5d2a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:07:39 -0700 Subject: [PATCH 0714/1063] [NETFILTER]: PPTP conntrack: fix whitespace errors Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- .../linux/netfilter_ipv4/ip_conntrack_pptp.h | 26 +++--- net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 76 ++++++++--------- net/ipv4/netfilter/ip_conntrack_proto_gre.c | 28 +++---- net/ipv4/netfilter/ip_nat_helper_pptp.c | 84 +++++++++---------- net/ipv4/netfilter/ip_nat_proto_gre.c | 20 ++--- 5 files changed, 117 insertions(+), 117 deletions(-) diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h index 816144c75de0..88f66d3c8765 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h @@ -285,19 +285,19 @@ struct PptpSetLinkInfo { }; union pptp_ctrl_union { - struct PptpStartSessionRequest sreq; - struct PptpStartSessionReply srep; - struct PptpStopSessionRequest streq; - struct PptpStopSessionReply strep; - struct PptpOutCallRequest ocreq; - struct PptpOutCallReply ocack; - struct PptpInCallRequest icreq; - struct PptpInCallReply icack; - struct PptpInCallConnected iccon; - struct PptpClearCallRequest clrreq; - struct PptpCallDisconnectNotify disc; - struct PptpWanErrorNotify wanerr; - struct PptpSetLinkInfo setlink; + struct PptpStartSessionRequest sreq; + struct PptpStartSessionReply srep; + struct PptpStopSessionRequest streq; + struct PptpStopSessionReply strep; + struct PptpOutCallRequest ocreq; + struct PptpOutCallReply ocack; + struct PptpInCallRequest icreq; + struct PptpInCallReply icack; + struct PptpInCallConnected iccon; + struct PptpClearCallRequest clrreq; + struct PptpCallDisconnectNotify disc; + struct PptpWanErrorNotify wanerr; + struct PptpSetLinkInfo setlink; }; extern int diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index b020a33e65e9..6c94dd5d476c 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -20,11 +20,11 @@ * - We can only support one single call within each session * * TODO: - * - testing of incoming PPTP calls + * - testing of incoming PPTP calls * - * Changes: + * Changes: * 2002-02-05 - Version 1.3 - * - Call ip_conntrack_unexpect_related() from + * - Call ip_conntrack_unexpect_related() from * pptp_destroy_siblings() to destroy expectations in case * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen * (Philip Craig ) @@ -141,7 +141,7 @@ static void pptp_expectfn(struct ip_conntrack *ct, invert_tuplepr(&inv_t, &exp->tuple); DEBUGP("trying to unexpect other dir: "); DUMP_TUPLE(&inv_t); - + exp_other = ip_conntrack_expect_find(&inv_t); if (exp_other) { /* delete other expectation. */ @@ -194,7 +194,7 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) { struct ip_conntrack_tuple t; - /* Since ct->sibling_list has literally rusted away in 2.6.11, + /* Since ct->sibling_list has literally rusted away in 2.6.11, * we now need another way to find out about our sibling * contrack and expects... -HW */ @@ -264,7 +264,7 @@ exp_gre(struct ip_conntrack *master, exp_orig->mask.dst.u.gre.key = htons(0xffff); exp_orig->mask.dst.ip = 0xffffffff; exp_orig->mask.dst.protonum = 0xff; - + exp_orig->master = master; exp_orig->expectfn = pptp_expectfn; exp_orig->flags = 0; @@ -322,7 +322,7 @@ out_unexpect_orig: goto out_put_both; } -static inline int +static inline int pptp_inbound_pkt(struct sk_buff **pskb, struct tcphdr *tcph, unsigned int nexthdr_off, @@ -336,7 +336,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; u_int16_t msg; __be16 *cid, *pcid; - u_int32_t seq; + u_int32_t seq; ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); if (!ctlh) { @@ -373,7 +373,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, } if (pptpReq->srep.resultCode == PPTP_START_OK) info->sstate = PPTP_SESSION_CONFIRMED; - else + else info->sstate = PPTP_SESSION_ERROR; break; @@ -420,22 +420,22 @@ pptp_inbound_pkt(struct sk_buff **pskb, pcid = &pptpReq->ocack.peersCallID; info->pac_call_id = ntohs(*cid); - + if (htons(info->pns_call_id) != *pcid) { DEBUGP("%s for unknown callid %u\n", pptp_msg_name[msg], ntohs(*pcid)); break; } - DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], + DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], ntohs(*cid), ntohs(*pcid)); - + info->cstate = PPTP_CALL_OUT_CONF; seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) + sizeof(struct PptpControlHeader) + ((void *)pcid - (void *)pptpReq); - + if (exp_gre(ct, seq, *cid, *pcid) != 0) printk("ip_conntrack_pptp: error during exp_gre\n"); break; @@ -479,7 +479,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, cid = &info->pac_call_id; if (info->pns_call_id != ntohs(*pcid)) { - DEBUGP("%s for unknown CallID %u\n", + DEBUGP("%s for unknown CallID %u\n", pptp_msg_name[msg], ntohs(*pcid)); break; } @@ -491,7 +491,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) + sizeof(struct PptpControlHeader) + ((void *)pcid - (void *)pptpReq); - + if (exp_gre(ct, seq, *cid, *pcid) != 0) printk("ip_conntrack_pptp: error during exp_gre\n"); @@ -554,7 +554,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, return NF_ACCEPT; nexthdr_off += sizeof(_ctlh); datalen -= sizeof(_ctlh); - + reqlen = datalen; if (reqlen > sizeof(*pptpReq)) reqlen = sizeof(*pptpReq); @@ -606,7 +606,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, /* client answers incoming call */ if (info->cstate != PPTP_CALL_IN_REQ && info->cstate != PPTP_CALL_IN_REP) { - DEBUGP("%s without incall_req\n", + DEBUGP("%s without incall_req\n", pptp_msg_name[msg]); break; } @@ -616,7 +616,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, } pcid = &pptpReq->icack.peersCallID; if (info->pac_call_id != ntohs(*pcid)) { - DEBUGP("%s for unknown call %u\n", + DEBUGP("%s for unknown call %u\n", pptp_msg_name[msg], ntohs(*pcid)); break; } @@ -644,12 +644,12 @@ pptp_outbound_pkt(struct sk_buff **pskb, /* I don't have to explain these ;) */ break; default: - DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)? + DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)? pptp_msg_name[msg]:pptp_msg_name[0], msg); /* unknown: no need to create GRE masq table entry */ break; } - + if (ip_nat_pptp_hook_outbound) return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh, pptpReq); @@ -659,7 +659,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, /* track caller id inside control connection, call expect_related */ -static int +static int conntrack_pptp_help(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) @@ -676,12 +676,12 @@ conntrack_pptp_help(struct sk_buff **pskb, int ret; /* don't do any tracking before tcp handshake complete */ - if (ctinfo != IP_CT_ESTABLISHED + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { DEBUGP("ctinfo = %u, skipping\n", ctinfo); return NF_ACCEPT; } - + nexthdr_off = (*pskb)->nh.iph->ihl*4; tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph); BUG_ON(!tcph); @@ -735,28 +735,28 @@ conntrack_pptp_help(struct sk_buff **pskb, } /* control protocol helper */ -static struct ip_conntrack_helper pptp = { +static struct ip_conntrack_helper pptp = { .list = { NULL, NULL }, - .name = "pptp", + .name = "pptp", .me = THIS_MODULE, .max_expected = 2, .timeout = 5 * 60, - .tuple = { .src = { .ip = 0, - .u = { .tcp = { .port = - __constant_htons(PPTP_CONTROL_PORT) } } - }, - .dst = { .ip = 0, + .tuple = { .src = { .ip = 0, + .u = { .tcp = { .port = + __constant_htons(PPTP_CONTROL_PORT) } } + }, + .dst = { .ip = 0, .u = { .all = 0 }, .protonum = IPPROTO_TCP - } + } }, - .mask = { .src = { .ip = 0, - .u = { .tcp = { .port = __constant_htons(0xffff) } } - }, - .dst = { .ip = 0, + .mask = { .src = { .ip = 0, + .u = { .tcp = { .port = __constant_htons(0xffff) } } + }, + .dst = { .ip = 0, .u = { .all = 0 }, - .protonum = 0xff - } + .protonum = 0xff + } }, .help = conntrack_pptp_help }; @@ -768,7 +768,7 @@ extern int __init ip_ct_proto_gre_init(void); static int __init ip_conntrack_helper_pptp_init(void) { int retcode; - + retcode = ip_ct_proto_gre_init(); if (retcode < 0) return retcode; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c index 92c6d8b178c9..5fe026f467d3 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c @@ -1,15 +1,15 @@ /* - * ip_conntrack_proto_gre.c - Version 3.0 + * ip_conntrack_proto_gre.c - Version 3.0 * * Connection tracking protocol helper module for GRE. * * GRE is a generic encapsulation protocol, which is generally not very * suited for NAT, as it has no protocol-specific part as port numbers. * - * It has an optional key field, which may help us distinguishing two + * It has an optional key field, which may help us distinguishing two * connections between the same two hosts. * - * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 * * PPTP is built on top of a modified version of GRE, and has a mandatory * field called "CallID", which serves us for the same purpose as the key @@ -61,7 +61,7 @@ MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE"); #define DEBUGP(x, args...) #define DUMP_TUPLE_GRE(x) #endif - + /* GRE KEYMAP HANDLING FUNCTIONS */ static LIST_HEAD(gre_keymap_list); @@ -88,7 +88,7 @@ static __be16 gre_keymap_lookup(struct ip_conntrack_tuple *t) } } read_unlock_bh(&ip_ct_gre_lock); - + DEBUGP("lookup src key 0x%x up key for ", key); DUMP_TUPLE_GRE(t); @@ -107,7 +107,7 @@ ip_ct_gre_keymap_add(struct ip_conntrack *ct, return -1; } - if (!reply) + if (!reply) exist_km = &ct->help.ct_pptp_info.keymap_orig; else exist_km = &ct->help.ct_pptp_info.keymap_reply; @@ -118,7 +118,7 @@ ip_ct_gre_keymap_add(struct ip_conntrack *ct, if (gre_key_cmpfn(km, t) && km == *exist_km) return 0; } - DEBUGP("trying to override keymap_%s for ct %p\n", + DEBUGP("trying to override keymap_%s for ct %p\n", reply? "reply":"orig", ct); return -EEXIST; } @@ -152,7 +152,7 @@ void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct) write_lock_bh(&ip_ct_gre_lock); if (ct->help.ct_pptp_info.keymap_orig) { - DEBUGP("removing %p from list\n", + DEBUGP("removing %p from list\n", ct->help.ct_pptp_info.keymap_orig); list_del(&ct->help.ct_pptp_info.keymap_orig->list); kfree(ct->help.ct_pptp_info.keymap_orig); @@ -220,7 +220,7 @@ static int gre_pkt_to_tuple(const struct sk_buff *skb, static int gre_print_tuple(struct seq_file *s, const struct ip_conntrack_tuple *tuple) { - return seq_printf(s, "srckey=0x%x dstkey=0x%x ", + return seq_printf(s, "srckey=0x%x dstkey=0x%x ", ntohs(tuple->src.u.gre.key), ntohs(tuple->dst.u.gre.key)); } @@ -250,14 +250,14 @@ static int gre_packet(struct ip_conntrack *ct, } else ip_ct_refresh_acct(ct, conntrackinfo, skb, ct->proto.gre.timeout); - + return NF_ACCEPT; } /* Called when a new connection for this protocol found. */ static int gre_new(struct ip_conntrack *ct, const struct sk_buff *skb) -{ +{ DEBUGP(": "); DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); @@ -283,9 +283,9 @@ static void gre_destroy(struct ip_conntrack *ct) } /* protocol helper struct */ -static struct ip_conntrack_protocol gre = { +static struct ip_conntrack_protocol gre = { .proto = IPPROTO_GRE, - .name = "gre", + .name = "gre", .pkt_to_tuple = gre_pkt_to_tuple, .invert_tuple = gre_invert_tuple, .print_tuple = gre_print_tuple, @@ -323,7 +323,7 @@ void ip_ct_proto_gre_fini(void) } write_unlock_bh(&ip_ct_gre_lock); - ip_conntrack_protocol_unregister(&gre); + ip_conntrack_protocol_unregister(&gre); } EXPORT_SYMBOL(ip_ct_gre_keymap_add); diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c index 1d149964dc38..5dde1da1c300 100644 --- a/net/ipv4/netfilter/ip_nat_helper_pptp.c +++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c @@ -32,7 +32,7 @@ * 2005-06-10 - Version 3.0 * - kernel >= 2.6.11 version, * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/) - * + * */ #include @@ -93,10 +93,10 @@ static void pptp_nat_expected(struct ip_conntrack *ct, DEBUGP("we are PAC->PNS\n"); /* build tuple for PNS->PAC */ t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; - t.src.u.gre.key = + t.src.u.gre.key = htons(master->nat.help.nat_pptp_info.pns_call_id); t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; - t.dst.u.gre.key = + t.dst.u.gre.key = htons(master->nat.help.nat_pptp_info.pac_call_id); t.dst.protonum = IPPROTO_GRE; } @@ -153,47 +153,47 @@ pptp_outbound_pkt(struct sk_buff **pskb, unsigned int cid_off; new_callid = htons(ct_pptp_info->pns_call_id); - + switch (msg = ntohs(ctlh->messageType)) { - case PPTP_OUT_CALL_REQUEST: - cid_off = offsetof(union pptp_ctrl_union, ocreq.callID); - /* FIXME: ideally we would want to reserve a call ID - * here. current netfilter NAT core is not able to do - * this :( For now we use TCP source port. This breaks - * multiple calls within one control session */ + case PPTP_OUT_CALL_REQUEST: + cid_off = offsetof(union pptp_ctrl_union, ocreq.callID); + /* FIXME: ideally we would want to reserve a call ID + * here. current netfilter NAT core is not able to do + * this :( For now we use TCP source port. This breaks + * multiple calls within one control session */ - /* save original call ID in nat_info */ - nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; + /* save original call ID in nat_info */ + nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; - /* don't use tcph->source since we are at a DSTmanip - * hook (e.g. PREROUTING) and pkt is not mangled yet */ - new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; + /* don't use tcph->source since we are at a DSTmanip + * hook (e.g. PREROUTING) and pkt is not mangled yet */ + new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; - /* save new call ID in ct info */ - ct_pptp_info->pns_call_id = ntohs(new_callid); - break; - case PPTP_IN_CALL_REPLY: - cid_off = offsetof(union pptp_ctrl_union, icreq.callID); - break; - case PPTP_CALL_CLEAR_REQUEST: - cid_off = offsetof(union pptp_ctrl_union, clrreq.callID); - break; - default: - DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, - (msg <= PPTP_MSG_MAX)? - pptp_msg_name[msg]:pptp_msg_name[0]); - /* fall through */ + /* save new call ID in ct info */ + ct_pptp_info->pns_call_id = ntohs(new_callid); + break; + case PPTP_IN_CALL_REPLY: + cid_off = offsetof(union pptp_ctrl_union, icreq.callID); + break; + case PPTP_CALL_CLEAR_REQUEST: + cid_off = offsetof(union pptp_ctrl_union, clrreq.callID); + break; + default: + DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, + (msg <= PPTP_MSG_MAX)? + pptp_msg_name[msg]:pptp_msg_name[0]); + /* fall through */ - case PPTP_SET_LINK_INFO: - /* only need to NAT in case PAC is behind NAT box */ - case PPTP_START_SESSION_REQUEST: - case PPTP_START_SESSION_REPLY: - case PPTP_STOP_SESSION_REQUEST: - case PPTP_STOP_SESSION_REPLY: - case PPTP_ECHO_REQUEST: - case PPTP_ECHO_REPLY: - /* no need to alter packet */ - return NF_ACCEPT; + case PPTP_SET_LINK_INFO: + /* only need to NAT in case PAC is behind NAT box */ + case PPTP_START_SESSION_REQUEST: + case PPTP_START_SESSION_REPLY: + case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* no need to alter packet */ + return NF_ACCEPT; } /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass @@ -216,9 +216,9 @@ static int pptp_exp_gre(struct ip_conntrack_expect *expect_orig, struct ip_conntrack_expect *expect_reply) { - struct ip_ct_pptp_master *ct_pptp_info = + struct ip_ct_pptp_master *ct_pptp_info = &expect_orig->master->help.ct_pptp_info; - struct ip_nat_pptp *nat_pptp_info = + struct ip_nat_pptp *nat_pptp_info = &expect_orig->master->nat.help.nat_pptp_info; struct ip_conntrack *ct = expect_orig->master; @@ -324,7 +324,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, break; default: - DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? + DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? pptp_msg_name[msg]:pptp_msg_name[0]); /* fall through */ diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c index 70a65372225a..a5226691f02c 100644 --- a/net/ipv4/netfilter/ip_nat_proto_gre.c +++ b/net/ipv4/netfilter/ip_nat_proto_gre.c @@ -6,10 +6,10 @@ * GRE is a generic encapsulation protocol, which is generally not very * suited for NAT, as it has no protocol-specific part as port numbers. * - * It has an optional key field, which may help us distinguishing two + * It has an optional key field, which may help us distinguishing two * connections between the same two hosts. * - * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 * * PPTP is built on top of a modified version of GRE, and has a mandatory * field called "CallID", which serves us for the same purpose as the key @@ -60,7 +60,7 @@ gre_in_range(const struct ip_conntrack_tuple *tuple, } /* generate unique tuple ... */ -static int +static int gre_unique_tuple(struct ip_conntrack_tuple *tuple, const struct ip_nat_range *range, enum ip_nat_manip_type maniptype, @@ -84,7 +84,7 @@ gre_unique_tuple(struct ip_conntrack_tuple *tuple, range_size = ntohs(range->max.gre.key) - min + 1; } - DEBUGP("min = %u, range_size = %u\n", min, range_size); + DEBUGP("min = %u, range_size = %u\n", min, range_size); for (i = 0; i < range_size; i++, key++) { *keyptr = htons(min + key % range_size); @@ -117,7 +117,7 @@ gre_manip_pkt(struct sk_buff **pskb, greh = (void *)(*pskb)->data + hdroff; pgreh = (struct gre_hdr_pptp *) greh; - /* we only have destination manip of a packet, since 'source key' + /* we only have destination manip of a packet, since 'source key' * is not present in the packet itself */ if (maniptype == IP_NAT_MANIP_DST) { /* key manipulation is always dest */ @@ -129,7 +129,7 @@ gre_manip_pkt(struct sk_buff **pskb, } if (greh->csum) { /* FIXME: Never tested this code... */ - *(gre_csum(greh)) = + *(gre_csum(greh)) = nf_proto_csum_update(*pskb, ~*(gre_key(greh)), tuple->dst.u.gre.key, @@ -138,7 +138,7 @@ gre_manip_pkt(struct sk_buff **pskb, *(gre_key(greh)) = tuple->dst.u.gre.key; break; case GRE_VERSION_PPTP: - DEBUGP("call_id -> 0x%04x\n", + DEBUGP("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key)); pgreh->call_id = tuple->dst.u.gre.key; break; @@ -152,8 +152,8 @@ gre_manip_pkt(struct sk_buff **pskb, } /* nat helper struct */ -static struct ip_nat_protocol gre = { - .name = "GRE", +static struct ip_nat_protocol gre = { + .name = "GRE", .protonum = IPPROTO_GRE, .manip_pkt = gre_manip_pkt, .in_range = gre_in_range, @@ -164,7 +164,7 @@ static struct ip_nat_protocol gre = { .nfattr_to_range = ip_nat_port_nfattr_to_range, #endif }; - + int __init ip_nat_proto_gre_init(void) { return ip_nat_protocol_register(&gre); From 955b944293dd4c931ec866ebe19a6b2463b8f9a0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:08:03 -0700 Subject: [PATCH 0715/1063] [NETFILTER]: PPTP conntrack: get rid of unnecessary byte order conversions The conntrack structure contains the call ID in host byte order for no reason, get rid of back and forth conversions. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- .../linux/netfilter_ipv4/ip_conntrack_pptp.h | 8 ++-- .../netfilter_ipv4/ip_conntrack_proto_gre.h | 22 +++++----- include/linux/netfilter_ipv4/ip_nat_pptp.h | 4 +- net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 22 +++++----- net/ipv4/netfilter/ip_nat_helper_pptp.c | 42 +++++++++---------- net/ipv4/netfilter/ip_nat_proto_gre.c | 2 +- 6 files changed, 50 insertions(+), 50 deletions(-) diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h index 88f66d3c8765..0d35623f9453 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h @@ -31,8 +31,8 @@ struct ip_ct_pptp_master { /* everything below is going to be per-expectation in newnat, * since there could be more than one call within one session */ enum pptp_ctrlcall_state cstate; /* call state */ - u_int16_t pac_call_id; /* call id of PAC, host byte order */ - u_int16_t pns_call_id; /* call id of PNS, host byte order */ + __be16 pac_call_id; /* call id of PAC, host byte order */ + __be16 pns_call_id; /* call id of PNS, host byte order */ /* in pre-2.6.11 this used to be per-expect. Now it is per-conntrack * and therefore imposes a fixed limit on the number of maps */ @@ -42,8 +42,8 @@ struct ip_ct_pptp_master { /* conntrack_expect private member */ struct ip_ct_pptp_expect { enum pptp_ctrlcall_state cstate; /* call state */ - u_int16_t pac_call_id; /* call id of PAC */ - u_int16_t pns_call_id; /* call id of PNS */ + __be16 pac_call_id; /* call id of PAC */ + __be16 pns_call_id; /* call id of PNS */ }; diff --git a/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h b/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h index 8d090ef82f5f..1d853aa873eb 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h @@ -49,18 +49,18 @@ struct gre_hdr { #else #error "Adjust your defines" #endif - __u16 protocol; + __be16 protocol; }; /* modified GRE header for PPTP */ struct gre_hdr_pptp { - __u8 flags; /* bitfield */ - __u8 version; /* should be GRE_VERSION_PPTP */ - __u16 protocol; /* should be GRE_PROTOCOL_PPTP */ - __u16 payload_len; /* size of ppp payload, not inc. gre header */ - __u16 call_id; /* peer's call_id for this session */ - __u32 seq; /* sequence number. Present if S==1 */ - __u32 ack; /* seq number of highest packet recieved by */ + __u8 flags; /* bitfield */ + __u8 version; /* should be GRE_VERSION_PPTP */ + __be16 protocol; /* should be GRE_PROTOCOL_PPTP */ + __be16 payload_len; /* size of ppp payload, not inc. gre header */ + __be16 call_id; /* peer's call_id for this session */ + __be32 seq; /* sequence number. Present if S==1 */ + __be32 ack; /* seq number of highest packet recieved by */ /* sender in this session */ }; @@ -92,13 +92,13 @@ void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct); /* get pointer to gre key, if present */ -static inline u_int32_t *gre_key(struct gre_hdr *greh) +static inline __be32 *gre_key(struct gre_hdr *greh) { if (!greh->key) return NULL; if (greh->csum || greh->routing) - return (u_int32_t *) (greh+sizeof(*greh)+4); - return (u_int32_t *) (greh+sizeof(*greh)); + return (__be32 *) (greh+sizeof(*greh)+4); + return (__be32 *) (greh+sizeof(*greh)); } /* get pointer ot gre csum, if present */ diff --git a/include/linux/netfilter_ipv4/ip_nat_pptp.h b/include/linux/netfilter_ipv4/ip_nat_pptp.h index eaf66c2e8f93..36668bf0f373 100644 --- a/include/linux/netfilter_ipv4/ip_nat_pptp.h +++ b/include/linux/netfilter_ipv4/ip_nat_pptp.h @@ -4,8 +4,8 @@ /* conntrack private data */ struct ip_nat_pptp { - u_int16_t pns_call_id; /* NAT'ed PNS call id */ - u_int16_t pac_call_id; /* NAT'ed PAC call id */ + __be16 pns_call_id; /* NAT'ed PNS call id */ + __be16 pac_call_id; /* NAT'ed PAC call id */ }; #endif /* _NAT_PPTP_H */ diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 6c94dd5d476c..57637ca2b82c 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -201,8 +201,8 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) /* try original (pns->pac) tuple */ memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t)); t.dst.protonum = IPPROTO_GRE; - t.src.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); - t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); + t.src.u.gre.key = ct->help.ct_pptp_info.pns_call_id; + t.dst.u.gre.key = ct->help.ct_pptp_info.pac_call_id; if (!destroy_sibling_or_exp(&t)) DEBUGP("failed to timeout original pns->pac ct/exp\n"); @@ -210,8 +210,8 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) /* try reply (pac->pns) tuple */ memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t)); t.dst.protonum = IPPROTO_GRE; - t.src.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); - t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); + t.src.u.gre.key = ct->help.ct_pptp_info.pac_call_id; + t.dst.u.gre.key = ct->help.ct_pptp_info.pns_call_id; if (!destroy_sibling_or_exp(&t)) DEBUGP("failed to timeout reply pac->pns ct/exp\n"); @@ -419,9 +419,9 @@ pptp_inbound_pkt(struct sk_buff **pskb, cid = &pptpReq->ocack.callID; pcid = &pptpReq->ocack.peersCallID; - info->pac_call_id = ntohs(*cid); + info->pac_call_id = *cid; - if (htons(info->pns_call_id) != *pcid) { + if (info->pns_call_id != *pcid) { DEBUGP("%s for unknown callid %u\n", pptp_msg_name[msg], ntohs(*pcid)); break; @@ -454,7 +454,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, pcid = &pptpReq->icack.peersCallID; DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); info->cstate = PPTP_CALL_IN_REQ; - info->pac_call_id = ntohs(*pcid); + info->pac_call_id = *pcid; break; case PPTP_IN_CALL_CONNECT: @@ -478,7 +478,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, pcid = &pptpReq->iccon.peersCallID; cid = &info->pac_call_id; - if (info->pns_call_id != ntohs(*pcid)) { + if (info->pns_call_id != *pcid) { DEBUGP("%s for unknown CallID %u\n", pptp_msg_name[msg], ntohs(*pcid)); break; @@ -595,7 +595,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, /* track PNS call id */ cid = &pptpReq->ocreq.callID; DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); - info->pns_call_id = ntohs(*cid); + info->pns_call_id = *cid; break; case PPTP_IN_CALL_REPLY: if (reqlen < sizeof(_pptpReq.icack)) { @@ -615,7 +615,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, break; } pcid = &pptpReq->icack.peersCallID; - if (info->pac_call_id != ntohs(*pcid)) { + if (info->pac_call_id != *pcid) { DEBUGP("%s for unknown call %u\n", pptp_msg_name[msg], ntohs(*pcid)); break; @@ -623,7 +623,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); /* part two of the three-way handshake */ info->cstate = PPTP_CALL_IN_REP; - info->pns_call_id = ntohs(pptpReq->icack.callID); + info->pns_call_id = pptpReq->icack.callID; break; case PPTP_CALL_CLEAR_REQUEST: diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c index 5dde1da1c300..6e8bd6b3431f 100644 --- a/net/ipv4/netfilter/ip_nat_helper_pptp.c +++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c @@ -85,19 +85,17 @@ static void pptp_nat_expected(struct ip_conntrack *ct, DEBUGP("we are PNS->PAC\n"); /* therefore, build tuple for PAC->PNS */ t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; - t.src.u.gre.key = htons(master->help.ct_pptp_info.pac_call_id); + t.src.u.gre.key = master->help.ct_pptp_info.pac_call_id; t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; - t.dst.u.gre.key = htons(master->help.ct_pptp_info.pns_call_id); + t.dst.u.gre.key = master->help.ct_pptp_info.pns_call_id; t.dst.protonum = IPPROTO_GRE; } else { DEBUGP("we are PAC->PNS\n"); /* build tuple for PNS->PAC */ t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; - t.src.u.gre.key = - htons(master->nat.help.nat_pptp_info.pns_call_id); + t.src.u.gre.key = master->nat.help.nat_pptp_info.pns_call_id; t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; - t.dst.u.gre.key = - htons(master->nat.help.nat_pptp_info.pac_call_id); + t.dst.u.gre.key = master->nat.help.nat_pptp_info.pac_call_id; t.dst.protonum = IPPROTO_GRE; } @@ -149,10 +147,11 @@ pptp_outbound_pkt(struct sk_buff **pskb, { struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; - u_int16_t msg, new_callid; + u_int16_t msg; + __be16 new_callid; unsigned int cid_off; - new_callid = htons(ct_pptp_info->pns_call_id); + new_callid = ct_pptp_info->pns_call_id; switch (msg = ntohs(ctlh->messageType)) { case PPTP_OUT_CALL_REQUEST: @@ -170,7 +169,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; /* save new call ID in ct info */ - ct_pptp_info->pns_call_id = ntohs(new_callid); + ct_pptp_info->pns_call_id = new_callid; break; case PPTP_IN_CALL_REPLY: cid_off = offsetof(union pptp_ctrl_union, icreq.callID); @@ -235,14 +234,14 @@ pptp_exp_gre(struct ip_conntrack_expect *expect_orig, /* alter expectation for PNS->PAC direction */ invert_tuplepr(&inv_t, &expect_orig->tuple); - expect_orig->saved_proto.gre.key = htons(ct_pptp_info->pns_call_id); - expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id); - expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); + expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id; + expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id; + expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id; expect_orig->dir = IP_CT_DIR_ORIGINAL; inv_t.src.ip = reply_t->src.ip; inv_t.dst.ip = reply_t->dst.ip; - inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id); - inv_t.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); + inv_t.src.u.gre.key = nat_pptp_info->pac_call_id; + inv_t.dst.u.gre.key = ct_pptp_info->pns_call_id; if (!ip_conntrack_expect_related(expect_orig)) { DEBUGP("successfully registered expect\n"); @@ -253,14 +252,14 @@ pptp_exp_gre(struct ip_conntrack_expect *expect_orig, /* alter expectation for PAC->PNS direction */ invert_tuplepr(&inv_t, &expect_reply->tuple); - expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id); - expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id); - expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); + expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id; + expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id; + expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id; expect_reply->dir = IP_CT_DIR_REPLY; inv_t.src.ip = orig_t->src.ip; inv_t.dst.ip = orig_t->dst.ip; - inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id); - inv_t.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); + inv_t.src.u.gre.key = nat_pptp_info->pns_call_id; + inv_t.dst.u.gre.key = ct_pptp_info->pac_call_id; if (!ip_conntrack_expect_related(expect_reply)) { DEBUGP("successfully registered expect\n"); @@ -297,10 +296,11 @@ pptp_inbound_pkt(struct sk_buff **pskb, union pptp_ctrl_union *pptpReq) { struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; - u_int16_t msg, new_cid = 0, new_pcid; + u_int16_t msg, new_cid = 0; + __be16 new_pcid; unsigned int pcid_off, cid_off = 0; - new_pcid = htons(nat_pptp_info->pns_call_id); + new_pcid = nat_pptp_info->pns_call_id; switch (msg = ntohs(ctlh->messageType)) { case PPTP_OUT_CALL_REPLY: diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c index a5226691f02c..bf91f9312b3c 100644 --- a/net/ipv4/netfilter/ip_nat_proto_gre.c +++ b/net/ipv4/netfilter/ip_nat_proto_gre.c @@ -67,7 +67,7 @@ gre_unique_tuple(struct ip_conntrack_tuple *tuple, const struct ip_conntrack *conntrack) { static u_int16_t key; - u_int16_t *keyptr; + __be16 *keyptr; unsigned int min, i, range_size; if (maniptype == IP_NAT_MANIP_SRC) From a1ad1deed5bf6fa06f2213b7f1a794de4cf791a6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:08:23 -0700 Subject: [PATCH 0716/1063] [NETFILTER]: PPTP conntrack: remove dead code The call ID in reply packets is never changed, remove the code. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_nat_helper_pptp.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c index 6e8bd6b3431f..0f5e753b481d 100644 --- a/net/ipv4/netfilter/ip_nat_helper_pptp.c +++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c @@ -296,16 +296,15 @@ pptp_inbound_pkt(struct sk_buff **pskb, union pptp_ctrl_union *pptpReq) { struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; - u_int16_t msg, new_cid = 0; + u_int16_t msg; __be16 new_pcid; - unsigned int pcid_off, cid_off = 0; + unsigned int pcid_off; new_pcid = nat_pptp_info->pns_call_id; switch (msg = ntohs(ctlh->messageType)) { case PPTP_OUT_CALL_REPLY: pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID); - cid_off = offsetof(union pptp_ctrl_union, ocack.callID); break; case PPTP_IN_CALL_CONNECT: pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID); @@ -351,17 +350,6 @@ pptp_inbound_pkt(struct sk_buff **pskb, sizeof(new_pcid), (char *)&new_pcid, sizeof(new_pcid)) == 0) return NF_DROP; - - if (new_cid) { - DEBUGP("altering call id from 0x%04x to 0x%04x\n", - ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_cid)); - if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, - cid_off + sizeof(struct pptp_pkt_hdr) + - sizeof(struct PptpControlHeader), - sizeof(new_cid), (char *)&new_cid, - sizeof(new_cid)) == 0) - return NF_DROP; - } return NF_ACCEPT; } From 5256f663a0228af9bf69ba74ad9f0928f35713f7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:08:41 -0700 Subject: [PATCH 0717/1063] [NETFILTER]: PPTP conntrack: remove more dead code The calculated sequence numbers are not used for anything. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 57637ca2b82c..0510ee50dc65 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -220,7 +220,6 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) /* expect GRE connections (PNS->PAC and PAC->PNS direction) */ static inline int exp_gre(struct ip_conntrack *master, - u_int32_t seq, __be16 callid, __be16 peer_callid) { @@ -336,7 +335,6 @@ pptp_inbound_pkt(struct sk_buff **pskb, struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; u_int16_t msg; __be16 *cid, *pcid; - u_int32_t seq; ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); if (!ctlh) { @@ -432,12 +430,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, info->cstate = PPTP_CALL_OUT_CONF; - seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) - + sizeof(struct PptpControlHeader) - + ((void *)pcid - (void *)pptpReq); - - if (exp_gre(ct, seq, *cid, *pcid) != 0) - printk("ip_conntrack_pptp: error during exp_gre\n"); + exp_gre(ct, *cid, *pcid); break; case PPTP_IN_CALL_REQUEST: @@ -488,13 +481,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, info->cstate = PPTP_CALL_IN_CONF; /* we expect a GRE connection from PAC to PNS */ - seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) - + sizeof(struct PptpControlHeader) - + ((void *)pcid - (void *)pptpReq); - - if (exp_gre(ct, seq, *cid, *pcid) != 0) - printk("ip_conntrack_pptp: error during exp_gre\n"); - + exp_gre(ct, *cid, *pcid); break; case PPTP_CALL_DISCONNECT_NOTIFY: From 6013c0a13e335674a783215e182c367406294392 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:08:56 -0700 Subject: [PATCH 0718/1063] [NETFILTER]: PPTP conntrack: fix header definitions Fix a few header definitions to match RFC2637. Most importantly the PptpOutCallRequest header included an invalid padding field and a size check was disabled because of this. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_pptp.h | 9 +++++---- net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h index 0d35623f9453..620bf06fabc2 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h @@ -107,8 +107,7 @@ struct PptpControlHeader { struct PptpStartSessionRequest { __be16 protocolVersion; - __u8 reserved1; - __u8 reserved2; + __u16 reserved1; __be32 framingCapability; __be32 bearerCapability; __be16 maxChannels; @@ -143,6 +142,8 @@ struct PptpStartSessionReply { struct PptpStopSessionRequest { __u8 reason; + __u8 reserved1; + __u16 reserved2; }; /* PptpStopSessionResultCode */ @@ -152,6 +153,7 @@ struct PptpStopSessionRequest { struct PptpStopSessionReply { __u8 resultCode; __u8 generalErrorCode; + __u16 reserved1; }; struct PptpEchoRequest { @@ -188,9 +190,8 @@ struct PptpOutCallRequest { __be32 framingType; __be16 packetWindow; __be16 packetProcDelay; - __u16 reserved1; __be16 phoneNumberLength; - __u16 reserved2; + __u16 reserved1; __u8 phoneNumber[64]; __u8 subAddress[64]; }; diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 0510ee50dc65..1a8da9015d8c 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -569,7 +569,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, case PPTP_OUT_CALL_REQUEST: if (reqlen < sizeof(_pptpReq.ocreq)) { DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - /* FIXME: break; */ + break; } /* client initiating connection to server */ From 857c06da2ba2e00b81677c2f6740048d87da0207 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:09:19 -0700 Subject: [PATCH 0719/1063] [NETFILTER]: PPTP conntrack: remove unnecessary cid/pcid header pointers Just the values are needed, not the memory locations. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 56 +++++++++---------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 1a8da9015d8c..5f7af6ef3881 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -334,7 +334,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, union pptp_ctrl_union _pptpReq, *pptpReq; struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; u_int16_t msg; - __be16 *cid, *pcid; + __be16 cid, pcid; ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); if (!ctlh) { @@ -414,23 +414,23 @@ pptp_inbound_pkt(struct sk_buff **pskb, break; } - cid = &pptpReq->ocack.callID; - pcid = &pptpReq->ocack.peersCallID; + cid = pptpReq->ocack.callID; + pcid = pptpReq->ocack.peersCallID; - info->pac_call_id = *cid; + info->pac_call_id = cid; - if (info->pns_call_id != *pcid) { + if (info->pns_call_id != pcid) { DEBUGP("%s for unknown callid %u\n", - pptp_msg_name[msg], ntohs(*pcid)); + pptp_msg_name[msg], ntohs(pcid)); break; } DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], - ntohs(*cid), ntohs(*pcid)); + ntohs(cid), ntohs(pcid)); info->cstate = PPTP_CALL_OUT_CONF; - exp_gre(ct, *cid, *pcid); + exp_gre(ct, cid, pcid); break; case PPTP_IN_CALL_REQUEST: @@ -444,10 +444,10 @@ pptp_inbound_pkt(struct sk_buff **pskb, DEBUGP("%s but no session\n", pptp_msg_name[msg]); break; } - pcid = &pptpReq->icack.peersCallID; - DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); + pcid = pptpReq->icack.peersCallID; + DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid)); info->cstate = PPTP_CALL_IN_REQ; - info->pac_call_id = *pcid; + info->pac_call_id = pcid; break; case PPTP_IN_CALL_CONNECT: @@ -468,20 +468,20 @@ pptp_inbound_pkt(struct sk_buff **pskb, break; } - pcid = &pptpReq->iccon.peersCallID; - cid = &info->pac_call_id; + pcid = pptpReq->iccon.peersCallID; + cid = info->pac_call_id; - if (info->pns_call_id != *pcid) { + if (info->pns_call_id != pcid) { DEBUGP("%s for unknown CallID %u\n", - pptp_msg_name[msg], ntohs(*pcid)); + pptp_msg_name[msg], ntohs(pcid)); break; } - DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); + DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid)); info->cstate = PPTP_CALL_IN_CONF; /* we expect a GRE connection from PAC to PNS */ - exp_gre(ct, *cid, *pcid); + exp_gre(ct, cid, pcid); break; case PPTP_CALL_DISCONNECT_NOTIFY: @@ -491,8 +491,8 @@ pptp_inbound_pkt(struct sk_buff **pskb, } /* server confirms disconnect */ - cid = &pptpReq->disc.callID; - DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); + cid = pptpReq->disc.callID; + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); info->cstate = PPTP_CALL_NONE; /* untrack this call id, unexpect GRE packets */ @@ -534,7 +534,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, union pptp_ctrl_union _pptpReq, *pptpReq; struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; u_int16_t msg; - __be16 *cid, *pcid; + __be16 cid, pcid; ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); if (!ctlh) @@ -580,9 +580,9 @@ pptp_outbound_pkt(struct sk_buff **pskb, } info->cstate = PPTP_CALL_OUT_REQ; /* track PNS call id */ - cid = &pptpReq->ocreq.callID; - DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); - info->pns_call_id = *cid; + cid = pptpReq->ocreq.callID; + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); + info->pns_call_id = cid; break; case PPTP_IN_CALL_REPLY: if (reqlen < sizeof(_pptpReq.icack)) { @@ -601,16 +601,16 @@ pptp_outbound_pkt(struct sk_buff **pskb, info->cstate = PPTP_CALL_NONE; break; } - pcid = &pptpReq->icack.peersCallID; - if (info->pac_call_id != *pcid) { + pcid = pptpReq->icack.peersCallID; + if (info->pac_call_id != pcid) { DEBUGP("%s for unknown call %u\n", - pptp_msg_name[msg], ntohs(*pcid)); + pptp_msg_name[msg], ntohs(pcid)); break; } - DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(pcid)); /* part two of the three-way handshake */ info->cstate = PPTP_CALL_IN_REP; - info->pns_call_id = pptpReq->icack.callID; + info->pns_call_id = pcid; break; case PPTP_CALL_CLEAR_REQUEST: From cf9f81523ef3e95d9f222c896d266e4562999150 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:09:34 -0700 Subject: [PATCH 0720/1063] [NETFILTER]: PPTP conntrack: simplify expectation handling Remove duplicated expectation handling in the NAT helper and simplify the remains in the conntrack helper. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- .../linux/netfilter_ipv4/ip_conntrack_pptp.h | 2 +- net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 90 +++++++------------ net/ipv4/netfilter/ip_nat_helper_pptp.c | 58 +----------- 3 files changed, 34 insertions(+), 116 deletions(-) diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h index 620bf06fabc2..2644b1faddd6 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h @@ -315,7 +315,7 @@ extern int struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq); -extern int +extern void (*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *exp_orig, struct ip_conntrack_expect *exp_reply); diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 5f7af6ef3881..57eac6e3871a 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -80,7 +80,7 @@ int struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq); -int +void (*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig, struct ip_conntrack_expect *expect_reply); @@ -219,93 +219,63 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) /* expect GRE connections (PNS->PAC and PAC->PNS direction) */ static inline int -exp_gre(struct ip_conntrack *master, +exp_gre(struct ip_conntrack *ct, __be16 callid, __be16 peer_callid) { - struct ip_conntrack_tuple inv_tuple; - struct ip_conntrack_tuple exp_tuples[] = { - /* tuple in original direction, PNS->PAC */ - { .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip, - .u = { .gre = { .key = peer_callid } } - }, - .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip, - .u = { .gre = { .key = callid } }, - .protonum = IPPROTO_GRE - }, - }, - /* tuple in reply direction, PAC->PNS */ - { .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip, - .u = { .gre = { .key = callid } } - }, - .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip, - .u = { .gre = { .key = peer_callid } }, - .protonum = IPPROTO_GRE - }, - } - }; struct ip_conntrack_expect *exp_orig, *exp_reply; int ret = 1; - exp_orig = ip_conntrack_expect_alloc(master); + exp_orig = ip_conntrack_expect_alloc(ct); if (exp_orig == NULL) goto out; - exp_reply = ip_conntrack_expect_alloc(master); + exp_reply = ip_conntrack_expect_alloc(ct); if (exp_reply == NULL) goto out_put_orig; - memcpy(&exp_orig->tuple, &exp_tuples[0], sizeof(exp_orig->tuple)); + /* original direction, PNS->PAC */ + exp_orig->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; + exp_orig->tuple.src.u.gre.key = peer_callid; + exp_orig->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; + exp_orig->tuple.dst.u.gre.key = callid; + exp_orig->tuple.dst.protonum = IPPROTO_GRE; exp_orig->mask.src.ip = 0xffffffff; exp_orig->mask.src.u.all = 0; - exp_orig->mask.dst.u.all = 0; exp_orig->mask.dst.u.gre.key = htons(0xffff); exp_orig->mask.dst.ip = 0xffffffff; exp_orig->mask.dst.protonum = 0xff; - exp_orig->master = master; + exp_orig->master = ct; exp_orig->expectfn = pptp_expectfn; exp_orig->flags = 0; /* both expectations are identical apart from tuple */ memcpy(exp_reply, exp_orig, sizeof(*exp_reply)); - memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple)); + + /* reply direction, PAC->PNS */ + exp_reply->tuple.src.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; + exp_reply->tuple.src.u.gre.key = callid; + exp_reply->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; + exp_reply->tuple.dst.u.gre.key = peer_callid; + exp_reply->tuple.dst.protonum = IPPROTO_GRE; if (ip_nat_pptp_hook_exp_gre) - ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); - else { + ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); + if (ip_conntrack_expect_related(exp_orig) != 0) + goto out_put_both; + if (ip_conntrack_expect_related(exp_reply) != 0) + goto out_unexpect_orig; - DEBUGP("calling expect_related PNS->PAC"); - DUMP_TUPLE(&exp_orig->tuple); - - if (ip_conntrack_expect_related(exp_orig) != 0) { - DEBUGP("cannot expect_related()\n"); - goto out_put_both; - } - - DEBUGP("calling expect_related PAC->PNS"); - DUMP_TUPLE(&exp_reply->tuple); - - if (ip_conntrack_expect_related(exp_reply) != 0) { - DEBUGP("cannot expect_related()\n"); - goto out_unexpect_orig; - } - - /* Add GRE keymap entries */ - if (ip_ct_gre_keymap_add(master, &exp_reply->tuple, 0) != 0) { - DEBUGP("cannot keymap_add() exp\n"); - goto out_unexpect_both; - } - - invert_tuplepr(&inv_tuple, &exp_reply->tuple); - if (ip_ct_gre_keymap_add(master, &inv_tuple, 1) != 0) { - ip_ct_gre_keymap_destroy(master); - DEBUGP("cannot keymap_add() exp_inv\n"); - goto out_unexpect_both; - } - ret = 0; + /* Add GRE keymap entries */ + if (ip_ct_gre_keymap_add(ct, &exp_orig->tuple, 0) != 0) + goto out_unexpect_both; + if (ip_ct_gre_keymap_add(ct, &exp_reply->tuple, 1) != 0) { + ip_ct_gre_keymap_destroy(ct); + goto out_unexpect_both; } + ret = 0; out_put_both: ip_conntrack_expect_put(exp_reply); diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c index 0f5e753b481d..84f6bd09fcd4 100644 --- a/net/ipv4/netfilter/ip_nat_helper_pptp.c +++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c @@ -211,80 +211,28 @@ pptp_outbound_pkt(struct sk_buff **pskb, return NF_ACCEPT; } -static int +static void pptp_exp_gre(struct ip_conntrack_expect *expect_orig, struct ip_conntrack_expect *expect_reply) { - struct ip_ct_pptp_master *ct_pptp_info = - &expect_orig->master->help.ct_pptp_info; - struct ip_nat_pptp *nat_pptp_info = - &expect_orig->master->nat.help.nat_pptp_info; - struct ip_conntrack *ct = expect_orig->master; - - struct ip_conntrack_tuple inv_t; - struct ip_conntrack_tuple *orig_t, *reply_t; + struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; + struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; /* save original PAC call ID in nat_info */ nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id; - /* alter expectation */ - orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; - /* alter expectation for PNS->PAC direction */ - invert_tuplepr(&inv_t, &expect_orig->tuple); expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id; expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id; expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id; expect_orig->dir = IP_CT_DIR_ORIGINAL; - inv_t.src.ip = reply_t->src.ip; - inv_t.dst.ip = reply_t->dst.ip; - inv_t.src.u.gre.key = nat_pptp_info->pac_call_id; - inv_t.dst.u.gre.key = ct_pptp_info->pns_call_id; - - if (!ip_conntrack_expect_related(expect_orig)) { - DEBUGP("successfully registered expect\n"); - } else { - DEBUGP("can't expect_related(expect_orig)\n"); - return 1; - } /* alter expectation for PAC->PNS direction */ - invert_tuplepr(&inv_t, &expect_reply->tuple); expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id; expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id; expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id; expect_reply->dir = IP_CT_DIR_REPLY; - inv_t.src.ip = orig_t->src.ip; - inv_t.dst.ip = orig_t->dst.ip; - inv_t.src.u.gre.key = nat_pptp_info->pns_call_id; - inv_t.dst.u.gre.key = ct_pptp_info->pac_call_id; - - if (!ip_conntrack_expect_related(expect_reply)) { - DEBUGP("successfully registered expect\n"); - } else { - DEBUGP("can't expect_related(expect_reply)\n"); - ip_conntrack_unexpect_related(expect_orig); - return 1; - } - - if (ip_ct_gre_keymap_add(ct, &expect_reply->tuple, 0) < 0) { - DEBUGP("can't register original keymap\n"); - ip_conntrack_unexpect_related(expect_orig); - ip_conntrack_unexpect_related(expect_reply); - return 1; - } - - if (ip_ct_gre_keymap_add(ct, &inv_t, 1) < 0) { - DEBUGP("can't register reply keymap\n"); - ip_conntrack_unexpect_related(expect_orig); - ip_conntrack_unexpect_related(expect_reply); - ip_ct_gre_keymap_destroy(ct); - return 1; - } - - return 0; } /* inbound packets == from PAC to PNS */ From a1073406a124c1d3b33a0f06bfb8078a9ddd1985 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:09:51 -0700 Subject: [PATCH 0721/1063] [NETFILTER]: PPTP conntrack: consolidate header size checks Also make sure not to pass undersized messages to the NAT helper. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 65 +++++++------------ 1 file changed, 22 insertions(+), 43 deletions(-) diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 57eac6e3871a..3b5464fa4217 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -291,6 +291,22 @@ out_unexpect_orig: goto out_put_both; } +static const unsigned int pptp_msg_size[] = { + [PPTP_START_SESSION_REQUEST] = sizeof(struct PptpStartSessionRequest), + [PPTP_START_SESSION_REPLY] = sizeof(struct PptpStartSessionReply), + [PPTP_STOP_SESSION_REQUEST] = sizeof(struct PptpStopSessionRequest), + [PPTP_STOP_SESSION_REPLY] = sizeof(struct PptpStopSessionReply), + [PPTP_OUT_CALL_REQUEST] = sizeof(struct PptpOutCallRequest), + [PPTP_OUT_CALL_REPLY] = sizeof(struct PptpOutCallReply), + [PPTP_IN_CALL_REQUEST] = sizeof(struct PptpInCallRequest), + [PPTP_IN_CALL_REPLY] = sizeof(struct PptpInCallReply), + [PPTP_IN_CALL_CONNECT] = sizeof(struct PptpInCallConnected), + [PPTP_CALL_CLEAR_REQUEST] = sizeof(struct PptpClearCallRequest), + [PPTP_CALL_DISCONNECT_NOTIFY] = sizeof(struct PptpCallDisconnectNotify), + [PPTP_WAN_ERROR_NOTIFY] = sizeof(struct PptpWanErrorNotify), + [PPTP_SET_LINK_INFO] = sizeof(struct PptpSetLinkInfo), +}; + static inline int pptp_inbound_pkt(struct sk_buff **pskb, struct tcphdr *tcph, @@ -326,13 +342,11 @@ pptp_inbound_pkt(struct sk_buff **pskb, msg = ntohs(ctlh->messageType); DEBUGP("inbound control message %s\n", pptp_msg_name[msg]); + if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg]) + return NF_ACCEPT; + switch (msg) { case PPTP_START_SESSION_REPLY: - if (reqlen < sizeof(_pptpReq.srep)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - /* server confirms new control session */ if (info->sstate < PPTP_SESSION_REQUESTED) { DEBUGP("%s without START_SESS_REQUEST\n", @@ -346,11 +360,6 @@ pptp_inbound_pkt(struct sk_buff **pskb, break; case PPTP_STOP_SESSION_REPLY: - if (reqlen < sizeof(_pptpReq.strep)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - /* server confirms end of control session */ if (info->sstate > PPTP_SESSION_STOPREQ) { DEBUGP("%s without STOP_SESS_REQUEST\n", @@ -364,11 +373,6 @@ pptp_inbound_pkt(struct sk_buff **pskb, break; case PPTP_OUT_CALL_REPLY: - if (reqlen < sizeof(_pptpReq.ocack)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - /* server accepted call, we now expect GRE frames */ if (info->sstate != PPTP_SESSION_CONFIRMED) { DEBUGP("%s but no session\n", pptp_msg_name[msg]); @@ -404,11 +408,6 @@ pptp_inbound_pkt(struct sk_buff **pskb, break; case PPTP_IN_CALL_REQUEST: - if (reqlen < sizeof(_pptpReq.icack)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - /* server tells us about incoming call request */ if (info->sstate != PPTP_SESSION_CONFIRMED) { DEBUGP("%s but no session\n", pptp_msg_name[msg]); @@ -421,11 +420,6 @@ pptp_inbound_pkt(struct sk_buff **pskb, break; case PPTP_IN_CALL_CONNECT: - if (reqlen < sizeof(_pptpReq.iccon)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - /* server tells us about incoming call established */ if (info->sstate != PPTP_SESSION_CONFIRMED) { DEBUGP("%s but no session\n", pptp_msg_name[msg]); @@ -455,11 +449,6 @@ pptp_inbound_pkt(struct sk_buff **pskb, break; case PPTP_CALL_DISCONNECT_NOTIFY: - if (reqlen < sizeof(_pptpReq.disc)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - /* server confirms disconnect */ cid = pptpReq->disc.callID; DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); @@ -470,8 +459,6 @@ pptp_inbound_pkt(struct sk_buff **pskb, break; case PPTP_WAN_ERROR_NOTIFY: - break; - case PPTP_ECHO_REQUEST: case PPTP_ECHO_REPLY: /* I don't have to explain these ;) */ @@ -522,6 +509,9 @@ pptp_outbound_pkt(struct sk_buff **pskb, msg = ntohs(ctlh->messageType); DEBUGP("outbound control message %s\n", pptp_msg_name[msg]); + if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg]) + return NF_ACCEPT; + switch (msg) { case PPTP_START_SESSION_REQUEST: /* client requests for new control session */ @@ -537,11 +527,6 @@ pptp_outbound_pkt(struct sk_buff **pskb, break; case PPTP_OUT_CALL_REQUEST: - if (reqlen < sizeof(_pptpReq.ocreq)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - /* client initiating connection to server */ if (info->sstate != PPTP_SESSION_CONFIRMED) { DEBUGP("%s but no session\n", @@ -555,11 +540,6 @@ pptp_outbound_pkt(struct sk_buff **pskb, info->pns_call_id = cid; break; case PPTP_IN_CALL_REPLY: - if (reqlen < sizeof(_pptpReq.icack)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - /* client answers incoming call */ if (info->cstate != PPTP_CALL_IN_REQ && info->cstate != PPTP_CALL_IN_REP) { @@ -595,7 +575,6 @@ pptp_outbound_pkt(struct sk_buff **pskb, info->cstate = PPTP_CALL_CLEAR_REQ; break; case PPTP_SET_LINK_INFO: - break; case PPTP_ECHO_REQUEST: case PPTP_ECHO_REPLY: /* I don't have to explain these ;) */ From 4c651756d502e72a68b0bc6fb20bb18c68785227 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:10:06 -0700 Subject: [PATCH 0722/1063] [NETFILTER]: PPTP conntrack: consolidate header parsing Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 119 +++++++----------- 1 file changed, 47 insertions(+), 72 deletions(-) diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 3b5464fa4217..9a98a6ce1901 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -291,60 +291,21 @@ out_unexpect_orig: goto out_put_both; } -static const unsigned int pptp_msg_size[] = { - [PPTP_START_SESSION_REQUEST] = sizeof(struct PptpStartSessionRequest), - [PPTP_START_SESSION_REPLY] = sizeof(struct PptpStartSessionReply), - [PPTP_STOP_SESSION_REQUEST] = sizeof(struct PptpStopSessionRequest), - [PPTP_STOP_SESSION_REPLY] = sizeof(struct PptpStopSessionReply), - [PPTP_OUT_CALL_REQUEST] = sizeof(struct PptpOutCallRequest), - [PPTP_OUT_CALL_REPLY] = sizeof(struct PptpOutCallReply), - [PPTP_IN_CALL_REQUEST] = sizeof(struct PptpInCallRequest), - [PPTP_IN_CALL_REPLY] = sizeof(struct PptpInCallReply), - [PPTP_IN_CALL_CONNECT] = sizeof(struct PptpInCallConnected), - [PPTP_CALL_CLEAR_REQUEST] = sizeof(struct PptpClearCallRequest), - [PPTP_CALL_DISCONNECT_NOTIFY] = sizeof(struct PptpCallDisconnectNotify), - [PPTP_WAN_ERROR_NOTIFY] = sizeof(struct PptpWanErrorNotify), - [PPTP_SET_LINK_INFO] = sizeof(struct PptpSetLinkInfo), -}; - static inline int pptp_inbound_pkt(struct sk_buff **pskb, - struct tcphdr *tcph, - unsigned int nexthdr_off, - unsigned int datalen, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq, + unsigned int reqlen, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) { - struct PptpControlHeader _ctlh, *ctlh; - unsigned int reqlen; - union pptp_ctrl_union _pptpReq, *pptpReq; struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; u_int16_t msg; __be16 cid, pcid; - ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); - if (!ctlh) { - DEBUGP("error during skb_header_pointer\n"); - return NF_ACCEPT; - } - nexthdr_off += sizeof(_ctlh); - datalen -= sizeof(_ctlh); - - reqlen = datalen; - if (reqlen > sizeof(*pptpReq)) - reqlen = sizeof(*pptpReq); - pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); - if (!pptpReq) { - DEBUGP("error during skb_header_pointer\n"); - return NF_ACCEPT; - } - msg = ntohs(ctlh->messageType); DEBUGP("inbound control message %s\n", pptp_msg_name[msg]); - if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg]) - return NF_ACCEPT; - switch (msg) { case PPTP_START_SESSION_REPLY: /* server confirms new control session */ @@ -480,38 +441,19 @@ pptp_inbound_pkt(struct sk_buff **pskb, static inline int pptp_outbound_pkt(struct sk_buff **pskb, - struct tcphdr *tcph, - unsigned int nexthdr_off, - unsigned int datalen, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq, + unsigned int reqlen, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) { - struct PptpControlHeader _ctlh, *ctlh; - unsigned int reqlen; - union pptp_ctrl_union _pptpReq, *pptpReq; struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; u_int16_t msg; __be16 cid, pcid; - ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); - if (!ctlh) - return NF_ACCEPT; - nexthdr_off += sizeof(_ctlh); - datalen -= sizeof(_ctlh); - - reqlen = datalen; - if (reqlen > sizeof(*pptpReq)) - reqlen = sizeof(*pptpReq); - pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); - if (!pptpReq) - return NF_ACCEPT; - msg = ntohs(ctlh->messageType); DEBUGP("outbound control message %s\n", pptp_msg_name[msg]); - if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg]) - return NF_ACCEPT; - switch (msg) { case PPTP_START_SESSION_REQUEST: /* client requests for new control session */ @@ -593,6 +535,21 @@ pptp_outbound_pkt(struct sk_buff **pskb, return NF_ACCEPT; } +static const unsigned int pptp_msg_size[] = { + [PPTP_START_SESSION_REQUEST] = sizeof(struct PptpStartSessionRequest), + [PPTP_START_SESSION_REPLY] = sizeof(struct PptpStartSessionReply), + [PPTP_STOP_SESSION_REQUEST] = sizeof(struct PptpStopSessionRequest), + [PPTP_STOP_SESSION_REPLY] = sizeof(struct PptpStopSessionReply), + [PPTP_OUT_CALL_REQUEST] = sizeof(struct PptpOutCallRequest), + [PPTP_OUT_CALL_REPLY] = sizeof(struct PptpOutCallReply), + [PPTP_IN_CALL_REQUEST] = sizeof(struct PptpInCallRequest), + [PPTP_IN_CALL_REPLY] = sizeof(struct PptpInCallReply), + [PPTP_IN_CALL_CONNECT] = sizeof(struct PptpInCallConnected), + [PPTP_CALL_CLEAR_REQUEST] = sizeof(struct PptpClearCallRequest), + [PPTP_CALL_DISCONNECT_NOTIFY] = sizeof(struct PptpCallDisconnectNotify), + [PPTP_WAN_ERROR_NOTIFY] = sizeof(struct PptpWanErrorNotify), + [PPTP_SET_LINK_INFO] = sizeof(struct PptpSetLinkInfo), +}; /* track caller id inside control connection, call expect_related */ static int @@ -600,16 +557,17 @@ conntrack_pptp_help(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) { - struct pptp_pkt_hdr _pptph, *pptph; - struct tcphdr _tcph, *tcph; - u_int32_t tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4; - u_int32_t datalen; int dir = CTINFO2DIR(ctinfo); struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; - unsigned int nexthdr_off; - + struct tcphdr _tcph, *tcph; + struct pptp_pkt_hdr _pptph, *pptph; + struct PptpControlHeader _ctlh, *ctlh; + union pptp_ctrl_union _pptpReq, *pptpReq; + unsigned int tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4; + unsigned int datalen, reqlen, nexthdr_off; int oldsstate, oldcstate; int ret; + u_int16_t msg; /* don't do any tracking before tcp handshake complete */ if (ctinfo != IP_CT_ESTABLISHED @@ -648,6 +606,23 @@ conntrack_pptp_help(struct sk_buff **pskb, return NF_ACCEPT; } + ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); + if (!ctlh) + return NF_ACCEPT; + nexthdr_off += sizeof(_ctlh); + datalen -= sizeof(_ctlh); + + reqlen = datalen; + msg = ntohs(ctlh->messageType); + if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg]) + return NF_ACCEPT; + if (reqlen > sizeof(*pptpReq)) + reqlen = sizeof(*pptpReq); + + pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); + if (!pptpReq) + return NF_ACCEPT; + oldsstate = info->sstate; oldcstate = info->cstate; @@ -657,11 +632,11 @@ conntrack_pptp_help(struct sk_buff **pskb, * established from PNS->PAC. However, RFC makes no guarantee */ if (dir == IP_CT_DIR_ORIGINAL) /* client -> server (PNS -> PAC) */ - ret = pptp_outbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, + ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct, ctinfo); else /* server -> client (PAC -> PNS) */ - ret = pptp_inbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, + ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct, ctinfo); DEBUGP("sstate: %d->%d, cstate: %d->%d\n", oldsstate, info->sstate, oldcstate, info->cstate); From 87a0117afdfe64473a6c802501bc15aee145ebb8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:10:21 -0700 Subject: [PATCH 0723/1063] [NETFILTER]: PPTP conntrack: clean up debugging cruft Also make sure not to hand packets received in an invalid state to the NAT helper since it will mangle the packet with invalid data. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 128 +++++++----------- 1 file changed, 51 insertions(+), 77 deletions(-) diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 9a98a6ce1901..7b6d5aaca4da 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -301,7 +301,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, { struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; u_int16_t msg; - __be16 cid, pcid; + __be16 cid = 0, pcid = 0; msg = ntohs(ctlh->messageType); DEBUGP("inbound control message %s\n", pptp_msg_name[msg]); @@ -309,11 +309,8 @@ pptp_inbound_pkt(struct sk_buff **pskb, switch (msg) { case PPTP_START_SESSION_REPLY: /* server confirms new control session */ - if (info->sstate < PPTP_SESSION_REQUESTED) { - DEBUGP("%s without START_SESS_REQUEST\n", - pptp_msg_name[msg]); - break; - } + if (info->sstate < PPTP_SESSION_REQUESTED) + goto invalid; if (pptpReq->srep.resultCode == PPTP_START_OK) info->sstate = PPTP_SESSION_CONFIRMED; else @@ -322,11 +319,8 @@ pptp_inbound_pkt(struct sk_buff **pskb, case PPTP_STOP_SESSION_REPLY: /* server confirms end of control session */ - if (info->sstate > PPTP_SESSION_STOPREQ) { - DEBUGP("%s without STOP_SESS_REQUEST\n", - pptp_msg_name[msg]); - break; - } + if (info->sstate > PPTP_SESSION_STOPREQ) + goto invalid; if (pptpReq->strep.resultCode == PPTP_STOP_OK) info->sstate = PPTP_SESSION_NONE; else @@ -335,15 +329,12 @@ pptp_inbound_pkt(struct sk_buff **pskb, case PPTP_OUT_CALL_REPLY: /* server accepted call, we now expect GRE frames */ - if (info->sstate != PPTP_SESSION_CONFIRMED) { - DEBUGP("%s but no session\n", pptp_msg_name[msg]); - break; - } + if (info->sstate != PPTP_SESSION_CONFIRMED) + goto invalid; if (info->cstate != PPTP_CALL_OUT_REQ && - info->cstate != PPTP_CALL_OUT_CONF) { - DEBUGP("%s without OUTCALL_REQ\n", pptp_msg_name[msg]); - break; - } + info->cstate != PPTP_CALL_OUT_CONF) + goto invalid; + if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) { info->cstate = PPTP_CALL_NONE; break; @@ -354,11 +345,8 @@ pptp_inbound_pkt(struct sk_buff **pskb, info->pac_call_id = cid; - if (info->pns_call_id != pcid) { - DEBUGP("%s for unknown callid %u\n", - pptp_msg_name[msg], ntohs(pcid)); - break; - } + if (info->pns_call_id != pcid) + goto invalid; DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], ntohs(cid), ntohs(pcid)); @@ -370,10 +358,9 @@ pptp_inbound_pkt(struct sk_buff **pskb, case PPTP_IN_CALL_REQUEST: /* server tells us about incoming call request */ - if (info->sstate != PPTP_SESSION_CONFIRMED) { - DEBUGP("%s but no session\n", pptp_msg_name[msg]); - break; - } + if (info->sstate != PPTP_SESSION_CONFIRMED) + goto invalid; + pcid = pptpReq->icack.peersCallID; DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid)); info->cstate = PPTP_CALL_IN_REQ; @@ -382,25 +369,17 @@ pptp_inbound_pkt(struct sk_buff **pskb, case PPTP_IN_CALL_CONNECT: /* server tells us about incoming call established */ - if (info->sstate != PPTP_SESSION_CONFIRMED) { - DEBUGP("%s but no session\n", pptp_msg_name[msg]); - break; - } - if (info->cstate != PPTP_CALL_IN_REP - && info->cstate != PPTP_CALL_IN_CONF) { - DEBUGP("%s but never sent IN_CALL_REPLY\n", - pptp_msg_name[msg]); - break; - } + if (info->sstate != PPTP_SESSION_CONFIRMED) + goto invalid; + if (info->cstate != PPTP_CALL_IN_REP && + info->cstate != PPTP_CALL_IN_CONF) + goto invalid; pcid = pptpReq->iccon.peersCallID; cid = info->pac_call_id; - if (info->pns_call_id != pcid) { - DEBUGP("%s for unknown CallID %u\n", - pptp_msg_name[msg], ntohs(pcid)); - break; - } + if (info->pns_call_id != pcid) + goto invalid; DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid)); info->cstate = PPTP_CALL_IN_CONF; @@ -425,18 +404,21 @@ pptp_inbound_pkt(struct sk_buff **pskb, /* I don't have to explain these ;) */ break; default: - DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX) - ? pptp_msg_name[msg]:pptp_msg_name[0], msg); - break; + goto invalid; } - if (ip_nat_pptp_hook_inbound) return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh, pptpReq); - return NF_ACCEPT; +invalid: + DEBUGP("invalid %s: type=%d cid=%u pcid=%u " + "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n", + msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0], + msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate, + ntohs(info->pns_call_id), ntohs(info->pac_call_id)); + return NF_ACCEPT; } static inline int @@ -449,7 +431,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, { struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; u_int16_t msg; - __be16 cid, pcid; + __be16 cid = 0, pcid = 0; msg = ntohs(ctlh->messageType); DEBUGP("outbound control message %s\n", pptp_msg_name[msg]); @@ -457,10 +439,8 @@ pptp_outbound_pkt(struct sk_buff **pskb, switch (msg) { case PPTP_START_SESSION_REQUEST: /* client requests for new control session */ - if (info->sstate != PPTP_SESSION_NONE) { - DEBUGP("%s but we already have one", - pptp_msg_name[msg]); - } + if (info->sstate != PPTP_SESSION_NONE) + goto invalid; info->sstate = PPTP_SESSION_REQUESTED; break; case PPTP_STOP_SESSION_REQUEST: @@ -470,11 +450,8 @@ pptp_outbound_pkt(struct sk_buff **pskb, case PPTP_OUT_CALL_REQUEST: /* client initiating connection to server */ - if (info->sstate != PPTP_SESSION_CONFIRMED) { - DEBUGP("%s but no session\n", - pptp_msg_name[msg]); - break; - } + if (info->sstate != PPTP_SESSION_CONFIRMED) + goto invalid; info->cstate = PPTP_CALL_OUT_REQ; /* track PNS call id */ cid = pptpReq->ocreq.callID; @@ -483,22 +460,17 @@ pptp_outbound_pkt(struct sk_buff **pskb, break; case PPTP_IN_CALL_REPLY: /* client answers incoming call */ - if (info->cstate != PPTP_CALL_IN_REQ - && info->cstate != PPTP_CALL_IN_REP) { - DEBUGP("%s without incall_req\n", - pptp_msg_name[msg]); - break; - } + if (info->cstate != PPTP_CALL_IN_REQ && + info->cstate != PPTP_CALL_IN_REP) + goto invalid; + if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) { info->cstate = PPTP_CALL_NONE; break; } pcid = pptpReq->icack.peersCallID; - if (info->pac_call_id != pcid) { - DEBUGP("%s for unknown call %u\n", - pptp_msg_name[msg], ntohs(pcid)); - break; - } + if (info->pac_call_id != pcid) + goto invalid; DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(pcid)); /* part two of the three-way handshake */ info->cstate = PPTP_CALL_IN_REP; @@ -507,10 +479,8 @@ pptp_outbound_pkt(struct sk_buff **pskb, case PPTP_CALL_CLEAR_REQUEST: /* client requests hangup of call */ - if (info->sstate != PPTP_SESSION_CONFIRMED) { - DEBUGP("CLEAR_CALL but no session\n"); - break; - } + if (info->sstate != PPTP_SESSION_CONFIRMED) + goto invalid; /* FUTURE: iterate over all calls and check if * call ID is valid. We don't do this without newnat, * because we only know about last call */ @@ -522,16 +492,20 @@ pptp_outbound_pkt(struct sk_buff **pskb, /* I don't have to explain these ;) */ break; default: - DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)? - pptp_msg_name[msg]:pptp_msg_name[0], msg); - /* unknown: no need to create GRE masq table entry */ - break; + goto invalid; } if (ip_nat_pptp_hook_outbound) return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh, pptpReq); + return NF_ACCEPT; +invalid: + DEBUGP("invalid %s: type=%d cid=%u pcid=%u " + "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n", + msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0], + msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate, + ntohs(info->pns_call_id), ntohs(info->pac_call_id)); return NF_ACCEPT; } From 750a58423309b56751076329e9edf61b93213e0f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:10:37 -0700 Subject: [PATCH 0724/1063] [NETFILTER]: PPTP conntrack: check call ID before changing state For rejected calls the state is set to PPTP_CALL_NONE even for non-matching call ids. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 32 ++++++++----------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 7b6d5aaca4da..5cb6b61cd171 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -335,25 +335,19 @@ pptp_inbound_pkt(struct sk_buff **pskb, info->cstate != PPTP_CALL_OUT_CONF) goto invalid; - if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) { - info->cstate = PPTP_CALL_NONE; - break; - } - cid = pptpReq->ocack.callID; pcid = pptpReq->ocack.peersCallID; - - info->pac_call_id = cid; - if (info->pns_call_id != pcid) goto invalid; - DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], ntohs(cid), ntohs(pcid)); - info->cstate = PPTP_CALL_OUT_CONF; - - exp_gre(ct, cid, pcid); + if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) { + info->cstate = PPTP_CALL_OUT_CONF; + info->pac_call_id = cid; + exp_gre(ct, cid, pcid); + } else + info->cstate = PPTP_CALL_NONE; break; case PPTP_IN_CALL_REQUEST: @@ -464,17 +458,17 @@ pptp_outbound_pkt(struct sk_buff **pskb, info->cstate != PPTP_CALL_IN_REP) goto invalid; - if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) { - info->cstate = PPTP_CALL_NONE; - break; - } pcid = pptpReq->icack.peersCallID; if (info->pac_call_id != pcid) goto invalid; DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(pcid)); - /* part two of the three-way handshake */ - info->cstate = PPTP_CALL_IN_REP; - info->pns_call_id = pcid; + + if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) { + /* part two of the three-way handshake */ + info->cstate = PPTP_CALL_IN_REP; + info->pns_call_id = pcid; + } else + info->cstate = PPTP_CALL_NONE; break; case PPTP_CALL_CLEAR_REQUEST: From 62fbe9c82b20197a4f9c54f7add5d368418ba277 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:10:52 -0700 Subject: [PATCH 0725/1063] [NETFILTER]: PPTP conntrack: fix PPTP_IN_CALL message types Fix incorrectly used message types and call IDs: - PPTP_IN_CALL_REQUEST (PAC->PNS) contains a PptpInCallRequest (icreq) message and the PAC call ID - PPTP_IN_CALL_REPLY (PNS->PAC) contains a PptpInCallReply (icack) message and the PNS call ID Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 12 +++++++----- net/ipv4/netfilter/ip_nat_helper_pptp.c | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 5cb6b61cd171..b0225b65ca35 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -355,10 +355,10 @@ pptp_inbound_pkt(struct sk_buff **pskb, if (info->sstate != PPTP_SESSION_CONFIRMED) goto invalid; - pcid = pptpReq->icack.peersCallID; - DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid)); + cid = pptpReq->icreq.callID; + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); info->cstate = PPTP_CALL_IN_REQ; - info->pac_call_id = pcid; + info->pac_call_id = cid; break; case PPTP_IN_CALL_CONNECT: @@ -458,15 +458,17 @@ pptp_outbound_pkt(struct sk_buff **pskb, info->cstate != PPTP_CALL_IN_REP) goto invalid; + cid = pptpReq->icack.callID; pcid = pptpReq->icack.peersCallID; if (info->pac_call_id != pcid) goto invalid; - DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(pcid)); + DEBUGP("%s, CID=%X PCID=%X\n", pptp_msg_name[msg], + ntohs(cid), ntohs(pcid)); if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) { /* part two of the three-way handshake */ info->cstate = PPTP_CALL_IN_REP; - info->pns_call_id = pcid; + info->pns_call_id = cid; } else info->cstate = PPTP_CALL_NONE; break; diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c index 84f6bd09fcd4..2ff578807123 100644 --- a/net/ipv4/netfilter/ip_nat_helper_pptp.c +++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c @@ -172,7 +172,7 @@ pptp_outbound_pkt(struct sk_buff **pskb, ct_pptp_info->pns_call_id = new_callid; break; case PPTP_IN_CALL_REPLY: - cid_off = offsetof(union pptp_ctrl_union, icreq.callID); + cid_off = offsetof(union pptp_ctrl_union, icack.callID); break; case PPTP_CALL_CLEAR_REQUEST: cid_off = offsetof(union pptp_ctrl_union, clrreq.callID); From fd5e3befa405ea64d4db6b393b821644bf963c57 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:11:12 -0700 Subject: [PATCH 0726/1063] [NETFILTER]: PPTP conntrack: fix GRE keymap leak When destroying the GRE expectations without having seen the GRE connection the keymap entry is not freed, leading to a memory leak and, in case of a following call within the same session, failure during expectation setup. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index b0225b65ca35..98267b0d2a47 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -194,6 +194,7 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) { struct ip_conntrack_tuple t; + ip_ct_gre_keymap_destroy(ct); /* Since ct->sibling_list has literally rusted away in 2.6.11, * we now need another way to find out about our sibling * contrack and expects... -HW */ From 4c5de695cf7f71c85ad8cfff509f6475b8bd4d27 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 12:11:30 -0700 Subject: [PATCH 0727/1063] [NETFILTER]: PPTP conntrack: fix another GRE keymap leak When the master PPTP connection times out while still having unfullfilled expectations (and a GRE keymap entry) associated with it, the keymap entry is not destroyed. Add a destroy callback to struct ip_conntrack_helper and use it to destroy PPTP siblings when the master is destroyed. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_helper.h | 2 ++ net/ipv4/netfilter/ip_conntrack_core.c | 5 +++++ net/ipv4/netfilter/ip_conntrack_helper_pptp.c | 12 ++---------- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/include/linux/netfilter_ipv4/ip_conntrack_helper.h b/include/linux/netfilter_ipv4/ip_conntrack_helper.h index 8d69279ccfe4..77fe868d36ff 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_helper.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_helper.h @@ -25,6 +25,8 @@ struct ip_conntrack_helper struct ip_conntrack *ct, enum ip_conntrack_info conntrackinfo); + void (*destroy)(struct ip_conntrack *ct); + int (*to_nfattr)(struct sk_buff *skb, const struct ip_conntrack *ct); }; diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 2b6f24fc727e..c432b3163609 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -307,6 +307,7 @@ destroy_conntrack(struct nf_conntrack *nfct) { struct ip_conntrack *ct = (struct ip_conntrack *)nfct; struct ip_conntrack_protocol *proto; + struct ip_conntrack_helper *helper; DEBUGP("destroy_conntrack(%p)\n", ct); IP_NF_ASSERT(atomic_read(&nfct->use) == 0); @@ -315,6 +316,10 @@ destroy_conntrack(struct nf_conntrack *nfct) ip_conntrack_event(IPCT_DESTROY, ct); set_bit(IPS_DYING_BIT, &ct->status); + helper = ct->helper; + if (helper && helper->destroy) + helper->destroy(ct); + /* To make sure we don't get any weird locking issues here: * destroy_conntrack() MUST NOT be called with a write lock * to ip_conntrack_lock!!! -HW */ diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 98267b0d2a47..fb0aee691721 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -553,15 +553,6 @@ conntrack_pptp_help(struct sk_buff **pskb, nexthdr_off += tcph->doff * 4; datalen = tcplen - tcph->doff * 4; - if (tcph->fin || tcph->rst) { - DEBUGP("RST/FIN received, timeouting GRE\n"); - /* can't do this after real newnat */ - info->cstate = PPTP_CALL_NONE; - - /* untrack this call id, unexpect GRE packets */ - pptp_destroy_siblings(ct); - } - pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph); if (!pptph) { DEBUGP("no full PPTP header, can't track\n"); @@ -640,7 +631,8 @@ static struct ip_conntrack_helper pptp = { .protonum = 0xff } }, - .help = conntrack_pptp_help + .help = conntrack_pptp_help, + .destroy = pptp_destroy_siblings, }; extern void ip_ct_proto_gre_fini(void); From e21e0b5f19ac7835a244c2016f7ed726f971b3e9 Mon Sep 17 00:00:00 2001 From: Ville Nuorvala Date: Fri, 22 Sep 2006 14:41:44 -0700 Subject: [PATCH 0728/1063] [IPV6] NDISC: Handle NDP messages to proxied addresses. It is required to respond to NDP messages sent directly to the "target" unicast address. Proxying node (router) is required to handle such messages. To achieve this, check if the packet in forwarding patch is NDP message. With this patch, the proxy neighbor entries are always looked up in forwarding path. We may want to optimize further. Based on MIPL2 kernel patch. Signed-off-by: Ville Nuorvala Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/ip6_output.c | 45 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index c14ea1ecf379..0f56e9e69a8f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -308,6 +308,46 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel) return 0; } +static int ip6_forward_proxy_check(struct sk_buff *skb) +{ + struct ipv6hdr *hdr = skb->nh.ipv6h; + u8 nexthdr = hdr->nexthdr; + int offset; + + if (ipv6_ext_hdr(nexthdr)) { + offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr); + if (offset < 0) + return 0; + } else + offset = sizeof(struct ipv6hdr); + + if (nexthdr == IPPROTO_ICMPV6) { + struct icmp6hdr *icmp6; + + if (!pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data)) + return 0; + + icmp6 = (struct icmp6hdr *)(skb->nh.raw + offset); + + switch (icmp6->icmp6_type) { + case NDISC_ROUTER_SOLICITATION: + case NDISC_ROUTER_ADVERTISEMENT: + case NDISC_NEIGHBOUR_SOLICITATION: + case NDISC_NEIGHBOUR_ADVERTISEMENT: + case NDISC_REDIRECT: + /* For reaction involving unicast neighbor discovery + * message destined to the proxied address, pass it to + * input function. + */ + return 1; + default: + break; + } + } + + return 0; +} + static inline int ip6_forward_finish(struct sk_buff *skb) { return dst_output(skb); @@ -362,6 +402,11 @@ int ip6_forward(struct sk_buff *skb) return -ETIMEDOUT; } + if (pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) { + if (ip6_forward_proxy_check(skb)) + return ip6_input(skb); + } + if (!xfrm6_route_forward(skb)) { IP6_INC_STATS(IPSTATS_MIB_INDISCARDS); goto drop; From 74553b09dcd9194cbda737016f0b89f245145670 Mon Sep 17 00:00:00 2001 From: Ville Nuorvala Date: Fri, 22 Sep 2006 14:42:18 -0700 Subject: [PATCH 0729/1063] [IPV6]: Don't forward packets to proxied link-local address. Proxying router can't forward traffic sent to link-local address, so signal the sender and discard the packet. This behavior is clarified by Mobile IPv6 specification (RFC3775) but might be required for all proxying router. Based on MIPL2 kernel patch. Signed-off-by: Ville Nuorvala Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/ip6_output.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 0f56e9e69a8f..b2be749d2217 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -345,6 +345,16 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) } } + /* + * The proxying router can't forward traffic sent to a link-local + * address, so signal the sender and discard the packet. This + * behavior is clarified by the MIPv6 specification. + */ + if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) { + dst_link_failure(skb); + return -1; + } + return 0; } @@ -403,8 +413,13 @@ int ip6_forward(struct sk_buff *skb) } if (pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) { - if (ip6_forward_proxy_check(skb)) + int proxied = ip6_forward_proxy_check(skb); + if (proxied > 0) return ip6_input(skb); + else if (proxied < 0) { + IP6_INC_STATS(IPSTATS_MIB_INDISCARDS); + goto drop; + } } if (!xfrm6_route_forward(skb)) { From 5f3e6e9e19f50a6910aec2dbd479187aabba04b7 Mon Sep 17 00:00:00 2001 From: Ville Nuorvala Date: Fri, 22 Sep 2006 14:42:46 -0700 Subject: [PATCH 0730/1063] [IPV6] NDISC: Avoid updating neighbor cache for proxied address in receiving NA. This aims at proxying router not updating neighbor cache entry for proxied address when it receives NA because either the proxied node is off link or it has already sent a NA to the proxied router. Based on MIPL2 kernel patch. Signed-off-by: Ville Nuorvala Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/ndisc.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index ed01f9a330d6..0e0d6ce69021 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -952,6 +952,15 @@ static void ndisc_recv_na(struct sk_buff *skb) if (neigh->nud_state & NUD_FAILED) goto out; + /* + * Don't update the neighbor cache entry on a proxy NA from + * ourselves because either the proxied node is off link or it + * has already sent a NA to us. + */ + if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && + pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) + goto out; + neigh_update(neigh, lladdr, msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| From 62dd93181aaa1d5a501a9cebcb254f44b8a48af7 Mon Sep 17 00:00:00 2001 From: Ville Nuorvala Date: Fri, 22 Sep 2006 14:43:19 -0700 Subject: [PATCH 0731/1063] [IPV6] NDISC: Set per-entry is_router flag in Proxy NA. We have sent NA with router flag from the node-wide forwarding configuration. This is not appropriate for proxy NA, and it should be set according to each proxy entry's configuration. This is used by Mobile IPv6 home agent to support physical home link in acting as a proxy router for mobile node which is not a router, for example. Based on MIPL2 kernel patch. Signed-off-by: Ville Nuorvala Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki --- include/net/neighbour.h | 1 + net/core/neighbour.c | 11 ++++++++--- net/ipv6/ndisc.c | 14 +++++++++++--- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index bd187daffdb9..c8aacbd2e333 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -126,6 +126,7 @@ struct pneigh_entry { struct pneigh_entry *next; struct net_device *dev; + u8 flags; u8 key[0]; }; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index a45bd2124d6b..b6c69e1463e8 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1544,9 +1544,14 @@ int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL; if (ndm->ndm_flags & NTF_PROXY) { - err = 0; - if (pneigh_lookup(tbl, dst, dev, 1) == NULL) - err = -ENOBUFS; + struct pneigh_entry *pn; + + err = -ENOBUFS; + pn = pneigh_lookup(tbl, dst, dev, 1); + if (pn) { + pn->flags = ndm->ndm_flags; + err = 0; + } goto out_dev_put; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 0e0d6ce69021..ddf038636f01 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -736,8 +736,10 @@ static void ndisc_recv_ns(struct sk_buff *skb) struct inet6_ifaddr *ifp; struct inet6_dev *idev = NULL; struct neighbour *neigh; + struct pneigh_entry *pneigh = NULL; int dad = ipv6_addr_any(saddr); int inc; + int is_router; if (ipv6_addr_is_multicast(&msg->target)) { ND_PRINTK2(KERN_WARNING @@ -822,7 +824,8 @@ static void ndisc_recv_ns(struct sk_buff *skb) if (ipv6_chk_acast_addr(dev, &msg->target) || (idev->cnf.forwarding && - pneigh_lookup(&nd_tbl, &msg->target, dev, 0))) { + (pneigh = pneigh_lookup(&nd_tbl, + &msg->target, dev, 0)) != NULL)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && inc != 0 && @@ -843,12 +846,17 @@ static void ndisc_recv_ns(struct sk_buff *skb) goto out; } + if (pneigh) + is_router = pneigh->flags & NTF_ROUTER; + else + is_router = idev->cnf.forwarding; + if (dad) { struct in6_addr maddr; ipv6_addr_all_nodes(&maddr); ndisc_send_na(dev, NULL, &maddr, &msg->target, - idev->cnf.forwarding, 0, (ifp != NULL), 1); + is_router, 0, (ifp != NULL), 1); goto out; } @@ -869,7 +877,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) NEIGH_UPDATE_F_OVERRIDE); if (neigh || !dev->hard_header) { ndisc_send_na(dev, neigh, saddr, &msg->target, - idev->cnf.forwarding, + is_router, 1, (ifp != NULL && inc), inc); if (neigh) neigh_release(neigh); From fbea49e1e2404baa2d88ab47e2db89e49551b53b Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Fri, 22 Sep 2006 14:43:49 -0700 Subject: [PATCH 0732/1063] [IPV6] NDISC: Add proxy_ndp sysctl. We do not always need proxy NDP functionality even we enable forwarding. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 3 +++ include/linux/ipv6.h | 2 ++ include/linux/sysctl.h | 1 + net/ipv6/addrconf.c | 11 +++++++++++ net/ipv6/ip6_output.c | 4 +++- net/ipv6/ndisc.c | 8 +++++++- 6 files changed, 27 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 307cd4ec8edd..935e298f674a 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -765,6 +765,9 @@ conf/all/forwarding - BOOLEAN This referred to as global forwarding. +proxy_ndp - BOOLEAN + Do proxy ndp. + conf/interface/*: Change special settings per interface. diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 1d6d3ccc9413..caca57df0d7d 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -176,6 +176,7 @@ struct ipv6_devconf { __s32 accept_ra_rt_info_max_plen; #endif #endif + __s32 proxy_ndp; void *sysctl; }; @@ -203,6 +204,7 @@ enum { DEVCONF_ACCEPT_RA_RTR_PREF, DEVCONF_RTR_PROBE_INTERVAL, DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN, + DEVCONF_PROXY_NDP, DEVCONF_MAX }; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index af61d9235409..736ed917a4f8 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -556,6 +556,7 @@ enum { NET_IPV6_ACCEPT_RA_RTR_PREF=20, NET_IPV6_RTR_PROBE_INTERVAL=21, NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN=22, + NET_IPV6_PROXY_NDP=23, __NET_IPV6_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1e5a296d0a82..825a291d5aa5 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -175,6 +175,7 @@ struct ipv6_devconf ipv6_devconf __read_mostly = { .accept_ra_rt_info_max_plen = 0, #endif #endif + .proxy_ndp = 0, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -205,6 +206,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .accept_ra_rt_info_max_plen = 0, #endif #endif + .proxy_ndp = 0, }; /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ @@ -3337,6 +3339,7 @@ static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; #endif #endif + array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp; } /* Maximum length of ifinfomsg attributes */ @@ -3859,6 +3862,14 @@ static struct addrconf_sysctl_table }, #endif #endif + { + .ctl_name = NET_IPV6_PROXY_NDP, + .procname = "proxy_ndp", + .data = &ipv6_devconf.proxy_ndp, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = 0, /* sentinel */ } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index b2be749d2217..66716911962e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -412,7 +412,9 @@ int ip6_forward(struct sk_buff *skb) return -ETIMEDOUT; } - if (pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) { + /* XXX: idev->cnf.proxy_ndp? */ + if (ipv6_devconf.proxy_ndp && + pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) { int proxied = ip6_forward_proxy_check(skb); if (proxied > 0) return ip6_input(skb); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index ddf038636f01..76517a5f6576 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -824,6 +824,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) if (ipv6_chk_acast_addr(dev, &msg->target) || (idev->cnf.forwarding && + (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) && (pneigh = pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) != NULL)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && @@ -966,8 +967,13 @@ static void ndisc_recv_na(struct sk_buff *skb) * has already sent a NA to us. */ if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && - pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) + ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp && + pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) { + /* XXX: idev->cnf.prixy_ndp */ + WARN_ON(skb->dst != NULL && + ((struct rt6_info *)skb->dst)->rt6i_idev); goto out; + } neigh_update(neigh, lladdr, msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE, From 8814c4b533817df825485ff32ce6ac406c3a54d1 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Fri, 22 Sep 2006 14:44:24 -0700 Subject: [PATCH 0733/1063] [IPV6] ADDRCONF: Convert addrconf_lock to RCU. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/addrconf.h | 10 ++++----- include/net/if_inet6.h | 1 + net/core/pktgen.c | 4 ++-- net/ipv6/addrconf.c | 46 ++++++++++++++++++++++-------------------- net/ipv6/anycast.c | 4 ++-- net/ipv6/ipv6_syms.c | 1 - net/sctp/ipv6.c | 6 +++--- 7 files changed, 36 insertions(+), 36 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 5fc8627435eb..aa2ed8f0a9dd 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -133,20 +133,18 @@ extern int unregister_inet6addr_notifier(struct notifier_block *nb); static inline struct inet6_dev * __in6_dev_get(struct net_device *dev) { - return (struct inet6_dev *)dev->ip6_ptr; + return rcu_dereference(dev->ip6_ptr); } -extern rwlock_t addrconf_lock; - static inline struct inet6_dev * in6_dev_get(struct net_device *dev) { struct inet6_dev *idev = NULL; - read_lock(&addrconf_lock); - idev = dev->ip6_ptr; + rcu_read_lock(); + idev = __in6_dev_get(dev); if (idev) atomic_inc(&idev->refcnt); - read_unlock(&addrconf_lock); + rcu_read_unlock(); return idev; } diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index e459e1a0ae4a..34489c13c119 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -189,6 +189,7 @@ struct inet6_dev struct ipv6_devconf cnf; struct ipv6_devstat stats; unsigned long tstamp; /* ipv6InterfaceTable update timestamp */ + struct rcu_head rcu; }; extern struct ipv6_devconf ipv6_devconf; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 6a7320b39ed0..72145d4a2600 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1786,7 +1786,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) * use ipv6_get_lladdr if/when it's get exported */ - read_lock(&addrconf_lock); + rcu_read_lock(); if ((idev = __in6_dev_get(pkt_dev->odev)) != NULL) { struct inet6_ifaddr *ifp; @@ -1805,7 +1805,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) } read_unlock_bh(&idev->lock); } - read_unlock(&addrconf_lock); + rcu_read_unlock(); if (err) printk("pktgen: ERROR: IPv6 link address not availble.\n"); } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 825a291d5aa5..c09ebb7bb98a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -119,9 +119,6 @@ static int ipv6_count_addresses(struct inet6_dev *idev); static struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE]; static DEFINE_RWLOCK(addrconf_hash_lock); -/* Protects inet6 devices */ -DEFINE_RWLOCK(addrconf_lock); - static void addrconf_verify(unsigned long); static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0); @@ -318,6 +315,12 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp, /* Nobody refers to this device, we may destroy it. */ +static void in6_dev_finish_destroy_rcu(struct rcu_head *head) +{ + struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu); + kfree(idev); +} + void in6_dev_finish_destroy(struct inet6_dev *idev) { struct net_device *dev = idev->dev; @@ -332,7 +335,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) return; } snmp6_free_dev(idev); - kfree(idev); + call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu); } static struct inet6_dev * ipv6_add_dev(struct net_device *dev) @@ -408,9 +411,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) if (netif_carrier_ok(dev)) ndev->if_flags |= IF_READY; - write_lock_bh(&addrconf_lock); - dev->ip6_ptr = ndev; - write_unlock_bh(&addrconf_lock); + /* protected by rtnl_lock */ + rcu_assign_pointer(dev->ip6_ptr, ndev); ipv6_mc_init_dev(ndev); ndev->tstamp = jiffies; @@ -474,7 +476,7 @@ static void addrconf_forward_change(void) read_lock(&dev_base_lock); for (dev=dev_base; dev; dev=dev->next) { - read_lock(&addrconf_lock); + rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding); @@ -482,7 +484,7 @@ static void addrconf_forward_change(void) if (changed) dev_forward_change(idev); } - read_unlock(&addrconf_lock); + rcu_read_unlock(); } read_unlock(&dev_base_lock); } @@ -543,7 +545,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, int hash; int err = 0; - read_lock_bh(&addrconf_lock); + rcu_read_lock_bh(); if (idev->dead) { err = -ENODEV; /*XXX*/ goto out2; @@ -612,7 +614,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, in6_ifa_hold(ifa); write_unlock(&idev->lock); out2: - read_unlock_bh(&addrconf_lock); + rcu_read_unlock_bh(); if (likely(err == 0)) atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa); @@ -915,7 +917,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, memset(&hiscore, 0, sizeof(hiscore)); read_lock(&dev_base_lock); - read_lock(&addrconf_lock); + rcu_read_lock(); for (dev = dev_base; dev; dev=dev->next) { struct inet6_dev *idev; @@ -1127,7 +1129,7 @@ record_it: } read_unlock_bh(&idev->lock); } - read_unlock(&addrconf_lock); + rcu_read_unlock(); read_unlock(&dev_base_lock); if (!ifa_result) @@ -1151,7 +1153,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr) struct inet6_dev *idev; int err = -EADDRNOTAVAIL; - read_lock(&addrconf_lock); + rcu_read_lock(); if ((idev = __in6_dev_get(dev)) != NULL) { struct inet6_ifaddr *ifp; @@ -1165,7 +1167,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr) } read_unlock_bh(&idev->lock); } - read_unlock(&addrconf_lock); + rcu_read_unlock(); return err; } @@ -1466,7 +1468,7 @@ static void ipv6_regen_rndid(unsigned long data) struct inet6_dev *idev = (struct inet6_dev *) data; unsigned long expires; - read_lock_bh(&addrconf_lock); + rcu_read_lock_bh(); write_lock_bh(&idev->lock); if (idev->dead) @@ -1490,7 +1492,7 @@ static void ipv6_regen_rndid(unsigned long data) out: write_unlock_bh(&idev->lock); - read_unlock_bh(&addrconf_lock); + rcu_read_unlock_bh(); in6_dev_put(idev); } @@ -2342,10 +2344,10 @@ static int addrconf_ifdown(struct net_device *dev, int how) Do not dev_put! */ if (how == 1) { - write_lock_bh(&addrconf_lock); - dev->ip6_ptr = NULL; idev->dead = 1; - write_unlock_bh(&addrconf_lock); + + /* protected by rtnl_lock */ + rcu_assign_pointer(dev->ip6_ptr, NULL); /* Step 1.5: remove snmp6 entry */ snmp6_unregister_dev(idev); @@ -3573,10 +3575,10 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) { - read_lock_bh(&addrconf_lock); + rcu_read_lock_bh(); if (likely(ifp->idev->dead == 0)) __ipv6_ifa_notify(event, ifp); - read_unlock_bh(&addrconf_lock); + rcu_read_unlock_bh(); } #ifdef CONFIG_SYSCTL diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index b80fc502ca03..a9604764e015 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -56,7 +56,7 @@ ip6_onlink(struct in6_addr *addr, struct net_device *dev) int onlink; onlink = 0; - read_lock(&addrconf_lock); + rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { read_lock_bh(&idev->lock); @@ -68,7 +68,7 @@ ip6_onlink(struct in6_addr *addr, struct net_device *dev) } read_unlock_bh(&idev->lock); } - read_unlock(&addrconf_lock); + rcu_read_unlock(); return onlink; } diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c index 7b7b90d9c3d0..0e8e0676a033 100644 --- a/net/ipv6/ipv6_syms.c +++ b/net/ipv6/ipv6_syms.c @@ -14,7 +14,6 @@ EXPORT_SYMBOL(ndisc_mc_map); EXPORT_SYMBOL(register_inet6addr_notifier); EXPORT_SYMBOL(unregister_inet6addr_notifier); EXPORT_SYMBOL(ip6_route_output); -EXPORT_SYMBOL(addrconf_lock); EXPORT_SYMBOL(ipv6_setsockopt); EXPORT_SYMBOL(ipv6_getsockopt); EXPORT_SYMBOL(inet6_register_protosw); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index fd87e3ceb56e..249e5033c1a8 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -321,9 +321,9 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, struct inet6_ifaddr *ifp; struct sctp_sockaddr_entry *addr; - read_lock(&addrconf_lock); + rcu_read_lock(); if ((in6_dev = __in6_dev_get(dev)) == NULL) { - read_unlock(&addrconf_lock); + rcu_read_unlock(); return; } @@ -342,7 +342,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, } read_unlock(&in6_dev->lock); - read_unlock(&addrconf_lock); + rcu_read_unlock(); } /* Initialize a sockaddr_storage from in incoming skb. */ From fc26d0abd5afd2b5268a7dbdbf8be1095ce5703e Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Fri, 22 Sep 2006 14:44:53 -0700 Subject: [PATCH 0734/1063] [IPV6] NDISC: Fix is_router flag setting. We did not send appropriate IsRouter flag if the forwarding setting is positive even value. Let's give 1/0 value to ndisc_send_na(). Also, existing users of ndisc_send_na() give 0/1 to override, we can omit redundant operation in that function. Bug hinted by Nicolas Dichtel . Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 76517a5f6576..0304b5fe8d6a 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -496,7 +496,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, msg->icmph.icmp6_unused = 0; msg->icmph.icmp6_router = router; msg->icmph.icmp6_solicited = solicited; - msg->icmph.icmp6_override = !!override; + msg->icmph.icmp6_override = override; /* Set the target address. */ ipv6_addr_copy(&msg->target, solicited_addr); @@ -847,10 +847,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) goto out; } - if (pneigh) - is_router = pneigh->flags & NTF_ROUTER; - else - is_router = idev->cnf.forwarding; + is_router = !!(pneigh ? pneigh->flags & NTF_ROUTER : idev->cnf.forwarding); if (dad) { struct in6_addr maddr; From 55ebaef1d5db9c1c76ba01a87fd986db5dee550d Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Fri, 22 Sep 2006 14:45:27 -0700 Subject: [PATCH 0735/1063] [IPV6] ADDRCONF: Allow non-DAD'able addresses. IFA_F_NODAD flag, similar to IN6_IFF_NODAD in BSDs, is introduced to skip DAD. This flag should be set to Mobile IPv6 Home Address(es) on Mobile Node because DAD would fail if we should perform DAD; our Home Agent protects our Home Address(es). Signed-off-by: Noriaki TAKAMIYA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/if_addr.h | 1 + net/ipv6/addrconf.c | 31 ++++++++++++++++--------------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h index e1590454db59..ca24b9de13fb 100644 --- a/include/linux/if_addr.h +++ b/include/linux/if_addr.h @@ -38,6 +38,7 @@ enum #define IFA_F_SECONDARY 0x01 #define IFA_F_TEMPORARY IFA_F_SECONDARY +#define IFA_F_NODAD 0x02 #define IFA_F_DEPRECATED 0x20 #define IFA_F_TENTATIVE 0x40 #define IFA_F_PERMANENT 0x80 diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c09ebb7bb98a..adb583a26151 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1873,12 +1873,11 @@ err_exit: * Manual configuration of address on an interface */ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, - __u32 prefered_lft, __u32 valid_lft) + __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; struct net_device *dev; - __u8 ifa_flags = 0; int scope; ASSERT_RTNL(); @@ -1971,7 +1970,7 @@ int addrconf_add_ifaddr(void __user *arg) rtnl_lock(); err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen, - INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); + IFA_F_PERMANENT, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); rtnl_unlock(); return err; } @@ -2514,7 +2513,8 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) spin_lock_bh(&ifp->lock); if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || - !(ifp->flags&IFA_F_TENTATIVE)) { + !(ifp->flags&IFA_F_TENTATIVE) || + ifp->flags & IFA_F_NODAD) { ifp->flags &= ~IFA_F_TENTATIVE; spin_unlock_bh(&ifp->lock); read_unlock_bh(&idev->lock); @@ -2912,28 +2912,25 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen); } -static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 prefered_lft, - u32 valid_lft) +static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, + u32 prefered_lft, u32 valid_lft) { - int ifa_flags = 0; - if (!valid_lft || (prefered_lft > valid_lft)) return -EINVAL; if (valid_lft == INFINITY_LIFE_TIME) - ifa_flags = IFA_F_PERMANENT; + ifa_flags |= IFA_F_PERMANENT; else if (valid_lft >= 0x7FFFFFFF/HZ) valid_lft = 0x7FFFFFFF/HZ; if (prefered_lft == 0) - ifa_flags = IFA_F_DEPRECATED; + ifa_flags |= IFA_F_DEPRECATED; else if ((prefered_lft >= 0x7FFFFFFF/HZ) && (prefered_lft != INFINITY_LIFE_TIME)) prefered_lft = 0x7FFFFFFF/HZ; spin_lock_bh(&ifp->lock); - ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED|IFA_F_PERMANENT)) | ifa_flags; - + ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD)) | ifa_flags; ifp->tstamp = jiffies; ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; @@ -2955,7 +2952,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct in6_addr *pfx; struct inet6_ifaddr *ifa; struct net_device *dev; - u32 valid_lft, preferred_lft; + u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME; + u8 ifa_flags; int err; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); @@ -2982,6 +2980,9 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (dev == NULL) return -ENODEV; + /* We ignore other flags so far. */ + ifa_flags = ifm->ifa_flags & IFA_F_NODAD; + ifa = ipv6_get_ifaddr(pfx, dev, 1); if (ifa == NULL) { /* @@ -2989,14 +2990,14 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) * userspace alreay relies on not having to provide this. */ return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen, - preferred_lft, valid_lft); + ifa_flags, preferred_lft, valid_lft); } if (nlh->nlmsg_flags & NLM_F_EXCL || !(nlh->nlmsg_flags & NLM_F_REPLACE)) err = -EEXIST; else - err = inet6_addr_modify(ifa, preferred_lft, valid_lft); + err = inet6_addr_modify(ifa, ifa_flags, preferred_lft, valid_lft); in6_ifa_put(ifa); From 3b9f9a1c3903b64c38505f9fed3bb11e48dbc931 Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Fri, 22 Sep 2006 14:45:56 -0700 Subject: [PATCH 0736/1063] [IPV6] ADDRCONF: Mobile IPv6 Home Address support. IFA_F_HOMEADDRESS is introduced for Mobile IPv6 Home Addresses on Mobile Node. The IFA_F_HOMEADDRESS flag should be set for Mobile IPv6 Home Addresses for 2 purposes. 1) We need to check this on receipt of Type 2 Routing Header (RFC3775 Secion 6.4), 2) We prefer Home Address(es) in source address selection (RFC3484 Section 5 Rule 4). Signed-off-by: Noriaki TAKAMIYA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/if_addr.h | 1 + include/net/addrconf.h | 6 +----- net/ipv6/addrconf.c | 44 ++++++++++++++++++++++++++++++++++++++--- 3 files changed, 43 insertions(+), 8 deletions(-) diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h index ca24b9de13fb..dbe8f6120a40 100644 --- a/include/linux/if_addr.h +++ b/include/linux/if_addr.h @@ -39,6 +39,7 @@ enum #define IFA_F_TEMPORARY IFA_F_SECONDARY #define IFA_F_NODAD 0x02 +#define IFA_F_HOMEADDRESS 0x10 #define IFA_F_DEPRECATED 0x20 #define IFA_F_TENTATIVE 0x40 #define IFA_F_PERMANENT 0x80 diff --git a/include/net/addrconf.h b/include/net/addrconf.h index aa2ed8f0a9dd..44f1b673f916 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -61,12 +61,8 @@ extern int addrconf_set_dstaddr(void __user *arg); extern int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict); -/* XXX: this is a placeholder till addrconf supports */ #ifdef CONFIG_IPV6_MIP6 -static inline int ipv6_chk_home_addr(struct in6_addr *addr) -{ - return 0; -} +extern int ipv6_chk_home_addr(struct in6_addr *addr); #endif extern struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index adb583a26151..c18676352397 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1038,9 +1038,27 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, continue; } - /* Rule 4: Prefer home address -- not implemented yet */ + /* Rule 4: Prefer home address */ +#ifdef CONFIG_IPV6_MIP6 + if (hiscore.rule < 4) { + if (ifa_result->flags & IFA_F_HOMEADDRESS) + hiscore.attrs |= IPV6_SADDR_SCORE_HOA; + hiscore.rule++; + } + if (ifa->flags & IFA_F_HOMEADDRESS) { + score.attrs |= IPV6_SADDR_SCORE_HOA; + if (!(ifa_result->flags & IFA_F_HOMEADDRESS)) { + score.rule = 4; + goto record_it; + } + } else { + if (hiscore.attrs & IPV6_SADDR_SCORE_HOA) + continue; + } +#else if (hiscore.rule < 4) hiscore.rule++; +#endif /* Rule 5: Prefer outgoing interface */ if (hiscore.rule < 5) { @@ -2759,6 +2777,26 @@ void if6_proc_exit(void) } #endif /* CONFIG_PROC_FS */ +#ifdef CONFIG_IPV6_MIP6 +/* Check if address is a home address configured on any interface. */ +int ipv6_chk_home_addr(struct in6_addr *addr) +{ + int ret = 0; + struct inet6_ifaddr * ifp; + u8 hash = ipv6_addr_hash(addr); + read_lock_bh(&addrconf_hash_lock); + for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) { + if (ipv6_addr_cmp(&ifp->addr, addr) == 0 && + (ifp->flags & IFA_F_HOMEADDRESS)) { + ret = 1; + break; + } + } + read_unlock_bh(&addrconf_hash_lock); + return ret; +} +#endif + /* * Periodic address status verification */ @@ -2930,7 +2968,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, prefered_lft = 0x7FFFFFFF/HZ; spin_lock_bh(&ifp->lock); - ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD)) | ifa_flags; + ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags; ifp->tstamp = jiffies; ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; @@ -2981,7 +3019,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return -ENODEV; /* We ignore other flags so far. */ - ifa_flags = ifm->ifa_flags & IFA_F_NODAD; + ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS); ifa = ipv6_get_ifaddr(pfx, dev, 1); if (ifa == NULL) { From fab97220c9e409a98b1956ba677ddd2dd43b0b95 Mon Sep 17 00:00:00 2001 From: Heiko J Schick Date: Fri, 22 Sep 2006 15:22:22 -0700 Subject: [PATCH 0737/1063] IB/ehca: Add driver for IBM eHCA InfiniBand adapters Add a driver for IBM GX bus InfiniBand adapters, which are usable with some pSeries/System p systems. Signed-off-by: Heiko J Schick Signed-off-by: Roland Dreier --- MAINTAINERS | 8 + drivers/infiniband/Kconfig | 1 + drivers/infiniband/Makefile | 1 + drivers/infiniband/hw/ehca/Kconfig | 16 + drivers/infiniband/hw/ehca/Makefile | 16 + drivers/infiniband/hw/ehca/ehca_av.c | 271 ++ drivers/infiniband/hw/ehca/ehca_classes.h | 346 +++ .../infiniband/hw/ehca/ehca_classes_pSeries.h | 236 ++ drivers/infiniband/hw/ehca/ehca_cq.c | 427 ++++ drivers/infiniband/hw/ehca/ehca_eq.c | 185 ++ drivers/infiniband/hw/ehca/ehca_hca.c | 241 ++ drivers/infiniband/hw/ehca/ehca_irq.c | 762 ++++++ drivers/infiniband/hw/ehca/ehca_irq.h | 77 + drivers/infiniband/hw/ehca/ehca_iverbs.h | 181 ++ drivers/infiniband/hw/ehca/ehca_main.c | 818 ++++++ drivers/infiniband/hw/ehca/ehca_mcast.c | 131 + drivers/infiniband/hw/ehca/ehca_mrmw.c | 2261 +++++++++++++++++ drivers/infiniband/hw/ehca/ehca_mrmw.h | 140 + drivers/infiniband/hw/ehca/ehca_pd.c | 114 + drivers/infiniband/hw/ehca/ehca_qes.h | 259 ++ drivers/infiniband/hw/ehca/ehca_qp.c | 1506 +++++++++++ drivers/infiniband/hw/ehca/ehca_reqs.c | 653 +++++ drivers/infiniband/hw/ehca/ehca_sqp.c | 111 + drivers/infiniband/hw/ehca/ehca_tools.h | 172 ++ drivers/infiniband/hw/ehca/ehca_uverbs.c | 392 +++ drivers/infiniband/hw/ehca/hcp_if.c | 874 +++++++ drivers/infiniband/hw/ehca/hcp_if.h | 261 ++ drivers/infiniband/hw/ehca/hcp_phyp.c | 80 + drivers/infiniband/hw/ehca/hcp_phyp.h | 90 + drivers/infiniband/hw/ehca/hipz_fns.h | 68 + drivers/infiniband/hw/ehca/hipz_fns_core.h | 100 + drivers/infiniband/hw/ehca/hipz_hw.h | 388 +++ drivers/infiniband/hw/ehca/ipz_pt_fn.c | 149 ++ drivers/infiniband/hw/ehca/ipz_pt_fn.h | 247 ++ 34 files changed, 11582 insertions(+) create mode 100644 drivers/infiniband/hw/ehca/Kconfig create mode 100644 drivers/infiniband/hw/ehca/Makefile create mode 100644 drivers/infiniband/hw/ehca/ehca_av.c create mode 100644 drivers/infiniband/hw/ehca/ehca_classes.h create mode 100644 drivers/infiniband/hw/ehca/ehca_classes_pSeries.h create mode 100644 drivers/infiniband/hw/ehca/ehca_cq.c create mode 100644 drivers/infiniband/hw/ehca/ehca_eq.c create mode 100644 drivers/infiniband/hw/ehca/ehca_hca.c create mode 100644 drivers/infiniband/hw/ehca/ehca_irq.c create mode 100644 drivers/infiniband/hw/ehca/ehca_irq.h create mode 100644 drivers/infiniband/hw/ehca/ehca_iverbs.h create mode 100644 drivers/infiniband/hw/ehca/ehca_main.c create mode 100644 drivers/infiniband/hw/ehca/ehca_mcast.c create mode 100644 drivers/infiniband/hw/ehca/ehca_mrmw.c create mode 100644 drivers/infiniband/hw/ehca/ehca_mrmw.h create mode 100644 drivers/infiniband/hw/ehca/ehca_pd.c create mode 100644 drivers/infiniband/hw/ehca/ehca_qes.h create mode 100644 drivers/infiniband/hw/ehca/ehca_qp.c create mode 100644 drivers/infiniband/hw/ehca/ehca_reqs.c create mode 100644 drivers/infiniband/hw/ehca/ehca_sqp.c create mode 100644 drivers/infiniband/hw/ehca/ehca_tools.h create mode 100644 drivers/infiniband/hw/ehca/ehca_uverbs.c create mode 100644 drivers/infiniband/hw/ehca/hcp_if.c create mode 100644 drivers/infiniband/hw/ehca/hcp_if.h create mode 100644 drivers/infiniband/hw/ehca/hcp_phyp.c create mode 100644 drivers/infiniband/hw/ehca/hcp_phyp.h create mode 100644 drivers/infiniband/hw/ehca/hipz_fns.h create mode 100644 drivers/infiniband/hw/ehca/hipz_fns_core.h create mode 100644 drivers/infiniband/hw/ehca/hipz_hw.h create mode 100644 drivers/infiniband/hw/ehca/ipz_pt_fn.c create mode 100644 drivers/infiniband/hw/ehca/ipz_pt_fn.h diff --git a/MAINTAINERS b/MAINTAINERS index ed2a83cfad7c..830bec779d47 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -991,6 +991,14 @@ EFS FILESYSTEM W: http://aeschi.ch.eu.org/efs/ S: Orphan +EHCA (IBM GX bus InfiniBand adapter) DRIVER: +P: Hoang-Nam Nguyen +M: hnguyen@de.ibm.com +P: Christoph Raisch +M: raisch@de.ibm.com +L: openib-general@openib.org +S: Supported + EMU10K1 SOUND DRIVER P: James Courtier-Dutton M: James@superbug.demon.co.uk diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 69a53d476b5b..fd2d528daa3a 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -36,6 +36,7 @@ config INFINIBAND_ADDR_TRANS source "drivers/infiniband/hw/mthca/Kconfig" source "drivers/infiniband/hw/ipath/Kconfig" +source "drivers/infiniband/hw/ehca/Kconfig" source "drivers/infiniband/ulp/ipoib/Kconfig" diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile index c7ff58c1d0e5..893bee0a50b5 100644 --- a/drivers/infiniband/Makefile +++ b/drivers/infiniband/Makefile @@ -1,6 +1,7 @@ obj-$(CONFIG_INFINIBAND) += core/ obj-$(CONFIG_INFINIBAND_MTHCA) += hw/mthca/ obj-$(CONFIG_IPATH_CORE) += hw/ipath/ +obj-$(CONFIG_INFINIBAND_EHCA) += hw/ehca/ obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/ obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/ obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/ diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig new file mode 100644 index 000000000000..922389b64394 --- /dev/null +++ b/drivers/infiniband/hw/ehca/Kconfig @@ -0,0 +1,16 @@ +config INFINIBAND_EHCA + tristate "eHCA support" + depends on IBMEBUS && INFINIBAND + ---help--- + This driver supports the IBM pSeries eHCA InfiniBand adapter. + + To compile the driver as a module, choose M here. The module + will be called ib_ehca. + +config INFINIBAND_EHCA_SCALING + bool "Scaling support (EXPERIMENTAL)" + depends on IBMEBUS && INFINIBAND_EHCA && HOTPLUG_CPU && EXPERIMENTAL + ---help--- + eHCA scaling support schedules the CQ callbacks to different CPUs. + + To enable this feature choose Y here. diff --git a/drivers/infiniband/hw/ehca/Makefile b/drivers/infiniband/hw/ehca/Makefile new file mode 100644 index 000000000000..74d284e46a40 --- /dev/null +++ b/drivers/infiniband/hw/ehca/Makefile @@ -0,0 +1,16 @@ +# Authors: Heiko J Schick +# Christoph Raisch +# Joachim Fenkes +# +# Copyright (c) 2005 IBM Corporation +# +# All rights reserved. +# +# This source code is distributed under a dual license of GPL v2.0 and OpenIB BSD. + +obj-$(CONFIG_INFINIBAND_EHCA) += ib_ehca.o + +ib_ehca-objs = ehca_main.o ehca_hca.o ehca_mcast.o ehca_pd.o ehca_av.o ehca_eq.o \ + ehca_cq.o ehca_qp.o ehca_sqp.o ehca_mrmw.o ehca_reqs.o ehca_irq.o \ + ehca_uverbs.o ipz_pt_fn.o hcp_if.o hcp_phyp.o + diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c new file mode 100644 index 000000000000..3bac197f9014 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_av.c @@ -0,0 +1,271 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * adress vector functions + * + * Authors: Hoang-Nam Nguyen + * Khadija Souissi + * Reinhard Ernst + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#include + +#include "ehca_tools.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" + +static struct kmem_cache *av_cache; + +struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +{ + int ret; + struct ehca_av *av; + struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, + ib_device); + + av = kmem_cache_alloc(av_cache, SLAB_KERNEL); + if (!av) { + ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p", + pd, ah_attr); + return ERR_PTR(-ENOMEM); + } + + av->av.sl = ah_attr->sl; + av->av.dlid = ah_attr->dlid; + av->av.slid_path_bits = ah_attr->src_path_bits; + + if (ehca_static_rate < 0) { + int ah_mult = ib_rate_to_mult(ah_attr->static_rate); + int ehca_mult = + ib_rate_to_mult(shca->sport[ah_attr->port_num].rate ); + + if (ah_mult >= ehca_mult) + av->av.ipd = 0; + else + av->av.ipd = (ah_mult > 0) ? + ((ehca_mult - 1) / ah_mult) : 0; + } else + av->av.ipd = ehca_static_rate; + + av->av.lnh = ah_attr->ah_flags; + av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_TCLASS_MASK, + ah_attr->grh.traffic_class); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK, + ah_attr->grh.flow_label); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK, + ah_attr->grh.hop_limit); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1B); + /* set sgid in grh.word_1 */ + if (ah_attr->ah_flags & IB_AH_GRH) { + int rc; + struct ib_port_attr port_attr; + union ib_gid gid; + memset(&port_attr, 0, sizeof(port_attr)); + rc = ehca_query_port(pd->device, ah_attr->port_num, + &port_attr); + if (rc) { /* invalid port number */ + ret = -EINVAL; + ehca_err(pd->device, "Invalid port number " + "ehca_query_port() returned %x " + "pd=%p ah_attr=%p", rc, pd, ah_attr); + goto create_ah_exit1; + } + memset(&gid, 0, sizeof(gid)); + rc = ehca_query_gid(pd->device, + ah_attr->port_num, + ah_attr->grh.sgid_index, &gid); + if (rc) { + ret = -EINVAL; + ehca_err(pd->device, "Failed to retrieve sgid " + "ehca_query_gid() returned %x " + "pd=%p ah_attr=%p", rc, pd, ah_attr); + goto create_ah_exit1; + } + memcpy(&av->av.grh.word_1, &gid, sizeof(gid)); + } + /* for the time being we use a hard coded PMTU of 2048 Bytes */ + av->av.pmtu = 4; + + /* dgid comes in grh.word_3 */ + memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid, + sizeof(ah_attr->grh.dgid)); + + return &av->ib_ah; + +create_ah_exit1: + kmem_cache_free(av_cache, av); + + return ERR_PTR(ret); +} + +int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) +{ + struct ehca_av *av; + struct ehca_ud_av new_ehca_av; + struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd); + u32 cur_pid = current->tgid; + + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } + + memset(&new_ehca_av, 0, sizeof(new_ehca_av)); + new_ehca_av.sl = ah_attr->sl; + new_ehca_av.dlid = ah_attr->dlid; + new_ehca_av.slid_path_bits = ah_attr->src_path_bits; + new_ehca_av.ipd = ah_attr->static_rate; + new_ehca_av.lnh = EHCA_BMASK_SET(GRH_FLAG_MASK, + (ah_attr->ah_flags & IB_AH_GRH) > 0); + new_ehca_av.grh.word_0 = EHCA_BMASK_SET(GRH_TCLASS_MASK, + ah_attr->grh.traffic_class); + new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK, + ah_attr->grh.flow_label); + new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK, + ah_attr->grh.hop_limit); + new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1b); + + /* set sgid in grh.word_1 */ + if (ah_attr->ah_flags & IB_AH_GRH) { + int rc; + struct ib_port_attr port_attr; + union ib_gid gid; + memset(&port_attr, 0, sizeof(port_attr)); + rc = ehca_query_port(ah->device, ah_attr->port_num, + &port_attr); + if (rc) { /* invalid port number */ + ehca_err(ah->device, "Invalid port number " + "ehca_query_port() returned %x " + "ah=%p ah_attr=%p port_num=%x", + rc, ah, ah_attr, ah_attr->port_num); + return -EINVAL; + } + memset(&gid, 0, sizeof(gid)); + rc = ehca_query_gid(ah->device, + ah_attr->port_num, + ah_attr->grh.sgid_index, &gid); + if (rc) { + ehca_err(ah->device, "Failed to retrieve sgid " + "ehca_query_gid() returned %x " + "ah=%p ah_attr=%p port_num=%x " + "sgid_index=%x", + rc, ah, ah_attr, ah_attr->port_num, + ah_attr->grh.sgid_index); + return -EINVAL; + } + memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid)); + } + + new_ehca_av.pmtu = 4; /* see also comment in create_ah() */ + + memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid, + sizeof(ah_attr->grh.dgid)); + + av = container_of(ah, struct ehca_av, ib_ah); + av->av = new_ehca_av; + + return 0; +} + +int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) +{ + struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah); + struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd); + u32 cur_pid = current->tgid; + + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } + + memcpy(&ah_attr->grh.dgid, &av->av.grh.word_3, + sizeof(ah_attr->grh.dgid)); + ah_attr->sl = av->av.sl; + + ah_attr->dlid = av->av.dlid; + + ah_attr->src_path_bits = av->av.slid_path_bits; + ah_attr->static_rate = av->av.ipd; + ah_attr->ah_flags = EHCA_BMASK_GET(GRH_FLAG_MASK, av->av.lnh); + ah_attr->grh.traffic_class = EHCA_BMASK_GET(GRH_TCLASS_MASK, + av->av.grh.word_0); + ah_attr->grh.hop_limit = EHCA_BMASK_GET(GRH_HOPLIMIT_MASK, + av->av.grh.word_0); + ah_attr->grh.flow_label = EHCA_BMASK_GET(GRH_FLOWLABEL_MASK, + av->av.grh.word_0); + + return 0; +} + +int ehca_destroy_ah(struct ib_ah *ah) +{ + struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd); + u32 cur_pid = current->tgid; + + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } + + kmem_cache_free(av_cache, container_of(ah, struct ehca_av, ib_ah)); + + return 0; +} + +int ehca_init_av_cache(void) +{ + av_cache = kmem_cache_create("ehca_cache_av", + sizeof(struct ehca_av), 0, + SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!av_cache) + return -ENOMEM; + return 0; +} + +void ehca_cleanup_av_cache(void) +{ + if (av_cache) + kmem_cache_destroy(av_cache); +} diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h new file mode 100644 index 000000000000..1c722032319c --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -0,0 +1,346 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Struct definition for eHCA internal structures + * + * Authors: Heiko J Schick + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __EHCA_CLASSES_H__ +#define __EHCA_CLASSES_H__ + +#include "ehca_classes.h" +#include "ipz_pt_fn.h" + +struct ehca_module; +struct ehca_qp; +struct ehca_cq; +struct ehca_eq; +struct ehca_mr; +struct ehca_mw; +struct ehca_pd; +struct ehca_av; + +#ifdef CONFIG_PPC64 +#include "ehca_classes_pSeries.h" +#endif + +#include +#include + +#include "ehca_irq.h" + +struct ehca_eq { + u32 length; + struct ipz_queue ipz_queue; + struct ipz_eq_handle ipz_eq_handle; + struct work_struct work; + struct h_galpas galpas; + int is_initialized; + struct ehca_pfeq pf; + spinlock_t spinlock; + struct tasklet_struct interrupt_task; + u32 ist; +}; + +struct ehca_sport { + struct ib_cq *ibcq_aqp1; + struct ib_qp *ibqp_aqp1; + enum ib_rate rate; + enum ib_port_state port_state; +}; + +struct ehca_shca { + struct ib_device ib_device; + struct ibmebus_dev *ibmebus_dev; + u8 num_ports; + int hw_level; + struct list_head shca_list; + struct ipz_adapter_handle ipz_hca_handle; + struct ehca_sport sport[2]; + struct ehca_eq eq; + struct ehca_eq neq; + struct ehca_mr *maxmr; + struct ehca_pd *pd; + struct h_galpas galpas; +}; + +struct ehca_pd { + struct ib_pd ib_pd; + struct ipz_pd fw_pd; + u32 ownpid; +}; + +struct ehca_qp { + struct ib_qp ib_qp; + u32 qp_type; + struct ipz_queue ipz_squeue; + struct ipz_queue ipz_rqueue; + struct h_galpas galpas; + u32 qkey; + u32 real_qp_num; + u32 token; + spinlock_t spinlock_s; + spinlock_t spinlock_r; + u32 sq_max_inline_data_size; + struct ipz_qp_handle ipz_qp_handle; + struct ehca_pfqp pf; + struct ib_qp_init_attr init_attr; + u64 uspace_squeue; + u64 uspace_rqueue; + u64 uspace_fwh; + struct ehca_cq *send_cq; + struct ehca_cq *recv_cq; + unsigned int sqerr_purgeflag; + struct hlist_node list_entries; +}; + +/* must be power of 2 */ +#define QP_HASHTAB_LEN 8 + +struct ehca_cq { + struct ib_cq ib_cq; + struct ipz_queue ipz_queue; + struct h_galpas galpas; + spinlock_t spinlock; + u32 cq_number; + u32 token; + u32 nr_of_entries; + struct ipz_cq_handle ipz_cq_handle; + struct ehca_pfcq pf; + spinlock_t cb_lock; + u64 uspace_queue; + u64 uspace_fwh; + struct hlist_head qp_hashtab[QP_HASHTAB_LEN]; + struct list_head entry; + u32 nr_callbacks; + spinlock_t task_lock; + u32 ownpid; +}; + +enum ehca_mr_flag { + EHCA_MR_FLAG_FMR = 0x80000000, /* FMR, created with ehca_alloc_fmr */ + EHCA_MR_FLAG_MAXMR = 0x40000000, /* max-MR */ +}; + +struct ehca_mr { + union { + struct ib_mr ib_mr; /* must always be first in ehca_mr */ + struct ib_fmr ib_fmr; /* must always be first in ehca_mr */ + } ib; + spinlock_t mrlock; + + enum ehca_mr_flag flags; + u32 num_pages; /* number of MR pages */ + u32 num_4k; /* number of 4k "page" portions to form MR */ + int acl; /* ACL (stored here for usage in reregister) */ + u64 *start; /* virtual start address (stored here for */ + /* usage in reregister) */ + u64 size; /* size (stored here for usage in reregister) */ + u32 fmr_page_size; /* page size for FMR */ + u32 fmr_max_pages; /* max pages for FMR */ + u32 fmr_max_maps; /* max outstanding maps for FMR */ + u32 fmr_map_cnt; /* map counter for FMR */ + /* fw specific data */ + struct ipz_mrmw_handle ipz_mr_handle; /* MR handle for h-calls */ + struct h_galpas galpas; + /* data for userspace bridge */ + u32 nr_of_pages; + void *pagearray; +}; + +struct ehca_mw { + struct ib_mw ib_mw; /* gen2 mw, must always be first in ehca_mw */ + spinlock_t mwlock; + + u8 never_bound; /* indication MW was never bound */ + struct ipz_mrmw_handle ipz_mw_handle; /* MW handle for h-calls */ + struct h_galpas galpas; +}; + +enum ehca_mr_pgi_type { + EHCA_MR_PGI_PHYS = 1, /* type of ehca_reg_phys_mr, + * ehca_rereg_phys_mr, + * ehca_reg_internal_maxmr */ + EHCA_MR_PGI_USER = 2, /* type of ehca_reg_user_mr */ + EHCA_MR_PGI_FMR = 3 /* type of ehca_map_phys_fmr */ +}; + +struct ehca_mr_pginfo { + enum ehca_mr_pgi_type type; + u64 num_pages; + u64 page_cnt; + u64 num_4k; /* number of 4k "page" portions */ + u64 page_4k_cnt; /* counter for 4k "page" portions */ + u64 next_4k; /* next 4k "page" portion in buffer/chunk/listelem */ + + /* type EHCA_MR_PGI_PHYS section */ + int num_phys_buf; + struct ib_phys_buf *phys_buf_array; + u64 next_buf; + + /* type EHCA_MR_PGI_USER section */ + struct ib_umem *region; + struct ib_umem_chunk *next_chunk; + u64 next_nmap; + + /* type EHCA_MR_PGI_FMR section */ + u64 *page_list; + u64 next_listelem; + /* next_4k also used within EHCA_MR_PGI_FMR */ +}; + +/* output parameters for MR/FMR hipz calls */ +struct ehca_mr_hipzout_parms { + struct ipz_mrmw_handle handle; + u32 lkey; + u32 rkey; + u64 len; + u64 vaddr; + u32 acl; +}; + +/* output parameters for MW hipz calls */ +struct ehca_mw_hipzout_parms { + struct ipz_mrmw_handle handle; + u32 rkey; +}; + +struct ehca_av { + struct ib_ah ib_ah; + struct ehca_ud_av av; +}; + +struct ehca_ucontext { + struct ib_ucontext ib_ucontext; +}; + +struct ehca_module *ehca_module_new(void); + +int ehca_module_delete(struct ehca_module *me); + +int ehca_eq_ctor(struct ehca_eq *eq); + +int ehca_eq_dtor(struct ehca_eq *eq); + +struct ehca_shca *ehca_shca_new(void); + +int ehca_shca_delete(struct ehca_shca *me); + +struct ehca_sport *ehca_sport_new(struct ehca_shca *anchor); + +int ehca_init_pd_cache(void); +void ehca_cleanup_pd_cache(void); +int ehca_init_cq_cache(void); +void ehca_cleanup_cq_cache(void); +int ehca_init_qp_cache(void); +void ehca_cleanup_qp_cache(void); +int ehca_init_av_cache(void); +void ehca_cleanup_av_cache(void); +int ehca_init_mrmw_cache(void); +void ehca_cleanup_mrmw_cache(void); + +extern spinlock_t ehca_qp_idr_lock; +extern spinlock_t ehca_cq_idr_lock; +extern struct idr ehca_qp_idr; +extern struct idr ehca_cq_idr; + +extern int ehca_static_rate; +extern int ehca_port_act_time; +extern int ehca_use_hp_mr; + +struct ipzu_queue_resp { + u64 queue; /* points to first queue entry */ + u32 qe_size; /* queue entry size */ + u32 act_nr_of_sg; + u32 queue_length; /* queue length allocated in bytes */ + u32 pagesize; + u32 toggle_state; + u32 dummy; /* padding for 8 byte alignment */ +}; + +struct ehca_create_cq_resp { + u32 cq_number; + u32 token; + struct ipzu_queue_resp ipz_queue; + struct h_galpas galpas; +}; + +struct ehca_create_qp_resp { + u32 qp_num; + u32 token; + u32 qp_type; + u32 qkey; + /* qp_num assigned by ehca: sqp0/1 may have got different numbers */ + u32 real_qp_num; + u32 dummy; /* padding for 8 byte alignment */ + struct ipzu_queue_resp ipz_squeue; + struct ipzu_queue_resp ipz_rqueue; + struct h_galpas galpas; +}; + +struct ehca_alloc_cq_parms { + u32 nr_cqe; + u32 act_nr_of_entries; + u32 act_pages; + struct ipz_eq_handle eq_handle; +}; + +struct ehca_alloc_qp_parms { + int servicetype; + int sigtype; + int daqp_ctrl; + int max_send_sge; + int max_recv_sge; + int ud_av_l_key_ctl; + + u16 act_nr_send_wqes; + u16 act_nr_recv_wqes; + u8 act_nr_recv_sges; + u8 act_nr_send_sges; + + u32 nr_rq_pages; + u32 nr_sq_pages; + + struct ipz_eq_handle ipz_eq_handle; + struct ipz_pd pd; +}; + +int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp); +int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num); +struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int qp_num); + +#endif diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h new file mode 100644 index 000000000000..5665f213b81a --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h @@ -0,0 +1,236 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * pSeries interface definitions + * + * Authors: Waleri Fomin + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __EHCA_CLASSES_PSERIES_H__ +#define __EHCA_CLASSES_PSERIES_H__ + +#include "hcp_phyp.h" +#include "ipz_pt_fn.h" + + +struct ehca_pfqp { + struct ipz_qpt sqpt; + struct ipz_qpt rqpt; +}; + +struct ehca_pfcq { + struct ipz_qpt qpt; + u32 cqnr; +}; + +struct ehca_pfeq { + struct ipz_qpt qpt; + struct h_galpa galpa; + u32 eqnr; +}; + +struct ipz_adapter_handle { + u64 handle; +}; + +struct ipz_cq_handle { + u64 handle; +}; + +struct ipz_eq_handle { + u64 handle; +}; + +struct ipz_qp_handle { + u64 handle; +}; +struct ipz_mrmw_handle { + u64 handle; +}; + +struct ipz_pd { + u32 value; +}; + +struct hcp_modify_qp_control_block { + u32 qkey; /* 00 */ + u32 rdd; /* reliable datagram domain */ + u32 send_psn; /* 02 */ + u32 receive_psn; /* 03 */ + u32 prim_phys_port; /* 04 */ + u32 alt_phys_port; /* 05 */ + u32 prim_p_key_idx; /* 06 */ + u32 alt_p_key_idx; /* 07 */ + u32 rdma_atomic_ctrl; /* 08 */ + u32 qp_state; /* 09 */ + u32 reserved_10; /* 10 */ + u32 rdma_nr_atomic_resp_res; /* 11 */ + u32 path_migration_state; /* 12 */ + u32 rdma_atomic_outst_dest_qp; /* 13 */ + u32 dest_qp_nr; /* 14 */ + u32 min_rnr_nak_timer_field; /* 15 */ + u32 service_level; /* 16 */ + u32 send_grh_flag; /* 17 */ + u32 retry_count; /* 18 */ + u32 timeout; /* 19 */ + u32 path_mtu; /* 20 */ + u32 max_static_rate; /* 21 */ + u32 dlid; /* 22 */ + u32 rnr_retry_count; /* 23 */ + u32 source_path_bits; /* 24 */ + u32 traffic_class; /* 25 */ + u32 hop_limit; /* 26 */ + u32 source_gid_idx; /* 27 */ + u32 flow_label; /* 28 */ + u32 reserved_29; /* 29 */ + union { /* 30 */ + u64 dw[2]; + u8 byte[16]; + } dest_gid; + u32 service_level_al; /* 34 */ + u32 send_grh_flag_al; /* 35 */ + u32 retry_count_al; /* 36 */ + u32 timeout_al; /* 37 */ + u32 max_static_rate_al; /* 38 */ + u32 dlid_al; /* 39 */ + u32 rnr_retry_count_al; /* 40 */ + u32 source_path_bits_al; /* 41 */ + u32 traffic_class_al; /* 42 */ + u32 hop_limit_al; /* 43 */ + u32 source_gid_idx_al; /* 44 */ + u32 flow_label_al; /* 45 */ + u32 reserved_46; /* 46 */ + u32 reserved_47; /* 47 */ + union { /* 48 */ + u64 dw[2]; + u8 byte[16]; + } dest_gid_al; + u32 max_nr_outst_send_wr; /* 52 */ + u32 max_nr_outst_recv_wr; /* 53 */ + u32 disable_ete_credit_check; /* 54 */ + u32 qp_number; /* 55 */ + u64 send_queue_handle; /* 56 */ + u64 recv_queue_handle; /* 58 */ + u32 actual_nr_sges_in_sq_wqe; /* 60 */ + u32 actual_nr_sges_in_rq_wqe; /* 61 */ + u32 qp_enable; /* 62 */ + u32 curr_srq_limit; /* 63 */ + u64 qp_aff_asyn_ev_log_reg; /* 64 */ + u64 shared_rq_hndl; /* 66 */ + u64 trigg_doorbell_qp_hndl; /* 68 */ + u32 reserved_70_127[58]; /* 70 */ +}; + +#define MQPCB_MASK_QKEY EHCA_BMASK_IBM(0,0) +#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM(2,2) +#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM(3,3) +#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM(4,4) +#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM(5,5) +#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM(6,6) +#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM(7,7) +#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM(8,8) +#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM(9,9) +#define MQPCB_QP_STATE EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11,11) +#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12,12) +#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13,13) +#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14,14) +#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15,15) +#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16,16) +#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17,17) +#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18,18) +#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19,19) +#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20,20) +#define MQPCB_PATH_MTU EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21,21) +#define MQPCB_MAX_STATIC_RATE EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22,22) +#define MQPCB_DLID EHCA_BMASK_IBM(16,31) +#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23,23) +#define MQPCB_RNR_RETRY_COUNT EHCA_BMASK_IBM(29,31) +#define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24,24) +#define MQPCB_SOURCE_PATH_BITS EHCA_BMASK_IBM(25,31) +#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25,25) +#define MQPCB_TRAFFIC_CLASS EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26,26) +#define MQPCB_HOP_LIMIT EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27,27) +#define MQPCB_SOURCE_GID_IDX EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28,28) +#define MQPCB_FLOW_LABEL EHCA_BMASK_IBM(12,31) +#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30,30) +#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31,31) +#define MQPCB_SERVICE_LEVEL_AL EHCA_BMASK_IBM(28,31) +#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32,32) +#define MQPCB_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(31,31) +#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33,33) +#define MQPCB_RETRY_COUNT_AL EHCA_BMASK_IBM(29,31) +#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34,34) +#define MQPCB_TIMEOUT_AL EHCA_BMASK_IBM(27,31) +#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35,35) +#define MQPCB_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36,36) +#define MQPCB_DLID_AL EHCA_BMASK_IBM(16,31) +#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37,37) +#define MQPCB_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(29,31) +#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38,38) +#define MQPCB_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(25,31) +#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39,39) +#define MQPCB_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40,40) +#define MQPCB_HOP_LIMIT_AL EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41,41) +#define MQPCB_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(24,31) +#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42,42) +#define MQPCB_FLOW_LABEL_AL EHCA_BMASK_IBM(12,31) +#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44,44) +#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45,45) +#define MQPCB_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(16,31) +#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46,46) +#define MQPCB_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(16,31) +#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47,47) +#define MQPCB_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(31,31) +#define MQPCB_QP_NUMBER EHCA_BMASK_IBM(8,31) +#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48,48) +#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31,31) +#define MQPCB_MASK_CURR_SQR_LIMIT EHCA_BMASK_IBM(49,49) +#define MQPCB_CURR_SQR_LIMIT EHCA_BMASK_IBM(15,31) +#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50,50) +#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51,51) + +#endif /* __EHCA_CLASSES_PSERIES_H__ */ diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c new file mode 100644 index 000000000000..458fe19648a1 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -0,0 +1,427 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Completion queue handling + * + * Authors: Waleri Fomin + * Khadija Souissi + * Reinhard Ernst + * Heiko J Schick + * Hoang-Nam Nguyen + * + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "ehca_iverbs.h" +#include "ehca_classes.h" +#include "ehca_irq.h" +#include "hcp_if.h" + +static struct kmem_cache *cq_cache; + +int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp) +{ + unsigned int qp_num = qp->real_qp_num; + unsigned int key = qp_num & (QP_HASHTAB_LEN-1); + unsigned long spl_flags; + + spin_lock_irqsave(&cq->spinlock, spl_flags); + hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]); + spin_unlock_irqrestore(&cq->spinlock, spl_flags); + + ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x", + cq->cq_number, qp_num); + + return 0; +} + +int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num) +{ + int ret = -EINVAL; + unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); + struct hlist_node *iter; + struct ehca_qp *qp; + unsigned long spl_flags; + + spin_lock_irqsave(&cq->spinlock, spl_flags); + hlist_for_each(iter, &cq->qp_hashtab[key]) { + qp = hlist_entry(iter, struct ehca_qp, list_entries); + if (qp->real_qp_num == real_qp_num) { + hlist_del(iter); + ehca_dbg(cq->ib_cq.device, + "removed qp from cq .cq_num=%x real_qp_num=%x", + cq->cq_number, real_qp_num); + ret = 0; + break; + } + } + spin_unlock_irqrestore(&cq->spinlock, spl_flags); + if (ret) + ehca_err(cq->ib_cq.device, + "qp not found cq_num=%x real_qp_num=%x", + cq->cq_number, real_qp_num); + + return ret; +} + +struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num) +{ + struct ehca_qp *ret = NULL; + unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); + struct hlist_node *iter; + struct ehca_qp *qp; + hlist_for_each(iter, &cq->qp_hashtab[key]) { + qp = hlist_entry(iter, struct ehca_qp, list_entries); + if (qp->real_qp_num == real_qp_num) { + ret = qp; + break; + } + } + return ret; +} + +struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + static const u32 additional_cqe = 20; + struct ib_cq *cq; + struct ehca_cq *my_cq; + struct ehca_shca *shca = + container_of(device, struct ehca_shca, ib_device); + struct ipz_adapter_handle adapter_handle; + struct ehca_alloc_cq_parms param; /* h_call's out parameters */ + struct h_galpa gal; + void *vpage; + u32 counter; + u64 rpage, cqx_fec, h_ret; + int ipz_rc, ret, i; + unsigned long flags; + + if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) + return ERR_PTR(-EINVAL); + + my_cq = kmem_cache_alloc(cq_cache, SLAB_KERNEL); + if (!my_cq) { + ehca_err(device, "Out of memory for ehca_cq struct device=%p", + device); + return ERR_PTR(-ENOMEM); + } + + memset(my_cq, 0, sizeof(struct ehca_cq)); + memset(¶m, 0, sizeof(struct ehca_alloc_cq_parms)); + + spin_lock_init(&my_cq->spinlock); + spin_lock_init(&my_cq->cb_lock); + spin_lock_init(&my_cq->task_lock); + my_cq->ownpid = current->tgid; + + cq = &my_cq->ib_cq; + + adapter_handle = shca->ipz_hca_handle; + param.eq_handle = shca->eq.ipz_eq_handle; + + do { + if (!idr_pre_get(&ehca_cq_idr, GFP_KERNEL)) { + cq = ERR_PTR(-ENOMEM); + ehca_err(device, "Can't reserve idr nr. device=%p", + device); + goto create_cq_exit1; + } + + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token); + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + + } while (ret == -EAGAIN); + + if (ret) { + cq = ERR_PTR(-ENOMEM); + ehca_err(device, "Can't allocate new idr entry. device=%p", + device); + goto create_cq_exit1; + } + + /* + * CQs maximum depth is 4GB-64, but we need additional 20 as buffer + * for receiving errors CQEs. + */ + param.nr_cqe = cqe + additional_cqe; + h_ret = hipz_h_alloc_resource_cq(adapter_handle, my_cq, ¶m); + + if (h_ret != H_SUCCESS) { + ehca_err(device, "hipz_h_alloc_resource_cq() failed " + "h_ret=%lx device=%p", h_ret, device); + cq = ERR_PTR(ehca2ib_return_code(h_ret)); + goto create_cq_exit2; + } + + ipz_rc = ipz_queue_ctor(&my_cq->ipz_queue, param.act_pages, + EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0); + if (!ipz_rc) { + ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%x device=%p", + ipz_rc, device); + cq = ERR_PTR(-EINVAL); + goto create_cq_exit3; + } + + for (counter = 0; counter < param.act_pages; counter++) { + vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue); + if (!vpage) { + ehca_err(device, "ipz_qpageit_get_inc() " + "returns NULL device=%p", device); + cq = ERR_PTR(-EAGAIN); + goto create_cq_exit4; + } + rpage = virt_to_abs(vpage); + + h_ret = hipz_h_register_rpage_cq(adapter_handle, + my_cq->ipz_cq_handle, + &my_cq->pf, + 0, + 0, + rpage, + 1, + my_cq->galpas. + kernel); + + if (h_ret < H_SUCCESS) { + ehca_err(device, "hipz_h_register_rpage_cq() failed " + "ehca_cq=%p cq_num=%x h_ret=%lx counter=%i " + "act_pages=%i", my_cq, my_cq->cq_number, + h_ret, counter, param.act_pages); + cq = ERR_PTR(-EINVAL); + goto create_cq_exit4; + } + + if (counter == (param.act_pages - 1)) { + vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue); + if ((h_ret != H_SUCCESS) || vpage) { + ehca_err(device, "Registration of pages not " + "complete ehca_cq=%p cq_num=%x " + "h_ret=%lx", my_cq, my_cq->cq_number, + h_ret); + cq = ERR_PTR(-EAGAIN); + goto create_cq_exit4; + } + } else { + if (h_ret != H_PAGE_REGISTERED) { + ehca_err(device, "Registration of page failed " + "ehca_cq=%p cq_num=%x h_ret=%lx" + "counter=%i act_pages=%i", + my_cq, my_cq->cq_number, + h_ret, counter, param.act_pages); + cq = ERR_PTR(-ENOMEM); + goto create_cq_exit4; + } + } + } + + ipz_qeit_reset(&my_cq->ipz_queue); + + gal = my_cq->galpas.kernel; + cqx_fec = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_fec)); + ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%lx", + my_cq, my_cq->cq_number, cqx_fec); + + my_cq->ib_cq.cqe = my_cq->nr_of_entries = + param.act_nr_of_entries - additional_cqe; + my_cq->cq_number = (my_cq->ipz_cq_handle.handle) & 0xffff; + + for (i = 0; i < QP_HASHTAB_LEN; i++) + INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]); + + if (context) { + struct ipz_queue *ipz_queue = &my_cq->ipz_queue; + struct ehca_create_cq_resp resp; + struct vm_area_struct *vma; + memset(&resp, 0, sizeof(resp)); + resp.cq_number = my_cq->cq_number; + resp.token = my_cq->token; + resp.ipz_queue.qe_size = ipz_queue->qe_size; + resp.ipz_queue.act_nr_of_sg = ipz_queue->act_nr_of_sg; + resp.ipz_queue.queue_length = ipz_queue->queue_length; + resp.ipz_queue.pagesize = ipz_queue->pagesize; + resp.ipz_queue.toggle_state = ipz_queue->toggle_state; + ret = ehca_mmap_nopage(((u64)(my_cq->token) << 32) | 0x12000000, + ipz_queue->queue_length, + (void**)&resp.ipz_queue.queue, + &vma); + if (ret) { + ehca_err(device, "Could not mmap queue pages"); + cq = ERR_PTR(ret); + goto create_cq_exit4; + } + my_cq->uspace_queue = resp.ipz_queue.queue; + resp.galpas = my_cq->galpas; + ret = ehca_mmap_register(my_cq->galpas.user.fw_handle, + (void**)&resp.galpas.kernel.fw_handle, + &vma); + if (ret) { + ehca_err(device, "Could not mmap fw_handle"); + cq = ERR_PTR(ret); + goto create_cq_exit5; + } + my_cq->uspace_fwh = (u64)resp.galpas.kernel.fw_handle; + if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { + ehca_err(device, "Copy to udata failed."); + goto create_cq_exit6; + } + } + + return cq; + +create_cq_exit6: + ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE); + +create_cq_exit5: + ehca_munmap(my_cq->uspace_queue, my_cq->ipz_queue.queue_length); + +create_cq_exit4: + ipz_queue_dtor(&my_cq->ipz_queue); + +create_cq_exit3: + h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); + if (h_ret != H_SUCCESS) + ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p " + "cq_num=%x h_ret=%lx", my_cq, my_cq->cq_number, h_ret); + +create_cq_exit2: + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + idr_remove(&ehca_cq_idr, my_cq->token); + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + +create_cq_exit1: + kmem_cache_free(cq_cache, my_cq); + + return cq; +} + +int ehca_destroy_cq(struct ib_cq *cq) +{ + u64 h_ret; + int ret; + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + int cq_num = my_cq->cq_number; + struct ib_device *device = cq->device; + struct ehca_shca *shca = container_of(device, struct ehca_shca, + ib_device); + struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; + u32 cur_pid = current->tgid; + unsigned long flags; + + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + while (my_cq->nr_callbacks) + yield(); + + idr_remove(&ehca_cq_idr, my_cq->token); + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + + if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) { + ehca_err(device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_cq->ownpid); + return -EINVAL; + } + + /* un-mmap if vma alloc */ + if (my_cq->uspace_queue ) { + ret = ehca_munmap(my_cq->uspace_queue, + my_cq->ipz_queue.queue_length); + if (ret) + ehca_err(device, "Could not munmap queue ehca_cq=%p " + "cq_num=%x", my_cq, cq_num); + ret = ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE); + if (ret) + ehca_err(device, "Could not munmap fwh ehca_cq=%p " + "cq_num=%x", my_cq, cq_num); + } + + h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0); + if (h_ret == H_R_STATE) { + /* cq in err: read err data and destroy it forcibly */ + ehca_dbg(device, "ehca_cq=%p cq_num=%x ressource=%lx in err " + "state. Try to delete it forcibly.", + my_cq, cq_num, my_cq->ipz_cq_handle.handle); + ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle); + h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); + if (h_ret == H_SUCCESS) + ehca_dbg(device, "cq_num=%x deleted successfully.", + cq_num); + } + if (h_ret != H_SUCCESS) { + ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lx " + "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num); + return ehca2ib_return_code(h_ret); + } + ipz_queue_dtor(&my_cq->ipz_queue); + kmem_cache_free(cq_cache, my_cq); + + return 0; +} + +int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) +{ + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + u32 cur_pid = current->tgid; + + if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) { + ehca_err(cq->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_cq->ownpid); + return -EINVAL; + } + + /* TODO: proper resize needs to be done */ + ehca_err(cq->device, "not implemented yet"); + + return -EFAULT; +} + +int ehca_init_cq_cache(void) +{ + cq_cache = kmem_cache_create("ehca_cache_cq", + sizeof(struct ehca_cq), 0, + SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!cq_cache) + return -ENOMEM; + return 0; +} + +void ehca_cleanup_cq_cache(void) +{ + if (cq_cache) + kmem_cache_destroy(cq_cache); +} diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c new file mode 100644 index 000000000000..5281dec66f12 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_eq.c @@ -0,0 +1,185 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Event queue handling + * + * Authors: Waleri Fomin + * Khadija Souissi + * Reinhard Ernst + * Heiko J Schick + * Hoang-Nam Nguyen + * + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "ehca_classes.h" +#include "ehca_irq.h" +#include "ehca_iverbs.h" +#include "ehca_qes.h" +#include "hcp_if.h" +#include "ipz_pt_fn.h" + +int ehca_create_eq(struct ehca_shca *shca, + struct ehca_eq *eq, + const enum ehca_eq_type type, const u32 length) +{ + u64 ret; + u32 nr_pages; + u32 i; + void *vpage; + struct ib_device *ib_dev = &shca->ib_device; + + spin_lock_init(&eq->spinlock); + eq->is_initialized = 0; + + if (type != EHCA_EQ && type != EHCA_NEQ) { + ehca_err(ib_dev, "Invalid EQ type %x. eq=%p", type, eq); + return -EINVAL; + } + if (!length) { + ehca_err(ib_dev, "EQ length must not be zero. eq=%p", eq); + return -EINVAL; + } + + ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle, + &eq->pf, + type, + length, + &eq->ipz_eq_handle, + &eq->length, + &nr_pages, &eq->ist); + + if (ret != H_SUCCESS) { + ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq); + return -EINVAL; + } + + ret = ipz_queue_ctor(&eq->ipz_queue, nr_pages, + EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0); + if (!ret) { + ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq); + goto create_eq_exit1; + } + + for (i = 0; i < nr_pages; i++) { + u64 rpage; + + if (!(vpage = ipz_qpageit_get_inc(&eq->ipz_queue))) { + ret = H_RESOURCE; + goto create_eq_exit2; + } + + rpage = virt_to_abs(vpage); + ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle, + eq->ipz_eq_handle, + &eq->pf, + 0, 0, rpage, 1); + + if (i == (nr_pages - 1)) { + /* last page */ + vpage = ipz_qpageit_get_inc(&eq->ipz_queue); + if (ret != H_SUCCESS || vpage) + goto create_eq_exit2; + } else { + if (ret != H_PAGE_REGISTERED || !vpage) + goto create_eq_exit2; + } + } + + ipz_qeit_reset(&eq->ipz_queue); + + /* register interrupt handlers and initialize work queues */ + if (type == EHCA_EQ) { + ret = ibmebus_request_irq(NULL, eq->ist, ehca_interrupt_eq, + SA_INTERRUPT, "ehca_eq", + (void *)shca); + if (ret < 0) + ehca_err(ib_dev, "Can't map interrupt handler."); + + tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca); + } else if (type == EHCA_NEQ) { + ret = ibmebus_request_irq(NULL, eq->ist, ehca_interrupt_neq, + SA_INTERRUPT, "ehca_neq", + (void *)shca); + if (ret < 0) + ehca_err(ib_dev, "Can't map interrupt handler."); + + tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca); + } + + eq->is_initialized = 1; + + return 0; + +create_eq_exit2: + ipz_queue_dtor(&eq->ipz_queue); + +create_eq_exit1: + hipz_h_destroy_eq(shca->ipz_hca_handle, eq); + + return -EINVAL; +} + +void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq) +{ + unsigned long flags; + void *eqe; + + spin_lock_irqsave(&eq->spinlock, flags); + eqe = ipz_eqit_eq_get_inc_valid(&eq->ipz_queue); + spin_unlock_irqrestore(&eq->spinlock, flags); + + return eqe; +} + +int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq) +{ + unsigned long flags; + u64 h_ret; + + spin_lock_irqsave(&eq->spinlock, flags); + ibmebus_free_irq(NULL, eq->ist, (void *)shca); + + h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq); + + spin_unlock_irqrestore(&eq->spinlock, flags); + + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't free EQ resources."); + return -EINVAL; + } + ipz_queue_dtor(&eq->ipz_queue); + + return 0; +} diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c new file mode 100644 index 000000000000..5eae6ac48425 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -0,0 +1,241 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * HCA query functions + * + * Authors: Heiko J Schick + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "ehca_tools.h" +#include "hcp_if.h" + +int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) +{ + int ret = 0; + struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, + ib_device); + struct hipz_query_hca *rblock; + + rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query device properties"); + ret = -EINVAL; + goto query_device1; + } + + memset(props, 0, sizeof(struct ib_device_attr)); + props->fw_ver = rblock->hw_ver; + props->max_mr_size = rblock->max_mr_size; + props->vendor_id = rblock->vendor_id >> 8; + props->vendor_part_id = rblock->vendor_part_id >> 16; + props->hw_ver = rblock->hw_ver; + props->max_qp = min_t(int, rblock->max_qp, INT_MAX); + props->max_qp_wr = min_t(int, rblock->max_wqes_wq, INT_MAX); + props->max_sge = min_t(int, rblock->max_sge, INT_MAX); + props->max_sge_rd = min_t(int, rblock->max_sge_rd, INT_MAX); + props->max_cq = min_t(int, rblock->max_cq, INT_MAX); + props->max_cqe = min_t(int, rblock->max_cqe, INT_MAX); + props->max_mr = min_t(int, rblock->max_mr, INT_MAX); + props->max_mw = min_t(int, rblock->max_mw, INT_MAX); + props->max_pd = min_t(int, rblock->max_pd, INT_MAX); + props->max_ah = min_t(int, rblock->max_ah, INT_MAX); + props->max_fmr = min_t(int, rblock->max_mr, INT_MAX); + props->max_srq = 0; + props->max_srq_wr = 0; + props->max_srq_sge = 0; + props->max_pkeys = 16; + props->local_ca_ack_delay + = rblock->local_ca_ack_delay; + props->max_raw_ipv6_qp + = min_t(int, rblock->max_raw_ipv6_qp, INT_MAX); + props->max_raw_ethy_qp + = min_t(int, rblock->max_raw_ethy_qp, INT_MAX); + props->max_mcast_grp + = min_t(int, rblock->max_mcast_grp, INT_MAX); + props->max_mcast_qp_attach + = min_t(int, rblock->max_mcast_qp_attach, INT_MAX); + props->max_total_mcast_qp_attach + = min_t(int, rblock->max_total_mcast_qp_attach, INT_MAX); + +query_device1: + kfree(rblock); + + return ret; +} + +int ehca_query_port(struct ib_device *ibdev, + u8 port, struct ib_port_attr *props) +{ + int ret = 0; + struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, + ib_device); + struct hipz_query_port *rblock; + + rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto query_port1; + } + + memset(props, 0, sizeof(struct ib_port_attr)); + props->state = rblock->state; + + switch (rblock->max_mtu) { + case 0x1: + props->active_mtu = props->max_mtu = IB_MTU_256; + break; + case 0x2: + props->active_mtu = props->max_mtu = IB_MTU_512; + break; + case 0x3: + props->active_mtu = props->max_mtu = IB_MTU_1024; + break; + case 0x4: + props->active_mtu = props->max_mtu = IB_MTU_2048; + break; + case 0x5: + props->active_mtu = props->max_mtu = IB_MTU_4096; + break; + default: + ehca_err(&shca->ib_device, "Unknown MTU size: %x.", + rblock->max_mtu); + break; + } + + props->gid_tbl_len = rblock->gid_tbl_len; + props->max_msg_sz = rblock->max_msg_sz; + props->bad_pkey_cntr = rblock->bad_pkey_cntr; + props->qkey_viol_cntr = rblock->qkey_viol_cntr; + props->pkey_tbl_len = rblock->pkey_tbl_len; + props->lid = rblock->lid; + props->sm_lid = rblock->sm_lid; + props->lmc = rblock->lmc; + props->sm_sl = rblock->sm_sl; + props->subnet_timeout = rblock->subnet_timeout; + props->init_type_reply = rblock->init_type_reply; + + props->active_width = IB_WIDTH_12X; + props->active_speed = 0x1; + +query_port1: + kfree(rblock); + + return ret; +} + +int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) +{ + int ret = 0; + struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); + struct hipz_query_port *rblock; + + if (index > 16) { + ehca_err(&shca->ib_device, "Invalid index: %x.", index); + return -EINVAL; + } + + rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto query_pkey1; + } + + memcpy(pkey, &rblock->pkey_entries + index, sizeof(u16)); + +query_pkey1: + kfree(rblock); + + return ret; +} + +int ehca_query_gid(struct ib_device *ibdev, u8 port, + int index, union ib_gid *gid) +{ + int ret = 0; + struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, + ib_device); + struct hipz_query_port *rblock; + + if (index > 255) { + ehca_err(&shca->ib_device, "Invalid index: %x.", index); + return -EINVAL; + } + + rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto query_gid1; + } + + memcpy(&gid->raw[0], &rblock->gid_prefix, sizeof(u64)); + memcpy(&gid->raw[8], &rblock->guid_entries[index], sizeof(u64)); + +query_gid1: + kfree(rblock); + + return ret; +} + +int ehca_modify_port(struct ib_device *ibdev, + u8 port, int port_modify_mask, + struct ib_port_modify *props) +{ + /* Not implemented yet */ + return -EFAULT; +} diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c new file mode 100644 index 000000000000..2a65b5be1979 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -0,0 +1,762 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Functions for EQs, NEQs and interrupts + * + * Authors: Heiko J Schick + * Khadija Souissi + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "ehca_classes.h" +#include "ehca_irq.h" +#include "ehca_iverbs.h" +#include "ehca_tools.h" +#include "hcp_if.h" +#include "hipz_fns.h" + +#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1) +#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM(8,31) +#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM(2,7) +#define EQE_CQ_NUMBER EHCA_BMASK_IBM(8,31) +#define EQE_QP_NUMBER EHCA_BMASK_IBM(8,31) +#define EQE_QP_TOKEN EHCA_BMASK_IBM(32,63) +#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32,63) + +#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1) +#define NEQE_EVENT_CODE EHCA_BMASK_IBM(2,7) +#define NEQE_PORT_NUMBER EHCA_BMASK_IBM(8,15) +#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16) + +#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52,63) +#define ERROR_DATA_TYPE EHCA_BMASK_IBM(0,7) + +#ifdef CONFIG_INFINIBAND_EHCA_SCALING + +static void queue_comp_task(struct ehca_cq *__cq); + +static struct ehca_comp_pool* pool; +static struct notifier_block comp_pool_callback_nb; + +#endif + +static inline void comp_event_callback(struct ehca_cq *cq) +{ + if (!cq->ib_cq.comp_handler) + return; + + spin_lock(&cq->cb_lock); + cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context); + spin_unlock(&cq->cb_lock); + + return; +} + +static void print_error_data(struct ehca_shca * shca, void* data, + u64* rblock, int length) +{ + u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]); + u64 resource = rblock[1]; + + switch (type) { + case 0x1: /* Queue Pair */ + { + struct ehca_qp *qp = (struct ehca_qp*)data; + + /* only print error data if AER is set */ + if (rblock[6] == 0) + return; + + ehca_err(&shca->ib_device, + "QP 0x%x (resource=%lx) has errors.", + qp->ib_qp.qp_num, resource); + break; + } + case 0x4: /* Completion Queue */ + { + struct ehca_cq *cq = (struct ehca_cq*)data; + + ehca_err(&shca->ib_device, + "CQ 0x%x (resource=%lx) has errors.", + cq->cq_number, resource); + break; + } + default: + ehca_err(&shca->ib_device, + "Unknown errror type: %lx on %s.", + type, shca->ib_device.name); + break; + } + + ehca_err(&shca->ib_device, "Error data is available: %lx.", resource); + ehca_err(&shca->ib_device, "EHCA ----- error data begin " + "---------------------------------------------------"); + ehca_dmp(rblock, length, "resource=%lx", resource); + ehca_err(&shca->ib_device, "EHCA ----- error data end " + "----------------------------------------------------"); + + return; +} + +int ehca_error_data(struct ehca_shca *shca, void *data, + u64 resource) +{ + + unsigned long ret; + u64 *rblock; + unsigned long block_count; + + rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Cannot allocate rblock memory."); + ret = -ENOMEM; + goto error_data1; + } + + ret = hipz_h_error_data(shca->ipz_hca_handle, + resource, + rblock, + &block_count); + + if (ret == H_R_STATE) { + ehca_err(&shca->ib_device, + "No error data is available: %lx.", resource); + } + else if (ret == H_SUCCESS) { + int length; + + length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]); + + if (length > PAGE_SIZE) + length = PAGE_SIZE; + + print_error_data(shca, data, rblock, length); + } + else { + ehca_err(&shca->ib_device, + "Error data could not be fetched: %lx", resource); + } + + kfree(rblock); + +error_data1: + return ret; + +} + +static void qp_event_callback(struct ehca_shca *shca, + u64 eqe, + enum ib_event_type event_type) +{ + struct ib_event event; + struct ehca_qp *qp; + unsigned long flags; + u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe); + + spin_lock_irqsave(&ehca_qp_idr_lock, flags); + qp = idr_find(&ehca_qp_idr, token); + spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + + + if (!qp) + return; + + ehca_error_data(shca, qp, qp->ipz_qp_handle.handle); + + if (!qp->ib_qp.event_handler) + return; + + event.device = &shca->ib_device; + event.event = event_type; + event.element.qp = &qp->ib_qp; + + qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context); + + return; +} + +static void cq_event_callback(struct ehca_shca *shca, + u64 eqe) +{ + struct ehca_cq *cq; + unsigned long flags; + u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe); + + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + cq = idr_find(&ehca_cq_idr, token); + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + + if (!cq) + return; + + ehca_error_data(shca, cq, cq->ipz_cq_handle.handle); + + return; +} + +static void parse_identifier(struct ehca_shca *shca, u64 eqe) +{ + u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe); + + switch (identifier) { + case 0x02: /* path migrated */ + qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG); + break; + case 0x03: /* communication established */ + qp_event_callback(shca, eqe, IB_EVENT_COMM_EST); + break; + case 0x04: /* send queue drained */ + qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED); + break; + case 0x05: /* QP error */ + case 0x06: /* QP error */ + qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL); + break; + case 0x07: /* CQ error */ + case 0x08: /* CQ error */ + cq_event_callback(shca, eqe); + break; + case 0x09: /* MRMWPTE error */ + ehca_err(&shca->ib_device, "MRMWPTE error."); + break; + case 0x0A: /* port event */ + ehca_err(&shca->ib_device, "Port event."); + break; + case 0x0B: /* MR access error */ + ehca_err(&shca->ib_device, "MR access error."); + break; + case 0x0C: /* EQ error */ + ehca_err(&shca->ib_device, "EQ error."); + break; + case 0x0D: /* P/Q_Key mismatch */ + ehca_err(&shca->ib_device, "P/Q_Key mismatch."); + break; + case 0x10: /* sampling complete */ + ehca_err(&shca->ib_device, "Sampling complete."); + break; + case 0x11: /* unaffiliated access error */ + ehca_err(&shca->ib_device, "Unaffiliated access error."); + break; + case 0x12: /* path migrating error */ + ehca_err(&shca->ib_device, "Path migration error."); + break; + case 0x13: /* interface trace stopped */ + ehca_err(&shca->ib_device, "Interface trace stopped."); + break; + case 0x14: /* first error capture info available */ + default: + ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.", + identifier, shca->ib_device.name); + break; + } + + return; +} + +static void parse_ec(struct ehca_shca *shca, u64 eqe) +{ + struct ib_event event; + u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe); + u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe); + + switch (ec) { + case 0x30: /* port availability change */ + if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) { + ehca_info(&shca->ib_device, + "port %x is active.", port); + event.device = &shca->ib_device; + event.event = IB_EVENT_PORT_ACTIVE; + event.element.port_num = port; + shca->sport[port - 1].port_state = IB_PORT_ACTIVE; + ib_dispatch_event(&event); + } else { + ehca_info(&shca->ib_device, + "port %x is inactive.", port); + event.device = &shca->ib_device; + event.event = IB_EVENT_PORT_ERR; + event.element.port_num = port; + shca->sport[port - 1].port_state = IB_PORT_DOWN; + ib_dispatch_event(&event); + } + break; + case 0x31: + /* port configuration change + * disruptive change is caused by + * LID, PKEY or SM change + */ + ehca_warn(&shca->ib_device, + "disruptive port %x configuration change", port); + + ehca_info(&shca->ib_device, + "port %x is inactive.", port); + event.device = &shca->ib_device; + event.event = IB_EVENT_PORT_ERR; + event.element.port_num = port; + shca->sport[port - 1].port_state = IB_PORT_DOWN; + ib_dispatch_event(&event); + + ehca_info(&shca->ib_device, + "port %x is active.", port); + event.device = &shca->ib_device; + event.event = IB_EVENT_PORT_ACTIVE; + event.element.port_num = port; + shca->sport[port - 1].port_state = IB_PORT_ACTIVE; + ib_dispatch_event(&event); + break; + case 0x32: /* adapter malfunction */ + ehca_err(&shca->ib_device, "Adapter malfunction."); + break; + case 0x33: /* trace stopped */ + ehca_err(&shca->ib_device, "Traced stopped."); + break; + default: + ehca_err(&shca->ib_device, "Unknown event code: %x on %s.", + ec, shca->ib_device.name); + break; + } + + return; +} + +static inline void reset_eq_pending(struct ehca_cq *cq) +{ + u64 CQx_EP; + struct h_galpa gal = cq->galpas.kernel; + + hipz_galpa_store_cq(gal, cqx_ep, 0x0); + CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep)); + + return; +} + +irqreturn_t ehca_interrupt_neq(int irq, void *dev_id, struct pt_regs *regs) +{ + struct ehca_shca *shca = (struct ehca_shca*)dev_id; + + tasklet_hi_schedule(&shca->neq.interrupt_task); + + return IRQ_HANDLED; +} + +void ehca_tasklet_neq(unsigned long data) +{ + struct ehca_shca *shca = (struct ehca_shca*)data; + struct ehca_eqe *eqe; + u64 ret; + + eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq); + + while (eqe) { + if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry)) + parse_ec(shca, eqe->entry); + + eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq); + } + + ret = hipz_h_reset_event(shca->ipz_hca_handle, + shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL); + + if (ret != H_SUCCESS) + ehca_err(&shca->ib_device, "Can't clear notification events."); + + return; +} + +irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs) +{ + struct ehca_shca *shca = (struct ehca_shca*)dev_id; + + tasklet_hi_schedule(&shca->eq.interrupt_task); + + return IRQ_HANDLED; +} + +void ehca_tasklet_eq(unsigned long data) +{ + struct ehca_shca *shca = (struct ehca_shca*)data; + struct ehca_eqe *eqe; + int int_state; + int query_cnt = 0; + + do { + eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq); + + if ((shca->hw_level >= 2) && eqe) + int_state = 1; + else + int_state = 0; + + while ((int_state == 1) || eqe) { + while (eqe) { + u64 eqe_value = eqe->entry; + + ehca_dbg(&shca->ib_device, + "eqe_value=%lx", eqe_value); + + /* TODO: better structure */ + if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, + eqe_value)) { + unsigned long flags; + u32 token; + struct ehca_cq *cq; + + ehca_dbg(&shca->ib_device, + "... completion event"); + token = + EHCA_BMASK_GET(EQE_CQ_TOKEN, + eqe_value); + spin_lock_irqsave(&ehca_cq_idr_lock, + flags); + cq = idr_find(&ehca_cq_idr, token); + + if (cq == NULL) { + spin_unlock(&ehca_cq_idr_lock); + break; + } + + reset_eq_pending(cq); +#ifdef CONFIG_INFINIBAND_EHCA_SCALING + queue_comp_task(cq); + spin_unlock_irqrestore(&ehca_cq_idr_lock, + flags); +#else + spin_unlock_irqrestore(&ehca_cq_idr_lock, + flags); + comp_event_callback(cq); +#endif + } else { + ehca_dbg(&shca->ib_device, + "... non completion event"); + parse_identifier(shca, eqe_value); + } + eqe = + (struct ehca_eqe *)ehca_poll_eq(shca, + &shca->eq); + } + + if (shca->hw_level >= 2) { + int_state = + hipz_h_query_int_state(shca->ipz_hca_handle, + shca->eq.ist); + query_cnt++; + iosync(); + if (query_cnt >= 100) { + query_cnt = 0; + int_state = 0; + } + } + eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq); + + } + } while (int_state != 0); + + return; +} + +#ifdef CONFIG_INFINIBAND_EHCA_SCALING + +static inline int find_next_online_cpu(struct ehca_comp_pool* pool) +{ + unsigned long flags_last_cpu; + + if (ehca_debug_level) + ehca_dmp(&cpu_online_map, sizeof(cpumask_t), ""); + + spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu); + pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map); + if (pool->last_cpu == NR_CPUS) + pool->last_cpu = first_cpu(cpu_online_map); + spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu); + + return pool->last_cpu; +} + +static void __queue_comp_task(struct ehca_cq *__cq, + struct ehca_cpu_comp_task *cct) +{ + unsigned long flags_cct; + unsigned long flags_cq; + + spin_lock_irqsave(&cct->task_lock, flags_cct); + spin_lock_irqsave(&__cq->task_lock, flags_cq); + + if (__cq->nr_callbacks == 0) { + __cq->nr_callbacks++; + list_add_tail(&__cq->entry, &cct->cq_list); + cct->cq_jobs++; + wake_up(&cct->wait_queue); + } + else + __cq->nr_callbacks++; + + spin_unlock_irqrestore(&__cq->task_lock, flags_cq); + spin_unlock_irqrestore(&cct->task_lock, flags_cct); +} + +static void queue_comp_task(struct ehca_cq *__cq) +{ + int cpu; + int cpu_id; + struct ehca_cpu_comp_task *cct; + + cpu = get_cpu(); + cpu_id = find_next_online_cpu(pool); + + BUG_ON(!cpu_online(cpu_id)); + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); + + if (cct->cq_jobs > 0) { + cpu_id = find_next_online_cpu(pool); + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); + } + + __queue_comp_task(__cq, cct); + + put_cpu(); + + return; +} + +static void run_comp_task(struct ehca_cpu_comp_task* cct) +{ + struct ehca_cq *cq; + unsigned long flags_cct; + unsigned long flags_cq; + + spin_lock_irqsave(&cct->task_lock, flags_cct); + + while (!list_empty(&cct->cq_list)) { + cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); + spin_unlock_irqrestore(&cct->task_lock, flags_cct); + comp_event_callback(cq); + spin_lock_irqsave(&cct->task_lock, flags_cct); + + spin_lock_irqsave(&cq->task_lock, flags_cq); + cq->nr_callbacks--; + if (cq->nr_callbacks == 0) { + list_del_init(cct->cq_list.next); + cct->cq_jobs--; + } + spin_unlock_irqrestore(&cq->task_lock, flags_cq); + + } + + spin_unlock_irqrestore(&cct->task_lock, flags_cct); + + return; +} + +static int comp_task(void *__cct) +{ + struct ehca_cpu_comp_task* cct = __cct; + DECLARE_WAITQUEUE(wait, current); + + set_current_state(TASK_INTERRUPTIBLE); + while(!kthread_should_stop()) { + add_wait_queue(&cct->wait_queue, &wait); + + if (list_empty(&cct->cq_list)) + schedule(); + else + __set_current_state(TASK_RUNNING); + + remove_wait_queue(&cct->wait_queue, &wait); + + if (!list_empty(&cct->cq_list)) + run_comp_task(__cct); + + set_current_state(TASK_INTERRUPTIBLE); + } + __set_current_state(TASK_RUNNING); + + return 0; +} + +static struct task_struct *create_comp_task(struct ehca_comp_pool *pool, + int cpu) +{ + struct ehca_cpu_comp_task *cct; + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + spin_lock_init(&cct->task_lock); + INIT_LIST_HEAD(&cct->cq_list); + init_waitqueue_head(&cct->wait_queue); + cct->task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu); + + return cct->task; +} + +static void destroy_comp_task(struct ehca_comp_pool *pool, + int cpu) +{ + struct ehca_cpu_comp_task *cct; + struct task_struct *task; + unsigned long flags_cct; + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + + spin_lock_irqsave(&cct->task_lock, flags_cct); + + task = cct->task; + cct->task = NULL; + cct->cq_jobs = 0; + + spin_unlock_irqrestore(&cct->task_lock, flags_cct); + + if (task) + kthread_stop(task); + + return; +} + +static void take_over_work(struct ehca_comp_pool *pool, + int cpu) +{ + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + LIST_HEAD(list); + struct ehca_cq *cq; + unsigned long flags_cct; + + spin_lock_irqsave(&cct->task_lock, flags_cct); + + list_splice_init(&cct->cq_list, &list); + + while(!list_empty(&list)) { + cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); + + list_del(&cq->entry); + __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks, + smp_processor_id())); + } + + spin_unlock_irqrestore(&cct->task_lock, flags_cct); + +} + +static int comp_pool_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct ehca_cpu_comp_task *cct; + + switch (action) { + case CPU_UP_PREPARE: + ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu); + if(!create_comp_task(pool, cpu)) { + ehca_gen_err("Can't create comp_task for cpu: %x", cpu); + return NOTIFY_BAD; + } + break; + case CPU_UP_CANCELED: + ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu); + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + kthread_bind(cct->task, any_online_cpu(cpu_online_map)); + destroy_comp_task(pool, cpu); + break; + case CPU_ONLINE: + ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu); + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + kthread_bind(cct->task, cpu); + wake_up_process(cct->task); + break; + case CPU_DOWN_PREPARE: + ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu); + break; + case CPU_DOWN_FAILED: + ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu); + break; + case CPU_DEAD: + ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu); + destroy_comp_task(pool, cpu); + take_over_work(pool, cpu); + break; + } + + return NOTIFY_OK; +} + +#endif + +int ehca_create_comp_pool(void) +{ +#ifdef CONFIG_INFINIBAND_EHCA_SCALING + int cpu; + struct task_struct *task; + + pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL); + if (pool == NULL) + return -ENOMEM; + + spin_lock_init(&pool->last_cpu_lock); + pool->last_cpu = any_online_cpu(cpu_online_map); + + pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task); + if (pool->cpu_comp_tasks == NULL) { + kfree(pool); + return -EINVAL; + } + + for_each_online_cpu(cpu) { + task = create_comp_task(pool, cpu); + if (task) { + kthread_bind(task, cpu); + wake_up_process(task); + } + } + + comp_pool_callback_nb.notifier_call = comp_pool_callback; + comp_pool_callback_nb.priority =0; + register_cpu_notifier(&comp_pool_callback_nb); +#endif + + return 0; +} + +void ehca_destroy_comp_pool(void) +{ +#ifdef CONFIG_INFINIBAND_EHCA_SCALING + int i; + + unregister_cpu_notifier(&comp_pool_callback_nb); + + for (i = 0; i < NR_CPUS; i++) { + if (cpu_online(i)) + destroy_comp_task(pool, i); + } +#endif + + return; +} diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h new file mode 100644 index 000000000000..85bf1fe16fe4 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_irq.h @@ -0,0 +1,77 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Function definitions and structs for EQs, NEQs and interrupts + * + * Authors: Heiko J Schick + * Khadija Souissi + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __EHCA_IRQ_H +#define __EHCA_IRQ_H + + +struct ehca_shca; + +#include +#include +#include + +int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource); + +irqreturn_t ehca_interrupt_neq(int irq, void *dev_id, struct pt_regs *regs); +void ehca_tasklet_neq(unsigned long data); + +irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs); +void ehca_tasklet_eq(unsigned long data); + +struct ehca_cpu_comp_task { + wait_queue_head_t wait_queue; + struct list_head cq_list; + struct task_struct *task; + spinlock_t task_lock; + int cq_jobs; +}; + +struct ehca_comp_pool { + struct ehca_cpu_comp_task *cpu_comp_tasks; + int last_cpu; + spinlock_t last_cpu_lock; +}; + +int ehca_create_comp_pool(void); +void ehca_destroy_comp_pool(void); + +#endif diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h new file mode 100644 index 000000000000..bbdc437f5167 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -0,0 +1,181 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Function definitions for internal functions + * + * Authors: Heiko J Schick + * Dietmar Decker + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __EHCA_IVERBS_H__ +#define __EHCA_IVERBS_H__ + +#include "ehca_classes.h" + +int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props); + +int ehca_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props); + +int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey); + +int ehca_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid); + +int ehca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask, + struct ib_port_modify *props); + +struct ib_pd *ehca_alloc_pd(struct ib_device *device, + struct ib_ucontext *context, + struct ib_udata *udata); + +int ehca_dealloc_pd(struct ib_pd *pd); + +struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); + +int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); + +int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); + +int ehca_destroy_ah(struct ib_ah *ah); + +struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags); + +struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, u64 *iova_start); + +struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, + struct ib_umem *region, + int mr_access_flags, struct ib_udata *udata); + +int ehca_rereg_phys_mr(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, int mr_access_flags, u64 *iova_start); + +int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); + +int ehca_dereg_mr(struct ib_mr *mr); + +struct ib_mw *ehca_alloc_mw(struct ib_pd *pd); + +int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw, + struct ib_mw_bind *mw_bind); + +int ehca_dealloc_mw(struct ib_mw *mw); + +struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, + int mr_access_flags, + struct ib_fmr_attr *fmr_attr); + +int ehca_map_phys_fmr(struct ib_fmr *fmr, + u64 *page_list, int list_len, u64 iova); + +int ehca_unmap_fmr(struct list_head *fmr_list); + +int ehca_dealloc_fmr(struct ib_fmr *fmr); + +enum ehca_eq_type { + EHCA_EQ = 0, /* Event Queue */ + EHCA_NEQ /* Notification Event Queue */ +}; + +int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq, + enum ehca_eq_type type, const u32 length); + +int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq); + +void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq); + + +struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, + struct ib_ucontext *context, + struct ib_udata *udata); + +int ehca_destroy_cq(struct ib_cq *cq); + +int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata); + +int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc); + +int ehca_peek_cq(struct ib_cq *cq, int wc_cnt); + +int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify); + +struct ib_qp *ehca_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); + +int ehca_destroy_qp(struct ib_qp *qp); + +int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask); + +int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); + +int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr); + +int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr); + +u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp, + struct ib_qp_init_attr *qp_init_attr); + +int ehca_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); + +int ehca_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); + +struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device, + struct ib_udata *udata); + +int ehca_dealloc_ucontext(struct ib_ucontext *context); + +int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); + +void ehca_poll_eqs(unsigned long data); + +int ehca_mmap_nopage(u64 foffset,u64 length,void **mapped, + struct vm_area_struct **vma); + +int ehca_mmap_register(u64 physical,void **mapped, + struct vm_area_struct **vma); + +int ehca_munmap(unsigned long addr, size_t len); + +#endif diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c new file mode 100644 index 000000000000..2a99f2d13cdb --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -0,0 +1,818 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * module start stop, hca detection + * + * Authors: Heiko J Schick + * Hoang-Nam Nguyen + * Joachim Fenkes + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "ehca_classes.h" +#include "ehca_iverbs.h" +#include "ehca_mrmw.h" +#include "ehca_tools.h" +#include "hcp_if.h" + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Christoph Raisch "); +MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); +MODULE_VERSION("SVNEHCA_0016"); + +int ehca_open_aqp1 = 0; +int ehca_debug_level = 0; +int ehca_hw_level = 0; +int ehca_nr_ports = 2; +int ehca_use_hp_mr = 0; +int ehca_port_act_time = 30; +int ehca_poll_all_eqs = 1; +int ehca_static_rate = -1; + +module_param_named(open_aqp1, ehca_open_aqp1, int, 0); +module_param_named(debug_level, ehca_debug_level, int, 0); +module_param_named(hw_level, ehca_hw_level, int, 0); +module_param_named(nr_ports, ehca_nr_ports, int, 0); +module_param_named(use_hp_mr, ehca_use_hp_mr, int, 0); +module_param_named(port_act_time, ehca_port_act_time, int, 0); +module_param_named(poll_all_eqs, ehca_poll_all_eqs, int, 0); +module_param_named(static_rate, ehca_static_rate, int, 0); + +MODULE_PARM_DESC(open_aqp1, + "AQP1 on startup (0: no (default), 1: yes)"); +MODULE_PARM_DESC(debug_level, + "debug level" + " (0: no debug traces (default), 1: with debug traces)"); +MODULE_PARM_DESC(hw_level, + "hardware level" + " (0: autosensing (default), 1: v. 0.20, 2: v. 0.21)"); +MODULE_PARM_DESC(nr_ports, + "number of connected ports (default: 2)"); +MODULE_PARM_DESC(use_hp_mr, + "high performance MRs (0: no (default), 1: yes)"); +MODULE_PARM_DESC(port_act_time, + "time to wait for port activation (default: 30 sec)"); +MODULE_PARM_DESC(poll_all_eqs, + "polls all event queues periodically" + " (0: no, 1: yes (default))"); +MODULE_PARM_DESC(static_rate, + "set permanent static rate (default: disabled)"); + +spinlock_t ehca_qp_idr_lock; +spinlock_t ehca_cq_idr_lock; +DEFINE_IDR(ehca_qp_idr); +DEFINE_IDR(ehca_cq_idr); + +static struct list_head shca_list; /* list of all registered ehcas */ +static spinlock_t shca_list_lock; + +static struct timer_list poll_eqs_timer; + +static int ehca_create_slab_caches(void) +{ + int ret; + + ret = ehca_init_pd_cache(); + if (ret) { + ehca_gen_err("Cannot create PD SLAB cache."); + return ret; + } + + ret = ehca_init_cq_cache(); + if (ret) { + ehca_gen_err("Cannot create CQ SLAB cache."); + goto create_slab_caches2; + } + + ret = ehca_init_qp_cache(); + if (ret) { + ehca_gen_err("Cannot create QP SLAB cache."); + goto create_slab_caches3; + } + + ret = ehca_init_av_cache(); + if (ret) { + ehca_gen_err("Cannot create AV SLAB cache."); + goto create_slab_caches4; + } + + ret = ehca_init_mrmw_cache(); + if (ret) { + ehca_gen_err("Cannot create MR&MW SLAB cache."); + goto create_slab_caches5; + } + + return 0; + +create_slab_caches5: + ehca_cleanup_av_cache(); + +create_slab_caches4: + ehca_cleanup_qp_cache(); + +create_slab_caches3: + ehca_cleanup_cq_cache(); + +create_slab_caches2: + ehca_cleanup_pd_cache(); + + return ret; +} + +static void ehca_destroy_slab_caches(void) +{ + ehca_cleanup_mrmw_cache(); + ehca_cleanup_av_cache(); + ehca_cleanup_qp_cache(); + ehca_cleanup_cq_cache(); + ehca_cleanup_pd_cache(); +} + +#define EHCA_HCAAVER EHCA_BMASK_IBM(32,39) +#define EHCA_REVID EHCA_BMASK_IBM(40,63) + +int ehca_sense_attributes(struct ehca_shca *shca) +{ + int ret = 0; + u64 h_ret; + struct hipz_query_hca *rblock; + + rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!rblock) { + ehca_gen_err("Cannot allocate rblock memory."); + return -ENOMEM; + } + + h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock); + if (h_ret != H_SUCCESS) { + ehca_gen_err("Cannot query device properties. h_ret=%lx", + h_ret); + ret = -EPERM; + goto num_ports1; + } + + if (ehca_nr_ports == 1) + shca->num_ports = 1; + else + shca->num_ports = (u8)rblock->num_ports; + + ehca_gen_dbg(" ... found %x ports", rblock->num_ports); + + if (ehca_hw_level == 0) { + u32 hcaaver; + u32 revid; + + hcaaver = EHCA_BMASK_GET(EHCA_HCAAVER, rblock->hw_ver); + revid = EHCA_BMASK_GET(EHCA_REVID, rblock->hw_ver); + + ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid); + + if ((hcaaver == 1) && (revid == 0)) + shca->hw_level = 0; + else if ((hcaaver == 1) && (revid == 1)) + shca->hw_level = 1; + else if ((hcaaver == 1) && (revid == 2)) + shca->hw_level = 2; + } + ehca_gen_dbg(" ... hardware level=%x", shca->hw_level); + + shca->sport[0].rate = IB_RATE_30_GBPS; + shca->sport[1].rate = IB_RATE_30_GBPS; + +num_ports1: + kfree(rblock); + return ret; +} + +static int init_node_guid(struct ehca_shca *shca) +{ + int ret = 0; + struct hipz_query_hca *rblock; + + rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query device properties"); + ret = -EINVAL; + goto init_node_guid1; + } + + memcpy(&shca->ib_device.node_guid, &rblock->node_guid, sizeof(u64)); + +init_node_guid1: + kfree(rblock); + return ret; +} + +int ehca_register_device(struct ehca_shca *shca) +{ + int ret; + + ret = init_node_guid(shca); + if (ret) + return ret; + + strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX); + shca->ib_device.owner = THIS_MODULE; + + shca->ib_device.uverbs_abi_ver = 5; + shca->ib_device.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); + + shca->ib_device.node_type = IB_NODE_CA; + shca->ib_device.phys_port_cnt = shca->num_ports; + shca->ib_device.dma_device = &shca->ibmebus_dev->ofdev.dev; + shca->ib_device.query_device = ehca_query_device; + shca->ib_device.query_port = ehca_query_port; + shca->ib_device.query_gid = ehca_query_gid; + shca->ib_device.query_pkey = ehca_query_pkey; + /* shca->in_device.modify_device = ehca_modify_device */ + shca->ib_device.modify_port = ehca_modify_port; + shca->ib_device.alloc_ucontext = ehca_alloc_ucontext; + shca->ib_device.dealloc_ucontext = ehca_dealloc_ucontext; + shca->ib_device.alloc_pd = ehca_alloc_pd; + shca->ib_device.dealloc_pd = ehca_dealloc_pd; + shca->ib_device.create_ah = ehca_create_ah; + /* shca->ib_device.modify_ah = ehca_modify_ah; */ + shca->ib_device.query_ah = ehca_query_ah; + shca->ib_device.destroy_ah = ehca_destroy_ah; + shca->ib_device.create_qp = ehca_create_qp; + shca->ib_device.modify_qp = ehca_modify_qp; + shca->ib_device.query_qp = ehca_query_qp; + shca->ib_device.destroy_qp = ehca_destroy_qp; + shca->ib_device.post_send = ehca_post_send; + shca->ib_device.post_recv = ehca_post_recv; + shca->ib_device.create_cq = ehca_create_cq; + shca->ib_device.destroy_cq = ehca_destroy_cq; + shca->ib_device.resize_cq = ehca_resize_cq; + shca->ib_device.poll_cq = ehca_poll_cq; + /* shca->ib_device.peek_cq = ehca_peek_cq; */ + shca->ib_device.req_notify_cq = ehca_req_notify_cq; + /* shca->ib_device.req_ncomp_notif = ehca_req_ncomp_notif; */ + shca->ib_device.get_dma_mr = ehca_get_dma_mr; + shca->ib_device.reg_phys_mr = ehca_reg_phys_mr; + shca->ib_device.reg_user_mr = ehca_reg_user_mr; + shca->ib_device.query_mr = ehca_query_mr; + shca->ib_device.dereg_mr = ehca_dereg_mr; + shca->ib_device.rereg_phys_mr = ehca_rereg_phys_mr; + shca->ib_device.alloc_mw = ehca_alloc_mw; + shca->ib_device.bind_mw = ehca_bind_mw; + shca->ib_device.dealloc_mw = ehca_dealloc_mw; + shca->ib_device.alloc_fmr = ehca_alloc_fmr; + shca->ib_device.map_phys_fmr = ehca_map_phys_fmr; + shca->ib_device.unmap_fmr = ehca_unmap_fmr; + shca->ib_device.dealloc_fmr = ehca_dealloc_fmr; + shca->ib_device.attach_mcast = ehca_attach_mcast; + shca->ib_device.detach_mcast = ehca_detach_mcast; + /* shca->ib_device.process_mad = ehca_process_mad; */ + shca->ib_device.mmap = ehca_mmap; + + ret = ib_register_device(&shca->ib_device); + if (ret) + ehca_err(&shca->ib_device, + "ib_register_device() failed ret=%x", ret); + + return ret; +} + +static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) +{ + struct ehca_sport *sport = &shca->sport[port - 1]; + struct ib_cq *ibcq; + struct ib_qp *ibqp; + struct ib_qp_init_attr qp_init_attr; + int ret; + + if (sport->ibcq_aqp1) { + ehca_err(&shca->ib_device, "AQP1 CQ is already created."); + return -EPERM; + } + + ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void*)(-1), 10); + if (IS_ERR(ibcq)) { + ehca_err(&shca->ib_device, "Cannot create AQP1 CQ."); + return PTR_ERR(ibcq); + } + sport->ibcq_aqp1 = ibcq; + + if (sport->ibqp_aqp1) { + ehca_err(&shca->ib_device, "AQP1 QP is already created."); + ret = -EPERM; + goto create_aqp1; + } + + memset(&qp_init_attr, 0, sizeof(struct ib_qp_init_attr)); + qp_init_attr.send_cq = ibcq; + qp_init_attr.recv_cq = ibcq; + qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; + qp_init_attr.cap.max_send_wr = 100; + qp_init_attr.cap.max_recv_wr = 100; + qp_init_attr.cap.max_send_sge = 2; + qp_init_attr.cap.max_recv_sge = 1; + qp_init_attr.qp_type = IB_QPT_GSI; + qp_init_attr.port_num = port; + qp_init_attr.qp_context = NULL; + qp_init_attr.event_handler = NULL; + qp_init_attr.srq = NULL; + + ibqp = ib_create_qp(&shca->pd->ib_pd, &qp_init_attr); + if (IS_ERR(ibqp)) { + ehca_err(&shca->ib_device, "Cannot create AQP1 QP."); + ret = PTR_ERR(ibqp); + goto create_aqp1; + } + sport->ibqp_aqp1 = ibqp; + + return 0; + +create_aqp1: + ib_destroy_cq(sport->ibcq_aqp1); + return ret; +} + +static int ehca_destroy_aqp1(struct ehca_sport *sport) +{ + int ret; + + ret = ib_destroy_qp(sport->ibqp_aqp1); + if (ret) { + ehca_gen_err("Cannot destroy AQP1 QP. ret=%x", ret); + return ret; + } + + ret = ib_destroy_cq(sport->ibcq_aqp1); + if (ret) + ehca_gen_err("Cannot destroy AQP1 CQ. ret=%x", ret); + + return ret; +} + +static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", + ehca_debug_level); +} + +static ssize_t ehca_store_debug_level(struct device_driver *ddp, + const char *buf, size_t count) +{ + int value = (*buf) - '0'; + if (value >= 0 && value <= 9) + ehca_debug_level = value; + return 1; +} + +DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR, + ehca_show_debug_level, ehca_store_debug_level); + +void ehca_create_driver_sysfs(struct ibmebus_driver *drv) +{ + driver_create_file(&drv->driver, &driver_attr_debug_level); +} + +void ehca_remove_driver_sysfs(struct ibmebus_driver *drv) +{ + driver_remove_file(&drv->driver, &driver_attr_debug_level); +} + +#define EHCA_RESOURCE_ATTR(name) \ +static ssize_t ehca_show_##name(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + struct ehca_shca *shca; \ + struct hipz_query_hca *rblock; \ + int data; \ + \ + shca = dev->driver_data; \ + \ + rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); \ + if (!rblock) { \ + dev_err(dev, "Can't allocate rblock memory."); \ + return 0; \ + } \ + \ + if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \ + dev_err(dev, "Can't query device properties"); \ + kfree(rblock); \ + return 0; \ + } \ + \ + data = rblock->name; \ + kfree(rblock); \ + \ + if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1)) \ + return snprintf(buf, 256, "1\n"); \ + else \ + return snprintf(buf, 256, "%d\n", data); \ + \ +} \ +static DEVICE_ATTR(name, S_IRUGO, ehca_show_##name, NULL); + +EHCA_RESOURCE_ATTR(num_ports); +EHCA_RESOURCE_ATTR(hw_ver); +EHCA_RESOURCE_ATTR(max_eq); +EHCA_RESOURCE_ATTR(cur_eq); +EHCA_RESOURCE_ATTR(max_cq); +EHCA_RESOURCE_ATTR(cur_cq); +EHCA_RESOURCE_ATTR(max_qp); +EHCA_RESOURCE_ATTR(cur_qp); +EHCA_RESOURCE_ATTR(max_mr); +EHCA_RESOURCE_ATTR(cur_mr); +EHCA_RESOURCE_ATTR(max_mw); +EHCA_RESOURCE_ATTR(cur_mw); +EHCA_RESOURCE_ATTR(max_pd); +EHCA_RESOURCE_ATTR(max_ah); + +static ssize_t ehca_show_adapter_handle(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ehca_shca *shca = dev->driver_data; + + return sprintf(buf, "%lx\n", shca->ipz_hca_handle.handle); + +} +static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL); + + +void ehca_create_device_sysfs(struct ibmebus_dev *dev) +{ + device_create_file(&dev->ofdev.dev, &dev_attr_adapter_handle); + device_create_file(&dev->ofdev.dev, &dev_attr_num_ports); + device_create_file(&dev->ofdev.dev, &dev_attr_hw_ver); + device_create_file(&dev->ofdev.dev, &dev_attr_max_eq); + device_create_file(&dev->ofdev.dev, &dev_attr_cur_eq); + device_create_file(&dev->ofdev.dev, &dev_attr_max_cq); + device_create_file(&dev->ofdev.dev, &dev_attr_cur_cq); + device_create_file(&dev->ofdev.dev, &dev_attr_max_qp); + device_create_file(&dev->ofdev.dev, &dev_attr_cur_qp); + device_create_file(&dev->ofdev.dev, &dev_attr_max_mr); + device_create_file(&dev->ofdev.dev, &dev_attr_cur_mr); + device_create_file(&dev->ofdev.dev, &dev_attr_max_mw); + device_create_file(&dev->ofdev.dev, &dev_attr_cur_mw); + device_create_file(&dev->ofdev.dev, &dev_attr_max_pd); + device_create_file(&dev->ofdev.dev, &dev_attr_max_ah); +} + +void ehca_remove_device_sysfs(struct ibmebus_dev *dev) +{ + device_remove_file(&dev->ofdev.dev, &dev_attr_adapter_handle); + device_remove_file(&dev->ofdev.dev, &dev_attr_num_ports); + device_remove_file(&dev->ofdev.dev, &dev_attr_hw_ver); + device_remove_file(&dev->ofdev.dev, &dev_attr_max_eq); + device_remove_file(&dev->ofdev.dev, &dev_attr_cur_eq); + device_remove_file(&dev->ofdev.dev, &dev_attr_max_cq); + device_remove_file(&dev->ofdev.dev, &dev_attr_cur_cq); + device_remove_file(&dev->ofdev.dev, &dev_attr_max_qp); + device_remove_file(&dev->ofdev.dev, &dev_attr_cur_qp); + device_remove_file(&dev->ofdev.dev, &dev_attr_max_mr); + device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mr); + device_remove_file(&dev->ofdev.dev, &dev_attr_max_mw); + device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mw); + device_remove_file(&dev->ofdev.dev, &dev_attr_max_pd); + device_remove_file(&dev->ofdev.dev, &dev_attr_max_ah); +} + +static int __devinit ehca_probe(struct ibmebus_dev *dev, + const struct of_device_id *id) +{ + struct ehca_shca *shca; + u64 *handle; + struct ib_pd *ibpd; + int ret; + + handle = (u64 *)get_property(dev->ofdev.node, "ibm,hca-handle", NULL); + if (!handle) { + ehca_gen_err("Cannot get eHCA handle for adapter: %s.", + dev->ofdev.node->full_name); + return -ENODEV; + } + + if (!(*handle)) { + ehca_gen_err("Wrong eHCA handle for adapter: %s.", + dev->ofdev.node->full_name); + return -ENODEV; + } + + shca = (struct ehca_shca *)ib_alloc_device(sizeof(*shca)); + if (!shca) { + ehca_gen_err("Cannot allocate shca memory."); + return -ENOMEM; + } + + shca->ibmebus_dev = dev; + shca->ipz_hca_handle.handle = *handle; + dev->ofdev.dev.driver_data = shca; + + ret = ehca_sense_attributes(shca); + if (ret < 0) { + ehca_gen_err("Cannot sense eHCA attributes."); + goto probe1; + } + + ret = ehca_register_device(shca); + if (ret) { + ehca_gen_err("Cannot register Infiniband device"); + goto probe1; + } + + /* create event queues */ + ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, 2048); + if (ret) { + ehca_err(&shca->ib_device, "Cannot create EQ."); + goto probe2; + } + + ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513); + if (ret) { + ehca_err(&shca->ib_device, "Cannot create NEQ."); + goto probe3; + } + + /* create internal protection domain */ + ibpd = ehca_alloc_pd(&shca->ib_device, (void*)(-1), NULL); + if (IS_ERR(ibpd)) { + ehca_err(&shca->ib_device, "Cannot create internal PD."); + ret = PTR_ERR(ibpd); + goto probe4; + } + + shca->pd = container_of(ibpd, struct ehca_pd, ib_pd); + shca->pd->ib_pd.device = &shca->ib_device; + + /* create internal max MR */ + ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr); + + if (ret) { + ehca_err(&shca->ib_device, "Cannot create internal MR ret=%x", + ret); + goto probe5; + } + + /* create AQP1 for port 1 */ + if (ehca_open_aqp1 == 1) { + shca->sport[0].port_state = IB_PORT_DOWN; + ret = ehca_create_aqp1(shca, 1); + if (ret) { + ehca_err(&shca->ib_device, + "Cannot create AQP1 for port 1."); + goto probe6; + } + } + + /* create AQP1 for port 2 */ + if ((ehca_open_aqp1 == 1) && (shca->num_ports == 2)) { + shca->sport[1].port_state = IB_PORT_DOWN; + ret = ehca_create_aqp1(shca, 2); + if (ret) { + ehca_err(&shca->ib_device, + "Cannot create AQP1 for port 2."); + goto probe7; + } + } + + ehca_create_device_sysfs(dev); + + spin_lock(&shca_list_lock); + list_add(&shca->shca_list, &shca_list); + spin_unlock(&shca_list_lock); + + return 0; + +probe7: + ret = ehca_destroy_aqp1(&shca->sport[0]); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy AQP1 for port 1. ret=%x", ret); + +probe6: + ret = ehca_dereg_internal_maxmr(shca); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy internal MR. ret=%x", ret); + +probe5: + ret = ehca_dealloc_pd(&shca->pd->ib_pd); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy internal PD. ret=%x", ret); + +probe4: + ret = ehca_destroy_eq(shca, &shca->neq); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy NEQ. ret=%x", ret); + +probe3: + ret = ehca_destroy_eq(shca, &shca->eq); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy EQ. ret=%x", ret); + +probe2: + ib_unregister_device(&shca->ib_device); + +probe1: + ib_dealloc_device(&shca->ib_device); + + return -EINVAL; +} + +static int __devexit ehca_remove(struct ibmebus_dev *dev) +{ + struct ehca_shca *shca = dev->ofdev.dev.driver_data; + int ret; + + ehca_remove_device_sysfs(dev); + + if (ehca_open_aqp1 == 1) { + int i; + for (i = 0; i < shca->num_ports; i++) { + ret = ehca_destroy_aqp1(&shca->sport[i]); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy AQP1 for port %x " + "ret=%x", ret, i); + } + } + + ib_unregister_device(&shca->ib_device); + + ret = ehca_dereg_internal_maxmr(shca); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy internal MR. ret=%x", ret); + + ret = ehca_dealloc_pd(&shca->pd->ib_pd); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy internal PD. ret=%x", ret); + + ret = ehca_destroy_eq(shca, &shca->eq); + if (ret) + ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%x", ret); + + ret = ehca_destroy_eq(shca, &shca->neq); + if (ret) + ehca_err(&shca->ib_device, "Canot destroy NEQ. ret=%x", ret); + + ib_dealloc_device(&shca->ib_device); + + spin_lock(&shca_list_lock); + list_del(&shca->shca_list); + spin_unlock(&shca_list_lock); + + return ret; +} + +static struct of_device_id ehca_device_table[] = +{ + { + .name = "lhca", + .compatible = "IBM,lhca", + }, + {}, +}; + +static struct ibmebus_driver ehca_driver = { + .name = "ehca", + .id_table = ehca_device_table, + .probe = ehca_probe, + .remove = ehca_remove, +}; + +void ehca_poll_eqs(unsigned long data) +{ + struct ehca_shca *shca; + + spin_lock(&shca_list_lock); + list_for_each_entry(shca, &shca_list, shca_list) { + if (shca->eq.is_initialized) + ehca_tasklet_eq((unsigned long)(void*)shca); + } + mod_timer(&poll_eqs_timer, jiffies + HZ); + spin_unlock(&shca_list_lock); +} + +int __init ehca_module_init(void) +{ + int ret; + + printk(KERN_INFO "eHCA Infiniband Device Driver " + "(Rel.: SVNEHCA_0016)\n"); + idr_init(&ehca_qp_idr); + idr_init(&ehca_cq_idr); + spin_lock_init(&ehca_qp_idr_lock); + spin_lock_init(&ehca_cq_idr_lock); + + INIT_LIST_HEAD(&shca_list); + spin_lock_init(&shca_list_lock); + + if ((ret = ehca_create_comp_pool())) { + ehca_gen_err("Cannot create comp pool."); + return ret; + } + + if ((ret = ehca_create_slab_caches())) { + ehca_gen_err("Cannot create SLAB caches"); + ret = -ENOMEM; + goto module_init1; + } + + if ((ret = ibmebus_register_driver(&ehca_driver))) { + ehca_gen_err("Cannot register eHCA device driver"); + ret = -EINVAL; + goto module_init2; + } + + ehca_create_driver_sysfs(&ehca_driver); + + if (ehca_poll_all_eqs != 1) { + ehca_gen_err("WARNING!!!"); + ehca_gen_err("It is possible to lose interrupts."); + } else { + init_timer(&poll_eqs_timer); + poll_eqs_timer.function = ehca_poll_eqs; + poll_eqs_timer.expires = jiffies + HZ; + add_timer(&poll_eqs_timer); + } + + return 0; + +module_init2: + ehca_destroy_slab_caches(); + +module_init1: + ehca_destroy_comp_pool(); + return ret; +}; + +void __exit ehca_module_exit(void) +{ + if (ehca_poll_all_eqs == 1) + del_timer_sync(&poll_eqs_timer); + + ehca_remove_driver_sysfs(&ehca_driver); + ibmebus_unregister_driver(&ehca_driver); + + ehca_destroy_slab_caches(); + + ehca_destroy_comp_pool(); + + idr_destroy(&ehca_cq_idr); + idr_destroy(&ehca_qp_idr); +}; + +module_init(ehca_module_init); +module_exit(ehca_module_exit); diff --git a/drivers/infiniband/hw/ehca/ehca_mcast.c b/drivers/infiniband/hw/ehca/ehca_mcast.c new file mode 100644 index 000000000000..32a870660bfe --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_mcast.c @@ -0,0 +1,131 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * mcast functions + * + * Authors: Khadija Souissi + * Waleri Fomin + * Reinhard Ernst + * Hoang-Nam Nguyen + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_qes.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" + +#define MAX_MC_LID 0xFFFE +#define MIN_MC_LID 0xC000 /* Multicast limits */ +#define EHCA_VALID_MULTICAST_GID(gid) ((gid)[0] == 0xFF) +#define EHCA_VALID_MULTICAST_LID(lid) \ + (((lid) >= MIN_MC_LID) && ((lid) <= MAX_MC_LID)) + +int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, + ib_device); + union ib_gid my_gid; + u64 subnet_prefix, interface_id, h_ret; + + if (ibqp->qp_type != IB_QPT_UD) { + ehca_err(ibqp->device, "invalid qp_type=%x", ibqp->qp_type); + return -EINVAL; + } + + if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) { + ehca_err(ibqp->device, "invalid mulitcast gid"); + return -EINVAL; + } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) { + ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid); + return -EINVAL; + } + + memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid)); + + subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix); + interface_id = be64_to_cpu(my_gid.global.interface_id); + h_ret = hipz_h_attach_mcqp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + my_qp->galpas.kernel, + lid, subnet_prefix, interface_id); + if (h_ret != H_SUCCESS) + ehca_err(ibqp->device, + "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed " + "h_ret=%lx", my_qp, ibqp->qp_num, h_ret); + + return ehca2ib_return_code(h_ret); +} + +int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + struct ehca_shca *shca = container_of(ibqp->pd->device, + struct ehca_shca, ib_device); + union ib_gid my_gid; + u64 subnet_prefix, interface_id, h_ret; + + if (ibqp->qp_type != IB_QPT_UD) { + ehca_err(ibqp->device, "invalid qp_type %x", ibqp->qp_type); + return -EINVAL; + } + + if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) { + ehca_err(ibqp->device, "invalid mulitcast gid"); + return -EINVAL; + } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) { + ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid); + return -EINVAL; + } + + memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid)); + + subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix); + interface_id = be64_to_cpu(my_gid.global.interface_id); + h_ret = hipz_h_detach_mcqp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + my_qp->galpas.kernel, + lid, subnet_prefix, interface_id); + if (h_ret != H_SUCCESS) + ehca_err(ibqp->device, + "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed " + "h_ret=%lx", my_qp, ibqp->qp_num, h_ret); + + return ehca2ib_return_code(h_ret); +} diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c new file mode 100644 index 000000000000..5ca65441e1da --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -0,0 +1,2261 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * MR/MW functions + * + * Authors: Dietmar Decker + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "ehca_iverbs.h" +#include "ehca_mrmw.h" +#include "hcp_if.h" +#include "hipz_hw.h" + +static struct kmem_cache *mr_cache; +static struct kmem_cache *mw_cache; + +static struct ehca_mr *ehca_mr_new(void) +{ + struct ehca_mr *me; + + me = kmem_cache_alloc(mr_cache, SLAB_KERNEL); + if (me) { + memset(me, 0, sizeof(struct ehca_mr)); + spin_lock_init(&me->mrlock); + } else + ehca_gen_err("alloc failed"); + + return me; +} + +static void ehca_mr_delete(struct ehca_mr *me) +{ + kmem_cache_free(mr_cache, me); +} + +static struct ehca_mw *ehca_mw_new(void) +{ + struct ehca_mw *me; + + me = kmem_cache_alloc(mw_cache, SLAB_KERNEL); + if (me) { + memset(me, 0, sizeof(struct ehca_mw)); + spin_lock_init(&me->mwlock); + } else + ehca_gen_err("alloc failed"); + + return me; +} + +static void ehca_mw_delete(struct ehca_mw *me) +{ + kmem_cache_free(mw_cache, me); +} + +/*----------------------------------------------------------------------*/ + +struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags) +{ + struct ib_mr *ib_mr; + int ret; + struct ehca_mr *e_maxmr; + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + + if (shca->maxmr) { + e_maxmr = ehca_mr_new(); + if (!e_maxmr) { + ehca_err(&shca->ib_device, "out of memory"); + ib_mr = ERR_PTR(-ENOMEM); + goto get_dma_mr_exit0; + } + + ret = ehca_reg_maxmr(shca, e_maxmr, (u64*)KERNELBASE, + mr_access_flags, e_pd, + &e_maxmr->ib.ib_mr.lkey, + &e_maxmr->ib.ib_mr.rkey); + if (ret) { + ib_mr = ERR_PTR(ret); + goto get_dma_mr_exit0; + } + ib_mr = &e_maxmr->ib.ib_mr; + } else { + ehca_err(&shca->ib_device, "no internal max-MR exist!"); + ib_mr = ERR_PTR(-EINVAL); + goto get_dma_mr_exit0; + } + +get_dma_mr_exit0: + if (IS_ERR(ib_mr)) + ehca_err(&shca->ib_device, "rc=%lx pd=%p mr_access_flags=%x ", + PTR_ERR(ib_mr), pd, mr_access_flags); + return ib_mr; +} /* end ehca_get_dma_mr() */ + +/*----------------------------------------------------------------------*/ + +struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start) +{ + struct ib_mr *ib_mr; + int ret; + struct ehca_mr *e_mr; + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + + u64 size; + struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + u32 num_pages_mr; + u32 num_pages_4k; /* 4k portion "pages" */ + + if ((num_phys_buf <= 0) || !phys_buf_array) { + ehca_err(pd->device, "bad input values: num_phys_buf=%x " + "phys_buf_array=%p", num_phys_buf, phys_buf_array); + ib_mr = ERR_PTR(-EINVAL); + goto reg_phys_mr_exit0; + } + if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + ehca_err(pd->device, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_mr = ERR_PTR(-EINVAL); + goto reg_phys_mr_exit0; + } + + /* check physical buffer list and calculate size */ + ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf, + iova_start, &size); + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_phys_mr_exit0; + } + if ((size == 0) || + (((u64)iova_start + size) < (u64)iova_start)) { + ehca_err(pd->device, "bad input values: size=%lx iova_start=%p", + size, iova_start); + ib_mr = ERR_PTR(-EINVAL); + goto reg_phys_mr_exit0; + } + + e_mr = ehca_mr_new(); + if (!e_mr) { + ehca_err(pd->device, "out of memory"); + ib_mr = ERR_PTR(-ENOMEM); + goto reg_phys_mr_exit0; + } + + /* determine number of MR pages */ + num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size + + PAGE_SIZE - 1) / PAGE_SIZE); + num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size + + EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); + + /* register MR on HCA */ + if (ehca_mr_is_maxmr(size, iova_start)) { + e_mr->flags |= EHCA_MR_FLAG_MAXMR; + ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags, + e_pd, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey); + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_phys_mr_exit1; + } + } else { + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_pages = num_pages_mr; + pginfo.num_4k = num_pages_4k; + pginfo.num_phys_buf = num_phys_buf; + pginfo.phys_buf_array = phys_buf_array; + pginfo.next_4k = (((u64)iova_start & ~PAGE_MASK) / + EHCA_PAGESIZE); + + ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags, + e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey); + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_phys_mr_exit1; + } + } + + /* successful registration of all pages */ + return &e_mr->ib.ib_mr; + +reg_phys_mr_exit1: + ehca_mr_delete(e_mr); +reg_phys_mr_exit0: + if (IS_ERR(ib_mr)) + ehca_err(pd->device, "rc=%lx pd=%p phys_buf_array=%p " + "num_phys_buf=%x mr_access_flags=%x iova_start=%p", + PTR_ERR(ib_mr), pd, phys_buf_array, + num_phys_buf, mr_access_flags, iova_start); + return ib_mr; +} /* end ehca_reg_phys_mr() */ + +/*----------------------------------------------------------------------*/ + +struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, + struct ib_umem *region, + int mr_access_flags, + struct ib_udata *udata) +{ + struct ib_mr *ib_mr; + struct ehca_mr *e_mr; + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + int ret; + u32 num_pages_mr; + u32 num_pages_4k; /* 4k portion "pages" */ + + if (!pd) { + ehca_gen_err("bad pd=%p", pd); + return ERR_PTR(-EFAULT); + } + if (!region) { + ehca_err(pd->device, "bad input values: region=%p", region); + ib_mr = ERR_PTR(-EINVAL); + goto reg_user_mr_exit0; + } + if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + ehca_err(pd->device, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_mr = ERR_PTR(-EINVAL); + goto reg_user_mr_exit0; + } + if (region->page_size != PAGE_SIZE) { + ehca_err(pd->device, "page size not supported, " + "region->page_size=%x", region->page_size); + ib_mr = ERR_PTR(-EINVAL); + goto reg_user_mr_exit0; + } + + if ((region->length == 0) || + ((region->virt_base + region->length) < region->virt_base)) { + ehca_err(pd->device, "bad input values: length=%lx " + "virt_base=%lx", region->length, region->virt_base); + ib_mr = ERR_PTR(-EINVAL); + goto reg_user_mr_exit0; + } + + e_mr = ehca_mr_new(); + if (!e_mr) { + ehca_err(pd->device, "out of memory"); + ib_mr = ERR_PTR(-ENOMEM); + goto reg_user_mr_exit0; + } + + /* determine number of MR pages */ + num_pages_mr = (((region->virt_base % PAGE_SIZE) + region->length + + PAGE_SIZE - 1) / PAGE_SIZE); + num_pages_4k = (((region->virt_base % EHCA_PAGESIZE) + region->length + + EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); + + /* register MR on HCA */ + pginfo.type = EHCA_MR_PGI_USER; + pginfo.num_pages = num_pages_mr; + pginfo.num_4k = num_pages_4k; + pginfo.region = region; + pginfo.next_4k = region->offset / EHCA_PAGESIZE; + pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk, + (®ion->chunk_list), + list); + + ret = ehca_reg_mr(shca, e_mr, (u64*)region->virt_base, + region->length, mr_access_flags, e_pd, &pginfo, + &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey); + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_user_mr_exit1; + } + + /* successful registration of all pages */ + return &e_mr->ib.ib_mr; + +reg_user_mr_exit1: + ehca_mr_delete(e_mr); +reg_user_mr_exit0: + if (IS_ERR(ib_mr)) + ehca_err(pd->device, "rc=%lx pd=%p region=%p mr_access_flags=%x" + " udata=%p", + PTR_ERR(ib_mr), pd, region, mr_access_flags, udata); + return ib_mr; +} /* end ehca_reg_user_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_rereg_phys_mr(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start) +{ + int ret; + + struct ehca_shca *shca = + container_of(mr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); + struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd); + u64 new_size; + u64 *new_start; + u32 new_acl; + struct ehca_pd *new_pd; + u32 tmp_lkey, tmp_rkey; + unsigned long sl_flags; + u32 num_pages_mr = 0; + u32 num_pages_4k = 0; /* 4k portion "pages" */ + struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + u32 cur_pid = current->tgid; + + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + (my_pd->ownpid != cur_pid)) { + ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + + if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) { + /* TODO not supported, because PHYP rereg hCall needs pages */ + ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not " + "supported yet, mr_rereg_mask=%x", mr_rereg_mask); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + + if (mr_rereg_mask & IB_MR_REREG_PD) { + if (!pd) { + ehca_err(mr->device, "rereg with bad pd, pd=%p " + "mr_rereg_mask=%x", pd, mr_rereg_mask); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + } + + if ((mr_rereg_mask & + ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) || + (mr_rereg_mask == 0)) { + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + + /* check other parameters */ + if (e_mr == shca->maxmr) { + /* should be impossible, however reject to be sure */ + ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p " + "shca->maxmr=%p mr->lkey=%x", + mr, shca->maxmr, mr->lkey); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. addr/size */ + if (e_mr->flags & EHCA_MR_FLAG_FMR) { + ehca_err(mr->device, "not supported for FMR, mr=%p " + "flags=%x", mr, e_mr->flags); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + if (!phys_buf_array || num_phys_buf <= 0) { + ehca_err(mr->device, "bad input values: mr_rereg_mask=%x" + " phys_buf_array=%p num_phys_buf=%x", + mr_rereg_mask, phys_buf_array, num_phys_buf); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + } + if ((mr_rereg_mask & IB_MR_REREG_ACCESS) && /* change ACL */ + (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + ehca_err(mr->device, "bad input values: mr_rereg_mask=%x " + "mr_access_flags=%x", mr_rereg_mask, mr_access_flags); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + + /* set requested values dependent on rereg request */ + spin_lock_irqsave(&e_mr->mrlock, sl_flags); + new_start = e_mr->start; /* new == old address */ + new_size = e_mr->size; /* new == old length */ + new_acl = e_mr->acl; /* new == old access control */ + new_pd = container_of(mr->pd,struct ehca_pd,ib_pd); /*new == old PD*/ + + if (mr_rereg_mask & IB_MR_REREG_TRANS) { + new_start = iova_start; /* change address */ + /* check physical buffer list and calculate size */ + ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, + num_phys_buf, iova_start, + &new_size); + if (ret) + goto rereg_phys_mr_exit1; + if ((new_size == 0) || + (((u64)iova_start + new_size) < (u64)iova_start)) { + ehca_err(mr->device, "bad input values: new_size=%lx " + "iova_start=%p", new_size, iova_start); + ret = -EINVAL; + goto rereg_phys_mr_exit1; + } + num_pages_mr = ((((u64)new_start % PAGE_SIZE) + new_size + + PAGE_SIZE - 1) / PAGE_SIZE); + num_pages_4k = ((((u64)new_start % EHCA_PAGESIZE) + new_size + + EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_pages = num_pages_mr; + pginfo.num_4k = num_pages_4k; + pginfo.num_phys_buf = num_phys_buf; + pginfo.phys_buf_array = phys_buf_array; + pginfo.next_4k = (((u64)iova_start & ~PAGE_MASK) / + EHCA_PAGESIZE); + } + if (mr_rereg_mask & IB_MR_REREG_ACCESS) + new_acl = mr_access_flags; + if (mr_rereg_mask & IB_MR_REREG_PD) + new_pd = container_of(pd, struct ehca_pd, ib_pd); + + ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl, + new_pd, &pginfo, &tmp_lkey, &tmp_rkey); + if (ret) + goto rereg_phys_mr_exit1; + + /* successful reregistration */ + if (mr_rereg_mask & IB_MR_REREG_PD) + mr->pd = pd; + mr->lkey = tmp_lkey; + mr->rkey = tmp_rkey; + +rereg_phys_mr_exit1: + spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); +rereg_phys_mr_exit0: + if (ret) + ehca_err(mr->device, "ret=%x mr=%p mr_rereg_mask=%x pd=%p " + "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x " + "iova_start=%p", + ret, mr, mr_rereg_mask, pd, phys_buf_array, + num_phys_buf, mr_access_flags, iova_start); + return ret; +} /* end ehca_rereg_phys_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) +{ + int ret = 0; + u64 h_ret; + struct ehca_shca *shca = + container_of(mr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); + struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd); + u32 cur_pid = current->tgid; + unsigned long sl_flags; + struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + (my_pd->ownpid != cur_pid)) { + ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + ret = -EINVAL; + goto query_mr_exit0; + } + + if ((e_mr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p " + "e_mr->flags=%x", mr, e_mr, e_mr->flags); + ret = -EINVAL; + goto query_mr_exit0; + } + + memset(mr_attr, 0, sizeof(struct ib_mr_attr)); + spin_lock_irqsave(&e_mr->mrlock, sl_flags); + + h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lx mr=%p " + "hca_hndl=%lx mr_hndl=%lx lkey=%x", + h_ret, mr, shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, mr->lkey); + ret = ehca_mrmw_map_hrc_query_mr(h_ret); + goto query_mr_exit1; + } + mr_attr->pd = mr->pd; + mr_attr->device_virt_addr = hipzout.vaddr; + mr_attr->size = hipzout.len; + mr_attr->lkey = hipzout.lkey; + mr_attr->rkey = hipzout.rkey; + ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags); + +query_mr_exit1: + spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); +query_mr_exit0: + if (ret) + ehca_err(mr->device, "ret=%x mr=%p mr_attr=%p", + ret, mr, mr_attr); + return ret; +} /* end ehca_query_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dereg_mr(struct ib_mr *mr) +{ + int ret = 0; + u64 h_ret; + struct ehca_shca *shca = + container_of(mr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); + struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd); + u32 cur_pid = current->tgid; + + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + (my_pd->ownpid != cur_pid)) { + ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + ret = -EINVAL; + goto dereg_mr_exit0; + } + + if ((e_mr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p " + "e_mr->flags=%x", mr, e_mr, e_mr->flags); + ret = -EINVAL; + goto dereg_mr_exit0; + } else if (e_mr == shca->maxmr) { + /* should be impossible, however reject to be sure */ + ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p " + "shca->maxmr=%p mr->lkey=%x", + mr, shca->maxmr, mr->lkey); + ret = -EINVAL; + goto dereg_mr_exit0; + } + + /* TODO: BUSY: MR still has bound window(s) */ + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); + if (h_ret != H_SUCCESS) { + ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lx shca=%p " + "e_mr=%p hca_hndl=%lx mr_hndl=%lx mr->lkey=%x", + h_ret, shca, e_mr, shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, mr->lkey); + ret = ehca_mrmw_map_hrc_free_mr(h_ret); + goto dereg_mr_exit0; + } + + /* successful deregistration */ + ehca_mr_delete(e_mr); + +dereg_mr_exit0: + if (ret) + ehca_err(mr->device, "ret=%x mr=%p", ret, mr); + return ret; +} /* end ehca_dereg_mr() */ + +/*----------------------------------------------------------------------*/ + +struct ib_mw *ehca_alloc_mw(struct ib_pd *pd) +{ + struct ib_mw *ib_mw; + u64 h_ret; + struct ehca_mw *e_mw; + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + struct ehca_mw_hipzout_parms hipzout = {{0},0}; + + e_mw = ehca_mw_new(); + if (!e_mw) { + ib_mw = ERR_PTR(-ENOMEM); + goto alloc_mw_exit0; + } + + h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw, + e_pd->fw_pd, &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lx " + "shca=%p hca_hndl=%lx mw=%p", + h_ret, shca, shca->ipz_hca_handle.handle, e_mw); + ib_mw = ERR_PTR(ehca_mrmw_map_hrc_alloc(h_ret)); + goto alloc_mw_exit1; + } + /* successful MW allocation */ + e_mw->ipz_mw_handle = hipzout.handle; + e_mw->ib_mw.rkey = hipzout.rkey; + return &e_mw->ib_mw; + +alloc_mw_exit1: + ehca_mw_delete(e_mw); +alloc_mw_exit0: + if (IS_ERR(ib_mw)) + ehca_err(pd->device, "rc=%lx pd=%p", PTR_ERR(ib_mw), pd); + return ib_mw; +} /* end ehca_alloc_mw() */ + +/*----------------------------------------------------------------------*/ + +int ehca_bind_mw(struct ib_qp *qp, + struct ib_mw *mw, + struct ib_mw_bind *mw_bind) +{ + /* TODO: not supported up to now */ + ehca_gen_err("bind MW currently not supported by HCAD"); + + return -EPERM; +} /* end ehca_bind_mw() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dealloc_mw(struct ib_mw *mw) +{ + u64 h_ret; + struct ehca_shca *shca = + container_of(mw->device, struct ehca_shca, ib_device); + struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw); + + h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw); + if (h_ret != H_SUCCESS) { + ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lx shca=%p " + "mw=%p rkey=%x hca_hndl=%lx mw_hndl=%lx", + h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle, + e_mw->ipz_mw_handle.handle); + return ehca_mrmw_map_hrc_free_mw(h_ret); + } + /* successful deallocation */ + ehca_mw_delete(e_mw); + return 0; +} /* end ehca_dealloc_mw() */ + +/*----------------------------------------------------------------------*/ + +struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, + int mr_access_flags, + struct ib_fmr_attr *fmr_attr) +{ + struct ib_fmr *ib_fmr; + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_mr *e_fmr; + int ret; + u32 tmp_lkey, tmp_rkey; + struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + + /* check other parameters */ + if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + ehca_err(pd->device, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + if (mr_access_flags & IB_ACCESS_MW_BIND) { + ehca_err(pd->device, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) { + ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x " + "fmr_attr->max_maps=%x fmr_attr->page_shift=%x", + fmr_attr->max_pages, fmr_attr->max_maps, + fmr_attr->page_shift); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + if (((1 << fmr_attr->page_shift) != EHCA_PAGESIZE) && + ((1 << fmr_attr->page_shift) != PAGE_SIZE)) { + ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x", + fmr_attr->page_shift); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + + e_fmr = ehca_mr_new(); + if (!e_fmr) { + ib_fmr = ERR_PTR(-ENOMEM); + goto alloc_fmr_exit0; + } + e_fmr->flags |= EHCA_MR_FLAG_FMR; + + /* register MR on HCA */ + ret = ehca_reg_mr(shca, e_fmr, NULL, + fmr_attr->max_pages * (1 << fmr_attr->page_shift), + mr_access_flags, e_pd, &pginfo, + &tmp_lkey, &tmp_rkey); + if (ret) { + ib_fmr = ERR_PTR(ret); + goto alloc_fmr_exit1; + } + + /* successful */ + e_fmr->fmr_page_size = 1 << fmr_attr->page_shift; + e_fmr->fmr_max_pages = fmr_attr->max_pages; + e_fmr->fmr_max_maps = fmr_attr->max_maps; + e_fmr->fmr_map_cnt = 0; + return &e_fmr->ib.ib_fmr; + +alloc_fmr_exit1: + ehca_mr_delete(e_fmr); +alloc_fmr_exit0: + if (IS_ERR(ib_fmr)) + ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x " + "fmr_attr=%p", PTR_ERR(ib_fmr), pd, + mr_access_flags, fmr_attr); + return ib_fmr; +} /* end ehca_alloc_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_map_phys_fmr(struct ib_fmr *fmr, + u64 *page_list, + int list_len, + u64 iova) +{ + int ret; + struct ehca_shca *shca = + container_of(fmr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); + struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd); + struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + u32 tmp_lkey, tmp_rkey; + + if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x", + e_fmr, e_fmr->flags); + ret = -EINVAL; + goto map_phys_fmr_exit0; + } + ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len); + if (ret) + goto map_phys_fmr_exit0; + if (iova % e_fmr->fmr_page_size) { + /* only whole-numbered pages */ + ehca_err(fmr->device, "bad iova, iova=%lx fmr_page_size=%x", + iova, e_fmr->fmr_page_size); + ret = -EINVAL; + goto map_phys_fmr_exit0; + } + if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) { + /* HCAD does not limit the maps, however trace this anyway */ + ehca_info(fmr->device, "map limit exceeded, fmr=%p " + "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x", + fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps); + } + + pginfo.type = EHCA_MR_PGI_FMR; + pginfo.num_pages = list_len; + pginfo.num_4k = list_len * (e_fmr->fmr_page_size / EHCA_PAGESIZE); + pginfo.page_list = page_list; + pginfo.next_4k = ((iova & (e_fmr->fmr_page_size-1)) / + EHCA_PAGESIZE); + + ret = ehca_rereg_mr(shca, e_fmr, (u64*)iova, + list_len * e_fmr->fmr_page_size, + e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey); + if (ret) + goto map_phys_fmr_exit0; + + /* successful reregistration */ + e_fmr->fmr_map_cnt++; + e_fmr->ib.ib_fmr.lkey = tmp_lkey; + e_fmr->ib.ib_fmr.rkey = tmp_rkey; + return 0; + +map_phys_fmr_exit0: + if (ret) + ehca_err(fmr->device, "ret=%x fmr=%p page_list=%p list_len=%x " + "iova=%lx", + ret, fmr, page_list, list_len, iova); + return ret; +} /* end ehca_map_phys_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_unmap_fmr(struct list_head *fmr_list) +{ + int ret = 0; + struct ib_fmr *ib_fmr; + struct ehca_shca *shca = NULL; + struct ehca_shca *prev_shca; + struct ehca_mr *e_fmr; + u32 num_fmr = 0; + u32 unmap_fmr_cnt = 0; + + /* check all FMR belong to same SHCA, and check internal flag */ + list_for_each_entry(ib_fmr, fmr_list, list) { + prev_shca = shca; + if (!ib_fmr) { + ehca_gen_err("bad fmr=%p in list", ib_fmr); + ret = -EINVAL; + goto unmap_fmr_exit0; + } + shca = container_of(ib_fmr->device, struct ehca_shca, + ib_device); + e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); + if ((shca != prev_shca) && prev_shca) { + ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p " + "prev_shca=%p e_fmr=%p", + shca, prev_shca, e_fmr); + ret = -EINVAL; + goto unmap_fmr_exit0; + } + if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p " + "e_fmr->flags=%x", e_fmr, e_fmr->flags); + ret = -EINVAL; + goto unmap_fmr_exit0; + } + num_fmr++; + } + + /* loop over all FMRs to unmap */ + list_for_each_entry(ib_fmr, fmr_list, list) { + unmap_fmr_cnt++; + e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); + shca = container_of(ib_fmr->device, struct ehca_shca, + ib_device); + ret = ehca_unmap_one_fmr(shca, e_fmr); + if (ret) { + /* unmap failed, stop unmapping of rest of FMRs */ + ehca_err(&shca->ib_device, "unmap of one FMR failed, " + "stop rest, e_fmr=%p num_fmr=%x " + "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr, + unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey); + goto unmap_fmr_exit0; + } + } + +unmap_fmr_exit0: + if (ret) + ehca_gen_err("ret=%x fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x", + ret, fmr_list, num_fmr, unmap_fmr_cnt); + return ret; +} /* end ehca_unmap_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dealloc_fmr(struct ib_fmr *fmr) +{ + int ret; + u64 h_ret; + struct ehca_shca *shca = + container_of(fmr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); + + if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x", + e_fmr, e_fmr->flags); + ret = -EINVAL; + goto free_fmr_exit0; + } + + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); + if (h_ret != H_SUCCESS) { + ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lx e_fmr=%p " + "hca_hndl=%lx fmr_hndl=%lx fmr->lkey=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, fmr->lkey); + ret = ehca_mrmw_map_hrc_free_mr(h_ret); + goto free_fmr_exit0; + } + /* successful deregistration */ + ehca_mr_delete(e_fmr); + return 0; + +free_fmr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%x fmr=%p", ret, fmr); + return ret; +} /* end ehca_dealloc_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_reg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, /*OUT*/ + u32 *rkey) /*OUT*/ +{ + int ret; + u64 h_ret; + u32 hipz_acl; + struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); + if (ehca_use_hp_mr == 1) + hipz_acl |= 0x00000001; + + h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr, + (u64)iova_start, size, hipz_acl, + e_pd->fw_pd, &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lx " + "hca_hndl=%lx", h_ret, shca->ipz_hca_handle.handle); + ret = ehca_mrmw_map_hrc_alloc(h_ret); + goto ehca_reg_mr_exit0; + } + + e_mr->ipz_mr_handle = hipzout.handle; + + ret = ehca_reg_mr_rpages(shca, e_mr, pginfo); + if (ret) + goto ehca_reg_mr_exit1; + + /* successful registration */ + e_mr->num_pages = pginfo->num_pages; + e_mr->num_4k = pginfo->num_4k; + e_mr->start = iova_start; + e_mr->size = size; + e_mr->acl = acl; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + return 0; + +ehca_reg_mr_exit1: + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "h_ret=%lx shca=%p e_mr=%p " + "iova_start=%p size=%lx acl=%x e_pd=%p lkey=%x " + "pginfo=%p num_pages=%lx num_4k=%lx ret=%x", + h_ret, shca, e_mr, iova_start, size, acl, e_pd, + hipzout.lkey, pginfo, pginfo->num_pages, + pginfo->num_4k, ret); + ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, " + "not recoverable"); + } +ehca_reg_mr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p " + "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p " + "num_pages=%lx num_4k=%lx", + ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo, + pginfo->num_pages, pginfo->num_4k); + return ret; +} /* end ehca_reg_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_reg_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo) +{ + int ret = 0; + u64 h_ret; + u32 rnum; + u64 rpage; + u32 i; + u64 *kpage; + + kpage = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!kpage) { + ehca_err(&shca->ib_device, "kpage alloc failed"); + ret = -ENOMEM; + goto ehca_reg_mr_rpages_exit0; + } + + /* max 512 pages per shot */ + for (i = 0; i < ((pginfo->num_4k + 512 - 1) / 512); i++) { + + if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) { + rnum = pginfo->num_4k % 512; /* last shot */ + if (rnum == 0) + rnum = 512; /* last shot is full */ + } else + rnum = 512; + + if (rnum > 1) { + ret = ehca_set_pagebuf(e_mr, pginfo, rnum, kpage); + if (ret) { + ehca_err(&shca->ib_device, "ehca_set_pagebuf " + "bad rc, ret=%x rnum=%x kpage=%p", + ret, rnum, kpage); + ret = -EFAULT; + goto ehca_reg_mr_rpages_exit1; + } + rpage = virt_to_abs(kpage); + if (!rpage) { + ehca_err(&shca->ib_device, "kpage=%p i=%x", + kpage, i); + ret = -EFAULT; + goto ehca_reg_mr_rpages_exit1; + } + } else { /* rnum==1 */ + ret = ehca_set_pagebuf_1(e_mr, pginfo, &rpage); + if (ret) { + ehca_err(&shca->ib_device, "ehca_set_pagebuf_1 " + "bad rc, ret=%x i=%x", ret, i); + ret = -EFAULT; + goto ehca_reg_mr_rpages_exit1; + } + } + + h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, e_mr, + 0, /* pagesize 4k */ + 0, rpage, rnum); + + if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) { + /* + * check for 'registration complete'==H_SUCCESS + * and for 'page registered'==H_PAGE_REGISTERED + */ + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "last " + "hipz_reg_rpage_mr failed, h_ret=%lx " + "e_mr=%p i=%x hca_hndl=%lx mr_hndl=%lx" + " lkey=%x", h_ret, e_mr, i, + shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, + e_mr->ib.ib_mr.lkey); + ret = ehca_mrmw_map_hrc_rrpg_last(h_ret); + break; + } else + ret = 0; + } else if (h_ret != H_PAGE_REGISTERED) { + ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, " + "h_ret=%lx e_mr=%p i=%x lkey=%x hca_hndl=%lx " + "mr_hndl=%lx", h_ret, e_mr, i, + e_mr->ib.ib_mr.lkey, + shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle); + ret = ehca_mrmw_map_hrc_rrpg_notlast(h_ret); + break; + } else + ret = 0; + } /* end for(i) */ + + +ehca_reg_mr_rpages_exit1: + kfree(kpage); +ehca_reg_mr_rpages_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p pginfo=%p " + "num_pages=%lx num_4k=%lx", ret, shca, e_mr, pginfo, + pginfo->num_pages, pginfo->num_4k); + return ret; +} /* end ehca_reg_mr_rpages() */ + +/*----------------------------------------------------------------------*/ + +inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + u32 acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, /*OUT*/ + u32 *rkey) /*OUT*/ +{ + int ret; + u64 h_ret; + u32 hipz_acl; + u64 *kpage; + u64 rpage; + struct ehca_mr_pginfo pginfo_save; + struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); + + kpage = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (!kpage) { + ehca_err(&shca->ib_device, "kpage alloc failed"); + ret = -ENOMEM; + goto ehca_rereg_mr_rereg1_exit0; + } + + pginfo_save = *pginfo; + ret = ehca_set_pagebuf(e_mr, pginfo, pginfo->num_4k, kpage); + if (ret) { + ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p " + "pginfo=%p type=%x num_pages=%lx num_4k=%lx kpage=%p", + e_mr, pginfo, pginfo->type, pginfo->num_pages, + pginfo->num_4k,kpage); + goto ehca_rereg_mr_rereg1_exit1; + } + rpage = virt_to_abs(kpage); + if (!rpage) { + ehca_err(&shca->ib_device, "kpage=%p", kpage); + ret = -EFAULT; + goto ehca_rereg_mr_rereg1_exit1; + } + h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr, + (u64)iova_start, size, hipz_acl, + e_pd->fw_pd, rpage, &hipzout); + if (h_ret != H_SUCCESS) { + /* + * reregistration unsuccessful, try it again with the 3 hCalls, + * e.g. this is required in case H_MR_CONDITION + * (MW bound or MR is shared) + */ + ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed " + "(Rereg1), h_ret=%lx e_mr=%p", h_ret, e_mr); + *pginfo = pginfo_save; + ret = -EAGAIN; + } else if ((u64*)hipzout.vaddr != iova_start) { + ehca_err(&shca->ib_device, "PHYP changed iova_start in " + "rereg_pmr, iova_start=%p iova_start_out=%lx e_mr=%p " + "mr_handle=%lx lkey=%x lkey_out=%x", iova_start, + hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle, + e_mr->ib.ib_mr.lkey, hipzout.lkey); + ret = -EFAULT; + } else { + /* + * successful reregistration + * note: start and start_out are identical for eServer HCAs + */ + e_mr->num_pages = pginfo->num_pages; + e_mr->num_4k = pginfo->num_4k; + e_mr->start = iova_start; + e_mr->size = size; + e_mr->acl = acl; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + } + +ehca_rereg_mr_rereg1_exit1: + kfree(kpage); +ehca_rereg_mr_rereg1_exit0: + if ( ret && (ret != -EAGAIN) ) + ehca_err(&shca->ib_device, "ret=%x lkey=%x rkey=%x " + "pginfo=%p num_pages=%lx num_4k=%lx", + ret, *lkey, *rkey, pginfo, pginfo->num_pages, + pginfo->num_4k); + return ret; +} /* end ehca_rereg_mr_rereg1() */ + +/*----------------------------------------------------------------------*/ + +int ehca_rereg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, + u32 *rkey) +{ + int ret = 0; + u64 h_ret; + int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */ + int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */ + + /* first determine reregistration hCall(s) */ + if ((pginfo->num_4k > 512) || (e_mr->num_4k > 512) || + (pginfo->num_4k > e_mr->num_4k)) { + ehca_dbg(&shca->ib_device, "Rereg3 case, pginfo->num_4k=%lx " + "e_mr->num_4k=%x", pginfo->num_4k, e_mr->num_4k); + rereg_1_hcall = 0; + rereg_3_hcall = 1; + } + + if (e_mr->flags & EHCA_MR_FLAG_MAXMR) { /* check for max-MR */ + rereg_1_hcall = 0; + rereg_3_hcall = 1; + e_mr->flags &= ~EHCA_MR_FLAG_MAXMR; + ehca_err(&shca->ib_device, "Rereg MR for max-MR! e_mr=%p", + e_mr); + } + + if (rereg_1_hcall) { + ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size, + acl, e_pd, pginfo, lkey, rkey); + if (ret) { + if (ret == -EAGAIN) + rereg_3_hcall = 1; + else + goto ehca_rereg_mr_exit0; + } + } + + if (rereg_3_hcall) { + struct ehca_mr save_mr; + + /* first deregister old MR */ + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_free_mr failed, " + "h_ret=%lx e_mr=%p hca_hndl=%lx mr_hndl=%lx " + "mr->lkey=%x", + h_ret, e_mr, shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, + e_mr->ib.ib_mr.lkey); + ret = ehca_mrmw_map_hrc_free_mr(h_ret); + goto ehca_rereg_mr_exit0; + } + /* clean ehca_mr_t, without changing struct ib_mr and lock */ + save_mr = *e_mr; + ehca_mr_deletenew(e_mr); + + /* set some MR values */ + e_mr->flags = save_mr.flags; + e_mr->fmr_page_size = save_mr.fmr_page_size; + e_mr->fmr_max_pages = save_mr.fmr_max_pages; + e_mr->fmr_max_maps = save_mr.fmr_max_maps; + e_mr->fmr_map_cnt = save_mr.fmr_map_cnt; + + ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl, + e_pd, pginfo, lkey, rkey); + if (ret) { + u32 offset = (u64)(&e_mr->flags) - (u64)e_mr; + memcpy(&e_mr->flags, &(save_mr.flags), + sizeof(struct ehca_mr) - offset); + goto ehca_rereg_mr_exit0; + } + } + +ehca_rereg_mr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p " + "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p " + "num_pages=%lx lkey=%x rkey=%x rereg_1_hcall=%x " + "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size, + acl, e_pd, pginfo, pginfo->num_pages, *lkey, *rkey, + rereg_1_hcall, rereg_3_hcall); + return ret; +} /* end ehca_rereg_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_unmap_one_fmr(struct ehca_shca *shca, + struct ehca_mr *e_fmr) +{ + int ret = 0; + u64 h_ret; + int rereg_1_hcall = 1; /* 1: use hipz_mr_reregister directly */ + int rereg_3_hcall = 0; /* 1: use 3 hipz calls for unmapping */ + struct ehca_pd *e_pd = + container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd); + struct ehca_mr save_fmr; + u32 tmp_lkey, tmp_rkey; + struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + + /* first check if reregistration hCall can be used for unmap */ + if (e_fmr->fmr_max_pages > 512) { + rereg_1_hcall = 0; + rereg_3_hcall = 1; + } + + if (rereg_1_hcall) { + /* + * note: after using rereg hcall with len=0, + * rereg hcall must be used again for registering pages + */ + h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0, + 0, 0, e_pd->fw_pd, 0, &hipzout); + if (h_ret != H_SUCCESS) { + /* + * should not happen, because length checked above, + * FMRs are not shared and no MW bound to FMRs + */ + ehca_err(&shca->ib_device, "hipz_reregister_pmr failed " + "(Rereg1), h_ret=%lx e_fmr=%p hca_hndl=%lx " + "mr_hndl=%lx lkey=%x lkey_out=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, + e_fmr->ib.ib_fmr.lkey, hipzout.lkey); + rereg_3_hcall = 1; + } else { + /* successful reregistration */ + e_fmr->start = NULL; + e_fmr->size = 0; + tmp_lkey = hipzout.lkey; + tmp_rkey = hipzout.rkey; + } + } + + if (rereg_3_hcall) { + struct ehca_mr save_mr; + + /* first free old FMR */ + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_free_mr failed, " + "h_ret=%lx e_fmr=%p hca_hndl=%lx mr_hndl=%lx " + "lkey=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, + e_fmr->ib.ib_fmr.lkey); + ret = ehca_mrmw_map_hrc_free_mr(h_ret); + goto ehca_unmap_one_fmr_exit0; + } + /* clean ehca_mr_t, without changing lock */ + save_fmr = *e_fmr; + ehca_mr_deletenew(e_fmr); + + /* set some MR values */ + e_fmr->flags = save_fmr.flags; + e_fmr->fmr_page_size = save_fmr.fmr_page_size; + e_fmr->fmr_max_pages = save_fmr.fmr_max_pages; + e_fmr->fmr_max_maps = save_fmr.fmr_max_maps; + e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt; + e_fmr->acl = save_fmr.acl; + + pginfo.type = EHCA_MR_PGI_FMR; + pginfo.num_pages = 0; + pginfo.num_4k = 0; + ret = ehca_reg_mr(shca, e_fmr, NULL, + (e_fmr->fmr_max_pages * e_fmr->fmr_page_size), + e_fmr->acl, e_pd, &pginfo, &tmp_lkey, + &tmp_rkey); + if (ret) { + u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr; + memcpy(&e_fmr->flags, &(save_mr.flags), + sizeof(struct ehca_mr) - offset); + goto ehca_unmap_one_fmr_exit0; + } + } + +ehca_unmap_one_fmr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%x tmp_lkey=%x tmp_rkey=%x " + "fmr_max_pages=%x rereg_1_hcall=%x rereg_3_hcall=%x", + ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages, + rereg_1_hcall, rereg_3_hcall); + return ret; +} /* end ehca_unmap_one_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_reg_smr(struct ehca_shca *shca, + struct ehca_mr *e_origmr, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, /*OUT*/ + u32 *rkey) /*OUT*/ +{ + int ret = 0; + u64 h_ret; + u32 hipz_acl; + struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); + + h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, + (u64)iova_start, hipz_acl, e_pd->fw_pd, + &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lx " + "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x " + "e_pd=%p hca_hndl=%lx mr_hndl=%lx lkey=%x", + h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd, + shca->ipz_hca_handle.handle, + e_origmr->ipz_mr_handle.handle, + e_origmr->ib.ib_mr.lkey); + ret = ehca_mrmw_map_hrc_reg_smr(h_ret); + goto ehca_reg_smr_exit0; + } + /* successful registration */ + e_newmr->num_pages = e_origmr->num_pages; + e_newmr->num_4k = e_origmr->num_4k; + e_newmr->start = iova_start; + e_newmr->size = e_origmr->size; + e_newmr->acl = acl; + e_newmr->ipz_mr_handle = hipzout.handle; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + return 0; + +ehca_reg_smr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%x shca=%p e_origmr=%p " + "e_newmr=%p iova_start=%p acl=%x e_pd=%p", + ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd); + return ret; +} /* end ehca_reg_smr() */ + +/*----------------------------------------------------------------------*/ + +/* register internal max-MR to internal SHCA */ +int ehca_reg_internal_maxmr( + struct ehca_shca *shca, + struct ehca_pd *e_pd, + struct ehca_mr **e_maxmr) /*OUT*/ +{ + int ret; + struct ehca_mr *e_mr; + u64 *iova_start; + u64 size_maxmr; + struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0}; + struct ib_phys_buf ib_pbuf; + u32 num_pages_mr; + u32 num_pages_4k; /* 4k portion "pages" */ + + e_mr = ehca_mr_new(); + if (!e_mr) { + ehca_err(&shca->ib_device, "out of memory"); + ret = -ENOMEM; + goto ehca_reg_internal_maxmr_exit0; + } + e_mr->flags |= EHCA_MR_FLAG_MAXMR; + + /* register internal max-MR on HCA */ + size_maxmr = (u64)high_memory - PAGE_OFFSET; + iova_start = (u64*)KERNELBASE; + ib_pbuf.addr = 0; + ib_pbuf.size = size_maxmr; + num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size_maxmr + + PAGE_SIZE - 1) / PAGE_SIZE); + num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size_maxmr + + EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); + + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_pages = num_pages_mr; + pginfo.num_4k = num_pages_4k; + pginfo.num_phys_buf = 1; + pginfo.phys_buf_array = &ib_pbuf; + + ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd, + &pginfo, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey); + if (ret) { + ehca_err(&shca->ib_device, "reg of internal max MR failed, " + "e_mr=%p iova_start=%p size_maxmr=%lx num_pages_mr=%x " + "num_pages_4k=%x", e_mr, iova_start, size_maxmr, + num_pages_mr, num_pages_4k); + goto ehca_reg_internal_maxmr_exit1; + } + + /* successful registration of all pages */ + e_mr->ib.ib_mr.device = e_pd->ib_pd.device; + e_mr->ib.ib_mr.pd = &e_pd->ib_pd; + e_mr->ib.ib_mr.uobject = NULL; + atomic_inc(&(e_pd->ib_pd.usecnt)); + atomic_set(&(e_mr->ib.ib_mr.usecnt), 0); + *e_maxmr = e_mr; + return 0; + +ehca_reg_internal_maxmr_exit1: + ehca_mr_delete(e_mr); +ehca_reg_internal_maxmr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%x shca=%p e_pd=%p e_maxmr=%p", + ret, shca, e_pd, e_maxmr); + return ret; +} /* end ehca_reg_internal_maxmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_reg_maxmr(struct ehca_shca *shca, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, + u32 *rkey) +{ + u64 h_ret; + struct ehca_mr *e_origmr = shca->maxmr; + u32 hipz_acl; + struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0}; + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl); + + h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, + (u64)iova_start, hipz_acl, e_pd->fw_pd, + &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lx " + "e_origmr=%p hca_hndl=%lx mr_hndl=%lx lkey=%x", + h_ret, e_origmr, shca->ipz_hca_handle.handle, + e_origmr->ipz_mr_handle.handle, + e_origmr->ib.ib_mr.lkey); + return ehca_mrmw_map_hrc_reg_smr(h_ret); + } + /* successful registration */ + e_newmr->num_pages = e_origmr->num_pages; + e_newmr->num_4k = e_origmr->num_4k; + e_newmr->start = iova_start; + e_newmr->size = e_origmr->size; + e_newmr->acl = acl; + e_newmr->ipz_mr_handle = hipzout.handle; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + return 0; +} /* end ehca_reg_maxmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dereg_internal_maxmr(struct ehca_shca *shca) +{ + int ret; + struct ehca_mr *e_maxmr; + struct ib_pd *ib_pd; + + if (!shca->maxmr) { + ehca_err(&shca->ib_device, "bad call, shca=%p", shca); + ret = -EINVAL; + goto ehca_dereg_internal_maxmr_exit0; + } + + e_maxmr = shca->maxmr; + ib_pd = e_maxmr->ib.ib_mr.pd; + shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */ + + ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr); + if (ret) { + ehca_err(&shca->ib_device, "dereg internal max-MR failed, " + "ret=%x e_maxmr=%p shca=%p lkey=%x", + ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey); + shca->maxmr = e_maxmr; + goto ehca_dereg_internal_maxmr_exit0; + } + + atomic_dec(&ib_pd->usecnt); + +ehca_dereg_internal_maxmr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%x shca=%p shca->maxmr=%p", + ret, shca, shca->maxmr); + return ret; +} /* end ehca_dereg_internal_maxmr() */ + +/*----------------------------------------------------------------------*/ + +/* + * check physical buffer array of MR verbs for validness and + * calculates MR size + */ +int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + u64 *iova_start, + u64 *size) +{ + struct ib_phys_buf *pbuf = phys_buf_array; + u64 size_count = 0; + u32 i; + + if (num_phys_buf == 0) { + ehca_gen_err("bad phys buf array len, num_phys_buf=0"); + return -EINVAL; + } + /* check first buffer */ + if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) { + ehca_gen_err("iova_start/addr mismatch, iova_start=%p " + "pbuf->addr=%lx pbuf->size=%lx", + iova_start, pbuf->addr, pbuf->size); + return -EINVAL; + } + if (((pbuf->addr + pbuf->size) % PAGE_SIZE) && + (num_phys_buf > 1)) { + ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%lx " + "pbuf->size=%lx", pbuf->addr, pbuf->size); + return -EINVAL; + } + + for (i = 0; i < num_phys_buf; i++) { + if ((i > 0) && (pbuf->addr % PAGE_SIZE)) { + ehca_gen_err("bad address, i=%x pbuf->addr=%lx " + "pbuf->size=%lx", + i, pbuf->addr, pbuf->size); + return -EINVAL; + } + if (((i > 0) && /* not 1st */ + (i < (num_phys_buf - 1)) && /* not last */ + (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) { + ehca_gen_err("bad size, i=%x pbuf->size=%lx", + i, pbuf->size); + return -EINVAL; + } + size_count += pbuf->size; + pbuf++; + } + + *size = size_count; + return 0; +} /* end ehca_mr_chk_buf_and_calc_size() */ + +/*----------------------------------------------------------------------*/ + +/* check page list of map FMR verb for validness */ +int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, + u64 *page_list, + int list_len) +{ + u32 i; + u64 *page; + + if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) { + ehca_gen_err("bad list_len, list_len=%x " + "e_fmr->fmr_max_pages=%x fmr=%p", + list_len, e_fmr->fmr_max_pages, e_fmr); + return -EINVAL; + } + + /* each page must be aligned */ + page = page_list; + for (i = 0; i < list_len; i++) { + if (*page % e_fmr->fmr_page_size) { + ehca_gen_err("bad page, i=%x *page=%lx page=%p fmr=%p " + "fmr_page_size=%x", i, *page, page, e_fmr, + e_fmr->fmr_page_size); + return -EINVAL; + } + page++; + } + + return 0; +} /* end ehca_fmr_check_page_list() */ + +/*----------------------------------------------------------------------*/ + +/* setup page buffer from page info */ +int ehca_set_pagebuf(struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) +{ + int ret = 0; + struct ib_umem_chunk *prev_chunk; + struct ib_umem_chunk *chunk; + struct ib_phys_buf *pbuf; + u64 *fmrlist; + u64 num4k, pgaddr, offs4k; + u32 i = 0; + u32 j = 0; + + if (pginfo->type == EHCA_MR_PGI_PHYS) { + /* loop over desired phys_buf_array entries */ + while (i < number) { + pbuf = pginfo->phys_buf_array + pginfo->next_buf; + num4k = ((pbuf->addr % EHCA_PAGESIZE) + pbuf->size + + EHCA_PAGESIZE - 1) / EHCA_PAGESIZE; + offs4k = (pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE; + while (pginfo->next_4k < offs4k + num4k) { + /* sanity check */ + if ((pginfo->page_cnt >= pginfo->num_pages) || + (pginfo->page_4k_cnt >= pginfo->num_4k)) { + ehca_gen_err("page_cnt >= num_pages, " + "page_cnt=%lx " + "num_pages=%lx " + "page_4k_cnt=%lx " + "num_4k=%lx i=%x", + pginfo->page_cnt, + pginfo->num_pages, + pginfo->page_4k_cnt, + pginfo->num_4k, i); + ret = -EFAULT; + goto ehca_set_pagebuf_exit0; + } + *kpage = phys_to_abs( + (pbuf->addr & EHCA_PAGEMASK) + + (pginfo->next_4k * EHCA_PAGESIZE)); + if ( !(*kpage) && pbuf->addr ) { + ehca_gen_err("pbuf->addr=%lx " + "pbuf->size=%lx " + "next_4k=%lx", pbuf->addr, + pbuf->size, + pginfo->next_4k); + ret = -EFAULT; + goto ehca_set_pagebuf_exit0; + } + (pginfo->page_4k_cnt)++; + (pginfo->next_4k)++; + if (pginfo->next_4k % + (PAGE_SIZE / EHCA_PAGESIZE) == 0) + (pginfo->page_cnt)++; + kpage++; + i++; + if (i >= number) break; + } + if (pginfo->next_4k >= offs4k + num4k) { + (pginfo->next_buf)++; + pginfo->next_4k = 0; + } + } + } else if (pginfo->type == EHCA_MR_PGI_USER) { + /* loop over desired chunk entries */ + chunk = pginfo->next_chunk; + prev_chunk = pginfo->next_chunk; + list_for_each_entry_continue(chunk, + (&(pginfo->region->chunk_list)), + list) { + for (i = pginfo->next_nmap; i < chunk->nmap; ) { + pgaddr = ( page_to_pfn(chunk->page_list[i].page) + << PAGE_SHIFT ); + *kpage = phys_to_abs(pgaddr + + (pginfo->next_4k * + EHCA_PAGESIZE)); + if ( !(*kpage) ) { + ehca_gen_err("pgaddr=%lx " + "chunk->page_list[i]=%lx " + "i=%x next_4k=%lx mr=%p", + pgaddr, + (u64)sg_dma_address( + &chunk-> + page_list[i]), + i, pginfo->next_4k, e_mr); + ret = -EFAULT; + goto ehca_set_pagebuf_exit0; + } + (pginfo->page_4k_cnt)++; + (pginfo->next_4k)++; + kpage++; + if (pginfo->next_4k % + (PAGE_SIZE / EHCA_PAGESIZE) == 0) { + (pginfo->page_cnt)++; + (pginfo->next_nmap)++; + pginfo->next_4k = 0; + i++; + } + j++; + if (j >= number) break; + } + if ((pginfo->next_nmap >= chunk->nmap) && + (j >= number)) { + pginfo->next_nmap = 0; + prev_chunk = chunk; + break; + } else if (pginfo->next_nmap >= chunk->nmap) { + pginfo->next_nmap = 0; + prev_chunk = chunk; + } else if (j >= number) + break; + else + prev_chunk = chunk; + } + pginfo->next_chunk = + list_prepare_entry(prev_chunk, + (&(pginfo->region->chunk_list)), + list); + } else if (pginfo->type == EHCA_MR_PGI_FMR) { + /* loop over desired page_list entries */ + fmrlist = pginfo->page_list + pginfo->next_listelem; + for (i = 0; i < number; i++) { + *kpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) + + pginfo->next_4k * EHCA_PAGESIZE); + if ( !(*kpage) ) { + ehca_gen_err("*fmrlist=%lx fmrlist=%p " + "next_listelem=%lx next_4k=%lx", + *fmrlist, fmrlist, + pginfo->next_listelem, + pginfo->next_4k); + ret = -EFAULT; + goto ehca_set_pagebuf_exit0; + } + (pginfo->page_4k_cnt)++; + (pginfo->next_4k)++; + kpage++; + if (pginfo->next_4k % + (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) { + (pginfo->page_cnt)++; + (pginfo->next_listelem)++; + fmrlist++; + pginfo->next_4k = 0; + } + } + } else { + ehca_gen_err("bad pginfo->type=%x", pginfo->type); + ret = -EFAULT; + goto ehca_set_pagebuf_exit0; + } + +ehca_set_pagebuf_exit0: + if (ret) + ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx " + "num_4k=%lx next_buf=%lx next_4k=%lx number=%x " + "kpage=%p page_cnt=%lx page_4k_cnt=%lx i=%x " + "next_listelem=%lx region=%p next_chunk=%p " + "next_nmap=%lx", ret, e_mr, pginfo, pginfo->type, + pginfo->num_pages, pginfo->num_4k, + pginfo->next_buf, pginfo->next_4k, number, kpage, + pginfo->page_cnt, pginfo->page_4k_cnt, i, + pginfo->next_listelem, pginfo->region, + pginfo->next_chunk, pginfo->next_nmap); + return ret; +} /* end ehca_set_pagebuf() */ + +/*----------------------------------------------------------------------*/ + +/* setup 1 page from page info page buffer */ +int ehca_set_pagebuf_1(struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo, + u64 *rpage) +{ + int ret = 0; + struct ib_phys_buf *tmp_pbuf; + u64 *fmrlist; + struct ib_umem_chunk *chunk; + struct ib_umem_chunk *prev_chunk; + u64 pgaddr, num4k, offs4k; + + if (pginfo->type == EHCA_MR_PGI_PHYS) { + /* sanity check */ + if ((pginfo->page_cnt >= pginfo->num_pages) || + (pginfo->page_4k_cnt >= pginfo->num_4k)) { + ehca_gen_err("page_cnt >= num_pages, page_cnt=%lx " + "num_pages=%lx page_4k_cnt=%lx num_4k=%lx", + pginfo->page_cnt, pginfo->num_pages, + pginfo->page_4k_cnt, pginfo->num_4k); + ret = -EFAULT; + goto ehca_set_pagebuf_1_exit0; + } + tmp_pbuf = pginfo->phys_buf_array + pginfo->next_buf; + num4k = ((tmp_pbuf->addr % EHCA_PAGESIZE) + tmp_pbuf->size + + EHCA_PAGESIZE - 1) / EHCA_PAGESIZE; + offs4k = (tmp_pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE; + *rpage = phys_to_abs((tmp_pbuf->addr & EHCA_PAGEMASK) + + (pginfo->next_4k * EHCA_PAGESIZE)); + if ( !(*rpage) && tmp_pbuf->addr ) { + ehca_gen_err("tmp_pbuf->addr=%lx" + " tmp_pbuf->size=%lx next_4k=%lx", + tmp_pbuf->addr, tmp_pbuf->size, + pginfo->next_4k); + ret = -EFAULT; + goto ehca_set_pagebuf_1_exit0; + } + (pginfo->page_4k_cnt)++; + (pginfo->next_4k)++; + if (pginfo->next_4k % (PAGE_SIZE / EHCA_PAGESIZE) == 0) + (pginfo->page_cnt)++; + if (pginfo->next_4k >= offs4k + num4k) { + (pginfo->next_buf)++; + pginfo->next_4k = 0; + } + } else if (pginfo->type == EHCA_MR_PGI_USER) { + chunk = pginfo->next_chunk; + prev_chunk = pginfo->next_chunk; + list_for_each_entry_continue(chunk, + (&(pginfo->region->chunk_list)), + list) { + pgaddr = ( page_to_pfn(chunk->page_list[ + pginfo->next_nmap].page) + << PAGE_SHIFT); + *rpage = phys_to_abs(pgaddr + + (pginfo->next_4k * EHCA_PAGESIZE)); + if ( !(*rpage) ) { + ehca_gen_err("pgaddr=%lx chunk->page_list[]=%lx" + " next_nmap=%lx next_4k=%lx mr=%p", + pgaddr, (u64)sg_dma_address( + &chunk->page_list[ + pginfo-> + next_nmap]), + pginfo->next_nmap, pginfo->next_4k, + e_mr); + ret = -EFAULT; + goto ehca_set_pagebuf_1_exit0; + } + (pginfo->page_4k_cnt)++; + (pginfo->next_4k)++; + if (pginfo->next_4k % + (PAGE_SIZE / EHCA_PAGESIZE) == 0) { + (pginfo->page_cnt)++; + (pginfo->next_nmap)++; + pginfo->next_4k = 0; + } + if (pginfo->next_nmap >= chunk->nmap) { + pginfo->next_nmap = 0; + prev_chunk = chunk; + } + break; + } + pginfo->next_chunk = + list_prepare_entry(prev_chunk, + (&(pginfo->region->chunk_list)), + list); + } else if (pginfo->type == EHCA_MR_PGI_FMR) { + fmrlist = pginfo->page_list + pginfo->next_listelem; + *rpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) + + pginfo->next_4k * EHCA_PAGESIZE); + if ( !(*rpage) ) { + ehca_gen_err("*fmrlist=%lx fmrlist=%p " + "next_listelem=%lx next_4k=%lx", + *fmrlist, fmrlist, pginfo->next_listelem, + pginfo->next_4k); + ret = -EFAULT; + goto ehca_set_pagebuf_1_exit0; + } + (pginfo->page_4k_cnt)++; + (pginfo->next_4k)++; + if (pginfo->next_4k % + (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) { + (pginfo->page_cnt)++; + (pginfo->next_listelem)++; + pginfo->next_4k = 0; + } + } else { + ehca_gen_err("bad pginfo->type=%x", pginfo->type); + ret = -EFAULT; + goto ehca_set_pagebuf_1_exit0; + } + +ehca_set_pagebuf_1_exit0: + if (ret) + ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx " + "num_4k=%lx next_buf=%lx next_4k=%lx rpage=%p " + "page_cnt=%lx page_4k_cnt=%lx next_listelem=%lx " + "region=%p next_chunk=%p next_nmap=%lx", ret, e_mr, + pginfo, pginfo->type, pginfo->num_pages, + pginfo->num_4k, pginfo->next_buf, pginfo->next_4k, + rpage, pginfo->page_cnt, pginfo->page_4k_cnt, + pginfo->next_listelem, pginfo->region, + pginfo->next_chunk, pginfo->next_nmap); + return ret; +} /* end ehca_set_pagebuf_1() */ + +/*----------------------------------------------------------------------*/ + +/* + * check MR if it is a max-MR, i.e. uses whole memory + * in case it's a max-MR 1 is returned, else 0 + */ +int ehca_mr_is_maxmr(u64 size, + u64 *iova_start) +{ + /* a MR is treated as max-MR only if it fits following: */ + if ((size == ((u64)high_memory - PAGE_OFFSET)) && + (iova_start == (void*)KERNELBASE)) { + ehca_gen_dbg("this is a max-MR"); + return 1; + } else + return 0; +} /* end ehca_mr_is_maxmr() */ + +/*----------------------------------------------------------------------*/ + +/* map access control for MR/MW. This routine is used for MR and MW. */ +void ehca_mrmw_map_acl(int ib_acl, + u32 *hipz_acl) +{ + *hipz_acl = 0; + if (ib_acl & IB_ACCESS_REMOTE_READ) + *hipz_acl |= HIPZ_ACCESSCTRL_R_READ; + if (ib_acl & IB_ACCESS_REMOTE_WRITE) + *hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE; + if (ib_acl & IB_ACCESS_REMOTE_ATOMIC) + *hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC; + if (ib_acl & IB_ACCESS_LOCAL_WRITE) + *hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE; + if (ib_acl & IB_ACCESS_MW_BIND) + *hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND; +} /* end ehca_mrmw_map_acl() */ + +/*----------------------------------------------------------------------*/ + +/* sets page size in hipz access control for MR/MW. */ +void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl) /*INOUT*/ +{ + return; /* HCA supports only 4k */ +} /* end ehca_mrmw_set_pgsize_hipz_acl() */ + +/*----------------------------------------------------------------------*/ + +/* + * reverse map access control for MR/MW. + * This routine is used for MR and MW. + */ +void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, + int *ib_acl) /*OUT*/ +{ + *ib_acl = 0; + if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ) + *ib_acl |= IB_ACCESS_REMOTE_READ; + if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE) + *ib_acl |= IB_ACCESS_REMOTE_WRITE; + if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC) + *ib_acl |= IB_ACCESS_REMOTE_ATOMIC; + if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE) + *ib_acl |= IB_ACCESS_LOCAL_WRITE; + if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND) + *ib_acl |= IB_ACCESS_MW_BIND; +} /* end ehca_mrmw_reverse_map_acl() */ + + +/*----------------------------------------------------------------------*/ + +/* + * map HIPZ rc to IB retcodes for MR/MW allocations + * Used for hipz_mr_reg_alloc and hipz_mw_alloc. + */ +int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc) +{ + switch (hipz_rc) { + case H_SUCCESS: /* successful completion */ + return 0; + case H_ADAPTER_PARM: /* invalid adapter handle */ + case H_RT_PARM: /* invalid resource type */ + case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ + case H_MLENGTH_PARM: /* invalid memory length */ + case H_MEM_ACCESS_PARM: /* invalid access controls */ + case H_CONSTRAINED: /* resource constraint */ + return -EINVAL; + case H_BUSY: /* long busy */ + return -EBUSY; + default: + return -EINVAL; + } +} /* end ehca_mrmw_map_hrc_alloc() */ + +/*----------------------------------------------------------------------*/ + +/* + * map HIPZ rc to IB retcodes for MR register rpage + * Used for hipz_h_register_rpage_mr at registering last page + */ +int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc) +{ + switch (hipz_rc) { + case H_SUCCESS: /* registration complete */ + return 0; + case H_PAGE_REGISTERED: /* page registered */ + case H_ADAPTER_PARM: /* invalid adapter handle */ + case H_RH_PARM: /* invalid resource handle */ +/* case H_QT_PARM: invalid queue type */ + case H_PARAMETER: /* + * invalid logical address, + * or count zero or greater 512 + */ + case H_TABLE_FULL: /* page table full */ + case H_HARDWARE: /* HCA not operational */ + return -EINVAL; + case H_BUSY: /* long busy */ + return -EBUSY; + default: + return -EINVAL; + } +} /* end ehca_mrmw_map_hrc_rrpg_last() */ + +/*----------------------------------------------------------------------*/ + +/* + * map HIPZ rc to IB retcodes for MR register rpage + * Used for hipz_h_register_rpage_mr at registering one page, but not last page + */ +int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc) +{ + switch (hipz_rc) { + case H_PAGE_REGISTERED: /* page registered */ + return 0; + case H_SUCCESS: /* registration complete */ + case H_ADAPTER_PARM: /* invalid adapter handle */ + case H_RH_PARM: /* invalid resource handle */ +/* case H_QT_PARM: invalid queue type */ + case H_PARAMETER: /* + * invalid logical address, + * or count zero or greater 512 + */ + case H_TABLE_FULL: /* page table full */ + case H_HARDWARE: /* HCA not operational */ + return -EINVAL; + case H_BUSY: /* long busy */ + return -EBUSY; + default: + return -EINVAL; + } +} /* end ehca_mrmw_map_hrc_rrpg_notlast() */ + +/*----------------------------------------------------------------------*/ + +/* map HIPZ rc to IB retcodes for MR query. Used for hipz_mr_query. */ +int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc) +{ + switch (hipz_rc) { + case H_SUCCESS: /* successful completion */ + return 0; + case H_ADAPTER_PARM: /* invalid adapter handle */ + case H_RH_PARM: /* invalid resource handle */ + return -EINVAL; + case H_BUSY: /* long busy */ + return -EBUSY; + default: + return -EINVAL; + } +} /* end ehca_mrmw_map_hrc_query_mr() */ + +/*----------------------------------------------------------------------*/ +/*----------------------------------------------------------------------*/ + +/* + * map HIPZ rc to IB retcodes for freeing MR resource + * Used for hipz_h_free_resource_mr + */ +int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc) +{ + switch (hipz_rc) { + case H_SUCCESS: /* resource freed */ + return 0; + case H_ADAPTER_PARM: /* invalid adapter handle */ + case H_RH_PARM: /* invalid resource handle */ + case H_R_STATE: /* invalid resource state */ + case H_HARDWARE: /* HCA not operational */ + return -EINVAL; + case H_RESOURCE: /* Resource in use */ + case H_BUSY: /* long busy */ + return -EBUSY; + default: + return -EINVAL; + } +} /* end ehca_mrmw_map_hrc_free_mr() */ + +/*----------------------------------------------------------------------*/ + +/* + * map HIPZ rc to IB retcodes for freeing MW resource + * Used for hipz_h_free_resource_mw + */ +int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc) +{ + switch (hipz_rc) { + case H_SUCCESS: /* resource freed */ + return 0; + case H_ADAPTER_PARM: /* invalid adapter handle */ + case H_RH_PARM: /* invalid resource handle */ + case H_R_STATE: /* invalid resource state */ + case H_HARDWARE: /* HCA not operational */ + return -EINVAL; + case H_RESOURCE: /* Resource in use */ + case H_BUSY: /* long busy */ + return -EBUSY; + default: + return -EINVAL; + } +} /* end ehca_mrmw_map_hrc_free_mw() */ + +/*----------------------------------------------------------------------*/ + +/* + * map HIPZ rc to IB retcodes for SMR registrations + * Used for hipz_h_register_smr. + */ +int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc) +{ + switch (hipz_rc) { + case H_SUCCESS: /* successful completion */ + return 0; + case H_ADAPTER_PARM: /* invalid adapter handle */ + case H_RH_PARM: /* invalid resource handle */ + case H_MEM_PARM: /* invalid MR virtual address */ + case H_MEM_ACCESS_PARM: /* invalid access controls */ + case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ + return -EINVAL; + case H_BUSY: /* long busy */ + return -EBUSY; + default: + return -EINVAL; + } +} /* end ehca_mrmw_map_hrc_reg_smr() */ + +/*----------------------------------------------------------------------*/ + +/* + * MR destructor and constructor + * used in Reregister MR verb, sets all fields in ehca_mr_t to 0, + * except struct ib_mr and spinlock + */ +void ehca_mr_deletenew(struct ehca_mr *mr) +{ + mr->flags = 0; + mr->num_pages = 0; + mr->num_4k = 0; + mr->acl = 0; + mr->start = NULL; + mr->fmr_page_size = 0; + mr->fmr_max_pages = 0; + mr->fmr_max_maps = 0; + mr->fmr_map_cnt = 0; + memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle)); + memset(&mr->galpas, 0, sizeof(mr->galpas)); + mr->nr_of_pages = 0; + mr->pagearray = NULL; +} /* end ehca_mr_deletenew() */ + +int ehca_init_mrmw_cache(void) +{ + mr_cache = kmem_cache_create("ehca_cache_mr", + sizeof(struct ehca_mr), 0, + SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!mr_cache) + return -ENOMEM; + mw_cache = kmem_cache_create("ehca_cache_mw", + sizeof(struct ehca_mw), 0, + SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!mw_cache) { + kmem_cache_destroy(mr_cache); + mr_cache = NULL; + return -ENOMEM; + } + return 0; +} + +void ehca_cleanup_mrmw_cache(void) +{ + if (mr_cache) + kmem_cache_destroy(mr_cache); + if (mw_cache) + kmem_cache_destroy(mw_cache); +} diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/infiniband/hw/ehca/ehca_mrmw.h new file mode 100644 index 000000000000..d936e40a5748 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.h @@ -0,0 +1,140 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * MR/MW declarations and inline functions + * + * Authors: Dietmar Decker + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _EHCA_MRMW_H_ +#define _EHCA_MRMW_H_ + +int ehca_reg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, + u32 *rkey); + +int ehca_reg_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo); + +int ehca_rereg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int mr_access_flags, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, + u32 *rkey); + +int ehca_unmap_one_fmr(struct ehca_shca *shca, + struct ehca_mr *e_fmr); + +int ehca_reg_smr(struct ehca_shca *shca, + struct ehca_mr *e_origmr, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, + u32 *rkey); + +int ehca_reg_internal_maxmr(struct ehca_shca *shca, + struct ehca_pd *e_pd, + struct ehca_mr **maxmr); + +int ehca_reg_maxmr(struct ehca_shca *shca, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, + u32 *rkey); + +int ehca_dereg_internal_maxmr(struct ehca_shca *shca); + +int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + u64 *iova_start, + u64 *size); + +int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, + u64 *page_list, + int list_len); + +int ehca_set_pagebuf(struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage); + +int ehca_set_pagebuf_1(struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo, + u64 *rpage); + +int ehca_mr_is_maxmr(u64 size, + u64 *iova_start); + +void ehca_mrmw_map_acl(int ib_acl, + u32 *hipz_acl); + +void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl); + +void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, + int *ib_acl); + +int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc); + +int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc); + +int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc); + +int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc); + +int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc); + +int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc); + +int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc); + +void ehca_mr_deletenew(struct ehca_mr *mr); + +#endif /*_EHCA_MRMW_H_*/ diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c new file mode 100644 index 000000000000..2c3cdc6f7b39 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_pd.c @@ -0,0 +1,114 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * PD functions + * + * Authors: Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "ehca_tools.h" +#include "ehca_iverbs.h" + +static struct kmem_cache *pd_cache; + +struct ib_pd *ehca_alloc_pd(struct ib_device *device, + struct ib_ucontext *context, struct ib_udata *udata) +{ + struct ehca_pd *pd; + + pd = kmem_cache_alloc(pd_cache, SLAB_KERNEL); + if (!pd) { + ehca_err(device, "device=%p context=%p out of memory", + device, context); + return ERR_PTR(-ENOMEM); + } + + memset(pd, 0, sizeof(struct ehca_pd)); + pd->ownpid = current->tgid; + + /* + * Kernel PD: when device = -1, 0 + * User PD: when context != -1 + */ + if (!context) { + /* + * Kernel PDs after init reuses always + * the one created in ehca_shca_reopen() + */ + struct ehca_shca *shca = container_of(device, struct ehca_shca, + ib_device); + pd->fw_pd.value = shca->pd->fw_pd.value; + } else + pd->fw_pd.value = (u64)pd; + + return &pd->ib_pd; +} + +int ehca_dealloc_pd(struct ib_pd *pd) +{ + u32 cur_pid = current->tgid; + struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); + + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + ehca_err(pd->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } + + kmem_cache_free(pd_cache, + container_of(pd, struct ehca_pd, ib_pd)); + + return 0; +} + +int ehca_init_pd_cache(void) +{ + pd_cache = kmem_cache_create("ehca_cache_pd", + sizeof(struct ehca_pd), 0, + SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!pd_cache) + return -ENOMEM; + return 0; +} + +void ehca_cleanup_pd_cache(void) +{ + if (pd_cache) + kmem_cache_destroy(pd_cache); +} diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h new file mode 100644 index 000000000000..8707d297ce4c --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_qes.h @@ -0,0 +1,259 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Hardware request structures + * + * Authors: Waleri Fomin + * Reinhard Ernst + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef _EHCA_QES_H_ +#define _EHCA_QES_H_ + +#include "ehca_tools.h" + +/* virtual scatter gather entry to specify remote adresses with length */ +struct ehca_vsgentry { + u64 vaddr; + u32 lkey; + u32 length; +}; + +#define GRH_FLAG_MASK EHCA_BMASK_IBM(7,7) +#define GRH_IPVERSION_MASK EHCA_BMASK_IBM(0,3) +#define GRH_TCLASS_MASK EHCA_BMASK_IBM(4,12) +#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13,31) +#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32,47) +#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48,55) +#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56,63) + +/* + * Unreliable Datagram Address Vector Format + * see IBTA Vol1 chapter 8.3 Global Routing Header + */ +struct ehca_ud_av { + u8 sl; + u8 lnh; + u16 dlid; + u8 reserved1; + u8 reserved2; + u8 reserved3; + u8 slid_path_bits; + u8 reserved4; + u8 ipd; + u8 reserved5; + u8 pmtu; + u32 reserved6; + u64 reserved7; + union { + struct { + u64 word_0; /* always set to 6 */ + /*should be 0x1B for IB transport */ + u64 word_1; + u64 word_2; + u64 word_3; + u64 word_4; + } grh; + struct { + u32 wd_0; + u32 wd_1; + /* DWord_1 --> SGID */ + + u32 sgid_wd3; + u32 sgid_wd2; + + u32 sgid_wd1; + u32 sgid_wd0; + /* DWord_3 --> DGID */ + + u32 dgid_wd3; + u32 dgid_wd2; + + u32 dgid_wd1; + u32 dgid_wd0; + } grh_l; + }; +}; + +/* maximum number of sg entries allowed in a WQE */ +#define MAX_WQE_SG_ENTRIES 252 + +#define WQE_OPTYPE_SEND 0x80 +#define WQE_OPTYPE_RDMAREAD 0x40 +#define WQE_OPTYPE_RDMAWRITE 0x20 +#define WQE_OPTYPE_CMPSWAP 0x10 +#define WQE_OPTYPE_FETCHADD 0x08 +#define WQE_OPTYPE_BIND 0x04 + +#define WQE_WRFLAG_REQ_SIGNAL_COM 0x80 +#define WQE_WRFLAG_FENCE 0x40 +#define WQE_WRFLAG_IMM_DATA_PRESENT 0x20 +#define WQE_WRFLAG_SOLIC_EVENT 0x10 + +#define WQEF_CACHE_HINT 0x80 +#define WQEF_CACHE_HINT_RD_WR 0x40 +#define WQEF_TIMED_WQE 0x20 +#define WQEF_PURGE 0x08 +#define WQEF_HIGH_NIBBLE 0xF0 + +#define MW_BIND_ACCESSCTRL_R_WRITE 0x40 +#define MW_BIND_ACCESSCTRL_R_READ 0x20 +#define MW_BIND_ACCESSCTRL_R_ATOMIC 0x10 + +struct ehca_wqe { + u64 work_request_id; + u8 optype; + u8 wr_flag; + u16 pkeyi; + u8 wqef; + u8 nr_of_data_seg; + u16 wqe_provided_slid; + u32 destination_qp_number; + u32 resync_psn_sqp; + u32 local_ee_context_qkey; + u32 immediate_data; + union { + struct { + u64 remote_virtual_adress; + u32 rkey; + u32 reserved; + u64 atomic_1st_op_dma_len; + u64 atomic_2nd_op; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; + + } nud; + struct { + u64 ehca_ud_av_ptr; + u64 reserved1; + u64 reserved2; + u64 reserved3; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; + } ud_avp; + struct { + struct ehca_ud_av ud_av; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES - + 2]; + } ud_av; + struct { + u64 reserved0; + u64 reserved1; + u64 reserved2; + u64 reserved3; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; + } all_rcv; + + struct { + u64 reserved; + u32 rkey; + u32 old_rkey; + u64 reserved1; + u64 reserved2; + u64 virtual_address; + u32 reserved3; + u32 length; + u32 reserved4; + u16 reserved5; + u8 reserved6; + u8 lr_ctl; + u32 lkey; + u32 reserved7; + u64 reserved8; + u64 reserved9; + u64 reserved10; + u64 reserved11; + } bind; + struct { + u64 reserved12; + u64 reserved13; + u32 size; + u32 start; + } inline_data; + } u; + +}; + +#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0,0) +#define WC_IMM_DATA EHCA_BMASK_IBM(1,1) +#define WC_GRH_PRESENT EHCA_BMASK_IBM(2,2) +#define WC_SE_BIT EHCA_BMASK_IBM(3,3) +#define WC_STATUS_ERROR_BIT 0x80000000 +#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800 +#define WC_STATUS_PURGE_BIT 0x10 + +struct ehca_cqe { + u64 work_request_id; + u8 optype; + u8 w_completion_flags; + u16 reserved1; + u32 nr_bytes_transferred; + u32 immediate_data; + u32 local_qp_number; + u8 freed_resource_count; + u8 service_level; + u16 wqe_count; + u32 qp_token; + u32 qkey_ee_token; + u32 remote_qp_number; + u16 dlid; + u16 rlid; + u16 reserved2; + u16 pkey_index; + u32 cqe_timestamp; + u32 wqe_timestamp; + u8 wqe_timestamp_valid; + u8 reserved3; + u8 reserved4; + u8 cqe_flags; + u32 status; +}; + +struct ehca_eqe { + u64 entry; +}; + +struct ehca_mrte { + u64 starting_va; + u64 length; /* length of memory region in bytes*/ + u32 pd; + u8 key_instance; + u8 pagesize; + u8 mr_control; + u8 local_remote_access_ctrl; + u8 reserved[0x20 - 0x18]; + u64 at_pointer[4]; +}; +#endif /*_EHCA_QES_H_*/ diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c new file mode 100644 index 000000000000..4b27bedc6c24 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -0,0 +1,1506 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * QP functions + * + * Authors: Waleri Fomin + * Hoang-Nam Nguyen + * Reinhard Ernst + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#include + +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_qes.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" +#include "hipz_fns.h" + +static struct kmem_cache *qp_cache; + +/* + * attributes not supported by query qp + */ +#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_MAX_DEST_RD_ATOMIC | \ + IB_QP_MAX_QP_RD_ATOMIC | \ + IB_QP_ACCESS_FLAGS | \ + IB_QP_EN_SQD_ASYNC_NOTIFY) + +/* + * ehca (internal) qp state values + */ +enum ehca_qp_state { + EHCA_QPS_RESET = 1, + EHCA_QPS_INIT = 2, + EHCA_QPS_RTR = 3, + EHCA_QPS_RTS = 5, + EHCA_QPS_SQD = 6, + EHCA_QPS_SQE = 8, + EHCA_QPS_ERR = 128 +}; + +/* + * qp state transitions as defined by IB Arch Rel 1.1 page 431 + */ +enum ib_qp_statetrans { + IB_QPST_ANY2RESET, + IB_QPST_ANY2ERR, + IB_QPST_RESET2INIT, + IB_QPST_INIT2RTR, + IB_QPST_INIT2INIT, + IB_QPST_RTR2RTS, + IB_QPST_RTS2SQD, + IB_QPST_RTS2RTS, + IB_QPST_SQD2RTS, + IB_QPST_SQE2RTS, + IB_QPST_SQD2SQD, + IB_QPST_MAX /* nr of transitions, this must be last!!! */ +}; + +/* + * ib2ehca_qp_state maps IB to ehca qp_state + * returns ehca qp state corresponding to given ib qp state + */ +static inline enum ehca_qp_state ib2ehca_qp_state(enum ib_qp_state ib_qp_state) +{ + switch (ib_qp_state) { + case IB_QPS_RESET: + return EHCA_QPS_RESET; + case IB_QPS_INIT: + return EHCA_QPS_INIT; + case IB_QPS_RTR: + return EHCA_QPS_RTR; + case IB_QPS_RTS: + return EHCA_QPS_RTS; + case IB_QPS_SQD: + return EHCA_QPS_SQD; + case IB_QPS_SQE: + return EHCA_QPS_SQE; + case IB_QPS_ERR: + return EHCA_QPS_ERR; + default: + ehca_gen_err("invalid ib_qp_state=%x", ib_qp_state); + return -EINVAL; + } +} + +/* + * ehca2ib_qp_state maps ehca to IB qp_state + * returns ib qp state corresponding to given ehca qp state + */ +static inline enum ib_qp_state ehca2ib_qp_state(enum ehca_qp_state + ehca_qp_state) +{ + switch (ehca_qp_state) { + case EHCA_QPS_RESET: + return IB_QPS_RESET; + case EHCA_QPS_INIT: + return IB_QPS_INIT; + case EHCA_QPS_RTR: + return IB_QPS_RTR; + case EHCA_QPS_RTS: + return IB_QPS_RTS; + case EHCA_QPS_SQD: + return IB_QPS_SQD; + case EHCA_QPS_SQE: + return IB_QPS_SQE; + case EHCA_QPS_ERR: + return IB_QPS_ERR; + default: + ehca_gen_err("invalid ehca_qp_state=%x", ehca_qp_state); + return -EINVAL; + } +} + +/* + * ehca_qp_type used as index for req_attr and opt_attr of + * struct ehca_modqp_statetrans + */ +enum ehca_qp_type { + QPT_RC = 0, + QPT_UC = 1, + QPT_UD = 2, + QPT_SQP = 3, + QPT_MAX +}; + +/* + * ib2ehcaqptype maps Ib to ehca qp_type + * returns ehca qp type corresponding to ib qp type + */ +static inline enum ehca_qp_type ib2ehcaqptype(enum ib_qp_type ibqptype) +{ + switch (ibqptype) { + case IB_QPT_SMI: + case IB_QPT_GSI: + return QPT_SQP; + case IB_QPT_RC: + return QPT_RC; + case IB_QPT_UC: + return QPT_UC; + case IB_QPT_UD: + return QPT_UD; + default: + ehca_gen_err("Invalid ibqptype=%x", ibqptype); + return -EINVAL; + } +} + +static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate, + int ib_tostate) +{ + int index = -EINVAL; + switch (ib_tostate) { + case IB_QPS_RESET: + index = IB_QPST_ANY2RESET; + break; + case IB_QPS_INIT: + switch (ib_fromstate) { + case IB_QPS_RESET: + index = IB_QPST_RESET2INIT; + break; + case IB_QPS_INIT: + index = IB_QPST_INIT2INIT; + break; + } + break; + case IB_QPS_RTR: + if (ib_fromstate == IB_QPS_INIT) + index = IB_QPST_INIT2RTR; + break; + case IB_QPS_RTS: + switch (ib_fromstate) { + case IB_QPS_RTR: + index = IB_QPST_RTR2RTS; + break; + case IB_QPS_RTS: + index = IB_QPST_RTS2RTS; + break; + case IB_QPS_SQD: + index = IB_QPST_SQD2RTS; + break; + case IB_QPS_SQE: + index = IB_QPST_SQE2RTS; + break; + } + break; + case IB_QPS_SQD: + if (ib_fromstate == IB_QPS_RTS) + index = IB_QPST_RTS2SQD; + break; + case IB_QPS_SQE: + break; + case IB_QPS_ERR: + index = IB_QPST_ANY2ERR; + break; + default: + break; + } + return index; +} + +enum ehca_service_type { + ST_RC = 0, + ST_UC = 1, + ST_RD = 2, + ST_UD = 3 +}; + +/* + * ibqptype2servicetype returns hcp service type corresponding to given + * ib qp type used by create_qp() + */ +static inline int ibqptype2servicetype(enum ib_qp_type ibqptype) +{ + switch (ibqptype) { + case IB_QPT_SMI: + case IB_QPT_GSI: + return ST_UD; + case IB_QPT_RC: + return ST_RC; + case IB_QPT_UC: + return ST_UC; + case IB_QPT_UD: + return ST_UD; + case IB_QPT_RAW_IPV6: + return -EINVAL; + case IB_QPT_RAW_ETY: + return -EINVAL; + default: + ehca_gen_err("Invalid ibqptype=%x", ibqptype); + return -EINVAL; + } +} + +/* + * init_qp_queues initializes/constructs r/squeue and registers queue pages. + */ +static inline int init_qp_queues(struct ehca_shca *shca, + struct ehca_qp *my_qp, + int nr_sq_pages, + int nr_rq_pages, + int swqe_size, + int rwqe_size, + int nr_send_sges, int nr_receive_sges) +{ + int ret, cnt, ipz_rc; + void *vpage; + u64 rpage, h_ret; + struct ib_device *ib_dev = &shca->ib_device; + struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle; + + ipz_rc = ipz_queue_ctor(&my_qp->ipz_squeue, + nr_sq_pages, + EHCA_PAGESIZE, swqe_size, nr_send_sges); + if (!ipz_rc) { + ehca_err(ib_dev,"Cannot allocate page for squeue. ipz_rc=%x", + ipz_rc); + return -EBUSY; + } + + ipz_rc = ipz_queue_ctor(&my_qp->ipz_rqueue, + nr_rq_pages, + EHCA_PAGESIZE, rwqe_size, nr_receive_sges); + if (!ipz_rc) { + ehca_err(ib_dev, "Cannot allocate page for rqueue. ipz_rc=%x", + ipz_rc); + ret = -EBUSY; + goto init_qp_queues0; + } + /* register SQ pages */ + for (cnt = 0; cnt < nr_sq_pages; cnt++) { + vpage = ipz_qpageit_get_inc(&my_qp->ipz_squeue); + if (!vpage) { + ehca_err(ib_dev, "SQ ipz_qpageit_get_inc() " + "failed p_vpage= %p", vpage); + ret = -EINVAL; + goto init_qp_queues1; + } + rpage = virt_to_abs(vpage); + + h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, 0, 0, + rpage, 1, + my_qp->galpas.kernel); + if (h_ret < H_SUCCESS) { + ehca_err(ib_dev, "SQ hipz_qp_register_rpage()" + " failed rc=%lx", h_ret); + ret = ehca2ib_return_code(h_ret); + goto init_qp_queues1; + } + } + + ipz_qeit_reset(&my_qp->ipz_squeue); + + /* register RQ pages */ + for (cnt = 0; cnt < nr_rq_pages; cnt++) { + vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue); + if (!vpage) { + ehca_err(ib_dev, "RQ ipz_qpageit_get_inc() " + "failed p_vpage = %p", vpage); + ret = -EINVAL; + goto init_qp_queues1; + } + + rpage = virt_to_abs(vpage); + + h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, 0, 1, + rpage, 1,my_qp->galpas.kernel); + if (h_ret < H_SUCCESS) { + ehca_err(ib_dev, "RQ hipz_qp_register_rpage() failed " + "rc=%lx", h_ret); + ret = ehca2ib_return_code(h_ret); + goto init_qp_queues1; + } + if (cnt == (nr_rq_pages - 1)) { /* last page! */ + if (h_ret != H_SUCCESS) { + ehca_err(ib_dev, "RQ hipz_qp_register_rpage() " + "h_ret= %lx ", h_ret); + ret = ehca2ib_return_code(h_ret); + goto init_qp_queues1; + } + vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue); + if (vpage) { + ehca_err(ib_dev, "ipz_qpageit_get_inc() " + "should not succeed vpage=%p", vpage); + ret = -EINVAL; + goto init_qp_queues1; + } + } else { + if (h_ret != H_PAGE_REGISTERED) { + ehca_err(ib_dev, "RQ hipz_qp_register_rpage() " + "h_ret= %lx ", h_ret); + ret = ehca2ib_return_code(h_ret); + goto init_qp_queues1; + } + } + } + + ipz_qeit_reset(&my_qp->ipz_rqueue); + + return 0; + +init_qp_queues1: + ipz_queue_dtor(&my_qp->ipz_rqueue); +init_qp_queues0: + ipz_queue_dtor(&my_qp->ipz_squeue); + return ret; +} + +struct ib_qp *ehca_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + static int da_rc_msg_size[]={ 128, 256, 512, 1024, 2048, 4096 }; + static int da_ud_sq_msg_size[]={ 128, 384, 896, 1920, 3968 }; + struct ehca_qp *my_qp; + struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, + ib_device); + struct ib_ucontext *context = NULL; + u64 h_ret; + int max_send_sge, max_recv_sge, ret; + + /* h_call's out parameters */ + struct ehca_alloc_qp_parms parms; + u32 swqe_size = 0, rwqe_size = 0; + u8 daqp_completion, isdaqp; + unsigned long flags; + + if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR && + init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) { + ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed", + init_attr->sq_sig_type); + return ERR_PTR(-EINVAL); + } + + /* save daqp completion bits */ + daqp_completion = init_attr->qp_type & 0x60; + /* save daqp bit */ + isdaqp = (init_attr->qp_type & 0x80) ? 1 : 0; + init_attr->qp_type = init_attr->qp_type & 0x1F; + + if (init_attr->qp_type != IB_QPT_UD && + init_attr->qp_type != IB_QPT_SMI && + init_attr->qp_type != IB_QPT_GSI && + init_attr->qp_type != IB_QPT_UC && + init_attr->qp_type != IB_QPT_RC) { + ehca_err(pd->device, "wrong QP Type=%x", init_attr->qp_type); + return ERR_PTR(-EINVAL); + } + if ((init_attr->qp_type != IB_QPT_RC && init_attr->qp_type != IB_QPT_UD) + && isdaqp) { + ehca_err(pd->device, "unsupported LL QP Type=%x", + init_attr->qp_type); + return ERR_PTR(-EINVAL); + } else if (init_attr->qp_type == IB_QPT_RC && isdaqp && + (init_attr->cap.max_send_wr > 255 || + init_attr->cap.max_recv_wr > 255 )) { + ehca_err(pd->device, "Invalid Number of max_sq_wr =%x " + "or max_rq_wr=%x for QP Type=%x", + init_attr->cap.max_send_wr, + init_attr->cap.max_recv_wr,init_attr->qp_type); + return ERR_PTR(-EINVAL); + } else if (init_attr->qp_type == IB_QPT_UD && isdaqp && + init_attr->cap.max_send_wr > 255) { + ehca_err(pd->device, + "Invalid Number of max_send_wr=%x for UD QP_TYPE=%x", + init_attr->cap.max_send_wr, init_attr->qp_type); + return ERR_PTR(-EINVAL); + } + + if (pd->uobject && udata) + context = pd->uobject->context; + + my_qp = kmem_cache_alloc(qp_cache, SLAB_KERNEL); + if (!my_qp) { + ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd); + return ERR_PTR(-ENOMEM); + } + + memset(my_qp, 0, sizeof(struct ehca_qp)); + memset (&parms, 0, sizeof(struct ehca_alloc_qp_parms)); + spin_lock_init(&my_qp->spinlock_s); + spin_lock_init(&my_qp->spinlock_r); + + my_qp->recv_cq = + container_of(init_attr->recv_cq, struct ehca_cq, ib_cq); + my_qp->send_cq = + container_of(init_attr->send_cq, struct ehca_cq, ib_cq); + + my_qp->init_attr = *init_attr; + + do { + if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) { + ret = -ENOMEM; + ehca_err(pd->device, "Can't reserve idr resources."); + goto create_qp_exit0; + } + + spin_lock_irqsave(&ehca_qp_idr_lock, flags); + ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token); + spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + + } while (ret == -EAGAIN); + + if (ret) { + ret = -ENOMEM; + ehca_err(pd->device, "Can't allocate new idr entry."); + goto create_qp_exit0; + } + + parms.servicetype = ibqptype2servicetype(init_attr->qp_type); + if (parms.servicetype < 0) { + ret = -EINVAL; + ehca_err(pd->device, "Invalid qp_type=%x", init_attr->qp_type); + goto create_qp_exit0; + } + + if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) + parms.sigtype = HCALL_SIGT_EVERY; + else + parms.sigtype = HCALL_SIGT_BY_WQE; + + /* UD_AV CIRCUMVENTION */ + max_send_sge = init_attr->cap.max_send_sge; + max_recv_sge = init_attr->cap.max_recv_sge; + if (IB_QPT_UD == init_attr->qp_type || + IB_QPT_GSI == init_attr->qp_type || + IB_QPT_SMI == init_attr->qp_type) { + max_send_sge += 2; + max_recv_sge += 2; + } + + parms.ipz_eq_handle = shca->eq.ipz_eq_handle; + parms.daqp_ctrl = isdaqp | daqp_completion; + parms.pd = my_pd->fw_pd; + parms.max_recv_sge = max_recv_sge; + parms.max_send_sge = max_send_sge; + + h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, my_qp, &parms); + + if (h_ret != H_SUCCESS) { + ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lx", + h_ret); + ret = ehca2ib_return_code(h_ret); + goto create_qp_exit1; + } + + switch (init_attr->qp_type) { + case IB_QPT_RC: + if (isdaqp == 0) { + swqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[ + (parms.act_nr_send_sges)]); + rwqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[ + (parms.act_nr_recv_sges)]); + } else { /* for daqp we need to use msg size, not wqe size */ + swqe_size = da_rc_msg_size[max_send_sge]; + rwqe_size = da_rc_msg_size[max_recv_sge]; + parms.act_nr_send_sges = 1; + parms.act_nr_recv_sges = 1; + } + break; + case IB_QPT_UC: + swqe_size = offsetof(struct ehca_wqe, + u.nud.sg_list[parms.act_nr_send_sges]); + rwqe_size = offsetof(struct ehca_wqe, + u.nud.sg_list[parms.act_nr_recv_sges]); + break; + + case IB_QPT_UD: + case IB_QPT_GSI: + case IB_QPT_SMI: + /* UD circumvention */ + parms.act_nr_recv_sges -= 2; + parms.act_nr_send_sges -= 2; + if (isdaqp) { + swqe_size = da_ud_sq_msg_size[max_send_sge]; + rwqe_size = da_rc_msg_size[max_recv_sge]; + parms.act_nr_send_sges = 1; + parms.act_nr_recv_sges = 1; + } else { + swqe_size = offsetof(struct ehca_wqe, + u.ud_av.sg_list[parms.act_nr_send_sges]); + rwqe_size = offsetof(struct ehca_wqe, + u.ud_av.sg_list[parms.act_nr_recv_sges]); + } + + if (IB_QPT_GSI == init_attr->qp_type || + IB_QPT_SMI == init_attr->qp_type) { + parms.act_nr_send_wqes = init_attr->cap.max_send_wr; + parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr; + parms.act_nr_send_sges = init_attr->cap.max_send_sge; + parms.act_nr_recv_sges = init_attr->cap.max_recv_sge; + my_qp->real_qp_num = + (init_attr->qp_type == IB_QPT_SMI) ? 0 : 1; + } + + break; + + default: + break; + } + + /* initializes r/squeue and registers queue pages */ + ret = init_qp_queues(shca, my_qp, + parms.nr_sq_pages, parms.nr_rq_pages, + swqe_size, rwqe_size, + parms.act_nr_send_sges, parms.act_nr_recv_sges); + if (ret) { + ehca_err(pd->device, + "Couldn't initialize r/squeue and pages ret=%x", ret); + goto create_qp_exit2; + } + + my_qp->ib_qp.pd = &my_pd->ib_pd; + my_qp->ib_qp.device = my_pd->ib_pd.device; + + my_qp->ib_qp.recv_cq = init_attr->recv_cq; + my_qp->ib_qp.send_cq = init_attr->send_cq; + + my_qp->ib_qp.qp_num = my_qp->real_qp_num; + my_qp->ib_qp.qp_type = init_attr->qp_type; + + my_qp->qp_type = init_attr->qp_type; + my_qp->ib_qp.srq = init_attr->srq; + + my_qp->ib_qp.qp_context = init_attr->qp_context; + my_qp->ib_qp.event_handler = init_attr->event_handler; + + init_attr->cap.max_inline_data = 0; /* not supported yet */ + init_attr->cap.max_recv_sge = parms.act_nr_recv_sges; + init_attr->cap.max_recv_wr = parms.act_nr_recv_wqes; + init_attr->cap.max_send_sge = parms.act_nr_send_sges; + init_attr->cap.max_send_wr = parms.act_nr_send_wqes; + + /* NOTE: define_apq0() not supported yet */ + if (init_attr->qp_type == IB_QPT_GSI) { + h_ret = ehca_define_sqp(shca, my_qp, init_attr); + if (h_ret != H_SUCCESS) { + ehca_err(pd->device, "ehca_define_sqp() failed rc=%lx", + h_ret); + ret = ehca2ib_return_code(h_ret); + goto create_qp_exit3; + } + } + if (init_attr->send_cq) { + struct ehca_cq *cq = container_of(init_attr->send_cq, + struct ehca_cq, ib_cq); + ret = ehca_cq_assign_qp(cq, my_qp); + if (ret) { + ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%x", + ret); + goto create_qp_exit3; + } + my_qp->send_cq = cq; + } + /* copy queues, galpa data to user space */ + if (context && udata) { + struct ipz_queue *ipz_rqueue = &my_qp->ipz_rqueue; + struct ipz_queue *ipz_squeue = &my_qp->ipz_squeue; + struct ehca_create_qp_resp resp; + struct vm_area_struct * vma; + memset(&resp, 0, sizeof(resp)); + + resp.qp_num = my_qp->real_qp_num; + resp.token = my_qp->token; + resp.qp_type = my_qp->qp_type; + resp.qkey = my_qp->qkey; + resp.real_qp_num = my_qp->real_qp_num; + /* rqueue properties */ + resp.ipz_rqueue.qe_size = ipz_rqueue->qe_size; + resp.ipz_rqueue.act_nr_of_sg = ipz_rqueue->act_nr_of_sg; + resp.ipz_rqueue.queue_length = ipz_rqueue->queue_length; + resp.ipz_rqueue.pagesize = ipz_rqueue->pagesize; + resp.ipz_rqueue.toggle_state = ipz_rqueue->toggle_state; + ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x22000000, + ipz_rqueue->queue_length, + (void**)&resp.ipz_rqueue.queue, + &vma); + if (ret) { + ehca_err(pd->device, "Could not mmap rqueue pages"); + goto create_qp_exit3; + } + my_qp->uspace_rqueue = resp.ipz_rqueue.queue; + /* squeue properties */ + resp.ipz_squeue.qe_size = ipz_squeue->qe_size; + resp.ipz_squeue.act_nr_of_sg = ipz_squeue->act_nr_of_sg; + resp.ipz_squeue.queue_length = ipz_squeue->queue_length; + resp.ipz_squeue.pagesize = ipz_squeue->pagesize; + resp.ipz_squeue.toggle_state = ipz_squeue->toggle_state; + ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x23000000, + ipz_squeue->queue_length, + (void**)&resp.ipz_squeue.queue, + &vma); + if (ret) { + ehca_err(pd->device, "Could not mmap squeue pages"); + goto create_qp_exit4; + } + my_qp->uspace_squeue = resp.ipz_squeue.queue; + /* fw_handle */ + resp.galpas = my_qp->galpas; + ret = ehca_mmap_register(my_qp->galpas.user.fw_handle, + (void**)&resp.galpas.kernel.fw_handle, + &vma); + if (ret) { + ehca_err(pd->device, "Could not mmap fw_handle"); + goto create_qp_exit5; + } + my_qp->uspace_fwh = (u64)resp.galpas.kernel.fw_handle; + + if (ib_copy_to_udata(udata, &resp, sizeof resp)) { + ehca_err(pd->device, "Copy to udata failed"); + ret = -EINVAL; + goto create_qp_exit6; + } + } + + return &my_qp->ib_qp; + +create_qp_exit6: + ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE); + +create_qp_exit5: + ehca_munmap(my_qp->uspace_squeue, my_qp->ipz_squeue.queue_length); + +create_qp_exit4: + ehca_munmap(my_qp->uspace_rqueue, my_qp->ipz_rqueue.queue_length); + +create_qp_exit3: + ipz_queue_dtor(&my_qp->ipz_rqueue); + ipz_queue_dtor(&my_qp->ipz_squeue); + +create_qp_exit2: + hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); + +create_qp_exit1: + spin_lock_irqsave(&ehca_qp_idr_lock, flags); + idr_remove(&ehca_qp_idr, my_qp->token); + spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + +create_qp_exit0: + kmem_cache_free(qp_cache, my_qp); + return ERR_PTR(ret); +} + +/* + * prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts + * set purge bit of bad wqe and subsequent wqes to avoid reentering sqe + * returns total number of bad wqes in bad_wqe_cnt + */ +static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, + int *bad_wqe_cnt) +{ + u64 h_ret; + struct ipz_queue *squeue; + void *bad_send_wqe_p, *bad_send_wqe_v; + void *squeue_start_p, *squeue_end_p; + void *squeue_start_v, *squeue_end_v; + struct ehca_wqe *wqe; + int qp_num = my_qp->ib_qp.qp_num; + + /* get send wqe pointer */ + h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, &my_qp->pf, + &bad_send_wqe_p, NULL, 2); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed" + " ehca_qp=%p qp_num=%x h_ret=%lx", + my_qp, qp_num, h_ret); + return ehca2ib_return_code(h_ret); + } + bad_send_wqe_p = (void*)((u64)bad_send_wqe_p & (~(1L<<63))); + ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p", + qp_num, bad_send_wqe_p); + /* convert wqe pointer to vadr */ + bad_send_wqe_v = abs_to_virt((u64)bad_send_wqe_p); + if (ehca_debug_level) + ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num); + squeue = &my_qp->ipz_squeue; + squeue_start_p = (void*)virt_to_abs(ipz_qeit_calc(squeue, 0L)); + squeue_end_p = squeue_start_p+squeue->queue_length; + squeue_start_v = abs_to_virt((u64)squeue_start_p); + squeue_end_v = abs_to_virt((u64)squeue_end_p); + ehca_dbg(&shca->ib_device, "qp_num=%x squeue_start_v=%p squeue_end_v=%p", + qp_num, squeue_start_v, squeue_end_v); + + /* loop sets wqe's purge bit */ + wqe = (struct ehca_wqe*)bad_send_wqe_v; + *bad_wqe_cnt = 0; + while (wqe->optype != 0xff && wqe->wqef != 0xff) { + if (ehca_debug_level) + ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num); + wqe->nr_of_data_seg = 0; /* suppress data access */ + wqe->wqef = WQEF_PURGE; /* WQE to be purged */ + wqe = (struct ehca_wqe*)((u8*)wqe+squeue->qe_size); + *bad_wqe_cnt = (*bad_wqe_cnt)+1; + if ((void*)wqe >= squeue_end_v) { + wqe = squeue_start_v; + } + } + /* + * bad wqe will be reprocessed and ignored when pol_cq() is called, + * i.e. nr of wqes with flush error status is one less + */ + ehca_dbg(&shca->ib_device, "qp_num=%x flusherr_wqe_cnt=%x", + qp_num, (*bad_wqe_cnt)-1); + wqe->wqef = 0; + + return 0; +} + +/* + * internal_modify_qp with circumvention to handle aqp0 properly + * smi_reset2init indicates if this is an internal reset-to-init-call for + * smi. This flag must always be zero if called from ehca_modify_qp()! + * This internal func was intorduced to avoid recursion of ehca_modify_qp()! + */ +static int internal_modify_qp(struct ib_qp *ibqp, + struct ib_qp_attr *attr, + int attr_mask, int smi_reset2init) +{ + enum ib_qp_state qp_cur_state, qp_new_state; + int cnt, qp_attr_idx, ret = 0; + enum ib_qp_statetrans statetrans; + struct hcp_modify_qp_control_block *mqpcb; + struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + struct ehca_shca *shca = + container_of(ibqp->pd->device, struct ehca_shca, ib_device); + u64 update_mask; + u64 h_ret; + int bad_wqe_cnt = 0; + int squeue_locked = 0; + unsigned long spl_flags = 0; + + /* do query_qp to obtain current attr values */ + mqpcb = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); + if (mqpcb == NULL) { + ehca_err(ibqp->device, "Could not get zeroed page for mqpcb " + "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num); + return -ENOMEM; + } + + h_ret = hipz_h_query_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + mqpcb, my_qp->galpas.kernel); + if (h_ret != H_SUCCESS) { + ehca_err(ibqp->device, "hipz_h_query_qp() failed " + "ehca_qp=%p qp_num=%x h_ret=%lx", + my_qp, ibqp->qp_num, h_ret); + ret = ehca2ib_return_code(h_ret); + goto modify_qp_exit1; + } + + qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state); + + if (qp_cur_state == -EINVAL) { /* invalid qp state */ + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid current ehca_qp_state=%x " + "ehca_qp=%p qp_num=%x", + mqpcb->qp_state, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + /* + * circumvention to set aqp0 initial state to init + * as expected by IB spec + */ + if (smi_reset2init == 0 && + ibqp->qp_type == IB_QPT_SMI && + qp_cur_state == IB_QPS_RESET && + (attr_mask & IB_QP_STATE) && + attr->qp_state == IB_QPS_INIT) { /* RESET -> INIT */ + struct ib_qp_attr smiqp_attr = { + .qp_state = IB_QPS_INIT, + .port_num = my_qp->init_attr.port_num, + .pkey_index = 0, + .qkey = 0 + }; + int smiqp_attr_mask = IB_QP_STATE | IB_QP_PORT | + IB_QP_PKEY_INDEX | IB_QP_QKEY; + int smirc = internal_modify_qp( + ibqp, &smiqp_attr, smiqp_attr_mask, 1); + if (smirc) { + ehca_err(ibqp->device, "SMI RESET -> INIT failed. " + "ehca_modify_qp() rc=%x", smirc); + ret = H_PARAMETER; + goto modify_qp_exit1; + } + qp_cur_state = IB_QPS_INIT; + ehca_dbg(ibqp->device, "SMI RESET -> INIT succeeded"); + } + /* is transmitted current state equal to "real" current state */ + if ((attr_mask & IB_QP_CUR_STATE) && + qp_cur_state != attr->cur_qp_state) { + ret = -EINVAL; + ehca_err(ibqp->device, + "Invalid IB_QP_CUR_STATE attr->curr_qp_state=%x <>" + " actual cur_qp_state=%x. ehca_qp=%p qp_num=%x", + attr->cur_qp_state, qp_cur_state, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + ehca_dbg(ibqp->device,"ehca_qp=%p qp_num=%x current qp_state=%x " + "new qp_state=%x attribute_mask=%x", + my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask); + + qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state; + if (!smi_reset2init && + !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type, + attr_mask)) { + ret = -EINVAL; + ehca_err(ibqp->device, + "Invalid qp transition new_state=%x cur_state=%x " + "ehca_qp=%p qp_num=%x attr_mask=%x", qp_new_state, + qp_cur_state, my_qp, ibqp->qp_num, attr_mask); + goto modify_qp_exit1; + } + + if ((mqpcb->qp_state = ib2ehca_qp_state(qp_new_state))) + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); + else { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid new qp state=%x " + "ehca_qp=%p qp_num=%x", + qp_new_state, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + /* retrieve state transition struct to get req and opt attrs */ + statetrans = get_modqp_statetrans(qp_cur_state, qp_new_state); + if (statetrans < 0) { + ret = -EINVAL; + ehca_err(ibqp->device, " qp_cur_state=%x " + "new_qp_state=%x State_xsition=%x ehca_qp=%p " + "qp_num=%x", qp_cur_state, qp_new_state, + statetrans, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + qp_attr_idx = ib2ehcaqptype(ibqp->qp_type); + + if (qp_attr_idx < 0) { + ret = qp_attr_idx; + ehca_err(ibqp->device, + "Invalid QP type=%x ehca_qp=%p qp_num=%x", + ibqp->qp_type, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + ehca_dbg(ibqp->device, + "ehca_qp=%p qp_num=%x qp_state_xsit=%x", + my_qp, ibqp->qp_num, statetrans); + + /* sqe -> rts: set purge bit of bad wqe before actual trans */ + if ((my_qp->qp_type == IB_QPT_UD || + my_qp->qp_type == IB_QPT_GSI || + my_qp->qp_type == IB_QPT_SMI) && + statetrans == IB_QPST_SQE2RTS) { + /* mark next free wqe if kernel */ + if (my_qp->uspace_squeue == 0) { + struct ehca_wqe *wqe; + /* lock send queue */ + spin_lock_irqsave(&my_qp->spinlock_s, spl_flags); + squeue_locked = 1; + /* mark next free wqe */ + wqe = (struct ehca_wqe*) + ipz_qeit_get(&my_qp->ipz_squeue); + wqe->optype = wqe->wqef = 0xff; + ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p", + ibqp->qp_num, wqe); + } + ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt); + if (ret) { + ehca_err(ibqp->device, "prepare_sqe_rts() failed " + "ehca_qp=%p qp_num=%x ret=%x", + my_qp, ibqp->qp_num, ret); + goto modify_qp_exit2; + } + } + + /* + * enable RDMA_Atomic_Control if reset->init und reliable con + * this is necessary since gen2 does not provide that flag, + * but pHyp requires it + */ + if (statetrans == IB_QPST_RESET2INIT && + (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_UC)) { + mqpcb->rdma_atomic_ctrl = 3; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RDMA_ATOMIC_CTRL, 1); + } + /* circ. pHyp requires #RDMA/Atomic Resp Res for UC INIT -> RTR */ + if (statetrans == IB_QPST_INIT2RTR && + (ibqp->qp_type == IB_QPT_UC) && + !(attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)) { + mqpcb->rdma_nr_atomic_resp_res = 1; /* default to 1 */ + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1); + } + + if (attr_mask & IB_QP_PKEY_INDEX) { + mqpcb->prim_p_key_idx = attr->pkey_index; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1); + } + if (attr_mask & IB_QP_PORT) { + if (attr->port_num < 1 || attr->port_num > shca->num_ports) { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid port=%x. " + "ehca_qp=%p qp_num=%x num_ports=%x", + attr->port_num, my_qp, ibqp->qp_num, + shca->num_ports); + goto modify_qp_exit2; + } + mqpcb->prim_phys_port = attr->port_num; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1); + } + if (attr_mask & IB_QP_QKEY) { + mqpcb->qkey = attr->qkey; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1); + } + if (attr_mask & IB_QP_AV) { + int ah_mult = ib_rate_to_mult(attr->ah_attr.static_rate); + int ehca_mult = ib_rate_to_mult(shca->sport[my_qp-> + init_attr.port_num].rate); + + mqpcb->dlid = attr->ah_attr.dlid; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1); + mqpcb->source_path_bits = attr->ah_attr.src_path_bits; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS, 1); + mqpcb->service_level = attr->ah_attr.sl; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1); + + if (ah_mult < ehca_mult) + mqpcb->max_static_rate = (ah_mult > 0) ? + ((ehca_mult - 1) / ah_mult) : 0; + else + mqpcb->max_static_rate = 0; + + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1); + + /* + * only if GRH is TRUE we might consider SOURCE_GID_IDX + * and DEST_GID otherwise phype will return H_ATTR_PARM!!! + */ + if (attr->ah_attr.ah_flags == IB_AH_GRH) { + mqpcb->send_grh_flag = 1 << 31; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1); + mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1); + + for (cnt = 0; cnt < 16; cnt++) + mqpcb->dest_gid.byte[cnt] = + attr->ah_attr.grh.dgid.raw[cnt]; + + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_GID, 1); + mqpcb->flow_label = attr->ah_attr.grh.flow_label; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL, 1); + mqpcb->hop_limit = attr->ah_attr.grh.hop_limit; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT, 1); + mqpcb->traffic_class = attr->ah_attr.grh.traffic_class; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS, 1); + } + } + + if (attr_mask & IB_QP_PATH_MTU) { + mqpcb->path_mtu = attr->path_mtu; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1); + } + if (attr_mask & IB_QP_TIMEOUT) { + mqpcb->timeout = attr->timeout; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT, 1); + } + if (attr_mask & IB_QP_RETRY_CNT) { + mqpcb->retry_count = attr->retry_cnt; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT, 1); + } + if (attr_mask & IB_QP_RNR_RETRY) { + mqpcb->rnr_retry_count = attr->rnr_retry; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT, 1); + } + if (attr_mask & IB_QP_RQ_PSN) { + mqpcb->receive_psn = attr->rq_psn; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RECEIVE_PSN, 1); + } + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { + mqpcb->rdma_nr_atomic_resp_res = attr->max_dest_rd_atomic < 3 ? + attr->max_dest_rd_atomic : 2; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1); + } + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { + mqpcb->rdma_atomic_outst_dest_qp = attr->max_rd_atomic < 3 ? + attr->max_rd_atomic : 2; + update_mask |= + EHCA_BMASK_SET + (MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1); + } + if (attr_mask & IB_QP_ALT_PATH) { + int ah_mult = ib_rate_to_mult(attr->alt_ah_attr.static_rate); + int ehca_mult = ib_rate_to_mult( + shca->sport[my_qp->init_attr.port_num].rate); + + mqpcb->dlid_al = attr->alt_ah_attr.dlid; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1); + mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1); + mqpcb->service_level_al = attr->alt_ah_attr.sl; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1); + + if (ah_mult < ehca_mult) + mqpcb->max_static_rate = (ah_mult > 0) ? + ((ehca_mult - 1) / ah_mult) : 0; + else + mqpcb->max_static_rate_al = 0; + + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1); + + /* + * only if GRH is TRUE we might consider SOURCE_GID_IDX + * and DEST_GID otherwise phype will return H_ATTR_PARM!!! + */ + if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) { + mqpcb->send_grh_flag_al = 1 << 31; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1); + mqpcb->source_gid_idx_al = + attr->alt_ah_attr.grh.sgid_index; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1); + + for (cnt = 0; cnt < 16; cnt++) + mqpcb->dest_gid_al.byte[cnt] = + attr->alt_ah_attr.grh.dgid.raw[cnt]; + + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1); + mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1); + mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1); + mqpcb->traffic_class_al = + attr->alt_ah_attr.grh.traffic_class; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1); + } + } + + if (attr_mask & IB_QP_MIN_RNR_TIMER) { + mqpcb->min_rnr_nak_timer_field = attr->min_rnr_timer; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD, 1); + } + + if (attr_mask & IB_QP_SQ_PSN) { + mqpcb->send_psn = attr->sq_psn; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_PSN, 1); + } + + if (attr_mask & IB_QP_DEST_QPN) { + mqpcb->dest_qp_nr = attr->dest_qp_num; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_QP_NR, 1); + } + + if (attr_mask & IB_QP_PATH_MIG_STATE) { + mqpcb->path_migration_state = attr->path_mig_state; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1); + } + + if (attr_mask & IB_QP_CAP) { + mqpcb->max_nr_outst_send_wr = attr->cap.max_send_wr+1; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_SEND_WR, 1); + mqpcb->max_nr_outst_recv_wr = attr->cap.max_recv_wr+1; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_RECV_WR, 1); + /* no support for max_send/recv_sge yet */ + } + + if (ehca_debug_level) + ehca_dmp(mqpcb, 4*70, "qp_num=%x", ibqp->qp_num); + + h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(ibqp->device, "hipz_h_modify_qp() failed rc=%lx " + "ehca_qp=%p qp_num=%x",h_ret, my_qp, ibqp->qp_num); + goto modify_qp_exit2; + } + + if ((my_qp->qp_type == IB_QPT_UD || + my_qp->qp_type == IB_QPT_GSI || + my_qp->qp_type == IB_QPT_SMI) && + statetrans == IB_QPST_SQE2RTS) { + /* doorbell to reprocessing wqes */ + iosync(); /* serialize GAL register access */ + hipz_update_sqa(my_qp, bad_wqe_cnt-1); + ehca_gen_dbg("doorbell for %x wqes", bad_wqe_cnt); + } + + if (statetrans == IB_QPST_RESET2INIT || + statetrans == IB_QPST_INIT2INIT) { + mqpcb->qp_enable = 1; + mqpcb->qp_state = EHCA_QPS_INIT; + update_mask = 0; + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1); + + h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, + my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(ibqp->device, "ENABLE in context of " + "RESET_2_INIT failed! Maybe you didn't get " + "a LID h_ret=%lx ehca_qp=%p qp_num=%x", + h_ret, my_qp, ibqp->qp_num); + goto modify_qp_exit2; + } + } + + if (statetrans == IB_QPST_ANY2RESET) { + ipz_qeit_reset(&my_qp->ipz_rqueue); + ipz_qeit_reset(&my_qp->ipz_squeue); + } + + if (attr_mask & IB_QP_QKEY) + my_qp->qkey = attr->qkey; + +modify_qp_exit2: + if (squeue_locked) { /* this means: sqe -> rts */ + spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags); + my_qp->sqerr_purgeflag = 1; + } + +modify_qp_exit1: + kfree(mqpcb); + + return ret; +} + +int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) +{ + struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, + ib_pd); + u32 cur_pid = current->tgid; + + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + ehca_err(ibqp->pd->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } + + return internal_modify_qp(ibqp, attr, attr_mask, 0); +} + +int ehca_query_qp(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) +{ + struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); + struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, + ib_pd); + struct ehca_shca *shca = container_of(qp->device, struct ehca_shca, + ib_device); + struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; + struct hcp_modify_qp_control_block *qpcb; + u32 cur_pid = current->tgid; + int cnt, ret = 0; + u64 h_ret; + + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + ehca_err(qp->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } + + if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) { + ehca_err(qp->device,"Invalid attribute mask " + "ehca_qp=%p qp_num=%x qp_attr_mask=%x ", + my_qp, qp->qp_num, qp_attr_mask); + return -EINVAL; + } + + qpcb = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL ); + if (!qpcb) { + ehca_err(qp->device,"Out of memory for qpcb " + "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num); + return -ENOMEM; + } + + h_ret = hipz_h_query_qp(adapter_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + qpcb, my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(qp->device,"hipz_h_query_qp() failed " + "ehca_qp=%p qp_num=%x h_ret=%lx", + my_qp, qp->qp_num, h_ret); + goto query_qp_exit1; + } + + qp_attr->cur_qp_state = ehca2ib_qp_state(qpcb->qp_state); + qp_attr->qp_state = qp_attr->cur_qp_state; + + if (qp_attr->cur_qp_state == -EINVAL) { + ret = -EINVAL; + ehca_err(qp->device,"Got invalid ehca_qp_state=%x " + "ehca_qp=%p qp_num=%x", + qpcb->qp_state, my_qp, qp->qp_num); + goto query_qp_exit1; + } + + if (qp_attr->qp_state == IB_QPS_SQD) + qp_attr->sq_draining = 1; + + qp_attr->qkey = qpcb->qkey; + qp_attr->path_mtu = qpcb->path_mtu; + qp_attr->path_mig_state = qpcb->path_migration_state; + qp_attr->rq_psn = qpcb->receive_psn; + qp_attr->sq_psn = qpcb->send_psn; + qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field; + qp_attr->cap.max_send_wr = qpcb->max_nr_outst_send_wr-1; + qp_attr->cap.max_recv_wr = qpcb->max_nr_outst_recv_wr-1; + /* UD_AV CIRCUMVENTION */ + if (my_qp->qp_type == IB_QPT_UD) { + qp_attr->cap.max_send_sge = + qpcb->actual_nr_sges_in_sq_wqe - 2; + qp_attr->cap.max_recv_sge = + qpcb->actual_nr_sges_in_rq_wqe - 2; + } else { + qp_attr->cap.max_send_sge = + qpcb->actual_nr_sges_in_sq_wqe; + qp_attr->cap.max_recv_sge = + qpcb->actual_nr_sges_in_rq_wqe; + } + + qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size; + qp_attr->dest_qp_num = qpcb->dest_qp_nr; + + qp_attr->pkey_index = + EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->prim_p_key_idx); + + qp_attr->port_num = + EHCA_BMASK_GET(MQPCB_PRIM_PHYS_PORT, qpcb->prim_phys_port); + + qp_attr->timeout = qpcb->timeout; + qp_attr->retry_cnt = qpcb->retry_count; + qp_attr->rnr_retry = qpcb->rnr_retry_count; + + qp_attr->alt_pkey_index = + EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->alt_p_key_idx); + + qp_attr->alt_port_num = qpcb->alt_phys_port; + qp_attr->alt_timeout = qpcb->timeout_al; + + /* primary av */ + qp_attr->ah_attr.sl = qpcb->service_level; + + if (qpcb->send_grh_flag) { + qp_attr->ah_attr.ah_flags = IB_AH_GRH; + } + + qp_attr->ah_attr.static_rate = qpcb->max_static_rate; + qp_attr->ah_attr.dlid = qpcb->dlid; + qp_attr->ah_attr.src_path_bits = qpcb->source_path_bits; + qp_attr->ah_attr.port_num = qp_attr->port_num; + + /* primary GRH */ + qp_attr->ah_attr.grh.traffic_class = qpcb->traffic_class; + qp_attr->ah_attr.grh.hop_limit = qpcb->hop_limit; + qp_attr->ah_attr.grh.sgid_index = qpcb->source_gid_idx; + qp_attr->ah_attr.grh.flow_label = qpcb->flow_label; + + for (cnt = 0; cnt < 16; cnt++) + qp_attr->ah_attr.grh.dgid.raw[cnt] = + qpcb->dest_gid.byte[cnt]; + + /* alternate AV */ + qp_attr->alt_ah_attr.sl = qpcb->service_level_al; + if (qpcb->send_grh_flag_al) { + qp_attr->alt_ah_attr.ah_flags = IB_AH_GRH; + } + + qp_attr->alt_ah_attr.static_rate = qpcb->max_static_rate_al; + qp_attr->alt_ah_attr.dlid = qpcb->dlid_al; + qp_attr->alt_ah_attr.src_path_bits = qpcb->source_path_bits_al; + + /* alternate GRH */ + qp_attr->alt_ah_attr.grh.traffic_class = qpcb->traffic_class_al; + qp_attr->alt_ah_attr.grh.hop_limit = qpcb->hop_limit_al; + qp_attr->alt_ah_attr.grh.sgid_index = qpcb->source_gid_idx_al; + qp_attr->alt_ah_attr.grh.flow_label = qpcb->flow_label_al; + + for (cnt = 0; cnt < 16; cnt++) + qp_attr->alt_ah_attr.grh.dgid.raw[cnt] = + qpcb->dest_gid_al.byte[cnt]; + + /* return init attributes given in ehca_create_qp */ + if (qp_init_attr) + *qp_init_attr = my_qp->init_attr; + + if (ehca_debug_level) + ehca_dmp(qpcb, 4*70, "qp_num=%x", qp->qp_num); + +query_qp_exit1: + kfree(qpcb); + + return ret; +} + +int ehca_destroy_qp(struct ib_qp *ibqp) +{ + struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, + ib_device); + struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, + ib_pd); + u32 cur_pid = current->tgid; + u32 qp_num = ibqp->qp_num; + int ret; + u64 h_ret; + u8 port_num; + enum ib_qp_type qp_type; + unsigned long flags; + + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } + + if (my_qp->send_cq) { + ret = ehca_cq_unassign_qp(my_qp->send_cq, + my_qp->real_qp_num); + if (ret) { + ehca_err(ibqp->device, "Couldn't unassign qp from " + "send_cq ret=%x qp_num=%x cq_num=%x", ret, + my_qp->ib_qp.qp_num, my_qp->send_cq->cq_number); + return ret; + } + } + + spin_lock_irqsave(&ehca_qp_idr_lock, flags); + idr_remove(&ehca_qp_idr, my_qp->token); + spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + + /* un-mmap if vma alloc */ + if (my_qp->uspace_rqueue) { + ret = ehca_munmap(my_qp->uspace_rqueue, + my_qp->ipz_rqueue.queue_length); + if (ret) + ehca_err(ibqp->device, "Could not munmap rqueue " + "qp_num=%x", qp_num); + ret = ehca_munmap(my_qp->uspace_squeue, + my_qp->ipz_squeue.queue_length); + if (ret) + ehca_err(ibqp->device, "Could not munmap squeue " + "qp_num=%x", qp_num); + ret = ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE); + if (ret) + ehca_err(ibqp->device, "Could not munmap fwh qp_num=%x", + qp_num); + } + + h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); + if (h_ret != H_SUCCESS) { + ehca_err(ibqp->device, "hipz_h_destroy_qp() failed rc=%lx " + "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num); + return ehca2ib_return_code(h_ret); + } + + port_num = my_qp->init_attr.port_num; + qp_type = my_qp->init_attr.qp_type; + + /* no support for IB_QPT_SMI yet */ + if (qp_type == IB_QPT_GSI) { + struct ib_event event; + ehca_info(ibqp->device, "device %s: port %x is inactive.", + shca->ib_device.name, port_num); + event.device = &shca->ib_device; + event.event = IB_EVENT_PORT_ERR; + event.element.port_num = port_num; + shca->sport[port_num - 1].port_state = IB_PORT_DOWN; + ib_dispatch_event(&event); + } + + ipz_queue_dtor(&my_qp->ipz_rqueue); + ipz_queue_dtor(&my_qp->ipz_squeue); + kmem_cache_free(qp_cache, my_qp); + return 0; +} + +int ehca_init_qp_cache(void) +{ + qp_cache = kmem_cache_create("ehca_cache_qp", + sizeof(struct ehca_qp), 0, + SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!qp_cache) + return -ENOMEM; + return 0; +} + +void ehca_cleanup_qp_cache(void) +{ + if (qp_cache) + kmem_cache_destroy(qp_cache); +} diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c new file mode 100644 index 000000000000..b46bda1bf85d --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -0,0 +1,653 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * post_send/recv, poll_cq, req_notify + * + * Authors: Waleri Fomin + * Hoang-Nam Nguyen + * Reinhard Ernst + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_qes.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" +#include "hipz_fns.h" + +static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, + struct ehca_wqe *wqe_p, + struct ib_recv_wr *recv_wr) +{ + u8 cnt_ds; + if (unlikely((recv_wr->num_sge < 0) || + (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) { + ehca_gen_err("Invalid number of WQE SGE. " + "num_sqe=%x max_nr_of_sg=%x", + recv_wr->num_sge, ipz_rqueue->act_nr_of_sg); + return -EINVAL; /* invalid SG list length */ + } + + /* clear wqe header until sglist */ + memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); + + wqe_p->work_request_id = recv_wr->wr_id; + wqe_p->nr_of_data_seg = recv_wr->num_sge; + + for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) { + wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr = + recv_wr->sg_list[cnt_ds].addr; + wqe_p->u.all_rcv.sg_list[cnt_ds].lkey = + recv_wr->sg_list[cnt_ds].lkey; + wqe_p->u.all_rcv.sg_list[cnt_ds].length = + recv_wr->sg_list[cnt_ds].length; + } + + if (ehca_debug_level) { + ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", ipz_rqueue); + ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); + } + + return 0; +} + +#if defined(DEBUG_GSI_SEND_WR) + +/* need ib_mad struct */ +#include + +static void trace_send_wr_ud(const struct ib_send_wr *send_wr) +{ + int idx; + int j; + while (send_wr) { + struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr; + struct ib_sge *sge = send_wr->sg_list; + ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x " + "send_flags=%x opcode=%x",idx, send_wr->wr_id, + send_wr->num_sge, send_wr->send_flags, + send_wr->opcode); + if (mad_hdr) { + ehca_gen_dbg("send_wr#%x mad_hdr base_version=%x " + "mgmt_class=%x class_version=%x method=%x " + "status=%x class_specific=%x tid=%lx " + "attr_id=%x resv=%x attr_mod=%x", + idx, mad_hdr->base_version, + mad_hdr->mgmt_class, + mad_hdr->class_version, mad_hdr->method, + mad_hdr->status, mad_hdr->class_specific, + mad_hdr->tid, mad_hdr->attr_id, + mad_hdr->resv, + mad_hdr->attr_mod); + } + for (j = 0; j < send_wr->num_sge; j++) { + u8 *data = (u8 *) abs_to_virt(sge->addr); + ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x " + "lkey=%x", + idx, j, data, sge->length, sge->lkey); + /* assume length is n*16 */ + ehca_dmp(data, sge->length, "send_wr#%x sge#%x", + idx, j); + sge++; + } /* eof for j */ + idx++; + send_wr = send_wr->next; + } /* eof while send_wr */ +} + +#endif /* DEBUG_GSI_SEND_WR */ + +static inline int ehca_write_swqe(struct ehca_qp *qp, + struct ehca_wqe *wqe_p, + const struct ib_send_wr *send_wr) +{ + u32 idx; + u64 dma_length; + struct ehca_av *my_av; + u32 remote_qkey = send_wr->wr.ud.remote_qkey; + + if (unlikely((send_wr->num_sge < 0) || + (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) { + ehca_gen_err("Invalid number of WQE SGE. " + "num_sqe=%x max_nr_of_sg=%x", + send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg); + return -EINVAL; /* invalid SG list length */ + } + + /* clear wqe header until sglist */ + memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); + + wqe_p->work_request_id = send_wr->wr_id; + + switch (send_wr->opcode) { + case IB_WR_SEND: + case IB_WR_SEND_WITH_IMM: + wqe_p->optype = WQE_OPTYPE_SEND; + break; + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + wqe_p->optype = WQE_OPTYPE_RDMAWRITE; + break; + case IB_WR_RDMA_READ: + wqe_p->optype = WQE_OPTYPE_RDMAREAD; + break; + default: + ehca_gen_err("Invalid opcode=%x", send_wr->opcode); + return -EINVAL; /* invalid opcode */ + } + + wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE; + + wqe_p->wr_flag = 0; + + if (send_wr->send_flags & IB_SEND_SIGNALED) + wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM; + + if (send_wr->opcode == IB_WR_SEND_WITH_IMM || + send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { + /* this might not work as long as HW does not support it */ + wqe_p->immediate_data = be32_to_cpu(send_wr->imm_data); + wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT; + } + + wqe_p->nr_of_data_seg = send_wr->num_sge; + + switch (qp->qp_type) { + case IB_QPT_SMI: + case IB_QPT_GSI: + /* no break is intential here */ + case IB_QPT_UD: + /* IB 1.2 spec C10-15 compliance */ + if (send_wr->wr.ud.remote_qkey & 0x80000000) + remote_qkey = qp->qkey; + + wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8; + wqe_p->local_ee_context_qkey = remote_qkey; + if (!send_wr->wr.ud.ah) { + ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp); + return -EINVAL; + } + my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah); + wqe_p->u.ud_av.ud_av = my_av->av; + + /* + * omitted check of IB_SEND_INLINE + * since HW does not support it + */ + for (idx = 0; idx < send_wr->num_sge; idx++) { + wqe_p->u.ud_av.sg_list[idx].vaddr = + send_wr->sg_list[idx].addr; + wqe_p->u.ud_av.sg_list[idx].lkey = + send_wr->sg_list[idx].lkey; + wqe_p->u.ud_av.sg_list[idx].length = + send_wr->sg_list[idx].length; + } /* eof for idx */ + if (qp->qp_type == IB_QPT_SMI || + qp->qp_type == IB_QPT_GSI) + wqe_p->u.ud_av.ud_av.pmtu = 1; + if (qp->qp_type == IB_QPT_GSI) { + wqe_p->pkeyi = send_wr->wr.ud.pkey_index; +#ifdef DEBUG_GSI_SEND_WR + trace_send_wr_ud(send_wr); +#endif /* DEBUG_GSI_SEND_WR */ + } + break; + + case IB_QPT_UC: + if (send_wr->send_flags & IB_SEND_FENCE) + wqe_p->wr_flag |= WQE_WRFLAG_FENCE; + /* no break is intentional here */ + case IB_QPT_RC: + /* TODO: atomic not implemented */ + wqe_p->u.nud.remote_virtual_adress = + send_wr->wr.rdma.remote_addr; + wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey; + + /* + * omitted checking of IB_SEND_INLINE + * since HW does not support it + */ + dma_length = 0; + for (idx = 0; idx < send_wr->num_sge; idx++) { + wqe_p->u.nud.sg_list[idx].vaddr = + send_wr->sg_list[idx].addr; + wqe_p->u.nud.sg_list[idx].lkey = + send_wr->sg_list[idx].lkey; + wqe_p->u.nud.sg_list[idx].length = + send_wr->sg_list[idx].length; + dma_length += send_wr->sg_list[idx].length; + } /* eof idx */ + wqe_p->u.nud.atomic_1st_op_dma_len = dma_length; + + break; + + default: + ehca_gen_err("Invalid qptype=%x", qp->qp_type); + return -EINVAL; + } + + if (ehca_debug_level) { + ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp); + ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe"); + } + return 0; +} + +/* map_ib_wc_status converts raw cqe_status to ib_wc_status */ +static inline void map_ib_wc_status(u32 cqe_status, + enum ib_wc_status *wc_status) +{ + if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) { + switch (cqe_status & 0x3F) { + case 0x01: + case 0x21: + *wc_status = IB_WC_LOC_LEN_ERR; + break; + case 0x02: + case 0x22: + *wc_status = IB_WC_LOC_QP_OP_ERR; + break; + case 0x03: + case 0x23: + *wc_status = IB_WC_LOC_EEC_OP_ERR; + break; + case 0x04: + case 0x24: + *wc_status = IB_WC_LOC_PROT_ERR; + break; + case 0x05: + case 0x25: + *wc_status = IB_WC_WR_FLUSH_ERR; + break; + case 0x06: + *wc_status = IB_WC_MW_BIND_ERR; + break; + case 0x07: /* remote error - look into bits 20:24 */ + switch ((cqe_status + & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) { + case 0x0: + /* + * PSN Sequence Error! + * couldn't find a matching status! + */ + *wc_status = IB_WC_GENERAL_ERR; + break; + case 0x1: + *wc_status = IB_WC_REM_INV_REQ_ERR; + break; + case 0x2: + *wc_status = IB_WC_REM_ACCESS_ERR; + break; + case 0x3: + *wc_status = IB_WC_REM_OP_ERR; + break; + case 0x4: + *wc_status = IB_WC_REM_INV_RD_REQ_ERR; + break; + } + break; + case 0x08: + *wc_status = IB_WC_RETRY_EXC_ERR; + break; + case 0x09: + *wc_status = IB_WC_RNR_RETRY_EXC_ERR; + break; + case 0x0A: + case 0x2D: + *wc_status = IB_WC_REM_ABORT_ERR; + break; + case 0x0B: + case 0x2E: + *wc_status = IB_WC_INV_EECN_ERR; + break; + case 0x0C: + case 0x2F: + *wc_status = IB_WC_INV_EEC_STATE_ERR; + break; + case 0x0D: + *wc_status = IB_WC_BAD_RESP_ERR; + break; + case 0x10: + /* WQE purged */ + *wc_status = IB_WC_WR_FLUSH_ERR; + break; + default: + *wc_status = IB_WC_FATAL_ERR; + + } + } else + *wc_status = IB_WC_SUCCESS; +} + +int ehca_post_send(struct ib_qp *qp, + struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr) +{ + struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); + struct ib_send_wr *cur_send_wr; + struct ehca_wqe *wqe_p; + int wqe_cnt = 0; + int ret = 0; + unsigned long spl_flags; + + /* LOCK the QUEUE */ + spin_lock_irqsave(&my_qp->spinlock_s, spl_flags); + + /* loop processes list of send reqs */ + for (cur_send_wr = send_wr; cur_send_wr != NULL; + cur_send_wr = cur_send_wr->next) { + u64 start_offset = my_qp->ipz_squeue.current_q_offset; + /* get pointer next to free WQE */ + wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue); + if (unlikely(!wqe_p)) { + /* too many posted work requests: queue overflow */ + if (bad_send_wr) + *bad_send_wr = cur_send_wr; + if (wqe_cnt == 0) { + ret = -ENOMEM; + ehca_err(qp->device, "Too many posted WQEs " + "qp_num=%x", qp->qp_num); + } + goto post_send_exit0; + } + /* write a SEND WQE into the QUEUE */ + ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr); + /* + * if something failed, + * reset the free entry pointer to the start value + */ + if (unlikely(ret)) { + my_qp->ipz_squeue.current_q_offset = start_offset; + *bad_send_wr = cur_send_wr; + if (wqe_cnt == 0) { + ret = -EINVAL; + ehca_err(qp->device, "Could not write WQE " + "qp_num=%x", qp->qp_num); + } + goto post_send_exit0; + } + wqe_cnt++; + ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d", + my_qp, qp->qp_num, wqe_cnt); + } /* eof for cur_send_wr */ + +post_send_exit0: + /* UNLOCK the QUEUE */ + spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags); + iosync(); /* serialize GAL register access */ + hipz_update_sqa(my_qp, wqe_cnt); + return ret; +} + +int ehca_post_recv(struct ib_qp *qp, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); + struct ib_recv_wr *cur_recv_wr; + struct ehca_wqe *wqe_p; + int wqe_cnt = 0; + int ret = 0; + unsigned long spl_flags; + + /* LOCK the QUEUE */ + spin_lock_irqsave(&my_qp->spinlock_r, spl_flags); + + /* loop processes list of send reqs */ + for (cur_recv_wr = recv_wr; cur_recv_wr != NULL; + cur_recv_wr = cur_recv_wr->next) { + u64 start_offset = my_qp->ipz_rqueue.current_q_offset; + /* get pointer next to free WQE */ + wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue); + if (unlikely(!wqe_p)) { + /* too many posted work requests: queue overflow */ + if (bad_recv_wr) + *bad_recv_wr = cur_recv_wr; + if (wqe_cnt == 0) { + ret = -ENOMEM; + ehca_err(qp->device, "Too many posted WQEs " + "qp_num=%x", qp->qp_num); + } + goto post_recv_exit0; + } + /* write a RECV WQE into the QUEUE */ + ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr); + /* + * if something failed, + * reset the free entry pointer to the start value + */ + if (unlikely(ret)) { + my_qp->ipz_rqueue.current_q_offset = start_offset; + *bad_recv_wr = cur_recv_wr; + if (wqe_cnt == 0) { + ret = -EINVAL; + ehca_err(qp->device, "Could not write WQE " + "qp_num=%x", qp->qp_num); + } + goto post_recv_exit0; + } + wqe_cnt++; + ehca_gen_dbg("ehca_qp=%p qp_num=%x wqe_cnt=%d", + my_qp, qp->qp_num, wqe_cnt); + } /* eof for cur_recv_wr */ + +post_recv_exit0: + spin_unlock_irqrestore(&my_qp->spinlock_r, spl_flags); + iosync(); /* serialize GAL register access */ + hipz_update_rqa(my_qp, wqe_cnt); + return ret; +} + +/* + * ib_wc_opcode table converts ehca wc opcode to ib + * Since we use zero to indicate invalid opcode, the actual ib opcode must + * be decremented!!! + */ +static const u8 ib_wc_opcode[255] = { + [0x01] = IB_WC_RECV+1, + [0x02] = IB_WC_RECV_RDMA_WITH_IMM+1, + [0x04] = IB_WC_BIND_MW+1, + [0x08] = IB_WC_FETCH_ADD+1, + [0x10] = IB_WC_COMP_SWAP+1, + [0x20] = IB_WC_RDMA_WRITE+1, + [0x40] = IB_WC_RDMA_READ+1, + [0x80] = IB_WC_SEND+1 +}; + +/* internal function to poll one entry of cq */ +static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc) +{ + int ret = 0; + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + struct ehca_cqe *cqe; + int cqe_count = 0; + +poll_cq_one_read_cqe: + cqe = (struct ehca_cqe *) + ipz_qeit_get_inc_valid(&my_cq->ipz_queue); + if (!cqe) { + ret = -EAGAIN; + ehca_dbg(cq->device, "Completion queue is empty ehca_cq=%p " + "cq_num=%x ret=%x", my_cq, my_cq->cq_number, ret); + goto poll_cq_one_exit0; + } + + /* prevents loads being reordered across this point */ + rmb(); + + cqe_count++; + if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) { + struct ehca_qp *qp=ehca_cq_get_qp(my_cq, cqe->local_qp_number); + int purgeflag; + unsigned long spl_flags; + if (!qp) { + ehca_err(cq->device, "cq_num=%x qp_num=%x " + "could not find qp -> ignore cqe", + my_cq->cq_number, cqe->local_qp_number); + ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x", + my_cq->cq_number, cqe->local_qp_number); + /* ignore this purged cqe */ + goto poll_cq_one_read_cqe; + } + spin_lock_irqsave(&qp->spinlock_s, spl_flags); + purgeflag = qp->sqerr_purgeflag; + spin_unlock_irqrestore(&qp->spinlock_s, spl_flags); + + if (purgeflag) { + ehca_dbg(cq->device, "Got CQE with purged bit qp_num=%x " + "src_qp=%x", + cqe->local_qp_number, cqe->remote_qp_number); + if (ehca_debug_level) + ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x", + cqe->local_qp_number, + cqe->remote_qp_number); + /* + * ignore this to avoid double cqes of bad wqe + * that caused sqe and turn off purge flag + */ + qp->sqerr_purgeflag = 0; + goto poll_cq_one_read_cqe; + } + } + + /* tracing cqe */ + if (ehca_debug_level) { + ehca_dbg(cq->device, + "Received COMPLETION ehca_cq=%p cq_num=%x -----", + my_cq, my_cq->cq_number); + ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", + my_cq, my_cq->cq_number); + ehca_dbg(cq->device, + "ehca_cq=%p cq_num=%x -------------------------", + my_cq, my_cq->cq_number); + } + + /* we got a completion! */ + wc->wr_id = cqe->work_request_id; + + /* eval ib_wc_opcode */ + wc->opcode = ib_wc_opcode[cqe->optype]-1; + if (unlikely(wc->opcode == -1)) { + ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x " + "ehca_cq=%p cq_num=%x", + cqe->optype, cqe->status, my_cq, my_cq->cq_number); + /* dump cqe for other infos */ + ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", + my_cq, my_cq->cq_number); + /* update also queue adder to throw away this entry!!! */ + goto poll_cq_one_exit0; + } + /* eval ib_wc_status */ + if (unlikely(cqe->status & WC_STATUS_ERROR_BIT)) { + /* complete with errors */ + map_ib_wc_status(cqe->status, &wc->status); + wc->vendor_err = wc->status; + } else + wc->status = IB_WC_SUCCESS; + + wc->qp_num = cqe->local_qp_number; + wc->byte_len = cqe->nr_bytes_transferred; + wc->pkey_index = cqe->pkey_index; + wc->slid = cqe->rlid; + wc->dlid_path_bits = cqe->dlid; + wc->src_qp = cqe->remote_qp_number; + wc->wc_flags = cqe->w_completion_flags; + wc->imm_data = cpu_to_be32(cqe->immediate_data); + wc->sl = cqe->service_level; + + if (wc->status != IB_WC_SUCCESS) + ehca_dbg(cq->device, + "ehca_cq=%p cq_num=%x WARNING unsuccessful cqe " + "OPType=%x status=%x qp_num=%x src_qp=%x wr_id=%lx " + "cqe=%p", my_cq, my_cq->cq_number, cqe->optype, + cqe->status, cqe->local_qp_number, + cqe->remote_qp_number, cqe->work_request_id, cqe); + +poll_cq_one_exit0: + if (cqe_count > 0) + hipz_update_feca(my_cq, cqe_count); + + return ret; +} + +int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) +{ + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + int nr; + struct ib_wc *current_wc = wc; + int ret = 0; + unsigned long spl_flags; + + if (num_entries < 1) { + ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p " + "cq_num=%x", num_entries, my_cq, my_cq->cq_number); + ret = -EINVAL; + goto poll_cq_exit0; + } + + spin_lock_irqsave(&my_cq->spinlock, spl_flags); + for (nr = 0; nr < num_entries; nr++) { + ret = ehca_poll_cq_one(cq, current_wc); + if (ret) + break; + current_wc++; + } /* eof for nr */ + spin_unlock_irqrestore(&my_cq->spinlock, spl_flags); + if (ret == -EAGAIN || !ret) + ret = nr; + +poll_cq_exit0: + return ret; +} + +int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify) +{ + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + + switch (cq_notify) { + case IB_CQ_SOLICITED: + hipz_set_cqx_n0(my_cq, 1); + break; + case IB_CQ_NEXT_COMP: + hipz_set_cqx_n1(my_cq, 1); + break; + default: + return -EINVAL; + } + + return 0; +} diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c new file mode 100644 index 000000000000..9f16e9c79394 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_sqp.c @@ -0,0 +1,111 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * SQP functions + * + * Authors: Khadija Souissi + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_qes.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" + + +/** + * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue + * pair is created successfully, the corresponding port gets active. + * + * Define Special Queue pair 0 (SMI QP) is still not supported. + * + * @qp_init_attr: Queue pair init attributes with port and queue pair type + */ + +u64 ehca_define_sqp(struct ehca_shca *shca, + struct ehca_qp *ehca_qp, + struct ib_qp_init_attr *qp_init_attr) +{ + u32 pma_qp_nr, bma_qp_nr; + u64 ret; + u8 port = qp_init_attr->port_num; + int counter; + + shca->sport[port - 1].port_state = IB_PORT_DOWN; + + switch (qp_init_attr->qp_type) { + case IB_QPT_SMI: + /* function not supported yet */ + break; + case IB_QPT_GSI: + ret = hipz_h_define_aqp1(shca->ipz_hca_handle, + ehca_qp->ipz_qp_handle, + ehca_qp->galpas.kernel, + (u32) qp_init_attr->port_num, + &pma_qp_nr, &bma_qp_nr); + + if (ret != H_SUCCESS) { + ehca_err(&shca->ib_device, + "Can't define AQP1 for port %x. rc=%lx", + port, ret); + return ret; + } + break; + default: + ehca_err(&shca->ib_device, "invalid qp_type=%x", + qp_init_attr->qp_type); + return H_PARAMETER; + } + + for (counter = 0; + shca->sport[port - 1].port_state != IB_PORT_ACTIVE && + counter < ehca_port_act_time; + counter++) { + ehca_dbg(&shca->ib_device, "... wait until port %x is active", + port); + msleep_interruptible(1000); + } + + if (counter == ehca_port_act_time) { + ehca_err(&shca->ib_device, "Port %x is not active.", port); + return H_HARDWARE; + } + + return H_SUCCESS; +} diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h new file mode 100644 index 000000000000..9f56bb846d93 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_tools.h @@ -0,0 +1,172 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * auxiliary functions + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Khadija Souissi + * Waleri Fomin + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef EHCA_TOOLS_H +#define EHCA_TOOLS_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +extern int ehca_debug_level; + +#define ehca_dbg(ib_dev, format, arg...) \ + do { \ + if (unlikely(ehca_debug_level)) \ + dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \ + "PU%04x EHCA_DBG:%s " format "\n", \ + get_paca()->paca_index, __FUNCTION__, \ + ## arg); \ + } while (0) + +#define ehca_info(ib_dev, format, arg...) \ + dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \ + get_paca()->paca_index, __FUNCTION__, ## arg) + +#define ehca_warn(ib_dev, format, arg...) \ + dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \ + get_paca()->paca_index, __FUNCTION__, ## arg) + +#define ehca_err(ib_dev, format, arg...) \ + dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \ + get_paca()->paca_index, __FUNCTION__, ## arg) + +/* use this one only if no ib_dev available */ +#define ehca_gen_dbg(format, arg...) \ + do { \ + if (unlikely(ehca_debug_level)) \ + printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n",\ + get_paca()->paca_index, __FUNCTION__, ## arg); \ + } while (0) + +#define ehca_gen_warn(format, arg...) \ + do { \ + if (unlikely(ehca_debug_level)) \ + printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n",\ + get_paca()->paca_index, __FUNCTION__, ## arg); \ + } while (0) + +#define ehca_gen_err(format, arg...) \ + printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \ + get_paca()->paca_index, __FUNCTION__, ## arg) + +/** + * ehca_dmp - printk a memory block, whose length is n*8 bytes. + * Each line has the following layout: + * adr=X ofs=Y <8 bytes hex> <8 bytes hex> + */ +#define ehca_dmp(adr, len, format, args...) \ + do { \ + unsigned int x; \ + unsigned int l = (unsigned int)(len); \ + unsigned char *deb = (unsigned char*)(adr); \ + for (x = 0; x < l; x += 16) { \ + printk("EHCA_DMP:%s" format \ + " adr=%p ofs=%04x %016lx %016lx\n", \ + __FUNCTION__, ##args, deb, x, \ + *((u64 *)&deb[0]), *((u64 *)&deb[8])); \ + deb += 16; \ + } \ + } while (0) + +/* define a bitmask, little endian version */ +#define EHCA_BMASK(pos,length) (((pos)<<16)+(length)) + +/* define a bitmask, the ibm way... */ +#define EHCA_BMASK_IBM(from,to) (((63-to)<<16)+((to)-(from)+1)) + +/* internal function, don't use */ +#define EHCA_BMASK_SHIFTPOS(mask) (((mask)>>16)&0xffff) + +/* internal function, don't use */ +#define EHCA_BMASK_MASK(mask) (0xffffffffffffffffULL >> ((64-(mask))&0xffff)) + +/** + * EHCA_BMASK_SET - return value shifted and masked by mask + * variable|=EHCA_BMASK_SET(MY_MASK,0x4711) ORs the bits in variable + * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask + * in variable + */ +#define EHCA_BMASK_SET(mask,value) \ + ((EHCA_BMASK_MASK(mask) & ((u64)(value)))<>EHCA_BMASK_SHIFTPOS(mask))) + + +/* Converts ehca to ib return code */ +static inline int ehca2ib_return_code(u64 ehca_rc) +{ + switch (ehca_rc) { + case H_SUCCESS: + return 0; + case H_BUSY: + return -EBUSY; + case H_NO_MEM: + return -ENOMEM; + default: + return -EINVAL; + } +} + + +#endif /* EHCA_TOOLS_H */ diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c new file mode 100644 index 000000000000..e08764e4aef2 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c @@ -0,0 +1,392 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * userspace support verbs + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "ehca_classes.h" +#include "ehca_iverbs.h" +#include "ehca_mrmw.h" +#include "ehca_tools.h" +#include "hcp_if.h" + +struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device, + struct ib_udata *udata) +{ + struct ehca_ucontext *my_context; + + my_context = kzalloc(sizeof *my_context, GFP_KERNEL); + if (!my_context) { + ehca_err(device, "Out of memory device=%p", device); + return ERR_PTR(-ENOMEM); + } + + return &my_context->ib_ucontext; +} + +int ehca_dealloc_ucontext(struct ib_ucontext *context) +{ + kfree(container_of(context, struct ehca_ucontext, ib_ucontext)); + return 0; +} + +struct page *ehca_nopage(struct vm_area_struct *vma, + unsigned long address, int *type) +{ + struct page *mypage = NULL; + u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT; + u32 idr_handle = fileoffset >> 32; + u32 q_type = (fileoffset >> 28) & 0xF; /* CQ, QP,... */ + u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */ + u32 cur_pid = current->tgid; + unsigned long flags; + struct ehca_cq *cq; + struct ehca_qp *qp; + struct ehca_pd *pd; + u64 offset; + void *vaddr; + + switch (q_type) { + case 1: /* CQ */ + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + cq = idr_find(&ehca_cq_idr, idr_handle); + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + + /* make sure this mmap really belongs to the authorized user */ + if (!cq) { + ehca_gen_err("cq is NULL ret=NOPAGE_SIGBUS"); + return NOPAGE_SIGBUS; + } + + if (cq->ownpid != cur_pid) { + ehca_err(cq->ib_cq.device, + "Invalid caller pid=%x ownpid=%x", + cur_pid, cq->ownpid); + return NOPAGE_SIGBUS; + } + + if (rsrc_type == 2) { + ehca_dbg(cq->ib_cq.device, "cq=%p cq queuearea", cq); + offset = address - vma->vm_start; + vaddr = ipz_qeit_calc(&cq->ipz_queue, offset); + ehca_dbg(cq->ib_cq.device, "offset=%lx vaddr=%p", + offset, vaddr); + mypage = virt_to_page(vaddr); + } + break; + + case 2: /* QP */ + spin_lock_irqsave(&ehca_qp_idr_lock, flags); + qp = idr_find(&ehca_qp_idr, idr_handle); + spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + + /* make sure this mmap really belongs to the authorized user */ + if (!qp) { + ehca_gen_err("qp is NULL ret=NOPAGE_SIGBUS"); + return NOPAGE_SIGBUS; + } + + pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd); + if (pd->ownpid != cur_pid) { + ehca_err(qp->ib_qp.device, + "Invalid caller pid=%x ownpid=%x", + cur_pid, pd->ownpid); + return NOPAGE_SIGBUS; + } + + if (rsrc_type == 2) { /* rqueue */ + ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueuearea", qp); + offset = address - vma->vm_start; + vaddr = ipz_qeit_calc(&qp->ipz_rqueue, offset); + ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p", + offset, vaddr); + mypage = virt_to_page(vaddr); + } else if (rsrc_type == 3) { /* squeue */ + ehca_dbg(qp->ib_qp.device, "qp=%p qp squeuearea", qp); + offset = address - vma->vm_start; + vaddr = ipz_qeit_calc(&qp->ipz_squeue, offset); + ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p", + offset, vaddr); + mypage = virt_to_page(vaddr); + } + break; + + default: + ehca_gen_err("bad queue type %x", q_type); + return NOPAGE_SIGBUS; + } + + if (!mypage) { + ehca_gen_err("Invalid page adr==NULL ret=NOPAGE_SIGBUS"); + return NOPAGE_SIGBUS; + } + get_page(mypage); + + return mypage; +} + +static struct vm_operations_struct ehcau_vm_ops = { + .nopage = ehca_nopage, +}; + +int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT; + u32 idr_handle = fileoffset >> 32; + u32 q_type = (fileoffset >> 28) & 0xF; /* CQ, QP,... */ + u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */ + u32 cur_pid = current->tgid; + u32 ret; + u64 vsize, physical; + unsigned long flags; + struct ehca_cq *cq; + struct ehca_qp *qp; + struct ehca_pd *pd; + + switch (q_type) { + case 1: /* CQ */ + spin_lock_irqsave(&ehca_cq_idr_lock, flags); + cq = idr_find(&ehca_cq_idr, idr_handle); + spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + + /* make sure this mmap really belongs to the authorized user */ + if (!cq) + return -EINVAL; + + if (cq->ownpid != cur_pid) { + ehca_err(cq->ib_cq.device, + "Invalid caller pid=%x ownpid=%x", + cur_pid, cq->ownpid); + return -ENOMEM; + } + + if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context) + return -EINVAL; + + switch (rsrc_type) { + case 1: /* galpa fw handle */ + ehca_dbg(cq->ib_cq.device, "cq=%p cq triggerarea", cq); + vma->vm_flags |= VM_RESERVED; + vsize = vma->vm_end - vma->vm_start; + if (vsize != EHCA_PAGESIZE) { + ehca_err(cq->ib_cq.device, "invalid vsize=%lx", + vma->vm_end - vma->vm_start); + return -EINVAL; + } + + physical = cq->galpas.user.fw_handle; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_flags |= VM_IO | VM_RESERVED; + + ehca_dbg(cq->ib_cq.device, + "vsize=%lx physical=%lx", vsize, physical); + ret = remap_pfn_range(vma, vma->vm_start, + physical >> PAGE_SHIFT, vsize, + vma->vm_page_prot); + if (ret) { + ehca_err(cq->ib_cq.device, + "remap_pfn_range() failed ret=%x", + ret); + return -ENOMEM; + } + break; + + case 2: /* cq queue_addr */ + ehca_dbg(cq->ib_cq.device, "cq=%p cq q_addr", cq); + vma->vm_flags |= VM_RESERVED; + vma->vm_ops = &ehcau_vm_ops; + break; + + default: + ehca_err(cq->ib_cq.device, "bad resource type %x", + rsrc_type); + return -EINVAL; + } + break; + + case 2: /* QP */ + spin_lock_irqsave(&ehca_qp_idr_lock, flags); + qp = idr_find(&ehca_qp_idr, idr_handle); + spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + + /* make sure this mmap really belongs to the authorized user */ + if (!qp) + return -EINVAL; + + pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd); + if (pd->ownpid != cur_pid) { + ehca_err(qp->ib_qp.device, + "Invalid caller pid=%x ownpid=%x", + cur_pid, pd->ownpid); + return -ENOMEM; + } + + if (!qp->ib_qp.uobject || qp->ib_qp.uobject->context != context) + return -EINVAL; + + switch (rsrc_type) { + case 1: /* galpa fw handle */ + ehca_dbg(qp->ib_qp.device, "qp=%p qp triggerarea", qp); + vma->vm_flags |= VM_RESERVED; + vsize = vma->vm_end - vma->vm_start; + if (vsize != EHCA_PAGESIZE) { + ehca_err(qp->ib_qp.device, "invalid vsize=%lx", + vma->vm_end - vma->vm_start); + return -EINVAL; + } + + physical = qp->galpas.user.fw_handle; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_flags |= VM_IO | VM_RESERVED; + + ehca_dbg(qp->ib_qp.device, "vsize=%lx physical=%lx", + vsize, physical); + ret = remap_pfn_range(vma, vma->vm_start, + physical >> PAGE_SHIFT, vsize, + vma->vm_page_prot); + if (ret) { + ehca_err(qp->ib_qp.device, + "remap_pfn_range() failed ret=%x", + ret); + return -ENOMEM; + } + break; + + case 2: /* qp rqueue_addr */ + ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueue_addr", qp); + vma->vm_flags |= VM_RESERVED; + vma->vm_ops = &ehcau_vm_ops; + break; + + case 3: /* qp squeue_addr */ + ehca_dbg(qp->ib_qp.device, "qp=%p qp squeue_addr", qp); + vma->vm_flags |= VM_RESERVED; + vma->vm_ops = &ehcau_vm_ops; + break; + + default: + ehca_err(qp->ib_qp.device, "bad resource type %x", + rsrc_type); + return -EINVAL; + } + break; + + default: + ehca_gen_err("bad queue type %x", q_type); + return -EINVAL; + } + + return 0; +} + +int ehca_mmap_nopage(u64 foffset, u64 length, void **mapped, + struct vm_area_struct **vma) +{ + down_write(¤t->mm->mmap_sem); + *mapped = (void*)do_mmap(NULL,0, length, PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, + foffset); + up_write(¤t->mm->mmap_sem); + if (!(*mapped)) { + ehca_gen_err("couldn't mmap foffset=%lx length=%lx", + foffset, length); + return -EINVAL; + } + + *vma = find_vma(current->mm, (u64)*mapped); + if (!(*vma)) { + down_write(¤t->mm->mmap_sem); + do_munmap(current->mm, 0, length); + up_write(¤t->mm->mmap_sem); + ehca_gen_err("couldn't find vma queue=%p", *mapped); + return -EINVAL; + } + (*vma)->vm_flags |= VM_RESERVED; + (*vma)->vm_ops = &ehcau_vm_ops; + + return 0; +} + +int ehca_mmap_register(u64 physical, void **mapped, + struct vm_area_struct **vma) +{ + int ret; + unsigned long vsize; + /* ehca hw supports only 4k page */ + ret = ehca_mmap_nopage(0, EHCA_PAGESIZE, mapped, vma); + if (ret) { + ehca_gen_err("could'nt mmap physical=%lx", physical); + return ret; + } + + (*vma)->vm_flags |= VM_RESERVED; + vsize = (*vma)->vm_end - (*vma)->vm_start; + if (vsize != EHCA_PAGESIZE) { + ehca_gen_err("invalid vsize=%lx", + (*vma)->vm_end - (*vma)->vm_start); + return -EINVAL; + } + + (*vma)->vm_page_prot = pgprot_noncached((*vma)->vm_page_prot); + (*vma)->vm_flags |= VM_IO | VM_RESERVED; + + ret = remap_pfn_range((*vma), (*vma)->vm_start, + physical >> PAGE_SHIFT, vsize, + (*vma)->vm_page_prot); + if (ret) { + ehca_gen_err("remap_pfn_range() failed ret=%x", ret); + return -ENOMEM; + } + + return 0; + +} + +int ehca_munmap(unsigned long addr, size_t len) { + int ret = 0; + struct mm_struct *mm = current->mm; + if (mm) { + down_write(&mm->mmap_sem); + ret = do_munmap(mm, addr, len); + up_write(&mm->mmap_sem); + } + return ret; +} diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c new file mode 100644 index 000000000000..3fb46e67df87 --- /dev/null +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -0,0 +1,874 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Firmware Infiniband Interface code for POWER + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Gerd Bayer + * Waleri Fomin + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include "ehca_tools.h" +#include "hcp_if.h" +#include "hcp_phyp.h" +#include "hipz_fns.h" +#include "ipz_pt_fn.h" + +#define H_ALL_RES_QP_ENHANCED_OPS EHCA_BMASK_IBM(9, 11) +#define H_ALL_RES_QP_PTE_PIN EHCA_BMASK_IBM(12, 12) +#define H_ALL_RES_QP_SERVICE_TYPE EHCA_BMASK_IBM(13, 15) +#define H_ALL_RES_QP_LL_RQ_CQE_POSTING EHCA_BMASK_IBM(18, 18) +#define H_ALL_RES_QP_LL_SQ_CQE_POSTING EHCA_BMASK_IBM(19, 21) +#define H_ALL_RES_QP_SIGNALING_TYPE EHCA_BMASK_IBM(22, 23) +#define H_ALL_RES_QP_UD_AV_LKEY_CTRL EHCA_BMASK_IBM(31, 31) +#define H_ALL_RES_QP_RESOURCE_TYPE EHCA_BMASK_IBM(56, 63) + +#define H_ALL_RES_QP_MAX_OUTST_SEND_WR EHCA_BMASK_IBM(0, 15) +#define H_ALL_RES_QP_MAX_OUTST_RECV_WR EHCA_BMASK_IBM(16, 31) +#define H_ALL_RES_QP_MAX_SEND_SGE EHCA_BMASK_IBM(32, 39) +#define H_ALL_RES_QP_MAX_RECV_SGE EHCA_BMASK_IBM(40, 47) + +#define H_ALL_RES_QP_ACT_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31) +#define H_ALL_RES_QP_ACT_OUTST_RECV_WR EHCA_BMASK_IBM(48, 63) +#define H_ALL_RES_QP_ACT_SEND_SGE EHCA_BMASK_IBM(8, 15) +#define H_ALL_RES_QP_ACT_RECV_SGE EHCA_BMASK_IBM(24, 31) + +#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES EHCA_BMASK_IBM(0, 31) +#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES EHCA_BMASK_IBM(32, 63) + +/* direct access qp controls */ +#define DAQP_CTRL_ENABLE 0x01 +#define DAQP_CTRL_SEND_COMP 0x20 +#define DAQP_CTRL_RECV_COMP 0x40 + +static u32 get_longbusy_msecs(int longbusy_rc) +{ + switch (longbusy_rc) { + case H_LONG_BUSY_ORDER_1_MSEC: + return 1; + case H_LONG_BUSY_ORDER_10_MSEC: + return 10; + case H_LONG_BUSY_ORDER_100_MSEC: + return 100; + case H_LONG_BUSY_ORDER_1_SEC: + return 1000; + case H_LONG_BUSY_ORDER_10_SEC: + return 10000; + case H_LONG_BUSY_ORDER_100_SEC: + return 100000; + default: + return 1; + } +} + +static long ehca_plpar_hcall_norets(unsigned long opcode, + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4, + unsigned long arg5, + unsigned long arg6, + unsigned long arg7) +{ + long ret; + int i, sleep_msecs; + + ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx " + "arg5=%lx arg6=%lx arg7=%lx", + opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7); + + for (i = 0; i < 5; i++) { + ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4, + arg5, arg6, arg7); + + if (H_IS_LONG_BUSY(ret)) { + sleep_msecs = get_longbusy_msecs(ret); + msleep_interruptible(sleep_msecs); + continue; + } + + if (ret < H_SUCCESS) + ehca_gen_err("opcode=%lx ret=%lx" + " arg1=%lx arg2=%lx arg3=%lx arg4=%lx" + " arg5=%lx arg6=%lx arg7=%lx ", + opcode, ret, + arg1, arg2, arg3, arg4, arg5, + arg6, arg7); + + ehca_gen_dbg("opcode=%lx ret=%lx", opcode, ret); + return ret; + + } + + return H_BUSY; +} + +static long ehca_plpar_hcall9(unsigned long opcode, + unsigned long *outs, /* array of 9 outputs */ + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4, + unsigned long arg5, + unsigned long arg6, + unsigned long arg7, + unsigned long arg8, + unsigned long arg9) +{ + long ret; + int i, sleep_msecs; + + ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx " + "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx", + opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7, + arg8, arg9); + + for (i = 0; i < 5; i++) { + ret = plpar_hcall9(opcode, outs, + arg1, arg2, arg3, arg4, arg5, + arg6, arg7, arg8, arg9); + + if (H_IS_LONG_BUSY(ret)) { + sleep_msecs = get_longbusy_msecs(ret); + msleep_interruptible(sleep_msecs); + continue; + } + + if (ret < H_SUCCESS) + ehca_gen_err("opcode=%lx ret=%lx" + " arg1=%lx arg2=%lx arg3=%lx arg4=%lx" + " arg5=%lx arg6=%lx arg7=%lx arg8=%lx" + " arg9=%lx" + " out1=%lx out2=%lx out3=%lx out4=%lx" + " out5=%lx out6=%lx out7=%lx out8=%lx" + " out9=%lx", + opcode, ret, + arg1, arg2, arg3, arg4, arg5, + arg6, arg7, arg8, arg9, + outs[0], outs[1], outs[2], outs[3], + outs[4], outs[5], outs[6], outs[7], + outs[8]); + + ehca_gen_dbg("opcode=%lx ret=%lx out1=%lx out2=%lx out3=%lx " + "out4=%lx out5=%lx out6=%lx out7=%lx out8=%lx " + "out9=%lx", + opcode, ret, outs[0], outs[1], outs[2], outs[3], + outs[4], outs[5], outs[6], outs[7], outs[8]); + return ret; + + } + + return H_BUSY; +} +u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, + struct ehca_pfeq *pfeq, + const u32 neq_control, + const u32 number_of_entries, + struct ipz_eq_handle *eq_handle, + u32 *act_nr_of_entries, + u32 *act_pages, + u32 *eq_ist) +{ + u64 ret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + u64 allocate_controls; + + /* resource type */ + allocate_controls = 3ULL; + + /* ISN is associated */ + if (neq_control != 1) + allocate_controls = (1ULL << (63 - 7)) | allocate_controls; + else /* notification event queue */ + allocate_controls = (1ULL << 63) | allocate_controls; + + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, + adapter_handle.handle, /* r4 */ + allocate_controls, /* r5 */ + number_of_entries, /* r6 */ + 0, 0, 0, 0, 0, 0); + eq_handle->handle = outs[0]; + *act_nr_of_entries = (u32)outs[3]; + *act_pages = (u32)outs[4]; + *eq_ist = (u32)outs[5]; + + if (ret == H_NOT_ENOUGH_RESOURCES) + ehca_gen_err("Not enough resource - ret=%lx ", ret); + + return ret; +} + +u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle, + struct ipz_eq_handle eq_handle, + const u64 event_mask) +{ + return ehca_plpar_hcall_norets(H_RESET_EVENTS, + adapter_handle.handle, /* r4 */ + eq_handle.handle, /* r5 */ + event_mask, /* r6 */ + 0, 0, 0, 0); +} + +u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, + struct ehca_cq *cq, + struct ehca_alloc_cq_parms *param) +{ + u64 ret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, + adapter_handle.handle, /* r4 */ + 2, /* r5 */ + param->eq_handle.handle, /* r6 */ + cq->token, /* r7 */ + param->nr_cqe, /* r8 */ + 0, 0, 0, 0); + cq->ipz_cq_handle.handle = outs[0]; + param->act_nr_of_entries = (u32)outs[3]; + param->act_pages = (u32)outs[4]; + + if (ret == H_SUCCESS) + hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]); + + if (ret == H_NOT_ENOUGH_RESOURCES) + ehca_gen_err("Not enough resources. ret=%lx", ret); + + return ret; +} + +u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, + struct ehca_qp *qp, + struct ehca_alloc_qp_parms *parms) +{ + u64 ret; + u64 allocate_controls; + u64 max_r10_reg; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + u16 max_nr_receive_wqes = qp->init_attr.cap.max_recv_wr + 1; + u16 max_nr_send_wqes = qp->init_attr.cap.max_send_wr + 1; + int daqp_ctrl = parms->daqp_ctrl; + + allocate_controls = + EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, + (daqp_ctrl & DAQP_CTRL_ENABLE) ? 1 : 0) + | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0) + | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype) + | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype) + | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING, + (daqp_ctrl & DAQP_CTRL_RECV_COMP) ? 1 : 0) + | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING, + (daqp_ctrl & DAQP_CTRL_SEND_COMP) ? 1 : 0) + | EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL, + parms->ud_av_l_key_ctl) + | EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1); + + max_r10_reg = + EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR, + max_nr_send_wqes) + | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR, + max_nr_receive_wqes) + | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE, + parms->max_send_sge) + | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE, + parms->max_recv_sge); + + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, + adapter_handle.handle, /* r4 */ + allocate_controls, /* r5 */ + qp->send_cq->ipz_cq_handle.handle, + qp->recv_cq->ipz_cq_handle.handle, + parms->ipz_eq_handle.handle, + ((u64)qp->token << 32) | parms->pd.value, + max_r10_reg, /* r10 */ + parms->ud_av_l_key_ctl, /* r11 */ + 0); + qp->ipz_qp_handle.handle = outs[0]; + qp->real_qp_num = (u32)outs[1]; + parms->act_nr_send_sges = + (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]); + parms->act_nr_recv_wqes = + (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]); + parms->act_nr_send_sges = + (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]); + parms->act_nr_recv_sges = + (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]); + parms->nr_sq_pages = + (u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]); + parms->nr_rq_pages = + (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]); + + if (ret == H_SUCCESS) + hcp_galpas_ctor(&qp->galpas, outs[6], outs[6]); + + if (ret == H_NOT_ENOUGH_RESOURCES) + ehca_gen_err("Not enough resources. ret=%lx", ret); + + return ret; +} + +u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, + const u8 port_id, + struct hipz_query_port *query_port_response_block) +{ + u64 ret; + u64 r_cb = virt_to_abs(query_port_response_block); + + if (r_cb & (EHCA_PAGESIZE-1)) { + ehca_gen_err("response block not page aligned"); + return H_PARAMETER; + } + + ret = ehca_plpar_hcall_norets(H_QUERY_PORT, + adapter_handle.handle, /* r4 */ + port_id, /* r5 */ + r_cb, /* r6 */ + 0, 0, 0, 0); + + if (ehca_debug_level) + ehca_dmp(query_port_response_block, 64, "response_block"); + + return ret; +} + +u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, + struct hipz_query_hca *query_hca_rblock) +{ + u64 r_cb = virt_to_abs(query_hca_rblock); + + if (r_cb & (EHCA_PAGESIZE-1)) { + ehca_gen_err("response_block=%p not page aligned", + query_hca_rblock); + return H_PARAMETER; + } + + return ehca_plpar_hcall_norets(H_QUERY_HCA, + adapter_handle.handle, /* r4 */ + r_cb, /* r5 */ + 0, 0, 0, 0, 0); +} + +u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle, + const u8 pagesize, + const u8 queue_type, + const u64 resource_handle, + const u64 logical_address_of_page, + u64 count) +{ + return ehca_plpar_hcall_norets(H_REGISTER_RPAGES, + adapter_handle.handle, /* r4 */ + queue_type | pagesize << 8, /* r5 */ + resource_handle, /* r6 */ + logical_address_of_page, /* r7 */ + count, /* r8 */ + 0, 0); +} + +u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle, + const struct ipz_eq_handle eq_handle, + struct ehca_pfeq *pfeq, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count) +{ + if (count != 1) { + ehca_gen_err("Ppage counter=%lx", count); + return H_PARAMETER; + } + return hipz_h_register_rpage(adapter_handle, + pagesize, + queue_type, + eq_handle.handle, + logical_address_of_page, count); +} + +u64 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle, + u32 ist) +{ + u64 ret; + ret = ehca_plpar_hcall_norets(H_QUERY_INT_STATE, + adapter_handle.handle, /* r4 */ + ist, /* r5 */ + 0, 0, 0, 0, 0); + + if (ret != H_SUCCESS && ret != H_BUSY) + ehca_gen_err("Could not query interrupt state."); + + return ret; +} + +u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle, + const struct ipz_cq_handle cq_handle, + struct ehca_pfcq *pfcq, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count, + const struct h_galpa gal) +{ + if (count != 1) { + ehca_gen_err("Page counter=%lx", count); + return H_PARAMETER; + } + + return hipz_h_register_rpage(adapter_handle, pagesize, queue_type, + cq_handle.handle, logical_address_of_page, + count); +} + +u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count, + const struct h_galpa galpa) +{ + if (count != 1) { + ehca_gen_err("Page counter=%lx", count); + return H_PARAMETER; + } + + return hipz_h_register_rpage(adapter_handle,pagesize,queue_type, + qp_handle.handle,logical_address_of_page, + count); +} + +u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + void **log_addr_next_sq_wqe2processed, + void **log_addr_next_rq_wqe2processed, + int dis_and_get_function_code) +{ + u64 ret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs, + adapter_handle.handle, /* r4 */ + dis_and_get_function_code, /* r5 */ + qp_handle.handle, /* r6 */ + 0, 0, 0, 0, 0, 0); + if (log_addr_next_sq_wqe2processed) + *log_addr_next_sq_wqe2processed = (void*)outs[0]; + if (log_addr_next_rq_wqe2processed) + *log_addr_next_rq_wqe2processed = (void*)outs[1]; + + return ret; +} + +u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + const u64 update_mask, + struct hcp_modify_qp_control_block *mqpcb, + struct h_galpa gal) +{ + u64 ret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + ret = ehca_plpar_hcall9(H_MODIFY_QP, outs, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + update_mask, /* r6 */ + virt_to_abs(mqpcb), /* r7 */ + 0, 0, 0, 0, 0); + + if (ret == H_NOT_ENOUGH_RESOURCES) + ehca_gen_err("Insufficient resources ret=%lx", ret); + + return ret; +} + +u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + struct hcp_modify_qp_control_block *qqpcb, + struct h_galpa gal) +{ + return ehca_plpar_hcall_norets(H_QUERY_QP, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + virt_to_abs(qqpcb), /* r6 */ + 0, 0, 0, 0); +} + +u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle, + struct ehca_qp *qp) +{ + u64 ret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + + ret = hcp_galpas_dtor(&qp->galpas); + if (ret) { + ehca_gen_err("Could not destruct qp->galpas"); + return H_RESOURCE; + } + ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs, + adapter_handle.handle, /* r4 */ + /* function code */ + 1, /* r5 */ + qp->ipz_qp_handle.handle, /* r6 */ + 0, 0, 0, 0, 0, 0); + if (ret == H_HARDWARE) + ehca_gen_err("HCA not operational. ret=%lx", ret); + + ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + qp->ipz_qp_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); + + if (ret == H_RESOURCE) + ehca_gen_err("Resource still in use. ret=%lx", ret); + + return ret; +} + +u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u32 port) +{ + return ehca_plpar_hcall_norets(H_DEFINE_AQP0, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + port, /* r6 */ + 0, 0, 0, 0); +} + +u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u32 port, u32 * pma_qp_nr, + u32 * bma_qp_nr) +{ + u64 ret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + port, /* r6 */ + 0, 0, 0, 0, 0, 0); + *pma_qp_nr = (u32)outs[0]; + *bma_qp_nr = (u32)outs[1]; + + if (ret == H_ALIAS_EXIST) + ehca_gen_err("AQP1 already exists. ret=%lx", ret); + + return ret; +} + +u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u16 mcg_dlid, + u64 subnet_prefix, u64 interface_id) +{ + u64 ret; + + ret = ehca_plpar_hcall_norets(H_ATTACH_MCQP, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + mcg_dlid, /* r6 */ + interface_id, /* r7 */ + subnet_prefix, /* r8 */ + 0, 0); + + if (ret == H_NOT_ENOUGH_RESOURCES) + ehca_gen_err("Not enough resources. ret=%lx", ret); + + return ret; +} + +u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u16 mcg_dlid, + u64 subnet_prefix, u64 interface_id) +{ + return ehca_plpar_hcall_norets(H_DETACH_MCQP, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + mcg_dlid, /* r6 */ + interface_id, /* r7 */ + subnet_prefix, /* r8 */ + 0, 0); +} + +u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle, + struct ehca_cq *cq, + u8 force_flag) +{ + u64 ret; + + ret = hcp_galpas_dtor(&cq->galpas); + if (ret) { + ehca_gen_err("Could not destruct cp->galpas"); + return H_RESOURCE; + } + + ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + cq->ipz_cq_handle.handle, /* r5 */ + force_flag != 0 ? 1L : 0L, /* r6 */ + 0, 0, 0, 0); + + if (ret == H_RESOURCE) + ehca_gen_err("H_FREE_RESOURCE failed ret=%lx ", ret); + + return ret; +} + +u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle, + struct ehca_eq *eq) +{ + u64 ret; + + ret = hcp_galpas_dtor(&eq->galpas); + if (ret) { + ehca_gen_err("Could not destruct eq->galpas"); + return H_RESOURCE; + } + + ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + eq->ipz_eq_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); + + if (ret == H_RESOURCE) + ehca_gen_err("Resource in use. ret=%lx ", ret); + + return ret; +} + +u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u64 vaddr, + const u64 length, + const u32 access_ctrl, + const struct ipz_pd pd, + struct ehca_mr_hipzout_parms *outparms) +{ + u64 ret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, + adapter_handle.handle, /* r4 */ + 5, /* r5 */ + vaddr, /* r6 */ + length, /* r7 */ + (((u64)access_ctrl) << 32ULL), /* r8 */ + pd.value, /* r9 */ + 0, 0, 0); + outparms->handle.handle = outs[0]; + outparms->lkey = (u32)outs[2]; + outparms->rkey = (u32)outs[3]; + + return ret; +} + +u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count) +{ + u64 ret; + + if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) { + ehca_gen_err("logical_address_of_page not on a 4k boundary " + "adapter_handle=%lx mr=%p mr_handle=%lx " + "pagesize=%x queue_type=%x " + "logical_address_of_page=%lx count=%lx", + adapter_handle.handle, mr, + mr->ipz_mr_handle.handle, pagesize, queue_type, + logical_address_of_page, count); + ret = H_PARAMETER; + } else + ret = hipz_h_register_rpage(adapter_handle, pagesize, + queue_type, + mr->ipz_mr_handle.handle, + logical_address_of_page, count); + return ret; +} + +u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + struct ehca_mr_hipzout_parms *outparms) +{ + u64 ret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_QUERY_MR, outs, + adapter_handle.handle, /* r4 */ + mr->ipz_mr_handle.handle, /* r5 */ + 0, 0, 0, 0, 0, 0, 0); + outparms->len = outs[0]; + outparms->vaddr = outs[1]; + outparms->acl = outs[4] >> 32; + outparms->lkey = (u32)(outs[5] >> 32); + outparms->rkey = (u32)(outs[5] & (0xffffffff)); + + return ret; +} + +u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr) +{ + return ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + mr->ipz_mr_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); +} + +u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u64 vaddr_in, + const u64 length, + const u32 access_ctrl, + const struct ipz_pd pd, + const u64 mr_addr_cb, + struct ehca_mr_hipzout_parms *outparms) +{ + u64 ret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs, + adapter_handle.handle, /* r4 */ + mr->ipz_mr_handle.handle, /* r5 */ + vaddr_in, /* r6 */ + length, /* r7 */ + /* r8 */ + ((((u64)access_ctrl) << 32ULL) | pd.value), + mr_addr_cb, /* r9 */ + 0, 0, 0); + outparms->vaddr = outs[1]; + outparms->lkey = (u32)outs[2]; + outparms->rkey = (u32)outs[3]; + + return ret; +} + +u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const struct ehca_mr *orig_mr, + const u64 vaddr_in, + const u32 access_ctrl, + const struct ipz_pd pd, + struct ehca_mr_hipzout_parms *outparms) +{ + u64 ret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_REGISTER_SMR, outs, + adapter_handle.handle, /* r4 */ + orig_mr->ipz_mr_handle.handle, /* r5 */ + vaddr_in, /* r6 */ + (((u64)access_ctrl) << 32ULL), /* r7 */ + pd.value, /* r8 */ + 0, 0, 0, 0); + outparms->handle.handle = outs[0]; + outparms->lkey = (u32)outs[2]; + outparms->rkey = (u32)outs[3]; + + return ret; +} + +u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw, + const struct ipz_pd pd, + struct ehca_mw_hipzout_parms *outparms) +{ + u64 ret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, + adapter_handle.handle, /* r4 */ + 6, /* r5 */ + pd.value, /* r6 */ + 0, 0, 0, 0, 0, 0); + outparms->handle.handle = outs[0]; + outparms->rkey = (u32)outs[3]; + + return ret; +} + +u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw, + struct ehca_mw_hipzout_parms *outparms) +{ + u64 ret; + u64 outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_QUERY_MW, outs, + adapter_handle.handle, /* r4 */ + mw->ipz_mw_handle.handle, /* r5 */ + 0, 0, 0, 0, 0, 0, 0); + outparms->rkey = (u32)outs[3]; + + return ret; +} + +u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw) +{ + return ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + mw->ipz_mw_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); +} + +u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, + const u64 ressource_handle, + void *rblock, + unsigned long *byte_count) +{ + u64 r_cb = virt_to_abs(rblock); + + if (r_cb & (EHCA_PAGESIZE-1)) { + ehca_gen_err("rblock not page aligned."); + return H_PARAMETER; + } + + return ehca_plpar_hcall_norets(H_ERROR_DATA, + adapter_handle.handle, + ressource_handle, + r_cb, + 0, 0, 0, 0); +} diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h new file mode 100644 index 000000000000..587ebd470959 --- /dev/null +++ b/drivers/infiniband/hw/ehca/hcp_if.h @@ -0,0 +1,261 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Firmware Infiniband Interface code for POWER + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Gerd Bayer + * Waleri Fomin + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HCP_IF_H__ +#define __HCP_IF_H__ + +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "hipz_hw.h" + +/* + * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initalize + * resources, create the empty EQPT (ring). + */ +u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, + struct ehca_pfeq *pfeq, + const u32 neq_control, + const u32 number_of_entries, + struct ipz_eq_handle *eq_handle, + u32 * act_nr_of_entries, + u32 * act_pages, + u32 * eq_ist); + +u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle, + struct ipz_eq_handle eq_handle, + const u64 event_mask); +/* + * hipz_h_allocate_resource_cq allocates CQ resources in HW and FW, initialize + * resources, create the empty CQPT (ring). + */ +u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, + struct ehca_cq *cq, + struct ehca_alloc_cq_parms *param); + + +/* + * hipz_h_alloc_resource_qp allocates QP resources in HW and FW, + * initialize resources, create empty QPPTs (2 rings). + */ +u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, + struct ehca_qp *qp, + struct ehca_alloc_qp_parms *parms); + +u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, + const u8 port_id, + struct hipz_query_port *query_port_response_block); + +u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, + struct hipz_query_hca *query_hca_rblock); + +/* + * hipz_h_register_rpage internal function in hcp_if.h for all + * hcp_H_REGISTER_RPAGE calls. + */ +u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle, + const u8 pagesize, + const u8 queue_type, + const u64 resource_handle, + const u64 logical_address_of_page, + u64 count); + +u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle, + const struct ipz_eq_handle eq_handle, + struct ehca_pfeq *pfeq, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count); + +u64 hipz_h_query_int_state(const struct ipz_adapter_handle + hcp_adapter_handle, + u32 ist); + +u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle, + const struct ipz_cq_handle cq_handle, + struct ehca_pfcq *pfcq, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count, + const struct h_galpa gal); + +u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count, + const struct h_galpa galpa); + +u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + void **log_addr_next_sq_wqe_tb_processed, + void **log_addr_next_rq_wqe_tb_processed, + int dis_and_get_function_code); +enum hcall_sigt { + HCALL_SIGT_NO_CQE = 0, + HCALL_SIGT_BY_WQE = 1, + HCALL_SIGT_EVERY = 2 +}; + +u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + const u64 update_mask, + struct hcp_modify_qp_control_block *mqpcb, + struct h_galpa gal); + +u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + struct hcp_modify_qp_control_block *qqpcb, + struct h_galpa gal); + +u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle, + struct ehca_qp *qp); + +u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u32 port); + +u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u32 port, u32 * pma_qp_nr, + u32 * bma_qp_nr); + +u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u16 mcg_dlid, + u64 subnet_prefix, u64 interface_id); + +u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u16 mcg_dlid, + u64 subnet_prefix, u64 interface_id); + +u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle, + struct ehca_cq *cq, + u8 force_flag); + +u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle, + struct ehca_eq *eq); + +/* + * hipz_h_alloc_resource_mr allocates MR resources in HW and FW, initialize + * resources. + */ +u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u64 vaddr, + const u64 length, + const u32 access_ctrl, + const struct ipz_pd pd, + struct ehca_mr_hipzout_parms *outparms); + +/* hipz_h_register_rpage_mr registers MR resource pages in HW and FW */ +u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count); + +/* hipz_h_query_mr queries MR in HW and FW */ +u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + struct ehca_mr_hipzout_parms *outparms); + +/* hipz_h_free_resource_mr frees MR resources in HW and FW */ +u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr); + +/* hipz_h_reregister_pmr reregisters MR in HW and FW */ +u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u64 vaddr_in, + const u64 length, + const u32 access_ctrl, + const struct ipz_pd pd, + const u64 mr_addr_cb, + struct ehca_mr_hipzout_parms *outparms); + +/* hipz_h_register_smr register shared MR in HW and FW */ +u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const struct ehca_mr *orig_mr, + const u64 vaddr_in, + const u32 access_ctrl, + const struct ipz_pd pd, + struct ehca_mr_hipzout_parms *outparms); + +/* + * hipz_h_alloc_resource_mw allocates MW resources in HW and FW, initialize + * resources. + */ +u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw, + const struct ipz_pd pd, + struct ehca_mw_hipzout_parms *outparms); + +/* hipz_h_query_mw queries MW in HW and FW */ +u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw, + struct ehca_mw_hipzout_parms *outparms); + +/* hipz_h_free_resource_mw frees MW resources in HW and FW */ +u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw); + +u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, + const u64 ressource_handle, + void *rblock, + unsigned long *byte_count); + +#endif /* __HCP_IF_H__ */ diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c new file mode 100644 index 000000000000..0b1a4772c78a --- /dev/null +++ b/drivers/infiniband/hw/ehca/hcp_phyp.c @@ -0,0 +1,80 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * load store abstraction for ehca register access with tracing + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "ehca_classes.h" +#include "hipz_hw.h" + +int hcall_map_page(u64 physaddr, u64 *mapaddr) +{ + *mapaddr = (u64)(ioremap(physaddr, EHCA_PAGESIZE)); + return 0; +} + +int hcall_unmap_page(u64 mapaddr) +{ + iounmap((volatile void __iomem*)mapaddr); + return 0; +} + +int hcp_galpas_ctor(struct h_galpas *galpas, + u64 paddr_kernel, u64 paddr_user) +{ + int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle); + if (ret) + return ret; + + galpas->user.fw_handle = paddr_user; + + return 0; +} + +int hcp_galpas_dtor(struct h_galpas *galpas) +{ + if (galpas->kernel.fw_handle) { + int ret = hcall_unmap_page(galpas->kernel.fw_handle); + if (ret) + return ret; + } + + galpas->user.fw_handle = galpas->kernel.fw_handle = 0; + + return 0; +} diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.h b/drivers/infiniband/hw/ehca/hcp_phyp.h new file mode 100644 index 000000000000..5305c2a3ed94 --- /dev/null +++ b/drivers/infiniband/hw/ehca/hcp_phyp.h @@ -0,0 +1,90 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Firmware calls + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Waleri Fomin + * Gerd Bayer + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HCP_PHYP_H__ +#define __HCP_PHYP_H__ + + +/* + * eHCA page (mapped into memory) + * resource to access eHCA register pages in CPU address space +*/ +struct h_galpa { + u64 fw_handle; + /* for pSeries this is a 64bit memory address where + I/O memory is mapped into CPU address space (kv) */ +}; + +/* + * resource to access eHCA address space registers, all types + */ +struct h_galpas { + u32 pid; /*PID of userspace galpa checking */ + struct h_galpa user; /* user space accessible resource, + set to 0 if unused */ + struct h_galpa kernel; /* kernel space accessible resource, + set to 0 if unused */ +}; + +static inline u64 hipz_galpa_load(struct h_galpa galpa, u32 offset) +{ + u64 addr = galpa.fw_handle + offset; + return *(volatile u64 __force *)addr; +} + +static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value) +{ + u64 addr = galpa.fw_handle + offset; + *(volatile u64 __force *)addr = value; +} + +int hcp_galpas_ctor(struct h_galpas *galpas, + u64 paddr_kernel, u64 paddr_user); + +int hcp_galpas_dtor(struct h_galpas *galpas); + +int hcall_map_page(u64 physaddr, u64 * mapaddr); + +int hcall_unmap_page(u64 mapaddr); + +#endif diff --git a/drivers/infiniband/hw/ehca/hipz_fns.h b/drivers/infiniband/hw/ehca/hipz_fns.h new file mode 100644 index 000000000000..9dac93d02140 --- /dev/null +++ b/drivers/infiniband/hw/ehca/hipz_fns.h @@ -0,0 +1,68 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * HW abstraction register functions + * + * Authors: Christoph Raisch + * Reinhard Ernst + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HIPZ_FNS_H__ +#define __HIPZ_FNS_H__ + +#include "ehca_classes.h" +#include "hipz_hw.h" + +#include "hipz_fns_core.h" + +#define hipz_galpa_store_eq(gal, offset, value) \ + hipz_galpa_store(gal, EQTEMM_OFFSET(offset), value) + +#define hipz_galpa_load_eq(gal, offset) \ + hipz_galpa_load(gal, EQTEMM_OFFSET(offset)) + +#define hipz_galpa_store_qped(gal, offset, value) \ + hipz_galpa_store(gal, QPEDMM_OFFSET(offset), value) + +#define hipz_galpa_load_qped(gal, offset) \ + hipz_galpa_load(gal, QPEDMM_OFFSET(offset)) + +#define hipz_galpa_store_mrmw(gal, offset, value) \ + hipz_galpa_store(gal, MRMWMM_OFFSET(offset), value) + +#define hipz_galpa_load_mrmw(gal, offset) \ + hipz_galpa_load(gal, MRMWMM_OFFSET(offset)) + +#endif diff --git a/drivers/infiniband/hw/ehca/hipz_fns_core.h b/drivers/infiniband/hw/ehca/hipz_fns_core.h new file mode 100644 index 000000000000..20898a153446 --- /dev/null +++ b/drivers/infiniband/hw/ehca/hipz_fns_core.h @@ -0,0 +1,100 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * HW abstraction register functions + * + * Authors: Christoph Raisch + * Heiko J Schick + * Hoang-Nam Nguyen + * Reinhard Ernst + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HIPZ_FNS_CORE_H__ +#define __HIPZ_FNS_CORE_H__ + +#include "hcp_phyp.h" +#include "hipz_hw.h" + +#define hipz_galpa_store_cq(gal, offset, value) \ + hipz_galpa_store(gal, CQTEMM_OFFSET(offset), value) + +#define hipz_galpa_load_cq(gal, offset) \ + hipz_galpa_load(gal, CQTEMM_OFFSET(offset)) + +#define hipz_galpa_store_qp(gal,offset, value) \ + hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value) +#define hipz_galpa_load_qp(gal, offset) \ + hipz_galpa_load(gal,QPTEMM_OFFSET(offset)) + +static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes) +{ + /* ringing doorbell :-) */ + hipz_galpa_store_qp(qp->galpas.kernel, qpx_sqa, + EHCA_BMASK_SET(QPX_SQADDER, nr_wqes)); +} + +static inline void hipz_update_rqa(struct ehca_qp *qp, u16 nr_wqes) +{ + /* ringing doorbell :-) */ + hipz_galpa_store_qp(qp->galpas.kernel, qpx_rqa, + EHCA_BMASK_SET(QPX_RQADDER, nr_wqes)); +} + +static inline void hipz_update_feca(struct ehca_cq *cq, u32 nr_cqes) +{ + hipz_galpa_store_cq(cq->galpas.kernel, cqx_feca, + EHCA_BMASK_SET(CQX_FECADDER, nr_cqes)); +} + +static inline void hipz_set_cqx_n0(struct ehca_cq *cq, u32 value) +{ + u64 cqx_n0_reg; + + hipz_galpa_store_cq(cq->galpas.kernel, cqx_n0, + EHCA_BMASK_SET(CQX_N0_GENERATE_SOLICITED_COMP_EVENT, + value)); + cqx_n0_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n0); +} + +static inline void hipz_set_cqx_n1(struct ehca_cq *cq, u32 value) +{ + u64 cqx_n1_reg; + + hipz_galpa_store_cq(cq->galpas.kernel, cqx_n1, + EHCA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, value)); + cqx_n1_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n1); +} + +#endif /* __HIPZ_FNC_CORE_H__ */ diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h new file mode 100644 index 000000000000..3fc92b031c50 --- /dev/null +++ b/drivers/infiniband/hw/ehca/hipz_hw.h @@ -0,0 +1,388 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * eHCA register definitions + * + * Authors: Waleri Fomin + * Christoph Raisch + * Reinhard Ernst + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HIPZ_HW_H__ +#define __HIPZ_HW_H__ + +#include "ehca_tools.h" + +/* QP Table Entry Memory Map */ +struct hipz_qptemm { + u64 qpx_hcr; + u64 qpx_c; + u64 qpx_herr; + u64 qpx_aer; +/* 0x20*/ + u64 qpx_sqa; + u64 qpx_sqc; + u64 qpx_rqa; + u64 qpx_rqc; +/* 0x40*/ + u64 qpx_st; + u64 qpx_pmstate; + u64 qpx_pmfa; + u64 qpx_pkey; +/* 0x60*/ + u64 qpx_pkeya; + u64 qpx_pkeyb; + u64 qpx_pkeyc; + u64 qpx_pkeyd; +/* 0x80*/ + u64 qpx_qkey; + u64 qpx_dqp; + u64 qpx_dlidp; + u64 qpx_portp; +/* 0xa0*/ + u64 qpx_slidp; + u64 qpx_slidpp; + u64 qpx_dlida; + u64 qpx_porta; +/* 0xc0*/ + u64 qpx_slida; + u64 qpx_slidpa; + u64 qpx_slvl; + u64 qpx_ipd; +/* 0xe0*/ + u64 qpx_mtu; + u64 qpx_lato; + u64 qpx_rlimit; + u64 qpx_rnrlimit; +/* 0x100*/ + u64 qpx_t; + u64 qpx_sqhp; + u64 qpx_sqptp; + u64 qpx_nspsn; +/* 0x120*/ + u64 qpx_nspsnhwm; + u64 reserved1; + u64 qpx_sdsi; + u64 qpx_sdsbc; +/* 0x140*/ + u64 qpx_sqwsize; + u64 qpx_sqwts; + u64 qpx_lsn; + u64 qpx_nssn; +/* 0x160 */ + u64 qpx_mor; + u64 qpx_cor; + u64 qpx_sqsize; + u64 qpx_erc; +/* 0x180*/ + u64 qpx_rnrrc; + u64 qpx_ernrwt; + u64 qpx_rnrresp; + u64 qpx_lmsna; +/* 0x1a0 */ + u64 qpx_sqhpc; + u64 qpx_sqcptp; + u64 qpx_sigt; + u64 qpx_wqecnt; +/* 0x1c0*/ + u64 qpx_rqhp; + u64 qpx_rqptp; + u64 qpx_rqsize; + u64 qpx_nrr; +/* 0x1e0*/ + u64 qpx_rdmac; + u64 qpx_nrpsn; + u64 qpx_lapsn; + u64 qpx_lcr; +/* 0x200*/ + u64 qpx_rwc; + u64 qpx_rwva; + u64 qpx_rdsi; + u64 qpx_rdsbc; +/* 0x220*/ + u64 qpx_rqwsize; + u64 qpx_crmsn; + u64 qpx_rdd; + u64 qpx_larpsn; +/* 0x240*/ + u64 qpx_pd; + u64 qpx_scqn; + u64 qpx_rcqn; + u64 qpx_aeqn; +/* 0x260*/ + u64 qpx_aaelog; + u64 qpx_ram; + u64 qpx_rdmaqe0; + u64 qpx_rdmaqe1; +/* 0x280*/ + u64 qpx_rdmaqe2; + u64 qpx_rdmaqe3; + u64 qpx_nrpsnhwm; +/* 0x298*/ + u64 reserved[(0x400 - 0x298) / 8]; +/* 0x400 extended data */ + u64 reserved_ext[(0x500 - 0x400) / 8]; +/* 0x500 */ + u64 reserved2[(0x1000 - 0x500) / 8]; +/* 0x1000 */ +}; + +#define QPX_SQADDER EHCA_BMASK_IBM(48,63) +#define QPX_RQADDER EHCA_BMASK_IBM(48,63) + +#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm,x) + +/* MRMWPT Entry Memory Map */ +struct hipz_mrmwmm { + /* 0x00 */ + u64 mrx_hcr; + + u64 mrx_c; + u64 mrx_herr; + u64 mrx_aer; + /* 0x20 */ + u64 mrx_pp; + u64 reserved1; + u64 reserved2; + u64 reserved3; + /* 0x40 */ + u64 reserved4[(0x200 - 0x40) / 8]; + /* 0x200 */ + u64 mrx_ctl[64]; + +}; + +#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm,x) + +struct hipz_qpedmm { + /* 0x00 */ + u64 reserved0[(0x400) / 8]; + /* 0x400 */ + u64 qpedx_phh; + u64 qpedx_ppsgp; + /* 0x410 */ + u64 qpedx_ppsgu; + u64 qpedx_ppdgp; + /* 0x420 */ + u64 qpedx_ppdgu; + u64 qpedx_aph; + /* 0x430 */ + u64 qpedx_apsgp; + u64 qpedx_apsgu; + /* 0x440 */ + u64 qpedx_apdgp; + u64 qpedx_apdgu; + /* 0x450 */ + u64 qpedx_apav; + u64 qpedx_apsav; + /* 0x460 */ + u64 qpedx_hcr; + u64 reserved1[4]; + /* 0x488 */ + u64 qpedx_rrl0; + /* 0x490 */ + u64 qpedx_rrrkey0; + u64 qpedx_rrva0; + /* 0x4a0 */ + u64 reserved2; + u64 qpedx_rrl1; + /* 0x4b0 */ + u64 qpedx_rrrkey1; + u64 qpedx_rrva1; + /* 0x4c0 */ + u64 reserved3; + u64 qpedx_rrl2; + /* 0x4d0 */ + u64 qpedx_rrrkey2; + u64 qpedx_rrva2; + /* 0x4e0 */ + u64 reserved4; + u64 qpedx_rrl3; + /* 0x4f0 */ + u64 qpedx_rrrkey3; + u64 qpedx_rrva3; +}; + +#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm,x) + +/* CQ Table Entry Memory Map */ +struct hipz_cqtemm { + u64 cqx_hcr; + u64 cqx_c; + u64 cqx_herr; + u64 cqx_aer; +/* 0x20 */ + u64 cqx_ptp; + u64 cqx_tp; + u64 cqx_fec; + u64 cqx_feca; +/* 0x40 */ + u64 cqx_ep; + u64 cqx_eq; +/* 0x50 */ + u64 reserved1; + u64 cqx_n0; +/* 0x60 */ + u64 cqx_n1; + u64 reserved2[(0x1000 - 0x60) / 8]; +/* 0x1000 */ +}; + +#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32,63) +#define CQX_FECADDER EHCA_BMASK_IBM(32,63) +#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0,0) +#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0,0) + +#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm,x) + +/* EQ Table Entry Memory Map */ +struct hipz_eqtemm { + u64 eqx_hcr; + u64 eqx_c; + + u64 eqx_herr; + u64 eqx_aer; +/* 0x20 */ + u64 eqx_ptp; + u64 eqx_tp; + u64 eqx_ssba; + u64 eqx_psba; + +/* 0x40 */ + u64 eqx_cec; + u64 eqx_meql; + u64 eqx_xisbi; + u64 eqx_xisc; +/* 0x60 */ + u64 eqx_it; + +}; + +#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm,x) + +/* access control defines for MR/MW */ +#define HIPZ_ACCESSCTRL_L_WRITE 0x00800000 +#define HIPZ_ACCESSCTRL_R_WRITE 0x00400000 +#define HIPZ_ACCESSCTRL_R_READ 0x00200000 +#define HIPZ_ACCESSCTRL_R_ATOMIC 0x00100000 +#define HIPZ_ACCESSCTRL_MW_BIND 0x00080000 + +/* query hca response block */ +struct hipz_query_hca { + u32 cur_reliable_dg; + u32 cur_qp; + u32 cur_cq; + u32 cur_eq; + u32 cur_mr; + u32 cur_mw; + u32 cur_ee_context; + u32 cur_mcast_grp; + u32 cur_qp_attached_mcast_grp; + u32 reserved1; + u32 cur_ipv6_qp; + u32 cur_eth_qp; + u32 cur_hp_mr; + u32 reserved2[3]; + u32 max_rd_domain; + u32 max_qp; + u32 max_cq; + u32 max_eq; + u32 max_mr; + u32 max_hp_mr; + u32 max_mw; + u32 max_mrwpte; + u32 max_special_mrwpte; + u32 max_rd_ee_context; + u32 max_mcast_grp; + u32 max_total_mcast_qp_attach; + u32 max_mcast_qp_attach; + u32 max_raw_ipv6_qp; + u32 max_raw_ethy_qp; + u32 internal_clock_frequency; + u32 max_pd; + u32 max_ah; + u32 max_cqe; + u32 max_wqes_wq; + u32 max_partitions; + u32 max_rr_ee_context; + u32 max_rr_qp; + u32 max_rr_hca; + u32 max_act_wqs_ee_context; + u32 max_act_wqs_qp; + u32 max_sge; + u32 max_sge_rd; + u32 memory_page_size_supported; + u64 max_mr_size; + u32 local_ca_ack_delay; + u32 num_ports; + u32 vendor_id; + u32 vendor_part_id; + u32 hw_ver; + u64 node_guid; + u64 hca_cap_indicators; + u32 data_counter_register_size; + u32 max_shared_rq; + u32 max_isns_eq; + u32 max_neq; +} __attribute__ ((packed)); + +/* query port response block */ +struct hipz_query_port { + u32 state; + u32 bad_pkey_cntr; + u32 lmc; + u32 lid; + u32 subnet_timeout; + u32 qkey_viol_cntr; + u32 sm_sl; + u32 sm_lid; + u32 capability_mask; + u32 init_type_reply; + u32 pkey_tbl_len; + u32 gid_tbl_len; + u64 gid_prefix; + u32 port_nr; + u16 pkey_entries[16]; + u8 reserved1[32]; + u32 trent_size; + u32 trbuf_size; + u64 max_msg_sz; + u32 max_mtu; + u32 vl_cap; + u8 reserved2[1900]; + u64 guid_entries[255]; +} __attribute__ ((packed)); + +#endif diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c new file mode 100644 index 000000000000..e028ff1588cc --- /dev/null +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c @@ -0,0 +1,149 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * internal queue handling + * + * Authors: Waleri Fomin + * Reinhard Ernst + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "ehca_tools.h" +#include "ipz_pt_fn.h" + +void *ipz_qpageit_get_inc(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + queue->current_q_offset += queue->pagesize; + if (queue->current_q_offset > queue->queue_length) { + queue->current_q_offset -= queue->pagesize; + ret = NULL; + } + if (((u64)ret) % EHCA_PAGESIZE) { + ehca_gen_err("ERROR!! not at PAGE-Boundary"); + return NULL; + } + return ret; +} + +void *ipz_qeit_eq_get_inc(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + u64 last_entry_in_q = queue->queue_length - queue->qe_size; + + queue->current_q_offset += queue->qe_size; + if (queue->current_q_offset > last_entry_in_q) { + queue->current_q_offset = 0; + queue->toggle_state = (~queue->toggle_state) & 1; + } + + return ret; +} + +int ipz_queue_ctor(struct ipz_queue *queue, + const u32 nr_of_pages, + const u32 pagesize, const u32 qe_size, const u32 nr_of_sg) +{ + int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT; + int f; + + if (pagesize > PAGE_SIZE) { + ehca_gen_err("FATAL ERROR: pagesize=%x is greater " + "than kernel page size", pagesize); + return 0; + } + if (!pages_per_kpage) { + ehca_gen_err("FATAL ERROR: invalid kernel page size. " + "pages_per_kpage=%x", pages_per_kpage); + return 0; + } + queue->queue_length = nr_of_pages * pagesize; + queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *)); + if (!queue->queue_pages) { + ehca_gen_err("ERROR!! didn't get the memory"); + return 0; + } + memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *)); + /* + * allocate pages for queue: + * outer loop allocates whole kernel pages (page aligned) and + * inner loop divides a kernel page into smaller hca queue pages + */ + f = 0; + while (f < nr_of_pages) { + u8 *kpage = (u8*)get_zeroed_page(GFP_KERNEL); + int k; + if (!kpage) + goto ipz_queue_ctor_exit0; /*NOMEM*/ + for (k = 0; k < pages_per_kpage && f < nr_of_pages; k++) { + (queue->queue_pages)[f] = (struct ipz_page *)kpage; + kpage += EHCA_PAGESIZE; + f++; + } + } + + queue->current_q_offset = 0; + queue->qe_size = qe_size; + queue->act_nr_of_sg = nr_of_sg; + queue->pagesize = pagesize; + queue->toggle_state = 1; + return 1; + + ipz_queue_ctor_exit0: + ehca_gen_err("Couldn't get alloc pages queue=%p f=%x nr_of_pages=%x", + queue, f, nr_of_pages); + for (f = 0; f < nr_of_pages; f += pages_per_kpage) { + if (!(queue->queue_pages)[f]) + break; + free_page((unsigned long)(queue->queue_pages)[f]); + } + return 0; +} + +int ipz_queue_dtor(struct ipz_queue *queue) +{ + int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT; + int g; + int nr_pages; + + if (!queue || !queue->queue_pages) { + ehca_gen_dbg("queue or queue_pages is NULL"); + return 0; + } + nr_pages = queue->queue_length / queue->pagesize; + for (g = 0; g < nr_pages; g += pages_per_kpage) + free_page((unsigned long)(queue->queue_pages)[g]); + vfree(queue->queue_pages); + + return 1; +} diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h new file mode 100644 index 000000000000..2f13509d5257 --- /dev/null +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h @@ -0,0 +1,247 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * internal queue handling + * + * Authors: Waleri Fomin + * Reinhard Ernst + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __IPZ_PT_FN_H__ +#define __IPZ_PT_FN_H__ + +#define EHCA_PAGESHIFT 12 +#define EHCA_PAGESIZE 4096UL +#define EHCA_PAGEMASK (~(EHCA_PAGESIZE-1)) +#define EHCA_PT_ENTRIES 512UL + +#include "ehca_tools.h" +#include "ehca_qes.h" + +/* struct generic ehca page */ +struct ipz_page { + u8 entries[EHCA_PAGESIZE]; +}; + +/* struct generic queue in linux kernel virtual memory (kv) */ +struct ipz_queue { + u64 current_q_offset; /* current queue entry */ + + struct ipz_page **queue_pages; /* array of pages belonging to queue */ + u32 qe_size; /* queue entry size */ + u32 act_nr_of_sg; + u32 queue_length; /* queue length allocated in bytes */ + u32 pagesize; + u32 toggle_state; /* toggle flag - per page */ + u32 dummy3; /* 64 bit alignment */ +}; + +/* + * return current Queue Entry for a certain q_offset + * returns address (kv) of Queue Entry + */ +static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset) +{ + struct ipz_page *current_page; + if (q_offset >= queue->queue_length) + return NULL; + current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT]; + return ¤t_page->entries[q_offset & (EHCA_PAGESIZE - 1)]; +} + +/* + * return current Queue Entry + * returns address (kv) of Queue Entry + */ +static inline void *ipz_qeit_get(struct ipz_queue *queue) +{ + return ipz_qeit_calc(queue, queue->current_q_offset); +} + +/* + * return current Queue Page , increment Queue Page iterator from + * page to page in struct ipz_queue, last increment will return 0! and + * NOT wrap + * returns address (kv) of Queue Page + * warning don't use in parallel with ipz_QE_get_inc() + */ +void *ipz_qpageit_get_inc(struct ipz_queue *queue); + +/* + * return current Queue Entry, increment Queue Entry iterator by one + * step in struct ipz_queue, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * warning don't use in parallel with ipz_qpageit_get_inc() + * warning unpredictable results may occur if steps>act_nr_of_queue_entries + */ +static inline void *ipz_qeit_get_inc(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + queue->current_q_offset += queue->qe_size; + if (queue->current_q_offset >= queue->queue_length) { + queue->current_q_offset = 0; + /* toggle the valid flag */ + queue->toggle_state = (~queue->toggle_state) & 1; + } + + return ret; +} + +/* + * return current Queue Entry, increment Queue Entry iterator by one + * step in struct ipz_queue, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * returns 0 and does not increment, if wrong valid state + * warning don't use in parallel with ipz_qpageit_get_inc() + * warning unpredictable results may occur if steps>act_nr_of_queue_entries + */ +static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue) +{ + struct ehca_cqe *cqe = ipz_qeit_get(queue); + u32 cqe_flags = cqe->cqe_flags; + + if ((cqe_flags >> 7) != (queue->toggle_state & 1)) + return NULL; + + ipz_qeit_get_inc(queue); + return cqe; +} + +/* + * returns and resets Queue Entry iterator + * returns address (kv) of first Queue Entry + */ +static inline void *ipz_qeit_reset(struct ipz_queue *queue) +{ + queue->current_q_offset = 0; + return ipz_qeit_get(queue); +} + +/* struct generic page table */ +struct ipz_pt { + u64 entries[EHCA_PT_ENTRIES]; +}; + +/* struct page table for a queue, only to be used in pf */ +struct ipz_qpt { + /* queue page tables (kv), use u64 because we know the element length */ + u64 *qpts; + u32 n_qpts; + u32 n_ptes; /* number of page table entries */ + u64 *current_pte_addr; +}; + +/* + * constructor for a ipz_queue_t, placement new for ipz_queue_t, + * new for all dependent datastructors + * all QP Tables are the same + * flow: + * allocate+pin queue + * see ipz_qpt_ctor() + * returns true if ok, false if out of memory + */ +int ipz_queue_ctor(struct ipz_queue *queue, const u32 nr_of_pages, + const u32 pagesize, const u32 qe_size, + const u32 nr_of_sg); + +/* + * destructor for a ipz_queue_t + * -# free queue + * see ipz_queue_ctor() + * returns true if ok, false if queue was NULL-ptr of free failed + */ +int ipz_queue_dtor(struct ipz_queue *queue); + +/* + * constructor for a ipz_qpt_t, + * placement new for struct ipz_queue, new for all dependent datastructors + * all QP Tables are the same, + * flow: + * -# allocate+pin queue + * -# initialise ptcb + * -# allocate+pin PTs + * -# link PTs to a ring, according to HCA Arch, set bit62 id needed + * -# the ring must have room for exactly nr_of_PTEs + * see ipz_qpt_ctor() + */ +void ipz_qpt_ctor(struct ipz_qpt *qpt, + const u32 nr_of_qes, + const u32 pagesize, + const u32 qe_size, + const u8 lowbyte, const u8 toggle, + u32 * act_nr_of_QEs, u32 * act_nr_of_pages); + +/* + * return current Queue Entry, increment Queue Entry iterator by one + * step in struct ipz_queue, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * warning don't use in parallel with ipz_qpageit_get_inc() + * warning unpredictable results may occur if steps>act_nr_of_queue_entries + * fix EQ page problems + */ +void *ipz_qeit_eq_get_inc(struct ipz_queue *queue); + +/* + * return current Event Queue Entry, increment Queue Entry iterator + * by one step in struct ipz_queue if valid, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * returns 0 and does not increment, if wrong valid state + * warning don't use in parallel with ipz_queue_QPageit_get_inc() + * warning unpredictable results may occur if steps>act_nr_of_queue_entries + */ +static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + u32 qe = *(u8 *) ret; + if ((qe >> 7) != (queue->toggle_state & 1)) + return NULL; + ipz_qeit_eq_get_inc(queue); /* this is a good one */ + return ret; +} + +/* returns address (GX) of first queue entry */ +static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt) +{ + return be64_to_cpu(qpt->qpts[0]); +} + +/* returns address (kv) of first page of queue page table */ +static inline void *ipz_qpt_get_qpt(struct ipz_qpt *qpt) +{ + return qpt->qpts; +} + +#endif /* __IPZ_PT_FN_H__ */ From 64f817ba98095156149ba5991592d5d039f6da74 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Fri, 22 Sep 2006 15:22:24 -0700 Subject: [PATCH 0738/1063] IB/uverbs: Allow resize CQ operation to return driver-specific data Add a ib_uverbs_resize_cq_resp.driver_data field so that low-level drivers can return data from a resize CQ operation to userspace. Have ib_uverbs_resize_cq() only copy the cqe field, to avoid having to bump the userspace ABI. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_cmd.c | 3 +-- include/rdma/ib_user_verbs.h | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 8b6df7cec0bf..deae43f31e79 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -879,11 +879,10 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, if (ret) goto out; - memset(&resp, 0, sizeof resp); resp.cqe = cq->cqe; if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) + &resp, sizeof resp.cqe)) ret = -EFAULT; out: diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h index 7b5372010f4b..db1b814b62cc 100644 --- a/include/rdma/ib_user_verbs.h +++ b/include/rdma/ib_user_verbs.h @@ -275,6 +275,8 @@ struct ib_uverbs_resize_cq { struct ib_uverbs_resize_cq_resp { __u32 cqe; + __u32 reserved; + __u64 driver_data[0]; }; struct ib_uverbs_poll_cq { From 9bc57e2d19db4da81c1150120658cc3658a99ed4 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Fri, 11 Aug 2006 14:58:09 -0700 Subject: [PATCH 0739/1063] IB/uverbs: Pass userspace data to modify_srq and modify_qp methods Pass a struct ib_udata to the low-level driver's ->modify_srq() and ->modify_qp() methods, so that it can get to the device-specific data passed in by the userspace driver. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_cmd.c | 13 ++++++++++--- drivers/infiniband/core/verbs.c | 4 ++-- drivers/infiniband/hw/ehca/ehca_iverbs.h | 3 ++- drivers/infiniband/hw/ehca/ehca_qp.c | 3 ++- drivers/infiniband/hw/ipath/ipath_qp.c | 3 ++- drivers/infiniband/hw/ipath/ipath_srq.c | 4 +++- drivers/infiniband/hw/ipath/ipath_verbs.h | 5 +++-- drivers/infiniband/hw/mthca/mthca_dev.h | 5 +++-- drivers/infiniband/hw/mthca/mthca_qp.c | 3 ++- drivers/infiniband/hw/mthca/mthca_srq.c | 2 +- include/rdma/ib_verbs.h | 6 ++++-- 11 files changed, 34 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index deae43f31e79..3fcb5d189a23 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -841,7 +841,6 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, err_copy: idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject); - err_free: ib_destroy_cq(cq); @@ -1273,6 +1272,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_modify_qp cmd; + struct ib_udata udata; struct ib_qp *qp; struct ib_qp_attr *attr; int ret; @@ -1280,6 +1280,9 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; + INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, + out_len); + attr = kmalloc(sizeof *attr, GFP_KERNEL); if (!attr) return -ENOMEM; @@ -1336,7 +1339,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0; attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; - ret = ib_modify_qp(qp, attr, cmd.attr_mask); + ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata); put_qp_read(qp); @@ -2054,6 +2057,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_modify_srq cmd; + struct ib_udata udata; struct ib_srq *srq; struct ib_srq_attr attr; int ret; @@ -2061,6 +2065,9 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; + INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, + out_len); + srq = idr_read_srq(cmd.srq_handle, file->ucontext); if (!srq) return -EINVAL; @@ -2068,7 +2075,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, attr.max_wr = cmd.max_wr; attr.srq_limit = cmd.srq_limit; - ret = ib_modify_srq(srq, &attr, cmd.attr_mask); + ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata); put_srq_read(srq); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 468999c38803..06f98e9e14f9 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -231,7 +231,7 @@ int ib_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask) { - return srq->device->modify_srq(srq, srq_attr, srq_attr_mask); + return srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL); } EXPORT_SYMBOL(ib_modify_srq); @@ -547,7 +547,7 @@ int ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask) { - return qp->device->modify_qp(qp, qp_attr, qp_attr_mask); + return qp->device->modify_qp(qp, qp_attr, qp_attr_mask, NULL); } EXPORT_SYMBOL(ib_modify_qp); diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index bbdc437f5167..319c39d47f3a 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -143,7 +143,8 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, int ehca_destroy_qp(struct ib_qp *qp); -int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask); +int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, + struct ib_udata *udata); int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 4b27bedc6c24..4394123cdbd7 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -1230,7 +1230,8 @@ modify_qp_exit1: return ret; } -int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) +int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, + struct ib_udata *udata) { struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 83e557be591e..44c32d2db990 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -426,11 +426,12 @@ void ipath_error_qp(struct ipath_qp *qp) * @ibqp: the queue pair who's attributes we're modifying * @attr: the new attributes * @attr_mask: the mask of attributes to modify + * @udata: user data for ipathverbs.so * * Returns 0 on success, otherwise returns an errno. */ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask) + int attr_mask, struct ib_udata *udata) { struct ipath_ibdev *dev = to_idev(ibqp->device); struct ipath_qp *qp = to_iqp(ibqp); diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c index f760434660bd..fa77da6667ed 100644 --- a/drivers/infiniband/hw/ipath/ipath_srq.c +++ b/drivers/infiniband/hw/ipath/ipath_srq.c @@ -188,9 +188,11 @@ bail: * @ibsrq: the SRQ to modify * @attr: the new attributes of the SRQ * @attr_mask: indicates which attributes to modify + * @udata: user data for ipathverbs.so */ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask) + enum ib_srq_attr_mask attr_mask, + struct ib_udata *udata) { struct ipath_srq *srq = to_isrq(ibsrq); unsigned long flags; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 2df684727dc1..698396778f00 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -579,7 +579,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, int ipath_destroy_qp(struct ib_qp *ibqp); int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask); + int attr_mask, struct ib_udata *udata); int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr); @@ -638,7 +638,8 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, struct ib_udata *udata); int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask); + enum ib_srq_attr_mask attr_mask, + struct ib_udata *udata); int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index f8160b8de090..33bd0b8bfd13 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -506,7 +506,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, struct ib_srq_attr *attr, struct mthca_srq *srq); void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq); int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask); + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int mthca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); int mthca_max_srq_sge(struct mthca_dev *dev); void mthca_srq_event(struct mthca_dev *dev, u32 srqn, @@ -521,7 +521,8 @@ void mthca_qp_event(struct mthca_dev *dev, u32 qpn, enum ib_event_type event_type); int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); -int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask); +int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, + struct ib_udata *udata); int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr); int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 2e8f6f36e0a5..6d6ba4180a39 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -527,7 +527,8 @@ static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah, return 0; } -int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) +int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, + struct ib_udata *udata) { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index b60a9d79ae54..0f316c87bf64 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -358,7 +358,7 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq) } int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask) + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) { struct mthca_dev *dev = to_mdev(ibsrq->device); struct mthca_srq *srq = to_msrq(ibsrq); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index ee1f3a355666..61eed3996117 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -888,7 +888,8 @@ struct ib_device { struct ib_udata *udata); int (*modify_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr, - enum ib_srq_attr_mask srq_attr_mask); + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata); int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr); int (*destroy_srq)(struct ib_srq *srq); @@ -900,7 +901,8 @@ struct ib_device { struct ib_udata *udata); int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, - int qp_attr_mask); + int qp_attr_mask, + struct ib_udata *udata); int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, From 373d9915803aebbbf7fd3841efd9dac31c32e148 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Fri, 22 Sep 2006 15:22:26 -0700 Subject: [PATCH 0740/1063] IB/ipath: Performance improvements via mmap of queues Improve performance of userspace post receive, post SRQ receive, and poll CQ operations for ipath by allowing userspace to directly mmap() receive queues and completion queues. This eliminates the copying between userspace and the kernel in the data path. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/Makefile | 1 + drivers/infiniband/hw/ipath/ipath_cq.c | 176 ++++++++++++---- drivers/infiniband/hw/ipath/ipath_mmap.c | 122 +++++++++++ drivers/infiniband/hw/ipath/ipath_qp.c | 156 ++++++++++---- drivers/infiniband/hw/ipath/ipath_ruc.c | 138 ++++++++---- drivers/infiniband/hw/ipath/ipath_srq.c | 246 ++++++++++++++-------- drivers/infiniband/hw/ipath/ipath_ud.c | 169 +++++++++------ drivers/infiniband/hw/ipath/ipath_verbs.c | 50 ++--- drivers/infiniband/hw/ipath/ipath_verbs.h | 115 +++++----- 9 files changed, 788 insertions(+), 385 deletions(-) create mode 100644 drivers/infiniband/hw/ipath/ipath_mmap.c diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile index b0bf72864130..6bb43474d104 100644 --- a/drivers/infiniband/hw/ipath/Makefile +++ b/drivers/infiniband/hw/ipath/Makefile @@ -25,6 +25,7 @@ ib_ipath-y := \ ipath_cq.o \ ipath_keys.o \ ipath_mad.o \ + ipath_mmap.o \ ipath_mr.o \ ipath_qp.o \ ipath_rc.o \ diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index 3efee341c9bc..3c4c198a4514 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -42,20 +42,28 @@ * @entry: work completion entry to add * @sig: true if @entry is a solicitated entry * - * This may be called with one of the qp->s_lock or qp->r_rq.lock held. + * This may be called with qp->s_lock held. */ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) { + struct ipath_cq_wc *wc = cq->queue; unsigned long flags; + u32 head; u32 next; spin_lock_irqsave(&cq->lock, flags); - if (cq->head == cq->ibcq.cqe) + /* + * Note that the head pointer might be writable by user processes. + * Take care to verify it is a sane value. + */ + head = wc->head; + if (head >= (unsigned) cq->ibcq.cqe) { + head = cq->ibcq.cqe; next = 0; - else - next = cq->head + 1; - if (unlikely(next == cq->tail)) { + } else + next = head + 1; + if (unlikely(next == wc->tail)) { spin_unlock_irqrestore(&cq->lock, flags); if (cq->ibcq.event_handler) { struct ib_event ev; @@ -67,8 +75,8 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) } return; } - cq->queue[cq->head] = *entry; - cq->head = next; + wc->queue[head] = *entry; + wc->head = next; if (cq->notify == IB_CQ_NEXT_COMP || (cq->notify == IB_CQ_SOLICITED && solicited)) { @@ -101,19 +109,20 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) { struct ipath_cq *cq = to_icq(ibcq); + struct ipath_cq_wc *wc = cq->queue; unsigned long flags; int npolled; spin_lock_irqsave(&cq->lock, flags); for (npolled = 0; npolled < num_entries; ++npolled, ++entry) { - if (cq->tail == cq->head) + if (wc->tail == wc->head) break; - *entry = cq->queue[cq->tail]; - if (cq->tail == cq->ibcq.cqe) - cq->tail = 0; + *entry = wc->queue[wc->tail]; + if (wc->tail >= cq->ibcq.cqe) + wc->tail = 0; else - cq->tail++; + wc->tail++; } spin_unlock_irqrestore(&cq->lock, flags); @@ -160,38 +169,74 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, { struct ipath_ibdev *dev = to_idev(ibdev); struct ipath_cq *cq; - struct ib_wc *wc; + struct ipath_cq_wc *wc; struct ib_cq *ret; if (entries > ib_ipath_max_cqes) { ret = ERR_PTR(-EINVAL); - goto bail; + goto done; } if (dev->n_cqs_allocated == ib_ipath_max_cqs) { ret = ERR_PTR(-ENOMEM); - goto bail; + goto done; } - /* - * Need to use vmalloc() if we want to support large #s of - * entries. - */ + /* Allocate the completion queue structure. */ cq = kmalloc(sizeof(*cq), GFP_KERNEL); if (!cq) { ret = ERR_PTR(-ENOMEM); - goto bail; + goto done; } /* - * Need to use vmalloc() if we want to support large #s of entries. + * Allocate the completion queue entries and head/tail pointers. + * This is allocated separately so that it can be resized and + * also mapped into user space. + * We need to use vmalloc() in order to support mmap and large + * numbers of entries. */ - wc = vmalloc(sizeof(*wc) * (entries + 1)); + wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * entries); if (!wc) { - kfree(cq); ret = ERR_PTR(-ENOMEM); - goto bail; + goto bail_cq; } + + /* + * Return the address of the WC as the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + struct ipath_mmap_info *ip; + __u64 offset = (__u64) wc; + int err; + + err = ib_copy_to_udata(udata, &offset, sizeof(offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_wc; + } + + /* Allocate info for ipath_mmap(). */ + ip = kmalloc(sizeof(*ip), GFP_KERNEL); + if (!ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_wc; + } + cq->ip = ip; + ip->context = context; + ip->obj = wc; + kref_init(&ip->ref); + ip->mmap_cnt = 0; + ip->size = PAGE_ALIGN(sizeof(*wc) + + sizeof(struct ib_wc) * entries); + spin_lock_irq(&dev->pending_lock); + ip->next = dev->pending_mmaps; + dev->pending_mmaps = ip; + spin_unlock_irq(&dev->pending_lock); + } else + cq->ip = NULL; + /* * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe. * The number of entries should be >= the number requested or return @@ -202,15 +247,22 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, cq->triggered = 0; spin_lock_init(&cq->lock); tasklet_init(&cq->comptask, send_complete, (unsigned long)cq); - cq->head = 0; - cq->tail = 0; + wc->head = 0; + wc->tail = 0; cq->queue = wc; ret = &cq->ibcq; dev->n_cqs_allocated++; + goto done; -bail: +bail_wc: + vfree(wc); + +bail_cq: + kfree(cq); + +done: return ret; } @@ -229,7 +281,10 @@ int ipath_destroy_cq(struct ib_cq *ibcq) tasklet_kill(&cq->comptask); dev->n_cqs_allocated--; - vfree(cq->queue); + if (cq->ip) + kref_put(&cq->ip->ref, ipath_release_mmap_info); + else + vfree(cq->queue); kfree(cq); return 0; @@ -253,7 +308,7 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) spin_lock_irqsave(&cq->lock, flags); /* * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow - * any other transitions. + * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2). */ if (cq->notify != IB_CQ_NEXT_COMP) cq->notify = notify; @@ -264,46 +319,81 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) { struct ipath_cq *cq = to_icq(ibcq); - struct ib_wc *wc, *old_wc; - u32 n; + struct ipath_cq_wc *old_wc = cq->queue; + struct ipath_cq_wc *wc; + u32 head, tail, n; int ret; /* * Need to use vmalloc() if we want to support large #s of entries. */ - wc = vmalloc(sizeof(*wc) * (cqe + 1)); + wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * cqe); if (!wc) { ret = -ENOMEM; goto bail; } + /* + * Return the address of the WC as the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + __u64 offset = (__u64) wc; + + ret = ib_copy_to_udata(udata, &offset, sizeof(offset)); + if (ret) + goto bail; + } + spin_lock_irq(&cq->lock); - if (cq->head < cq->tail) - n = cq->ibcq.cqe + 1 + cq->head - cq->tail; + /* + * Make sure head and tail are sane since they + * might be user writable. + */ + head = old_wc->head; + if (head > (u32) cq->ibcq.cqe) + head = (u32) cq->ibcq.cqe; + tail = old_wc->tail; + if (tail > (u32) cq->ibcq.cqe) + tail = (u32) cq->ibcq.cqe; + if (head < tail) + n = cq->ibcq.cqe + 1 + head - tail; else - n = cq->head - cq->tail; + n = head - tail; if (unlikely((u32)cqe < n)) { spin_unlock_irq(&cq->lock); vfree(wc); ret = -EOVERFLOW; goto bail; } - for (n = 0; cq->tail != cq->head; n++) { - wc[n] = cq->queue[cq->tail]; - if (cq->tail == cq->ibcq.cqe) - cq->tail = 0; + for (n = 0; tail != head; n++) { + wc->queue[n] = old_wc->queue[tail]; + if (tail == (u32) cq->ibcq.cqe) + tail = 0; else - cq->tail++; + tail++; } cq->ibcq.cqe = cqe; - cq->head = n; - cq->tail = 0; - old_wc = cq->queue; + wc->head = n; + wc->tail = 0; cq->queue = wc; spin_unlock_irq(&cq->lock); vfree(old_wc); + if (cq->ip) { + struct ipath_ibdev *dev = to_idev(ibcq->device); + struct ipath_mmap_info *ip = cq->ip; + + ip->obj = wc; + ip->size = PAGE_ALIGN(sizeof(*wc) + + sizeof(struct ib_wc) * cqe); + spin_lock_irq(&dev->pending_lock); + ip->next = dev->pending_mmaps; + dev->pending_mmaps = ip; + spin_unlock_irq(&dev->pending_lock); + } + ret = 0; bail: diff --git a/drivers/infiniband/hw/ipath/ipath_mmap.c b/drivers/infiniband/hw/ipath/ipath_mmap.c new file mode 100644 index 000000000000..11b7378ff214 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_mmap.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include "ipath_verbs.h" + +/** + * ipath_release_mmap_info - free mmap info structure + * @ref: a pointer to the kref within struct ipath_mmap_info + */ +void ipath_release_mmap_info(struct kref *ref) +{ + struct ipath_mmap_info *ip = + container_of(ref, struct ipath_mmap_info, ref); + + vfree(ip->obj); + kfree(ip); +} + +/* + * open and close keep track of how many times the CQ is mapped, + * to avoid releasing it. + */ +static void ipath_vma_open(struct vm_area_struct *vma) +{ + struct ipath_mmap_info *ip = vma->vm_private_data; + + kref_get(&ip->ref); + ip->mmap_cnt++; +} + +static void ipath_vma_close(struct vm_area_struct *vma) +{ + struct ipath_mmap_info *ip = vma->vm_private_data; + + ip->mmap_cnt--; + kref_put(&ip->ref, ipath_release_mmap_info); +} + +static struct vm_operations_struct ipath_vm_ops = { + .open = ipath_vma_open, + .close = ipath_vma_close, +}; + +/** + * ipath_mmap - create a new mmap region + * @context: the IB user context of the process making the mmap() call + * @vma: the VMA to be initialized + * Return zero if the mmap is OK. Otherwise, return an errno. + */ +int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + struct ipath_ibdev *dev = to_idev(context->device); + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + unsigned long size = vma->vm_end - vma->vm_start; + struct ipath_mmap_info *ip, **pp; + int ret = -EINVAL; + + /* + * Search the device's list of objects waiting for a mmap call. + * Normally, this list is very short since a call to create a + * CQ, QP, or SRQ is soon followed by a call to mmap(). + */ + spin_lock_irq(&dev->pending_lock); + for (pp = &dev->pending_mmaps; (ip = *pp); pp = &ip->next) { + /* Only the creator is allowed to mmap the object */ + if (context != ip->context || (void *) offset != ip->obj) + continue; + /* Don't allow a mmap larger than the object. */ + if (size > ip->size) + break; + + *pp = ip->next; + spin_unlock_irq(&dev->pending_lock); + + ret = remap_vmalloc_range(vma, ip->obj, 0); + if (ret) + goto done; + vma->vm_ops = &ipath_vm_ops; + vma->vm_private_data = ip; + ipath_vma_open(vma); + goto done; + } + spin_unlock_irq(&dev->pending_lock); +done: + return ret; +} diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 44c32d2db990..1ccfc909db1e 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -35,7 +35,7 @@ #include #include "ipath_verbs.h" -#include "ipath_common.h" +#include "ipath_kernel.h" #define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE) #define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) @@ -44,19 +44,6 @@ #define find_next_offset(map, off) find_next_zero_bit((map)->page, \ BITS_PER_PAGE, off) -#define TRANS_INVALID 0 -#define TRANS_ANY2RST 1 -#define TRANS_RST2INIT 2 -#define TRANS_INIT2INIT 3 -#define TRANS_INIT2RTR 4 -#define TRANS_RTR2RTS 5 -#define TRANS_RTS2RTS 6 -#define TRANS_SQERR2RTS 7 -#define TRANS_ANY2ERR 8 -#define TRANS_RTS2SQD 9 /* XXX Wait for expected ACKs & signal event */ -#define TRANS_SQD2SQD 10 /* error if not drained & parameter change */ -#define TRANS_SQD2RTS 11 /* error if not drained */ - /* * Convert the AETH credit code into the number of credits. */ @@ -355,8 +342,10 @@ static void ipath_reset_qp(struct ipath_qp *qp) qp->s_last = 0; qp->s_ssn = 1; qp->s_lsn = 0; - qp->r_rq.head = 0; - qp->r_rq.tail = 0; + if (qp->r_rq.wq) { + qp->r_rq.wq->head = 0; + qp->r_rq.wq->tail = 0; + } qp->r_reuse_sge = 0; } @@ -410,15 +399,32 @@ void ipath_error_qp(struct ipath_qp *qp) qp->s_hdrwords = 0; qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; - wc.opcode = IB_WC_RECV; - spin_lock(&qp->r_rq.lock); - while (qp->r_rq.tail != qp->r_rq.head) { - wc.wr_id = get_rwqe_ptr(&qp->r_rq, qp->r_rq.tail)->wr_id; - if (++qp->r_rq.tail >= qp->r_rq.size) - qp->r_rq.tail = 0; - ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + if (qp->r_rq.wq) { + struct ipath_rwq *wq; + u32 head; + u32 tail; + + spin_lock(&qp->r_rq.lock); + + /* sanity check pointers before trusting them */ + wq = qp->r_rq.wq; + head = wq->head; + if (head >= qp->r_rq.size) + head = 0; + tail = wq->tail; + if (tail >= qp->r_rq.size) + tail = 0; + wc.opcode = IB_WC_RECV; + while (tail != head) { + wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id; + if (++tail >= qp->r_rq.size) + tail = 0; + ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + } + wq->tail = tail; + + spin_unlock(&qp->r_rq.lock); } - spin_unlock(&qp->r_rq.lock); } /** @@ -544,7 +550,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->dest_qp_num = qp->remote_qpn; attr->qp_access_flags = qp->qp_access_flags; attr->cap.max_send_wr = qp->s_size - 1; - attr->cap.max_recv_wr = qp->r_rq.size - 1; + attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1; attr->cap.max_send_sge = qp->s_max_sge; attr->cap.max_recv_sge = qp->r_rq.max_sge; attr->cap.max_inline_data = 0; @@ -597,13 +603,23 @@ __be32 ipath_compute_aeth(struct ipath_qp *qp) } else { u32 min, max, x; u32 credits; + struct ipath_rwq *wq = qp->r_rq.wq; + u32 head; + u32 tail; + /* sanity check pointers before trusting them */ + head = wq->head; + if (head >= qp->r_rq.size) + head = 0; + tail = wq->tail; + if (tail >= qp->r_rq.size) + tail = 0; /* * Compute the number of credits available (RWQEs). * XXX Not holding the r_rq.lock here so there is a small * chance that the pair of reads are not atomic. */ - credits = qp->r_rq.head - qp->r_rq.tail; + credits = head - tail; if ((int)credits < 0) credits += qp->r_rq.size; /* @@ -680,27 +696,37 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, case IB_QPT_UD: case IB_QPT_SMI: case IB_QPT_GSI: - qp = kmalloc(sizeof(*qp), GFP_KERNEL); + sz = sizeof(*qp); + if (init_attr->srq) { + struct ipath_srq *srq = to_isrq(init_attr->srq); + + sz += sizeof(*qp->r_sg_list) * + srq->rq.max_sge; + } else + sz += sizeof(*qp->r_sg_list) * + init_attr->cap.max_recv_sge; + qp = kmalloc(sz, GFP_KERNEL); if (!qp) { - vfree(swq); ret = ERR_PTR(-ENOMEM); - goto bail; + goto bail_swq; } if (init_attr->srq) { + sz = 0; qp->r_rq.size = 0; qp->r_rq.max_sge = 0; qp->r_rq.wq = NULL; + init_attr->cap.max_recv_wr = 0; + init_attr->cap.max_recv_sge = 0; } else { qp->r_rq.size = init_attr->cap.max_recv_wr + 1; qp->r_rq.max_sge = init_attr->cap.max_recv_sge; - sz = (sizeof(struct ipath_sge) * qp->r_rq.max_sge) + + sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + sizeof(struct ipath_rwqe); - qp->r_rq.wq = vmalloc(qp->r_rq.size * sz); + qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + + qp->r_rq.size * sz); if (!qp->r_rq.wq) { - kfree(qp); - vfree(swq); ret = ERR_PTR(-ENOMEM); - goto bail; + goto bail_qp; } } @@ -726,12 +752,10 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, err = ipath_alloc_qpn(&dev->qp_table, qp, init_attr->qp_type); if (err) { - vfree(swq); - vfree(qp->r_rq.wq); - kfree(qp); ret = ERR_PTR(err); - goto bail; + goto bail_rwq; } + qp->ip = NULL; ipath_reset_qp(qp); /* Tell the core driver that the kernel SMA is present. */ @@ -748,8 +772,51 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, init_attr->cap.max_inline_data = 0; - ret = &qp->ibqp; + /* + * Return the address of the RWQ as the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + struct ipath_mmap_info *ip; + __u64 offset = (__u64) qp->r_rq.wq; + int err; + err = ib_copy_to_udata(udata, &offset, sizeof(offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_rwq; + } + + if (qp->r_rq.wq) { + /* Allocate info for ipath_mmap(). */ + ip = kmalloc(sizeof(*ip), GFP_KERNEL); + if (!ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_rwq; + } + qp->ip = ip; + ip->context = ibpd->uobject->context; + ip->obj = qp->r_rq.wq; + kref_init(&ip->ref); + ip->mmap_cnt = 0; + ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) + + qp->r_rq.size * sz); + spin_lock_irq(&dev->pending_lock); + ip->next = dev->pending_mmaps; + dev->pending_mmaps = ip; + spin_unlock_irq(&dev->pending_lock); + } + } + + ret = &qp->ibqp; + goto bail; + +bail_rwq: + vfree(qp->r_rq.wq); +bail_qp: + kfree(qp); +bail_swq: + vfree(swq); bail: return ret; } @@ -773,11 +840,9 @@ int ipath_destroy_qp(struct ib_qp *ibqp) if (qp->ibqp.qp_type == IB_QPT_SMI) ipath_layer_set_verbs_flags(dev->dd, 0); - spin_lock_irqsave(&qp->r_rq.lock, flags); - spin_lock(&qp->s_lock); + spin_lock_irqsave(&qp->s_lock, flags); qp->state = IB_QPS_ERR; - spin_unlock(&qp->s_lock); - spin_unlock_irqrestore(&qp->r_rq.lock, flags); + spin_unlock_irqrestore(&qp->s_lock, flags); /* Stop the sending tasklet. */ tasklet_kill(&qp->s_task); @@ -798,8 +863,11 @@ int ipath_destroy_qp(struct ib_qp *ibqp) if (atomic_read(&qp->refcount) != 0) ipath_free_qp(&dev->qp_table, qp); + if (qp->ip) + kref_put(&qp->ip->ref, ipath_release_mmap_info); + else + vfree(qp->r_rq.wq); vfree(qp->s_wq); - vfree(qp->r_rq.wq); kfree(qp); return 0; } diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index 772bc59fb85c..dd09420d677d 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -32,7 +32,7 @@ */ #include "ipath_verbs.h" -#include "ipath_common.h" +#include "ipath_kernel.h" /* * Convert the AETH RNR timeout code into the number of milliseconds. @@ -106,6 +106,54 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp) spin_unlock_irqrestore(&dev->pending_lock, flags); } +static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe) +{ + struct ipath_ibdev *dev = to_idev(qp->ibqp.device); + int user = to_ipd(qp->ibqp.pd)->user; + int i, j, ret; + struct ib_wc wc; + + qp->r_len = 0; + for (i = j = 0; i < wqe->num_sge; i++) { + if (wqe->sg_list[i].length == 0) + continue; + /* Check LKEY */ + if ((user && wqe->sg_list[i].lkey == 0) || + !ipath_lkey_ok(&dev->lk_table, + &qp->r_sg_list[j], &wqe->sg_list[i], + IB_ACCESS_LOCAL_WRITE)) + goto bad_lkey; + qp->r_len += wqe->sg_list[i].length; + j++; + } + qp->r_sge.sge = qp->r_sg_list[0]; + qp->r_sge.sg_list = qp->r_sg_list + 1; + qp->r_sge.num_sge = j; + ret = 1; + goto bail; + +bad_lkey: + wc.wr_id = wqe->wr_id; + wc.status = IB_WC_LOC_PROT_ERR; + wc.opcode = IB_WC_RECV; + wc.vendor_err = 0; + wc.byte_len = 0; + wc.imm_data = 0; + wc.qp_num = qp->ibqp.qp_num; + wc.src_qp = 0; + wc.wc_flags = 0; + wc.pkey_index = 0; + wc.slid = 0; + wc.sl = 0; + wc.dlid_path_bits = 0; + wc.port_num = 0; + /* Signal solicited completion event. */ + ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + ret = 0; +bail: + return ret; +} + /** * ipath_get_rwqe - copy the next RWQE into the QP's RWQE * @qp: the QP @@ -119,71 +167,71 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) { unsigned long flags; struct ipath_rq *rq; + struct ipath_rwq *wq; struct ipath_srq *srq; struct ipath_rwqe *wqe; - int ret = 1; + void (*handler)(struct ib_event *, void *); + u32 tail; + int ret; - if (!qp->ibqp.srq) { + if (qp->ibqp.srq) { + srq = to_isrq(qp->ibqp.srq); + handler = srq->ibsrq.event_handler; + rq = &srq->rq; + } else { + srq = NULL; + handler = NULL; rq = &qp->r_rq; - spin_lock_irqsave(&rq->lock, flags); - - if (unlikely(rq->tail == rq->head)) { - ret = 0; - goto done; - } - wqe = get_rwqe_ptr(rq, rq->tail); - qp->r_wr_id = wqe->wr_id; - if (!wr_id_only) { - qp->r_sge.sge = wqe->sg_list[0]; - qp->r_sge.sg_list = wqe->sg_list + 1; - qp->r_sge.num_sge = wqe->num_sge; - qp->r_len = wqe->length; - } - if (++rq->tail >= rq->size) - rq->tail = 0; - goto done; } - srq = to_isrq(qp->ibqp.srq); - rq = &srq->rq; spin_lock_irqsave(&rq->lock, flags); - - if (unlikely(rq->tail == rq->head)) { - ret = 0; - goto done; - } - wqe = get_rwqe_ptr(rq, rq->tail); + wq = rq->wq; + tail = wq->tail; + /* Validate tail before using it since it is user writable. */ + if (tail >= rq->size) + tail = 0; + do { + if (unlikely(tail == wq->head)) { + spin_unlock_irqrestore(&rq->lock, flags); + ret = 0; + goto bail; + } + wqe = get_rwqe_ptr(rq, tail); + if (++tail >= rq->size) + tail = 0; + } while (!wr_id_only && !init_sge(qp, wqe)); qp->r_wr_id = wqe->wr_id; - if (!wr_id_only) { - qp->r_sge.sge = wqe->sg_list[0]; - qp->r_sge.sg_list = wqe->sg_list + 1; - qp->r_sge.num_sge = wqe->num_sge; - qp->r_len = wqe->length; - } - if (++rq->tail >= rq->size) - rq->tail = 0; - if (srq->ibsrq.event_handler) { - struct ib_event ev; + wq->tail = tail; + + ret = 1; + if (handler) { u32 n; - if (rq->head < rq->tail) - n = rq->size + rq->head - rq->tail; + /* + * validate head pointer value and compute + * the number of remaining WQEs. + */ + n = wq->head; + if (n >= rq->size) + n = 0; + if (n < tail) + n += rq->size - tail; else - n = rq->head - rq->tail; + n -= tail; if (n < srq->limit) { + struct ib_event ev; + srq->limit = 0; spin_unlock_irqrestore(&rq->lock, flags); ev.device = qp->ibqp.device; ev.element.srq = qp->ibqp.srq; ev.event = IB_EVENT_SRQ_LIMIT_REACHED; - srq->ibsrq.event_handler(&ev, - srq->ibsrq.srq_context); + handler(&ev, srq->ibsrq.srq_context); goto bail; } } - -done: spin_unlock_irqrestore(&rq->lock, flags); + bail: return ret; } diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c index fa77da6667ed..941e866d9517 100644 --- a/drivers/infiniband/hw/ipath/ipath_srq.c +++ b/drivers/infiniband/hw/ipath/ipath_srq.c @@ -48,66 +48,39 @@ int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { struct ipath_srq *srq = to_isrq(ibsrq); - struct ipath_ibdev *dev = to_idev(ibsrq->device); + struct ipath_rwq *wq; unsigned long flags; int ret; for (; wr; wr = wr->next) { struct ipath_rwqe *wqe; u32 next; - int i, j; + int i; - if (wr->num_sge > srq->rq.max_sge) { + if ((unsigned) wr->num_sge > srq->rq.max_sge) { *bad_wr = wr; ret = -ENOMEM; goto bail; } spin_lock_irqsave(&srq->rq.lock, flags); - next = srq->rq.head + 1; + wq = srq->rq.wq; + next = wq->head + 1; if (next >= srq->rq.size) next = 0; - if (next == srq->rq.tail) { + if (next == wq->tail) { spin_unlock_irqrestore(&srq->rq.lock, flags); *bad_wr = wr; ret = -ENOMEM; goto bail; } - wqe = get_rwqe_ptr(&srq->rq, srq->rq.head); + wqe = get_rwqe_ptr(&srq->rq, wq->head); wqe->wr_id = wr->wr_id; - wqe->sg_list[0].mr = NULL; - wqe->sg_list[0].vaddr = NULL; - wqe->sg_list[0].length = 0; - wqe->sg_list[0].sge_length = 0; - wqe->length = 0; - for (i = 0, j = 0; i < wr->num_sge; i++) { - /* Check LKEY */ - if (to_ipd(srq->ibsrq.pd)->user && - wr->sg_list[i].lkey == 0) { - spin_unlock_irqrestore(&srq->rq.lock, - flags); - *bad_wr = wr; - ret = -EINVAL; - goto bail; - } - if (wr->sg_list[i].length == 0) - continue; - if (!ipath_lkey_ok(&dev->lk_table, - &wqe->sg_list[j], - &wr->sg_list[i], - IB_ACCESS_LOCAL_WRITE)) { - spin_unlock_irqrestore(&srq->rq.lock, - flags); - *bad_wr = wr; - ret = -EINVAL; - goto bail; - } - wqe->length += wr->sg_list[i].length; - j++; - } - wqe->num_sge = j; - srq->rq.head = next; + wqe->num_sge = wr->num_sge; + for (i = 0; i < wr->num_sge; i++) + wqe->sg_list[i] = wr->sg_list[i]; + wq->head = next; spin_unlock_irqrestore(&srq->rq.lock, flags); } ret = 0; @@ -133,53 +106,95 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, if (dev->n_srqs_allocated == ib_ipath_max_srqs) { ret = ERR_PTR(-ENOMEM); - goto bail; + goto done; } if (srq_init_attr->attr.max_wr == 0) { ret = ERR_PTR(-EINVAL); - goto bail; + goto done; } if ((srq_init_attr->attr.max_sge > ib_ipath_max_srq_sges) || (srq_init_attr->attr.max_wr > ib_ipath_max_srq_wrs)) { ret = ERR_PTR(-EINVAL); - goto bail; + goto done; } srq = kmalloc(sizeof(*srq), GFP_KERNEL); if (!srq) { ret = ERR_PTR(-ENOMEM); - goto bail; + goto done; } /* * Need to use vmalloc() if we want to support large #s of entries. */ srq->rq.size = srq_init_attr->attr.max_wr + 1; - sz = sizeof(struct ipath_sge) * srq_init_attr->attr.max_sge + + srq->rq.max_sge = srq_init_attr->attr.max_sge; + sz = sizeof(struct ib_sge) * srq->rq.max_sge + sizeof(struct ipath_rwqe); - srq->rq.wq = vmalloc(srq->rq.size * sz); + srq->rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + srq->rq.size * sz); if (!srq->rq.wq) { - kfree(srq); ret = ERR_PTR(-ENOMEM); - goto bail; + goto bail_srq; } + /* + * Return the address of the RWQ as the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + struct ipath_mmap_info *ip; + __u64 offset = (__u64) srq->rq.wq; + int err; + + err = ib_copy_to_udata(udata, &offset, sizeof(offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_wq; + } + + /* Allocate info for ipath_mmap(). */ + ip = kmalloc(sizeof(*ip), GFP_KERNEL); + if (!ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_wq; + } + srq->ip = ip; + ip->context = ibpd->uobject->context; + ip->obj = srq->rq.wq; + kref_init(&ip->ref); + ip->mmap_cnt = 0; + ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) + + srq->rq.size * sz); + spin_lock_irq(&dev->pending_lock); + ip->next = dev->pending_mmaps; + dev->pending_mmaps = ip; + spin_unlock_irq(&dev->pending_lock); + } else + srq->ip = NULL; + /* * ib_create_srq() will initialize srq->ibsrq. */ spin_lock_init(&srq->rq.lock); - srq->rq.head = 0; - srq->rq.tail = 0; + srq->rq.wq->head = 0; + srq->rq.wq->tail = 0; srq->rq.max_sge = srq_init_attr->attr.max_sge; srq->limit = srq_init_attr->attr.srq_limit; - ret = &srq->ibsrq; - dev->n_srqs_allocated++; -bail: + ret = &srq->ibsrq; + goto done; + +bail_wq: + vfree(srq->rq.wq); + +bail_srq: + kfree(srq); + +done: return ret; } @@ -195,78 +210,123 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, struct ib_udata *udata) { struct ipath_srq *srq = to_isrq(ibsrq); - unsigned long flags; - int ret; - - if (attr_mask & IB_SRQ_MAX_WR) - if ((attr->max_wr > ib_ipath_max_srq_wrs) || - (attr->max_sge > srq->rq.max_sge)) { - ret = -EINVAL; - goto bail; - } - - if (attr_mask & IB_SRQ_LIMIT) - if (attr->srq_limit >= srq->rq.size) { - ret = -EINVAL; - goto bail; - } + int ret = 0; if (attr_mask & IB_SRQ_MAX_WR) { - struct ipath_rwqe *wq, *p; - u32 sz, size, n; + struct ipath_rwq *owq; + struct ipath_rwq *wq; + struct ipath_rwqe *p; + u32 sz, size, n, head, tail; + + /* Check that the requested sizes are below the limits. */ + if ((attr->max_wr > ib_ipath_max_srq_wrs) || + ((attr_mask & IB_SRQ_LIMIT) ? + attr->srq_limit : srq->limit) > attr->max_wr) { + ret = -EINVAL; + goto bail; + } sz = sizeof(struct ipath_rwqe) + - attr->max_sge * sizeof(struct ipath_sge); + srq->rq.max_sge * sizeof(struct ib_sge); size = attr->max_wr + 1; - wq = vmalloc(size * sz); + wq = vmalloc_user(sizeof(struct ipath_rwq) + size * sz); if (!wq) { ret = -ENOMEM; goto bail; } - spin_lock_irqsave(&srq->rq.lock, flags); - if (srq->rq.head < srq->rq.tail) - n = srq->rq.size + srq->rq.head - srq->rq.tail; + /* + * Return the address of the RWQ as the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->inlen >= sizeof(__u64)) { + __u64 offset_addr; + __u64 offset = (__u64) wq; + + ret = ib_copy_from_udata(&offset_addr, udata, + sizeof(offset_addr)); + if (ret) { + vfree(wq); + goto bail; + } + udata->outbuf = (void __user *) offset_addr; + ret = ib_copy_to_udata(udata, &offset, + sizeof(offset)); + if (ret) { + vfree(wq); + goto bail; + } + } + + spin_lock_irq(&srq->rq.lock); + /* + * validate head pointer value and compute + * the number of remaining WQEs. + */ + owq = srq->rq.wq; + head = owq->head; + if (head >= srq->rq.size) + head = 0; + tail = owq->tail; + if (tail >= srq->rq.size) + tail = 0; + n = head; + if (n < tail) + n += srq->rq.size - tail; else - n = srq->rq.head - srq->rq.tail; - if (size <= n || size <= srq->limit) { - spin_unlock_irqrestore(&srq->rq.lock, flags); + n -= tail; + if (size <= n) { + spin_unlock_irq(&srq->rq.lock); vfree(wq); ret = -EINVAL; goto bail; } n = 0; - p = wq; - while (srq->rq.tail != srq->rq.head) { + p = wq->wq; + while (tail != head) { struct ipath_rwqe *wqe; int i; - wqe = get_rwqe_ptr(&srq->rq, srq->rq.tail); + wqe = get_rwqe_ptr(&srq->rq, tail); p->wr_id = wqe->wr_id; - p->length = wqe->length; p->num_sge = wqe->num_sge; for (i = 0; i < wqe->num_sge; i++) p->sg_list[i] = wqe->sg_list[i]; n++; p = (struct ipath_rwqe *)((char *) p + sz); - if (++srq->rq.tail >= srq->rq.size) - srq->rq.tail = 0; + if (++tail >= srq->rq.size) + tail = 0; } - vfree(srq->rq.wq); srq->rq.wq = wq; srq->rq.size = size; - srq->rq.head = n; - srq->rq.tail = 0; - srq->rq.max_sge = attr->max_sge; - spin_unlock_irqrestore(&srq->rq.lock, flags); - } + wq->head = n; + wq->tail = 0; + if (attr_mask & IB_SRQ_LIMIT) + srq->limit = attr->srq_limit; + spin_unlock_irq(&srq->rq.lock); - if (attr_mask & IB_SRQ_LIMIT) { - spin_lock_irqsave(&srq->rq.lock, flags); - srq->limit = attr->srq_limit; - spin_unlock_irqrestore(&srq->rq.lock, flags); + vfree(owq); + + if (srq->ip) { + struct ipath_mmap_info *ip = srq->ip; + struct ipath_ibdev *dev = to_idev(srq->ibsrq.device); + + ip->obj = wq; + ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) + + size * sz); + spin_lock_irq(&dev->pending_lock); + ip->next = dev->pending_mmaps; + dev->pending_mmaps = ip; + spin_unlock_irq(&dev->pending_lock); + } + } else if (attr_mask & IB_SRQ_LIMIT) { + spin_lock_irq(&srq->rq.lock); + if (attr->srq_limit >= srq->rq.size) + ret = -EINVAL; + else + srq->limit = attr->srq_limit; + spin_unlock_irq(&srq->rq.lock); } - ret = 0; bail: return ret; diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 3466129af804..82439fcfc2f8 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -34,7 +34,54 @@ #include #include "ipath_verbs.h" -#include "ipath_common.h" +#include "ipath_kernel.h" + +static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe, + u32 *lengthp, struct ipath_sge_state *ss) +{ + struct ipath_ibdev *dev = to_idev(qp->ibqp.device); + int user = to_ipd(qp->ibqp.pd)->user; + int i, j, ret; + struct ib_wc wc; + + *lengthp = 0; + for (i = j = 0; i < wqe->num_sge; i++) { + if (wqe->sg_list[i].length == 0) + continue; + /* Check LKEY */ + if ((user && wqe->sg_list[i].lkey == 0) || + !ipath_lkey_ok(&dev->lk_table, + j ? &ss->sg_list[j - 1] : &ss->sge, + &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) + goto bad_lkey; + *lengthp += wqe->sg_list[i].length; + j++; + } + ss->num_sge = j; + ret = 1; + goto bail; + +bad_lkey: + wc.wr_id = wqe->wr_id; + wc.status = IB_WC_LOC_PROT_ERR; + wc.opcode = IB_WC_RECV; + wc.vendor_err = 0; + wc.byte_len = 0; + wc.imm_data = 0; + wc.qp_num = qp->ibqp.qp_num; + wc.src_qp = 0; + wc.wc_flags = 0; + wc.pkey_index = 0; + wc.slid = 0; + wc.sl = 0; + wc.dlid_path_bits = 0; + wc.port_num = 0; + /* Signal solicited completion event. */ + ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + ret = 0; +bail: + return ret; +} /** * ipath_ud_loopback - handle send on loopback QPs @@ -46,6 +93,8 @@ * * This is called from ipath_post_ud_send() to forward a WQE addressed * to the same HCA. + * Note that the receive interrupt handler may be calling ipath_ud_rcv() + * while this is being called. */ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_sge_state *ss, @@ -60,7 +109,11 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_srq *srq; struct ipath_sge_state rsge; struct ipath_sge *sge; + struct ipath_rwq *wq; struct ipath_rwqe *wqe; + void (*handler)(struct ib_event *, void *); + u32 tail; + u32 rlen; qp = ipath_lookup_qpn(&dev->qp_table, wr->wr.ud.remote_qpn); if (!qp) @@ -94,6 +147,13 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, wc->imm_data = 0; } + if (wr->num_sge > 1) { + rsge.sg_list = kmalloc((wr->num_sge - 1) * + sizeof(struct ipath_sge), + GFP_ATOMIC); + } else + rsge.sg_list = NULL; + /* * Get the next work request entry to find where to put the data. * Note that it is safe to drop the lock after changing rq->tail @@ -101,37 +161,52 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, */ if (qp->ibqp.srq) { srq = to_isrq(qp->ibqp.srq); + handler = srq->ibsrq.event_handler; rq = &srq->rq; } else { srq = NULL; + handler = NULL; rq = &qp->r_rq; } + spin_lock_irqsave(&rq->lock, flags); - if (rq->tail == rq->head) { - spin_unlock_irqrestore(&rq->lock, flags); - dev->n_pkt_drops++; - goto done; + wq = rq->wq; + tail = wq->tail; + while (1) { + if (unlikely(tail == wq->head)) { + spin_unlock_irqrestore(&rq->lock, flags); + dev->n_pkt_drops++; + goto bail_sge; + } + wqe = get_rwqe_ptr(rq, tail); + if (++tail >= rq->size) + tail = 0; + if (init_sge(qp, wqe, &rlen, &rsge)) + break; + wq->tail = tail; } /* Silently drop packets which are too big. */ - wqe = get_rwqe_ptr(rq, rq->tail); - if (wc->byte_len > wqe->length) { + if (wc->byte_len > rlen) { spin_unlock_irqrestore(&rq->lock, flags); dev->n_pkt_drops++; - goto done; + goto bail_sge; } + wq->tail = tail; wc->wr_id = wqe->wr_id; - rsge.sge = wqe->sg_list[0]; - rsge.sg_list = wqe->sg_list + 1; - rsge.num_sge = wqe->num_sge; - if (++rq->tail >= rq->size) - rq->tail = 0; - if (srq && srq->ibsrq.event_handler) { + if (handler) { u32 n; - if (rq->head < rq->tail) - n = rq->size + rq->head - rq->tail; + /* + * validate head pointer value and compute + * the number of remaining WQEs. + */ + n = wq->head; + if (n >= rq->size) + n = 0; + if (n < tail) + n += rq->size - tail; else - n = rq->head - rq->tail; + n -= tail; if (n < srq->limit) { struct ib_event ev; @@ -140,12 +215,12 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, ev.device = qp->ibqp.device; ev.element.srq = qp->ibqp.srq; ev.event = IB_EVENT_SRQ_LIMIT_REACHED; - srq->ibsrq.event_handler(&ev, - srq->ibsrq.srq_context); + handler(&ev, srq->ibsrq.srq_context); } else spin_unlock_irqrestore(&rq->lock, flags); } else spin_unlock_irqrestore(&rq->lock, flags); + ah_attr = &to_iah(wr->wr.ud.ah)->attr; if (ah_attr->ah_flags & IB_AH_GRH) { ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh)); @@ -186,7 +261,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, wc->src_qp = sqp->ibqp.qp_num; /* XXX do we know which pkey matched? Only needed for GSI. */ wc->pkey_index = 0; - wc->slid = ipath_layer_get_lid(dev->dd) | + wc->slid = dev->dd->ipath_lid | (ah_attr->src_path_bits & ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1)); wc->sl = ah_attr->sl; @@ -196,6 +271,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc, wr->send_flags & IB_SEND_SOLICITED); +bail_sge: + kfree(rsge.sg_list); done: if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); @@ -433,13 +510,9 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, int opcode; u32 hdrsize; u32 pad; - unsigned long flags; struct ib_wc wc; u32 qkey; u32 src_qp; - struct ipath_rq *rq; - struct ipath_srq *srq; - struct ipath_rwqe *wqe; u16 dlid; int header_in_data; @@ -547,19 +620,10 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, /* * Get the next work request entry to find where to put the data. - * Note that it is safe to drop the lock after changing rq->tail - * since ipath_post_receive() won't fill the empty slot. */ - if (qp->ibqp.srq) { - srq = to_isrq(qp->ibqp.srq); - rq = &srq->rq; - } else { - srq = NULL; - rq = &qp->r_rq; - } - spin_lock_irqsave(&rq->lock, flags); - if (rq->tail == rq->head) { - spin_unlock_irqrestore(&rq->lock, flags); + if (qp->r_reuse_sge) + qp->r_reuse_sge = 0; + else if (!ipath_get_rwqe(qp, 0)) { /* * Count VL15 packets dropped due to no receive buffer. * Otherwise, count them as buffer overruns since usually, @@ -573,39 +637,11 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, goto bail; } /* Silently drop packets which are too big. */ - wqe = get_rwqe_ptr(rq, rq->tail); - if (wc.byte_len > wqe->length) { - spin_unlock_irqrestore(&rq->lock, flags); + if (wc.byte_len > qp->r_len) { + qp->r_reuse_sge = 1; dev->n_pkt_drops++; goto bail; } - wc.wr_id = wqe->wr_id; - qp->r_sge.sge = wqe->sg_list[0]; - qp->r_sge.sg_list = wqe->sg_list + 1; - qp->r_sge.num_sge = wqe->num_sge; - if (++rq->tail >= rq->size) - rq->tail = 0; - if (srq && srq->ibsrq.event_handler) { - u32 n; - - if (rq->head < rq->tail) - n = rq->size + rq->head - rq->tail; - else - n = rq->head - rq->tail; - if (n < srq->limit) { - struct ib_event ev; - - srq->limit = 0; - spin_unlock_irqrestore(&rq->lock, flags); - ev.device = qp->ibqp.device; - ev.element.srq = qp->ibqp.srq; - ev.event = IB_EVENT_SRQ_LIMIT_REACHED; - srq->ibsrq.event_handler(&ev, - srq->ibsrq.srq_context); - } else - spin_unlock_irqrestore(&rq->lock, flags); - } else - spin_unlock_irqrestore(&rq->lock, flags); if (has_grh) { ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh, sizeof(struct ib_grh)); @@ -614,6 +650,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh)); ipath_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh)); + wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; wc.vendor_err = 0; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index d70a9b6b5239..a2b4c70192d8 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -277,11 +277,12 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { struct ipath_qp *qp = to_iqp(ibqp); + struct ipath_rwq *wq = qp->r_rq.wq; unsigned long flags; int ret; /* Check that state is OK to post receive. */ - if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK)) { + if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) { *bad_wr = wr; ret = -EINVAL; goto bail; @@ -290,59 +291,31 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, for (; wr; wr = wr->next) { struct ipath_rwqe *wqe; u32 next; - int i, j; + int i; - if (wr->num_sge > qp->r_rq.max_sge) { + if ((unsigned) wr->num_sge > qp->r_rq.max_sge) { *bad_wr = wr; ret = -ENOMEM; goto bail; } spin_lock_irqsave(&qp->r_rq.lock, flags); - next = qp->r_rq.head + 1; + next = wq->head + 1; if (next >= qp->r_rq.size) next = 0; - if (next == qp->r_rq.tail) { + if (next == wq->tail) { spin_unlock_irqrestore(&qp->r_rq.lock, flags); *bad_wr = wr; ret = -ENOMEM; goto bail; } - wqe = get_rwqe_ptr(&qp->r_rq, qp->r_rq.head); + wqe = get_rwqe_ptr(&qp->r_rq, wq->head); wqe->wr_id = wr->wr_id; - wqe->sg_list[0].mr = NULL; - wqe->sg_list[0].vaddr = NULL; - wqe->sg_list[0].length = 0; - wqe->sg_list[0].sge_length = 0; - wqe->length = 0; - for (i = 0, j = 0; i < wr->num_sge; i++) { - /* Check LKEY */ - if (to_ipd(qp->ibqp.pd)->user && - wr->sg_list[i].lkey == 0) { - spin_unlock_irqrestore(&qp->r_rq.lock, - flags); - *bad_wr = wr; - ret = -EINVAL; - goto bail; - } - if (wr->sg_list[i].length == 0) - continue; - if (!ipath_lkey_ok( - &to_idev(qp->ibqp.device)->lk_table, - &wqe->sg_list[j], &wr->sg_list[i], - IB_ACCESS_LOCAL_WRITE)) { - spin_unlock_irqrestore(&qp->r_rq.lock, - flags); - *bad_wr = wr; - ret = -EINVAL; - goto bail; - } - wqe->length += wr->sg_list[i].length; - j++; - } - wqe->num_sge = j; - qp->r_rq.head = next; + wqe->num_sge = wr->num_sge; + for (i = 0; i < wr->num_sge; i++) + wqe->sg_list[i] = wr->sg_list[i]; + wq->head = next; spin_unlock_irqrestore(&qp->r_rq.lock, flags); } ret = 0; @@ -1137,6 +1110,7 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd) dev->attach_mcast = ipath_multicast_attach; dev->detach_mcast = ipath_multicast_detach; dev->process_mad = ipath_process_mad; + dev->mmap = ipath_mmap; snprintf(dev->node_desc, sizeof(dev->node_desc), IPATH_IDSTR " %s kernel_SMA", system_utsname.nodename); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 698396778f00..7d2ba72609f7 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -38,6 +38,7 @@ #include #include #include +#include #include #include "ipath_layer.h" @@ -50,7 +51,7 @@ * Increment this value if any changes that break userspace ABI * compatibility are made. */ -#define IPATH_UVERBS_ABI_VERSION 1 +#define IPATH_UVERBS_ABI_VERSION 2 /* * Define an ib_cq_notify value that is not valid so we know when CQ @@ -178,58 +179,41 @@ struct ipath_ah { }; /* - * Quick description of our CQ/QP locking scheme: - * - * We have one global lock that protects dev->cq/qp_table. Each - * struct ipath_cq/qp also has its own lock. An individual qp lock - * may be taken inside of an individual cq lock. Both cqs attached to - * a qp may be locked, with the send cq locked first. No other - * nesting should be done. - * - * Each struct ipath_cq/qp also has an atomic_t ref count. The - * pointer from the cq/qp_table to the struct counts as one reference. - * This reference also is good for access through the consumer API, so - * modifying the CQ/QP etc doesn't need to take another reference. - * Access because of a completion being polled does need a reference. - * - * Finally, each struct ipath_cq/qp has a wait_queue_head_t for the - * destroy function to sleep on. - * - * This means that access from the consumer API requires nothing but - * taking the struct's lock. - * - * Access because of a completion event should go as follows: - * - lock cq/qp_table and look up struct - * - increment ref count in struct - * - drop cq/qp_table lock - * - lock struct, do your thing, and unlock struct - * - decrement ref count; if zero, wake up waiters - * - * To destroy a CQ/QP, we can do the following: - * - lock cq/qp_table, remove pointer, unlock cq/qp_table lock - * - decrement ref count - * - wait_event until ref count is zero - * - * It is the consumer's responsibilty to make sure that no QP - * operations (WQE posting or state modification) are pending when the - * QP is destroyed. Also, the consumer must make sure that calls to - * qp_modify are serialized. - * - * Possible optimizations (wait for profile data to see if/where we - * have locks bouncing between CPUs): - * - split cq/qp table lock into n separate (cache-aligned) locks, - * indexed (say) by the page in the table + * This structure is used by ipath_mmap() to validate an offset + * when an mmap() request is made. The vm_area_struct then uses + * this as its vm_private_data. */ +struct ipath_mmap_info { + struct ipath_mmap_info *next; + struct ib_ucontext *context; + void *obj; + struct kref ref; + unsigned size; + unsigned mmap_cnt; +}; +/* + * This structure is used to contain the head pointer, tail pointer, + * and completion queue entries as a single memory allocation so + * it can be mmap'ed into user space. + */ +struct ipath_cq_wc { + u32 head; /* index of next entry to fill */ + u32 tail; /* index of next ib_poll_cq() entry */ + struct ib_wc queue[1]; /* this is actually size ibcq.cqe + 1 */ +}; + +/* + * The completion queue structure. + */ struct ipath_cq { struct ib_cq ibcq; struct tasklet_struct comptask; spinlock_t lock; u8 notify; u8 triggered; - u32 head; /* new records added to the head */ - u32 tail; /* poll_cq() reads from here. */ - struct ib_wc *queue; /* this is actually ibcq.cqe + 1 */ + struct ipath_cq_wc *queue; + struct ipath_mmap_info *ip; }; /* @@ -248,28 +232,40 @@ struct ipath_swqe { /* * Receive work request queue entry. - * The size of the sg_list is determined when the QP is created and stored - * in qp->r_max_sge. + * The size of the sg_list is determined when the QP (or SRQ) is created + * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). */ struct ipath_rwqe { u64 wr_id; - u32 length; /* total length of data in sg_list */ u8 num_sge; - struct ipath_sge sg_list[0]; + struct ib_sge sg_list[0]; +}; + +/* + * This structure is used to contain the head pointer, tail pointer, + * and receive work queue entries as a single memory allocation so + * it can be mmap'ed into user space. + * Note that the wq array elements are variable size so you can't + * just index into the array to get the N'th element; + * use get_rwqe_ptr() instead. + */ +struct ipath_rwq { + u32 head; /* new work requests posted to the head */ + u32 tail; /* receives pull requests from here. */ + struct ipath_rwqe wq[0]; }; struct ipath_rq { + struct ipath_rwq *wq; spinlock_t lock; - u32 head; /* new work requests posted to the head */ - u32 tail; /* receives pull requests from here. */ u32 size; /* size of RWQE array */ u8 max_sge; - struct ipath_rwqe *wq; /* RWQE array */ }; struct ipath_srq { struct ib_srq ibsrq; struct ipath_rq rq; + struct ipath_mmap_info *ip; /* send signal when number of RWQEs < limit */ u32 limit; }; @@ -293,6 +289,7 @@ struct ipath_qp { atomic_t refcount; wait_queue_head_t wait; struct tasklet_struct s_task; + struct ipath_mmap_info *ip; struct ipath_sge_state *s_cur_sge; struct ipath_sge_state s_sge; /* current send request data */ /* current RDMA read send data */ @@ -345,7 +342,8 @@ struct ipath_qp { u32 s_ssn; /* SSN of tail entry */ u32 s_lsn; /* limit sequence number (credit) */ struct ipath_swqe *s_wq; /* send work queue */ - struct ipath_rq r_rq; /* receive work queue */ + struct ipath_rq r_rq; /* receive work queue */ + struct ipath_sge r_sg_list[0]; /* verified SGEs */ }; /* @@ -369,15 +367,15 @@ static inline struct ipath_swqe *get_swqe_ptr(struct ipath_qp *qp, /* * Since struct ipath_rwqe is not a fixed size, we can't simply index into - * struct ipath_rq.wq. This function does the array index computation. + * struct ipath_rwq.wq. This function does the array index computation. */ static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq, unsigned n) { return (struct ipath_rwqe *) - ((char *) rq->wq + + ((char *) rq->wq->wq + (sizeof(struct ipath_rwqe) + - rq->max_sge * sizeof(struct ipath_sge)) * n); + rq->max_sge * sizeof(struct ib_sge)) * n); } /* @@ -417,6 +415,7 @@ struct ipath_ibdev { struct ib_device ibdev; struct list_head dev_list; struct ipath_devdata *dd; + struct ipath_mmap_info *pending_mmaps; int ib_unit; /* This is the device number */ u16 sm_lid; /* in host order */ u8 sm_sl; @@ -681,6 +680,10 @@ int ipath_unmap_fmr(struct list_head *fmr_list); int ipath_dealloc_fmr(struct ib_fmr *ibfmr); +void ipath_release_mmap_info(struct kref *ref); + +int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); + void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev); void ipath_insert_rnr_queue(struct ipath_qp *qp); From eb9dc6f48dc7537ce53163109625bd992150e0cf Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Fri, 25 Aug 2006 11:24:26 -0700 Subject: [PATCH 0741/1063] IB/ipath: More changes to support InfiniPath on PowerPC 970 systems Ordering of writethrough store buffers needs to be forced, and we need to use ifdef to get writethrough behavior to InfiniPath buffers, because there is no generic way to specify that at this time (similar to code in char/drm/drm_vm.c and block/z2ram.c). Signed-off-by: John Gregor Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/Makefile | 1 + drivers/infiniband/hw/ipath/ipath_driver.c | 6 +++ drivers/infiniband/hw/ipath/ipath_file_ops.c | 7 +++ drivers/infiniband/hw/ipath/ipath_wc_ppc64.c | 52 ++++++++++++++++++++ 4 files changed, 66 insertions(+) create mode 100644 drivers/infiniband/hw/ipath/ipath_wc_ppc64.c diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile index 6bb43474d104..075e313941fe 100644 --- a/drivers/infiniband/hw/ipath/Makefile +++ b/drivers/infiniband/hw/ipath/Makefile @@ -20,6 +20,7 @@ ipath_core-y := \ ipath_user_pages.o ipath_core-$(CONFIG_X86_64) += ipath_wc_x86_64.o +ipath_core-$(CONFIG_PPC64) += ipath_wc_ppc64.o ib_ipath-y := \ ipath_cq.o \ diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index f98518d912b5..6ded914f9eb9 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -440,7 +440,13 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, } dd->ipath_pcirev = rev; +#if defined(__powerpc__) + /* There isn't a generic way to specify writethrough mappings */ + dd->ipath_kregbase = __ioremap(addr, len, + (_PAGE_NO_CACHE|_PAGE_WRITETHRU)); +#else dd->ipath_kregbase = ioremap_nocache(addr, len); +#endif if (!dd->ipath_kregbase) { ipath_dbg("Unable to map io addr %llx to kvirt, failing\n", diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index bbaa70e57db1..0b6e7679eefd 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -985,6 +985,13 @@ static int mmap_piobufs(struct vm_area_struct *vma, * write combining behavior we want on the PIO buffers! */ +#if defined(__powerpc__) + /* There isn't a generic way to specify writethrough mappings */ + pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE; + pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU; + pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED; +#endif + if (vma->vm_flags & VM_READ) { dev_info(&dd->pcidev->dev, "Can't map piobufs as readable (flags=%lx)\n", diff --git a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c new file mode 100644 index 000000000000..036fde662aa9 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * This file is conditionally built on PowerPC only. Otherwise weak symbol + * versions of the functions exported from here are used. + */ + +#include "ipath_kernel.h" + +/** + * ipath_unordered_wc - indicate whether write combining is ordered + * + * PowerPC systems (at least those in the 970 processor family) + * write partially filled store buffers in address order, but will write + * completely filled store buffers in "random" order, and therefore must + * have serialization for correctness with current InfiniPath chips. + * + */ +int ipath_unordered_wc(void) +{ + return 1; +} From c27fef26271d352b5546c33239edeb0dcb4fc0cc Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Fri, 25 Aug 2006 11:24:27 -0700 Subject: [PATCH 0742/1063] IB/ipath: lock resource limit counters correctly Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_verbs.c | 38 ++++++++++++++----- drivers/infiniband/hw/ipath/ipath_verbs.h | 7 ++++ .../infiniband/hw/ipath/ipath_verbs_mcast.c | 5 +++ 3 files changed, 40 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index a2b4c70192d8..5b8ee65c6cd3 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -776,18 +776,22 @@ static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev, * we allow allocations of more than we report for this value. */ - if (dev->n_pds_allocated == ib_ipath_max_pds) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - pd = kmalloc(sizeof *pd, GFP_KERNEL); if (!pd) { ret = ERR_PTR(-ENOMEM); goto bail; } + spin_lock(&dev->n_pds_lock); + if (dev->n_pds_allocated == ib_ipath_max_pds) { + spin_unlock(&dev->n_pds_lock); + kfree(pd); + ret = ERR_PTR(-ENOMEM); + goto bail; + } + dev->n_pds_allocated++; + spin_unlock(&dev->n_pds_lock); /* ib_alloc_pd() will initialize pd->ibpd. */ pd->user = udata != NULL; @@ -803,7 +807,9 @@ static int ipath_dealloc_pd(struct ib_pd *ibpd) struct ipath_pd *pd = to_ipd(ibpd); struct ipath_ibdev *dev = to_idev(ibpd->device); + spin_lock(&dev->n_pds_lock); dev->n_pds_allocated--; + spin_unlock(&dev->n_pds_lock); kfree(pd); @@ -824,11 +830,6 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd, struct ib_ah *ret; struct ipath_ibdev *dev = to_idev(pd->device); - if (dev->n_ahs_allocated == ib_ipath_max_ahs) { - ret = ERR_PTR(-ENOMEM); - goto bail; - } - /* A multicast address requires a GRH (see ch. 8.4.1). */ if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE && ah_attr->dlid != IPATH_PERMISSIVE_LID && @@ -854,7 +855,16 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd, goto bail; } + spin_lock(&dev->n_ahs_lock); + if (dev->n_ahs_allocated == ib_ipath_max_ahs) { + spin_unlock(&dev->n_ahs_lock); + kfree(ah); + ret = ERR_PTR(-ENOMEM); + goto bail; + } + dev->n_ahs_allocated++; + spin_unlock(&dev->n_ahs_lock); /* ib_create_ah() will initialize ah->ibah. */ ah->attr = *ah_attr; @@ -876,7 +886,9 @@ static int ipath_destroy_ah(struct ib_ah *ibah) struct ipath_ibdev *dev = to_idev(ibah->device); struct ipath_ah *ah = to_iah(ibah); + spin_lock(&dev->n_ahs_lock); dev->n_ahs_allocated--; + spin_unlock(&dev->n_ahs_lock); kfree(ah); @@ -963,6 +975,12 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd) dev = &idev->ibdev; /* Only need to initialize non-zero fields. */ + spin_lock_init(&idev->n_pds_lock); + spin_lock_init(&idev->n_ahs_lock); + spin_lock_init(&idev->n_cqs_lock); + spin_lock_init(&idev->n_srqs_lock); + spin_lock_init(&idev->n_mcast_grps_lock); + spin_lock_init(&idev->qp_table.lock); spin_lock_init(&idev->lk_table.lock); idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 7d2ba72609f7..a9baa9101432 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -434,11 +434,18 @@ struct ipath_ibdev { __be64 sys_image_guid; /* in network order */ __be64 gid_prefix; /* in network order */ __be64 mkey; + u32 n_pds_allocated; /* number of PDs allocated for device */ + spinlock_t n_pds_lock; u32 n_ahs_allocated; /* number of AHs allocated for device */ + spinlock_t n_ahs_lock; u32 n_cqs_allocated; /* number of CQs allocated for device */ + spinlock_t n_cqs_lock; u32 n_srqs_allocated; /* number of SRQs allocated for device */ + spinlock_t n_srqs_lock; u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ + spinlock_t n_mcast_grps_lock; + u64 ipath_sword; /* total dwords sent (sample result) */ u64 ipath_rword; /* total dwords received (sample result) */ u64 ipath_spkts; /* total packets sent (sample result) */ diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c index ee0e1d96d723..cb35679e4a18 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c @@ -207,12 +207,15 @@ static int ipath_mcast_add(struct ipath_ibdev *dev, goto bail; } + spin_lock(&dev->n_mcast_grps_lock); if (dev->n_mcast_grps_allocated == ib_ipath_max_mcast_grps) { + spin_unlock(&dev->n_mcast_grps_lock); ret = ENOMEM; goto bail; } dev->n_mcast_grps_allocated++; + spin_unlock(&dev->n_mcast_grps_lock); list_add_tail_rcu(&mqp->list, &mcast->qp_list); @@ -343,7 +346,9 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) atomic_dec(&mcast->refcount); wait_event(mcast->wait, !atomic_read(&mcast->refcount)); ipath_mcast_free(mcast); + spin_lock(&dev->n_mcast_grps_lock); dev->n_mcast_grps_allocated--; + spin_unlock(&dev->n_mcast_grps_lock); } ret = 0; From 8e280d94e29af67035637fb957daba7ae0d23583 Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Fri, 25 Aug 2006 11:24:28 -0700 Subject: [PATCH 0743/1063] IB/ipath: fix for crash on module unload, if cfgports < portcnt Allocate enough pointers for all possible ports, to avoid problems in cleanup/unload. Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_init_chip.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 414cdd1d80a6..c63de8f0fa9e 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -240,7 +240,11 @@ static int init_chip_first(struct ipath_devdata *dd, "only supports %u\n", ipath_cfgports, dd->ipath_portcnt); } - dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_cfgports, + /* + * Allocate full portcnt array, rather than just cfgports, because + * cleanup iterates across all possible ports. + */ + dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_portcnt, GFP_KERNEL); if (!dd->ipath_pd) { From ba11203a11835737df980ef3dd3bd8325b9cc94e Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Fri, 25 Aug 2006 11:24:29 -0700 Subject: [PATCH 0744/1063] IB/ipath: fix handling of kpiobufs Change comment: no longer imply that user can set ipath_kpiobufs to zero. Actually set ipath_kpiobufs from parameter. Previously only altered per-device ipath_lastport_piobuf, which was over-written in chip init. Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_init_chip.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index c63de8f0fa9e..75c372136702 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -691,7 +691,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2) / (sizeof(u64) * BITS_PER_BYTE / 2); if (ipath_kpiobufs == 0) { - /* not set by user, or set explictly to default */ + /* not set by user (this is default) */ if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 128) kpiobufs = 32; else @@ -950,6 +950,7 @@ static int ipath_set_kpiobufs(const char *str, struct kernel_param *kp) dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - val; } + ipath_kpiobufs = val; ret = 0; bail: spin_unlock_irqrestore(&ipath_devs_lock, flags); From 367fe711c5dc85dbc3265cf01e34d4d6fbd55f06 Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Fri, 25 Aug 2006 11:24:30 -0700 Subject: [PATCH 0745/1063] IB/ipath: drop requirement that PIO buffers be mmaped write-only Some userlands try to mmap these pages read-write, so accommodate them. Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_file_ops.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 0b6e7679eefd..e999a46bef9b 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -992,15 +992,10 @@ static int mmap_piobufs(struct vm_area_struct *vma, pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED; #endif - if (vma->vm_flags & VM_READ) { - dev_info(&dd->pcidev->dev, - "Can't map piobufs as readable (flags=%lx)\n", - vma->vm_flags); - ret = -EPERM; - goto bail; - } - - /* don't allow them to later change to readable with mprotect */ + /* + * don't allow them to later change to readable with mprotect (for when + * not initially mapped readable, as is normally the case) + */ vma->vm_flags &= ~VM_MAYREAD; vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; From b1c1b6a30eac88665a35a207cc5e6233090b9d65 Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Fri, 25 Aug 2006 11:24:31 -0700 Subject: [PATCH 0746/1063] IB/ipath: merge ipath_core and ib_ipath drivers There is little point in keeping the two drivers separate, so we are merging them. Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/Makefile | 2 +- drivers/infiniband/hw/ipath/Kconfig | 21 +- drivers/infiniband/hw/ipath/Makefile | 27 ++- drivers/infiniband/hw/ipath/ipath_driver.c | 13 +- drivers/infiniband/hw/ipath/ipath_intr.c | 3 +- drivers/infiniband/hw/ipath/ipath_kernel.h | 11 +- drivers/infiniband/hw/ipath/ipath_layer.c | 214 +-------------------- drivers/infiniband/hw/ipath/ipath_layer.h | 8 - drivers/infiniband/hw/ipath/ipath_verbs.c | 45 ++--- drivers/infiniband/hw/ipath/ipath_verbs.h | 10 + 10 files changed, 62 insertions(+), 292 deletions(-) diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile index 893bee0a50b5..08cff32d900e 100644 --- a/drivers/infiniband/Makefile +++ b/drivers/infiniband/Makefile @@ -1,6 +1,6 @@ obj-$(CONFIG_INFINIBAND) += core/ obj-$(CONFIG_INFINIBAND_MTHCA) += hw/mthca/ -obj-$(CONFIG_IPATH_CORE) += hw/ipath/ +obj-$(CONFIG_INFINIBAND_IPATH) += hw/ipath/ obj-$(CONFIG_INFINIBAND_EHCA) += hw/ehca/ obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/ obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/ diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig index 1db9489f1e82..574a678e7fdd 100644 --- a/drivers/infiniband/hw/ipath/Kconfig +++ b/drivers/infiniband/hw/ipath/Kconfig @@ -1,16 +1,9 @@ -config IPATH_CORE - tristate "QLogic InfiniPath Driver" - depends on 64BIT && PCI_MSI && NET - ---help--- - This is a low-level driver for QLogic InfiniPath host channel - adapters (HCAs) based on the HT-400 and PE-800 chips. - config INFINIBAND_IPATH - tristate "QLogic InfiniPath Verbs Driver" - depends on IPATH_CORE && INFINIBAND + tristate "QLogic InfiniPath Driver" + depends on PCI_MSI && 64BIT && INFINIBAND ---help--- - This is a driver that provides InfiniBand verbs support for - QLogic InfiniPath host channel adapters (HCAs). This - allows these devices to be used with both kernel upper level - protocols such as IP-over-InfiniBand as well as with userspace - applications (in conjunction with InfiniBand userspace access). + This is a driver for QLogic InfiniPath host channel adapters, + including InfiniBand verbs support. This driver allows these + devices to be used with both kernel upper level protocols such + as IP-over-InfiniBand as well as with userspace applications + (in conjunction with InfiniBand userspace access). diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile index 075e313941fe..690dc713e63e 100644 --- a/drivers/infiniband/hw/ipath/Makefile +++ b/drivers/infiniband/hw/ipath/Makefile @@ -1,10 +1,10 @@ EXTRA_CFLAGS += -DIPATH_IDSTR='"QLogic kernel.org driver"' \ -DIPATH_KERN_TYPE=0 -obj-$(CONFIG_IPATH_CORE) += ipath_core.o obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o -ipath_core-y := \ +ib_ipath-y := \ + ipath_cq.o \ ipath_diag.o \ ipath_driver.o \ ipath_eeprom.o \ @@ -13,26 +13,23 @@ ipath_core-y := \ ipath_ht400.o \ ipath_init_chip.o \ ipath_intr.o \ - ipath_layer.o \ - ipath_pe800.o \ - ipath_stats.o \ - ipath_sysfs.o \ - ipath_user_pages.o - -ipath_core-$(CONFIG_X86_64) += ipath_wc_x86_64.o -ipath_core-$(CONFIG_PPC64) += ipath_wc_ppc64.o - -ib_ipath-y := \ - ipath_cq.o \ ipath_keys.o \ + ipath_layer.o \ ipath_mad.o \ ipath_mmap.o \ ipath_mr.o \ + ipath_pe800.o \ ipath_qp.o \ ipath_rc.o \ ipath_ruc.o \ ipath_srq.o \ + ipath_stats.o \ + ipath_sysfs.o \ ipath_uc.o \ ipath_ud.o \ - ipath_verbs.o \ - ipath_verbs_mcast.o + ipath_user_pages.o \ + ipath_verbs_mcast.o \ + ipath_verbs.o + +ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o +ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 6ded914f9eb9..9af7406d6a62 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -40,6 +40,7 @@ #include "ipath_kernel.h" #include "ipath_layer.h" +#include "ipath_verbs.h" #include "ipath_common.h" static void ipath_update_pio_bufs(struct ipath_devdata *); @@ -51,8 +52,6 @@ const char *ipath_get_unit_name(int unit) return iname; } -EXPORT_SYMBOL_GPL(ipath_get_unit_name); - #define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: " #define PFX IPATH_DRV_NAME ": " @@ -510,6 +509,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, ipath_user_add(dd); ipath_diag_add(dd); ipath_layer_add(dd); + ipath_register_ib_device(dd); goto bail; @@ -538,6 +538,7 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev) return; dd = pci_get_drvdata(pdev); + ipath_unregister_ib_device(dd->verbs_dev); ipath_layer_remove(dd); ipath_diag_remove(dd); ipath_user_remove(dd); @@ -978,12 +979,8 @@ reloop: if (unlikely(eflags)) ipath_rcv_hdrerr(dd, eflags, l, etail, rc); else if (etype == RCVHQ_RCV_TYPE_NON_KD) { - int ret = __ipath_verbs_rcv(dd, rc + 1, - ebuf, tlen); - if (ret == -ENODEV) - ipath_cdbg(VERBOSE, - "received IB packet, " - "not SMA (QP=%x)\n", qp); + ipath_ib_rcv(dd->verbs_dev, rc + 1, ebuf, + tlen); if (dd->ipath_lli_counter) dd->ipath_lli_counter--; diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 280e732660a1..ed54f8f2945e 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -35,6 +35,7 @@ #include "ipath_kernel.h" #include "ipath_layer.h" +#include "ipath_verbs.h" #include "ipath_common.h" /* These are all rcv-related errors which we want to count for stats */ @@ -712,7 +713,7 @@ static void handle_layer_pioavail(struct ipath_devdata *dd) if (ret > 0) goto set; - ret = __ipath_verbs_piobufavail(dd); + ret = ipath_ib_piobufavail(dd->verbs_dev); if (ret > 0) goto set; diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index e9f374fb641e..f1931105adb3 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -132,12 +132,6 @@ struct _ipath_layer { void *l_arg; }; -/* Verbs layer interface */ -struct _verbs_layer { - void *l_arg; - struct timer_list l_timer; -}; - struct ipath_devdata { struct list_head ipath_list; @@ -198,7 +192,8 @@ struct ipath_devdata { void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64); /* fill out chip-specific fields */ int (*ipath_f_get_base_info)(struct ipath_portdata *, void *); - struct _verbs_layer verbs_layer; + struct ipath_ibdev *verbs_dev; + struct timer_list verbs_timer; /* total dwords sent (summed from counter) */ u64 ipath_sword; /* total dwords rcvd (summed from counter) */ @@ -529,8 +524,6 @@ extern int ipath_layer_intr(struct ipath_devdata *, u32); extern int __ipath_layer_rcv(struct ipath_devdata *, void *, struct sk_buff *); extern int __ipath_layer_rcv_lid(struct ipath_devdata *, void *); -extern int __ipath_verbs_piobufavail(struct ipath_devdata *); -extern int __ipath_verbs_rcv(struct ipath_devdata *, void *, void *, u32); void ipath_layer_add(struct ipath_devdata *); void ipath_layer_remove(struct ipath_devdata *); diff --git a/drivers/infiniband/hw/ipath/ipath_layer.c b/drivers/infiniband/hw/ipath/ipath_layer.c index b28c6f81c731..acc32200cc0e 100644 --- a/drivers/infiniband/hw/ipath/ipath_layer.c +++ b/drivers/infiniband/hw/ipath/ipath_layer.c @@ -42,26 +42,20 @@ #include "ipath_kernel.h" #include "ipath_layer.h" +#include "ipath_verbs.h" #include "ipath_common.h" /* Acquire before ipath_devs_lock. */ static DEFINE_MUTEX(ipath_layer_mutex); -static int ipath_verbs_registered; - u16 ipath_layer_rcv_opcode; static int (*layer_intr)(void *, u32); static int (*layer_rcv)(void *, void *, struct sk_buff *); static int (*layer_rcv_lid)(void *, void *); -static int (*verbs_piobufavail)(void *); -static void (*verbs_rcv)(void *, void *, void *, u32); static void *(*layer_add_one)(int, struct ipath_devdata *); static void (*layer_remove_one)(void *); -static void *(*verbs_add_one)(int, struct ipath_devdata *); -static void (*verbs_remove_one)(void *); -static void (*verbs_timer_cb)(void *); int __ipath_layer_intr(struct ipath_devdata *dd, u32 arg) { @@ -107,29 +101,6 @@ int __ipath_layer_rcv_lid(struct ipath_devdata *dd, void *hdr) return ret; } -int __ipath_verbs_piobufavail(struct ipath_devdata *dd) -{ - int ret = -ENODEV; - - if (dd->verbs_layer.l_arg && verbs_piobufavail) - ret = verbs_piobufavail(dd->verbs_layer.l_arg); - - return ret; -} - -int __ipath_verbs_rcv(struct ipath_devdata *dd, void *rc, void *ebuf, - u32 tlen) -{ - int ret = -ENODEV; - - if (dd->verbs_layer.l_arg && verbs_rcv) { - verbs_rcv(dd->verbs_layer.l_arg, rc, ebuf, tlen); - ret = 0; - } - - return ret; -} - int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 newstate) { u32 lstate; @@ -212,8 +183,6 @@ bail: return ret; } -EXPORT_SYMBOL_GPL(ipath_layer_set_linkstate); - /** * ipath_layer_set_mtu - set the MTU * @dd: the infinipath device @@ -298,8 +267,6 @@ bail: return ret; } -EXPORT_SYMBOL_GPL(ipath_layer_set_mtu); - int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc) { dd->ipath_lid = arg; @@ -315,8 +282,6 @@ int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc) return 0; } -EXPORT_SYMBOL_GPL(ipath_set_lid); - int ipath_layer_set_guid(struct ipath_devdata *dd, __be64 guid) { /* XXX - need to inform anyone who cares this just happened. */ @@ -324,85 +289,56 @@ int ipath_layer_set_guid(struct ipath_devdata *dd, __be64 guid) return 0; } -EXPORT_SYMBOL_GPL(ipath_layer_set_guid); - __be64 ipath_layer_get_guid(struct ipath_devdata *dd) { return dd->ipath_guid; } -EXPORT_SYMBOL_GPL(ipath_layer_get_guid); - -u32 ipath_layer_get_nguid(struct ipath_devdata *dd) -{ - return dd->ipath_nguid; -} - -EXPORT_SYMBOL_GPL(ipath_layer_get_nguid); - u32 ipath_layer_get_majrev(struct ipath_devdata *dd) { return dd->ipath_majrev; } -EXPORT_SYMBOL_GPL(ipath_layer_get_majrev); - u32 ipath_layer_get_minrev(struct ipath_devdata *dd) { return dd->ipath_minrev; } -EXPORT_SYMBOL_GPL(ipath_layer_get_minrev); - u32 ipath_layer_get_pcirev(struct ipath_devdata *dd) { return dd->ipath_pcirev; } -EXPORT_SYMBOL_GPL(ipath_layer_get_pcirev); - u32 ipath_layer_get_flags(struct ipath_devdata *dd) { return dd->ipath_flags; } -EXPORT_SYMBOL_GPL(ipath_layer_get_flags); - struct device *ipath_layer_get_device(struct ipath_devdata *dd) { return &dd->pcidev->dev; } -EXPORT_SYMBOL_GPL(ipath_layer_get_device); - u16 ipath_layer_get_deviceid(struct ipath_devdata *dd) { return dd->ipath_deviceid; } -EXPORT_SYMBOL_GPL(ipath_layer_get_deviceid); - u32 ipath_layer_get_vendorid(struct ipath_devdata *dd) { return dd->ipath_vendorid; } -EXPORT_SYMBOL_GPL(ipath_layer_get_vendorid); - u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd) { return dd->ipath_lastibcstat; } -EXPORT_SYMBOL_GPL(ipath_layer_get_lastibcstat); - u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd) { return dd->ipath_ibmtu; } -EXPORT_SYMBOL_GPL(ipath_layer_get_ibmtu); - void ipath_layer_add(struct ipath_devdata *dd) { mutex_lock(&ipath_layer_mutex); @@ -411,10 +347,6 @@ void ipath_layer_add(struct ipath_devdata *dd) dd->ipath_layer.l_arg = layer_add_one(dd->ipath_unit, dd); - if (verbs_add_one) - dd->verbs_layer.l_arg = - verbs_add_one(dd->ipath_unit, dd); - mutex_unlock(&ipath_layer_mutex); } @@ -427,11 +359,6 @@ void ipath_layer_remove(struct ipath_devdata *dd) dd->ipath_layer.l_arg = NULL; } - if (dd->verbs_layer.l_arg && verbs_remove_one) { - verbs_remove_one(dd->verbs_layer.l_arg); - dd->verbs_layer.l_arg = NULL; - } - mutex_unlock(&ipath_layer_mutex); } @@ -521,95 +448,10 @@ static void __ipath_verbs_timer(unsigned long arg) ipath_kreceive(dd); /* Handle verbs layer timeouts. */ - if (dd->verbs_layer.l_arg && verbs_timer_cb) - verbs_timer_cb(dd->verbs_layer.l_arg); - - mod_timer(&dd->verbs_layer.l_timer, jiffies + 1); + ipath_ib_timer(dd->verbs_dev); + mod_timer(&dd->verbs_timer, jiffies + 1); } -/** - * ipath_verbs_register - verbs layer registration - * @l_piobufavail: callback for when PIO buffers become available - * @l_rcv: callback for receiving a packet - * @l_timer_cb: timer callback - * @ipath_devdata: device data structure is put here - */ -int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *), - void (*l_remove)(void *arg), - int (*l_piobufavail) (void *arg), - void (*l_rcv) (void *arg, void *rhdr, - void *data, u32 tlen), - void (*l_timer_cb) (void *arg)) -{ - struct ipath_devdata *dd, *tmp; - unsigned long flags; - - mutex_lock(&ipath_layer_mutex); - - verbs_add_one = l_add; - verbs_remove_one = l_remove; - verbs_piobufavail = l_piobufavail; - verbs_rcv = l_rcv; - verbs_timer_cb = l_timer_cb; - - spin_lock_irqsave(&ipath_devs_lock, flags); - - list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) { - if (!(dd->ipath_flags & IPATH_INITTED)) - continue; - - if (dd->verbs_layer.l_arg) - continue; - - spin_unlock_irqrestore(&ipath_devs_lock, flags); - dd->verbs_layer.l_arg = l_add(dd->ipath_unit, dd); - spin_lock_irqsave(&ipath_devs_lock, flags); - } - - spin_unlock_irqrestore(&ipath_devs_lock, flags); - mutex_unlock(&ipath_layer_mutex); - - ipath_verbs_registered = 1; - - return 0; -} - -EXPORT_SYMBOL_GPL(ipath_verbs_register); - -void ipath_verbs_unregister(void) -{ - struct ipath_devdata *dd, *tmp; - unsigned long flags; - - mutex_lock(&ipath_layer_mutex); - spin_lock_irqsave(&ipath_devs_lock, flags); - - list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) { - *dd->ipath_statusp &= ~IPATH_STATUS_OIB_SMA; - - if (dd->verbs_layer.l_arg && verbs_remove_one) { - spin_unlock_irqrestore(&ipath_devs_lock, flags); - verbs_remove_one(dd->verbs_layer.l_arg); - spin_lock_irqsave(&ipath_devs_lock, flags); - dd->verbs_layer.l_arg = NULL; - } - } - - spin_unlock_irqrestore(&ipath_devs_lock, flags); - - verbs_add_one = NULL; - verbs_remove_one = NULL; - verbs_piobufavail = NULL; - verbs_rcv = NULL; - verbs_timer_cb = NULL; - - ipath_verbs_registered = 0; - - mutex_unlock(&ipath_layer_mutex); -} - -EXPORT_SYMBOL_GPL(ipath_verbs_unregister); - int ipath_layer_open(struct ipath_devdata *dd, u32 * pktmax) { int ret; @@ -703,8 +545,6 @@ u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd) return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey); } -EXPORT_SYMBOL_GPL(ipath_layer_get_cr_errpkey); - static void update_sge(struct ipath_sge_state *ss, u32 length) { struct ipath_sge *sge = &ss->sge; @@ -981,8 +821,6 @@ bail: return ret; } -EXPORT_SYMBOL_GPL(ipath_verbs_send); - int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords, u64 *rwords, u64 *spkts, u64 *rpkts, u64 *xmit_wait) @@ -1007,8 +845,6 @@ bail: return ret; } -EXPORT_SYMBOL_GPL(ipath_layer_snapshot_counters); - /** * ipath_layer_get_counters - get various chip counters * @dd: the infinipath device @@ -1069,8 +905,6 @@ bail: return ret; } -EXPORT_SYMBOL_GPL(ipath_layer_get_counters); - int ipath_layer_want_buffer(struct ipath_devdata *dd) { set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl); @@ -1080,8 +914,6 @@ int ipath_layer_want_buffer(struct ipath_devdata *dd) return 0; } -EXPORT_SYMBOL_GPL(ipath_layer_want_buffer); - int ipath_layer_send_hdr(struct ipath_devdata *dd, struct ether_header *hdr) { int ret = 0; @@ -1174,30 +1006,26 @@ int ipath_layer_enable_timer(struct ipath_devdata *dd) (u64) (1 << 2)); } - init_timer(&dd->verbs_layer.l_timer); - dd->verbs_layer.l_timer.function = __ipath_verbs_timer; - dd->verbs_layer.l_timer.data = (unsigned long)dd; - dd->verbs_layer.l_timer.expires = jiffies + 1; - add_timer(&dd->verbs_layer.l_timer); + init_timer(&dd->verbs_timer); + dd->verbs_timer.function = __ipath_verbs_timer; + dd->verbs_timer.data = (unsigned long)dd; + dd->verbs_timer.expires = jiffies + 1; + add_timer(&dd->verbs_timer); return 0; } -EXPORT_SYMBOL_GPL(ipath_layer_enable_timer); - int ipath_layer_disable_timer(struct ipath_devdata *dd) { /* Disable GPIO bit 2 interrupt */ if (dd->ipath_flags & IPATH_GPIO_INTR) ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0); - del_timer_sync(&dd->verbs_layer.l_timer); + del_timer_sync(&dd->verbs_timer); return 0; } -EXPORT_SYMBOL_GPL(ipath_layer_disable_timer); - /** * ipath_layer_set_verbs_flags - set the verbs layer flags * @dd: the infinipath device @@ -1225,8 +1053,6 @@ int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags) return 0; } -EXPORT_SYMBOL_GPL(ipath_layer_set_verbs_flags); - /** * ipath_layer_get_npkeys - return the size of the PKEY table for port 0 * @dd: the infinipath device @@ -1236,8 +1062,6 @@ unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd) return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys); } -EXPORT_SYMBOL_GPL(ipath_layer_get_npkeys); - /** * ipath_layer_get_pkey - return the indexed PKEY from the port 0 PKEY table * @dd: the infinipath device @@ -1255,8 +1079,6 @@ unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index) return ret; } -EXPORT_SYMBOL_GPL(ipath_layer_get_pkey); - /** * ipath_layer_get_pkeys - return the PKEY table for port 0 * @dd: the infinipath device @@ -1271,8 +1093,6 @@ int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 * pkeys) return 0; } -EXPORT_SYMBOL_GPL(ipath_layer_get_pkeys); - /** * rm_pkey - decrecment the reference count for the given PKEY * @dd: the infinipath device @@ -1419,8 +1239,6 @@ int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 * pkeys) return 0; } -EXPORT_SYMBOL_GPL(ipath_layer_set_pkeys); - /** * ipath_layer_get_linkdowndefaultstate - get the default linkdown state * @dd: the infinipath device @@ -1432,8 +1250,6 @@ int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd) return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE); } -EXPORT_SYMBOL_GPL(ipath_layer_get_linkdowndefaultstate); - /** * ipath_layer_set_linkdowndefaultstate - set the default linkdown state * @dd: the infinipath device @@ -1453,8 +1269,6 @@ int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd, return 0; } -EXPORT_SYMBOL_GPL(ipath_layer_set_linkdowndefaultstate); - int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd) { return (dd->ipath_ibcctrl >> @@ -1462,8 +1276,6 @@ int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd) INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; } -EXPORT_SYMBOL_GPL(ipath_layer_get_phyerrthreshold); - /** * ipath_layer_set_phyerrthreshold - set the physical error threshold * @dd: the infinipath device @@ -1489,8 +1301,6 @@ int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n) return 0; } -EXPORT_SYMBOL_GPL(ipath_layer_set_phyerrthreshold); - int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd) { return (dd->ipath_ibcctrl >> @@ -1498,8 +1308,6 @@ int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd) INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK; } -EXPORT_SYMBOL_GPL(ipath_layer_get_overrunthreshold); - /** * ipath_layer_set_overrunthreshold - set the overrun threshold * @dd: the infinipath device @@ -1525,17 +1333,13 @@ int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n) return 0; } -EXPORT_SYMBOL_GPL(ipath_layer_set_overrunthreshold); - int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name, size_t namelen) { return dd->ipath_f_get_boardname(dd, name, namelen); } -EXPORT_SYMBOL_GPL(ipath_layer_get_boardname); u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd) { return dd->ipath_rcvhdrentsize; } -EXPORT_SYMBOL_GPL(ipath_layer_get_rcvhdrentsize); diff --git a/drivers/infiniband/hw/ipath/ipath_layer.h b/drivers/infiniband/hw/ipath/ipath_layer.h index 71485096fcac..57c990a5715f 100644 --- a/drivers/infiniband/hw/ipath/ipath_layer.h +++ b/drivers/infiniband/hw/ipath/ipath_layer.h @@ -114,14 +114,7 @@ int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *), struct sk_buff *), u16 rcv_opcode, int (*l_rcv_lid)(void *, void *)); -int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *), - void (*l_remove)(void *arg), - int (*l_piobufavail)(void *arg), - void (*l_rcv)(void *arg, void *rhdr, - void *data, u32 tlen), - void (*l_timer_cb)(void *arg)); void ipath_layer_unregister(void); -void ipath_verbs_unregister(void); int ipath_layer_open(struct ipath_devdata *, u32 * pktmax); u16 ipath_layer_get_lid(struct ipath_devdata *dd); int ipath_layer_get_mac(struct ipath_devdata *dd, u8 *); @@ -145,7 +138,6 @@ int ipath_layer_get_counters(struct ipath_devdata *dd, int ipath_layer_want_buffer(struct ipath_devdata *dd); int ipath_layer_set_guid(struct ipath_devdata *, __be64 guid); __be64 ipath_layer_get_guid(struct ipath_devdata *); -u32 ipath_layer_get_nguid(struct ipath_devdata *); u32 ipath_layer_get_majrev(struct ipath_devdata *); u32 ipath_layer_get_minrev(struct ipath_devdata *); u32 ipath_layer_get_pcirev(struct ipath_devdata *); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 5b8ee65c6cd3..15edec9227e4 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -368,7 +368,7 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev, } /** - * ipath_ib_rcv - process and incoming packet + * ipath_ib_rcv - process an incoming packet * @arg: the device pointer * @rhdr: the header of the packet * @data: the packet data @@ -377,9 +377,9 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev, * This is called from ipath_kreceive() to process an incoming packet at * interrupt level. Tlen is the length of the header + data + CRC in bytes. */ -static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen) +void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data, + u32 tlen) { - struct ipath_ibdev *dev = (struct ipath_ibdev *) arg; struct ipath_ib_header *hdr = rhdr; struct ipath_other_headers *ohdr; struct ipath_qp *qp; @@ -468,9 +468,8 @@ bail:; * This is called from ipath_do_rcv_timer() at interrupt level to check for * QPs which need retransmits and to collect performance numbers. */ -static void ipath_ib_timer(void *arg) +void ipath_ib_timer(struct ipath_ibdev *dev) { - struct ipath_ibdev *dev = (struct ipath_ibdev *) arg; struct ipath_qp *resend = NULL; struct list_head *last; struct ipath_qp *qp; @@ -564,9 +563,8 @@ static void ipath_ib_timer(void *arg) * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and * return zero). */ -static int ipath_ib_piobufavail(void *arg) +int ipath_ib_piobufavail(struct ipath_ibdev *dev) { - struct ipath_ibdev *dev = (struct ipath_ibdev *) arg; struct ipath_qp *qp; unsigned long flags; @@ -957,11 +955,10 @@ static int ipath_verbs_register_sysfs(struct ib_device *dev); /** * ipath_register_ib_device - register our device with the infiniband core - * @unit: the device number to register * @dd: the device data structure * Return the allocated ipath_ibdev pointer or NULL on error. */ -static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd) +int ipath_register_ib_device(struct ipath_devdata *dd) { struct ipath_layer_counters cntrs; struct ipath_ibdev *idev; @@ -969,8 +966,10 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd) int ret; idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev); - if (idev == NULL) + if (idev == NULL) { + ret = -ENOMEM; goto bail; + } dev = &idev->ibdev; @@ -1047,7 +1046,7 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd) if (!sys_image_guid) sys_image_guid = ipath_layer_get_guid(dd); idev->sys_image_guid = sys_image_guid; - idev->ib_unit = unit; + idev->ib_unit = dd->ipath_unit; idev->dd = dd; strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX); @@ -1153,16 +1152,16 @@ err_lk: err_qp: ib_dealloc_device(dev); _VERBS_ERROR("ib_ipath%d cannot register verbs (%d)!\n", - unit, -ret); + dd->ipath_unit, -ret); idev = NULL; bail: - return idev; + dd->verbs_dev = idev; + return ret; } -static void ipath_unregister_ib_device(void *arg) +void ipath_unregister_ib_device(struct ipath_ibdev *dev) { - struct ipath_ibdev *dev = (struct ipath_ibdev *) arg; struct ib_device *ibdev = &dev->ibdev; ipath_layer_disable_timer(dev->dd); @@ -1193,19 +1192,6 @@ static void ipath_unregister_ib_device(void *arg) ib_dealloc_device(ibdev); } -static int __init ipath_verbs_init(void) -{ - return ipath_verbs_register(ipath_register_ib_device, - ipath_unregister_ib_device, - ipath_ib_piobufavail, ipath_ib_rcv, - ipath_ib_timer); -} - -static void __exit ipath_verbs_cleanup(void) -{ - ipath_verbs_unregister(); -} - static ssize_t show_rev(struct class_device *cdev, char *buf) { struct ipath_ibdev *dev = @@ -1297,6 +1283,3 @@ static int ipath_verbs_register_sysfs(struct ib_device *dev) bail: return ret; } - -module_init(ipath_verbs_init); -module_exit(ipath_verbs_cleanup); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index a9baa9101432..d6faa4ba6067 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -711,6 +711,16 @@ int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr, int ipath_make_uc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr, u32 pmtu, u32 *bth0p, u32 *bth2p); +int ipath_register_ib_device(struct ipath_devdata *); + +void ipath_unregister_ib_device(struct ipath_ibdev *); + +void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32); + +int ipath_ib_piobufavail(struct ipath_ibdev *); + +void ipath_ib_timer(struct ipath_ibdev *); + extern const enum ib_wc_opcode ib_ipath_wc_opcode[]; extern const u8 ipath_cvt_physportstate[]; From 34b2aafea38efdf02cd8107a6e1057e2a297c447 Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Fri, 25 Aug 2006 11:24:32 -0700 Subject: [PATCH 0747/1063] IB/ipath: simplify layering code A lot of ipath layer code was only called in one place. Now that the ipath_core and ib_ipath drivers are merged, it's more sensible to simply inline the simple stuff that the layer code was doing. Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_diag.c | 1 - drivers/infiniband/hw/ipath/ipath_driver.c | 275 ++++-- drivers/infiniband/hw/ipath/ipath_file_ops.c | 1 - drivers/infiniband/hw/ipath/ipath_intr.c | 7 - drivers/infiniband/hw/ipath/ipath_kernel.h | 15 +- drivers/infiniband/hw/ipath/ipath_layer.c | 978 +------------------ drivers/infiniband/hw/ipath/ipath_layer.h | 104 -- drivers/infiniband/hw/ipath/ipath_mad.c | 339 ++++++- drivers/infiniband/hw/ipath/ipath_mr.c | 12 + drivers/infiniband/hw/ipath/ipath_qp.c | 34 +- drivers/infiniband/hw/ipath/ipath_rc.c | 9 +- drivers/infiniband/hw/ipath/ipath_ruc.c | 22 +- drivers/infiniband/hw/ipath/ipath_sysfs.c | 6 +- drivers/infiniband/hw/ipath/ipath_uc.c | 5 +- drivers/infiniband/hw/ipath/ipath_ud.c | 13 +- drivers/infiniband/hw/ipath/ipath_verbs.c | 525 +++++++++- drivers/infiniband/hw/ipath/ipath_verbs.h | 109 ++- 17 files changed, 1126 insertions(+), 1329 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c index 147dd89e21c9..5d77a74aa57b 100644 --- a/drivers/infiniband/hw/ipath/ipath_diag.c +++ b/drivers/infiniband/hw/ipath/ipath_diag.c @@ -45,7 +45,6 @@ #include #include "ipath_kernel.h" -#include "ipath_layer.h" #include "ipath_common.h" int ipath_diag_inuse; diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 9af7406d6a62..958cc9b33c8f 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -39,7 +39,6 @@ #include #include "ipath_kernel.h" -#include "ipath_layer.h" #include "ipath_verbs.h" #include "ipath_common.h" @@ -508,7 +507,6 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, ipathfs_add_device(dd); ipath_user_add(dd); ipath_diag_add(dd); - ipath_layer_add(dd); ipath_register_ib_device(dd); goto bail; @@ -539,7 +537,6 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev) dd = pci_get_drvdata(pdev); ipath_unregister_ib_device(dd->verbs_dev); - ipath_layer_remove(dd); ipath_diag_remove(dd); ipath_user_remove(dd); ipathfs_remove_device(dd); @@ -614,11 +611,12 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first, * * wait up to msecs milliseconds for IB link state change to occur for * now, take the easy polling route. Currently used only by - * ipath_layer_set_linkstate. Returns 0 if state reached, otherwise + * ipath_set_linkstate. Returns 0 if state reached, otherwise * -ETIMEDOUT state can have multiple states set, for any of several * transitions. */ -int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs) +static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, + int msecs) { dd->ipath_sma_state_wanted = state; wait_event_interruptible_timeout(ipath_sma_state_wait, @@ -814,58 +812,6 @@ bail: return skb; } -/** - * ipath_rcv_layer - receive a packet for the layered (ethernet) driver - * @dd: the infinipath device - * @etail: the sk_buff number - * @tlen: the total packet length - * @hdr: the ethernet header - * - * Separate routine for better overall optimization - */ -static void ipath_rcv_layer(struct ipath_devdata *dd, u32 etail, - u32 tlen, struct ether_header *hdr) -{ - u32 elen; - u8 pad, *bthbytes; - struct sk_buff *skb, *nskb; - - if (dd->ipath_port0_skbs && - hdr->sub_opcode == IPATH_ITH4X_OPCODE_ENCAP) { - /* - * Allocate a new sk_buff to replace the one we give - * to the network stack. - */ - nskb = ipath_alloc_skb(dd, GFP_ATOMIC); - if (!nskb) { - /* count OK packets that we drop */ - ipath_stats.sps_krdrops++; - return; - } - - bthbytes = (u8 *) hdr->bth; - pad = (bthbytes[1] >> 4) & 3; - /* +CRC32 */ - elen = tlen - (sizeof(*hdr) + pad + sizeof(u32)); - - skb = dd->ipath_port0_skbs[etail]; - dd->ipath_port0_skbs[etail] = nskb; - skb_put(skb, elen); - - dd->ipath_f_put_tid(dd, etail + (u64 __iomem *) - ((char __iomem *) dd->ipath_kregbase - + dd->ipath_rcvegrbase), 0, - virt_to_phys(nskb->data)); - - __ipath_layer_rcv(dd, hdr, skb); - - /* another ether packet received */ - ipath_stats.sps_ether_rpkts++; - } - else if (hdr->sub_opcode == IPATH_ITH4X_OPCODE_LID_ARP) - __ipath_layer_rcv_lid(dd, hdr); -} - static void ipath_rcv_hdrerr(struct ipath_devdata *dd, u32 eflags, u32 l, @@ -979,22 +925,17 @@ reloop: if (unlikely(eflags)) ipath_rcv_hdrerr(dd, eflags, l, etail, rc); else if (etype == RCVHQ_RCV_TYPE_NON_KD) { - ipath_ib_rcv(dd->verbs_dev, rc + 1, ebuf, - tlen); - if (dd->ipath_lli_counter) - dd->ipath_lli_counter--; - - } else if (etype == RCVHQ_RCV_TYPE_EAGER) { - if (qp == IPATH_KD_QP && - bthbytes[0] == ipath_layer_rcv_opcode && - ebuf) - ipath_rcv_layer(dd, etail, tlen, - (struct ether_header *)hdr); - else - ipath_cdbg(PKT, "typ %x, opcode %x (eager, " - "qp=%x), len %x; ignored\n", - etype, bthbytes[0], qp, tlen); + ipath_ib_rcv(dd->verbs_dev, rc + 1, ebuf, tlen); + if (dd->ipath_lli_counter) + dd->ipath_lli_counter--; + ipath_cdbg(PKT, "typ %x, opcode %x (eager, " + "qp=%x), len %x; ignored\n", + etype, bthbytes[0], qp, tlen); } + else if (etype == RCVHQ_RCV_TYPE_EAGER) + ipath_cdbg(PKT, "typ %x, opcode %x (eager, " + "qp=%x), len %x; ignored\n", + etype, bthbytes[0], qp, tlen); else if (etype == RCVHQ_RCV_TYPE_EXPECTED) ipath_dbg("Bug: Expected TID, opcode %x; ignored\n", be32_to_cpu(hdr->bth[0]) & 0xff); @@ -1320,13 +1261,6 @@ rescan: goto bail; } - if (updated) - /* - * ran out of bufs, now some (at least this one we just - * got) are now available, so tell the layered driver. - */ - __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE); - /* * set next starting place. Since it's just an optimization, * it doesn't matter who wins on this, so no locking @@ -1503,7 +1437,7 @@ int ipath_waitfor_mdio_cmdready(struct ipath_devdata *dd) return ret; } -void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) +static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) { static const char *what[4] = { [0] = "DOWN", @@ -1537,6 +1471,180 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) dd->ipath_ibcctrl | which); } +int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate) +{ + u32 lstate; + int ret; + + switch (newstate) { + case IPATH_IB_LINKDOWN: + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL << + INFINIPATH_IBCC_LINKINITCMD_SHIFT); + /* don't wait */ + ret = 0; + goto bail; + + case IPATH_IB_LINKDOWN_SLEEP: + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP << + INFINIPATH_IBCC_LINKINITCMD_SHIFT); + /* don't wait */ + ret = 0; + goto bail; + + case IPATH_IB_LINKDOWN_DISABLE: + ipath_set_ib_lstate(dd, + INFINIPATH_IBCC_LINKINITCMD_DISABLE << + INFINIPATH_IBCC_LINKINITCMD_SHIFT); + /* don't wait */ + ret = 0; + goto bail; + + case IPATH_IB_LINKINIT: + if (dd->ipath_flags & IPATH_LINKINIT) { + ret = 0; + goto bail; + } + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT << + INFINIPATH_IBCC_LINKCMD_SHIFT); + lstate = IPATH_LINKINIT; + break; + + case IPATH_IB_LINKARM: + if (dd->ipath_flags & IPATH_LINKARMED) { + ret = 0; + goto bail; + } + if (!(dd->ipath_flags & + (IPATH_LINKINIT | IPATH_LINKACTIVE))) { + ret = -EINVAL; + goto bail; + } + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED << + INFINIPATH_IBCC_LINKCMD_SHIFT); + /* + * Since the port can transition to ACTIVE by receiving + * a non VL 15 packet, wait for either state. + */ + lstate = IPATH_LINKARMED | IPATH_LINKACTIVE; + break; + + case IPATH_IB_LINKACTIVE: + if (dd->ipath_flags & IPATH_LINKACTIVE) { + ret = 0; + goto bail; + } + if (!(dd->ipath_flags & IPATH_LINKARMED)) { + ret = -EINVAL; + goto bail; + } + ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE << + INFINIPATH_IBCC_LINKCMD_SHIFT); + lstate = IPATH_LINKACTIVE; + break; + + default: + ipath_dbg("Invalid linkstate 0x%x requested\n", newstate); + ret = -EINVAL; + goto bail; + } + ret = ipath_wait_linkstate(dd, lstate, 2000); + +bail: + return ret; +} + +/** + * ipath_set_mtu - set the MTU + * @dd: the infinipath device + * @arg: the new MTU + * + * we can handle "any" incoming size, the issue here is whether we + * need to restrict our outgoing size. For now, we don't do any + * sanity checking on this, and we don't deal with what happens to + * programs that are already running when the size changes. + * NOTE: changing the MTU will usually cause the IBC to go back to + * link initialize (IPATH_IBSTATE_INIT) state... + */ +int ipath_set_mtu(struct ipath_devdata *dd, u16 arg) +{ + u32 piosize; + int changed = 0; + int ret; + + /* + * mtu is IB data payload max. It's the largest power of 2 less + * than piosize (or even larger, since it only really controls the + * largest we can receive; we can send the max of the mtu and + * piosize). We check that it's one of the valid IB sizes. + */ + if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 && + arg != 4096) { + ipath_dbg("Trying to set invalid mtu %u, failing\n", arg); + ret = -EINVAL; + goto bail; + } + if (dd->ipath_ibmtu == arg) { + ret = 0; /* same as current */ + goto bail; + } + + piosize = dd->ipath_ibmaxlen; + dd->ipath_ibmtu = arg; + + if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) { + /* Only if it's not the initial value (or reset to it) */ + if (piosize != dd->ipath_init_ibmaxlen) { + dd->ipath_ibmaxlen = piosize; + changed = 1; + } + } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) { + piosize = arg + IPATH_PIO_MAXIBHDR; + ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x " + "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize, + arg); + dd->ipath_ibmaxlen = piosize; + changed = 1; + } + + if (changed) { + /* + * set the IBC maxpktlength to the size of our pio + * buffers in words + */ + u64 ibc = dd->ipath_ibcctrl; + ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK << + INFINIPATH_IBCC_MAXPKTLEN_SHIFT); + + piosize = piosize - 2 * sizeof(u32); /* ignore pbc */ + dd->ipath_ibmaxlen = piosize; + piosize /= sizeof(u32); /* in words */ + /* + * for ICRC, which we only send in diag test pkt mode, and + * we don't need to worry about that for mtu + */ + piosize += 1; + + ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT; + dd->ipath_ibcctrl = ibc; + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, + dd->ipath_ibcctrl); + dd->ipath_f_tidtemplate(dd); + } + + ret = 0; + +bail: + return ret; +} + +int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc) +{ + dd->ipath_lid = arg; + dd->ipath_lmc = lmc; + + return 0; +} + /** * ipath_read_kreg64_port - read a device's per-port 64-bit kernel register * @dd: the infinipath device @@ -1640,13 +1748,6 @@ void ipath_shutdown_device(struct ipath_devdata *dd) ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE << INFINIPATH_IBCC_LINKINITCMD_SHIFT); - /* - * we are shutting down, so tell the layered driver. We don't do - * this on just a link state change, much like ethernet, a cable - * unplug, etc. doesn't change driver state - */ - ipath_layer_intr(dd, IPATH_LAYER_INT_IF_DOWN); - /* disable IBC */ dd->ipath_control &= ~INFINIPATH_C_LINKENABLE; ipath_write_kreg(dd, dd->ipath_kregs->kr_control, diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index e999a46bef9b..f865ce89b73f 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -39,7 +39,6 @@ #include #include "ipath_kernel.h" -#include "ipath_layer.h" #include "ipath_common.h" static int ipath_open(struct inode *, struct file *); diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index ed54f8f2945e..250e2a9f01bb 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -34,7 +34,6 @@ #include #include "ipath_kernel.h" -#include "ipath_layer.h" #include "ipath_verbs.h" #include "ipath_common.h" @@ -290,8 +289,6 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd, *dd->ipath_statusp |= IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF; dd->ipath_f_setextled(dd, lstate, ltstate); - - __ipath_layer_intr(dd, IPATH_LAYER_INT_IF_UP); } else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_INIT) { /* * set INIT and DOWN. Down is checked by most of the other @@ -709,10 +706,6 @@ static void handle_layer_pioavail(struct ipath_devdata *dd) { int ret; - ret = __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE); - if (ret > 0) - goto set; - ret = ipath_ib_piobufavail(dd->verbs_dev); if (ret > 0) goto set; diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index f1931105adb3..999249b7f27f 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -518,16 +518,6 @@ extern struct list_head ipath_dev_list; extern spinlock_t ipath_devs_lock; extern struct ipath_devdata *ipath_lookup(int unit); -extern u16 ipath_layer_rcv_opcode; -extern int __ipath_layer_intr(struct ipath_devdata *, u32); -extern int ipath_layer_intr(struct ipath_devdata *, u32); -extern int __ipath_layer_rcv(struct ipath_devdata *, void *, - struct sk_buff *); -extern int __ipath_layer_rcv_lid(struct ipath_devdata *, void *); - -void ipath_layer_add(struct ipath_devdata *); -void ipath_layer_remove(struct ipath_devdata *); - int ipath_init_chip(struct ipath_devdata *, int); int ipath_enable_wc(struct ipath_devdata *dd); void ipath_disable_wc(struct ipath_devdata *dd); @@ -575,12 +565,13 @@ void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *); int ipath_parse_ushort(const char *str, unsigned short *valp); -int ipath_wait_linkstate(struct ipath_devdata *, u32, int); -void ipath_set_ib_lstate(struct ipath_devdata *, int); void ipath_kreceive(struct ipath_devdata *); int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned); int ipath_reset_device(int); void ipath_get_faststats(unsigned long); +int ipath_set_linkstate(struct ipath_devdata *, u8); +int ipath_set_mtu(struct ipath_devdata *, u16); +int ipath_set_lid(struct ipath_devdata *, u32, u8); /* for use in system calls, where we want to know device type, etc. */ #define port_fp(fp) ((struct ipath_portdata *) (fp)->private_data) diff --git a/drivers/infiniband/hw/ipath/ipath_layer.c b/drivers/infiniband/hw/ipath/ipath_layer.c index acc32200cc0e..10f578e2aed6 100644 --- a/drivers/infiniband/hw/ipath/ipath_layer.c +++ b/drivers/infiniband/hw/ipath/ipath_layer.c @@ -101,242 +101,14 @@ int __ipath_layer_rcv_lid(struct ipath_devdata *dd, void *hdr) return ret; } -int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 newstate) +void ipath_layer_lid_changed(struct ipath_devdata *dd) { - u32 lstate; - int ret; - - switch (newstate) { - case IPATH_IB_LINKDOWN: - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL << - INFINIPATH_IBCC_LINKINITCMD_SHIFT); - /* don't wait */ - ret = 0; - goto bail; - - case IPATH_IB_LINKDOWN_SLEEP: - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP << - INFINIPATH_IBCC_LINKINITCMD_SHIFT); - /* don't wait */ - ret = 0; - goto bail; - - case IPATH_IB_LINKDOWN_DISABLE: - ipath_set_ib_lstate(dd, - INFINIPATH_IBCC_LINKINITCMD_DISABLE << - INFINIPATH_IBCC_LINKINITCMD_SHIFT); - /* don't wait */ - ret = 0; - goto bail; - - case IPATH_IB_LINKINIT: - if (dd->ipath_flags & IPATH_LINKINIT) { - ret = 0; - goto bail; - } - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT << - INFINIPATH_IBCC_LINKCMD_SHIFT); - lstate = IPATH_LINKINIT; - break; - - case IPATH_IB_LINKARM: - if (dd->ipath_flags & IPATH_LINKARMED) { - ret = 0; - goto bail; - } - if (!(dd->ipath_flags & - (IPATH_LINKINIT | IPATH_LINKACTIVE))) { - ret = -EINVAL; - goto bail; - } - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED << - INFINIPATH_IBCC_LINKCMD_SHIFT); - /* - * Since the port can transition to ACTIVE by receiving - * a non VL 15 packet, wait for either state. - */ - lstate = IPATH_LINKARMED | IPATH_LINKACTIVE; - break; - - case IPATH_IB_LINKACTIVE: - if (dd->ipath_flags & IPATH_LINKACTIVE) { - ret = 0; - goto bail; - } - if (!(dd->ipath_flags & IPATH_LINKARMED)) { - ret = -EINVAL; - goto bail; - } - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE << - INFINIPATH_IBCC_LINKCMD_SHIFT); - lstate = IPATH_LINKACTIVE; - break; - - default: - ipath_dbg("Invalid linkstate 0x%x requested\n", newstate); - ret = -EINVAL; - goto bail; - } - ret = ipath_wait_linkstate(dd, lstate, 2000); - -bail: - return ret; -} - -/** - * ipath_layer_set_mtu - set the MTU - * @dd: the infinipath device - * @arg: the new MTU - * - * we can handle "any" incoming size, the issue here is whether we - * need to restrict our outgoing size. For now, we don't do any - * sanity checking on this, and we don't deal with what happens to - * programs that are already running when the size changes. - * NOTE: changing the MTU will usually cause the IBC to go back to - * link initialize (IPATH_IBSTATE_INIT) state... - */ -int ipath_layer_set_mtu(struct ipath_devdata *dd, u16 arg) -{ - u32 piosize; - int changed = 0; - int ret; - - /* - * mtu is IB data payload max. It's the largest power of 2 less - * than piosize (or even larger, since it only really controls the - * largest we can receive; we can send the max of the mtu and - * piosize). We check that it's one of the valid IB sizes. - */ - if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 && - arg != 4096) { - ipath_dbg("Trying to set invalid mtu %u, failing\n", arg); - ret = -EINVAL; - goto bail; - } - if (dd->ipath_ibmtu == arg) { - ret = 0; /* same as current */ - goto bail; - } - - piosize = dd->ipath_ibmaxlen; - dd->ipath_ibmtu = arg; - - if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) { - /* Only if it's not the initial value (or reset to it) */ - if (piosize != dd->ipath_init_ibmaxlen) { - dd->ipath_ibmaxlen = piosize; - changed = 1; - } - } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) { - piosize = arg + IPATH_PIO_MAXIBHDR; - ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x " - "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize, - arg); - dd->ipath_ibmaxlen = piosize; - changed = 1; - } - - if (changed) { - /* - * set the IBC maxpktlength to the size of our pio - * buffers in words - */ - u64 ibc = dd->ipath_ibcctrl; - ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK << - INFINIPATH_IBCC_MAXPKTLEN_SHIFT); - - piosize = piosize - 2 * sizeof(u32); /* ignore pbc */ - dd->ipath_ibmaxlen = piosize; - piosize /= sizeof(u32); /* in words */ - /* - * for ICRC, which we only send in diag test pkt mode, and - * we don't need to worry about that for mtu - */ - piosize += 1; - - ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT; - dd->ipath_ibcctrl = ibc; - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl); - dd->ipath_f_tidtemplate(dd); - } - - ret = 0; - -bail: - return ret; -} - -int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc) -{ - dd->ipath_lid = arg; - dd->ipath_lmc = lmc; - mutex_lock(&ipath_layer_mutex); if (dd->ipath_layer.l_arg && layer_intr) layer_intr(dd->ipath_layer.l_arg, IPATH_LAYER_INT_LID); mutex_unlock(&ipath_layer_mutex); - - return 0; -} - -int ipath_layer_set_guid(struct ipath_devdata *dd, __be64 guid) -{ - /* XXX - need to inform anyone who cares this just happened. */ - dd->ipath_guid = guid; - return 0; -} - -__be64 ipath_layer_get_guid(struct ipath_devdata *dd) -{ - return dd->ipath_guid; -} - -u32 ipath_layer_get_majrev(struct ipath_devdata *dd) -{ - return dd->ipath_majrev; -} - -u32 ipath_layer_get_minrev(struct ipath_devdata *dd) -{ - return dd->ipath_minrev; -} - -u32 ipath_layer_get_pcirev(struct ipath_devdata *dd) -{ - return dd->ipath_pcirev; -} - -u32 ipath_layer_get_flags(struct ipath_devdata *dd) -{ - return dd->ipath_flags; -} - -struct device *ipath_layer_get_device(struct ipath_devdata *dd) -{ - return &dd->pcidev->dev; -} - -u16 ipath_layer_get_deviceid(struct ipath_devdata *dd) -{ - return dd->ipath_deviceid; -} - -u32 ipath_layer_get_vendorid(struct ipath_devdata *dd) -{ - return dd->ipath_vendorid; -} - -u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd) -{ - return dd->ipath_lastibcstat; -} - -u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd) -{ - return dd->ipath_ibmtu; } void ipath_layer_add(struct ipath_devdata *dd) @@ -436,22 +208,6 @@ void ipath_layer_unregister(void) EXPORT_SYMBOL_GPL(ipath_layer_unregister); -static void __ipath_verbs_timer(unsigned long arg) -{ - struct ipath_devdata *dd = (struct ipath_devdata *) arg; - - /* - * If port 0 receive packet interrupts are not available, or - * can be missed, poll the receive queue - */ - if (dd->ipath_flags & IPATH_POLL_RX_INTR) - ipath_kreceive(dd); - - /* Handle verbs layer timeouts. */ - ipath_ib_timer(dd->verbs_dev); - mod_timer(&dd->verbs_timer, jiffies + 1); -} - int ipath_layer_open(struct ipath_devdata *dd, u32 * pktmax) { int ret; @@ -540,380 +296,6 @@ u16 ipath_layer_get_bcast(struct ipath_devdata *dd) EXPORT_SYMBOL_GPL(ipath_layer_get_bcast); -u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd) -{ - return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey); -} - -static void update_sge(struct ipath_sge_state *ss, u32 length) -{ - struct ipath_sge *sge = &ss->sge; - - sge->vaddr += length; - sge->length -= length; - sge->sge_length -= length; - if (sge->sge_length == 0) { - if (--ss->num_sge) - *sge = *ss->sg_list++; - } else if (sge->length == 0 && sge->mr != NULL) { - if (++sge->n >= IPATH_SEGSZ) { - if (++sge->m >= sge->mr->mapsz) - return; - sge->n = 0; - } - sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr; - sge->length = sge->mr->map[sge->m]->segs[sge->n].length; - } -} - -#ifdef __LITTLE_ENDIAN -static inline u32 get_upper_bits(u32 data, u32 shift) -{ - return data >> shift; -} - -static inline u32 set_upper_bits(u32 data, u32 shift) -{ - return data << shift; -} - -static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) -{ - data <<= ((sizeof(u32) - n) * BITS_PER_BYTE); - data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE); - return data; -} -#else -static inline u32 get_upper_bits(u32 data, u32 shift) -{ - return data << shift; -} - -static inline u32 set_upper_bits(u32 data, u32 shift) -{ - return data >> shift; -} - -static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) -{ - data >>= ((sizeof(u32) - n) * BITS_PER_BYTE); - data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE); - return data; -} -#endif - -static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss, - u32 length) -{ - u32 extra = 0; - u32 data = 0; - u32 last; - - while (1) { - u32 len = ss->sge.length; - u32 off; - - BUG_ON(len == 0); - if (len > length) - len = length; - if (len > ss->sge.sge_length) - len = ss->sge.sge_length; - /* If the source address is not aligned, try to align it. */ - off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1); - if (off) { - u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr & - ~(sizeof(u32) - 1)); - u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE); - u32 y; - - y = sizeof(u32) - off; - if (len > y) - len = y; - if (len + extra >= sizeof(u32)) { - data |= set_upper_bits(v, extra * - BITS_PER_BYTE); - len = sizeof(u32) - extra; - if (len == length) { - last = data; - break; - } - __raw_writel(data, piobuf); - piobuf++; - extra = 0; - data = 0; - } else { - /* Clear unused upper bytes */ - data |= clear_upper_bytes(v, len, extra); - if (len == length) { - last = data; - break; - } - extra += len; - } - } else if (extra) { - /* Source address is aligned. */ - u32 *addr = (u32 *) ss->sge.vaddr; - int shift = extra * BITS_PER_BYTE; - int ushift = 32 - shift; - u32 l = len; - - while (l >= sizeof(u32)) { - u32 v = *addr; - - data |= set_upper_bits(v, shift); - __raw_writel(data, piobuf); - data = get_upper_bits(v, ushift); - piobuf++; - addr++; - l -= sizeof(u32); - } - /* - * We still have 'extra' number of bytes leftover. - */ - if (l) { - u32 v = *addr; - - if (l + extra >= sizeof(u32)) { - data |= set_upper_bits(v, shift); - len -= l + extra - sizeof(u32); - if (len == length) { - last = data; - break; - } - __raw_writel(data, piobuf); - piobuf++; - extra = 0; - data = 0; - } else { - /* Clear unused upper bytes */ - data |= clear_upper_bytes(v, l, - extra); - if (len == length) { - last = data; - break; - } - extra += l; - } - } else if (len == length) { - last = data; - break; - } - } else if (len == length) { - u32 w; - - /* - * Need to round up for the last dword in the - * packet. - */ - w = (len + 3) >> 2; - __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1); - piobuf += w - 1; - last = ((u32 *) ss->sge.vaddr)[w - 1]; - break; - } else { - u32 w = len >> 2; - - __iowrite32_copy(piobuf, ss->sge.vaddr, w); - piobuf += w; - - extra = len & (sizeof(u32) - 1); - if (extra) { - u32 v = ((u32 *) ss->sge.vaddr)[w]; - - /* Clear unused upper bytes */ - data = clear_upper_bytes(v, extra, 0); - } - } - update_sge(ss, len); - length -= len; - } - /* Update address before sending packet. */ - update_sge(ss, length); - /* must flush early everything before trigger word */ - ipath_flush_wc(); - __raw_writel(last, piobuf); - /* be sure trigger word is written */ - ipath_flush_wc(); -} - -/** - * ipath_verbs_send - send a packet from the verbs layer - * @dd: the infinipath device - * @hdrwords: the number of words in the header - * @hdr: the packet header - * @len: the length of the packet in bytes - * @ss: the SGE to send - * - * This is like ipath_sma_send_pkt() in that we need to be able to send - * packets after the chip is initialized (MADs) but also like - * ipath_layer_send_hdr() since its used by the verbs layer. - */ -int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords, - u32 *hdr, u32 len, struct ipath_sge_state *ss) -{ - u32 __iomem *piobuf; - u32 plen; - int ret; - - /* +1 is for the qword padding of pbc */ - plen = hdrwords + ((len + 3) >> 2) + 1; - if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) { - ipath_dbg("packet len 0x%x too long, failing\n", plen); - ret = -EINVAL; - goto bail; - } - - /* Get a PIO buffer to use. */ - piobuf = ipath_getpiobuf(dd, NULL); - if (unlikely(piobuf == NULL)) { - ret = -EBUSY; - goto bail; - } - - /* - * Write len to control qword, no flags. - * We have to flush after the PBC for correctness on some cpus - * or WC buffer can be written out of order. - */ - writeq(plen, piobuf); - ipath_flush_wc(); - piobuf += 2; - if (len == 0) { - /* - * If there is just the header portion, must flush before - * writing last word of header for correctness, and after - * the last header word (trigger word). - */ - __iowrite32_copy(piobuf, hdr, hdrwords - 1); - ipath_flush_wc(); - __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1); - ipath_flush_wc(); - ret = 0; - goto bail; - } - - __iowrite32_copy(piobuf, hdr, hdrwords); - piobuf += hdrwords; - - /* The common case is aligned and contained in one segment. */ - if (likely(ss->num_sge == 1 && len <= ss->sge.length && - !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) { - u32 w; - u32 *addr = (u32 *) ss->sge.vaddr; - - /* Update address before sending packet. */ - update_sge(ss, len); - /* Need to round up for the last dword in the packet. */ - w = (len + 3) >> 2; - __iowrite32_copy(piobuf, addr, w - 1); - /* must flush early everything before trigger word */ - ipath_flush_wc(); - __raw_writel(addr[w - 1], piobuf + w - 1); - /* be sure trigger word is written */ - ipath_flush_wc(); - ret = 0; - goto bail; - } - copy_io(piobuf, ss, len); - ret = 0; - -bail: - return ret; -} - -int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords, - u64 *rwords, u64 *spkts, u64 *rpkts, - u64 *xmit_wait) -{ - int ret; - - if (!(dd->ipath_flags & IPATH_INITTED)) { - /* no hardware, freeze, etc. */ - ipath_dbg("unit %u not usable\n", dd->ipath_unit); - ret = -EINVAL; - goto bail; - } - *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); - *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); - *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); - *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); - *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt); - - ret = 0; - -bail: - return ret; -} - -/** - * ipath_layer_get_counters - get various chip counters - * @dd: the infinipath device - * @cntrs: counters are placed here - * - * Return the counters needed by recv_pma_get_portcounters(). - */ -int ipath_layer_get_counters(struct ipath_devdata *dd, - struct ipath_layer_counters *cntrs) -{ - int ret; - - if (!(dd->ipath_flags & IPATH_INITTED)) { - /* no hardware, freeze, etc. */ - ipath_dbg("unit %u not usable\n", dd->ipath_unit); - ret = -EINVAL; - goto bail; - } - cntrs->symbol_error_counter = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt); - cntrs->link_error_recovery_counter = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt); - /* - * The link downed counter counts when the other side downs the - * connection. We add in the number of times we downed the link - * due to local link integrity errors to compensate. - */ - cntrs->link_downed_counter = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt); - cntrs->port_rcv_errors = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) + - ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt); - cntrs->port_rcv_remphys_errors = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt); - cntrs->port_xmit_discards = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt); - cntrs->port_xmit_data = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); - cntrs->port_rcv_data = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); - cntrs->port_xmit_packets = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); - cntrs->port_rcv_packets = - ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); - cntrs->local_link_integrity_errors = dd->ipath_lli_errors; - cntrs->excessive_buffer_overrun_errors = 0; /* XXX */ - - ret = 0; - -bail: - return ret; -} - -int ipath_layer_want_buffer(struct ipath_devdata *dd) -{ - set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl); - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - - return 0; -} - int ipath_layer_send_hdr(struct ipath_devdata *dd, struct ether_header *hdr) { int ret = 0; @@ -985,361 +367,3 @@ int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd) } EXPORT_SYMBOL_GPL(ipath_layer_set_piointbufavail_int); - -int ipath_layer_enable_timer(struct ipath_devdata *dd) -{ - /* - * HT-400 has a design flaw where the chip and kernel idea - * of the tail register don't always agree, and therefore we won't - * get an interrupt on the next packet received. - * If the board supports per packet receive interrupts, use it. - * Otherwise, the timer function periodically checks for packets - * to cover this case. - * Either way, the timer is needed for verbs layer related - * processing. - */ - if (dd->ipath_flags & IPATH_GPIO_INTR) { - ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect, - 0x2074076542310ULL); - /* Enable GPIO bit 2 interrupt */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, - (u64) (1 << 2)); - } - - init_timer(&dd->verbs_timer); - dd->verbs_timer.function = __ipath_verbs_timer; - dd->verbs_timer.data = (unsigned long)dd; - dd->verbs_timer.expires = jiffies + 1; - add_timer(&dd->verbs_timer); - - return 0; -} - -int ipath_layer_disable_timer(struct ipath_devdata *dd) -{ - /* Disable GPIO bit 2 interrupt */ - if (dd->ipath_flags & IPATH_GPIO_INTR) - ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0); - - del_timer_sync(&dd->verbs_timer); - - return 0; -} - -/** - * ipath_layer_set_verbs_flags - set the verbs layer flags - * @dd: the infinipath device - * @flags: the flags to set - */ -int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags) -{ - struct ipath_devdata *ss; - unsigned long lflags; - - spin_lock_irqsave(&ipath_devs_lock, lflags); - - list_for_each_entry(ss, &ipath_dev_list, ipath_list) { - if (!(ss->ipath_flags & IPATH_INITTED)) - continue; - if ((flags & IPATH_VERBS_KERNEL_SMA) && - !(*ss->ipath_statusp & IPATH_STATUS_SMA)) - *ss->ipath_statusp |= IPATH_STATUS_OIB_SMA; - else - *ss->ipath_statusp &= ~IPATH_STATUS_OIB_SMA; - } - - spin_unlock_irqrestore(&ipath_devs_lock, lflags); - - return 0; -} - -/** - * ipath_layer_get_npkeys - return the size of the PKEY table for port 0 - * @dd: the infinipath device - */ -unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd) -{ - return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys); -} - -/** - * ipath_layer_get_pkey - return the indexed PKEY from the port 0 PKEY table - * @dd: the infinipath device - * @index: the PKEY index - */ -unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index) -{ - unsigned ret; - - if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys)) - ret = 0; - else - ret = dd->ipath_pd[0]->port_pkeys[index]; - - return ret; -} - -/** - * ipath_layer_get_pkeys - return the PKEY table for port 0 - * @dd: the infinipath device - * @pkeys: the pkey table is placed here - */ -int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 * pkeys) -{ - struct ipath_portdata *pd = dd->ipath_pd[0]; - - memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys)); - - return 0; -} - -/** - * rm_pkey - decrecment the reference count for the given PKEY - * @dd: the infinipath device - * @key: the PKEY index - * - * Return true if this was the last reference and the hardware table entry - * needs to be changed. - */ -static int rm_pkey(struct ipath_devdata *dd, u16 key) -{ - int i; - int ret; - - for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { - if (dd->ipath_pkeys[i] != key) - continue; - if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) { - dd->ipath_pkeys[i] = 0; - ret = 1; - goto bail; - } - break; - } - - ret = 0; - -bail: - return ret; -} - -/** - * add_pkey - add the given PKEY to the hardware table - * @dd: the infinipath device - * @key: the PKEY - * - * Return an error code if unable to add the entry, zero if no change, - * or 1 if the hardware PKEY register needs to be updated. - */ -static int add_pkey(struct ipath_devdata *dd, u16 key) -{ - int i; - u16 lkey = key & 0x7FFF; - int any = 0; - int ret; - - if (lkey == 0x7FFF) { - ret = 0; - goto bail; - } - - /* Look for an empty slot or a matching PKEY. */ - for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { - if (!dd->ipath_pkeys[i]) { - any++; - continue; - } - /* If it matches exactly, try to increment the ref count */ - if (dd->ipath_pkeys[i] == key) { - if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) { - ret = 0; - goto bail; - } - /* Lost the race. Look for an empty slot below. */ - atomic_dec(&dd->ipath_pkeyrefs[i]); - any++; - } - /* - * It makes no sense to have both the limited and unlimited - * PKEY set at the same time since the unlimited one will - * disable the limited one. - */ - if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) { - ret = -EEXIST; - goto bail; - } - } - if (!any) { - ret = -EBUSY; - goto bail; - } - for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { - if (!dd->ipath_pkeys[i] && - atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) { - /* for ipathstats, etc. */ - ipath_stats.sps_pkeys[i] = lkey; - dd->ipath_pkeys[i] = key; - ret = 1; - goto bail; - } - } - ret = -EBUSY; - -bail: - return ret; -} - -/** - * ipath_layer_set_pkeys - set the PKEY table for port 0 - * @dd: the infinipath device - * @pkeys: the PKEY table - */ -int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 * pkeys) -{ - struct ipath_portdata *pd; - int i; - int changed = 0; - - pd = dd->ipath_pd[0]; - - for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) { - u16 key = pkeys[i]; - u16 okey = pd->port_pkeys[i]; - - if (key == okey) - continue; - /* - * The value of this PKEY table entry is changing. - * Remove the old entry in the hardware's array of PKEYs. - */ - if (okey & 0x7FFF) - changed |= rm_pkey(dd, okey); - if (key & 0x7FFF) { - int ret = add_pkey(dd, key); - - if (ret < 0) - key = 0; - else - changed |= ret; - } - pd->port_pkeys[i] = key; - } - if (changed) { - u64 pkey; - - pkey = (u64) dd->ipath_pkeys[0] | - ((u64) dd->ipath_pkeys[1] << 16) | - ((u64) dd->ipath_pkeys[2] << 32) | - ((u64) dd->ipath_pkeys[3] << 48); - ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n", - (unsigned long long) pkey); - ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey, - pkey); - } - return 0; -} - -/** - * ipath_layer_get_linkdowndefaultstate - get the default linkdown state - * @dd: the infinipath device - * - * Returns zero if the default is POLL, 1 if the default is SLEEP. - */ -int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd) -{ - return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE); -} - -/** - * ipath_layer_set_linkdowndefaultstate - set the default linkdown state - * @dd: the infinipath device - * @sleep: the new state - * - * Note that this will only take effect when the link state changes. - */ -int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd, - int sleep) -{ - if (sleep) - dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE; - else - dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl); - return 0; -} - -int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd) -{ - return (dd->ipath_ibcctrl >> - INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & - INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; -} - -/** - * ipath_layer_set_phyerrthreshold - set the physical error threshold - * @dd: the infinipath device - * @n: the new threshold - * - * Note that this will only take effect when the link state changes. - */ -int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n) -{ - unsigned v; - - v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & - INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; - if (v != n) { - dd->ipath_ibcctrl &= - ~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK << - INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT); - dd->ipath_ibcctrl |= - (u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT; - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl); - } - return 0; -} - -int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd) -{ - return (dd->ipath_ibcctrl >> - INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) & - INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK; -} - -/** - * ipath_layer_set_overrunthreshold - set the overrun threshold - * @dd: the infinipath device - * @n: the new threshold - * - * Note that this will only take effect when the link state changes. - */ -int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n) -{ - unsigned v; - - v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) & - INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK; - if (v != n) { - dd->ipath_ibcctrl &= - ~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK << - INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT); - dd->ipath_ibcctrl |= - (u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT; - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, - dd->ipath_ibcctrl); - } - return 0; -} - -int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name, - size_t namelen) -{ - return dd->ipath_f_get_boardname(dd, name, namelen); -} - -u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd) -{ - return dd->ipath_rcvhdrentsize; -} diff --git a/drivers/infiniband/hw/ipath/ipath_layer.h b/drivers/infiniband/hw/ipath/ipath_layer.h index 57c990a5715f..4a27ede49941 100644 --- a/drivers/infiniband/hw/ipath/ipath_layer.h +++ b/drivers/infiniband/hw/ipath/ipath_layer.h @@ -40,73 +40,9 @@ */ struct sk_buff; -struct ipath_sge_state; struct ipath_devdata; struct ether_header; -struct ipath_layer_counters { - u64 symbol_error_counter; - u64 link_error_recovery_counter; - u64 link_downed_counter; - u64 port_rcv_errors; - u64 port_rcv_remphys_errors; - u64 port_xmit_discards; - u64 port_xmit_data; - u64 port_rcv_data; - u64 port_xmit_packets; - u64 port_rcv_packets; - u32 local_link_integrity_errors; - u32 excessive_buffer_overrun_errors; -}; - -/* - * A segment is a linear region of low physical memory. - * XXX Maybe we should use phys addr here and kmap()/kunmap(). - * Used by the verbs layer. - */ -struct ipath_seg { - void *vaddr; - size_t length; -}; - -/* The number of ipath_segs that fit in a page. */ -#define IPATH_SEGSZ (PAGE_SIZE / sizeof (struct ipath_seg)) - -struct ipath_segarray { - struct ipath_seg segs[IPATH_SEGSZ]; -}; - -struct ipath_mregion { - u64 user_base; /* User's address for this region */ - u64 iova; /* IB start address of this region */ - size_t length; - u32 lkey; - u32 offset; /* offset (bytes) to start of region */ - int access_flags; - u32 max_segs; /* number of ipath_segs in all the arrays */ - u32 mapsz; /* size of the map array */ - struct ipath_segarray *map[0]; /* the segments */ -}; - -/* - * These keep track of the copy progress within a memory region. - * Used by the verbs layer. - */ -struct ipath_sge { - struct ipath_mregion *mr; - void *vaddr; /* current pointer into the segment */ - u32 sge_length; /* length of the SGE */ - u32 length; /* remaining length of the segment */ - u16 m; /* current index: mr->map[m] */ - u16 n; /* current index: mr->map[m]->segs[n] */ -}; - -struct ipath_sge_state { - struct ipath_sge *sg_list; /* next SGE to be used if any */ - struct ipath_sge sge; /* progress state for the current SGE */ - u8 num_sge; -}; - int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *), void (*l_remove)(void *), int (*l_intr)(void *, u32), @@ -119,49 +55,9 @@ int ipath_layer_open(struct ipath_devdata *, u32 * pktmax); u16 ipath_layer_get_lid(struct ipath_devdata *dd); int ipath_layer_get_mac(struct ipath_devdata *dd, u8 *); u16 ipath_layer_get_bcast(struct ipath_devdata *dd); -u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd); -int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 state); -int ipath_layer_set_mtu(struct ipath_devdata *, u16); -int ipath_set_lid(struct ipath_devdata *, u32, u8); int ipath_layer_send_hdr(struct ipath_devdata *dd, struct ether_header *hdr); -int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords, - u32 * hdr, u32 len, struct ipath_sge_state *ss); int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd); -int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name, - size_t namelen); -int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords, - u64 *rwords, u64 *spkts, u64 *rpkts, - u64 *xmit_wait); -int ipath_layer_get_counters(struct ipath_devdata *dd, - struct ipath_layer_counters *cntrs); -int ipath_layer_want_buffer(struct ipath_devdata *dd); -int ipath_layer_set_guid(struct ipath_devdata *, __be64 guid); -__be64 ipath_layer_get_guid(struct ipath_devdata *); -u32 ipath_layer_get_majrev(struct ipath_devdata *); -u32 ipath_layer_get_minrev(struct ipath_devdata *); -u32 ipath_layer_get_pcirev(struct ipath_devdata *); -u32 ipath_layer_get_flags(struct ipath_devdata *dd); -struct device *ipath_layer_get_device(struct ipath_devdata *dd); -u16 ipath_layer_get_deviceid(struct ipath_devdata *dd); -u32 ipath_layer_get_vendorid(struct ipath_devdata *); -u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd); -u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd); -int ipath_layer_enable_timer(struct ipath_devdata *dd); -int ipath_layer_disable_timer(struct ipath_devdata *dd); -int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags); -unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd); -unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index); -int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 *pkeys); -int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 *pkeys); -int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd); -int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd, - int sleep); -int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd); -int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n); -int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd); -int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n); -u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd); /* ipath_ether interrupt values */ #define IPATH_LAYER_INT_IF_UP 0x2 diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index d3402341b7d0..72d1db89db8f 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -101,15 +101,15 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp, nip->num_ports = ibdev->phys_port_cnt; /* This is already in network order */ nip->sys_guid = to_idev(ibdev)->sys_image_guid; - nip->node_guid = ipath_layer_get_guid(dd); + nip->node_guid = dd->ipath_guid; nip->port_guid = nip->sys_guid; - nip->partition_cap = cpu_to_be16(ipath_layer_get_npkeys(dd)); - nip->device_id = cpu_to_be16(ipath_layer_get_deviceid(dd)); - majrev = ipath_layer_get_majrev(dd); - minrev = ipath_layer_get_minrev(dd); + nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd)); + nip->device_id = cpu_to_be16(dd->ipath_deviceid); + majrev = dd->ipath_majrev; + minrev = dd->ipath_minrev; nip->revision = cpu_to_be32((majrev << 16) | minrev); nip->local_port_num = port; - vendor = ipath_layer_get_vendorid(dd); + vendor = dd->ipath_vendorid; nip->vendor_id[0] = 0; nip->vendor_id[1] = vendor >> 8; nip->vendor_id[2] = vendor; @@ -133,13 +133,89 @@ static int recv_subn_get_guidinfo(struct ib_smp *smp, */ if (startgx == 0) /* The first is a copy of the read-only HW GUID. */ - *p = ipath_layer_get_guid(to_idev(ibdev)->dd); + *p = to_idev(ibdev)->dd->ipath_guid; else smp->status |= IB_SMP_INVALID_FIELD; return reply(smp); } + +static int get_overrunthreshold(struct ipath_devdata *dd) +{ + return (dd->ipath_ibcctrl >> + INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) & + INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK; +} + +/** + * set_overrunthreshold - set the overrun threshold + * @dd: the infinipath device + * @n: the new threshold + * + * Note that this will only take effect when the link state changes. + */ +static int set_overrunthreshold(struct ipath_devdata *dd, unsigned n) +{ + unsigned v; + + v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) & + INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK; + if (v != n) { + dd->ipath_ibcctrl &= + ~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK << + INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT); + dd->ipath_ibcctrl |= + (u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT; + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, + dd->ipath_ibcctrl); + } + return 0; +} + +static int get_phyerrthreshold(struct ipath_devdata *dd) +{ + return (dd->ipath_ibcctrl >> + INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & + INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; +} + +/** + * set_phyerrthreshold - set the physical error threshold + * @dd: the infinipath device + * @n: the new threshold + * + * Note that this will only take effect when the link state changes. + */ +static int set_phyerrthreshold(struct ipath_devdata *dd, unsigned n) +{ + unsigned v; + + v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & + INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; + if (v != n) { + dd->ipath_ibcctrl &= + ~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK << + INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT); + dd->ipath_ibcctrl |= + (u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT; + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, + dd->ipath_ibcctrl); + } + return 0; +} + +/** + * get_linkdowndefaultstate - get the default linkdown state + * @dd: the infinipath device + * + * Returns zero if the default is POLL, 1 if the default is SLEEP. + */ +static int get_linkdowndefaultstate(struct ipath_devdata *dd) +{ + return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE); +} + static int recv_subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev, u8 port) { @@ -166,7 +242,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp, (dev->mkeyprot_resv_lmc >> 6) == 0) pip->mkey = dev->mkey; pip->gid_prefix = dev->gid_prefix; - lid = ipath_layer_get_lid(dev->dd); + lid = dev->dd->ipath_lid; pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE; pip->sm_lid = cpu_to_be16(dev->sm_lid); pip->cap_mask = cpu_to_be32(dev->port_cap_flags); @@ -177,14 +253,14 @@ static int recv_subn_get_portinfo(struct ib_smp *smp, pip->link_width_supported = 3; /* 1x or 4x */ pip->link_width_active = 2; /* 4x */ pip->linkspeed_portstate = 0x10; /* 2.5Gbps */ - ibcstat = ipath_layer_get_lastibcstat(dev->dd); + ibcstat = dev->dd->ipath_lastibcstat; pip->linkspeed_portstate |= ((ibcstat >> 4) & 0x3) + 1; pip->portphysstate_linkdown = (ipath_cvt_physportstate[ibcstat & 0xf] << 4) | - (ipath_layer_get_linkdowndefaultstate(dev->dd) ? 1 : 2); + (get_linkdowndefaultstate(dev->dd) ? 1 : 2); pip->mkeyprot_resv_lmc = dev->mkeyprot_resv_lmc; pip->linkspeedactive_enabled = 0x11; /* 2.5Gbps, 2.5Gbps */ - switch (ipath_layer_get_ibmtu(dev->dd)) { + switch (dev->dd->ipath_ibmtu) { case 4096: mtu = IB_MTU_4096; break; @@ -217,7 +293,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp, pip->mkey_violations = cpu_to_be16(dev->mkey_violations); /* P_KeyViolations are counted by hardware. */ pip->pkey_violations = - cpu_to_be16((ipath_layer_get_cr_errpkey(dev->dd) - + cpu_to_be16((ipath_get_cr_errpkey(dev->dd) - dev->z_pkey_violations) & 0xFFFF); pip->qkey_violations = cpu_to_be16(dev->qkey_violations); /* Only the hardware GUID is supported for now */ @@ -226,8 +302,8 @@ static int recv_subn_get_portinfo(struct ib_smp *smp, /* 32.768 usec. response time (guessing) */ pip->resv_resptimevalue = 3; pip->localphyerrors_overrunerrors = - (ipath_layer_get_phyerrthreshold(dev->dd) << 4) | - ipath_layer_get_overrunthreshold(dev->dd); + (get_phyerrthreshold(dev->dd) << 4) | + get_overrunthreshold(dev->dd); /* pip->max_credit_hint; */ /* pip->link_roundtrip_latency[3]; */ @@ -237,6 +313,20 @@ bail: return ret; } +/** + * get_pkeys - return the PKEY table for port 0 + * @dd: the infinipath device + * @pkeys: the pkey table is placed here + */ +static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys) +{ + struct ipath_portdata *pd = dd->ipath_pd[0]; + + memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys)); + + return 0; +} + static int recv_subn_get_pkeytable(struct ib_smp *smp, struct ib_device *ibdev) { @@ -249,9 +339,9 @@ static int recv_subn_get_pkeytable(struct ib_smp *smp, memset(smp->data, 0, sizeof(smp->data)); if (startpx == 0) { struct ipath_ibdev *dev = to_idev(ibdev); - unsigned i, n = ipath_layer_get_npkeys(dev->dd); + unsigned i, n = ipath_get_npkeys(dev->dd); - ipath_layer_get_pkeys(dev->dd, p); + get_pkeys(dev->dd, p); for (i = 0; i < n; i++) q[i] = cpu_to_be16(p[i]); @@ -268,6 +358,24 @@ static int recv_subn_set_guidinfo(struct ib_smp *smp, return recv_subn_get_guidinfo(smp, ibdev); } +/** + * set_linkdowndefaultstate - set the default linkdown state + * @dd: the infinipath device + * @sleep: the new state + * + * Note that this will only take effect when the link state changes. + */ +static int set_linkdowndefaultstate(struct ipath_devdata *dd, int sleep) +{ + if (sleep) + dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE; + else + dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE; + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, + dd->ipath_ibcctrl); + return 0; +} + /** * recv_subn_set_portinfo - set port information * @smp: the incoming SM packet @@ -290,7 +398,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, u8 state; u16 lstate; u32 mtu; - int ret; + int ret, ore; if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt) goto err; @@ -304,7 +412,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period); lid = be16_to_cpu(pip->lid); - if (lid != ipath_layer_get_lid(dev->dd)) { + if (lid != dev->dd->ipath_lid) { /* Must be a valid unicast LID address. */ if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE) goto err; @@ -342,11 +450,11 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, case 0: /* NOP */ break; case 1: /* SLEEP */ - if (ipath_layer_set_linkdowndefaultstate(dev->dd, 1)) + if (set_linkdowndefaultstate(dev->dd, 1)) goto err; break; case 2: /* POLL */ - if (ipath_layer_set_linkdowndefaultstate(dev->dd, 0)) + if (set_linkdowndefaultstate(dev->dd, 0)) goto err; break; default: @@ -376,7 +484,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, /* XXX We have already partially updated our state! */ goto err; } - ipath_layer_set_mtu(dev->dd, mtu); + ipath_set_mtu(dev->dd, mtu); dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF; @@ -392,20 +500,16 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, * later. */ if (pip->pkey_violations == 0) - dev->z_pkey_violations = - ipath_layer_get_cr_errpkey(dev->dd); + dev->z_pkey_violations = ipath_get_cr_errpkey(dev->dd); if (pip->qkey_violations == 0) dev->qkey_violations = 0; - if (ipath_layer_set_phyerrthreshold( - dev->dd, - (pip->localphyerrors_overrunerrors >> 4) & 0xF)) + ore = pip->localphyerrors_overrunerrors; + if (set_phyerrthreshold(dev->dd, (ore >> 4) & 0xF)) goto err; - if (ipath_layer_set_overrunthreshold( - dev->dd, - (pip->localphyerrors_overrunerrors & 0xF))) + if (set_overrunthreshold(dev->dd, (ore & 0xF))) goto err; dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F; @@ -423,7 +527,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, * is down or is being set to down. */ state = pip->linkspeed_portstate & 0xF; - flags = ipath_layer_get_flags(dev->dd); + flags = dev->dd->ipath_flags; lstate = (pip->portphysstate_linkdown >> 4) & 0xF; if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP)) goto err; @@ -439,7 +543,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, /* FALLTHROUGH */ case IB_PORT_DOWN: if (lstate == 0) - if (ipath_layer_get_linkdowndefaultstate(dev->dd)) + if (get_linkdowndefaultstate(dev->dd)) lstate = IPATH_IB_LINKDOWN_SLEEP; else lstate = IPATH_IB_LINKDOWN; @@ -451,7 +555,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, lstate = IPATH_IB_LINKDOWN_DISABLE; else goto err; - ipath_layer_set_linkstate(dev->dd, lstate); + ipath_set_linkstate(dev->dd, lstate); if (flags & IPATH_LINKACTIVE) { event.event = IB_EVENT_PORT_ERR; ib_dispatch_event(&event); @@ -460,7 +564,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, case IB_PORT_ARMED: if (!(flags & (IPATH_LINKINIT | IPATH_LINKACTIVE))) break; - ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKARM); + ipath_set_linkstate(dev->dd, IPATH_IB_LINKARM); if (flags & IPATH_LINKACTIVE) { event.event = IB_EVENT_PORT_ERR; ib_dispatch_event(&event); @@ -469,7 +573,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp, case IB_PORT_ACTIVE: if (!(flags & IPATH_LINKARMED)) break; - ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE); + ipath_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE); event.event = IB_EVENT_PORT_ACTIVE; ib_dispatch_event(&event); break; @@ -493,6 +597,152 @@ done: return ret; } +/** + * rm_pkey - decrecment the reference count for the given PKEY + * @dd: the infinipath device + * @key: the PKEY index + * + * Return true if this was the last reference and the hardware table entry + * needs to be changed. + */ +static int rm_pkey(struct ipath_devdata *dd, u16 key) +{ + int i; + int ret; + + for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { + if (dd->ipath_pkeys[i] != key) + continue; + if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) { + dd->ipath_pkeys[i] = 0; + ret = 1; + goto bail; + } + break; + } + + ret = 0; + +bail: + return ret; +} + +/** + * add_pkey - add the given PKEY to the hardware table + * @dd: the infinipath device + * @key: the PKEY + * + * Return an error code if unable to add the entry, zero if no change, + * or 1 if the hardware PKEY register needs to be updated. + */ +static int add_pkey(struct ipath_devdata *dd, u16 key) +{ + int i; + u16 lkey = key & 0x7FFF; + int any = 0; + int ret; + + if (lkey == 0x7FFF) { + ret = 0; + goto bail; + } + + /* Look for an empty slot or a matching PKEY. */ + for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { + if (!dd->ipath_pkeys[i]) { + any++; + continue; + } + /* If it matches exactly, try to increment the ref count */ + if (dd->ipath_pkeys[i] == key) { + if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) { + ret = 0; + goto bail; + } + /* Lost the race. Look for an empty slot below. */ + atomic_dec(&dd->ipath_pkeyrefs[i]); + any++; + } + /* + * It makes no sense to have both the limited and unlimited + * PKEY set at the same time since the unlimited one will + * disable the limited one. + */ + if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) { + ret = -EEXIST; + goto bail; + } + } + if (!any) { + ret = -EBUSY; + goto bail; + } + for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { + if (!dd->ipath_pkeys[i] && + atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) { + /* for ipathstats, etc. */ + ipath_stats.sps_pkeys[i] = lkey; + dd->ipath_pkeys[i] = key; + ret = 1; + goto bail; + } + } + ret = -EBUSY; + +bail: + return ret; +} + +/** + * set_pkeys - set the PKEY table for port 0 + * @dd: the infinipath device + * @pkeys: the PKEY table + */ +static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys) +{ + struct ipath_portdata *pd; + int i; + int changed = 0; + + pd = dd->ipath_pd[0]; + + for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) { + u16 key = pkeys[i]; + u16 okey = pd->port_pkeys[i]; + + if (key == okey) + continue; + /* + * The value of this PKEY table entry is changing. + * Remove the old entry in the hardware's array of PKEYs. + */ + if (okey & 0x7FFF) + changed |= rm_pkey(dd, okey); + if (key & 0x7FFF) { + int ret = add_pkey(dd, key); + + if (ret < 0) + key = 0; + else + changed |= ret; + } + pd->port_pkeys[i] = key; + } + if (changed) { + u64 pkey; + + pkey = (u64) dd->ipath_pkeys[0] | + ((u64) dd->ipath_pkeys[1] << 16) | + ((u64) dd->ipath_pkeys[2] << 32) | + ((u64) dd->ipath_pkeys[3] << 48); + ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n", + (unsigned long long) pkey); + ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey, + pkey); + } + return 0; +} + static int recv_subn_set_pkeytable(struct ib_smp *smp, struct ib_device *ibdev) { @@ -500,13 +750,12 @@ static int recv_subn_set_pkeytable(struct ib_smp *smp, __be16 *p = (__be16 *) smp->data; u16 *q = (u16 *) smp->data; struct ipath_ibdev *dev = to_idev(ibdev); - unsigned i, n = ipath_layer_get_npkeys(dev->dd); + unsigned i, n = ipath_get_npkeys(dev->dd); for (i = 0; i < n; i++) q[i] = be16_to_cpu(p[i]); - if (startpx != 0 || - ipath_layer_set_pkeys(dev->dd, q) != 0) + if (startpx != 0 || set_pkeys(dev->dd, q) != 0) smp->status |= IB_SMP_INVALID_FIELD; return recv_subn_get_pkeytable(smp, ibdev); @@ -844,10 +1093,10 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp, struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) pmp->data; struct ipath_ibdev *dev = to_idev(ibdev); - struct ipath_layer_counters cntrs; + struct ipath_verbs_counters cntrs; u8 port_select = p->port_select; - ipath_layer_get_counters(dev->dd, &cntrs); + ipath_get_counters(dev->dd, &cntrs); /* Adjust counters for any resets done. */ cntrs.symbol_error_counter -= dev->z_symbol_error_counter; @@ -944,8 +1193,8 @@ static int recv_pma_get_portcounters_ext(struct ib_perf *pmp, u64 swords, rwords, spkts, rpkts, xwait; u8 port_select = p->port_select; - ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts, - &rpkts, &xwait); + ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts, + &rpkts, &xwait); /* Adjust counters for any resets done. */ swords -= dev->z_port_xmit_data; @@ -978,13 +1227,13 @@ static int recv_pma_set_portcounters(struct ib_perf *pmp, struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) pmp->data; struct ipath_ibdev *dev = to_idev(ibdev); - struct ipath_layer_counters cntrs; + struct ipath_verbs_counters cntrs; /* * Since the HW doesn't support clearing counters, we save the * current count and subtract it from future responses. */ - ipath_layer_get_counters(dev->dd, &cntrs); + ipath_get_counters(dev->dd, &cntrs); if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR) dev->z_symbol_error_counter = cntrs.symbol_error_counter; @@ -1041,8 +1290,8 @@ static int recv_pma_set_portcounters_ext(struct ib_perf *pmp, struct ipath_ibdev *dev = to_idev(ibdev); u64 swords, rwords, spkts, rpkts, xwait; - ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts, - &rpkts, &xwait); + ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts, + &rpkts, &xwait); if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA) dev->z_port_xmit_data = swords; diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c index 4ac31a5da330..b36f6fb3e37a 100644 --- a/drivers/infiniband/hw/ipath/ipath_mr.c +++ b/drivers/infiniband/hw/ipath/ipath_mr.c @@ -36,6 +36,18 @@ #include "ipath_verbs.h" +/* Fast memory region */ +struct ipath_fmr { + struct ib_fmr ibfmr; + u8 page_shift; + struct ipath_mregion mr; /* must be last */ +}; + +static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr) +{ + return container_of(ibfmr, struct ipath_fmr, ibfmr); +} + /** * ipath_get_dma_mr - get a DMA memory region * @pd: protection domain for this memory region diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 1ccfc909db1e..9228d59b59d4 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -461,7 +461,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto inval; if (attr_mask & IB_QP_PKEY_INDEX) - if (attr->pkey_index >= ipath_layer_get_npkeys(dev->dd)) + if (attr->pkey_index >= ipath_get_npkeys(dev->dd)) goto inval; if (attr_mask & IB_QP_MIN_RNR_TIMER) @@ -644,6 +644,33 @@ __be32 ipath_compute_aeth(struct ipath_qp *qp) return cpu_to_be32(aeth); } +/** + * set_verbs_flags - set the verbs layer flags + * @dd: the infinipath device + * @flags: the flags to set + */ +static int set_verbs_flags(struct ipath_devdata *dd, unsigned flags) +{ + struct ipath_devdata *ss; + unsigned long lflags; + + spin_lock_irqsave(&ipath_devs_lock, lflags); + + list_for_each_entry(ss, &ipath_dev_list, ipath_list) { + if (!(ss->ipath_flags & IPATH_INITTED)) + continue; + if ((flags & IPATH_VERBS_KERNEL_SMA) && + !(*ss->ipath_statusp & IPATH_STATUS_SMA)) + *ss->ipath_statusp |= IPATH_STATUS_OIB_SMA; + else + *ss->ipath_statusp &= ~IPATH_STATUS_OIB_SMA; + } + + spin_unlock_irqrestore(&ipath_devs_lock, lflags); + + return 0; +} + /** * ipath_create_qp - create a queue pair for a device * @ibpd: the protection domain who's device we create the queue pair for @@ -760,8 +787,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, /* Tell the core driver that the kernel SMA is present. */ if (init_attr->qp_type == IB_QPT_SMI) - ipath_layer_set_verbs_flags(dev->dd, - IPATH_VERBS_KERNEL_SMA); + set_verbs_flags(dev->dd, IPATH_VERBS_KERNEL_SMA); break; default: @@ -838,7 +864,7 @@ int ipath_destroy_qp(struct ib_qp *ibqp) /* Tell the core driver that the kernel SMA is gone. */ if (qp->ibqp.qp_type == IB_QPT_SMI) - ipath_layer_set_verbs_flags(dev->dd, 0); + set_verbs_flags(dev->dd, 0); spin_lock_irqsave(&qp->s_lock, flags); qp->state = IB_QPS_ERR; diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 774d1615ce2f..a08654042c03 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -32,7 +32,7 @@ */ #include "ipath_verbs.h" -#include "ipath_common.h" +#include "ipath_kernel.h" /* cut down ridiculously long IB macro names */ #define OP(x) IB_OPCODE_RC_##x @@ -540,7 +540,7 @@ static void send_rc_ack(struct ipath_qp *qp) lrh0 = IPATH_LRH_GRH; } /* read pkey_index w/o lock (its atomic) */ - bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index); + bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index); if (qp->r_nak_state) ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | (qp->r_nak_state << @@ -557,7 +557,7 @@ static void send_rc_ack(struct ipath_qp *qp) hdr.lrh[0] = cpu_to_be16(lrh0); hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC); - hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd)); + hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid); ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK); @@ -1323,8 +1323,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, * the eager header buffer size to 56 bytes so the last 4 * bytes of the BTH header (PSN) is in the data buffer. */ - header_in_data = - ipath_layer_get_rcvhdrentsize(dev->dd) == 16; + header_in_data = dev->dd->ipath_rcvhdrentsize == 16; if (header_in_data) { psn = be32_to_cpu(((__be32 *) data)[0]); data += sizeof(__be32); diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index dd09420d677d..5c1da2d25e03 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -470,6 +470,15 @@ done: wake_up(&qp->wait); } +static int want_buffer(struct ipath_devdata *dd) +{ + set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl); + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl); + + return 0; +} + /** * ipath_no_bufs_available - tell the layer driver we need buffers * @qp: the QP that caused the problem @@ -486,7 +495,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) list_add_tail(&qp->piowait, &dev->piowait); spin_unlock_irqrestore(&dev->pending_lock, flags); /* - * Note that as soon as ipath_layer_want_buffer() is called and + * Note that as soon as want_buffer() is called and * possibly before it returns, ipath_ib_piobufavail() * could be called. If we are still in the tasklet function, * tasklet_hi_schedule() will not call us until the next time @@ -496,7 +505,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) */ clear_bit(IPATH_S_BUSY, &qp->s_flags); tasklet_unlock(&qp->s_task); - ipath_layer_want_buffer(dev->dd); + want_buffer(dev->dd); dev->n_piowait++; } @@ -611,7 +620,7 @@ u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr, hdr->hop_limit = grh->hop_limit; /* The SGID is 32-bit aligned. */ hdr->sgid.global.subnet_prefix = dev->gid_prefix; - hdr->sgid.global.interface_id = ipath_layer_get_guid(dev->dd); + hdr->sgid.global.interface_id = dev->dd->ipath_guid; hdr->dgid = grh->dgid; /* GRH header size in 32-bit words. */ @@ -643,8 +652,7 @@ void ipath_do_ruc_send(unsigned long data) if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags)) goto bail; - if (unlikely(qp->remote_ah_attr.dlid == - ipath_layer_get_lid(dev->dd))) { + if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) { ipath_ruc_loopback(qp); goto clear; } @@ -711,8 +719,8 @@ again: qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd)); - bth0 |= ipath_layer_get_pkey(dev->dd, qp->s_pkey_index); + qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid); + bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index); bth0 |= extra_bytes << 20; ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c index b98821d7801d..7396a63840db 100644 --- a/drivers/infiniband/hw/ipath/ipath_sysfs.c +++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c @@ -35,7 +35,6 @@ #include #include "ipath_kernel.h" -#include "ipath_layer.h" #include "ipath_common.h" /** @@ -227,7 +226,6 @@ static ssize_t store_mlid(struct device *dev, unit = dd->ipath_unit; dd->ipath_mlid = mlid; - ipath_layer_intr(dd, IPATH_LAYER_INT_BCAST); goto bail; invalid: @@ -467,7 +465,7 @@ static ssize_t store_link_state(struct device *dev, if (ret < 0) goto invalid; - r = ipath_layer_set_linkstate(dd, state); + r = ipath_set_linkstate(dd, state); if (r < 0) { ret = r; goto bail; @@ -502,7 +500,7 @@ static ssize_t store_mtu(struct device *dev, if (ret < 0) goto invalid; - r = ipath_layer_set_mtu(dd, mtu); + r = ipath_set_mtu(dd, mtu); if (r < 0) ret = r; diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c index c33abea2d5a7..0fd3cded16ba 100644 --- a/drivers/infiniband/hw/ipath/ipath_uc.c +++ b/drivers/infiniband/hw/ipath/ipath_uc.c @@ -32,7 +32,7 @@ */ #include "ipath_verbs.h" -#include "ipath_common.h" +#include "ipath_kernel.h" /* cut down ridiculously long IB macro names */ #define OP(x) IB_OPCODE_UC_##x @@ -261,8 +261,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, * size to 56 bytes so the last 4 bytes of * the BTH header (PSN) is in the data buffer. */ - header_in_data = - ipath_layer_get_rcvhdrentsize(dev->dd) == 16; + header_in_data = dev->dd->ipath_rcvhdrentsize == 16; if (header_in_data) { psn = be32_to_cpu(((__be32 *) data)[0]); data += sizeof(__be32); diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 82439fcfc2f8..6991d1d74e3c 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -353,7 +353,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) ss.num_sge++; } /* Check for invalid packet size. */ - if (len > ipath_layer_get_ibmtu(dev->dd)) { + if (len > dev->dd->ipath_ibmtu) { ret = -EINVAL; goto bail; } @@ -375,7 +375,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) dev->n_unicast_xmit++; lid = ah_attr->dlid & ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1); - if (unlikely(lid == ipath_layer_get_lid(dev->dd))) { + if (unlikely(lid == dev->dd->ipath_lid)) { /* * Pass in an uninitialized ib_wc to save stack * space. @@ -404,7 +404,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) qp->s_hdr.u.l.grh.sgid.global.subnet_prefix = dev->gid_prefix; qp->s_hdr.u.l.grh.sgid.global.interface_id = - ipath_layer_get_guid(dev->dd); + dev->dd->ipath_guid; qp->s_hdr.u.l.grh.dgid = ah_attr->grh.dgid; /* * Don't worry about sending to locally attached multicast @@ -434,7 +434,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */ qp->s_hdr.lrh[2] = cpu_to_be16(hwords + nwords + SIZE_OF_CRC); - lid = ipath_layer_get_lid(dev->dd); + lid = dev->dd->ipath_lid; if (lid) { lid |= ah_attr->src_path_bits & ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1); @@ -445,7 +445,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) bth0 |= 1 << 23; bth0 |= extra_bytes << 20; bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY : - ipath_layer_get_pkey(dev->dd, qp->s_pkey_index); + ipath_get_pkey(dev->dd, qp->s_pkey_index); ohdr->bth[0] = cpu_to_be32(bth0); /* * Use the multicast QP if the destination LID is a multicast LID. @@ -531,8 +531,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, * the eager header buffer size to 56 bytes so the last 12 * bytes of the IB header is in the data buffer. */ - header_in_data = - ipath_layer_get_rcvhdrentsize(dev->dd) == 16; + header_in_data = dev->dd->ipath_rcvhdrentsize == 16; if (header_in_data) { qkey = be32_to_cpu(((__be32 *) data)[1]); src_qp = be32_to_cpu(((__be32 *) data)[2]); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 15edec9227e4..3c47620e9887 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -33,15 +33,13 @@ #include #include +#include #include #include "ipath_kernel.h" #include "ipath_verbs.h" #include "ipath_common.h" -/* Not static, because we don't want the compiler removing it */ -const char ipath_verbs_version[] = "ipath_verbs " IPATH_IDSTR; - static unsigned int ib_ipath_qp_table_size = 251; module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO); MODULE_PARM_DESC(qp_table_size, "QP table size"); @@ -109,10 +107,6 @@ module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("QLogic "); -MODULE_DESCRIPTION("QLogic InfiniPath driver"); - const int ib_ipath_state_ops[IB_QPS_ERR + 1] = { [IB_QPS_RESET] = 0, [IB_QPS_INIT] = IPATH_POST_RECV_OK, @@ -125,6 +119,16 @@ const int ib_ipath_state_ops[IB_QPS_ERR + 1] = { [IB_QPS_ERR] = 0, }; +struct ipath_ucontext { + struct ib_ucontext ibucontext; +}; + +static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext + *ibucontext) +{ + return container_of(ibucontext, struct ipath_ucontext, ibucontext); +} + /* * Translate ib_wr_opcode into ib_wc_opcode. */ @@ -400,7 +404,7 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data, lid = be16_to_cpu(hdr->lrh[1]); if (lid < IPATH_MULTICAST_LID_BASE) { lid &= ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1); - if (unlikely(lid != ipath_layer_get_lid(dev->dd))) { + if (unlikely(lid != dev->dd->ipath_lid)) { dev->rcv_errors++; goto bail; } @@ -511,19 +515,19 @@ void ipath_ib_timer(struct ipath_ibdev *dev) if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED && --dev->pma_sample_start == 0) { dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING; - ipath_layer_snapshot_counters(dev->dd, &dev->ipath_sword, - &dev->ipath_rword, - &dev->ipath_spkts, - &dev->ipath_rpkts, - &dev->ipath_xmit_wait); + ipath_snapshot_counters(dev->dd, &dev->ipath_sword, + &dev->ipath_rword, + &dev->ipath_spkts, + &dev->ipath_rpkts, + &dev->ipath_xmit_wait); } if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) { if (dev->pma_sample_interval == 0) { u64 ta, tb, tc, td, te; dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE; - ipath_layer_snapshot_counters(dev->dd, &ta, &tb, - &tc, &td, &te); + ipath_snapshot_counters(dev->dd, &ta, &tb, + &tc, &td, &te); dev->ipath_sword = ta - dev->ipath_sword; dev->ipath_rword = tb - dev->ipath_rword; @@ -553,6 +557,362 @@ void ipath_ib_timer(struct ipath_ibdev *dev) } } +static void update_sge(struct ipath_sge_state *ss, u32 length) +{ + struct ipath_sge *sge = &ss->sge; + + sge->vaddr += length; + sge->length -= length; + sge->sge_length -= length; + if (sge->sge_length == 0) { + if (--ss->num_sge) + *sge = *ss->sg_list++; + } else if (sge->length == 0 && sge->mr != NULL) { + if (++sge->n >= IPATH_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + return; + sge->n = 0; + } + sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = sge->mr->map[sge->m]->segs[sge->n].length; + } +} + +#ifdef __LITTLE_ENDIAN +static inline u32 get_upper_bits(u32 data, u32 shift) +{ + return data >> shift; +} + +static inline u32 set_upper_bits(u32 data, u32 shift) +{ + return data << shift; +} + +static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) +{ + data <<= ((sizeof(u32) - n) * BITS_PER_BYTE); + data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE); + return data; +} +#else +static inline u32 get_upper_bits(u32 data, u32 shift) +{ + return data << shift; +} + +static inline u32 set_upper_bits(u32 data, u32 shift) +{ + return data >> shift; +} + +static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) +{ + data >>= ((sizeof(u32) - n) * BITS_PER_BYTE); + data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE); + return data; +} +#endif + +static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss, + u32 length) +{ + u32 extra = 0; + u32 data = 0; + u32 last; + + while (1) { + u32 len = ss->sge.length; + u32 off; + + BUG_ON(len == 0); + if (len > length) + len = length; + if (len > ss->sge.sge_length) + len = ss->sge.sge_length; + /* If the source address is not aligned, try to align it. */ + off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1); + if (off) { + u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr & + ~(sizeof(u32) - 1)); + u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE); + u32 y; + + y = sizeof(u32) - off; + if (len > y) + len = y; + if (len + extra >= sizeof(u32)) { + data |= set_upper_bits(v, extra * + BITS_PER_BYTE); + len = sizeof(u32) - extra; + if (len == length) { + last = data; + break; + } + __raw_writel(data, piobuf); + piobuf++; + extra = 0; + data = 0; + } else { + /* Clear unused upper bytes */ + data |= clear_upper_bytes(v, len, extra); + if (len == length) { + last = data; + break; + } + extra += len; + } + } else if (extra) { + /* Source address is aligned. */ + u32 *addr = (u32 *) ss->sge.vaddr; + int shift = extra * BITS_PER_BYTE; + int ushift = 32 - shift; + u32 l = len; + + while (l >= sizeof(u32)) { + u32 v = *addr; + + data |= set_upper_bits(v, shift); + __raw_writel(data, piobuf); + data = get_upper_bits(v, ushift); + piobuf++; + addr++; + l -= sizeof(u32); + } + /* + * We still have 'extra' number of bytes leftover. + */ + if (l) { + u32 v = *addr; + + if (l + extra >= sizeof(u32)) { + data |= set_upper_bits(v, shift); + len -= l + extra - sizeof(u32); + if (len == length) { + last = data; + break; + } + __raw_writel(data, piobuf); + piobuf++; + extra = 0; + data = 0; + } else { + /* Clear unused upper bytes */ + data |= clear_upper_bytes(v, l, + extra); + if (len == length) { + last = data; + break; + } + extra += l; + } + } else if (len == length) { + last = data; + break; + } + } else if (len == length) { + u32 w; + + /* + * Need to round up for the last dword in the + * packet. + */ + w = (len + 3) >> 2; + __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1); + piobuf += w - 1; + last = ((u32 *) ss->sge.vaddr)[w - 1]; + break; + } else { + u32 w = len >> 2; + + __iowrite32_copy(piobuf, ss->sge.vaddr, w); + piobuf += w; + + extra = len & (sizeof(u32) - 1); + if (extra) { + u32 v = ((u32 *) ss->sge.vaddr)[w]; + + /* Clear unused upper bytes */ + data = clear_upper_bytes(v, extra, 0); + } + } + update_sge(ss, len); + length -= len; + } + /* Update address before sending packet. */ + update_sge(ss, length); + /* must flush early everything before trigger word */ + ipath_flush_wc(); + __raw_writel(last, piobuf); + /* be sure trigger word is written */ + ipath_flush_wc(); +} + +/** + * ipath_verbs_send - send a packet + * @dd: the infinipath device + * @hdrwords: the number of words in the header + * @hdr: the packet header + * @len: the length of the packet in bytes + * @ss: the SGE to send + */ +int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords, + u32 *hdr, u32 len, struct ipath_sge_state *ss) +{ + u32 __iomem *piobuf; + u32 plen; + int ret; + + /* +1 is for the qword padding of pbc */ + plen = hdrwords + ((len + 3) >> 2) + 1; + if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) { + ipath_dbg("packet len 0x%x too long, failing\n", plen); + ret = -EINVAL; + goto bail; + } + + /* Get a PIO buffer to use. */ + piobuf = ipath_getpiobuf(dd, NULL); + if (unlikely(piobuf == NULL)) { + ret = -EBUSY; + goto bail; + } + + /* + * Write len to control qword, no flags. + * We have to flush after the PBC for correctness on some cpus + * or WC buffer can be written out of order. + */ + writeq(plen, piobuf); + ipath_flush_wc(); + piobuf += 2; + if (len == 0) { + /* + * If there is just the header portion, must flush before + * writing last word of header for correctness, and after + * the last header word (trigger word). + */ + __iowrite32_copy(piobuf, hdr, hdrwords - 1); + ipath_flush_wc(); + __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1); + ipath_flush_wc(); + ret = 0; + goto bail; + } + + __iowrite32_copy(piobuf, hdr, hdrwords); + piobuf += hdrwords; + + /* The common case is aligned and contained in one segment. */ + if (likely(ss->num_sge == 1 && len <= ss->sge.length && + !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) { + u32 w; + u32 *addr = (u32 *) ss->sge.vaddr; + + /* Update address before sending packet. */ + update_sge(ss, len); + /* Need to round up for the last dword in the packet. */ + w = (len + 3) >> 2; + __iowrite32_copy(piobuf, addr, w - 1); + /* must flush early everything before trigger word */ + ipath_flush_wc(); + __raw_writel(addr[w - 1], piobuf + w - 1); + /* be sure trigger word is written */ + ipath_flush_wc(); + ret = 0; + goto bail; + } + copy_io(piobuf, ss, len); + ret = 0; + +bail: + return ret; +} + +int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords, + u64 *rwords, u64 *spkts, u64 *rpkts, + u64 *xmit_wait) +{ + int ret; + + if (!(dd->ipath_flags & IPATH_INITTED)) { + /* no hardware, freeze, etc. */ + ipath_dbg("unit %u not usable\n", dd->ipath_unit); + ret = -EINVAL; + goto bail; + } + *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); + *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); + *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); + *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); + *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt); + + ret = 0; + +bail: + return ret; +} + +/** + * ipath_get_counters - get various chip counters + * @dd: the infinipath device + * @cntrs: counters are placed here + * + * Return the counters needed by recv_pma_get_portcounters(). + */ +int ipath_get_counters(struct ipath_devdata *dd, + struct ipath_verbs_counters *cntrs) +{ + int ret; + + if (!(dd->ipath_flags & IPATH_INITTED)) { + /* no hardware, freeze, etc. */ + ipath_dbg("unit %u not usable\n", dd->ipath_unit); + ret = -EINVAL; + goto bail; + } + cntrs->symbol_error_counter = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt); + cntrs->link_error_recovery_counter = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt); + /* + * The link downed counter counts when the other side downs the + * connection. We add in the number of times we downed the link + * due to local link integrity errors to compensate. + */ + cntrs->link_downed_counter = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt); + cntrs->port_rcv_errors = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) + + ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) + + ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) + + ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) + + ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) + + ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) + + ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) + + ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) + + ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt); + cntrs->port_rcv_remphys_errors = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt); + cntrs->port_xmit_discards = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt); + cntrs->port_xmit_data = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); + cntrs->port_rcv_data = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); + cntrs->port_xmit_packets = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); + cntrs->port_rcv_packets = + ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); + cntrs->local_link_integrity_errors = dd->ipath_lli_errors; + cntrs->excessive_buffer_overrun_errors = 0; /* XXX */ + + ret = 0; + +bail: + return ret; +} + /** * ipath_ib_piobufavail - callback when a PIO buffer is available * @arg: the device pointer @@ -595,9 +955,9 @@ static int ipath_query_device(struct ib_device *ibdev, IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | IB_DEVICE_SYS_IMAGE_GUID; props->page_size_cap = PAGE_SIZE; - props->vendor_id = ipath_layer_get_vendorid(dev->dd); - props->vendor_part_id = ipath_layer_get_deviceid(dev->dd); - props->hw_ver = ipath_layer_get_pcirev(dev->dd); + props->vendor_id = dev->dd->ipath_vendorid; + props->vendor_part_id = dev->dd->ipath_deviceid; + props->hw_ver = dev->dd->ipath_pcirev; props->sys_image_guid = dev->sys_image_guid; @@ -618,7 +978,7 @@ static int ipath_query_device(struct ib_device *ibdev, props->max_srq_sge = ib_ipath_max_srq_sges; /* props->local_ca_ack_delay */ props->atomic_cap = IB_ATOMIC_HCA; - props->max_pkeys = ipath_layer_get_npkeys(dev->dd); + props->max_pkeys = ipath_get_npkeys(dev->dd); props->max_mcast_grp = ib_ipath_max_mcast_grps; props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached; props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * @@ -643,12 +1003,17 @@ const u8 ipath_cvt_physportstate[16] = { [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = 6, }; +u32 ipath_get_cr_errpkey(struct ipath_devdata *dd) +{ + return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey); +} + static int ipath_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { struct ipath_ibdev *dev = to_idev(ibdev); enum ib_mtu mtu; - u16 lid = ipath_layer_get_lid(dev->dd); + u16 lid = dev->dd->ipath_lid; u64 ibcstat; memset(props, 0, sizeof(*props)); @@ -656,16 +1021,16 @@ static int ipath_query_port(struct ib_device *ibdev, props->lmc = dev->mkeyprot_resv_lmc & 7; props->sm_lid = dev->sm_lid; props->sm_sl = dev->sm_sl; - ibcstat = ipath_layer_get_lastibcstat(dev->dd); + ibcstat = dev->dd->ipath_lastibcstat; props->state = ((ibcstat >> 4) & 0x3) + 1; /* See phys_state_show() */ props->phys_state = ipath_cvt_physportstate[ - ipath_layer_get_lastibcstat(dev->dd) & 0xf]; + dev->dd->ipath_lastibcstat & 0xf]; props->port_cap_flags = dev->port_cap_flags; props->gid_tbl_len = 1; props->max_msg_sz = 0x80000000; - props->pkey_tbl_len = ipath_layer_get_npkeys(dev->dd); - props->bad_pkey_cntr = ipath_layer_get_cr_errpkey(dev->dd) - + props->pkey_tbl_len = ipath_get_npkeys(dev->dd); + props->bad_pkey_cntr = ipath_get_cr_errpkey(dev->dd) - dev->z_pkey_violations; props->qkey_viol_cntr = dev->qkey_violations; props->active_width = IB_WIDTH_4X; @@ -675,7 +1040,7 @@ static int ipath_query_port(struct ib_device *ibdev, props->init_type_reply = 0; props->max_mtu = IB_MTU_4096; - switch (ipath_layer_get_ibmtu(dev->dd)) { + switch (dev->dd->ipath_ibmtu) { case 4096: mtu = IB_MTU_4096; break; @@ -734,7 +1099,7 @@ static int ipath_modify_port(struct ib_device *ibdev, dev->port_cap_flags |= props->set_port_cap_mask; dev->port_cap_flags &= ~props->clr_port_cap_mask; if (port_modify_mask & IB_PORT_SHUTDOWN) - ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKDOWN); + ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN); if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR) dev->qkey_violations = 0; return 0; @@ -751,7 +1116,7 @@ static int ipath_query_gid(struct ib_device *ibdev, u8 port, goto bail; } gid->global.subnet_prefix = dev->gid_prefix; - gid->global.interface_id = ipath_layer_get_guid(dev->dd); + gid->global.interface_id = dev->dd->ipath_guid; ret = 0; @@ -902,25 +1267,50 @@ static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) return 0; } +/** + * ipath_get_npkeys - return the size of the PKEY table for port 0 + * @dd: the infinipath device + */ +unsigned ipath_get_npkeys(struct ipath_devdata *dd) +{ + return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys); +} + +/** + * ipath_get_pkey - return the indexed PKEY from the port 0 PKEY table + * @dd: the infinipath device + * @index: the PKEY index + */ +unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index) +{ + unsigned ret; + + if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys)) + ret = 0; + else + ret = dd->ipath_pd[0]->port_pkeys[index]; + + return ret; +} + static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { struct ipath_ibdev *dev = to_idev(ibdev); int ret; - if (index >= ipath_layer_get_npkeys(dev->dd)) { + if (index >= ipath_get_npkeys(dev->dd)) { ret = -EINVAL; goto bail; } - *pkey = ipath_layer_get_pkey(dev->dd, index); + *pkey = ipath_get_pkey(dev->dd, index); ret = 0; bail: return ret; } - /** * ipath_alloc_ucontext - allocate a ucontest * @ibdev: the infiniband device @@ -953,6 +1343,63 @@ static int ipath_dealloc_ucontext(struct ib_ucontext *context) static int ipath_verbs_register_sysfs(struct ib_device *dev); +static void __verbs_timer(unsigned long arg) +{ + struct ipath_devdata *dd = (struct ipath_devdata *) arg; + + /* + * If port 0 receive packet interrupts are not available, or + * can be missed, poll the receive queue + */ + if (dd->ipath_flags & IPATH_POLL_RX_INTR) + ipath_kreceive(dd); + + /* Handle verbs layer timeouts. */ + ipath_ib_timer(dd->verbs_dev); + + mod_timer(&dd->verbs_timer, jiffies + 1); +} + +static int enable_timer(struct ipath_devdata *dd) +{ + /* + * Early chips had a design flaw where the chip and kernel idea + * of the tail register don't always agree, and therefore we won't + * get an interrupt on the next packet received. + * If the board supports per packet receive interrupts, use it. + * Otherwise, the timer function periodically checks for packets + * to cover this case. + * Either way, the timer is needed for verbs layer related + * processing. + */ + if (dd->ipath_flags & IPATH_GPIO_INTR) { + ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect, + 0x2074076542310ULL); + /* Enable GPIO bit 2 interrupt */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, + (u64) (1 << 2)); + } + + init_timer(&dd->verbs_timer); + dd->verbs_timer.function = __verbs_timer; + dd->verbs_timer.data = (unsigned long)dd; + dd->verbs_timer.expires = jiffies + 1; + add_timer(&dd->verbs_timer); + + return 0; +} + +static int disable_timer(struct ipath_devdata *dd) +{ + /* Disable GPIO bit 2 interrupt */ + if (dd->ipath_flags & IPATH_GPIO_INTR) + ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0); + + del_timer_sync(&dd->verbs_timer); + + return 0; +} + /** * ipath_register_ib_device - register our device with the infiniband core * @dd: the device data structure @@ -960,7 +1407,7 @@ static int ipath_verbs_register_sysfs(struct ib_device *dev); */ int ipath_register_ib_device(struct ipath_devdata *dd) { - struct ipath_layer_counters cntrs; + struct ipath_verbs_counters cntrs; struct ipath_ibdev *idev; struct ib_device *dev; int ret; @@ -1020,7 +1467,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd) idev->link_width_enabled = 3; /* 1x or 4x */ /* Snapshot current HW counters to "clear" them. */ - ipath_layer_get_counters(dd, &cntrs); + ipath_get_counters(dd, &cntrs); idev->z_symbol_error_counter = cntrs.symbol_error_counter; idev->z_link_error_recovery_counter = cntrs.link_error_recovery_counter; @@ -1044,14 +1491,14 @@ int ipath_register_ib_device(struct ipath_devdata *dd) * device types in the system, we can't be sure this is unique. */ if (!sys_image_guid) - sys_image_guid = ipath_layer_get_guid(dd); + sys_image_guid = dd->ipath_guid; idev->sys_image_guid = sys_image_guid; idev->ib_unit = dd->ipath_unit; idev->dd = dd; strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX); dev->owner = THIS_MODULE; - dev->node_guid = ipath_layer_get_guid(dd); + dev->node_guid = dd->ipath_guid; dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION; dev->uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | @@ -1085,7 +1532,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd) (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); dev->node_type = IB_NODE_CA; dev->phys_port_cnt = 1; - dev->dma_device = ipath_layer_get_device(dd); + dev->dma_device = &dd->pcidev->dev; dev->class_dev.dev = dev->dma_device; dev->query_device = ipath_query_device; dev->modify_device = ipath_modify_device; @@ -1139,7 +1586,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd) if (ipath_verbs_register_sysfs(dev)) goto err_class; - ipath_layer_enable_timer(dd); + enable_timer(dd); goto bail; @@ -1164,7 +1611,7 @@ void ipath_unregister_ib_device(struct ipath_ibdev *dev) { struct ib_device *ibdev = &dev->ibdev; - ipath_layer_disable_timer(dev->dd); + disable_timer(dev->dd); ib_unregister_device(ibdev); @@ -1197,7 +1644,7 @@ static ssize_t show_rev(struct class_device *cdev, char *buf) struct ipath_ibdev *dev = container_of(cdev, struct ipath_ibdev, ibdev.class_dev); - return sprintf(buf, "%x\n", ipath_layer_get_pcirev(dev->dd)); + return sprintf(buf, "%x\n", dev->dd->ipath_pcirev); } static ssize_t show_hca(struct class_device *cdev, char *buf) @@ -1206,7 +1653,7 @@ static ssize_t show_hca(struct class_device *cdev, char *buf) container_of(cdev, struct ipath_ibdev, ibdev.class_dev); int ret; - ret = ipath_layer_get_boardname(dev->dd, buf, 128); + ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128); if (ret < 0) goto bail; strcat(buf, "\n"); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index d6faa4ba6067..00f4cecc258e 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -153,19 +153,6 @@ struct ipath_mcast { int n_attached; }; -/* Memory region */ -struct ipath_mr { - struct ib_mr ibmr; - struct ipath_mregion mr; /* must be last */ -}; - -/* Fast memory region */ -struct ipath_fmr { - struct ib_fmr ibfmr; - u8 page_shift; - struct ipath_mregion mr; /* must be last */ -}; - /* Protection domain */ struct ipath_pd { struct ib_pd ibpd; @@ -216,6 +203,54 @@ struct ipath_cq { struct ipath_mmap_info *ip; }; +/* + * A segment is a linear region of low physical memory. + * XXX Maybe we should use phys addr here and kmap()/kunmap(). + * Used by the verbs layer. + */ +struct ipath_seg { + void *vaddr; + size_t length; +}; + +/* The number of ipath_segs that fit in a page. */ +#define IPATH_SEGSZ (PAGE_SIZE / sizeof (struct ipath_seg)) + +struct ipath_segarray { + struct ipath_seg segs[IPATH_SEGSZ]; +}; + +struct ipath_mregion { + u64 user_base; /* User's address for this region */ + u64 iova; /* IB start address of this region */ + size_t length; + u32 lkey; + u32 offset; /* offset (bytes) to start of region */ + int access_flags; + u32 max_segs; /* number of ipath_segs in all the arrays */ + u32 mapsz; /* size of the map array */ + struct ipath_segarray *map[0]; /* the segments */ +}; + +/* + * These keep track of the copy progress within a memory region. + * Used by the verbs layer. + */ +struct ipath_sge { + struct ipath_mregion *mr; + void *vaddr; /* current pointer into the segment */ + u32 sge_length; /* length of the SGE */ + u32 length; /* remaining length of the segment */ + u16 m; /* current index: mr->map[m] */ + u16 n; /* current index: mr->map[m]->segs[n] */ +}; + +/* Memory region */ +struct ipath_mr { + struct ib_mr ibmr; + struct ipath_mregion mr; /* must be last */ +}; + /* * Send work request queue entry. * The size of the sg_list is determined when the QP is created and stored @@ -270,6 +305,12 @@ struct ipath_srq { u32 limit; }; +struct ipath_sge_state { + struct ipath_sge *sg_list; /* next SGE to be used if any */ + struct ipath_sge sge; /* progress state for the current SGE */ + u8 num_sge; +}; + /* * Variables prefixed with s_ are for the requester (sender). * Variables prefixed with r_ are for the responder (receiver). @@ -500,8 +541,19 @@ struct ipath_ibdev { struct ipath_opcode_stats opstats[128]; }; -struct ipath_ucontext { - struct ib_ucontext ibucontext; +struct ipath_verbs_counters { + u64 symbol_error_counter; + u64 link_error_recovery_counter; + u64 link_downed_counter; + u64 port_rcv_errors; + u64 port_rcv_remphys_errors; + u64 port_xmit_discards; + u64 port_xmit_data; + u64 port_rcv_data; + u64 port_xmit_packets; + u64 port_rcv_packets; + u32 local_link_integrity_errors; + u32 excessive_buffer_overrun_errors; }; static inline struct ipath_mr *to_imr(struct ib_mr *ibmr) @@ -509,11 +561,6 @@ static inline struct ipath_mr *to_imr(struct ib_mr *ibmr) return container_of(ibmr, struct ipath_mr, ibmr); } -static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr) -{ - return container_of(ibfmr, struct ipath_fmr, ibfmr); -} - static inline struct ipath_pd *to_ipd(struct ib_pd *ibpd) { return container_of(ibpd, struct ipath_pd, ibpd); @@ -551,12 +598,6 @@ int ipath_process_mad(struct ib_device *ibdev, struct ib_grh *in_grh, struct ib_mad *in_mad, struct ib_mad *out_mad); -static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext - *ibucontext) -{ - return container_of(ibucontext, struct ipath_ucontext, ibucontext); -} - /* * Compare the lower 24 bits of the two values. * Returns an integer <, ==, or > than zero. @@ -568,6 +609,13 @@ static inline int ipath_cmp24(u32 a, u32 b) struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid); +int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords, + u64 *rwords, u64 *spkts, u64 *rpkts, + u64 *xmit_wait); + +int ipath_get_counters(struct ipath_devdata *dd, + struct ipath_verbs_counters *cntrs); + int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); @@ -598,6 +646,9 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc); void ipath_get_credit(struct ipath_qp *qp, u32 aeth); +int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords, + u32 *hdr, u32 len, struct ipath_sge_state *ss); + void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig); int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss, @@ -721,6 +772,12 @@ int ipath_ib_piobufavail(struct ipath_ibdev *); void ipath_ib_timer(struct ipath_ibdev *); +unsigned ipath_get_npkeys(struct ipath_devdata *); + +u32 ipath_get_cr_errpkey(struct ipath_devdata *); + +unsigned ipath_get_pkey(struct ipath_devdata *, unsigned); + extern const enum ib_wc_opcode ib_ipath_wc_opcode[]; extern const u8 ipath_cvt_physportstate[]; From b55f4f06c834a67f949a5219c5f97ffafa240989 Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Fri, 25 Aug 2006 11:24:33 -0700 Subject: [PATCH 0748/1063] IB/ipath: simplify debugging code after ipath_core and ib_ipath merger Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_driver.c | 4 +- drivers/infiniband/hw/ipath/ipath_kernel.h | 4 +- drivers/infiniband/hw/ipath/ipath_keys.c | 3 +- drivers/infiniband/hw/ipath/ipath_qp.c | 10 +- drivers/infiniband/hw/ipath/ipath_sysfs.c | 2 +- drivers/infiniband/hw/ipath/ipath_verbs.c | 19 +--- drivers/infiniband/hw/ipath/ipath_verbs.h | 1 - drivers/infiniband/hw/ipath/verbs_debug.h | 108 --------------------- 8 files changed, 17 insertions(+), 134 deletions(-) delete mode 100644 drivers/infiniband/hw/ipath/verbs_debug.h diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 958cc9b33c8f..e6261bb9a8a1 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -58,7 +58,7 @@ const char *ipath_get_unit_name(int unit) * The size has to be longer than this string, so we can append * board/chip information to it in the init code. */ -const char ipath_core_version[] = IPATH_IDSTR "\n"; +const char ib_ipath_version[] = IPATH_IDSTR "\n"; static struct idr unit_table; DEFINE_SPINLOCK(ipath_devs_lock); @@ -1847,7 +1847,7 @@ static int __init infinipath_init(void) { int ret; - ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ipath_core_version); + ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version); /* * These must be called before the driver is registered with diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 999249b7f27f..af342314b368 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -785,7 +785,7 @@ static inline u32 ipath_read_creg32(const struct ipath_devdata *dd, struct device_driver; -extern const char ipath_core_version[]; +extern const char ib_ipath_version[]; int ipath_driver_create_group(struct device_driver *); void ipath_driver_remove_group(struct device_driver *); @@ -815,7 +815,7 @@ const char *ipath_get_unit_name(int unit); extern struct mutex ipath_mutex; -#define IPATH_DRV_NAME "ipath_core" +#define IPATH_DRV_NAME "ib_ipath" #define IPATH_MAJOR 233 #define IPATH_USER_MINOR_BASE 0 #define IPATH_SMA_MINOR 128 diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c index a5ca279370aa..ba1b93226caa 100644 --- a/drivers/infiniband/hw/ipath/ipath_keys.c +++ b/drivers/infiniband/hw/ipath/ipath_keys.c @@ -34,6 +34,7 @@ #include #include "ipath_verbs.h" +#include "ipath_kernel.h" /** * ipath_alloc_lkey - allocate an lkey @@ -60,7 +61,7 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr) r = (r + 1) & (rkt->max - 1); if (r == n) { spin_unlock_irqrestore(&rkt->lock, flags); - _VERBS_INFO("LKEY table full\n"); + ipath_dbg(KERN_INFO "LKEY table full\n"); ret = 0; goto bail; } diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 9228d59b59d4..b86858e70f2a 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -274,7 +274,7 @@ void ipath_free_all_qps(struct ipath_qp_table *qpt) free_qpn(qpt, qp->ibqp.qp_num); if (!atomic_dec_and_test(&qp->refcount) || !ipath_destroy_qp(&qp->ibqp)) - _VERBS_INFO("QP memory leak!\n"); + ipath_dbg(KERN_INFO "QP memory leak!\n"); qp = nqp; } } @@ -362,8 +362,8 @@ void ipath_error_qp(struct ipath_qp *qp) struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ib_wc wc; - _VERBS_INFO("QP%d/%d in error state\n", - qp->ibqp.qp_num, qp->remote_qpn); + ipath_dbg(KERN_INFO "QP%d/%d in error state\n", + qp->ibqp.qp_num, qp->remote_qpn); spin_lock(&dev->pending_lock); /* XXX What if its already removed by the timeout code? */ @@ -945,8 +945,8 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc) struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); - _VERBS_INFO("Send queue error on QP%d/%d: err: %d\n", - qp->ibqp.qp_num, qp->remote_qpn, wc->status); + ipath_dbg(KERN_INFO "Send queue error on QP%d/%d: err: %d\n", + qp->ibqp.qp_num, qp->remote_qpn, wc->status); spin_lock(&dev->pending_lock); /* XXX What if its already removed by the timeout code? */ diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c index 7396a63840db..56f12202ff49 100644 --- a/drivers/infiniband/hw/ipath/ipath_sysfs.c +++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c @@ -75,7 +75,7 @@ bail: static ssize_t show_version(struct device_driver *dev, char *buf) { /* The string printed here is already newline-terminated. */ - return scnprintf(buf, PAGE_SIZE, "%s", ipath_core_version); + return scnprintf(buf, PAGE_SIZE, "%s", ib_ipath_version); } static ssize_t show_num_units(struct device_driver *dev, char *buf) diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 3c47620e9887..1776330914e5 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -50,10 +50,6 @@ module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint, MODULE_PARM_DESC(lkey_table_size, "LKEY table size in bits (2^n, 1 <= n <= 23)"); -unsigned int ib_ipath_debug; /* debug mask */ -module_param_named(debug, ib_ipath_debug, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(debug, "Verbs debug mask"); - static unsigned int ib_ipath_max_pds = 0xFFFF; module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(max_pds, @@ -1598,8 +1594,7 @@ err_lk: kfree(idev->qp_table.table); err_qp: ib_dealloc_device(dev); - _VERBS_ERROR("ib_ipath%d cannot register verbs (%d)!\n", - dd->ipath_unit, -ret); + ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret); idev = NULL; bail: @@ -1618,17 +1613,13 @@ void ipath_unregister_ib_device(struct ipath_ibdev *dev) if (!list_empty(&dev->pending[0]) || !list_empty(&dev->pending[1]) || !list_empty(&dev->pending[2])) - _VERBS_ERROR("ipath%d pending list not empty!\n", - dev->ib_unit); + ipath_dev_err(dev->dd, "pending list not empty!\n"); if (!list_empty(&dev->piowait)) - _VERBS_ERROR("ipath%d piowait list not empty!\n", - dev->ib_unit); + ipath_dev_err(dev->dd, "piowait list not empty!\n"); if (!list_empty(&dev->rnrwait)) - _VERBS_ERROR("ipath%d rnrwait list not empty!\n", - dev->ib_unit); + ipath_dev_err(dev->dd, "rnrwait list not empty!\n"); if (!ipath_mcast_tree_empty()) - _VERBS_ERROR("ipath%d multicast table memory leak!\n", - dev->ib_unit); + ipath_dev_err(dev->dd, "multicast table memory leak!\n"); /* * Note that ipath_unregister_ib_device() can be called before all * the QPs are destroyed! diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 00f4cecc258e..9cc0dbfe8602 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -42,7 +42,6 @@ #include #include "ipath_layer.h" -#include "verbs_debug.h" #define QPN_MAX (1 << 24) #define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) diff --git a/drivers/infiniband/hw/ipath/verbs_debug.h b/drivers/infiniband/hw/ipath/verbs_debug.h deleted file mode 100644 index 6186676f2a16..000000000000 --- a/drivers/infiniband/hw/ipath/verbs_debug.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _VERBS_DEBUG_H -#define _VERBS_DEBUG_H - -/* - * This file contains tracing code for the ib_ipath kernel module. - */ -#ifndef _VERBS_DEBUGGING /* tracing enabled or not */ -#define _VERBS_DEBUGGING 1 -#endif - -extern unsigned ib_ipath_debug; - -#define _VERBS_ERROR(fmt,...) \ - do { \ - printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \ - } while(0) - -#define _VERBS_UNIT_ERROR(unit,fmt,...) \ - do { \ - printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \ - } while(0) - -#if _VERBS_DEBUGGING - -/* - * Mask values for debugging. The scheme allows us to compile out any - * of the debug tracing stuff, and if compiled in, to enable or - * disable dynamically. - * This can be set at modprobe time also: - * modprobe ib_path ib_ipath_debug=3 - */ - -#define __VERBS_INFO 0x1 /* generic low verbosity stuff */ -#define __VERBS_DBG 0x2 /* generic debug */ -#define __VERBS_VDBG 0x4 /* verbose debug */ -#define __VERBS_SMADBG 0x8000 /* sma packet debug */ - -#define _VERBS_INFO(fmt,...) \ - do { \ - if (unlikely(ib_ipath_debug&__VERBS_INFO)) \ - printk(KERN_INFO "%s: " fmt,"ib_ipath", \ - ##__VA_ARGS__); \ - } while(0) - -#define _VERBS_DBG(fmt,...) \ - do { \ - if (unlikely(ib_ipath_debug&__VERBS_DBG)) \ - printk(KERN_DEBUG "%s: " fmt, __func__, \ - ##__VA_ARGS__); \ - } while(0) - -#define _VERBS_VDBG(fmt,...) \ - do { \ - if (unlikely(ib_ipath_debug&__VERBS_VDBG)) \ - printk(KERN_DEBUG "%s: " fmt, __func__, \ - ##__VA_ARGS__); \ - } while(0) - -#define _VERBS_SMADBG(fmt,...) \ - do { \ - if (unlikely(ib_ipath_debug&__VERBS_SMADBG)) \ - printk(KERN_DEBUG "%s: " fmt, __func__, \ - ##__VA_ARGS__); \ - } while(0) - -#else /* ! _VERBS_DEBUGGING */ - -#define _VERBS_INFO(fmt,...) -#define _VERBS_DBG(fmt,...) -#define _VERBS_VDBG(fmt,...) -#define _VERBS_SMADBG(fmt,...) - -#endif /* _VERBS_DEBUGGING */ - -#endif /* _VERBS_DEBUG_H */ From 0fd41363e0785247b7c19127318abc8b5eacc86b Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Fri, 25 Aug 2006 11:24:34 -0700 Subject: [PATCH 0749/1063] IB/ipath: remove stale references to userspace SMA When we first submitted a userspace subnet management agent, it was rejected, so we left it out of the final driver submission. This patch removes a number of vestigial references to it. Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_common.h | 16 ++------- drivers/infiniband/hw/ipath/ipath_debug.h | 2 -- drivers/infiniband/hw/ipath/ipath_driver.c | 17 ++++----- drivers/infiniband/hw/ipath/ipath_file_ops.c | 4 +-- drivers/infiniband/hw/ipath/ipath_fs.c | 4 +-- drivers/infiniband/hw/ipath/ipath_init_chip.c | 12 +++---- drivers/infiniband/hw/ipath/ipath_intr.c | 14 ++++---- drivers/infiniband/hw/ipath/ipath_kernel.h | 11 ++---- drivers/infiniband/hw/ipath/ipath_layer.c | 3 -- drivers/infiniband/hw/ipath/ipath_layer.h | 3 -- drivers/infiniband/hw/ipath/ipath_qp.c | 35 ------------------- drivers/infiniband/hw/ipath/ipath_stats.c | 27 -------------- drivers/infiniband/hw/ipath/ipath_sysfs.c | 4 +-- drivers/infiniband/hw/ipath/ipath_verbs.c | 2 +- 14 files changed, 35 insertions(+), 119 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h index 062bd392e7e5..f8df3b771c26 100644 --- a/drivers/infiniband/hw/ipath/ipath_common.h +++ b/drivers/infiniband/hw/ipath/ipath_common.h @@ -106,9 +106,9 @@ struct infinipath_stats { __u64 sps_ether_spkts; /* number of "ethernet" packets received by driver */ __u64 sps_ether_rpkts; - /* number of SMA packets sent by driver */ + /* number of SMA packets sent by driver. Obsolete. */ __u64 sps_sma_spkts; - /* number of SMA packets received by driver */ + /* number of SMA packets received by driver. Obsolete. */ __u64 sps_sma_rpkts; /* number of times all ports rcvhdrq was full and packet dropped */ __u64 sps_hdrqfull; @@ -138,7 +138,7 @@ struct infinipath_stats { __u64 sps_pageunlocks; /* * Number of packets dropped in kernel other than errors (ether - * packets if ipath not configured, sma/mad, etc.) + * packets if ipath not configured, etc.) */ __u64 sps_krdrops; /* pad for future growth */ @@ -153,8 +153,6 @@ struct infinipath_stats { #define IPATH_STATUS_DISABLED 0x2 /* hardware disabled */ /* Device has been disabled via admin request */ #define IPATH_STATUS_ADMIN_DISABLED 0x4 -#define IPATH_STATUS_OIB_SMA 0x8 /* ipath_mad kernel SMA running */ -#define IPATH_STATUS_SMA 0x10 /* user SMA running */ /* Chip has been found and initted */ #define IPATH_STATUS_CHIP_PRESENT 0x20 /* IB link is at ACTIVE, usable for data traffic */ @@ -465,14 +463,6 @@ struct __ipath_sendpkt { struct ipath_iovec sps_iov[4]; }; -/* Passed into SMA special file's ->read and ->write methods. */ -struct ipath_sma_pkt -{ - __u32 unit; /* unit on which to send packet */ - __u64 data; /* address of payload in userspace */ - __u32 len; /* length of payload */ -}; - /* * Data layout in I2C flash (for GUID, etc.) * All fields are little-endian binary unless otherwise stated diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h index f415beda0d32..df69f0d80b8b 100644 --- a/drivers/infiniband/hw/ipath/ipath_debug.h +++ b/drivers/infiniband/hw/ipath/ipath_debug.h @@ -60,7 +60,6 @@ #define __IPATH_USER_SEND 0x1000 /* use user mode send */ #define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */ #define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */ -#define __IPATH_SMADBG 0x8000 /* sma packet debug */ #define __IPATH_IPATHDBG 0x10000 /* Ethernet (IPATH) gen debug */ #define __IPATH_IPATHWARN 0x20000 /* Ethernet (IPATH) warnings */ #define __IPATH_IPATHERR 0x40000 /* Ethernet (IPATH) errors */ @@ -84,7 +83,6 @@ /* print mmap/nopage stuff, not using VDBG any more */ #define __IPATH_MMDBG 0x0 #define __IPATH_EPKTDBG 0x0 /* print ethernet packet data */ -#define __IPATH_SMADBG 0x0 /* process startup (init)/exit messages */ #define __IPATH_IPATHDBG 0x0 /* Ethernet (IPATH) table dump on */ #define __IPATH_IPATHWARN 0x0 /* Ethernet (IPATH) warnings on */ #define __IPATH_IPATHERR 0x0 /* Ethernet (IPATH) errors on */ diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index e6261bb9a8a1..520c38f13868 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -64,7 +64,7 @@ static struct idr unit_table; DEFINE_SPINLOCK(ipath_devs_lock); LIST_HEAD(ipath_dev_list); -wait_queue_head_t ipath_sma_state_wait; +wait_queue_head_t ipath_state_wait; unsigned ipath_debug = __IPATH_INFO; @@ -618,15 +618,16 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first, static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs) { - dd->ipath_sma_state_wanted = state; - wait_event_interruptible_timeout(ipath_sma_state_wait, + dd->ipath_state_wanted = state; + wait_event_interruptible_timeout(ipath_state_wait, (dd->ipath_flags & state), msecs_to_jiffies(msecs)); - dd->ipath_sma_state_wanted = 0; + dd->ipath_state_wanted = 0; if (!(dd->ipath_flags & state)) { u64 val; - ipath_cdbg(SMA, "Didn't reach linkstate %s within %u ms\n", + ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u" + " ms\n", /* test INIT ahead of DOWN, both can be set */ (state & IPATH_LINKINIT) ? "INIT" : ((state & IPATH_LINKDOWN) ? "DOWN" : @@ -1155,7 +1156,7 @@ int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize) * * do appropriate marking as busy, etc. * returns buffer number if one found (>=0), negative number is error. - * Used by ipath_sma_send_pkt and ipath_layer_send + * Used by ipath_layer_send */ u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum) { @@ -1448,7 +1449,7 @@ static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) int linkcmd = (which >> INFINIPATH_IBCC_LINKCMD_SHIFT) & INFINIPATH_IBCC_LINKCMD_MASK; - ipath_cdbg(SMA, "Trying to move unit %u to %s, current ltstate " + ipath_cdbg(VERBOSE, "Trying to move unit %u to %s, current ltstate " "is %s\n", dd->ipath_unit, what[linkcmd], ipath_ibcstatus_str[ @@ -1457,7 +1458,7 @@ static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]); /* flush all queued sends when going to DOWN or INIT, to be sure that - * they don't block SMA and other MAD packets */ + * they don't block MAD packets */ if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) { ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, INFINIPATH_S_ABORT); diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index f865ce89b73f..4080fed2dcd9 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -1816,7 +1816,7 @@ int ipath_user_add(struct ipath_devdata *dd) if (ret < 0) { ipath_dev_err(dd, "Could not create wildcard " "minor: error %d\n", -ret); - goto bail_sma; + goto bail_user; } atomic_set(&user_setup, 1); @@ -1832,7 +1832,7 @@ int ipath_user_add(struct ipath_devdata *dd) goto bail; -bail_sma: +bail_user: user_cleanup(); bail: return ret; diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 0936d8e8d704..a5eb30a06a5c 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -191,8 +191,8 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf, portinfo[4] = (dd->ipath_lid << 16); /* - * Notimpl yet SMLID (should we store this in the driver, in case - * SMA dies?) CapabilityMask is 0, we don't support any of these + * Notimpl yet SMLID. + * CapabilityMask is 0, we don't support any of these * DiagCode is 0; we don't store any diag info for now Notimpl yet * M_KeyLeasePeriod (we don't support M_Key) */ diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 75c372136702..44669dc2e22d 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -53,8 +53,8 @@ module_param_named(cfgports, ipath_cfgports, ushort, S_IRUGO); MODULE_PARM_DESC(cfgports, "Set max number of ports to use"); /* - * Number of buffers reserved for driver (layered drivers and SMA - * send). Reserved at end of buffer list. Initialized based on + * Number of buffers reserved for driver (verbs and layered drivers.) + * Reserved at end of buffer list. Initialized based on * number of PIO buffers if not set via module interface. * The problem with this is that it's global, but we'll use different * numbers for different chip types. So the default value is not @@ -80,7 +80,7 @@ MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver"); * * Allocate the eager TID buffers and program them into infinipath. * We use the network layer alloc_skb() allocator to allocate the - * memory, and either use the buffers as is for things like SMA + * memory, and either use the buffers as is for things like verbs * packets, or pass the buffers up to the ipath layered driver and * thence the network layer, replacing them as we do so (see * ipath_rcv_layer()). @@ -450,9 +450,9 @@ static void enable_chip(struct ipath_devdata *dd, u32 val; int i; - if (!reinit) { - init_waitqueue_head(&ipath_sma_state_wait); - } + if (!reinit) + init_waitqueue_head(&ipath_state_wait); + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 250e2a9f01bb..49bf7bb15b04 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -201,7 +201,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd, ib_linkstate(lstate)); } else - ipath_cdbg(SMA, "Unit %u link state %s, last " + ipath_cdbg(VERBOSE, "Unit %u link state %s, last " "was %s\n", dd->ipath_unit, ib_linkstate(lstate), ib_linkstate((unsigned) @@ -213,7 +213,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd, if (lstate == IPATH_IBSTATE_INIT || lstate == IPATH_IBSTATE_ARM || lstate == IPATH_IBSTATE_ACTIVE) - ipath_cdbg(SMA, "Unit %u link state down" + ipath_cdbg(VERBOSE, "Unit %u link state down" " (state 0x%x), from %s\n", dd->ipath_unit, (u32)val & IPATH_IBSTATE_MASK, @@ -269,7 +269,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd, INFINIPATH_IBCS_LINKSTATE_MASK) == INFINIPATH_IBCS_L_STATE_ACTIVE) /* if from up to down be more vocal */ - ipath_cdbg(SMA, + ipath_cdbg(VERBOSE, "Unit %u link now down (%s)\n", dd->ipath_unit, ipath_ibcstatus_str[ltstate]); @@ -596,11 +596,11 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) if (!noprint && *msg) ipath_dev_err(dd, "%s error\n", msg); - if (dd->ipath_sma_state_wanted & dd->ipath_flags) { - ipath_cdbg(VERBOSE, "sma wanted state %x, iflags now %x, " - "waking\n", dd->ipath_sma_state_wanted, + if (dd->ipath_state_wanted & dd->ipath_flags) { + ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, " + "waking\n", dd->ipath_state_wanted, dd->ipath_flags); - wake_up_interruptible(&ipath_sma_state_wait); + wake_up_interruptible(&ipath_state_wait); } return chkerrpkts; diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index af342314b368..a600347bba6a 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -245,8 +245,8 @@ struct ipath_devdata { u32 ipath_pioavregs; /* IPATH_POLL, etc. */ u32 ipath_flags; - /* ipath_flags sma is waiting for */ - u32 ipath_sma_state_wanted; + /* ipath_flags driver is waiting for */ + u32 ipath_state_wanted; /* last buffer for user use, first buf for kernel use is this * index. */ u32 ipath_lastport_piobuf; @@ -306,10 +306,6 @@ struct ipath_devdata { u32 ipath_pcibar0; /* so we can rewrite it after a chip reset */ u32 ipath_pcibar1; - /* sequential tries for SMA send and no bufs */ - u32 ipath_nosma_bufs; - /* duration (seconds) ipath_nosma_bufs set */ - u32 ipath_nosma_secs; /* HT/PCI Vendor ID (here for NodeInfo) */ u16 ipath_vendorid; @@ -534,7 +530,7 @@ int ipath_diag_add(struct ipath_devdata *); void ipath_diag_remove(struct ipath_devdata *); void ipath_diag_bringup_link(struct ipath_devdata *); -extern wait_queue_head_t ipath_sma_state_wait; +extern wait_queue_head_t ipath_state_wait; int ipath_user_add(struct ipath_devdata *dd); void ipath_user_remove(struct ipath_devdata *dd); @@ -818,7 +814,6 @@ extern struct mutex ipath_mutex; #define IPATH_DRV_NAME "ib_ipath" #define IPATH_MAJOR 233 #define IPATH_USER_MINOR_BASE 0 -#define IPATH_SMA_MINOR 128 #define IPATH_DIAG_MINOR_BASE 129 #define IPATH_NMINORS 255 diff --git a/drivers/infiniband/hw/ipath/ipath_layer.c b/drivers/infiniband/hw/ipath/ipath_layer.c index 10f578e2aed6..e46aa4ed2a7e 100644 --- a/drivers/infiniband/hw/ipath/ipath_layer.c +++ b/drivers/infiniband/hw/ipath/ipath_layer.c @@ -162,9 +162,6 @@ int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *), if (dd->ipath_layer.l_arg) continue; - if (!(*dd->ipath_statusp & IPATH_STATUS_SMA)) - *dd->ipath_statusp |= IPATH_STATUS_OIB_SMA; - spin_unlock_irqrestore(&ipath_devs_lock, flags); dd->ipath_layer.l_arg = l_add(dd->ipath_unit, dd); spin_lock_irqsave(&ipath_devs_lock, flags); diff --git a/drivers/infiniband/hw/ipath/ipath_layer.h b/drivers/infiniband/hw/ipath/ipath_layer.h index 4a27ede49941..3854a4eae684 100644 --- a/drivers/infiniband/hw/ipath/ipath_layer.h +++ b/drivers/infiniband/hw/ipath/ipath_layer.h @@ -66,9 +66,6 @@ int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd); #define IPATH_LAYER_INT_SEND_CONTINUE 0x10 #define IPATH_LAYER_INT_BCAST 0x40 -/* _verbs_layer.l_flags */ -#define IPATH_VERBS_KERNEL_SMA 0x1 - extern unsigned ipath_debug; /* debugging bit mask */ #endif /* _IPATH_LAYER_H */ diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index b86858e70f2a..c0267cf8ca8c 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -644,33 +644,6 @@ __be32 ipath_compute_aeth(struct ipath_qp *qp) return cpu_to_be32(aeth); } -/** - * set_verbs_flags - set the verbs layer flags - * @dd: the infinipath device - * @flags: the flags to set - */ -static int set_verbs_flags(struct ipath_devdata *dd, unsigned flags) -{ - struct ipath_devdata *ss; - unsigned long lflags; - - spin_lock_irqsave(&ipath_devs_lock, lflags); - - list_for_each_entry(ss, &ipath_dev_list, ipath_list) { - if (!(ss->ipath_flags & IPATH_INITTED)) - continue; - if ((flags & IPATH_VERBS_KERNEL_SMA) && - !(*ss->ipath_statusp & IPATH_STATUS_SMA)) - *ss->ipath_statusp |= IPATH_STATUS_OIB_SMA; - else - *ss->ipath_statusp &= ~IPATH_STATUS_OIB_SMA; - } - - spin_unlock_irqrestore(&ipath_devs_lock, lflags); - - return 0; -} - /** * ipath_create_qp - create a queue pair for a device * @ibpd: the protection domain who's device we create the queue pair for @@ -784,10 +757,6 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, } qp->ip = NULL; ipath_reset_qp(qp); - - /* Tell the core driver that the kernel SMA is present. */ - if (init_attr->qp_type == IB_QPT_SMI) - set_verbs_flags(dev->dd, IPATH_VERBS_KERNEL_SMA); break; default: @@ -862,10 +831,6 @@ int ipath_destroy_qp(struct ib_qp *ibqp) struct ipath_ibdev *dev = to_idev(ibqp->device); unsigned long flags; - /* Tell the core driver that the kernel SMA is gone. */ - if (qp->ibqp.qp_type == IB_QPT_SMI) - set_verbs_flags(dev->dd, 0); - spin_lock_irqsave(&qp->s_lock, flags); qp->state = IB_QPS_ERR; spin_unlock_irqrestore(&qp->s_lock, flags); diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c index 70351b7e35c0..30a825928fcf 100644 --- a/drivers/infiniband/hw/ipath/ipath_stats.c +++ b/drivers/infiniband/hw/ipath/ipath_stats.c @@ -271,33 +271,6 @@ void ipath_get_faststats(unsigned long opaque) } } - if (dd->ipath_nosma_bufs) { - dd->ipath_nosma_secs += 5; - if (dd->ipath_nosma_secs >= 30) { - ipath_cdbg(SMA, "No SMA bufs avail %u seconds; " - "cancelling pending sends\n", - dd->ipath_nosma_secs); - /* - * issue an abort as well, in case we have a packet - * stuck in launch fifo. This could corrupt an - * outgoing user packet in the worst case, - * but this is a pretty catastrophic, anyway. - */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - INFINIPATH_S_ABORT); - ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf, - dd->ipath_piobcnt2k + - dd->ipath_piobcnt4k - - dd->ipath_lastport_piobuf); - /* start again, if necessary */ - dd->ipath_nosma_secs = 0; - } else - ipath_cdbg(SMA, "No SMA bufs avail %u tries, " - "after %u seconds\n", - dd->ipath_nosma_bufs, - dd->ipath_nosma_secs); - } - done: mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5); } diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c index 56f12202ff49..8476dd3c7af4 100644 --- a/drivers/infiniband/hw/ipath/ipath_sysfs.c +++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c @@ -107,8 +107,8 @@ static const char *ipath_status_str[] = { "Initted", "Disabled", "Admin_Disabled", - "OIB_SMA", - "SMA", + "", /* This used to be the old "OIB_SMA" status. */ + "", /* This used to be the old "SMA" status. */ "Present", "IB_link_up", "IB_configured", diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 1776330914e5..ab0006288b41 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -1573,7 +1573,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd) dev->mmap = ipath_mmap; snprintf(dev->node_desc, sizeof(dev->node_desc), - IPATH_IDSTR " %s kernel_SMA", system_utsname.nodename); + IPATH_IDSTR " %s", system_utsname.nodename); ret = ib_register_device(dev); if (ret) From 32c0a26c8f91dbc2797175c2bdff42b54f66c71d Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Fri, 25 Aug 2006 11:24:35 -0700 Subject: [PATCH 0750/1063] IB/ipath: trivial cleanups Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_kernel.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index a600347bba6a..0ae2729c7ea2 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -528,7 +528,6 @@ void ipath_cdev_cleanup(struct cdev **cdevp, int ipath_diag_add(struct ipath_devdata *); void ipath_diag_remove(struct ipath_devdata *); -void ipath_diag_bringup_link(struct ipath_devdata *); extern wait_queue_head_t ipath_state_wait; From 98341f261893acd7bc5abee5ddc35337ef49e457 Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Fri, 25 Aug 2006 11:24:36 -0700 Subject: [PATCH 0751/1063] IB/ipath: add new minor device to allow sending of diag packets Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_common.h | 7 + drivers/infiniband/hw/ipath/ipath_diag.c | 153 +++++++++++++++++++++ drivers/infiniband/hw/ipath/ipath_driver.c | 12 ++ drivers/infiniband/hw/ipath/ipath_kernel.h | 4 + 4 files changed, 176 insertions(+) diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h index f8df3b771c26..f577905e3aca 100644 --- a/drivers/infiniband/hw/ipath/ipath_common.h +++ b/drivers/infiniband/hw/ipath/ipath_common.h @@ -463,6 +463,13 @@ struct __ipath_sendpkt { struct ipath_iovec sps_iov[4]; }; +/* Passed into diag data special file's ->write method. */ +struct ipath_diag_pkt { + __u32 unit; + __u64 data; + __u32 len; +}; + /* * Data layout in I2C flash (for GUID, etc.) * All fields are little-endian binary unless otherwise stated diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c index 5d77a74aa57b..28b6b46c106a 100644 --- a/drivers/infiniband/hw/ipath/ipath_diag.c +++ b/drivers/infiniband/hw/ipath/ipath_diag.c @@ -41,6 +41,7 @@ * through the /sys/bus/pci resource mmap interface. */ +#include #include #include @@ -273,6 +274,158 @@ bail: return ret; } +static ssize_t ipath_diagpkt_write(struct file *fp, + const char __user *data, + size_t count, loff_t *off); + +static struct file_operations diagpkt_file_ops = { + .owner = THIS_MODULE, + .write = ipath_diagpkt_write, +}; + +static struct cdev *diagpkt_cdev; +static struct class_device *diagpkt_class_dev; + +int __init ipath_diagpkt_add(void) +{ + return ipath_cdev_init(IPATH_DIAGPKT_MINOR, + "ipath_diagpkt", &diagpkt_file_ops, + &diagpkt_cdev, &diagpkt_class_dev); +} + +void __exit ipath_diagpkt_remove(void) +{ + ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_class_dev); +} + +/** + * ipath_diagpkt_write - write an IB packet + * @fp: the diag data device file pointer + * @data: ipath_diag_pkt structure saying where to get the packet + * @count: size of data to write + * @off: unused by this code + */ +static ssize_t ipath_diagpkt_write(struct file *fp, + const char __user *data, + size_t count, loff_t *off) +{ + u32 __iomem *piobuf; + u32 plen, clen, pbufn; + struct ipath_diag_pkt dp; + u32 *tmpbuf = NULL; + struct ipath_devdata *dd; + ssize_t ret = 0; + u64 val; + + if (count < sizeof(dp)) { + ret = -EINVAL; + goto bail; + } + + if (copy_from_user(&dp, data, sizeof(dp))) { + ret = -EFAULT; + goto bail; + } + + /* send count must be an exact number of dwords */ + if (dp.len & 3) { + ret = -EINVAL; + goto bail; + } + + clen = dp.len >> 2; + + dd = ipath_lookup(dp.unit); + if (!dd || !(dd->ipath_flags & IPATH_PRESENT) || + !dd->ipath_kregbase) { + ipath_cdbg(VERBOSE, "illegal unit %u for diag data send\n", + dp.unit); + ret = -ENODEV; + goto bail; + } + + if (ipath_diag_inuse && !diag_set_link && + !(dd->ipath_flags & IPATH_LINKACTIVE)) { + diag_set_link = 1; + ipath_cdbg(VERBOSE, "Trying to set to set link active for " + "diag pkt\n"); + ipath_set_linkstate(dd, IPATH_IB_LINKARM); + ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE); + } + + if (!(dd->ipath_flags & IPATH_INITTED)) { + /* no hardware, freeze, etc. */ + ipath_cdbg(VERBOSE, "unit %u not usable\n", dd->ipath_unit); + ret = -ENODEV; + goto bail; + } + val = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK; + if (val != IPATH_IBSTATE_INIT && val != IPATH_IBSTATE_ARM && + val != IPATH_IBSTATE_ACTIVE) { + ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n", + dd->ipath_unit, (unsigned long long) val); + ret = -EINVAL; + goto bail; + } + + /* need total length before first word written */ + /* +1 word is for the qword padding */ + plen = sizeof(u32) + dp.len; + + if ((plen + 4) > dd->ipath_ibmaxlen) { + ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n", + plen - 4, dd->ipath_ibmaxlen); + ret = -EINVAL; + goto bail; /* before writing pbc */ + } + tmpbuf = vmalloc(plen); + if (!tmpbuf) { + dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, " + "failing\n"); + ret = -ENOMEM; + goto bail; + } + + if (copy_from_user(tmpbuf, + (const void __user *) (unsigned long) dp.data, + dp.len)) { + ret = -EFAULT; + goto bail; + } + + piobuf = ipath_getpiobuf(dd, &pbufn); + if (!piobuf) { + ipath_cdbg(VERBOSE, "No PIO buffers avail unit for %u\n", + dd->ipath_unit); + ret = -EBUSY; + goto bail; + } + + plen >>= 2; /* in dwords */ + + if (ipath_debug & __IPATH_PKTDBG) + ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n", + dd->ipath_unit, plen - 1, pbufn); + + /* we have to flush after the PBC for correctness on some cpus + * or WC buffer can be written out of order */ + writeq(plen, piobuf); + ipath_flush_wc(); + /* copy all by the trigger word, then flush, so it's written + * to chip before trigger word, then write trigger word, then + * flush again, so packet is sent. */ + __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1); + ipath_flush_wc(); + __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1); + ipath_flush_wc(); + + ret = sizeof(dp); + +bail: + vfree(tmpbuf); + return ret; +} + static int ipath_diag_release(struct inode *in, struct file *fp) { mutex_lock(&ipath_mutex); diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 520c38f13868..8c908b30984e 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -1881,8 +1881,18 @@ static int __init infinipath_init(void) goto bail_group; } + ret = ipath_diagpkt_add(); + if (ret < 0) { + printk(KERN_ERR IPATH_DRV_NAME ": Unable to create " + "diag data device: error %d\n", -ret); + goto bail_ipathfs; + } + goto bail; +bail_ipathfs: + ipath_exit_ipathfs(); + bail_group: ipath_driver_remove_group(&ipath_driver.driver); @@ -1993,6 +2003,8 @@ static void __exit infinipath_cleanup(void) struct ipath_devdata *dd, *tmp; unsigned long flags; + ipath_diagpkt_remove(); + ipath_exit_ipathfs(); ipath_driver_remove_group(&ipath_driver.driver); diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 0ae2729c7ea2..f8accc79b92f 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -789,6 +789,9 @@ int ipath_device_create_group(struct device *, struct ipath_devdata *); void ipath_device_remove_group(struct device *, struct ipath_devdata *); int ipath_expose_reset(struct device *); +int ipath_diagpkt_add(void); +void ipath_diagpkt_remove(void); + int ipath_init_ipathfs(void); void ipath_exit_ipathfs(void); int ipathfs_add_device(struct ipath_devdata *); @@ -813,6 +816,7 @@ extern struct mutex ipath_mutex; #define IPATH_DRV_NAME "ib_ipath" #define IPATH_MAJOR 233 #define IPATH_USER_MINOR_BASE 0 +#define IPATH_DIAGPKT_MINOR 127 #define IPATH_DIAG_MINOR_BASE 129 #define IPATH_NMINORS 255 From eae33d47a797e159306567643284a98ae7428ec4 Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Fri, 25 Aug 2006 11:24:37 -0700 Subject: [PATCH 0752/1063] IB/ipath: do not allow use of CQ entries with invalid counts Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_cq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index 3c4c198a4514..049221bc590e 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -172,7 +172,7 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, struct ipath_cq_wc *wc; struct ib_cq *ret; - if (entries > ib_ipath_max_cqes) { + if (entries < 1 || entries > ib_ipath_max_cqes) { ret = ERR_PTR(-EINVAL); goto done; } @@ -324,6 +324,11 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) u32 head, tail, n; int ret; + if (cqe < 1 || cqe > ib_ipath_max_cqes) { + ret = -EINVAL; + goto bail; + } + /* * Need to use vmalloc() if we want to support large #s of entries. */ From 092260b8f966ebe0742045416082e9a81bd971d1 Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Fri, 25 Aug 2006 11:24:38 -0700 Subject: [PATCH 0753/1063] IB/ipath: account for attached QPs correctly Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_verbs_mcast.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c index cb35679e4a18..085e28b939ec 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c @@ -217,6 +217,8 @@ static int ipath_mcast_add(struct ipath_ibdev *dev, dev->n_mcast_grps_allocated++; spin_unlock(&dev->n_mcast_grps_lock); + mcast->n_attached++; + list_add_tail_rcu(&mqp->list, &mcast->qp_list); atomic_inc(&mcast->refcount); From 525d0ca1d452ed336c1d907fb20c104467a8a47b Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Fri, 25 Aug 2006 11:24:39 -0700 Subject: [PATCH 0754/1063] IB/ipath: support new QLogic product naming scheme This patch only renames files, fixes product names, and updates comments. Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/Makefile | 4 +- drivers/infiniband/hw/ipath/ipath_driver.c | 9 +-- drivers/infiniband/hw/ipath/ipath_file_ops.c | 6 +- .../ipath/{ipath_ht400.c => ipath_iba6110.c} | 39 ++++++----- .../ipath/{ipath_pe800.c => ipath_iba6120.c} | 68 +++++++++---------- drivers/infiniband/hw/ipath/ipath_kernel.h | 8 +-- drivers/infiniband/hw/ipath/ipath_registers.h | 5 +- 7 files changed, 68 insertions(+), 71 deletions(-) rename drivers/infiniband/hw/ipath/{ipath_ht400.c => ipath_iba6110.c} (98%) rename drivers/infiniband/hw/ipath/{ipath_pe800.c => ipath_iba6120.c} (95%) diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile index 690dc713e63e..5e29cb0095e5 100644 --- a/drivers/infiniband/hw/ipath/Makefile +++ b/drivers/infiniband/hw/ipath/Makefile @@ -10,7 +10,8 @@ ib_ipath-y := \ ipath_eeprom.o \ ipath_file_ops.o \ ipath_fs.o \ - ipath_ht400.o \ + ipath_iba6110.o \ + ipath_iba6120.o \ ipath_init_chip.o \ ipath_intr.o \ ipath_keys.o \ @@ -18,7 +19,6 @@ ib_ipath-y := \ ipath_mad.o \ ipath_mmap.o \ ipath_mr.o \ - ipath_pe800.o \ ipath_qp.o \ ipath_rc.o \ ipath_ruc.o \ diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 8c908b30984e..3a15efee7387 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -401,10 +401,10 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, /* setup the chip-specific functions, as early as possible. */ switch (ent->device) { case PCI_DEVICE_ID_INFINIPATH_HT: - ipath_init_ht400_funcs(dd); + ipath_init_iba6110_funcs(dd); break; case PCI_DEVICE_ID_INFINIPATH_PE800: - ipath_init_pe800_funcs(dd); + ipath_init_iba6120_funcs(dd); break; default: ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, " @@ -969,7 +969,8 @@ reloop: */ if (l == hdrqtail || (i && !(i&0xf))) { u64 lval; - if (l == hdrqtail) /* PE-800 interrupt only on last */ + if (l == hdrqtail) + /* request IBA6120 interrupt only on last */ lval = dd->ipath_rhdrhead_intr_off | l; else lval = l; @@ -983,7 +984,7 @@ reloop: } if (!dd->ipath_rhdrhead_intr_off && !reloop) { - /* HT-400 workaround; we can have a race clearing chip + /* IBA6110 workaround; we can have a race clearing chip * interrupt with another interrupt about to be delivered, * and can clear it before it is delivered on the GPIO * workaround. By doing the extra check here for the diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 4080fed2dcd9..6ba9a2d1e6ec 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -1110,7 +1110,7 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) ret = mmap_rcvegrbufs(vma, pd); else if (pgaddr == (u64) pd->port_rcvhdrq_phys) { /* - * The rcvhdrq itself; readonly except on HT-400 (so have + * The rcvhdrq itself; readonly except on HT (so have * to allow writable mapping), multiple pages, contiguous * from an i/o perspective. */ @@ -1298,14 +1298,14 @@ static int find_best_unit(struct file *fp) * This code is present to allow a knowledgeable person to * specify the layout of processes to processors before opening * this driver, and then we'll assign the process to the "closest" - * HT-400 to that processor (we assume reasonable connectivity, + * InfiniPath chip to that processor (we assume reasonable connectivity, * for now). This code assumes that if affinity has been set * before this point, that at most one cpu is set; for now this * is reasonable. I check for both cpus_empty() and cpus_full(), * in case some kernel variant sets none of the bits when no * affinity is set. 2.6.11 and 12 kernels have all present * cpus set. Some day we'll have to fix it up further to handle - * a cpu subset. This algorithm fails for two HT-400's connected + * a cpu subset. This algorithm fails for two HT chips connected * in tunnel fashion. Eventually this needs real topology * information. There may be some issues with dual core numbering * as well. This needs more work prior to release. diff --git a/drivers/infiniband/hw/ipath/ipath_ht400.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c similarity index 98% rename from drivers/infiniband/hw/ipath/ipath_ht400.c rename to drivers/infiniband/hw/ipath/ipath_iba6110.c index 3db015da6e77..5076738aff32 100644 --- a/drivers/infiniband/hw/ipath/ipath_ht400.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -33,7 +33,7 @@ /* * This file contains all of the code that is specific to the InfiniPath - * HT-400 chip. + * HT chip. */ #include @@ -43,7 +43,7 @@ #include "ipath_registers.h" /* - * This lists the InfiniPath HT400 registers, in the actual chip layout. + * This lists the InfiniPath registers, in the actual chip layout. * This structure should never be directly accessed. * * The names are in InterCap form because they're taken straight from @@ -537,7 +537,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, if (hwerrs & INFINIPATH_HWE_HTCMISCERR7) strlcat(msg, "[HT core Misc7]", msgl); if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) { - strlcat(msg, "[Memory BIST test failed, HT-400 unusable]", + strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]", msgl); /* ignore from now on, so disable until driver reloaded */ dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED; @@ -553,7 +553,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, if (hwerrs & _IPATH_PLL_FAIL) { snprintf(bitsmsg, sizeof bitsmsg, - "[PLL failed (%llx), HT-400 unusable]", + "[PLL failed (%llx), InfiniPath hardware unusable]", (unsigned long long) (hwerrs & _IPATH_PLL_FAIL)); strlcat(msg, bitsmsg, msgl); /* ignore from now on, so disable until driver reloaded */ @@ -610,18 +610,18 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, break; case 5: /* - * HT-460 original production board; two production levels, with + * original production board; two production levels, with * different serial number ranges. See ipath_ht_early_init() for * case where we enable IPATH_GPIO_INTR for later serial # range. */ - n = "InfiniPath_HT-460"; + n = "InfiniPath_QHT7040"; break; case 6: n = "OEM_Board_3"; break; case 7: - /* HT-460 small form factor production board */ - n = "InfiniPath_HT-465"; + /* small form factor production board */ + n = "InfiniPath_QHT7140"; break; case 8: n = "LS/X-1"; @@ -633,7 +633,7 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, n = "OEM_Board_2"; break; case 11: - n = "InfiniPath_HT-470"; + n = "InfiniPath_HT-470"; /* obsoleted */ break; case 12: n = "OEM_Board_4"; @@ -641,7 +641,7 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, default: /* don't know, just print the number */ ipath_dev_err(dd, "Don't yet know about board " "with ID %u\n", boardrev); - snprintf(name, namelen, "Unknown_InfiniPath_HT-4xx_%u", + snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u", boardrev); break; } @@ -650,11 +650,10 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) { /* - * This version of the driver only supports the HT-400 - * Rev 3.2 + * This version of the driver only supports Rev 3.2 and 3.3 */ ipath_dev_err(dd, - "Unsupported HT-400 revision %u.%u!\n", + "Unsupported InfiniPath hardware revision %u.%u!\n", dd->ipath_majrev, dd->ipath_minrev); ret = 1; goto bail; @@ -738,7 +737,7 @@ static void ipath_check_htlink(struct ipath_devdata *dd) static int ipath_setup_ht_reset(struct ipath_devdata *dd) { - ipath_dbg("No reset possible for HT-400\n"); + ipath_dbg("No reset possible for this InfiniPath hardware\n"); return 0; } @@ -925,7 +924,7 @@ static int set_int_handler(struct ipath_devdata *dd, struct pci_dev *pdev, /* * kernels with CONFIG_PCI_MSI set the vector in the irq field of - * struct pci_device, so we use that to program the HT-400 internal + * struct pci_device, so we use that to program the internal * interrupt register (not config space) with that value. The BIOS * must still have done the basic MSI setup. */ @@ -1013,7 +1012,7 @@ bail: * @dd: the infinipath device * * Called during driver unload. - * This is currently a nop for the HT-400, not for all chips + * This is currently a nop for the HT chip, not for all chips */ static void ipath_setup_ht_cleanup(struct ipath_devdata *dd) { @@ -1470,7 +1469,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd) dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE; /* - * For HT-400, we allocate a somewhat overly large eager buffer, + * For HT, we allocate a somewhat overly large eager buffer, * such that we can guarantee that we can receive the largest * packet that we can send out. To truly support a 4KB MTU, * we need to bump this to a large value. To date, other than @@ -1531,7 +1530,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd) if(dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' && dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') { /* - * Later production HT-460 has same changes as HT-465, so + * Later production QHT7040 has same changes as QHT7140, so * can use GPIO interrupts. They have serial #'s starting * with 128, rather than 112. */ @@ -1560,13 +1559,13 @@ static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase) } /** - * ipath_init_ht400_funcs - set up the chip-specific function pointers + * ipath_init_iba6110_funcs - set up the chip-specific function pointers * @dd: the infinipath device * * This is global, and is called directly at init to set up the * chip-specific function pointers for later use. */ -void ipath_init_ht400_funcs(struct ipath_devdata *dd) +void ipath_init_iba6110_funcs(struct ipath_devdata *dd) { dd->ipath_f_intrsetup = ipath_ht_intconfig; dd->ipath_f_bus = ipath_setup_ht_config; diff --git a/drivers/infiniband/hw/ipath/ipath_pe800.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c similarity index 95% rename from drivers/infiniband/hw/ipath/ipath_pe800.c rename to drivers/infiniband/hw/ipath/ipath_iba6120.c index b83f66d8262c..f4233baaa338 100644 --- a/drivers/infiniband/hw/ipath/ipath_pe800.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c @@ -32,7 +32,7 @@ */ /* * This file contains all of the code that is specific to the - * InfiniPath PE-800 chip. + * InfiniPath PCIe chip. */ #include @@ -45,9 +45,9 @@ /* * This file contains all the chip-specific register information and - * access functions for the QLogic InfiniPath PE800, the PCI-Express chip. + * access functions for the QLogic InfiniPath PCI-Express chip. * - * This lists the InfiniPath PE800 registers, in the actual chip layout. + * This lists the InfiniPath registers, in the actual chip layout. * This structure should never be directly accessed. */ struct _infinipath_do_not_use_kernel_regs { @@ -213,7 +213,6 @@ static const struct ipath_kregs ipath_pe_kregs = { .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0), .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0), - /* This group is pe-800-specific; and used only in this file */ /* The rcvpktled register controls one of the debug port signals, so * a packet activity LED can be connected to it. */ .kr_rcvpktledcnt = IPATH_KREG_OFFSET(RcvPktLEDCnt), @@ -388,7 +387,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, *msg = '\0'; if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) { - strlcat(msg, "[Memory BIST test failed, PE-800 unusable]", + strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]", msgl); /* ignore from now on, so disable until driver reloaded */ *dd->ipath_statusp |= IPATH_STATUS_HWERROR; @@ -433,7 +432,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, if (hwerrs & _IPATH_PLL_FAIL) { snprintf(bitsmsg, sizeof bitsmsg, - "[PLL failed (%llx), PE-800 unusable]", + "[PLL failed (%llx), InfiniPath hardware unusable]", (unsigned long long) hwerrs & _IPATH_PLL_FAIL); strlcat(msg, bitsmsg, msgl); /* ignore from now on, so disable until driver reloaded */ @@ -511,22 +510,25 @@ static int ipath_pe_boardname(struct ipath_devdata *dd, char *name, n = "InfiniPath_Emulation"; break; case 1: - n = "InfiniPath_PE-800-Bringup"; + n = "InfiniPath_QLE7140-Bringup"; break; case 2: - n = "InfiniPath_PE-880"; + n = "InfiniPath_QLE7140"; break; case 3: - n = "InfiniPath_PE-850"; + n = "InfiniPath_QMI7140"; break; case 4: - n = "InfiniPath_PE-860"; + n = "InfiniPath_QEM7140"; + break; + case 5: + n = "InfiniPath_QMH7140"; break; default: ipath_dev_err(dd, "Don't yet know about board with ID %u\n", boardrev); - snprintf(name, namelen, "Unknown_InfiniPath_PE-8xx_%u", + snprintf(name, namelen, "Unknown_InfiniPath_PCIe_%u", boardrev); break; } @@ -534,7 +536,7 @@ static int ipath_pe_boardname(struct ipath_devdata *dd, char *name, snprintf(name, namelen, "%s", n); if (dd->ipath_majrev != 4 || !dd->ipath_minrev || dd->ipath_minrev>2) { - ipath_dev_err(dd, "Unsupported PE-800 revision %u.%u!\n", + ipath_dev_err(dd, "Unsupported InfiniPath hardware revision %u.%u!\n", dd->ipath_majrev, dd->ipath_minrev); ret = 1; } else @@ -705,7 +707,7 @@ static void ipath_pe_quiet_serdes(struct ipath_devdata *dd) ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val); } -/* this is not yet needed on the PE800, so just return 0. */ +/* this is not yet needed on this chip, so just return 0. */ static int ipath_pe_intconfig(struct ipath_devdata *dd) { return 0; @@ -759,8 +761,8 @@ static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst, * * This is called during driver unload. * We do the pci_disable_msi here, not in generic code, because it - * isn't used for the HT-400. If we do end up needing pci_enable_msi - * at some point in the future for HT-400, we'll move the call back + * isn't used for the HT chips. If we do end up needing pci_enable_msi + * at some point in the future for HT, we'll move the call back * into the main init_one code. */ static void ipath_setup_pe_cleanup(struct ipath_devdata *dd) @@ -780,10 +782,10 @@ static void ipath_setup_pe_cleanup(struct ipath_devdata *dd) * late in 2.6.16). * All that can be done is to edit the kernel source to remove the quirk * check until that is fixed. - * We do not need to call enable_msi() for our HyperTransport chip (HT-400), - * even those it uses MSI, and we want to avoid the quirk warning, so - * So we call enable_msi only for the PE-800. If we do end up needing - * pci_enable_msi at some point in the future for HT-400, we'll move the + * We do not need to call enable_msi() for our HyperTransport chip, + * even though it uses MSI, and we want to avoid the quirk warning, so + * So we call enable_msi only for PCIe. If we do end up needing + * pci_enable_msi at some point in the future for HT, we'll move the * call back into the main init_one code. * We save the msi lo and hi values, so we can restore them after * chip reset (the kernel PCI infrastructure doesn't yet handle that @@ -971,8 +973,7 @@ static int ipath_setup_pe_reset(struct ipath_devdata *dd) int ret; /* Use ERROR so it shows up in logs, etc. */ - ipath_dev_err(dd, "Resetting PE-800 unit %u\n", - dd->ipath_unit); + ipath_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->ipath_unit); /* keep chip from being accessed in a few places */ dd->ipath_flags &= ~(IPATH_INITTED|IPATH_PRESENT); val = dd->ipath_control | INFINIPATH_C_RESET; @@ -1078,7 +1079,7 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr, * @port: the port * * clear all TID entries for a port, expected and eager. - * Used from ipath_close(). On PE800, TIDs are only 32 bits, + * Used from ipath_close(). On this chip, TIDs are only 32 bits, * not 64, but they are still on 64 bit boundaries, so tidbase * is declared as u64 * for the pointer math, even though we write 32 bits */ @@ -1148,9 +1149,9 @@ static int ipath_pe_early_init(struct ipath_devdata *dd) dd->ipath_flags |= IPATH_4BYTE_TID; /* - * For openib, we need to be able to handle an IB header of 96 bytes - * or 24 dwords. HT-400 has arbitrary sized receive buffers, so we - * made them the same size as the PIO buffers. The PE-800 does not + * For openfabrics, we need to be able to handle an IB header of + * 24 dwords. HT chip has arbitrary sized receive buffers, so we + * made them the same size as the PIO buffers. This chip does not * handle arbitrary size buffers, so we need the header large enough * to handle largest IB header, but still have room for a 2KB MTU * standard IB packet. @@ -1158,11 +1159,10 @@ static int ipath_pe_early_init(struct ipath_devdata *dd) dd->ipath_rcvhdrentsize = 24; dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE; - /* For HT-400, we allocate a somewhat overly large eager buffer, - * such that we can guarantee that we can receive the largest packet - * that we can send out. To truly support a 4KB MTU, we need to - * bump this to a larger value. We'll do this when I get around to - * testing 4KB sends on the PE-800, which I have not yet done. + /* + * To truly support a 4KB MTU (for usermode), we need to + * bump this to a larger value. For now, we use them for + * the kernel only. */ dd->ipath_rcvegrbufsize = 2048; /* @@ -1175,9 +1175,9 @@ static int ipath_pe_early_init(struct ipath_devdata *dd) dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen; /* - * For PE-800, we can request a receive interrupt for 1 or + * We can request a receive interrupt for 1 or * more packets from current offset. For now, we set this - * up for a single packet, to match the HT-400 behavior. + * up for a single packet. */ dd->ipath_rhdrhead_intr_off = 1ULL<<32; @@ -1216,13 +1216,13 @@ static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase) } /** - * ipath_init_pe800_funcs - set up the chip-specific function pointers + * ipath_init_iba6120_funcs - set up the chip-specific function pointers * @dd: the infinipath device * * This is global, and is called directly at init to set up the * chip-specific function pointers for later use. */ -void ipath_init_pe800_funcs(struct ipath_devdata *dd) +void ipath_init_iba6120_funcs(struct ipath_devdata *dd) { dd->ipath_f_intrsetup = ipath_pe_intconfig; dd->ipath_f_bus = ipath_setup_pe_config; diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index f8accc79b92f..2530686f6893 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -236,7 +236,7 @@ struct ipath_devdata { u64 ipath_tidtemplate; /* value to write to free TIDs */ u64 ipath_tidinvalid; - /* PE-800 rcv interrupt setup */ + /* IBA6120 rcv interrupt setup */ u64 ipath_rhdrhead_intr_off; /* size of memory at ipath_kregbase */ @@ -621,10 +621,8 @@ void ipath_free_data(struct ipath_portdata *dd); int ipath_waitfor_mdio_cmdready(struct ipath_devdata *); int ipath_waitfor_complete(struct ipath_devdata *, ipath_kreg, u64, u64 *); u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *); -/* init PE-800-specific func */ -void ipath_init_pe800_funcs(struct ipath_devdata *); -/* init HT-400-specific func */ -void ipath_init_ht400_funcs(struct ipath_devdata *); +void ipath_init_iba6120_funcs(struct ipath_devdata *); +void ipath_init_iba6110_funcs(struct ipath_devdata *); void ipath_get_eeprom_info(struct ipath_devdata *); u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h index 89df8f5ea998..f08c86088ca4 100644 --- a/drivers/infiniband/hw/ipath/ipath_registers.h +++ b/drivers/infiniband/hw/ipath/ipath_registers.h @@ -36,8 +36,7 @@ /* * This file should only be included by kernel source, and by the diags. It - * defines the registers, and their contents, for the InfiniPath HT-400 - * chip. + * defines the registers, and their contents, for InfiniPath chips. */ /* @@ -286,7 +285,7 @@ #define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */ -/* TID entries (memory), HT400-only */ +/* TID entries (memory), HT-only */ #define INFINIPATH_RT_VALID 0x8000000000000000ULL #define INFINIPATH_RT_ADDR_SHIFT 0 #define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF From ff0b8597ec3e7db6b37130a195cb3d673dafea27 Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Fri, 25 Aug 2006 11:24:40 -0700 Subject: [PATCH 0755/1063] IB/ipath: add serial number to hardware freeze error message Also added the word "Hardware" after "Fatal" to make it more obvious that it's hardware, not software. Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_iba6110.c | 5 +++-- drivers/infiniband/hw/ipath/ipath_iba6120.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index 5076738aff32..7028c98e5c4f 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -461,8 +461,9 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, * times. */ if (dd->ipath_flags & IPATH_INITTED) { - ipath_dev_err(dd, "Fatal Error (freeze " - "mode), no longer usable\n"); + ipath_dev_err(dd, "Fatal Hardware Error (freeze " + "mode), no longer usable, SN %.16s\n", + dd->ipath_serial); isfatal = 1; } *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index f4233baaa338..3a7640be2530 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6120.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c @@ -363,8 +363,9 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, * and we get here multiple times */ if (dd->ipath_flags & IPATH_INITTED) { - ipath_dev_err(dd, "Fatal Error (freeze " - "mode), no longer usable\n"); + ipath_dev_err(dd, "Fatal Hardware Error (freeze " + "mode), no longer usable, SN %.16s\n", + dd->ipath_serial); isfatal = 1; } /* From fc8cf8cdfc3ce328c577b18ebcd60a5595f2a283 Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Fri, 25 Aug 2006 11:24:41 -0700 Subject: [PATCH 0756/1063] IB/ipath: be more strict about testing the modify QP verb Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_qp.c | 33 +++++++++++++++++++++-- drivers/infiniband/hw/ipath/ipath_verbs.h | 1 + 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index c0267cf8ca8c..502d555fdf37 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -455,11 +455,16 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr_mask)) goto inval; - if (attr_mask & IB_QP_AV) + if (attr_mask & IB_QP_AV) { if (attr->ah_attr.dlid == 0 || attr->ah_attr.dlid >= IPATH_MULTICAST_LID_BASE) goto inval; + if ((attr->ah_attr.ah_flags & IB_AH_GRH) && + (attr->ah_attr.grh.sgid_index > 1)) + goto inval; + } + if (attr_mask & IB_QP_PKEY_INDEX) if (attr->pkey_index >= ipath_get_npkeys(dev->dd)) goto inval; @@ -468,6 +473,27 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr->min_rnr_timer > 31) goto inval; + if (attr_mask & IB_QP_PORT) + if (attr->port_num == 0 || + attr->port_num > ibqp->device->phys_port_cnt) + goto inval; + + if (attr_mask & IB_QP_PATH_MTU) + if (attr->path_mtu > IB_MTU_4096) + goto inval; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + if (attr->max_dest_rd_atomic > 1) + goto inval; + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) + if (attr->max_rd_atomic > 1) + goto inval; + + if (attr_mask & IB_QP_PATH_MIG_STATE) + if (attr->path_mig_state != IB_MIG_MIGRATED) + goto inval; + switch (new_state) { case IB_QPS_RESET: ipath_reset_qp(qp); @@ -518,6 +544,9 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_MIN_RNR_TIMER) qp->r_min_rnr_timer = attr->min_rnr_timer; + if (attr_mask & IB_QP_TIMEOUT) + qp->timeout = attr->timeout; + if (attr_mask & IB_QP_QKEY) qp->qkey = attr->qkey; @@ -564,7 +593,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->max_dest_rd_atomic = 1; attr->min_rnr_timer = qp->r_min_rnr_timer; attr->port_num = 1; - attr->timeout = 0; + attr->timeout = qp->timeout; attr->retry_cnt = qp->s_retry_cnt; attr->rnr_retry = qp->s_rnr_retry; attr->alt_port_num = 0; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 9cc0dbfe8602..f2956090d93f 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -371,6 +371,7 @@ struct ipath_qp { u8 s_retry; /* requester retry counter */ u8 s_rnr_retry; /* requester RNR retry counter */ u8 s_pkey_index; /* PKEY index to use */ + u8 timeout; /* Timeout for this QP */ enum ib_mtu path_mtu; u32 remote_qpn; u32 qkey; /* QKEY for this QP (for UD or RD) */ From ca4ce383acfb05b8035453cdbbfd4f8ae36c7a69 Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Fri, 25 Aug 2006 11:24:42 -0700 Subject: [PATCH 0757/1063] IB/ipath: validate path_mig_state properly Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_qp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 502d555fdf37..77391886d2f8 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -491,7 +491,8 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto inval; if (attr_mask & IB_QP_PATH_MIG_STATE) - if (attr->path_mig_state != IB_MIG_MIGRATED) + if (attr->path_mig_state != IB_MIG_MIGRATED && + attr->path_mig_state != IB_MIG_REARM) goto inval; switch (new_state) { From 0b81e4f79af8322c7142701982f40d1431dedf19 Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Fri, 25 Aug 2006 11:24:43 -0700 Subject: [PATCH 0758/1063] IB/ipath: put a limit on the number of QPs that can be created Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_qp.c | 15 +++++++++++++++ drivers/infiniband/hw/ipath/ipath_verbs.c | 7 ++++++- drivers/infiniband/hw/ipath/ipath_verbs.h | 4 ++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 77391886d2f8..607ba72af2fa 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -833,9 +833,21 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, } } + spin_lock(&dev->n_qps_lock); + if (dev->n_qps_allocated == ib_ipath_max_qps) { + spin_unlock(&dev->n_qps_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + dev->n_qps_allocated++; + spin_unlock(&dev->n_qps_lock); + ret = &qp->ibqp; goto bail; +bail_ip: + kfree(qp->ip); bail_rwq: vfree(qp->r_rq.wq); bail_qp: @@ -864,6 +876,9 @@ int ipath_destroy_qp(struct ib_qp *ibqp) spin_lock_irqsave(&qp->s_lock, flags); qp->state = IB_QPS_ERR; spin_unlock_irqrestore(&qp->s_lock, flags); + spin_lock(&dev->n_qps_lock); + dev->n_qps_allocated--; + spin_unlock(&dev->n_qps_lock); /* Stop the sending tasklet. */ tasklet_kill(&qp->s_task); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index ab0006288b41..b9be0fd2ed36 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -73,6 +73,10 @@ module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support"); +unsigned int ib_ipath_max_qps = 16384; +module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support"); + unsigned int ib_ipath_max_sges = 0x60; module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support"); @@ -958,7 +962,7 @@ static int ipath_query_device(struct ib_device *ibdev, props->sys_image_guid = dev->sys_image_guid; props->max_mr_size = ~0ull; - props->max_qp = dev->qp_table.max; + props->max_qp = ib_ipath_max_qps; props->max_qp_wr = ib_ipath_max_qp_wrs; props->max_sge = ib_ipath_max_sges; props->max_cq = ib_ipath_max_cqs; @@ -1420,6 +1424,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd) spin_lock_init(&idev->n_pds_lock); spin_lock_init(&idev->n_ahs_lock); spin_lock_init(&idev->n_cqs_lock); + spin_lock_init(&idev->n_qps_lock); spin_lock_init(&idev->n_srqs_lock); spin_lock_init(&idev->n_mcast_grps_lock); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index f2956090d93f..09bbb3f9a217 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -482,6 +482,8 @@ struct ipath_ibdev { spinlock_t n_ahs_lock; u32 n_cqs_allocated; /* number of CQs allocated for device */ spinlock_t n_cqs_lock; + u32 n_qps_allocated; /* number of QPs allocated for device */ + spinlock_t n_qps_lock; u32 n_srqs_allocated; /* number of SRQs allocated for device */ spinlock_t n_srqs_lock; u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ @@ -792,6 +794,8 @@ extern unsigned int ib_ipath_max_cqs; extern unsigned int ib_ipath_max_qp_wrs; +extern unsigned int ib_ipath_max_qps; + extern unsigned int ib_ipath_max_sges; extern unsigned int ib_ipath_max_mcast_grps; From a78aa6fb156f9954562c9539aeb25dbec1ffca10 Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Fri, 25 Aug 2006 11:24:44 -0700 Subject: [PATCH 0759/1063] IB/ipath: handle sq_sig_all field correctly Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_qp.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 607ba72af2fa..224b0f40767f 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -606,9 +606,10 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, init_attr->recv_cq = qp->ibqp.recv_cq; init_attr->srq = qp->ibqp.srq; init_attr->cap = attr->cap; - init_attr->sq_sig_type = - (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR)) - ? IB_SIGNAL_REQ_WR : 0; + if (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR)) + init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; + else + init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; init_attr->qp_type = qp->ibqp.qp_type; init_attr->port_num = 1; return 0; @@ -776,8 +777,10 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, qp->s_wq = swq; qp->s_size = init_attr->cap.max_send_wr + 1; qp->s_max_sge = init_attr->cap.max_send_sge; - qp->s_flags = init_attr->sq_sig_type == IB_SIGNAL_REQ_WR ? - 1 << IPATH_S_SIGNAL_REQ_WR : 0; + if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) + qp->s_flags = 1 << IPATH_S_SIGNAL_REQ_WR; + else + qp->s_flags = 0; dev = to_idev(ibpd->device); err = ipath_alloc_qpn(&dev->qp_table, qp, init_attr->qp_type); From d821f02a6ebed97e35e0bc7575452cfc6f9073cb Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Fri, 25 Aug 2006 11:24:45 -0700 Subject: [PATCH 0760/1063] IB/ipath: allow SMA to be disabled This is useful for testing purposes. Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_verbs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index b9be0fd2ed36..fbda7739715f 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -107,6 +107,10 @@ module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support"); +static unsigned int ib_ipath_disable_sma; +module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(ib_ipath_disable_sma, "Disable the SMA"); + const int ib_ipath_state_ops[IB_QPS_ERR + 1] = { [IB_QPS_RESET] = 0, [IB_QPS_INIT] = IPATH_POST_RECV_OK, @@ -354,6 +358,9 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev, switch (qp->ibqp.qp_type) { case IB_QPT_SMI: case IB_QPT_GSI: + if (ib_ipath_disable_sma) + break; + /* FALLTHROUGH */ case IB_QPT_UD: ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp); break; From e35d710d0c5b74bc9833d6a3791706bd577a3724 Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Fri, 25 Aug 2006 11:24:46 -0700 Subject: [PATCH 0761/1063] IB/ipath: fix return value from ipath_poll This stops the generic poll code from waiting for a timeout. Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_file_ops.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 6ba9a2d1e6ec..29930e22318e 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -1150,6 +1150,7 @@ static unsigned int ipath_poll(struct file *fp, struct ipath_portdata *pd; u32 head, tail; int bit; + unsigned pollflag = 0; struct ipath_devdata *dd; pd = port_fp(fp); @@ -1186,9 +1187,12 @@ static unsigned int ipath_poll(struct file *fp, clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag); pd->port_rcvwait_to++; } + else + pollflag = POLLIN | POLLRDNORM; } else { /* it's already happened; don't do wait_event overhead */ + pollflag = POLLIN | POLLRDNORM; pd->port_rcvnowait++; } @@ -1196,7 +1200,7 @@ static unsigned int ipath_poll(struct file *fp, ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); - return 0; + return pollflag; } static int try_alloc_port(struct ipath_devdata *dd, int port, From 30fc5c3130bdbc7cc051a2d6054ad38360d408a8 Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Fri, 25 Aug 2006 11:24:48 -0700 Subject: [PATCH 0762/1063] IB/ipath: control receive polarity inversion Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_driver.c | 17 +++++++++++ drivers/infiniband/hw/ipath/ipath_iba6110.c | 9 ++++++ drivers/infiniband/hw/ipath/ipath_iba6120.c | 9 ++++++ drivers/infiniband/hw/ipath/ipath_kernel.h | 3 ++ drivers/infiniband/hw/ipath/ipath_registers.h | 2 ++ drivers/infiniband/hw/ipath/ipath_sysfs.c | 29 +++++++++++++++++++ 6 files changed, 69 insertions(+) diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 3a15efee7387..47c9d15557c8 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -2116,5 +2116,22 @@ bail: return ret; } +int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv) +{ + u64 val; + if ( new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK ) { + return -1; + } + if ( dd->ipath_rx_pol_inv != new_pol_inv ) { + dd->ipath_rx_pol_inv = new_pol_inv; + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); + val &= ~(INFINIPATH_XGXS_RX_POL_MASK << + INFINIPATH_XGXS_RX_POL_SHIFT); + val |= ((u64)dd->ipath_rx_pol_inv) << + INFINIPATH_XGXS_RX_POL_SHIFT; + ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); + } + return 0; +} module_init(infinipath_init); module_exit(infinipath_cleanup); diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index 7028c98e5c4f..bf2455a6d562 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -1290,6 +1290,15 @@ static int ipath_ht_bringup_serdes(struct ipath_devdata *dd) val &= ~INFINIPATH_XGXS_RESET; change = 1; } + if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) & + INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) { + /* need to compensate for Tx inversion in partner */ + val &= ~(INFINIPATH_XGXS_RX_POL_MASK << + INFINIPATH_XGXS_RX_POL_SHIFT); + val |= dd->ipath_rx_pol_inv << + INFINIPATH_XGXS_RX_POL_SHIFT; + change = 1; + } if (change) ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index 3a7640be2530..d86516d23df6 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6120.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c @@ -654,6 +654,15 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd) val &= ~INFINIPATH_XGXS_RESET; change = 1; } + if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) & + INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) { + /* need to compensate for Tx inversion in partner */ + val &= ~(INFINIPATH_XGXS_RX_POL_MASK << + INFINIPATH_XGXS_RX_POL_SHIFT); + val |= dd->ipath_rx_pol_inv << + INFINIPATH_XGXS_RX_POL_SHIFT; + change = 1; + } if (change) ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 2530686f6893..a8a56276ff1d 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -503,6 +503,8 @@ struct ipath_devdata { u8 ipath_pci_cacheline; /* LID mask control */ u8 ipath_lmc; + /* Rx Polarity inversion (compensate for ~tx on partner) */ + u8 ipath_rx_pol_inv; /* local link integrity counter */ u32 ipath_lli_counter; @@ -567,6 +569,7 @@ void ipath_get_faststats(unsigned long); int ipath_set_linkstate(struct ipath_devdata *, u8); int ipath_set_mtu(struct ipath_devdata *, u16); int ipath_set_lid(struct ipath_devdata *, u32, u8); +int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv); /* for use in system calls, where we want to know device type, etc. */ #define port_fp(fp) ((struct ipath_portdata *) (fp)->private_data) diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h index f08c86088ca4..6e23b3d632b8 100644 --- a/drivers/infiniband/hw/ipath/ipath_registers.h +++ b/drivers/infiniband/hw/ipath/ipath_registers.h @@ -282,6 +282,8 @@ #define INFINIPATH_XGXS_RESET 0x7ULL #define INFINIPATH_XGXS_MDIOADDR_MASK 0xfULL #define INFINIPATH_XGXS_MDIOADDR_SHIFT 4 +#define INFINIPATH_XGXS_RX_POL_SHIFT 19 +#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL #define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */ diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c index 8476dd3c7af4..e299148c4b68 100644 --- a/drivers/infiniband/hw/ipath/ipath_sysfs.c +++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c @@ -561,6 +561,33 @@ bail: return ret; } +static ssize_t store_rx_pol_inv(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret, r; + u16 val; + + ret = ipath_parse_ushort(buf, &val); + if (ret < 0) + goto invalid; + + r = ipath_set_rx_pol_inv(dd, val); + if (r < 0) { + ret = r; + goto bail; + } + + goto bail; +invalid: + ipath_dev_err(dd, "attempt to set invalid Rx Polarity invert\n"); +bail: + return ret; +} + + static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL); static DRIVER_ATTR(version, S_IRUGO, show_version, NULL); @@ -587,6 +614,7 @@ static DEVICE_ATTR(status, S_IRUGO, show_status, NULL); static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL); static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL); +static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv); static struct attribute *dev_attributes[] = { &dev_attr_guid.attr, @@ -601,6 +629,7 @@ static struct attribute *dev_attributes[] = { &dev_attr_boardversion.attr, &dev_attr_unit.attr, &dev_attr_enabled.attr, + &dev_attr_rx_pol_inv.attr, NULL }; From b046a04e162dc7f468700a0817acda0321b2b3ae Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 28 Aug 2006 19:08:53 +0300 Subject: [PATCH 0763/1063] IB/mthca: Fix default static rate returned for Tavor in AV When default static rate is returned for Tavor, need to translate it to an ib rate value. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_av.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index e215041b2db9..69599455aca2 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -90,7 +90,7 @@ static enum ib_rate tavor_rate_to_ib(u8 mthca_rate, u8 port_rate) case MTHCA_RATE_TAVOR_1X: return IB_RATE_2_5_GBPS; case MTHCA_RATE_TAVOR_1X_DDR: return IB_RATE_5_GBPS; case MTHCA_RATE_TAVOR_4X: return IB_RATE_10_GBPS; - default: return port_rate; + default: return mult_to_ib_rate(port_rate); } } From f6f76725b5ed8085c602b16bfd309c9957fb84c8 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 28 Aug 2006 19:10:34 +0300 Subject: [PATCH 0764/1063] IB/mthca: Return port number for unconnected QPs in query_qp port_num was not being returned for unconnected QPs. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_qp.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 6d6ba4180a39..4ac25cf06794 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -472,10 +472,14 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m if (qp->transport == RC || qp->transport == UC) { to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path); to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path); + qp_attr->alt_pkey_index = + be32_to_cpu(context->alt_path.port_pkey) & 0x7f; + qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; } - qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f; - qp_attr->alt_pkey_index = be32_to_cpu(context->alt_path.port_pkey) & 0x7f; + qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f; + qp_attr->port_num = + (be32_to_cpu(context->pri_path.port_pkey) >> 24) & 0x3; /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ qp_attr->sq_draining = mthca_state == MTHCA_QP_STATE_DRAINING; @@ -486,11 +490,9 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m 1 << ((be32_to_cpu(context->params2) >> 21) & 0x7); qp_attr->min_rnr_timer = (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f; - qp_attr->port_num = qp_attr->ah_attr.port_num; qp_attr->timeout = context->pri_path.ackto >> 3; qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7; qp_attr->rnr_retry = context->pri_path.rnr_retry >> 5; - qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; qp_attr->alt_timeout = context->alt_path.ackto >> 3; qp_init_attr->cap = qp_attr->cap; From 9e583b85c2a0215dc7f4427361b4f75fcc0316af Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 28 Aug 2006 19:12:39 +0300 Subject: [PATCH 0765/1063] IB/mthca: Return correct number of bits for static rate in query_qp Incorrect number of bits was taken for static_rate field. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 4ac25cf06794..9324b6204ac5 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -408,7 +408,7 @@ static void to_ib_ah_attr(struct mthca_dev *dev, struct ib_ah_attr *ib_ah_attr, ib_ah_attr->sl = be32_to_cpu(path->sl_tclass_flowlabel) >> 28; ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f; ib_ah_attr->static_rate = mthca_rate_to_ib(dev, - path->static_rate & 0x7, + path->static_rate & 0xf, ib_ah_attr->port_num); ib_ah_attr->ah_flags = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0; if (ib_ah_attr->ah_flags) { From c1f250c0b45cdfdd89b21f2b866f317439aa21de Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 28 Aug 2006 11:55:52 -0700 Subject: [PATCH 0766/1063] IB/cm: Enable atomics along with RDMA reads Enable atomic operations along with RDMA reads if a local RDMA read/atomic depth is provided by the user. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 0de335b7bfc2..0df1454819ac 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3125,7 +3125,8 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE; if (cm_id_priv->responder_resources) - qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ; + qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_ATOMIC; qp_attr->pkey_index = cm_id_priv->av.pkey_index; qp_attr->port_num = cm_id_priv->av.port->port_num; ret = 0; From 76842405fca5f8b8e08d91558ecd3b922265034a Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 28 Aug 2006 11:57:42 -0700 Subject: [PATCH 0767/1063] IB/cm: Use correct reject code for invalid GID Set the reject code properly when rejecting a request that contains an invalid GID. A suitable GID is returned by the IB CM in the additional reject information (ARI). This is a spec compliancy issue. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cm.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 0df1454819ac..1aad33e03528 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1354,7 +1354,7 @@ static int cm_req_handler(struct cm_work *work) id.local_id); if (IS_ERR(cm_id_priv->timewait_info)) { ret = PTR_ERR(cm_id_priv->timewait_info); - goto error1; + goto destroy; } cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id; cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid; @@ -1363,7 +1363,8 @@ static int cm_req_handler(struct cm_work *work) listen_cm_id_priv = cm_match_req(work, cm_id_priv); if (!listen_cm_id_priv) { ret = -EINVAL; - goto error2; + kfree(cm_id_priv->timewait_info); + goto destroy; } cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler; @@ -1373,12 +1374,22 @@ static int cm_req_handler(struct cm_work *work) cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av); - if (ret) - goto error3; + if (ret) { + ib_get_cached_gid(work->port->cm_dev->device, + work->port->port_num, 0, &work->path[0].sgid); + ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID, + &work->path[0].sgid, sizeof work->path[0].sgid, + NULL, 0); + goto rejected; + } if (req_msg->alt_local_lid) { ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av); - if (ret) - goto error3; + if (ret) { + ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID, + &work->path[0].sgid, + sizeof work->path[0].sgid, NULL, 0); + goto rejected; + } } cm_id_priv->tid = req_msg->hdr.tid; cm_id_priv->timeout_ms = cm_convert_to_ms( @@ -1400,12 +1411,11 @@ static int cm_req_handler(struct cm_work *work) cm_deref_id(listen_cm_id_priv); return 0; -error3: atomic_dec(&cm_id_priv->refcount); +rejected: + atomic_dec(&cm_id_priv->refcount); cm_deref_id(listen_cm_id_priv); - cm_cleanup_timewait(cm_id_priv->timewait_info); -error2: kfree(cm_id_priv->timewait_info); - cm_id_priv->timewait_info = NULL; -error1: ib_destroy_cm_id(&cm_id_priv->id); +destroy: + ib_destroy_cm_id(cm_id); return ret; } From 75ab13443e4575c00788ba9861105745b9dda05c Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 28 Aug 2006 15:10:32 -0700 Subject: [PATCH 0768/1063] IB/mad: Add support for dual-sided RMPP transfers. The implementation assumes that any RMPP request that requires a response uses DS RMPP. Based on the RMPP start-up scenarios defined by the spec, this should be a valid assumption. That is, there is no start-up scenario defined where an RMPP request is followed by a non-RMPP response. By having this assumption we avoid any API changes. In order for a node that supports DS RMPP to communicate with one that does not, RMPP responses assume a new window size of 1 if a DS ACK has not been received. (By DS ACK, I'm referring to the turn-around ACK after the final ACK of the request.) This is a slight spec deviation, but is necessary to allow communication with nodes that do not generate the DS ACK. It also handles the case when a response is sent after the request state has been discarded. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/mad_rmpp.c | 90 +++++++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index ebcd5b181770..74fe1af9b18a 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -60,6 +60,7 @@ struct mad_rmpp_recv { int last_ack; int seg_num; int newwin; + int repwin; __be64 tid; u32 src_qp; @@ -170,6 +171,32 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent, return msg; } +static void ack_ds_ack(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *recv_wc) +{ + struct ib_mad_send_buf *msg; + struct ib_rmpp_mad *rmpp_mad; + int ret; + + msg = alloc_response_msg(&agent->agent, recv_wc); + if (IS_ERR(msg)) + return; + + rmpp_mad = msg->mad; + memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len); + + rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP; + ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); + rmpp_mad->rmpp_hdr.seg_num = 0; + rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(1); + + ret = ib_post_send_mad(msg, NULL); + if (ret) { + ib_destroy_ah(msg->ah); + ib_free_send_mad(msg); + } +} + void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc) { struct ib_rmpp_mad *rmpp_mad = mad_send_wc->send_buf->mad; @@ -271,6 +298,7 @@ create_rmpp_recv(struct ib_mad_agent_private *agent, rmpp_recv->newwin = 1; rmpp_recv->seg_num = 1; rmpp_recv->last_ack = 0; + rmpp_recv->repwin = 1; mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr; rmpp_recv->tid = mad_hdr->tid; @@ -591,6 +619,16 @@ static inline void adjust_last_ack(struct ib_mad_send_wr_private *wr, break; } +static void process_ds_ack(struct ib_mad_agent_private *agent, + struct ib_mad_recv_wc *mad_recv_wc, int newwin) +{ + struct mad_rmpp_recv *rmpp_recv; + + rmpp_recv = find_rmpp_recv(agent, mad_recv_wc); + if (rmpp_recv && rmpp_recv->state == RMPP_STATE_COMPLETE) + rmpp_recv->repwin = newwin; +} + static void process_rmpp_ack(struct ib_mad_agent_private *agent, struct ib_mad_recv_wc *mad_recv_wc) { @@ -616,8 +654,18 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent, spin_lock_irqsave(&agent->lock, flags); mad_send_wr = ib_find_send_mad(agent, mad_recv_wc); - if (!mad_send_wr) - goto out; /* Unmatched ACK */ + if (!mad_send_wr) { + if (!seg_num) + process_ds_ack(agent, mad_recv_wc, newwin); + goto out; /* Unmatched or DS RMPP ACK */ + } + + if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) && + (mad_send_wr->timeout)) { + spin_unlock_irqrestore(&agent->lock, flags); + ack_ds_ack(agent, mad_recv_wc); + return; /* Repeated ACK for DS RMPP transaction */ + } if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) || (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS)) @@ -656,6 +704,9 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent, if (mad_send_wr->refcount == 1) ib_reset_mad_timeout(mad_send_wr, mad_send_wr->send_buf.timeout_ms); + spin_unlock_irqrestore(&agent->lock, flags); + ack_ds_ack(agent, mad_recv_wc); + return; } else if (mad_send_wr->refcount == 1 && mad_send_wr->seg_num < mad_send_wr->newwin && mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) { @@ -772,6 +823,39 @@ out: return NULL; } +static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr) +{ + struct ib_mad_agent_private *agent = mad_send_wr->mad_agent_priv; + struct ib_mad_hdr *mad_hdr = mad_send_wr->send_buf.mad; + struct mad_rmpp_recv *rmpp_recv; + struct ib_ah_attr ah_attr; + unsigned long flags; + int newwin = 1; + + if (!(mad_hdr->method & IB_MGMT_METHOD_RESP)) + goto out; + + spin_lock_irqsave(&agent->lock, flags); + list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { + if (rmpp_recv->tid != mad_hdr->tid || + rmpp_recv->mgmt_class != mad_hdr->mgmt_class || + rmpp_recv->class_version != mad_hdr->class_version || + (rmpp_recv->method & IB_MGMT_METHOD_RESP)) + continue; + + if (ib_query_ah(mad_send_wr->send_buf.ah, &ah_attr)) + continue; + + if (rmpp_recv->slid == ah_attr.dlid) { + newwin = rmpp_recv->repwin; + break; + } + } + spin_unlock_irqrestore(&agent->lock, flags); +out: + return newwin; +} + int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_rmpp_mad *rmpp_mad; @@ -787,7 +871,7 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr) return IB_RMPP_RESULT_INTERNAL; } - mad_send_wr->newwin = 1; + mad_send_wr->newwin = init_newwin(mad_send_wr); /* We need to wait for the final ACK even if there isn't a response */ mad_send_wr->refcount += (mad_send_wr->timeout == 0); From 2b3e258e5dd1938e2708eb5354ad8ba056fe8154 Mon Sep 17 00:00:00 2001 From: James Lentini Date: Mon, 28 Aug 2006 15:12:04 -0700 Subject: [PATCH 0769/1063] IB/mad: Remove unused includes The ib_mad module does not use a kthread function, but mad_priv.h includes . mad_rmpp.c does not do any DMA-related stuff, but includes . Remove the unused includes. Signed-off-by: James Lentini Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/mad_priv.h | 1 - drivers/infiniband/core/mad_rmpp.c | 2 -- 2 files changed, 3 deletions(-) diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index d147f3bad2ce..1da9adbccaec 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -39,7 +39,6 @@ #include #include -#include #include #include #include diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index 74fe1af9b18a..3ace5f492dc4 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -33,8 +33,6 @@ * $Id: mad_rmpp.c 1921 2005-03-02 22:58:44Z sean.hefty $ */ -#include - #include "mad_priv.h" #include "mad_rmpp.h" From f06d26537559113207e4b73af6a22eaa5c5e9dc3 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 28 Aug 2006 15:15:18 -0700 Subject: [PATCH 0770/1063] IB/cm: Randomize starting comm ID Randomize the starting local comm ID to avoid getting a rejected connection due to a stale connection after a system reboot or reloading of the ib_cm. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cm.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 1aad33e03528..c8982b02d9b6 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2004-2006 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -73,6 +74,7 @@ static struct ib_cm { struct rb_root remote_id_table; struct rb_root remote_sidr_table; struct idr local_id_table; + __be32 random_id_operand; struct workqueue_struct *wq; } cm; @@ -299,15 +301,17 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) static int cm_alloc_id(struct cm_id_private *cm_id_priv) { unsigned long flags; - int ret; + int ret, id; static int next_id; do { spin_lock_irqsave(&cm.lock, flags); - ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, next_id++, - (__force int *) &cm_id_priv->id.local_id); + ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, + next_id++, &id); spin_unlock_irqrestore(&cm.lock, flags); } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) ); + + cm_id_priv->id.local_id = (__force __be32) (id ^ cm.random_id_operand); return ret; } @@ -316,7 +320,8 @@ static void cm_free_id(__be32 local_id) unsigned long flags; spin_lock_irqsave(&cm.lock, flags); - idr_remove(&cm.local_id_table, (__force int) local_id); + idr_remove(&cm.local_id_table, + (__force int) (local_id ^ cm.random_id_operand)); spin_unlock_irqrestore(&cm.lock, flags); } @@ -324,7 +329,8 @@ static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id) { struct cm_id_private *cm_id_priv; - cm_id_priv = idr_find(&cm.local_id_table, (__force int) local_id); + cm_id_priv = idr_find(&cm.local_id_table, + (__force int) (local_id ^ cm.random_id_operand)); if (cm_id_priv) { if (cm_id_priv->id.remote_id == remote_id) atomic_inc(&cm_id_priv->refcount); @@ -2082,8 +2088,9 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg) spin_unlock_irqrestore(&cm.lock, flags); return NULL; } - cm_id_priv = idr_find(&cm.local_id_table, - (__force int) timewait_info->work.local_id); + cm_id_priv = idr_find(&cm.local_id_table, (__force int) + (timewait_info->work.local_id ^ + cm.random_id_operand)); if (cm_id_priv) { if (cm_id_priv->id.remote_id == remote_id) atomic_inc(&cm_id_priv->refcount); @@ -3360,6 +3367,7 @@ static int __init ib_cm_init(void) cm.remote_qp_table = RB_ROOT; cm.remote_sidr_table = RB_ROOT; idr_init(&cm.local_id_table); + get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand); idr_pre_get(&cm.local_id_table, GFP_KERNEL); cm.wq = create_workqueue("ib_cm"); From 3cd965646b7cb75ae84dd0daf6258adf20e4f169 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 22 Sep 2006 15:22:46 -0700 Subject: [PATCH 0771/1063] IB: Whitespace fixes Remove some trailing whitespace that has snuck in despite the best efforts of whitespace=error-all. Also fix a few other whitespace bogosities. Signed-off-by: Roland Dreier --- drivers/infiniband/Kconfig | 2 +- drivers/infiniband/core/addr.c | 4 ++-- drivers/infiniband/core/cm.c | 2 +- drivers/infiniband/core/cma.c | 8 ++++---- drivers/infiniband/core/mad.c | 12 ++++++------ drivers/infiniband/core/mad_rmpp.c | 2 +- drivers/infiniband/core/sa_query.c | 2 +- drivers/infiniband/core/sysfs.c | 2 +- drivers/infiniband/core/ucm.c | 6 +++--- drivers/infiniband/core/user_mad.c | 2 +- drivers/infiniband/core/uverbs_cmd.c | 2 -- drivers/infiniband/hw/ipath/ipath_driver.c | 6 +++--- drivers/infiniband/hw/mthca/mthca_cq.c | 10 +++++----- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 2 +- drivers/infiniband/ulp/srp/ib_srp.c | 2 +- 15 files changed, 31 insertions(+), 33 deletions(-) diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index fd2d528daa3a..9a329b2c108c 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -14,7 +14,7 @@ config INFINIBAND_USER_MAD ---help--- Userspace InfiniBand Management Datagram (MAD) support. This is the kernel side of the userspace MAD support, which allows - userspace processes to send and receive MADs. You will also + userspace processes to send and receive MADs. You will also need libibumad from . config INFINIBAND_USER_ACCESS diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 1205e8027829..d8e54e002ce3 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -327,10 +327,10 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr) } EXPORT_SYMBOL(rdma_addr_cancel); -static int netevent_callback(struct notifier_block *self, unsigned long event, +static int netevent_callback(struct notifier_block *self, unsigned long event, void *ctx) { - if (event == NETEVENT_NEIGH_UPDATE) { + if (event == NETEVENT_NEIGH_UPDATE) { struct neighbour *neigh = ctx; if (neigh->dev->type == ARPHRD_INFINIBAND && diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index c8982b02d9b6..1c145fe92a54 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -179,7 +179,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, if (IS_ERR(ah)) return PTR_ERR(ah); - m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, + m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, cm_id_priv->av.pkey_index, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_ATOMIC); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 5d625a81193f..9d58bb59cd45 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -613,7 +613,7 @@ static void cma_destroy_listen(struct rdma_id_private *id_priv) if (id_priv->cma_dev) { switch (id_priv->id.device->node_type) { case IB_NODE_CA: - if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) + if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) ib_destroy_cm_id(id_priv->cm_id.ib); break; default: @@ -692,13 +692,13 @@ void rdma_destroy_id(struct rdma_cm_id *id) if (id_priv->cma_dev) { switch (id->device->node_type) { case IB_NODE_CA: - if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) + if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) ib_destroy_cm_id(id_priv->cm_id.ib); break; default: break; } - mutex_lock(&lock); + mutex_lock(&lock); cma_detach_from_dev(id_priv); mutex_unlock(&lock); } @@ -1492,7 +1492,7 @@ static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv) hlist_for_each_entry(cur_id, node, &bind_list->owners, node) { if (cma_any_addr(&cur_id->id.route.addr.src_addr)) return -EADDRNOTAVAIL; - + cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr; if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr) return -EADDRINUSE; diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 1c3cfbbe6a97..32d3028b274b 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1246,8 +1246,8 @@ static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class, int i; for (i = 0; i < MAX_MGMT_OUI; i++) - /* Is there matching OUI for this vendor class ? */ - if (!memcmp(vendor_class->oui[i], oui, 3)) + /* Is there matching OUI for this vendor class ? */ + if (!memcmp(vendor_class->oui[i], oui, 3)) return i; return -1; @@ -2237,7 +2237,7 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, &mad_agent_priv->send_list, agent_list) { if (mad_send_wr->status == IB_WC_SUCCESS) { - mad_send_wr->status = IB_WC_WR_FLUSH_ERR; + mad_send_wr->status = IB_WC_WR_FLUSH_ERR; mad_send_wr->refcount -= (mad_send_wr->timeout > 0); } } @@ -2528,10 +2528,10 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, } } sg_list.addr = dma_map_single(qp_info->port_priv-> - device->dma_device, + device->dma_device, &mad_priv->grh, sizeof *mad_priv - - sizeof mad_priv->header, + sizeof mad_priv->header, DMA_FROM_DEVICE); pci_unmap_addr_set(&mad_priv->header, mapping, sg_list.addr); recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list; @@ -2606,7 +2606,7 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) struct ib_qp *qp; attr = kmalloc(sizeof *attr, GFP_KERNEL); - if (!attr) { + if (!attr) { printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n"); return -ENOMEM; } diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index 3ace5f492dc4..1ef79d015a1e 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -391,7 +391,7 @@ static inline int window_size(struct ib_mad_agent_private *agent) static struct ib_mad_recv_buf * find_seg_location(struct list_head *rmpp_list, int seg_num) { - struct ib_mad_recv_buf *seg_buf; + struct ib_mad_recv_buf *seg_buf; int cur_seg_num; list_for_each_entry_reverse(seg_buf, rmpp_list, list) { diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index d6b84226bba7..df762ba4868f 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -887,7 +887,7 @@ static void send_handler(struct ib_mad_agent *agent, idr_remove(&query_idr, query->id); spin_unlock_irqrestore(&idr_lock, flags); - ib_free_send_mad(mad_send_wc->send_buf); + ib_free_send_mad(mad_send_wc->send_buf); kref_put(&query->sm_ah->ref, free_sm_ah); query->release(query); } diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 21f9282c1b25..fb6660564a30 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -68,7 +68,7 @@ struct port_table_attribute { int index; }; -static inline int ibdev_is_alive(const struct ib_device *dev) +static inline int ibdev_is_alive(const struct ib_device *dev) { return dev->reg_state == IB_DEV_REGISTERED; } diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index c1c6fda9452c..e74c964af7fa 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -309,9 +309,9 @@ static int ib_ucm_event_process(struct ib_cm_event *evt, info = evt->param.apr_rcvd.apr_info; break; case IB_CM_SIDR_REQ_RECEIVED: - uvt->resp.u.sidr_req_resp.pkey = + uvt->resp.u.sidr_req_resp.pkey = evt->param.sidr_req_rcvd.pkey; - uvt->resp.u.sidr_req_resp.port = + uvt->resp.u.sidr_req_resp.port = evt->param.sidr_req_rcvd.port; uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE; break; @@ -1237,7 +1237,7 @@ static struct class ucm_class = { static ssize_t show_ibdev(struct class_device *class_dev, char *buf) { struct ib_ucm_device *dev; - + dev = container_of(class_dev, struct ib_ucm_device, class_dev); return sprintf(buf, "%s\n", dev->ib_dev->name); } diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 1273f8807e84..8a455aec758f 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 3fcb5d189a23..b72c7f69ca90 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1676,7 +1676,6 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, break; } - if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) ret = -EFAULT; @@ -1726,7 +1725,6 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, break; } - if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) ret = -EFAULT; diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 47c9d15557c8..2108466c7e33 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -2126,9 +2126,9 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv) dd->ipath_rx_pol_inv = new_pol_inv; val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); val &= ~(INFINIPATH_XGXS_RX_POL_MASK << - INFINIPATH_XGXS_RX_POL_SHIFT); - val |= ((u64)dd->ipath_rx_pol_inv) << - INFINIPATH_XGXS_RX_POL_SHIFT; + INFINIPATH_XGXS_RX_POL_SHIFT); + val |= ((u64)dd->ipath_rx_pol_inv) << + INFINIPATH_XGXS_RX_POL_SHIFT; ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); } return 0; diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 3e27a084257e..e393681ba7d4 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -544,11 +544,11 @@ static inline int mthca_poll_one(struct mthca_dev *dev, wq = &(*cur_qp)->rq; wqe = be32_to_cpu(cqe->wqe); wqe_index = wqe >> wq->wqe_shift; - /* - * WQE addr == base - 1 might be reported in receive completion - * with error instead of (rq size - 1) by Sinai FW 1.0.800 and - * Arbel FW 5.1.400. This bug should be fixed in later FW revs. - */ + /* + * WQE addr == base - 1 might be reported in receive completion + * with error instead of (rq size - 1) by Sinai FW 1.0.800 and + * Arbel FW 5.1.400. This bug should be fixed in later FW revs. + */ if (unlikely(wqe_index < 0)) wqe_index = wq->max - 1; entry->wr_id = (*cur_qp)->wrid[wqe_index]; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index ec356ce7cdcd..60b09f5cb347 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -795,7 +795,7 @@ void ipoib_mcast_dev_flush(struct net_device *dev) } if (priv->broadcast) { - rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree); + rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree); list_add_tail(&priv->broadcast->list, &remove_list); priv->broadcast = NULL; } diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 249a98c06aeb..61c13d1e0506 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -330,7 +330,7 @@ static int srp_send_req(struct srp_target_port *target) req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT); /* - * In the published SRP specification (draft rev. 16a), the + * In the published SRP specification (draft rev. 16a), the * port identifier format is 8 bytes of ID extension followed * by 8 bytes of GUID. Older drafts put the two halves in the * opposite order, so that the GUID comes first. From 922a8e9fb2e0711212badce47a41137e2ca04cb3 Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Thu, 3 Aug 2006 16:02:40 -0500 Subject: [PATCH 0772/1063] RDMA: iWARP Connection Manager. Add an iWARP Connection Manager (CM), which abstracts connection management for iWARP devices (RNICs). It is a logical instance of the xx_cm where xx is the transport type (ib or iw). The symbols exported are used by the transport independent rdma_cm module, and are available also for transport dependent ULPs. Signed-off-by: Tom Tucker Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/core/iwcm.c | 1019 ++++++++++++++++++++++++++++++++ drivers/infiniband/core/iwcm.h | 62 ++ include/rdma/iw_cm.h | 258 ++++++++ 3 files changed, 1339 insertions(+) create mode 100644 drivers/infiniband/core/iwcm.c create mode 100644 drivers/infiniband/core/iwcm.h create mode 100644 include/rdma/iw_cm.h diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c new file mode 100644 index 000000000000..c3fb304a4e86 --- /dev/null +++ b/drivers/infiniband/core/iwcm.c @@ -0,0 +1,1019 @@ +/* + * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "iwcm.h" + +MODULE_AUTHOR("Tom Tucker"); +MODULE_DESCRIPTION("iWARP CM"); +MODULE_LICENSE("Dual BSD/GPL"); + +static struct workqueue_struct *iwcm_wq; +struct iwcm_work { + struct work_struct work; + struct iwcm_id_private *cm_id; + struct list_head list; + struct iw_cm_event event; + struct list_head free_list; +}; + +/* + * The following services provide a mechanism for pre-allocating iwcm_work + * elements. The design pre-allocates them based on the cm_id type: + * LISTENING IDS: Get enough elements preallocated to handle the + * listen backlog. + * ACTIVE IDS: 4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE + * PASSIVE IDS: 3: ESTABLISHED, DISCONNECT, CLOSE + * + * Allocating them in connect and listen avoids having to deal + * with allocation failures on the event upcall from the provider (which + * is called in the interrupt context). + * + * One exception is when creating the cm_id for incoming connection requests. + * There are two cases: + * 1) in the event upcall, cm_event_handler(), for a listening cm_id. If + * the backlog is exceeded, then no more connection request events will + * be processed. cm_event_handler() returns -ENOMEM in this case. Its up + * to the provider to reject the connectino request. + * 2) in the connection request workqueue handler, cm_conn_req_handler(). + * If work elements cannot be allocated for the new connect request cm_id, + * then IWCM will call the provider reject method. This is ok since + * cm_conn_req_handler() runs in the workqueue thread context. + */ + +static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv) +{ + struct iwcm_work *work; + + if (list_empty(&cm_id_priv->work_free_list)) + return NULL; + work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work, + free_list); + list_del_init(&work->free_list); + return work; +} + +static void put_work(struct iwcm_work *work) +{ + list_add(&work->free_list, &work->cm_id->work_free_list); +} + +static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) +{ + struct list_head *e, *tmp; + + list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) + kfree(list_entry(e, struct iwcm_work, free_list)); +} + +static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) +{ + struct iwcm_work *work; + + BUG_ON(!list_empty(&cm_id_priv->work_free_list)); + while (count--) { + work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL); + if (!work) { + dealloc_work_entries(cm_id_priv); + return -ENOMEM; + } + work->cm_id = cm_id_priv; + INIT_LIST_HEAD(&work->list); + put_work(work); + } + return 0; +} + +/* + * Save private data from incoming connection requests in the + * cm_id_priv so the low level driver doesn't have to. Adjust + * the event ptr to point to the local copy. + */ +static int copy_private_data(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *event) +{ + void *p; + + p = kmalloc(event->private_data_len, GFP_ATOMIC); + if (!p) + return -ENOMEM; + memcpy(p, event->private_data, event->private_data_len); + event->private_data = p; + return 0; +} + +/* + * Release a reference on cm_id. If the last reference is being removed + * and iw_destroy_cm_id is waiting, wake up the waiting thread. + */ +static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv) +{ + int ret = 0; + + BUG_ON(atomic_read(&cm_id_priv->refcount)==0); + if (atomic_dec_and_test(&cm_id_priv->refcount)) { + BUG_ON(!list_empty(&cm_id_priv->work_list)); + if (waitqueue_active(&cm_id_priv->destroy_comp.wait)) { + BUG_ON(cm_id_priv->state != IW_CM_STATE_DESTROYING); + BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, + &cm_id_priv->flags)); + ret = 1; + } + complete(&cm_id_priv->destroy_comp); + } + + return ret; +} + +static void add_ref(struct iw_cm_id *cm_id) +{ + struct iwcm_id_private *cm_id_priv; + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + atomic_inc(&cm_id_priv->refcount); +} + +static void rem_ref(struct iw_cm_id *cm_id) +{ + struct iwcm_id_private *cm_id_priv; + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + iwcm_deref_id(cm_id_priv); +} + +static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event); + +struct iw_cm_id *iw_create_cm_id(struct ib_device *device, + iw_cm_handler cm_handler, + void *context) +{ + struct iwcm_id_private *cm_id_priv; + + cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL); + if (!cm_id_priv) + return ERR_PTR(-ENOMEM); + + cm_id_priv->state = IW_CM_STATE_IDLE; + cm_id_priv->id.device = device; + cm_id_priv->id.cm_handler = cm_handler; + cm_id_priv->id.context = context; + cm_id_priv->id.event_handler = cm_event_handler; + cm_id_priv->id.add_ref = add_ref; + cm_id_priv->id.rem_ref = rem_ref; + spin_lock_init(&cm_id_priv->lock); + atomic_set(&cm_id_priv->refcount, 1); + init_waitqueue_head(&cm_id_priv->connect_wait); + init_completion(&cm_id_priv->destroy_comp); + INIT_LIST_HEAD(&cm_id_priv->work_list); + INIT_LIST_HEAD(&cm_id_priv->work_free_list); + + return &cm_id_priv->id; +} +EXPORT_SYMBOL(iw_create_cm_id); + + +static int iwcm_modify_qp_err(struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + + if (!qp) + return -EINVAL; + + qp_attr.qp_state = IB_QPS_ERR; + return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); +} + +/* + * This is really the RDMAC CLOSING state. It is most similar to the + * IB SQD QP state. + */ +static int iwcm_modify_qp_sqd(struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + + BUG_ON(qp == NULL); + qp_attr.qp_state = IB_QPS_SQD; + return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); +} + +/* + * CM_ID <-- CLOSING + * + * Block if a passive or active connection is currenlty being processed. Then + * process the event as follows: + * - If we are ESTABLISHED, move to CLOSING and modify the QP state + * based on the abrupt flag + * - If the connection is already in the CLOSING or IDLE state, the peer is + * disconnecting concurrently with us and we've already seen the + * DISCONNECT event -- ignore the request and return 0 + * - Disconnect on a listening endpoint returns -EINVAL + */ +int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt) +{ + struct iwcm_id_private *cm_id_priv; + unsigned long flags; + int ret = 0; + struct ib_qp *qp = NULL; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + /* Wait if we're currently in a connect or accept downcall */ + wait_event(cm_id_priv->connect_wait, + !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->state) { + case IW_CM_STATE_ESTABLISHED: + cm_id_priv->state = IW_CM_STATE_CLOSING; + + /* QP could be for user-mode client */ + if (cm_id_priv->qp) + qp = cm_id_priv->qp; + else + ret = -EINVAL; + break; + case IW_CM_STATE_LISTEN: + ret = -EINVAL; + break; + case IW_CM_STATE_CLOSING: + /* remote peer closed first */ + case IW_CM_STATE_IDLE: + /* accept or connect returned !0 */ + break; + case IW_CM_STATE_CONN_RECV: + /* + * App called disconnect before/without calling accept after + * connect_request event delivered. + */ + break; + case IW_CM_STATE_CONN_SENT: + /* Can only get here if wait above fails */ + default: + BUG(); + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + if (qp) { + if (abrupt) + ret = iwcm_modify_qp_err(qp); + else + ret = iwcm_modify_qp_sqd(qp); + + /* + * If both sides are disconnecting the QP could + * already be in ERR or SQD states + */ + ret = 0; + } + + return ret; +} +EXPORT_SYMBOL(iw_cm_disconnect); + +/* + * CM_ID <-- DESTROYING + * + * Clean up all resources associated with the connection and release + * the initial reference taken by iw_create_cm_id. + */ +static void destroy_cm_id(struct iw_cm_id *cm_id) +{ + struct iwcm_id_private *cm_id_priv; + unsigned long flags; + int ret; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + /* + * Wait if we're currently in a connect or accept downcall. A + * listening endpoint should never block here. + */ + wait_event(cm_id_priv->connect_wait, + !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->state) { + case IW_CM_STATE_LISTEN: + cm_id_priv->state = IW_CM_STATE_DESTROYING; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + /* destroy the listening endpoint */ + ret = cm_id->device->iwcm->destroy_listen(cm_id); + spin_lock_irqsave(&cm_id_priv->lock, flags); + break; + case IW_CM_STATE_ESTABLISHED: + cm_id_priv->state = IW_CM_STATE_DESTROYING; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + /* Abrupt close of the connection */ + (void)iwcm_modify_qp_err(cm_id_priv->qp); + spin_lock_irqsave(&cm_id_priv->lock, flags); + break; + case IW_CM_STATE_IDLE: + case IW_CM_STATE_CLOSING: + cm_id_priv->state = IW_CM_STATE_DESTROYING; + break; + case IW_CM_STATE_CONN_RECV: + /* + * App called destroy before/without calling accept after + * receiving connection request event notification. + */ + cm_id_priv->state = IW_CM_STATE_DESTROYING; + break; + case IW_CM_STATE_CONN_SENT: + case IW_CM_STATE_DESTROYING: + default: + BUG(); + break; + } + if (cm_id_priv->qp) { + cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); + cm_id_priv->qp = NULL; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + (void)iwcm_deref_id(cm_id_priv); +} + +/* + * This function is only called by the application thread and cannot + * be called by the event thread. The function will wait for all + * references to be released on the cm_id and then kfree the cm_id + * object. + */ +void iw_destroy_cm_id(struct iw_cm_id *cm_id) +{ + struct iwcm_id_private *cm_id_priv; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)); + + destroy_cm_id(cm_id); + + wait_for_completion(&cm_id_priv->destroy_comp); + + dealloc_work_entries(cm_id_priv); + + kfree(cm_id_priv); +} +EXPORT_SYMBOL(iw_destroy_cm_id); + +/* + * CM_ID <-- LISTEN + * + * Start listening for connect requests. Generates one CONNECT_REQUEST + * event for each inbound connect request. + */ +int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) +{ + struct iwcm_id_private *cm_id_priv; + unsigned long flags; + int ret = 0; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + + ret = alloc_work_entries(cm_id_priv, backlog); + if (ret) + return ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->state) { + case IW_CM_STATE_IDLE: + cm_id_priv->state = IW_CM_STATE_LISTEN; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ret = cm_id->device->iwcm->create_listen(cm_id, backlog); + if (ret) + cm_id_priv->state = IW_CM_STATE_IDLE; + spin_lock_irqsave(&cm_id_priv->lock, flags); + break; + default: + ret = -EINVAL; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + return ret; +} +EXPORT_SYMBOL(iw_cm_listen); + +/* + * CM_ID <-- IDLE + * + * Rejects an inbound connection request. No events are generated. + */ +int iw_cm_reject(struct iw_cm_id *cm_id, + const void *private_data, + u8 private_data_len) +{ + struct iwcm_id_private *cm_id_priv; + unsigned long flags; + int ret; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); + return -EINVAL; + } + cm_id_priv->state = IW_CM_STATE_IDLE; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + ret = cm_id->device->iwcm->reject(cm_id, private_data, + private_data_len); + + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); + + return ret; +} +EXPORT_SYMBOL(iw_cm_reject); + +/* + * CM_ID <-- ESTABLISHED + * + * Accepts an inbound connection request and generates an ESTABLISHED + * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block + * until the ESTABLISHED event is received from the provider. + */ +int iw_cm_accept(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *iw_param) +{ + struct iwcm_id_private *cm_id_priv; + struct ib_qp *qp; + unsigned long flags; + int ret; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); + return -EINVAL; + } + /* Get the ib_qp given the QPN */ + qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); + if (!qp) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return -EINVAL; + } + cm_id->device->iwcm->add_ref(qp); + cm_id_priv->qp = qp; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + ret = cm_id->device->iwcm->accept(cm_id, iw_param); + if (ret) { + /* An error on accept precludes provider events */ + BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); + cm_id_priv->state = IW_CM_STATE_IDLE; + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->qp) { + cm_id->device->iwcm->rem_ref(qp); + cm_id_priv->qp = NULL; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); + } + + return ret; +} +EXPORT_SYMBOL(iw_cm_accept); + +/* + * Active Side: CM_ID <-- CONN_SENT + * + * If successful, results in the generation of a CONNECT_REPLY + * event. iw_cm_disconnect and iw_cm_destroy will block until the + * CONNECT_REPLY event is received from the provider. + */ +int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) +{ + struct iwcm_id_private *cm_id_priv; + int ret = 0; + unsigned long flags; + struct ib_qp *qp; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + + ret = alloc_work_entries(cm_id_priv, 4); + if (ret) + return ret; + + set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + spin_lock_irqsave(&cm_id_priv->lock, flags); + + if (cm_id_priv->state != IW_CM_STATE_IDLE) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); + return -EINVAL; + } + + /* Get the ib_qp given the QPN */ + qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); + if (!qp) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return -EINVAL; + } + cm_id->device->iwcm->add_ref(qp); + cm_id_priv->qp = qp; + cm_id_priv->state = IW_CM_STATE_CONN_SENT; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + ret = cm_id->device->iwcm->connect(cm_id, iw_param); + if (ret) { + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->qp) { + cm_id->device->iwcm->rem_ref(qp); + cm_id_priv->qp = NULL; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); + cm_id_priv->state = IW_CM_STATE_IDLE; + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + wake_up_all(&cm_id_priv->connect_wait); + } + + return ret; +} +EXPORT_SYMBOL(iw_cm_connect); + +/* + * Passive Side: new CM_ID <-- CONN_RECV + * + * Handles an inbound connect request. The function creates a new + * iw_cm_id to represent the new connection and inherits the client + * callback function and other attributes from the listening parent. + * + * The work item contains a pointer to the listen_cm_id and the event. The + * listen_cm_id contains the client cm_handler, context and + * device. These are copied when the device is cloned. The event + * contains the new four tuple. + * + * An error on the child should not affect the parent, so this + * function does not return a value. + */ +static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, + struct iw_cm_event *iw_event) +{ + unsigned long flags; + struct iw_cm_id *cm_id; + struct iwcm_id_private *cm_id_priv; + int ret; + + /* + * The provider should never generate a connection request + * event with a bad status. + */ + BUG_ON(iw_event->status); + + /* + * We could be destroying the listening id. If so, ignore this + * upcall. + */ + spin_lock_irqsave(&listen_id_priv->lock, flags); + if (listen_id_priv->state != IW_CM_STATE_LISTEN) { + spin_unlock_irqrestore(&listen_id_priv->lock, flags); + return; + } + spin_unlock_irqrestore(&listen_id_priv->lock, flags); + + cm_id = iw_create_cm_id(listen_id_priv->id.device, + listen_id_priv->id.cm_handler, + listen_id_priv->id.context); + /* If the cm_id could not be created, ignore the request */ + if (IS_ERR(cm_id)) + return; + + cm_id->provider_data = iw_event->provider_data; + cm_id->local_addr = iw_event->local_addr; + cm_id->remote_addr = iw_event->remote_addr; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + cm_id_priv->state = IW_CM_STATE_CONN_RECV; + + ret = alloc_work_entries(cm_id_priv, 3); + if (ret) { + iw_cm_reject(cm_id, NULL, 0); + iw_destroy_cm_id(cm_id); + return; + } + + /* Call the client CM handler */ + ret = cm_id->cm_handler(cm_id, iw_event); + if (ret) { + set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); + destroy_cm_id(cm_id); + if (atomic_read(&cm_id_priv->refcount)==0) + kfree(cm_id); + } + + if (iw_event->private_data_len) + kfree(iw_event->private_data); +} + +/* + * Passive Side: CM_ID <-- ESTABLISHED + * + * The provider generated an ESTABLISHED event which means that + * the MPA negotion has completed successfully and we are now in MPA + * FPDU mode. + * + * This event can only be received in the CONN_RECV state. If the + * remote peer closed, the ESTABLISHED event would be received followed + * by the CLOSE event. If the app closes, it will block until we wake + * it up after processing this event. + */ +static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event) +{ + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + + /* + * We clear the CONNECT_WAIT bit here to allow the callback + * function to call iw_cm_disconnect. Calling iw_destroy_cm_id + * from a callback handler is not allowed. + */ + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); + cm_id_priv->state = IW_CM_STATE_ESTABLISHED; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); + wake_up_all(&cm_id_priv->connect_wait); + + return ret; +} + +/* + * Active Side: CM_ID <-- ESTABLISHED + * + * The app has called connect and is waiting for the established event to + * post it's requests to the server. This event will wake up anyone + * blocked in iw_cm_disconnect or iw_destroy_id. + */ +static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event) +{ + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + /* + * Clear the connect wait bit so a callback function calling + * iw_cm_disconnect will not wait and deadlock this thread + */ + clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); + BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); + if (iw_event->status == IW_CM_EVENT_STATUS_ACCEPTED) { + cm_id_priv->id.local_addr = iw_event->local_addr; + cm_id_priv->id.remote_addr = iw_event->remote_addr; + cm_id_priv->state = IW_CM_STATE_ESTABLISHED; + } else { + /* REJECTED or RESET */ + cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); + cm_id_priv->qp = NULL; + cm_id_priv->state = IW_CM_STATE_IDLE; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); + + if (iw_event->private_data_len) + kfree(iw_event->private_data); + + /* Wake up waiters on connect complete */ + wake_up_all(&cm_id_priv->connect_wait); + + return ret; +} + +/* + * CM_ID <-- CLOSING + * + * If in the ESTABLISHED state, move to CLOSING. + */ +static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event) +{ + unsigned long flags; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED) + cm_id_priv->state = IW_CM_STATE_CLOSING; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); +} + +/* + * CM_ID <-- IDLE + * + * If in the ESTBLISHED or CLOSING states, the QP will have have been + * moved by the provider to the ERR state. Disassociate the CM_ID from + * the QP, move to IDLE, and remove the 'connected' reference. + * + * If in some other state, the cm_id was destroyed asynchronously. + * This is the last reference that will result in waking up + * the app thread blocked in iw_destroy_cm_id. + */ +static int cm_close_handler(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event) +{ + unsigned long flags; + int ret = 0; + spin_lock_irqsave(&cm_id_priv->lock, flags); + + if (cm_id_priv->qp) { + cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); + cm_id_priv->qp = NULL; + } + switch (cm_id_priv->state) { + case IW_CM_STATE_ESTABLISHED: + case IW_CM_STATE_CLOSING: + cm_id_priv->state = IW_CM_STATE_IDLE; + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); + spin_lock_irqsave(&cm_id_priv->lock, flags); + break; + case IW_CM_STATE_DESTROYING: + break; + default: + BUG(); + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + return ret; +} + +static int process_event(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event) +{ + int ret = 0; + + switch (iw_event->event) { + case IW_CM_EVENT_CONNECT_REQUEST: + cm_conn_req_handler(cm_id_priv, iw_event); + break; + case IW_CM_EVENT_CONNECT_REPLY: + ret = cm_conn_rep_handler(cm_id_priv, iw_event); + break; + case IW_CM_EVENT_ESTABLISHED: + ret = cm_conn_est_handler(cm_id_priv, iw_event); + break; + case IW_CM_EVENT_DISCONNECT: + cm_disconnect_handler(cm_id_priv, iw_event); + break; + case IW_CM_EVENT_CLOSE: + ret = cm_close_handler(cm_id_priv, iw_event); + break; + default: + BUG(); + } + + return ret; +} + +/* + * Process events on the work_list for the cm_id. If the callback + * function requests that the cm_id be deleted, a flag is set in the + * cm_id flags to indicate that when the last reference is + * removed, the cm_id is to be destroyed. This is necessary to + * distinguish between an object that will be destroyed by the app + * thread asleep on the destroy_comp list vs. an object destroyed + * here synchronously when the last reference is removed. + */ +static void cm_work_handler(void *arg) +{ + struct iwcm_work *work = arg, lwork; + struct iwcm_id_private *cm_id_priv = work->cm_id; + unsigned long flags; + int empty; + int ret = 0; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + empty = list_empty(&cm_id_priv->work_list); + while (!empty) { + work = list_entry(cm_id_priv->work_list.next, + struct iwcm_work, list); + list_del_init(&work->list); + empty = list_empty(&cm_id_priv->work_list); + lwork = *work; + put_work(work); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + + ret = process_event(cm_id_priv, &work->event); + if (ret) { + set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); + destroy_cm_id(&cm_id_priv->id); + } + BUG_ON(atomic_read(&cm_id_priv->refcount)==0); + if (iwcm_deref_id(cm_id_priv)) + return; + + if (atomic_read(&cm_id_priv->refcount)==0 && + test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) { + dealloc_work_entries(cm_id_priv); + kfree(cm_id_priv); + return; + } + spin_lock_irqsave(&cm_id_priv->lock, flags); + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); +} + +/* + * This function is called on interrupt context. Schedule events on + * the iwcm_wq thread to allow callback functions to downcall into + * the CM and/or block. Events are queued to a per-CM_ID + * work_list. If this is the first event on the work_list, the work + * element is also queued on the iwcm_wq thread. + * + * Each event holds a reference on the cm_id. Until the last posted + * event has been delivered and processed, the cm_id cannot be + * deleted. + * + * Returns: + * 0 - the event was handled. + * -ENOMEM - the event was not handled due to lack of resources. + */ +static int cm_event_handler(struct iw_cm_id *cm_id, + struct iw_cm_event *iw_event) +{ + struct iwcm_work *work; + struct iwcm_id_private *cm_id_priv; + unsigned long flags; + int ret = 0; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + + spin_lock_irqsave(&cm_id_priv->lock, flags); + work = get_work(cm_id_priv); + if (!work) { + ret = -ENOMEM; + goto out; + } + + INIT_WORK(&work->work, cm_work_handler, work); + work->cm_id = cm_id_priv; + work->event = *iw_event; + + if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST || + work->event.event == IW_CM_EVENT_CONNECT_REPLY) && + work->event.private_data_len) { + ret = copy_private_data(cm_id_priv, &work->event); + if (ret) { + put_work(work); + goto out; + } + } + + atomic_inc(&cm_id_priv->refcount); + if (list_empty(&cm_id_priv->work_list)) { + list_add_tail(&work->list, &cm_id_priv->work_list); + queue_work(iwcm_wq, &work->work); + } else + list_add_tail(&work->list, &cm_id_priv->work_list); +out: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} + +static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->state) { + case IW_CM_STATE_IDLE: + case IW_CM_STATE_CONN_SENT: + case IW_CM_STATE_CONN_RECV: + case IW_CM_STATE_ESTABLISHED: + *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; + qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE| + IB_ACCESS_REMOTE_READ; + ret = 0; + break; + default: + ret = -EINVAL; + break; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} + +static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cm_id_priv->lock, flags); + switch (cm_id_priv->state) { + case IW_CM_STATE_IDLE: + case IW_CM_STATE_CONN_SENT: + case IW_CM_STATE_CONN_RECV: + case IW_CM_STATE_ESTABLISHED: + *qp_attr_mask = 0; + ret = 0; + break; + default: + ret = -EINVAL; + break; + } + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; +} + +int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + struct iwcm_id_private *cm_id_priv; + int ret; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + switch (qp_attr->qp_state) { + case IB_QPS_INIT: + case IB_QPS_RTR: + ret = iwcm_init_qp_init_attr(cm_id_priv, + qp_attr, qp_attr_mask); + break; + case IB_QPS_RTS: + ret = iwcm_init_qp_rts_attr(cm_id_priv, + qp_attr, qp_attr_mask); + break; + default: + ret = -EINVAL; + break; + } + return ret; +} +EXPORT_SYMBOL(iw_cm_init_qp_attr); + +static int __init iw_cm_init(void) +{ + iwcm_wq = create_singlethread_workqueue("iw_cm_wq"); + if (!iwcm_wq) + return -ENOMEM; + + return 0; +} + +static void __exit iw_cm_cleanup(void) +{ + destroy_workqueue(iwcm_wq); +} + +module_init(iw_cm_init); +module_exit(iw_cm_cleanup); diff --git a/drivers/infiniband/core/iwcm.h b/drivers/infiniband/core/iwcm.h new file mode 100644 index 000000000000..3f6cc82564c8 --- /dev/null +++ b/drivers/infiniband/core/iwcm.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef IWCM_H +#define IWCM_H + +enum iw_cm_state { + IW_CM_STATE_IDLE, /* unbound, inactive */ + IW_CM_STATE_LISTEN, /* listen waiting for connect */ + IW_CM_STATE_CONN_RECV, /* inbound waiting for user accept */ + IW_CM_STATE_CONN_SENT, /* outbound waiting for peer accept */ + IW_CM_STATE_ESTABLISHED, /* established */ + IW_CM_STATE_CLOSING, /* disconnect */ + IW_CM_STATE_DESTROYING /* object being deleted */ +}; + +struct iwcm_id_private { + struct iw_cm_id id; + enum iw_cm_state state; + unsigned long flags; + struct ib_qp *qp; + struct completion destroy_comp; + wait_queue_head_t connect_wait; + struct list_head work_list; + spinlock_t lock; + atomic_t refcount; + struct list_head work_free_list; +}; + +#define IWCM_F_CALLBACK_DESTROY 1 +#define IWCM_F_CONNECT_WAIT 2 + +#endif /* IWCM_H */ diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h new file mode 100644 index 000000000000..aeefa9b740dc --- /dev/null +++ b/include/rdma/iw_cm.h @@ -0,0 +1,258 @@ +/* + * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef IW_CM_H +#define IW_CM_H + +#include +#include + +struct iw_cm_id; + +enum iw_cm_event_type { + IW_CM_EVENT_CONNECT_REQUEST = 1, /* connect request received */ + IW_CM_EVENT_CONNECT_REPLY, /* reply from active connect request */ + IW_CM_EVENT_ESTABLISHED, /* passive side accept successful */ + IW_CM_EVENT_DISCONNECT, /* orderly shutdown */ + IW_CM_EVENT_CLOSE /* close complete */ +}; + +enum iw_cm_event_status { + IW_CM_EVENT_STATUS_OK = 0, /* request successful */ + IW_CM_EVENT_STATUS_ACCEPTED = 0, /* connect request accepted */ + IW_CM_EVENT_STATUS_REJECTED, /* connect request rejected */ + IW_CM_EVENT_STATUS_TIMEOUT, /* the operation timed out */ + IW_CM_EVENT_STATUS_RESET, /* reset from remote peer */ + IW_CM_EVENT_STATUS_EINVAL, /* asynchronous failure for bad parm */ +}; + +struct iw_cm_event { + enum iw_cm_event_type event; + enum iw_cm_event_status status; + struct sockaddr_in local_addr; + struct sockaddr_in remote_addr; + void *private_data; + u8 private_data_len; + void* provider_data; +}; + +/** + * iw_cm_handler - Function to be called by the IW CM when delivering events + * to the client. + * + * @cm_id: The IW CM identifier associated with the event. + * @event: Pointer to the event structure. + */ +typedef int (*iw_cm_handler)(struct iw_cm_id *cm_id, + struct iw_cm_event *event); + +/** + * iw_event_handler - Function called by the provider when delivering provider + * events to the IW CM. Returns either 0 indicating the event was processed + * or -errno if the event could not be processed. + * + * @cm_id: The IW CM identifier associated with the event. + * @event: Pointer to the event structure. + */ +typedef int (*iw_event_handler)(struct iw_cm_id *cm_id, + struct iw_cm_event *event); + +struct iw_cm_id { + iw_cm_handler cm_handler; /* client callback function */ + void *context; /* client cb context */ + struct ib_device *device; + struct sockaddr_in local_addr; + struct sockaddr_in remote_addr; + void *provider_data; /* provider private data */ + iw_event_handler event_handler; /* cb for provider + events */ + /* Used by provider to add and remove refs on IW cm_id */ + void (*add_ref)(struct iw_cm_id *); + void (*rem_ref)(struct iw_cm_id *); +}; + +struct iw_cm_conn_param { + const void *private_data; + u16 private_data_len; + u32 ord; + u32 ird; + u32 qpn; +}; + +struct iw_cm_verbs { + void (*add_ref)(struct ib_qp *qp); + + void (*rem_ref)(struct ib_qp *qp); + + struct ib_qp * (*get_qp)(struct ib_device *device, + int qpn); + + int (*connect)(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *conn_param); + + int (*accept)(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *conn_param); + + int (*reject)(struct iw_cm_id *cm_id, + const void *pdata, u8 pdata_len); + + int (*create_listen)(struct iw_cm_id *cm_id, + int backlog); + + int (*destroy_listen)(struct iw_cm_id *cm_id); +}; + +/** + * iw_create_cm_id - Create an IW CM identifier. + * + * @device: The IB device on which to create the IW CM identier. + * @event_handler: User callback invoked to report events associated with the + * returned IW CM identifier. + * @context: User specified context associated with the id. + */ +struct iw_cm_id *iw_create_cm_id(struct ib_device *device, + iw_cm_handler cm_handler, void *context); + +/** + * iw_destroy_cm_id - Destroy an IW CM identifier. + * + * @cm_id: The previously created IW CM identifier to destroy. + * + * The client can assume that no events will be delivered for the CM ID after + * this function returns. + */ +void iw_destroy_cm_id(struct iw_cm_id *cm_id); + +/** + * iw_cm_bind_qp - Unbind the specified IW CM identifier and QP + * + * @cm_id: The IW CM idenfier to unbind from the QP. + * @qp: The QP + * + * This is called by the provider when destroying the QP to ensure + * that any references held by the IWCM are released. It may also + * be called by the IWCM when destroying a CM_ID to that any + * references held by the provider are released. + */ +void iw_cm_unbind_qp(struct iw_cm_id *cm_id, struct ib_qp *qp); + +/** + * iw_cm_get_qp - Return the ib_qp associated with a QPN + * + * @ib_device: The IB device + * @qpn: The queue pair number + */ +struct ib_qp *iw_cm_get_qp(struct ib_device *device, int qpn); + +/** + * iw_cm_listen - Listen for incoming connection requests on the + * specified IW CM id. + * + * @cm_id: The IW CM identifier. + * @backlog: The maximum number of outstanding un-accepted inbound listen + * requests to queue. + * + * The source address and port number are specified in the IW CM identifier + * structure. + */ +int iw_cm_listen(struct iw_cm_id *cm_id, int backlog); + +/** + * iw_cm_accept - Called to accept an incoming connect request. + * + * @cm_id: The IW CM identifier associated with the connection request. + * @iw_param: Pointer to a structure containing connection establishment + * parameters. + * + * The specified cm_id will have been provided in the event data for a + * CONNECT_REQUEST event. Subsequent events related to this connection will be + * delivered to the specified IW CM identifier prior and may occur prior to + * the return of this function. If this function returns a non-zero value, the + * client can assume that no events will be delivered to the specified IW CM + * identifier. + */ +int iw_cm_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param); + +/** + * iw_cm_reject - Reject an incoming connection request. + * + * @cm_id: Connection identifier associated with the request. + * @private_daa: Pointer to data to deliver to the remote peer as part of the + * reject message. + * @private_data_len: The number of bytes in the private_data parameter. + * + * The client can assume that no events will be delivered to the specified IW + * CM identifier following the return of this function. The private_data + * buffer is available for reuse when this function returns. + */ +int iw_cm_reject(struct iw_cm_id *cm_id, const void *private_data, + u8 private_data_len); + +/** + * iw_cm_connect - Called to request a connection to a remote peer. + * + * @cm_id: The IW CM identifier for the connection. + * @iw_param: Pointer to a structure containing connection establishment + * parameters. + * + * Events may be delivered to the specified IW CM identifier prior to the + * return of this function. If this function returns a non-zero value, the + * client can assume that no events will be delivered to the specified IW CM + * identifier. + */ +int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param); + +/** + * iw_cm_disconnect - Close the specified connection. + * + * @cm_id: The IW CM identifier to close. + * @abrupt: If 0, the connection will be closed gracefully, otherwise, the + * connection will be reset. + * + * The IW CM identifier is still active until the IW_CM_EVENT_CLOSE event is + * delivered. + */ +int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt); + +/** + * iw_cm_init_qp_attr - Called to initialize the attributes of the QP + * associated with a IW CM identifier. + * + * @cm_id: The IW CM identifier associated with the QP + * @qp_attr: Pointer to the QP attributes structure. + * @qp_attr_mask: Pointer to a bit vector specifying which QP attributes are + * valid. + */ +int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, struct ib_qp_attr *qp_attr, + int *qp_attr_mask); + +#endif /* IW_CM_H */ From 07ebafbaaa72aa6a35472879008f5a1d1d469a0c Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Thu, 3 Aug 2006 16:02:42 -0500 Subject: [PATCH 0773/1063] RDMA: iWARP Core Changes. Modifications to the existing rdma header files, core files, drivers, and ulp files to support iWARP, including: - Hook iWARP CM into the build system and use it in rdma_cm. - Convert enum ib_node_type to enum rdma_node_type, which includes the possibility of RDMA_NODE_RNIC, and update everything for this. Signed-off-by: Tom Tucker Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/core/Makefile | 4 +- drivers/infiniband/core/addr.c | 18 +- drivers/infiniband/core/cache.c | 5 +- drivers/infiniband/core/cm.c | 3 + drivers/infiniband/core/cma.c | 356 ++++++++++++++++--- drivers/infiniband/core/device.c | 4 +- drivers/infiniband/core/mad.c | 7 +- drivers/infiniband/core/sa_query.c | 5 +- drivers/infiniband/core/smi.c | 16 +- drivers/infiniband/core/sysfs.c | 11 +- drivers/infiniband/core/ucm.c | 3 +- drivers/infiniband/core/user_mad.c | 5 +- drivers/infiniband/core/verbs.c | 17 + drivers/infiniband/hw/ehca/ehca_main.c | 2 +- drivers/infiniband/hw/ipath/ipath_verbs.c | 2 +- drivers/infiniband/hw/mthca/mthca_provider.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_main.c | 8 +- drivers/infiniband/ulp/srp/ib_srp.c | 2 +- include/rdma/ib_addr.h | 17 +- include/rdma/ib_verbs.h | 25 +- 20 files changed, 431 insertions(+), 81 deletions(-) diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 68e73ec2d1f8..163d991eb8c9 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -1,7 +1,7 @@ infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o rdma_cm.o obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \ - ib_cm.o $(infiniband-y) + ib_cm.o iw_cm.o $(infiniband-y) obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o @@ -14,6 +14,8 @@ ib_sa-y := sa_query.o ib_cm-y := cm.o +iw_cm-y := iwcm.o + rdma_cm-y := cma.o ib_addr-y := addr.o diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index d8e54e002ce3..9cbf09e2052f 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -61,12 +61,15 @@ static LIST_HEAD(req_list); static DECLARE_WORK(work, process_req, NULL); static struct workqueue_struct *addr_wq; -static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, - unsigned char *dst_dev_addr) +int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, + const unsigned char *dst_dev_addr) { switch (dev->type) { case ARPHRD_INFINIBAND: - dev_addr->dev_type = IB_NODE_CA; + dev_addr->dev_type = RDMA_NODE_IB_CA; + break; + case ARPHRD_ETHER: + dev_addr->dev_type = RDMA_NODE_RNIC; break; default: return -EADDRNOTAVAIL; @@ -78,6 +81,7 @@ static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); return 0; } +EXPORT_SYMBOL(rdma_copy_addr); int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) { @@ -89,7 +93,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) if (!dev) return -EADDRNOTAVAIL; - ret = copy_addr(dev_addr, dev, NULL); + ret = rdma_copy_addr(dev_addr, dev, NULL); dev_put(dev); return ret; } @@ -161,7 +165,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in, /* If the device does ARP internally, return 'done' */ if (rt->idev->dev->flags & IFF_NOARP) { - copy_addr(addr, rt->idev->dev, NULL); + rdma_copy_addr(addr, rt->idev->dev, NULL); goto put; } @@ -181,7 +185,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in, src_in->sin_addr.s_addr = rt->rt_src; } - ret = copy_addr(addr, neigh->dev, neigh->ha); + ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); release: neigh_release(neigh); put: @@ -245,7 +249,7 @@ static int addr_resolve_local(struct sockaddr_in *src_in, if (ZERONET(src_ip)) { src_in->sin_family = dst_in->sin_family; src_in->sin_addr.s_addr = dst_ip; - ret = copy_addr(addr, dev, dev->dev_addr); + ret = rdma_copy_addr(addr, dev, dev->dev_addr); } else if (LOOPBACK(src_ip)) { ret = rdma_translate_ip((struct sockaddr *)dst_in, addr); if (!ret) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 75313ade2e0d..20e9f64e67a6 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -62,12 +62,13 @@ struct ib_update_work { static inline int start_port(struct ib_device *device) { - return device->node_type == IB_NODE_SWITCH ? 0 : 1; + return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1; } static inline int end_port(struct ib_device *device) { - return device->node_type == IB_NODE_SWITCH ? 0 : device->phys_port_cnt; + return (device->node_type == RDMA_NODE_IB_SWITCH) ? + 0 : device->phys_port_cnt; } int ib_get_cached_gid(struct ib_device *device, diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 1c145fe92a54..e130d2e89515 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3280,6 +3280,9 @@ static void cm_add_one(struct ib_device *device) int ret; u8 i; + if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + return; + cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) * device->phys_port_cnt, GFP_KERNEL); if (!cm_dev) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 9d58bb59cd45..e88a7c652ca0 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -35,6 +35,7 @@ #include #include #include +#include #include @@ -43,6 +44,7 @@ #include #include #include +#include MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("Generic RDMA CM Agent"); @@ -124,6 +126,7 @@ struct rdma_id_private { int query_id; union { struct ib_cm_id *ib; + struct iw_cm_id *iw; } cm_id; u32 seq_num; @@ -259,14 +262,23 @@ static void cma_detach_from_dev(struct rdma_id_private *id_priv) id_priv->cma_dev = NULL; } -static int cma_acquire_ib_dev(struct rdma_id_private *id_priv) +static int cma_acquire_dev(struct rdma_id_private *id_priv) { + enum rdma_node_type dev_type = id_priv->id.route.addr.dev_addr.dev_type; struct cma_device *cma_dev; union ib_gid gid; int ret = -ENODEV; - ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid), - + switch (rdma_node_get_transport(dev_type)) { + case RDMA_TRANSPORT_IB: + ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); + break; + case RDMA_TRANSPORT_IWARP: + iw_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); + break; + default: + return -ENODEV; + } mutex_lock(&lock); list_for_each_entry(cma_dev, &dev_list, list) { ret = ib_find_cached_gid(cma_dev->device, &gid, @@ -280,16 +292,6 @@ static int cma_acquire_ib_dev(struct rdma_id_private *id_priv) return ret; } -static int cma_acquire_dev(struct rdma_id_private *id_priv) -{ - switch (id_priv->id.route.addr.dev_addr.dev_type) { - case IB_NODE_CA: - return cma_acquire_ib_dev(id_priv); - default: - return -ENODEV; - } -} - static void cma_deref_id(struct rdma_id_private *id_priv) { if (atomic_dec_and_test(&id_priv->refcount)) @@ -347,6 +349,16 @@ static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) IB_QP_PKEY_INDEX | IB_QP_PORT); } +static int cma_init_iw_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + + qp_attr.qp_state = IB_QPS_INIT; + qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE; + + return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS); +} + int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr) { @@ -362,10 +374,13 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, if (IS_ERR(qp)) return PTR_ERR(qp); - switch (id->device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: ret = cma_init_ib_qp(id_priv, qp); break; + case RDMA_TRANSPORT_IWARP: + ret = cma_init_iw_qp(id_priv, qp); + break; default: ret = -ENOSYS; break; @@ -451,13 +466,17 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, int ret; id_priv = container_of(id, struct rdma_id_private, id); - switch (id_priv->id.device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id_priv->id.device->node_type)) { + case RDMA_TRANSPORT_IB: ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, qp_attr_mask); if (qp_attr->qp_state == IB_QPS_RTR) qp_attr->rq_psn = id_priv->seq_num; break; + case RDMA_TRANSPORT_IWARP: + ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, + qp_attr_mask); + break; default: ret = -ENOSYS; break; @@ -590,8 +609,8 @@ static int cma_notify_user(struct rdma_id_private *id_priv, static void cma_cancel_route(struct rdma_id_private *id_priv) { - switch (id_priv->id.device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id_priv->id.device->node_type)) { + case RDMA_TRANSPORT_IB: if (id_priv->query) ib_sa_cancel_query(id_priv->query_id, id_priv->query); break; @@ -611,11 +630,15 @@ static void cma_destroy_listen(struct rdma_id_private *id_priv) cma_exch(id_priv, CMA_DESTROYING); if (id_priv->cma_dev) { - switch (id_priv->id.device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id_priv->id.device->node_type)) { + case RDMA_TRANSPORT_IB: if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) ib_destroy_cm_id(id_priv->cm_id.ib); break; + case RDMA_TRANSPORT_IWARP: + if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw)) + iw_destroy_cm_id(id_priv->cm_id.iw); + break; default: break; } @@ -690,11 +713,15 @@ void rdma_destroy_id(struct rdma_cm_id *id) cma_cancel_operation(id_priv, state); if (id_priv->cma_dev) { - switch (id->device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) ib_destroy_cm_id(id_priv->cm_id.ib); break; + case RDMA_TRANSPORT_IWARP: + if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw)) + iw_destroy_cm_id(id_priv->cm_id.iw); + break; default: break; } @@ -869,7 +896,7 @@ static struct rdma_id_private *cma_new_id(struct rdma_cm_id *listen_id, ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); - rt->addr.dev_addr.dev_type = IB_NODE_CA; + rt->addr.dev_addr.dev_type = RDMA_NODE_IB_CA; id_priv = container_of(id, struct rdma_id_private, id); id_priv->state = CMA_CONNECT; @@ -898,7 +925,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) } atomic_inc(&conn_id->dev_remove); - ret = cma_acquire_ib_dev(conn_id); + ret = cma_acquire_dev(conn_id); if (ret) { ret = -ENODEV; cma_release_remove(conn_id); @@ -982,6 +1009,128 @@ static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr, } } +static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) +{ + struct rdma_id_private *id_priv = iw_id->context; + enum rdma_cm_event_type event = 0; + struct sockaddr_in *sin; + int ret = 0; + + atomic_inc(&id_priv->dev_remove); + + switch (iw_event->event) { + case IW_CM_EVENT_CLOSE: + event = RDMA_CM_EVENT_DISCONNECTED; + break; + case IW_CM_EVENT_CONNECT_REPLY: + sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; + *sin = iw_event->local_addr; + sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr; + *sin = iw_event->remote_addr; + if (iw_event->status) + event = RDMA_CM_EVENT_REJECTED; + else + event = RDMA_CM_EVENT_ESTABLISHED; + break; + case IW_CM_EVENT_ESTABLISHED: + event = RDMA_CM_EVENT_ESTABLISHED; + break; + default: + BUG_ON(1); + } + + ret = cma_notify_user(id_priv, event, iw_event->status, + iw_event->private_data, + iw_event->private_data_len); + if (ret) { + /* Destroy the CM ID by returning a non-zero value. */ + id_priv->cm_id.iw = NULL; + cma_exch(id_priv, CMA_DESTROYING); + cma_release_remove(id_priv); + rdma_destroy_id(&id_priv->id); + return ret; + } + + cma_release_remove(id_priv); + return ret; +} + +static int iw_conn_req_handler(struct iw_cm_id *cm_id, + struct iw_cm_event *iw_event) +{ + struct rdma_cm_id *new_cm_id; + struct rdma_id_private *listen_id, *conn_id; + struct sockaddr_in *sin; + struct net_device *dev = NULL; + int ret; + + listen_id = cm_id->context; + atomic_inc(&listen_id->dev_remove); + if (!cma_comp(listen_id, CMA_LISTEN)) { + ret = -ECONNABORTED; + goto out; + } + + /* Create a new RDMA id for the new IW CM ID */ + new_cm_id = rdma_create_id(listen_id->id.event_handler, + listen_id->id.context, + RDMA_PS_TCP); + if (!new_cm_id) { + ret = -ENOMEM; + goto out; + } + conn_id = container_of(new_cm_id, struct rdma_id_private, id); + atomic_inc(&conn_id->dev_remove); + conn_id->state = CMA_CONNECT; + + dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr); + if (!dev) { + ret = -EADDRNOTAVAIL; + cma_release_remove(conn_id); + rdma_destroy_id(new_cm_id); + goto out; + } + ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL); + if (ret) { + cma_release_remove(conn_id); + rdma_destroy_id(new_cm_id); + goto out; + } + + ret = cma_acquire_dev(conn_id); + if (ret) { + cma_release_remove(conn_id); + rdma_destroy_id(new_cm_id); + goto out; + } + + conn_id->cm_id.iw = cm_id; + cm_id->context = conn_id; + cm_id->cm_handler = cma_iw_handler; + + sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr; + *sin = iw_event->local_addr; + sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr; + *sin = iw_event->remote_addr; + + ret = cma_notify_user(conn_id, RDMA_CM_EVENT_CONNECT_REQUEST, 0, + iw_event->private_data, + iw_event->private_data_len); + if (ret) { + /* User wants to destroy the CM ID */ + conn_id->cm_id.iw = NULL; + cma_exch(conn_id, CMA_DESTROYING); + cma_release_remove(conn_id); + rdma_destroy_id(&conn_id->id); + } + +out: + if (dev) + dev_put(dev); + cma_release_remove(listen_id); + return ret; +} + static int cma_ib_listen(struct rdma_id_private *id_priv) { struct ib_cm_compare_data compare_data; @@ -1011,6 +1160,30 @@ static int cma_ib_listen(struct rdma_id_private *id_priv) return ret; } +static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) +{ + int ret; + struct sockaddr_in *sin; + + id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device, + iw_conn_req_handler, + id_priv); + if (IS_ERR(id_priv->cm_id.iw)) + return PTR_ERR(id_priv->cm_id.iw); + + sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; + id_priv->cm_id.iw->local_addr = *sin; + + ret = iw_cm_listen(id_priv->cm_id.iw, backlog); + + if (ret) { + iw_destroy_cm_id(id_priv->cm_id.iw); + id_priv->cm_id.iw = NULL; + } + + return ret; +} + static int cma_listen_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) { @@ -1087,12 +1260,17 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) id_priv->backlog = backlog; if (id->device) { - switch (id->device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: ret = cma_ib_listen(id_priv); if (ret) goto err; break; + case RDMA_TRANSPORT_IWARP: + ret = cma_iw_listen(id_priv, backlog); + if (ret) + goto err; + break; default: ret = -ENOSYS; goto err; @@ -1231,6 +1409,23 @@ err: } EXPORT_SYMBOL(rdma_set_ib_paths); +static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) +{ + struct cma_work *work; + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (!work) + return -ENOMEM; + + work->id = id_priv; + INIT_WORK(&work->work, cma_work_handler, work); + work->old_state = CMA_ROUTE_QUERY; + work->new_state = CMA_ROUTE_RESOLVED; + work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; + queue_work(cma_wq, &work->work); + return 0; +} + int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) { struct rdma_id_private *id_priv; @@ -1241,10 +1436,13 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) return -EINVAL; atomic_inc(&id_priv->refcount); - switch (id->device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: ret = cma_resolve_ib_route(id_priv, timeout_ms); break; + case RDMA_TRANSPORT_IWARP: + ret = cma_resolve_iw_route(id_priv, timeout_ms); + break; default: ret = -ENOSYS; break; @@ -1649,6 +1847,47 @@ out: return ret; } +static int cma_connect_iw(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) +{ + struct iw_cm_id *cm_id; + struct sockaddr_in* sin; + int ret; + struct iw_cm_conn_param iw_param; + + cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv); + if (IS_ERR(cm_id)) { + ret = PTR_ERR(cm_id); + goto out; + } + + id_priv->cm_id.iw = cm_id; + + sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr; + cm_id->local_addr = *sin; + + sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr; + cm_id->remote_addr = *sin; + + ret = cma_modify_qp_rtr(&id_priv->id); + if (ret) { + iw_destroy_cm_id(cm_id); + return ret; + } + + iw_param.ord = conn_param->initiator_depth; + iw_param.ird = conn_param->responder_resources; + iw_param.private_data = conn_param->private_data; + iw_param.private_data_len = conn_param->private_data_len; + if (id_priv->id.qp) + iw_param.qpn = id_priv->qp_num; + else + iw_param.qpn = conn_param->qp_num; + ret = iw_cm_connect(cm_id, &iw_param); +out: + return ret; +} + int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) { struct rdma_id_private *id_priv; @@ -1664,10 +1903,13 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) id_priv->srq = conn_param->srq; } - switch (id->device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: ret = cma_connect_ib(id_priv, conn_param); break; + case RDMA_TRANSPORT_IWARP: + ret = cma_connect_iw(id_priv, conn_param); + break; default: ret = -ENOSYS; break; @@ -1708,6 +1950,28 @@ static int cma_accept_ib(struct rdma_id_private *id_priv, return ib_send_cm_rep(id_priv->cm_id.ib, &rep); } +static int cma_accept_iw(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) +{ + struct iw_cm_conn_param iw_param; + int ret; + + ret = cma_modify_qp_rtr(&id_priv->id); + if (ret) + return ret; + + iw_param.ord = conn_param->initiator_depth; + iw_param.ird = conn_param->responder_resources; + iw_param.private_data = conn_param->private_data; + iw_param.private_data_len = conn_param->private_data_len; + if (id_priv->id.qp) { + iw_param.qpn = id_priv->qp_num; + } else + iw_param.qpn = conn_param->qp_num; + + return iw_cm_accept(id_priv->cm_id.iw, &iw_param); +} + int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) { struct rdma_id_private *id_priv; @@ -1723,13 +1987,16 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) id_priv->srq = conn_param->srq; } - switch (id->device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: if (conn_param) ret = cma_accept_ib(id_priv, conn_param); else ret = cma_rep_recv(id_priv); break; + case RDMA_TRANSPORT_IWARP: + ret = cma_accept_iw(id_priv, conn_param); + break; default: ret = -ENOSYS; break; @@ -1756,12 +2023,16 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data, if (!cma_comp(id_priv, CMA_CONNECT)) return -EINVAL; - switch (id->device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: ret = ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, private_data, private_data_len); break; + case RDMA_TRANSPORT_IWARP: + ret = iw_cm_reject(id_priv->cm_id.iw, + private_data, private_data_len); + break; default: ret = -ENOSYS; break; @@ -1780,17 +2051,20 @@ int rdma_disconnect(struct rdma_cm_id *id) !cma_comp(id_priv, CMA_DISCONNECT)) return -EINVAL; - ret = cma_modify_qp_err(id); - if (ret) - goto out; - - switch (id->device->node_type) { - case IB_NODE_CA: + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: + ret = cma_modify_qp_err(id); + if (ret) + goto out; /* Initiate or respond to a disconnect. */ if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); break; + case RDMA_TRANSPORT_IWARP: + ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); + break; default: + ret = -EINVAL; break; } out: diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index b2f3cb91d9bc..d978fbe97535 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -505,7 +505,7 @@ int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr) { - if (device->node_type == IB_NODE_SWITCH) { + if (device->node_type == RDMA_NODE_IB_SWITCH) { if (port_num) return -EINVAL; } else if (port_num < 1 || port_num > device->phys_port_cnt) @@ -580,7 +580,7 @@ int ib_modify_port(struct ib_device *device, u8 port_num, int port_modify_mask, struct ib_port_modify *port_modify) { - if (device->node_type == IB_NODE_SWITCH) { + if (device->node_type == RDMA_NODE_IB_SWITCH) { if (port_num) return -EINVAL; } else if (port_num < 1 || port_num > device->phys_port_cnt) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 32d3028b274b..082f03c158f0 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -2876,7 +2876,10 @@ static void ib_mad_init_device(struct ib_device *device) { int start, end, i; - if (device->node_type == IB_NODE_SWITCH) { + if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + return; + + if (device->node_type == RDMA_NODE_IB_SWITCH) { start = 0; end = 0; } else { @@ -2923,7 +2926,7 @@ static void ib_mad_remove_device(struct ib_device *device) { int i, num_ports, cur_port; - if (device->node_type == IB_NODE_SWITCH) { + if (device->node_type == RDMA_NODE_IB_SWITCH) { num_ports = 1; cur_port = 0; } else { diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index df762ba4868f..ca8760a7d88c 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -919,7 +919,10 @@ static void ib_sa_add_one(struct ib_device *device) struct ib_sa_device *sa_dev; int s, e, i; - if (device->node_type == IB_NODE_SWITCH) + if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + return; + + if (device->node_type == RDMA_NODE_IB_SWITCH) s = e = 0; else { s = 1; diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c index 35852e794e26..54b81e17ad50 100644 --- a/drivers/infiniband/core/smi.c +++ b/drivers/infiniband/core/smi.c @@ -64,7 +64,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp, /* C14-9:2 */ if (hop_ptr && hop_ptr < hop_cnt) { - if (node_type != IB_NODE_SWITCH) + if (node_type != RDMA_NODE_IB_SWITCH) return 0; /* smp->return_path set when received */ @@ -77,7 +77,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp, if (hop_ptr == hop_cnt) { /* smp->return_path set when received */ smp->hop_ptr++; - return (node_type == IB_NODE_SWITCH || + return (node_type == RDMA_NODE_IB_SWITCH || smp->dr_dlid == IB_LID_PERMISSIVE); } @@ -95,7 +95,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp, /* C14-13:2 */ if (2 <= hop_ptr && hop_ptr <= hop_cnt) { - if (node_type != IB_NODE_SWITCH) + if (node_type != RDMA_NODE_IB_SWITCH) return 0; smp->hop_ptr--; @@ -107,7 +107,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp, if (hop_ptr == 1) { smp->hop_ptr--; /* C14-13:3 -- SMPs destined for SM shouldn't be here */ - return (node_type == IB_NODE_SWITCH || + return (node_type == RDMA_NODE_IB_SWITCH || smp->dr_slid == IB_LID_PERMISSIVE); } @@ -142,7 +142,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp, /* C14-9:2 -- intermediate hop */ if (hop_ptr && hop_ptr < hop_cnt) { - if (node_type != IB_NODE_SWITCH) + if (node_type != RDMA_NODE_IB_SWITCH) return 0; smp->return_path[hop_ptr] = port_num; @@ -156,7 +156,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp, smp->return_path[hop_ptr] = port_num; /* smp->hop_ptr updated when sending */ - return (node_type == IB_NODE_SWITCH || + return (node_type == RDMA_NODE_IB_SWITCH || smp->dr_dlid == IB_LID_PERMISSIVE); } @@ -175,7 +175,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp, /* C14-13:2 */ if (2 <= hop_ptr && hop_ptr <= hop_cnt) { - if (node_type != IB_NODE_SWITCH) + if (node_type != RDMA_NODE_IB_SWITCH) return 0; /* smp->hop_ptr updated when sending */ @@ -190,7 +190,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp, return 1; } /* smp->hop_ptr updated when sending */ - return (node_type == IB_NODE_SWITCH); + return (node_type == RDMA_NODE_IB_SWITCH); } /* C14-13:4 -- hop_ptr = 0 -> give to SM */ diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index fb6660564a30..709323c14c5d 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -589,10 +589,11 @@ static ssize_t show_node_type(struct class_device *cdev, char *buf) return -ENODEV; switch (dev->node_type) { - case IB_NODE_CA: return sprintf(buf, "%d: CA\n", dev->node_type); - case IB_NODE_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); - case IB_NODE_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type); - default: return sprintf(buf, "%d: \n", dev->node_type); + case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type); + case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type); + case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); + case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type); + default: return sprintf(buf, "%d: \n", dev->node_type); } } @@ -708,7 +709,7 @@ int ib_device_register_sysfs(struct ib_device *device) if (ret) goto err_put; - if (device->node_type == IB_NODE_SWITCH) { + if (device->node_type == RDMA_NODE_IB_SWITCH) { ret = add_port(device, 0); if (ret) goto err_put; diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index e74c964af7fa..ad4f4d5c2924 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -1247,7 +1247,8 @@ static void ib_ucm_add_one(struct ib_device *device) { struct ib_ucm_device *ucm_dev; - if (!device->alloc_ucontext) + if (!device->alloc_ucontext || + rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) return; ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 8a455aec758f..807fbd6b8414 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1032,7 +1032,10 @@ static void ib_umad_add_one(struct ib_device *device) struct ib_umad_device *umad_dev; int s, e, i; - if (device->node_type == IB_NODE_SWITCH) + if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + return; + + if (device->node_type == RDMA_NODE_IB_SWITCH) s = e = 0; else { s = 1; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 06f98e9e14f9..8b5dd3649bbf 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -79,6 +79,23 @@ enum ib_rate mult_to_ib_rate(int mult) } EXPORT_SYMBOL(mult_to_ib_rate); +enum rdma_transport_type +rdma_node_get_transport(enum rdma_node_type node_type) +{ + switch (node_type) { + case RDMA_NODE_IB_CA: + case RDMA_NODE_IB_SWITCH: + case RDMA_NODE_IB_ROUTER: + return RDMA_TRANSPORT_IB; + case RDMA_NODE_RNIC: + return RDMA_TRANSPORT_IWARP; + default: + BUG(); + return 0; + } +} +EXPORT_SYMBOL(rdma_node_get_transport); + /* Protection domains */ struct ib_pd *ib_alloc_pd(struct ib_device *device) diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 2a99f2d13cdb..2380994418a5 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -269,7 +269,7 @@ int ehca_register_device(struct ehca_shca *shca) (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); - shca->ib_device.node_type = IB_NODE_CA; + shca->ib_device.node_type = RDMA_NODE_IB_CA; shca->ib_device.phys_port_cnt = shca->num_ports; shca->ib_device.dma_device = &shca->ibmebus_dev->ofdev.dev; shca->ib_device.query_device = ehca_query_device; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index fbda7739715f..b8381c5e72bd 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -1538,7 +1538,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd) (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); - dev->node_type = IB_NODE_CA; + dev->node_type = RDMA_NODE_IB_CA; dev->phys_port_cnt = 1; dev->dma_device = &dd->pcidev->dev; dev->class_dev.dev = dev->dma_device; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 265b1d1c4a62..981fe2eebdfa 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -1288,7 +1288,7 @@ int mthca_register_device(struct mthca_dev *dev) (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); - dev->ib_dev.node_type = IB_NODE_CA; + dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.phys_port_cnt = dev->limits.num_ports; dev->ib_dev.dma_device = &dev->pdev->dev; dev->ib_dev.class_dev.dev = &dev->pdev->dev; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 36d76987a481..e9a7659eb1d7 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1111,13 +1111,16 @@ static void ipoib_add_one(struct ib_device *device) struct ipoib_dev_priv *priv; int s, e, p; + if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + return; + dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL); if (!dev_list) return; INIT_LIST_HEAD(dev_list); - if (device->node_type == IB_NODE_SWITCH) { + if (device->node_type == RDMA_NODE_IB_SWITCH) { s = 0; e = 0; } else { @@ -1141,6 +1144,9 @@ static void ipoib_remove_one(struct ib_device *device) struct ipoib_dev_priv *priv, *tmp; struct list_head *dev_list; + if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + return; + dev_list = ib_get_client_data(device, &ipoib_client); list_for_each_entry_safe(priv, tmp, dev_list, list) { diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 61c13d1e0506..feb1fcd0f2fb 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1899,7 +1899,7 @@ static void srp_add_one(struct ib_device *device) if (IS_ERR(srp_dev->fmr_pool)) srp_dev->fmr_pool = NULL; - if (device->node_type == IB_NODE_SWITCH) { + if (device->node_type == RDMA_NODE_IB_SWITCH) { s = 0; e = 0; } else { diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 0ff67398928d..81b62307621d 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -40,7 +40,7 @@ struct rdma_dev_addr { unsigned char src_dev_addr[MAX_ADDR_LEN]; unsigned char dst_dev_addr[MAX_ADDR_LEN]; unsigned char broadcast[MAX_ADDR_LEN]; - enum ib_node_type dev_type; + enum rdma_node_type dev_type; }; /** @@ -72,6 +72,9 @@ int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr, void rdma_addr_cancel(struct rdma_dev_addr *addr); +int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, + const unsigned char *dst_dev_addr); + static inline int ip_addr_size(struct sockaddr *addr) { return addr->sa_family == AF_INET6 ? @@ -113,4 +116,16 @@ static inline void ib_addr_set_dgid(struct rdma_dev_addr *dev_addr, memcpy(dev_addr->dst_dev_addr + 4, gid, sizeof *gid); } +static inline void iw_addr_get_sgid(struct rdma_dev_addr *dev_addr, + union ib_gid *gid) +{ + memcpy(gid, dev_addr->src_dev_addr, sizeof *gid); +} + +static inline void iw_addr_get_dgid(struct rdma_dev_addr *dev_addr, + union ib_gid *gid) +{ + memcpy(gid, dev_addr->dst_dev_addr, sizeof *gid); +} + #endif /* IB_ADDR_H */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 61eed3996117..8eacc3510993 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -56,12 +56,22 @@ union ib_gid { } global; }; -enum ib_node_type { - IB_NODE_CA = 1, - IB_NODE_SWITCH, - IB_NODE_ROUTER +enum rdma_node_type { + /* IB values map to NodeInfo:NodeType. */ + RDMA_NODE_IB_CA = 1, + RDMA_NODE_IB_SWITCH, + RDMA_NODE_IB_ROUTER, + RDMA_NODE_RNIC }; +enum rdma_transport_type { + RDMA_TRANSPORT_IB, + RDMA_TRANSPORT_IWARP +}; + +enum rdma_transport_type +rdma_node_get_transport(enum rdma_node_type node_type) __attribute_const__; + enum ib_device_cap_flags { IB_DEVICE_RESIZE_MAX_WR = 1, IB_DEVICE_BAD_PKEY_CNTR = (1<<1), @@ -78,6 +88,9 @@ enum ib_device_cap_flags { IB_DEVICE_RC_RNR_NAK_GEN = (1<<12), IB_DEVICE_SRQ_RESIZE = (1<<13), IB_DEVICE_N_NOTIFY_CQ = (1<<14), + IB_DEVICE_ZERO_STAG = (1<<15), + IB_DEVICE_SEND_W_INV = (1<<16), + IB_DEVICE_MEM_WINDOW = (1<<17) }; enum ib_atomic_cap { @@ -835,6 +848,8 @@ struct ib_cache { u8 *lmc_cache; }; +struct iw_cm_verbs; + struct ib_device { struct device *dma_device; @@ -851,6 +866,8 @@ struct ib_device { u32 flags; + struct iw_cm_verbs *iwcm; + int (*query_device)(struct ib_device *device, struct ib_device_attr *device_attr); int (*query_port)(struct ib_device *device, From f94b533d091a42da92d908eb7b3f9ade1923f90d Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Fri, 22 Sep 2006 15:22:48 -0700 Subject: [PATCH 0774/1063] RDMA/amso1100: Add driver for Ammasso 1100 RNIC Add a driver for the Ammasso 1100 gigabit ethernet RNIC. Signed-off-by: Tom Tucker Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- MAINTAINERS | 8 + drivers/infiniband/Kconfig | 1 + drivers/infiniband/Makefile | 1 + drivers/infiniband/hw/amso1100/Kbuild | 8 + drivers/infiniband/hw/amso1100/Kconfig | 15 + drivers/infiniband/hw/amso1100/c2.c | 1255 +++++++++++++++ drivers/infiniband/hw/amso1100/c2.h | 551 +++++++ drivers/infiniband/hw/amso1100/c2_ae.c | 321 ++++ drivers/infiniband/hw/amso1100/c2_ae.h | 108 ++ drivers/infiniband/hw/amso1100/c2_alloc.c | 144 ++ drivers/infiniband/hw/amso1100/c2_cm.c | 452 ++++++ drivers/infiniband/hw/amso1100/c2_cq.c | 433 +++++ drivers/infiniband/hw/amso1100/c2_intr.c | 209 +++ drivers/infiniband/hw/amso1100/c2_mm.c | 375 +++++ drivers/infiniband/hw/amso1100/c2_mq.c | 174 ++ drivers/infiniband/hw/amso1100/c2_mq.h | 106 ++ drivers/infiniband/hw/amso1100/c2_pd.c | 89 + drivers/infiniband/hw/amso1100/c2_provider.c | 869 ++++++++++ drivers/infiniband/hw/amso1100/c2_provider.h | 181 +++ drivers/infiniband/hw/amso1100/c2_qp.c | 975 +++++++++++ drivers/infiniband/hw/amso1100/c2_rnic.c | 663 ++++++++ drivers/infiniband/hw/amso1100/c2_status.h | 158 ++ drivers/infiniband/hw/amso1100/c2_user.h | 82 + drivers/infiniband/hw/amso1100/c2_vq.c | 260 +++ drivers/infiniband/hw/amso1100/c2_vq.h | 63 + drivers/infiniband/hw/amso1100/c2_wr.h | 1520 ++++++++++++++++++ 26 files changed, 9021 insertions(+) create mode 100644 drivers/infiniband/hw/amso1100/Kbuild create mode 100644 drivers/infiniband/hw/amso1100/Kconfig create mode 100644 drivers/infiniband/hw/amso1100/c2.c create mode 100644 drivers/infiniband/hw/amso1100/c2.h create mode 100644 drivers/infiniband/hw/amso1100/c2_ae.c create mode 100644 drivers/infiniband/hw/amso1100/c2_ae.h create mode 100644 drivers/infiniband/hw/amso1100/c2_alloc.c create mode 100644 drivers/infiniband/hw/amso1100/c2_cm.c create mode 100644 drivers/infiniband/hw/amso1100/c2_cq.c create mode 100644 drivers/infiniband/hw/amso1100/c2_intr.c create mode 100644 drivers/infiniband/hw/amso1100/c2_mm.c create mode 100644 drivers/infiniband/hw/amso1100/c2_mq.c create mode 100644 drivers/infiniband/hw/amso1100/c2_mq.h create mode 100644 drivers/infiniband/hw/amso1100/c2_pd.c create mode 100644 drivers/infiniband/hw/amso1100/c2_provider.c create mode 100644 drivers/infiniband/hw/amso1100/c2_provider.h create mode 100644 drivers/infiniband/hw/amso1100/c2_qp.c create mode 100644 drivers/infiniband/hw/amso1100/c2_rnic.c create mode 100644 drivers/infiniband/hw/amso1100/c2_status.h create mode 100644 drivers/infiniband/hw/amso1100/c2_user.h create mode 100644 drivers/infiniband/hw/amso1100/c2_vq.c create mode 100644 drivers/infiniband/hw/amso1100/c2_vq.h create mode 100644 drivers/infiniband/hw/amso1100/c2_wr.h diff --git a/MAINTAINERS b/MAINTAINERS index 830bec779d47..b08c537018de 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -298,6 +298,14 @@ L: info-linux@geode.amd.com W: http://www.amd.com/us-en/ConnectivitySolutions/TechnicalResources/0,,50_2334_2452_11363,00.html S: Supported +AMSO1100 RNIC DRIVER +P: Tom Tucker +M: tom@opengridcomputing.com +P: Steve Wise +M: swise@opengridcomputing.com +L: openib-general@openib.org +S: Maintained + AOA (Apple Onboard Audio) ALSA DRIVER P: Johannes Berg M: johannes@sipsolutions.net diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 9a329b2c108c..9edfacee7d84 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -37,6 +37,7 @@ config INFINIBAND_ADDR_TRANS source "drivers/infiniband/hw/mthca/Kconfig" source "drivers/infiniband/hw/ipath/Kconfig" source "drivers/infiniband/hw/ehca/Kconfig" +source "drivers/infiniband/hw/amso1100/Kconfig" source "drivers/infiniband/ulp/ipoib/Kconfig" diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile index 08cff32d900e..2b5d1098ef45 100644 --- a/drivers/infiniband/Makefile +++ b/drivers/infiniband/Makefile @@ -2,6 +2,7 @@ obj-$(CONFIG_INFINIBAND) += core/ obj-$(CONFIG_INFINIBAND_MTHCA) += hw/mthca/ obj-$(CONFIG_INFINIBAND_IPATH) += hw/ipath/ obj-$(CONFIG_INFINIBAND_EHCA) += hw/ehca/ +obj-$(CONFIG_INFINIBAND_AMSO1100) += hw/amso1100/ obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/ obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/ obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/ diff --git a/drivers/infiniband/hw/amso1100/Kbuild b/drivers/infiniband/hw/amso1100/Kbuild new file mode 100644 index 000000000000..06964c4af849 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/Kbuild @@ -0,0 +1,8 @@ +ifdef CONFIG_INFINIBAND_AMSO1100_DEBUG +EXTRA_CFLAGS += -DDEBUG +endif + +obj-$(CONFIG_INFINIBAND_AMSO1100) += iw_c2.o + +iw_c2-y := c2.o c2_provider.o c2_rnic.o c2_alloc.o c2_mq.o c2_ae.o c2_vq.o \ + c2_intr.o c2_cq.o c2_qp.o c2_cm.o c2_mm.o c2_pd.o diff --git a/drivers/infiniband/hw/amso1100/Kconfig b/drivers/infiniband/hw/amso1100/Kconfig new file mode 100644 index 000000000000..809cb14ac6de --- /dev/null +++ b/drivers/infiniband/hw/amso1100/Kconfig @@ -0,0 +1,15 @@ +config INFINIBAND_AMSO1100 + tristate "Ammasso 1100 HCA support" + depends on PCI && INET && INFINIBAND + ---help--- + This is a low-level driver for the Ammasso 1100 host + channel adapter (HCA). + +config INFINIBAND_AMSO1100_DEBUG + bool "Verbose debugging output" + depends on INFINIBAND_AMSO1100 + default n + ---help--- + This option causes the amso1100 driver to produce a bunch of + debug messages. Select this if you are developing the driver + or trying to diagnose a problem. diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c new file mode 100644 index 000000000000..9e9120f36019 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2.c @@ -0,0 +1,1255 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include "c2.h" +#include "c2_provider.h" + +MODULE_AUTHOR("Tom Tucker "); +MODULE_DESCRIPTION("Ammasso AMSO1100 Low-level iWARP Driver"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRV_VERSION); + +static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK + | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN; + +static int debug = -1; /* defaults above */ +module_param(debug, int, 0); +MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); + +static int c2_up(struct net_device *netdev); +static int c2_down(struct net_device *netdev); +static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev); +static void c2_tx_interrupt(struct net_device *netdev); +static void c2_rx_interrupt(struct net_device *netdev); +static irqreturn_t c2_interrupt(int irq, void *dev_id, struct pt_regs *regs); +static void c2_tx_timeout(struct net_device *netdev); +static int c2_change_mtu(struct net_device *netdev, int new_mtu); +static void c2_reset(struct c2_port *c2_port); +static struct net_device_stats *c2_get_stats(struct net_device *netdev); + +static struct pci_device_id c2_pci_table[] = { + { PCI_DEVICE(0x18b8, 0xb001) }, + { 0 } +}; + +MODULE_DEVICE_TABLE(pci, c2_pci_table); + +static void c2_print_macaddr(struct net_device *netdev) +{ + pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X, " + "IRQ %u\n", netdev->name, + netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2], + netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5], + netdev->irq); +} + +static void c2_set_rxbufsize(struct c2_port *c2_port) +{ + struct net_device *netdev = c2_port->netdev; + + if (netdev->mtu > RX_BUF_SIZE) + c2_port->rx_buf_size = + netdev->mtu + ETH_HLEN + sizeof(struct c2_rxp_hdr) + + NET_IP_ALIGN; + else + c2_port->rx_buf_size = sizeof(struct c2_rxp_hdr) + RX_BUF_SIZE; +} + +/* + * Allocate TX ring elements and chain them together. + * One-to-one association of adapter descriptors with ring elements. + */ +static int c2_tx_ring_alloc(struct c2_ring *tx_ring, void *vaddr, + dma_addr_t base, void __iomem * mmio_txp_ring) +{ + struct c2_tx_desc *tx_desc; + struct c2_txp_desc __iomem *txp_desc; + struct c2_element *elem; + int i; + + tx_ring->start = kmalloc(sizeof(*elem) * tx_ring->count, GFP_KERNEL); + if (!tx_ring->start) + return -ENOMEM; + + elem = tx_ring->start; + tx_desc = vaddr; + txp_desc = mmio_txp_ring; + for (i = 0; i < tx_ring->count; i++, elem++, tx_desc++, txp_desc++) { + tx_desc->len = 0; + tx_desc->status = 0; + + /* Set TXP_HTXD_UNINIT */ + __raw_writeq(cpu_to_be64(0x1122334455667788ULL), + (void __iomem *) txp_desc + C2_TXP_ADDR); + __raw_writew(0, (void __iomem *) txp_desc + C2_TXP_LEN); + __raw_writew(cpu_to_be16(TXP_HTXD_UNINIT), + (void __iomem *) txp_desc + C2_TXP_FLAGS); + + elem->skb = NULL; + elem->ht_desc = tx_desc; + elem->hw_desc = txp_desc; + + if (i == tx_ring->count - 1) { + elem->next = tx_ring->start; + tx_desc->next_offset = base; + } else { + elem->next = elem + 1; + tx_desc->next_offset = + base + (i + 1) * sizeof(*tx_desc); + } + } + + tx_ring->to_use = tx_ring->to_clean = tx_ring->start; + + return 0; +} + +/* + * Allocate RX ring elements and chain them together. + * One-to-one association of adapter descriptors with ring elements. + */ +static int c2_rx_ring_alloc(struct c2_ring *rx_ring, void *vaddr, + dma_addr_t base, void __iomem * mmio_rxp_ring) +{ + struct c2_rx_desc *rx_desc; + struct c2_rxp_desc __iomem *rxp_desc; + struct c2_element *elem; + int i; + + rx_ring->start = kmalloc(sizeof(*elem) * rx_ring->count, GFP_KERNEL); + if (!rx_ring->start) + return -ENOMEM; + + elem = rx_ring->start; + rx_desc = vaddr; + rxp_desc = mmio_rxp_ring; + for (i = 0; i < rx_ring->count; i++, elem++, rx_desc++, rxp_desc++) { + rx_desc->len = 0; + rx_desc->status = 0; + + /* Set RXP_HRXD_UNINIT */ + __raw_writew(cpu_to_be16(RXP_HRXD_OK), + (void __iomem *) rxp_desc + C2_RXP_STATUS); + __raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_COUNT); + __raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_LEN); + __raw_writeq(cpu_to_be64(0x99aabbccddeeffULL), + (void __iomem *) rxp_desc + C2_RXP_ADDR); + __raw_writew(cpu_to_be16(RXP_HRXD_UNINIT), + (void __iomem *) rxp_desc + C2_RXP_FLAGS); + + elem->skb = NULL; + elem->ht_desc = rx_desc; + elem->hw_desc = rxp_desc; + + if (i == rx_ring->count - 1) { + elem->next = rx_ring->start; + rx_desc->next_offset = base; + } else { + elem->next = elem + 1; + rx_desc->next_offset = + base + (i + 1) * sizeof(*rx_desc); + } + } + + rx_ring->to_use = rx_ring->to_clean = rx_ring->start; + + return 0; +} + +/* Setup buffer for receiving */ +static inline int c2_rx_alloc(struct c2_port *c2_port, struct c2_element *elem) +{ + struct c2_dev *c2dev = c2_port->c2dev; + struct c2_rx_desc *rx_desc = elem->ht_desc; + struct sk_buff *skb; + dma_addr_t mapaddr; + u32 maplen; + struct c2_rxp_hdr *rxp_hdr; + + skb = dev_alloc_skb(c2_port->rx_buf_size); + if (unlikely(!skb)) { + pr_debug("%s: out of memory for receive\n", + c2_port->netdev->name); + return -ENOMEM; + } + + /* Zero out the rxp hdr in the sk_buff */ + memset(skb->data, 0, sizeof(*rxp_hdr)); + + skb->dev = c2_port->netdev; + + maplen = c2_port->rx_buf_size; + mapaddr = + pci_map_single(c2dev->pcidev, skb->data, maplen, + PCI_DMA_FROMDEVICE); + + /* Set the sk_buff RXP_header to RXP_HRXD_READY */ + rxp_hdr = (struct c2_rxp_hdr *) skb->data; + rxp_hdr->flags = RXP_HRXD_READY; + + __raw_writew(0, elem->hw_desc + C2_RXP_STATUS); + __raw_writew(cpu_to_be16((u16) maplen - sizeof(*rxp_hdr)), + elem->hw_desc + C2_RXP_LEN); + __raw_writeq(cpu_to_be64(mapaddr), elem->hw_desc + C2_RXP_ADDR); + __raw_writew(cpu_to_be16(RXP_HRXD_READY), elem->hw_desc + C2_RXP_FLAGS); + + elem->skb = skb; + elem->mapaddr = mapaddr; + elem->maplen = maplen; + rx_desc->len = maplen; + + return 0; +} + +/* + * Allocate buffers for the Rx ring + * For receive: rx_ring.to_clean is next received frame + */ +static int c2_rx_fill(struct c2_port *c2_port) +{ + struct c2_ring *rx_ring = &c2_port->rx_ring; + struct c2_element *elem; + int ret = 0; + + elem = rx_ring->start; + do { + if (c2_rx_alloc(c2_port, elem)) { + ret = 1; + break; + } + } while ((elem = elem->next) != rx_ring->start); + + rx_ring->to_clean = rx_ring->start; + return ret; +} + +/* Free all buffers in RX ring, assumes receiver stopped */ +static void c2_rx_clean(struct c2_port *c2_port) +{ + struct c2_dev *c2dev = c2_port->c2dev; + struct c2_ring *rx_ring = &c2_port->rx_ring; + struct c2_element *elem; + struct c2_rx_desc *rx_desc; + + elem = rx_ring->start; + do { + rx_desc = elem->ht_desc; + rx_desc->len = 0; + + __raw_writew(0, elem->hw_desc + C2_RXP_STATUS); + __raw_writew(0, elem->hw_desc + C2_RXP_COUNT); + __raw_writew(0, elem->hw_desc + C2_RXP_LEN); + __raw_writeq(cpu_to_be64(0x99aabbccddeeffULL), + elem->hw_desc + C2_RXP_ADDR); + __raw_writew(cpu_to_be16(RXP_HRXD_UNINIT), + elem->hw_desc + C2_RXP_FLAGS); + + if (elem->skb) { + pci_unmap_single(c2dev->pcidev, elem->mapaddr, + elem->maplen, PCI_DMA_FROMDEVICE); + dev_kfree_skb(elem->skb); + elem->skb = NULL; + } + } while ((elem = elem->next) != rx_ring->start); +} + +static inline int c2_tx_free(struct c2_dev *c2dev, struct c2_element *elem) +{ + struct c2_tx_desc *tx_desc = elem->ht_desc; + + tx_desc->len = 0; + + pci_unmap_single(c2dev->pcidev, elem->mapaddr, elem->maplen, + PCI_DMA_TODEVICE); + + if (elem->skb) { + dev_kfree_skb_any(elem->skb); + elem->skb = NULL; + } + + return 0; +} + +/* Free all buffers in TX ring, assumes transmitter stopped */ +static void c2_tx_clean(struct c2_port *c2_port) +{ + struct c2_ring *tx_ring = &c2_port->tx_ring; + struct c2_element *elem; + struct c2_txp_desc txp_htxd; + int retry; + unsigned long flags; + + spin_lock_irqsave(&c2_port->tx_lock, flags); + + elem = tx_ring->start; + + do { + retry = 0; + do { + txp_htxd.flags = + readw(elem->hw_desc + C2_TXP_FLAGS); + + if (txp_htxd.flags == TXP_HTXD_READY) { + retry = 1; + __raw_writew(0, + elem->hw_desc + C2_TXP_LEN); + __raw_writeq(0, + elem->hw_desc + C2_TXP_ADDR); + __raw_writew(cpu_to_be16(TXP_HTXD_DONE), + elem->hw_desc + C2_TXP_FLAGS); + c2_port->netstats.tx_dropped++; + break; + } else { + __raw_writew(0, + elem->hw_desc + C2_TXP_LEN); + __raw_writeq(cpu_to_be64(0x1122334455667788ULL), + elem->hw_desc + C2_TXP_ADDR); + __raw_writew(cpu_to_be16(TXP_HTXD_UNINIT), + elem->hw_desc + C2_TXP_FLAGS); + } + + c2_tx_free(c2_port->c2dev, elem); + + } while ((elem = elem->next) != tx_ring->start); + } while (retry); + + c2_port->tx_avail = c2_port->tx_ring.count - 1; + c2_port->c2dev->cur_tx = tx_ring->to_use - tx_ring->start; + + if (c2_port->tx_avail > MAX_SKB_FRAGS + 1) + netif_wake_queue(c2_port->netdev); + + spin_unlock_irqrestore(&c2_port->tx_lock, flags); +} + +/* + * Process transmit descriptors marked 'DONE' by the firmware, + * freeing up their unneeded sk_buffs. + */ +static void c2_tx_interrupt(struct net_device *netdev) +{ + struct c2_port *c2_port = netdev_priv(netdev); + struct c2_dev *c2dev = c2_port->c2dev; + struct c2_ring *tx_ring = &c2_port->tx_ring; + struct c2_element *elem; + struct c2_txp_desc txp_htxd; + + spin_lock(&c2_port->tx_lock); + + for (elem = tx_ring->to_clean; elem != tx_ring->to_use; + elem = elem->next) { + txp_htxd.flags = + be16_to_cpu(readw(elem->hw_desc + C2_TXP_FLAGS)); + + if (txp_htxd.flags != TXP_HTXD_DONE) + break; + + if (netif_msg_tx_done(c2_port)) { + /* PCI reads are expensive in fast path */ + txp_htxd.len = + be16_to_cpu(readw(elem->hw_desc + C2_TXP_LEN)); + pr_debug("%s: tx done slot %3Zu status 0x%x len " + "%5u bytes\n", + netdev->name, elem - tx_ring->start, + txp_htxd.flags, txp_htxd.len); + } + + c2_tx_free(c2dev, elem); + ++(c2_port->tx_avail); + } + + tx_ring->to_clean = elem; + + if (netif_queue_stopped(netdev) + && c2_port->tx_avail > MAX_SKB_FRAGS + 1) + netif_wake_queue(netdev); + + spin_unlock(&c2_port->tx_lock); +} + +static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem) +{ + struct c2_rx_desc *rx_desc = elem->ht_desc; + struct c2_rxp_hdr *rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data; + + if (rxp_hdr->status != RXP_HRXD_OK || + rxp_hdr->len > (rx_desc->len - sizeof(*rxp_hdr))) { + pr_debug("BAD RXP_HRXD\n"); + pr_debug(" rx_desc : %p\n", rx_desc); + pr_debug(" index : %Zu\n", + elem - c2_port->rx_ring.start); + pr_debug(" len : %u\n", rx_desc->len); + pr_debug(" rxp_hdr : %p [PA %p]\n", rxp_hdr, + (void *) __pa((unsigned long) rxp_hdr)); + pr_debug(" flags : 0x%x\n", rxp_hdr->flags); + pr_debug(" status: 0x%x\n", rxp_hdr->status); + pr_debug(" len : %u\n", rxp_hdr->len); + pr_debug(" rsvd : 0x%x\n", rxp_hdr->rsvd); + } + + /* Setup the skb for reuse since we're dropping this pkt */ + elem->skb->tail = elem->skb->data = elem->skb->head; + + /* Zero out the rxp hdr in the sk_buff */ + memset(elem->skb->data, 0, sizeof(*rxp_hdr)); + + /* Write the descriptor to the adapter's rx ring */ + __raw_writew(0, elem->hw_desc + C2_RXP_STATUS); + __raw_writew(0, elem->hw_desc + C2_RXP_COUNT); + __raw_writew(cpu_to_be16((u16) elem->maplen - sizeof(*rxp_hdr)), + elem->hw_desc + C2_RXP_LEN); + __raw_writeq(cpu_to_be64(elem->mapaddr), elem->hw_desc + C2_RXP_ADDR); + __raw_writew(cpu_to_be16(RXP_HRXD_READY), elem->hw_desc + C2_RXP_FLAGS); + + pr_debug("packet dropped\n"); + c2_port->netstats.rx_dropped++; +} + +static void c2_rx_interrupt(struct net_device *netdev) +{ + struct c2_port *c2_port = netdev_priv(netdev); + struct c2_dev *c2dev = c2_port->c2dev; + struct c2_ring *rx_ring = &c2_port->rx_ring; + struct c2_element *elem; + struct c2_rx_desc *rx_desc; + struct c2_rxp_hdr *rxp_hdr; + struct sk_buff *skb; + dma_addr_t mapaddr; + u32 maplen, buflen; + unsigned long flags; + + spin_lock_irqsave(&c2dev->lock, flags); + + /* Begin where we left off */ + rx_ring->to_clean = rx_ring->start + c2dev->cur_rx; + + for (elem = rx_ring->to_clean; elem->next != rx_ring->to_clean; + elem = elem->next) { + rx_desc = elem->ht_desc; + mapaddr = elem->mapaddr; + maplen = elem->maplen; + skb = elem->skb; + rxp_hdr = (struct c2_rxp_hdr *) skb->data; + + if (rxp_hdr->flags != RXP_HRXD_DONE) + break; + buflen = rxp_hdr->len; + + /* Sanity check the RXP header */ + if (rxp_hdr->status != RXP_HRXD_OK || + buflen > (rx_desc->len - sizeof(*rxp_hdr))) { + c2_rx_error(c2_port, elem); + continue; + } + + /* + * Allocate and map a new skb for replenishing the host + * RX desc + */ + if (c2_rx_alloc(c2_port, elem)) { + c2_rx_error(c2_port, elem); + continue; + } + + /* Unmap the old skb */ + pci_unmap_single(c2dev->pcidev, mapaddr, maplen, + PCI_DMA_FROMDEVICE); + + prefetch(skb->data); + + /* + * Skip past the leading 8 bytes comprising of the + * "struct c2_rxp_hdr", prepended by the adapter + * to the usual Ethernet header ("struct ethhdr"), + * to the start of the raw Ethernet packet. + * + * Fix up the various fields in the sk_buff before + * passing it up to netif_rx(). The transfer size + * (in bytes) specified by the adapter len field of + * the "struct rxp_hdr_t" does NOT include the + * "sizeof(struct c2_rxp_hdr)". + */ + skb->data += sizeof(*rxp_hdr); + skb->tail = skb->data + buflen; + skb->len = buflen; + skb->dev = netdev; + skb->protocol = eth_type_trans(skb, netdev); + + netif_rx(skb); + + netdev->last_rx = jiffies; + c2_port->netstats.rx_packets++; + c2_port->netstats.rx_bytes += buflen; + } + + /* Save where we left off */ + rx_ring->to_clean = elem; + c2dev->cur_rx = elem - rx_ring->start; + C2_SET_CUR_RX(c2dev, c2dev->cur_rx); + + spin_unlock_irqrestore(&c2dev->lock, flags); +} + +/* + * Handle netisr0 TX & RX interrupts. + */ +static irqreturn_t c2_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + unsigned int netisr0, dmaisr; + int handled = 0; + struct c2_dev *c2dev = (struct c2_dev *) dev_id; + + /* Process CCILNET interrupts */ + netisr0 = readl(c2dev->regs + C2_NISR0); + if (netisr0) { + + /* + * There is an issue with the firmware that always + * provides the status of RX for both TX & RX + * interrupts. So process both queues here. + */ + c2_rx_interrupt(c2dev->netdev); + c2_tx_interrupt(c2dev->netdev); + + /* Clear the interrupt */ + writel(netisr0, c2dev->regs + C2_NISR0); + handled++; + } + + /* Process RNIC interrupts */ + dmaisr = readl(c2dev->regs + C2_DISR); + if (dmaisr) { + writel(dmaisr, c2dev->regs + C2_DISR); + c2_rnic_interrupt(c2dev); + handled++; + } + + if (handled) { + return IRQ_HANDLED; + } else { + return IRQ_NONE; + } +} + +static int c2_up(struct net_device *netdev) +{ + struct c2_port *c2_port = netdev_priv(netdev); + struct c2_dev *c2dev = c2_port->c2dev; + struct c2_element *elem; + struct c2_rxp_hdr *rxp_hdr; + struct in_device *in_dev; + size_t rx_size, tx_size; + int ret, i; + unsigned int netimr0; + + if (netif_msg_ifup(c2_port)) + pr_debug("%s: enabling interface\n", netdev->name); + + /* Set the Rx buffer size based on MTU */ + c2_set_rxbufsize(c2_port); + + /* Allocate DMA'able memory for Tx/Rx host descriptor rings */ + rx_size = c2_port->rx_ring.count * sizeof(struct c2_rx_desc); + tx_size = c2_port->tx_ring.count * sizeof(struct c2_tx_desc); + + c2_port->mem_size = tx_size + rx_size; + c2_port->mem = pci_alloc_consistent(c2dev->pcidev, c2_port->mem_size, + &c2_port->dma); + if (c2_port->mem == NULL) { + pr_debug("Unable to allocate memory for " + "host descriptor rings\n"); + return -ENOMEM; + } + + memset(c2_port->mem, 0, c2_port->mem_size); + + /* Create the Rx host descriptor ring */ + if ((ret = + c2_rx_ring_alloc(&c2_port->rx_ring, c2_port->mem, c2_port->dma, + c2dev->mmio_rxp_ring))) { + pr_debug("Unable to create RX ring\n"); + goto bail0; + } + + /* Allocate Rx buffers for the host descriptor ring */ + if (c2_rx_fill(c2_port)) { + pr_debug("Unable to fill RX ring\n"); + goto bail1; + } + + /* Create the Tx host descriptor ring */ + if ((ret = c2_tx_ring_alloc(&c2_port->tx_ring, c2_port->mem + rx_size, + c2_port->dma + rx_size, + c2dev->mmio_txp_ring))) { + pr_debug("Unable to create TX ring\n"); + goto bail1; + } + + /* Set the TX pointer to where we left off */ + c2_port->tx_avail = c2_port->tx_ring.count - 1; + c2_port->tx_ring.to_use = c2_port->tx_ring.to_clean = + c2_port->tx_ring.start + c2dev->cur_tx; + + /* missing: Initialize MAC */ + + BUG_ON(c2_port->tx_ring.to_use != c2_port->tx_ring.to_clean); + + /* Reset the adapter, ensures the driver is in sync with the RXP */ + c2_reset(c2_port); + + /* Reset the READY bit in the sk_buff RXP headers & adapter HRXDQ */ + for (i = 0, elem = c2_port->rx_ring.start; i < c2_port->rx_ring.count; + i++, elem++) { + rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data; + rxp_hdr->flags = 0; + __raw_writew(cpu_to_be16(RXP_HRXD_READY), + elem->hw_desc + C2_RXP_FLAGS); + } + + /* Enable network packets */ + netif_start_queue(netdev); + + /* Enable IRQ */ + writel(0, c2dev->regs + C2_IDIS); + netimr0 = readl(c2dev->regs + C2_NIMR0); + netimr0 &= ~(C2_PCI_HTX_INT | C2_PCI_HRX_INT); + writel(netimr0, c2dev->regs + C2_NIMR0); + + /* Tell the stack to ignore arp requests for ipaddrs bound to + * other interfaces. This is needed to prevent the host stack + * from responding to arp requests to the ipaddr bound on the + * rdma interface. + */ + in_dev = in_dev_get(netdev); + in_dev->cnf.arp_ignore = 1; + in_dev_put(in_dev); + + return 0; + + bail1: + c2_rx_clean(c2_port); + kfree(c2_port->rx_ring.start); + + bail0: + pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem, + c2_port->dma); + + return ret; +} + +static int c2_down(struct net_device *netdev) +{ + struct c2_port *c2_port = netdev_priv(netdev); + struct c2_dev *c2dev = c2_port->c2dev; + + if (netif_msg_ifdown(c2_port)) + pr_debug("%s: disabling interface\n", + netdev->name); + + /* Wait for all the queued packets to get sent */ + c2_tx_interrupt(netdev); + + /* Disable network packets */ + netif_stop_queue(netdev); + + /* Disable IRQs by clearing the interrupt mask */ + writel(1, c2dev->regs + C2_IDIS); + writel(0, c2dev->regs + C2_NIMR0); + + /* missing: Stop transmitter */ + + /* missing: Stop receiver */ + + /* Reset the adapter, ensures the driver is in sync with the RXP */ + c2_reset(c2_port); + + /* missing: Turn off LEDs here */ + + /* Free all buffers in the host descriptor rings */ + c2_tx_clean(c2_port); + c2_rx_clean(c2_port); + + /* Free the host descriptor rings */ + kfree(c2_port->rx_ring.start); + kfree(c2_port->tx_ring.start); + pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem, + c2_port->dma); + + return 0; +} + +static void c2_reset(struct c2_port *c2_port) +{ + struct c2_dev *c2dev = c2_port->c2dev; + unsigned int cur_rx = c2dev->cur_rx; + + /* Tell the hardware to quiesce */ + C2_SET_CUR_RX(c2dev, cur_rx | C2_PCI_HRX_QUI); + + /* + * The hardware will reset the C2_PCI_HRX_QUI bit once + * the RXP is quiesced. Wait 2 seconds for this. + */ + ssleep(2); + + cur_rx = C2_GET_CUR_RX(c2dev); + + if (cur_rx & C2_PCI_HRX_QUI) + pr_debug("c2_reset: failed to quiesce the hardware!\n"); + + cur_rx &= ~C2_PCI_HRX_QUI; + + c2dev->cur_rx = cur_rx; + + pr_debug("Current RX: %u\n", c2dev->cur_rx); +} + +static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +{ + struct c2_port *c2_port = netdev_priv(netdev); + struct c2_dev *c2dev = c2_port->c2dev; + struct c2_ring *tx_ring = &c2_port->tx_ring; + struct c2_element *elem; + dma_addr_t mapaddr; + u32 maplen; + unsigned long flags; + unsigned int i; + + spin_lock_irqsave(&c2_port->tx_lock, flags); + + if (unlikely(c2_port->tx_avail < (skb_shinfo(skb)->nr_frags + 1))) { + netif_stop_queue(netdev); + spin_unlock_irqrestore(&c2_port->tx_lock, flags); + + pr_debug("%s: Tx ring full when queue awake!\n", + netdev->name); + return NETDEV_TX_BUSY; + } + + maplen = skb_headlen(skb); + mapaddr = + pci_map_single(c2dev->pcidev, skb->data, maplen, PCI_DMA_TODEVICE); + + elem = tx_ring->to_use; + elem->skb = skb; + elem->mapaddr = mapaddr; + elem->maplen = maplen; + + /* Tell HW to xmit */ + __raw_writeq(cpu_to_be64(mapaddr), elem->hw_desc + C2_TXP_ADDR); + __raw_writew(cpu_to_be16(maplen), elem->hw_desc + C2_TXP_LEN); + __raw_writew(cpu_to_be16(TXP_HTXD_READY), elem->hw_desc + C2_TXP_FLAGS); + + c2_port->netstats.tx_packets++; + c2_port->netstats.tx_bytes += maplen; + + /* Loop thru additional data fragments and queue them */ + if (skb_shinfo(skb)->nr_frags) { + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + maplen = frag->size; + mapaddr = + pci_map_page(c2dev->pcidev, frag->page, + frag->page_offset, maplen, + PCI_DMA_TODEVICE); + + elem = elem->next; + elem->skb = NULL; + elem->mapaddr = mapaddr; + elem->maplen = maplen; + + /* Tell HW to xmit */ + __raw_writeq(cpu_to_be64(mapaddr), + elem->hw_desc + C2_TXP_ADDR); + __raw_writew(cpu_to_be16(maplen), + elem->hw_desc + C2_TXP_LEN); + __raw_writew(cpu_to_be16(TXP_HTXD_READY), + elem->hw_desc + C2_TXP_FLAGS); + + c2_port->netstats.tx_packets++; + c2_port->netstats.tx_bytes += maplen; + } + } + + tx_ring->to_use = elem->next; + c2_port->tx_avail -= (skb_shinfo(skb)->nr_frags + 1); + + if (c2_port->tx_avail <= MAX_SKB_FRAGS + 1) { + netif_stop_queue(netdev); + if (netif_msg_tx_queued(c2_port)) + pr_debug("%s: transmit queue full\n", + netdev->name); + } + + spin_unlock_irqrestore(&c2_port->tx_lock, flags); + + netdev->trans_start = jiffies; + + return NETDEV_TX_OK; +} + +static struct net_device_stats *c2_get_stats(struct net_device *netdev) +{ + struct c2_port *c2_port = netdev_priv(netdev); + + return &c2_port->netstats; +} + +static void c2_tx_timeout(struct net_device *netdev) +{ + struct c2_port *c2_port = netdev_priv(netdev); + + if (netif_msg_timer(c2_port)) + pr_debug("%s: tx timeout\n", netdev->name); + + c2_tx_clean(c2_port); +} + +static int c2_change_mtu(struct net_device *netdev, int new_mtu) +{ + int ret = 0; + + if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU) + return -EINVAL; + + netdev->mtu = new_mtu; + + if (netif_running(netdev)) { + c2_down(netdev); + + c2_up(netdev); + } + + return ret; +} + +/* Initialize network device */ +static struct net_device *c2_devinit(struct c2_dev *c2dev, + void __iomem * mmio_addr) +{ + struct c2_port *c2_port = NULL; + struct net_device *netdev = alloc_etherdev(sizeof(*c2_port)); + + if (!netdev) { + pr_debug("c2_port etherdev alloc failed"); + return NULL; + } + + SET_MODULE_OWNER(netdev); + SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev); + + netdev->open = c2_up; + netdev->stop = c2_down; + netdev->hard_start_xmit = c2_xmit_frame; + netdev->get_stats = c2_get_stats; + netdev->tx_timeout = c2_tx_timeout; + netdev->change_mtu = c2_change_mtu; + netdev->watchdog_timeo = C2_TX_TIMEOUT; + netdev->irq = c2dev->pcidev->irq; + + c2_port = netdev_priv(netdev); + c2_port->netdev = netdev; + c2_port->c2dev = c2dev; + c2_port->msg_enable = netif_msg_init(debug, default_msg); + c2_port->tx_ring.count = C2_NUM_TX_DESC; + c2_port->rx_ring.count = C2_NUM_RX_DESC; + + spin_lock_init(&c2_port->tx_lock); + + /* Copy our 48-bit ethernet hardware address */ + memcpy_fromio(netdev->dev_addr, mmio_addr + C2_REGS_ENADDR, 6); + + /* Validate the MAC address */ + if (!is_valid_ether_addr(netdev->dev_addr)) { + pr_debug("Invalid MAC Address\n"); + c2_print_macaddr(netdev); + free_netdev(netdev); + return NULL; + } + + c2dev->netdev = netdev; + + return netdev; +} + +static int __devinit c2_probe(struct pci_dev *pcidev, + const struct pci_device_id *ent) +{ + int ret = 0, i; + unsigned long reg0_start, reg0_flags, reg0_len; + unsigned long reg2_start, reg2_flags, reg2_len; + unsigned long reg4_start, reg4_flags, reg4_len; + unsigned kva_map_size; + struct net_device *netdev = NULL; + struct c2_dev *c2dev = NULL; + void __iomem *mmio_regs = NULL; + + printk(KERN_INFO PFX "AMSO1100 Gigabit Ethernet driver v%s loaded\n", + DRV_VERSION); + + /* Enable PCI device */ + ret = pci_enable_device(pcidev); + if (ret) { + printk(KERN_ERR PFX "%s: Unable to enable PCI device\n", + pci_name(pcidev)); + goto bail0; + } + + reg0_start = pci_resource_start(pcidev, BAR_0); + reg0_len = pci_resource_len(pcidev, BAR_0); + reg0_flags = pci_resource_flags(pcidev, BAR_0); + + reg2_start = pci_resource_start(pcidev, BAR_2); + reg2_len = pci_resource_len(pcidev, BAR_2); + reg2_flags = pci_resource_flags(pcidev, BAR_2); + + reg4_start = pci_resource_start(pcidev, BAR_4); + reg4_len = pci_resource_len(pcidev, BAR_4); + reg4_flags = pci_resource_flags(pcidev, BAR_4); + + pr_debug("BAR0 size = 0x%lX bytes\n", reg0_len); + pr_debug("BAR2 size = 0x%lX bytes\n", reg2_len); + pr_debug("BAR4 size = 0x%lX bytes\n", reg4_len); + + /* Make sure PCI base addr are MMIO */ + if (!(reg0_flags & IORESOURCE_MEM) || + !(reg2_flags & IORESOURCE_MEM) || !(reg4_flags & IORESOURCE_MEM)) { + printk(KERN_ERR PFX "PCI regions not an MMIO resource\n"); + ret = -ENODEV; + goto bail1; + } + + /* Check for weird/broken PCI region reporting */ + if ((reg0_len < C2_REG0_SIZE) || + (reg2_len < C2_REG2_SIZE) || (reg4_len < C2_REG4_SIZE)) { + printk(KERN_ERR PFX "Invalid PCI region sizes\n"); + ret = -ENODEV; + goto bail1; + } + + /* Reserve PCI I/O and memory resources */ + ret = pci_request_regions(pcidev, DRV_NAME); + if (ret) { + printk(KERN_ERR PFX "%s: Unable to request regions\n", + pci_name(pcidev)); + goto bail1; + } + + if ((sizeof(dma_addr_t) > 4)) { + ret = pci_set_dma_mask(pcidev, DMA_64BIT_MASK); + if (ret < 0) { + printk(KERN_ERR PFX "64b DMA configuration failed\n"); + goto bail2; + } + } else { + ret = pci_set_dma_mask(pcidev, DMA_32BIT_MASK); + if (ret < 0) { + printk(KERN_ERR PFX "32b DMA configuration failed\n"); + goto bail2; + } + } + + /* Enables bus-mastering on the device */ + pci_set_master(pcidev); + + /* Remap the adapter PCI registers in BAR4 */ + mmio_regs = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET, + sizeof(struct c2_adapter_pci_regs)); + if (mmio_regs == 0UL) { + printk(KERN_ERR PFX + "Unable to remap adapter PCI registers in BAR4\n"); + ret = -EIO; + goto bail2; + } + + /* Validate PCI regs magic */ + for (i = 0; i < sizeof(c2_magic); i++) { + if (c2_magic[i] != readb(mmio_regs + C2_REGS_MAGIC + i)) { + printk(KERN_ERR PFX "Downlevel Firmware boot loader " + "[%d/%Zd: got 0x%x, exp 0x%x]. Use the cc_flash " + "utility to update your boot loader\n", + i + 1, sizeof(c2_magic), + readb(mmio_regs + C2_REGS_MAGIC + i), + c2_magic[i]); + printk(KERN_ERR PFX "Adapter not claimed\n"); + iounmap(mmio_regs); + ret = -EIO; + goto bail2; + } + } + + /* Validate the adapter version */ + if (be32_to_cpu(readl(mmio_regs + C2_REGS_VERS)) != C2_VERSION) { + printk(KERN_ERR PFX "Version mismatch " + "[fw=%u, c2=%u], Adapter not claimed\n", + be32_to_cpu(readl(mmio_regs + C2_REGS_VERS)), + C2_VERSION); + ret = -EINVAL; + iounmap(mmio_regs); + goto bail2; + } + + /* Validate the adapter IVN */ + if (be32_to_cpu(readl(mmio_regs + C2_REGS_IVN)) != C2_IVN) { + printk(KERN_ERR PFX "Downlevel FIrmware level. You should be using " + "the OpenIB device support kit. " + "[fw=0x%x, c2=0x%x], Adapter not claimed\n", + be32_to_cpu(readl(mmio_regs + C2_REGS_IVN)), + C2_IVN); + ret = -EINVAL; + iounmap(mmio_regs); + goto bail2; + } + + /* Allocate hardware structure */ + c2dev = (struct c2_dev *) ib_alloc_device(sizeof(*c2dev)); + if (!c2dev) { + printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n", + pci_name(pcidev)); + ret = -ENOMEM; + iounmap(mmio_regs); + goto bail2; + } + + memset(c2dev, 0, sizeof(*c2dev)); + spin_lock_init(&c2dev->lock); + c2dev->pcidev = pcidev; + c2dev->cur_tx = 0; + + /* Get the last RX index */ + c2dev->cur_rx = + (be32_to_cpu(readl(mmio_regs + C2_REGS_HRX_CUR)) - + 0xffffc000) / sizeof(struct c2_rxp_desc); + + /* Request an interrupt line for the driver */ + ret = request_irq(pcidev->irq, c2_interrupt, SA_SHIRQ, DRV_NAME, c2dev); + if (ret) { + printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n", + pci_name(pcidev), pcidev->irq); + iounmap(mmio_regs); + goto bail3; + } + + /* Set driver specific data */ + pci_set_drvdata(pcidev, c2dev); + + /* Initialize network device */ + if ((netdev = c2_devinit(c2dev, mmio_regs)) == NULL) { + iounmap(mmio_regs); + goto bail4; + } + + /* Save off the actual size prior to unmapping mmio_regs */ + kva_map_size = be32_to_cpu(readl(mmio_regs + C2_REGS_PCI_WINSIZE)); + + /* Unmap the adapter PCI registers in BAR4 */ + iounmap(mmio_regs); + + /* Register network device */ + ret = register_netdev(netdev); + if (ret) { + printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n", + ret); + goto bail5; + } + + /* Disable network packets */ + netif_stop_queue(netdev); + + /* Remap the adapter HRXDQ PA space to kernel VA space */ + c2dev->mmio_rxp_ring = ioremap_nocache(reg4_start + C2_RXP_HRXDQ_OFFSET, + C2_RXP_HRXDQ_SIZE); + if (c2dev->mmio_rxp_ring == 0UL) { + printk(KERN_ERR PFX "Unable to remap MMIO HRXDQ region\n"); + ret = -EIO; + goto bail6; + } + + /* Remap the adapter HTXDQ PA space to kernel VA space */ + c2dev->mmio_txp_ring = ioremap_nocache(reg4_start + C2_TXP_HTXDQ_OFFSET, + C2_TXP_HTXDQ_SIZE); + if (c2dev->mmio_txp_ring == 0UL) { + printk(KERN_ERR PFX "Unable to remap MMIO HTXDQ region\n"); + ret = -EIO; + goto bail7; + } + + /* Save off the current RX index in the last 4 bytes of the TXP Ring */ + C2_SET_CUR_RX(c2dev, c2dev->cur_rx); + + /* Remap the PCI registers in adapter BAR0 to kernel VA space */ + c2dev->regs = ioremap_nocache(reg0_start, reg0_len); + if (c2dev->regs == 0UL) { + printk(KERN_ERR PFX "Unable to remap BAR0\n"); + ret = -EIO; + goto bail8; + } + + /* Remap the PCI registers in adapter BAR4 to kernel VA space */ + c2dev->pa = reg4_start + C2_PCI_REGS_OFFSET; + c2dev->kva = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET, + kva_map_size); + if (c2dev->kva == 0UL) { + printk(KERN_ERR PFX "Unable to remap BAR4\n"); + ret = -EIO; + goto bail9; + } + + /* Print out the MAC address */ + c2_print_macaddr(netdev); + + ret = c2_rnic_init(c2dev); + if (ret) { + printk(KERN_ERR PFX "c2_rnic_init failed: %d\n", ret); + goto bail10; + } + + c2_register_device(c2dev); + + return 0; + + bail10: + iounmap(c2dev->kva); + + bail9: + iounmap(c2dev->regs); + + bail8: + iounmap(c2dev->mmio_txp_ring); + + bail7: + iounmap(c2dev->mmio_rxp_ring); + + bail6: + unregister_netdev(netdev); + + bail5: + free_netdev(netdev); + + bail4: + free_irq(pcidev->irq, c2dev); + + bail3: + ib_dealloc_device(&c2dev->ibdev); + + bail2: + pci_release_regions(pcidev); + + bail1: + pci_disable_device(pcidev); + + bail0: + return ret; +} + +static void __devexit c2_remove(struct pci_dev *pcidev) +{ + struct c2_dev *c2dev = pci_get_drvdata(pcidev); + struct net_device *netdev = c2dev->netdev; + + /* Unregister with OpenIB */ + c2_unregister_device(c2dev); + + /* Clean up the RNIC resources */ + c2_rnic_term(c2dev); + + /* Remove network device from the kernel */ + unregister_netdev(netdev); + + /* Free network device */ + free_netdev(netdev); + + /* Free the interrupt line */ + free_irq(pcidev->irq, c2dev); + + /* missing: Turn LEDs off here */ + + /* Unmap adapter PA space */ + iounmap(c2dev->kva); + iounmap(c2dev->regs); + iounmap(c2dev->mmio_txp_ring); + iounmap(c2dev->mmio_rxp_ring); + + /* Free the hardware structure */ + ib_dealloc_device(&c2dev->ibdev); + + /* Release reserved PCI I/O and memory resources */ + pci_release_regions(pcidev); + + /* Disable PCI device */ + pci_disable_device(pcidev); + + /* Clear driver specific data */ + pci_set_drvdata(pcidev, NULL); +} + +static struct pci_driver c2_pci_driver = { + .name = DRV_NAME, + .id_table = c2_pci_table, + .probe = c2_probe, + .remove = __devexit_p(c2_remove), +}; + +static int __init c2_init_module(void) +{ + return pci_module_init(&c2_pci_driver); +} + +static void __exit c2_exit_module(void) +{ + pci_unregister_driver(&c2_pci_driver); +} + +module_init(c2_init_module); +module_exit(c2_exit_module); diff --git a/drivers/infiniband/hw/amso1100/c2.h b/drivers/infiniband/hw/amso1100/c2.h new file mode 100644 index 000000000000..1b17dcdd0505 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2.h @@ -0,0 +1,551 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __C2_H +#define __C2_H + +#include +#include +#include +#include +#include +#include +#include + +#include "c2_provider.h" +#include "c2_mq.h" +#include "c2_status.h" + +#define DRV_NAME "c2" +#define DRV_VERSION "1.1" +#define PFX DRV_NAME ": " + +#define BAR_0 0 +#define BAR_2 2 +#define BAR_4 4 + +#define RX_BUF_SIZE (1536 + 8) +#define ETH_JUMBO_MTU 9000 +#define C2_MAGIC "CEPHEUS" +#define C2_VERSION 4 +#define C2_IVN (18 & 0x7fffffff) + +#define C2_REG0_SIZE (16 * 1024) +#define C2_REG2_SIZE (2 * 1024 * 1024) +#define C2_REG4_SIZE (256 * 1024 * 1024) +#define C2_NUM_TX_DESC 341 +#define C2_NUM_RX_DESC 256 +#define C2_PCI_REGS_OFFSET (0x10000) +#define C2_RXP_HRXDQ_OFFSET (((C2_REG4_SIZE)/2)) +#define C2_RXP_HRXDQ_SIZE (4096) +#define C2_TXP_HTXDQ_OFFSET (((C2_REG4_SIZE)/2) + C2_RXP_HRXDQ_SIZE) +#define C2_TXP_HTXDQ_SIZE (4096) +#define C2_TX_TIMEOUT (6*HZ) + +/* CEPHEUS */ +static const u8 c2_magic[] = { + 0x43, 0x45, 0x50, 0x48, 0x45, 0x55, 0x53 +}; + +enum adapter_pci_regs { + C2_REGS_MAGIC = 0x0000, + C2_REGS_VERS = 0x0008, + C2_REGS_IVN = 0x000C, + C2_REGS_PCI_WINSIZE = 0x0010, + C2_REGS_Q0_QSIZE = 0x0014, + C2_REGS_Q0_MSGSIZE = 0x0018, + C2_REGS_Q0_POOLSTART = 0x001C, + C2_REGS_Q0_SHARED = 0x0020, + C2_REGS_Q1_QSIZE = 0x0024, + C2_REGS_Q1_MSGSIZE = 0x0028, + C2_REGS_Q1_SHARED = 0x0030, + C2_REGS_Q2_QSIZE = 0x0034, + C2_REGS_Q2_MSGSIZE = 0x0038, + C2_REGS_Q2_SHARED = 0x0040, + C2_REGS_ENADDR = 0x004C, + C2_REGS_RDMA_ENADDR = 0x0054, + C2_REGS_HRX_CUR = 0x006C, +}; + +struct c2_adapter_pci_regs { + char reg_magic[8]; + u32 version; + u32 ivn; + u32 pci_window_size; + u32 q0_q_size; + u32 q0_msg_size; + u32 q0_pool_start; + u32 q0_shared; + u32 q1_q_size; + u32 q1_msg_size; + u32 q1_pool_start; + u32 q1_shared; + u32 q2_q_size; + u32 q2_msg_size; + u32 q2_pool_start; + u32 q2_shared; + u32 log_start; + u32 log_size; + u8 host_enaddr[8]; + u8 rdma_enaddr[8]; + u32 crash_entry; + u32 crash_ready[2]; + u32 fw_txd_cur; + u32 fw_hrxd_cur; + u32 fw_rxd_cur; +}; + +enum pci_regs { + C2_HISR = 0x0000, + C2_DISR = 0x0004, + C2_HIMR = 0x0008, + C2_DIMR = 0x000C, + C2_NISR0 = 0x0010, + C2_NISR1 = 0x0014, + C2_NIMR0 = 0x0018, + C2_NIMR1 = 0x001C, + C2_IDIS = 0x0020, +}; + +enum { + C2_PCI_HRX_INT = 1 << 8, + C2_PCI_HTX_INT = 1 << 17, + C2_PCI_HRX_QUI = 1 << 31, +}; + +/* + * Cepheus registers in BAR0. + */ +struct c2_pci_regs { + u32 hostisr; + u32 dmaisr; + u32 hostimr; + u32 dmaimr; + u32 netisr0; + u32 netisr1; + u32 netimr0; + u32 netimr1; + u32 int_disable; +}; + +/* TXP flags */ +enum c2_txp_flags { + TXP_HTXD_DONE = 0, + TXP_HTXD_READY = 1 << 0, + TXP_HTXD_UNINIT = 1 << 1, +}; + +/* RXP flags */ +enum c2_rxp_flags { + RXP_HRXD_UNINIT = 0, + RXP_HRXD_READY = 1 << 0, + RXP_HRXD_DONE = 1 << 1, +}; + +/* RXP status */ +enum c2_rxp_status { + RXP_HRXD_ZERO = 0, + RXP_HRXD_OK = 1 << 0, + RXP_HRXD_BUF_OV = 1 << 1, +}; + +/* TXP descriptor fields */ +enum txp_desc { + C2_TXP_FLAGS = 0x0000, + C2_TXP_LEN = 0x0002, + C2_TXP_ADDR = 0x0004, +}; + +/* RXP descriptor fields */ +enum rxp_desc { + C2_RXP_FLAGS = 0x0000, + C2_RXP_STATUS = 0x0002, + C2_RXP_COUNT = 0x0004, + C2_RXP_LEN = 0x0006, + C2_RXP_ADDR = 0x0008, +}; + +struct c2_txp_desc { + u16 flags; + u16 len; + u64 addr; +} __attribute__ ((packed)); + +struct c2_rxp_desc { + u16 flags; + u16 status; + u16 count; + u16 len; + u64 addr; +} __attribute__ ((packed)); + +struct c2_rxp_hdr { + u16 flags; + u16 status; + u16 len; + u16 rsvd; +} __attribute__ ((packed)); + +struct c2_tx_desc { + u32 len; + u32 status; + dma_addr_t next_offset; +}; + +struct c2_rx_desc { + u32 len; + u32 status; + dma_addr_t next_offset; +}; + +struct c2_alloc { + u32 last; + u32 max; + spinlock_t lock; + unsigned long *table; +}; + +struct c2_array { + struct { + void **page; + int used; + } *page_list; +}; + +/* + * The MQ shared pointer pool is organized as a linked list of + * chunks. Each chunk contains a linked list of free shared pointers + * that can be allocated to a given user mode client. + * + */ +struct sp_chunk { + struct sp_chunk *next; + dma_addr_t dma_addr; + DECLARE_PCI_UNMAP_ADDR(mapping); + u16 head; + u16 shared_ptr[0]; +}; + +struct c2_pd_table { + u32 last; + u32 max; + spinlock_t lock; + unsigned long *table; +}; + +struct c2_qp_table { + struct idr idr; + spinlock_t lock; + int last; +}; + +struct c2_element { + struct c2_element *next; + void *ht_desc; /* host descriptor */ + void __iomem *hw_desc; /* hardware descriptor */ + struct sk_buff *skb; + dma_addr_t mapaddr; + u32 maplen; +}; + +struct c2_ring { + struct c2_element *to_clean; + struct c2_element *to_use; + struct c2_element *start; + unsigned long count; +}; + +struct c2_dev { + struct ib_device ibdev; + void __iomem *regs; + void __iomem *mmio_txp_ring; /* remapped adapter memory for hw rings */ + void __iomem *mmio_rxp_ring; + spinlock_t lock; + struct pci_dev *pcidev; + struct net_device *netdev; + struct net_device *pseudo_netdev; + unsigned int cur_tx; + unsigned int cur_rx; + u32 adapter_handle; + int device_cap_flags; + void __iomem *kva; /* KVA device memory */ + unsigned long pa; /* PA device memory */ + void **qptr_array; + + kmem_cache_t *host_msg_cache; + + struct list_head cca_link; /* adapter list */ + struct list_head eh_wakeup_list; /* event wakeup list */ + wait_queue_head_t req_vq_wo; + + /* Cached RNIC properties */ + struct ib_device_attr props; + + struct c2_pd_table pd_table; + struct c2_qp_table qp_table; + int ports; /* num of GigE ports */ + int devnum; + spinlock_t vqlock; /* sync vbs req MQ */ + + /* Verbs Queues */ + struct c2_mq req_vq; /* Verbs Request MQ */ + struct c2_mq rep_vq; /* Verbs Reply MQ */ + struct c2_mq aeq; /* Async Events MQ */ + + /* Kernel client MQs */ + struct sp_chunk *kern_mqsp_pool; + + /* Device updates these values when posting messages to a host + * target queue */ + u16 req_vq_shared; + u16 rep_vq_shared; + u16 aeq_shared; + u16 irq_claimed; + + /* + * Shared host target pages for user-accessible MQs. + */ + int hthead; /* index of first free entry */ + void *htpages; /* kernel vaddr */ + int htlen; /* length of htpages memory */ + void *htuva; /* user mapped vaddr */ + spinlock_t htlock; /* serialize allocation */ + + u64 adapter_hint_uva; /* access to the activity FIFO */ + + // spinlock_t aeq_lock; + // spinlock_t rnic_lock; + + u16 *hint_count; + dma_addr_t hint_count_dma; + u16 hints_read; + + int init; /* TRUE if it's ready */ + char ae_cache_name[16]; + char vq_cache_name[16]; +}; + +struct c2_port { + u32 msg_enable; + struct c2_dev *c2dev; + struct net_device *netdev; + + spinlock_t tx_lock; + u32 tx_avail; + struct c2_ring tx_ring; + struct c2_ring rx_ring; + + void *mem; /* PCI memory for host rings */ + dma_addr_t dma; + unsigned long mem_size; + + u32 rx_buf_size; + + struct net_device_stats netstats; +}; + +/* + * Activity FIFO registers in BAR0. + */ +#define PCI_BAR0_HOST_HINT 0x100 +#define PCI_BAR0_ADAPTER_HINT 0x2000 + +/* + * Ammasso PCI vendor id and Cepheus PCI device id. + */ +#define CQ_ARMED 0x01 +#define CQ_WAIT_FOR_DMA 0x80 + +/* + * The format of a hint is as follows: + * Lower 16 bits are the count of hints for the queue. + * Next 15 bits are the qp_index + * Upper most bit depends on who reads it: + * If read by producer, then it means Full (1) or Not-Full (0) + * If read by consumer, then it means Empty (1) or Not-Empty (0) + */ +#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count) +#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16) +#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF) + + +/* + * The following defines the offset in SDRAM for the c2_adapter_pci_regs_t + * struct. + */ +#define C2_ADAPTER_PCI_REGS_OFFSET 0x10000 + +#ifndef readq +static inline u64 readq(const void __iomem * addr) +{ + u64 ret = readl(addr + 4); + ret <<= 32; + ret |= readl(addr); + + return ret; +} +#endif + +#ifndef writeq +static inline void __raw_writeq(u64 val, void __iomem * addr) +{ + __raw_writel((u32) (val), addr); + __raw_writel((u32) (val >> 32), (addr + 4)); +} +#endif + +#define C2_SET_CUR_RX(c2dev, cur_rx) \ + __raw_writel(cpu_to_be32(cur_rx), c2dev->mmio_txp_ring + 4092) + +#define C2_GET_CUR_RX(c2dev) \ + be32_to_cpu(readl(c2dev->mmio_txp_ring + 4092)) + +static inline struct c2_dev *to_c2dev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct c2_dev, ibdev); +} + +static inline int c2_errno(void *reply) +{ + switch (c2_wr_get_result(reply)) { + case C2_OK: + return 0; + case CCERR_NO_BUFS: + case CCERR_INSUFFICIENT_RESOURCES: + case CCERR_ZERO_RDMA_READ_RESOURCES: + return -ENOMEM; + case CCERR_MR_IN_USE: + case CCERR_QP_IN_USE: + return -EBUSY; + case CCERR_ADDR_IN_USE: + return -EADDRINUSE; + case CCERR_ADDR_NOT_AVAIL: + return -EADDRNOTAVAIL; + case CCERR_CONN_RESET: + return -ECONNRESET; + case CCERR_NOT_IMPLEMENTED: + case CCERR_INVALID_WQE: + return -ENOSYS; + case CCERR_QP_NOT_PRIVILEGED: + return -EPERM; + case CCERR_STACK_ERROR: + return -EPROTO; + case CCERR_ACCESS_VIOLATION: + case CCERR_BASE_AND_BOUNDS_VIOLATION: + return -EFAULT; + case CCERR_STAG_STATE_NOT_INVALID: + case CCERR_INVALID_ADDRESS: + case CCERR_INVALID_CQ: + case CCERR_INVALID_EP: + case CCERR_INVALID_MODIFIER: + case CCERR_INVALID_MTU: + case CCERR_INVALID_PD_ID: + case CCERR_INVALID_QP: + case CCERR_INVALID_RNIC: + case CCERR_INVALID_STAG: + return -EINVAL; + default: + return -EAGAIN; + } +} + +/* Device */ +extern int c2_register_device(struct c2_dev *c2dev); +extern void c2_unregister_device(struct c2_dev *c2dev); +extern int c2_rnic_init(struct c2_dev *c2dev); +extern void c2_rnic_term(struct c2_dev *c2dev); +extern void c2_rnic_interrupt(struct c2_dev *c2dev); +extern int c2_del_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask); +extern int c2_add_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask); + +/* QPs */ +extern int c2_alloc_qp(struct c2_dev *c2dev, struct c2_pd *pd, + struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp); +extern void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp); +extern struct ib_qp *c2_get_qp(struct ib_device *device, int qpn); +extern int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp, + struct ib_qp_attr *attr, int attr_mask); +extern int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp, + int ord, int ird); +extern int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, + struct ib_send_wr **bad_wr); +extern int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr, + struct ib_recv_wr **bad_wr); +extern void __devinit c2_init_qp_table(struct c2_dev *c2dev); +extern void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev); +extern void c2_set_qp_state(struct c2_qp *, int); +extern struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn); + +/* PDs */ +extern int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd); +extern void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd); +extern int __devinit c2_init_pd_table(struct c2_dev *c2dev); +extern void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev); + +/* CQs */ +extern int c2_init_cq(struct c2_dev *c2dev, int entries, + struct c2_ucontext *ctx, struct c2_cq *cq); +extern void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq); +extern void c2_cq_event(struct c2_dev *c2dev, u32 mq_index); +extern void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index); +extern int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); +extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify); + +/* CM */ +extern int c2_llp_connect(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *iw_param); +extern int c2_llp_accept(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *iw_param); +extern int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata, + u8 pdata_len); +extern int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog); +extern int c2_llp_service_destroy(struct iw_cm_id *cm_id); + +/* MM */ +extern int c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list, + int page_size, int pbl_depth, u32 length, + u32 off, u64 *va, enum c2_acf acf, + struct c2_mr *mr); +extern int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index); + +/* AE */ +extern void c2_ae_event(struct c2_dev *c2dev, u32 mq_index); + +/* MQSP Allocator */ +extern int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask, + struct sp_chunk **root); +extern void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root); +extern u16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head, + dma_addr_t *dma_addr, gfp_t gfp_mask); +extern void c2_free_mqsp(u16 * mqsp); +#endif diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c new file mode 100644 index 000000000000..08f46c83a3a4 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_ae.c @@ -0,0 +1,321 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "c2.h" +#include +#include "c2_status.h" +#include "c2_ae.h" + +static int c2_convert_cm_status(u32 c2_status) +{ + switch (c2_status) { + case C2_CONN_STATUS_SUCCESS: + return 0; + case C2_CONN_STATUS_REJECTED: + return -ENETRESET; + case C2_CONN_STATUS_REFUSED: + return -ECONNREFUSED; + case C2_CONN_STATUS_TIMEDOUT: + return -ETIMEDOUT; + case C2_CONN_STATUS_NETUNREACH: + return -ENETUNREACH; + case C2_CONN_STATUS_HOSTUNREACH: + return -EHOSTUNREACH; + case C2_CONN_STATUS_INVALID_RNIC: + return -EINVAL; + case C2_CONN_STATUS_INVALID_QP: + return -EINVAL; + case C2_CONN_STATUS_INVALID_QP_STATE: + return -EINVAL; + case C2_CONN_STATUS_ADDR_NOT_AVAIL: + return -EADDRNOTAVAIL; + default: + printk(KERN_ERR PFX + "%s - Unable to convert CM status: %d\n", + __FUNCTION__, c2_status); + return -EIO; + } +} + +#ifdef DEBUG +static const char* to_event_str(int event) +{ + static const char* event_str[] = { + "CCAE_REMOTE_SHUTDOWN", + "CCAE_ACTIVE_CONNECT_RESULTS", + "CCAE_CONNECTION_REQUEST", + "CCAE_LLP_CLOSE_COMPLETE", + "CCAE_TERMINATE_MESSAGE_RECEIVED", + "CCAE_LLP_CONNECTION_RESET", + "CCAE_LLP_CONNECTION_LOST", + "CCAE_LLP_SEGMENT_SIZE_INVALID", + "CCAE_LLP_INVALID_CRC", + "CCAE_LLP_BAD_FPDU", + "CCAE_INVALID_DDP_VERSION", + "CCAE_INVALID_RDMA_VERSION", + "CCAE_UNEXPECTED_OPCODE", + "CCAE_INVALID_DDP_QUEUE_NUMBER", + "CCAE_RDMA_READ_NOT_ENABLED", + "CCAE_RDMA_WRITE_NOT_ENABLED", + "CCAE_RDMA_READ_TOO_SMALL", + "CCAE_NO_L_BIT", + "CCAE_TAGGED_INVALID_STAG", + "CCAE_TAGGED_BASE_BOUNDS_VIOLATION", + "CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION", + "CCAE_TAGGED_INVALID_PD", + "CCAE_WRAP_ERROR", + "CCAE_BAD_CLOSE", + "CCAE_BAD_LLP_CLOSE", + "CCAE_INVALID_MSN_RANGE", + "CCAE_INVALID_MSN_GAP", + "CCAE_IRRQ_OVERFLOW", + "CCAE_IRRQ_MSN_GAP", + "CCAE_IRRQ_MSN_RANGE", + "CCAE_IRRQ_INVALID_STAG", + "CCAE_IRRQ_BASE_BOUNDS_VIOLATION", + "CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION", + "CCAE_IRRQ_INVALID_PD", + "CCAE_IRRQ_WRAP_ERROR", + "CCAE_CQ_SQ_COMPLETION_OVERFLOW", + "CCAE_CQ_RQ_COMPLETION_ERROR", + "CCAE_QP_SRQ_WQE_ERROR", + "CCAE_QP_LOCAL_CATASTROPHIC_ERROR", + "CCAE_CQ_OVERFLOW", + "CCAE_CQ_OPERATION_ERROR", + "CCAE_SRQ_LIMIT_REACHED", + "CCAE_QP_RQ_LIMIT_REACHED", + "CCAE_SRQ_CATASTROPHIC_ERROR", + "CCAE_RNIC_CATASTROPHIC_ERROR" + }; + + if (event < CCAE_REMOTE_SHUTDOWN || + event > CCAE_RNIC_CATASTROPHIC_ERROR) + return ""; + + event -= CCAE_REMOTE_SHUTDOWN; + return event_str[event]; +} + +static const char *to_qp_state_str(int state) +{ + switch (state) { + case C2_QP_STATE_IDLE: + return "C2_QP_STATE_IDLE"; + case C2_QP_STATE_CONNECTING: + return "C2_QP_STATE_CONNECTING"; + case C2_QP_STATE_RTS: + return "C2_QP_STATE_RTS"; + case C2_QP_STATE_CLOSING: + return "C2_QP_STATE_CLOSING"; + case C2_QP_STATE_TERMINATE: + return "C2_QP_STATE_TERMINATE"; + case C2_QP_STATE_ERROR: + return "C2_QP_STATE_ERROR"; + default: + return ""; + }; +} +#endif + +void c2_ae_event(struct c2_dev *c2dev, u32 mq_index) +{ + struct c2_mq *mq = c2dev->qptr_array[mq_index]; + union c2wr *wr; + void *resource_user_context; + struct iw_cm_event cm_event; + struct ib_event ib_event; + enum c2_resource_indicator resource_indicator; + enum c2_event_id event_id; + unsigned long flags; + int status; + + /* + * retreive the message + */ + wr = c2_mq_consume(mq); + if (!wr) + return; + + memset(&ib_event, 0, sizeof(ib_event)); + memset(&cm_event, 0, sizeof(cm_event)); + + event_id = c2_wr_get_id(wr); + resource_indicator = be32_to_cpu(wr->ae.ae_generic.resource_type); + resource_user_context = + (void *) (unsigned long) wr->ae.ae_generic.user_context; + + status = cm_event.status = c2_convert_cm_status(c2_wr_get_result(wr)); + + pr_debug("event received c2_dev=%p, event_id=%d, " + "resource_indicator=%d, user_context=%p, status = %d\n", + c2dev, event_id, resource_indicator, resource_user_context, + status); + + switch (resource_indicator) { + case C2_RES_IND_QP:{ + + struct c2_qp *qp = (struct c2_qp *)resource_user_context; + struct iw_cm_id *cm_id = qp->cm_id; + struct c2wr_ae_active_connect_results *res; + + if (!cm_id) { + pr_debug("event received, but cm_id is , qp=%p!\n", + qp); + goto ignore_it; + } + pr_debug("%s: event = %s, user_context=%llx, " + "resource_type=%x, " + "resource=%x, qp_state=%s\n", + __FUNCTION__, + to_event_str(event_id), + be64_to_cpu(wr->ae.ae_generic.user_context), + be32_to_cpu(wr->ae.ae_generic.resource_type), + be32_to_cpu(wr->ae.ae_generic.resource), + to_qp_state_str(be32_to_cpu(wr->ae.ae_generic.qp_state))); + + c2_set_qp_state(qp, be32_to_cpu(wr->ae.ae_generic.qp_state)); + + switch (event_id) { + case CCAE_ACTIVE_CONNECT_RESULTS: + res = &wr->ae.ae_active_connect_results; + cm_event.event = IW_CM_EVENT_CONNECT_REPLY; + cm_event.local_addr.sin_addr.s_addr = res->laddr; + cm_event.remote_addr.sin_addr.s_addr = res->raddr; + cm_event.local_addr.sin_port = res->lport; + cm_event.remote_addr.sin_port = res->rport; + if (status == 0) { + cm_event.private_data_len = + be32_to_cpu(res->private_data_length); + cm_event.private_data = res->private_data; + } else { + spin_lock_irqsave(&qp->lock, flags); + if (qp->cm_id) { + qp->cm_id->rem_ref(qp->cm_id); + qp->cm_id = NULL; + } + spin_unlock_irqrestore(&qp->lock, flags); + cm_event.private_data_len = 0; + cm_event.private_data = NULL; + } + if (cm_id->event_handler) + cm_id->event_handler(cm_id, &cm_event); + break; + case CCAE_TERMINATE_MESSAGE_RECEIVED: + case CCAE_CQ_SQ_COMPLETION_OVERFLOW: + ib_event.device = &c2dev->ibdev; + ib_event.element.qp = &qp->ibqp; + ib_event.event = IB_EVENT_QP_REQ_ERR; + + if (qp->ibqp.event_handler) + qp->ibqp.event_handler(&ib_event, + qp->ibqp. + qp_context); + break; + case CCAE_BAD_CLOSE: + case CCAE_LLP_CLOSE_COMPLETE: + case CCAE_LLP_CONNECTION_RESET: + case CCAE_LLP_CONNECTION_LOST: + BUG_ON(cm_id->event_handler==(void*)0x6b6b6b6b); + + spin_lock_irqsave(&qp->lock, flags); + if (qp->cm_id) { + qp->cm_id->rem_ref(qp->cm_id); + qp->cm_id = NULL; + } + spin_unlock_irqrestore(&qp->lock, flags); + cm_event.event = IW_CM_EVENT_CLOSE; + cm_event.status = 0; + if (cm_id->event_handler) + cm_id->event_handler(cm_id, &cm_event); + break; + default: + BUG_ON(1); + pr_debug("%s:%d Unexpected event_id=%d on QP=%p, " + "CM_ID=%p\n", + __FUNCTION__, __LINE__, + event_id, qp, cm_id); + break; + } + break; + } + + case C2_RES_IND_EP:{ + + struct c2wr_ae_connection_request *req = + &wr->ae.ae_connection_request; + struct iw_cm_id *cm_id = + (struct iw_cm_id *)resource_user_context; + + pr_debug("C2_RES_IND_EP event_id=%d\n", event_id); + if (event_id != CCAE_CONNECTION_REQUEST) { + pr_debug("%s: Invalid event_id: %d\n", + __FUNCTION__, event_id); + break; + } + cm_event.event = IW_CM_EVENT_CONNECT_REQUEST; + cm_event.provider_data = (void*)(unsigned long)req->cr_handle; + cm_event.local_addr.sin_addr.s_addr = req->laddr; + cm_event.remote_addr.sin_addr.s_addr = req->raddr; + cm_event.local_addr.sin_port = req->lport; + cm_event.remote_addr.sin_port = req->rport; + cm_event.private_data_len = + be32_to_cpu(req->private_data_length); + cm_event.private_data = req->private_data; + + if (cm_id->event_handler) + cm_id->event_handler(cm_id, &cm_event); + break; + } + + case C2_RES_IND_CQ:{ + struct c2_cq *cq = + (struct c2_cq *) resource_user_context; + + pr_debug("IB_EVENT_CQ_ERR\n"); + ib_event.device = &c2dev->ibdev; + ib_event.element.cq = &cq->ibcq; + ib_event.event = IB_EVENT_CQ_ERR; + + if (cq->ibcq.event_handler) + cq->ibcq.event_handler(&ib_event, + cq->ibcq.cq_context); + } + + default: + printk("Bad resource indicator = %d\n", + resource_indicator); + break; + } + + ignore_it: + c2_mq_free(mq); +} diff --git a/drivers/infiniband/hw/amso1100/c2_ae.h b/drivers/infiniband/hw/amso1100/c2_ae.h new file mode 100644 index 000000000000..3a065c33b83b --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_ae.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _C2_AE_H_ +#define _C2_AE_H_ + +/* + * WARNING: If you change this file, also bump C2_IVN_BASE + * in common/include/clustercore/c2_ivn.h. + */ + +/* + * Asynchronous Event Identifiers + * + * These start at 0x80 only so it's obvious from inspection that + * they are not work-request statuses. This isn't critical. + * + * NOTE: these event id's must fit in eight bits. + */ +enum c2_event_id { + CCAE_REMOTE_SHUTDOWN = 0x80, + CCAE_ACTIVE_CONNECT_RESULTS, + CCAE_CONNECTION_REQUEST, + CCAE_LLP_CLOSE_COMPLETE, + CCAE_TERMINATE_MESSAGE_RECEIVED, + CCAE_LLP_CONNECTION_RESET, + CCAE_LLP_CONNECTION_LOST, + CCAE_LLP_SEGMENT_SIZE_INVALID, + CCAE_LLP_INVALID_CRC, + CCAE_LLP_BAD_FPDU, + CCAE_INVALID_DDP_VERSION, + CCAE_INVALID_RDMA_VERSION, + CCAE_UNEXPECTED_OPCODE, + CCAE_INVALID_DDP_QUEUE_NUMBER, + CCAE_RDMA_READ_NOT_ENABLED, + CCAE_RDMA_WRITE_NOT_ENABLED, + CCAE_RDMA_READ_TOO_SMALL, + CCAE_NO_L_BIT, + CCAE_TAGGED_INVALID_STAG, + CCAE_TAGGED_BASE_BOUNDS_VIOLATION, + CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION, + CCAE_TAGGED_INVALID_PD, + CCAE_WRAP_ERROR, + CCAE_BAD_CLOSE, + CCAE_BAD_LLP_CLOSE, + CCAE_INVALID_MSN_RANGE, + CCAE_INVALID_MSN_GAP, + CCAE_IRRQ_OVERFLOW, + CCAE_IRRQ_MSN_GAP, + CCAE_IRRQ_MSN_RANGE, + CCAE_IRRQ_INVALID_STAG, + CCAE_IRRQ_BASE_BOUNDS_VIOLATION, + CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION, + CCAE_IRRQ_INVALID_PD, + CCAE_IRRQ_WRAP_ERROR, + CCAE_CQ_SQ_COMPLETION_OVERFLOW, + CCAE_CQ_RQ_COMPLETION_ERROR, + CCAE_QP_SRQ_WQE_ERROR, + CCAE_QP_LOCAL_CATASTROPHIC_ERROR, + CCAE_CQ_OVERFLOW, + CCAE_CQ_OPERATION_ERROR, + CCAE_SRQ_LIMIT_REACHED, + CCAE_QP_RQ_LIMIT_REACHED, + CCAE_SRQ_CATASTROPHIC_ERROR, + CCAE_RNIC_CATASTROPHIC_ERROR +/* WARNING If you add more id's, make sure their values fit in eight bits. */ +}; + +/* + * Resource Indicators and Identifiers + */ +enum c2_resource_indicator { + C2_RES_IND_QP = 1, + C2_RES_IND_EP, + C2_RES_IND_CQ, + C2_RES_IND_SRQ, +}; + +#endif /* _C2_AE_H_ */ diff --git a/drivers/infiniband/hw/amso1100/c2_alloc.c b/drivers/infiniband/hw/amso1100/c2_alloc.c new file mode 100644 index 000000000000..1d2529992c0c --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_alloc.c @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "c2.h" + +static int c2_alloc_mqsp_chunk(struct c2_dev *c2dev, gfp_t gfp_mask, + struct sp_chunk **head) +{ + int i; + struct sp_chunk *new_head; + + new_head = (struct sp_chunk *) __get_free_page(gfp_mask); + if (new_head == NULL) + return -ENOMEM; + + new_head->dma_addr = dma_map_single(c2dev->ibdev.dma_device, new_head, + PAGE_SIZE, DMA_FROM_DEVICE); + pci_unmap_addr_set(new_head, mapping, new_head->dma_addr); + + new_head->next = NULL; + new_head->head = 0; + + /* build list where each index is the next free slot */ + for (i = 0; + i < (PAGE_SIZE - sizeof(struct sp_chunk) - + sizeof(u16)) / sizeof(u16) - 1; + i++) { + new_head->shared_ptr[i] = i + 1; + } + /* terminate list */ + new_head->shared_ptr[i] = 0xFFFF; + + *head = new_head; + return 0; +} + +int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask, + struct sp_chunk **root) +{ + return c2_alloc_mqsp_chunk(c2dev, gfp_mask, root); +} + +void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root) +{ + struct sp_chunk *next; + + while (root) { + next = root->next; + dma_unmap_single(c2dev->ibdev.dma_device, + pci_unmap_addr(root, mapping), PAGE_SIZE, + DMA_FROM_DEVICE); + __free_page((struct page *) root); + root = next; + } +} + +u16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head, + dma_addr_t *dma_addr, gfp_t gfp_mask) +{ + u16 mqsp; + + while (head) { + mqsp = head->head; + if (mqsp != 0xFFFF) { + head->head = head->shared_ptr[mqsp]; + break; + } else if (head->next == NULL) { + if (c2_alloc_mqsp_chunk(c2dev, gfp_mask, &head->next) == + 0) { + head = head->next; + mqsp = head->head; + head->head = head->shared_ptr[mqsp]; + break; + } else + return NULL; + } else + head = head->next; + } + if (head) { + *dma_addr = head->dma_addr + + ((unsigned long) &(head->shared_ptr[mqsp]) - + (unsigned long) head); + pr_debug("%s addr %p dma_addr %llx\n", __FUNCTION__, + &(head->shared_ptr[mqsp]), (u64)*dma_addr); + return &(head->shared_ptr[mqsp]); + } + return NULL; +} + +void c2_free_mqsp(u16 * mqsp) +{ + struct sp_chunk *head; + u16 idx; + + /* The chunk containing this ptr begins at the page boundary */ + head = (struct sp_chunk *) ((unsigned long) mqsp & PAGE_MASK); + + /* Link head to new mqsp */ + *mqsp = head->head; + + /* Compute the shared_ptr index */ + idx = ((unsigned long) mqsp & ~PAGE_MASK) >> 1; + idx -= (unsigned long) &(((struct sp_chunk *) 0)->shared_ptr[0]) >> 1; + + /* Point this index at the head */ + head->shared_ptr[idx] = head->head; + + /* Point head at this index */ + head->head = idx; +} diff --git a/drivers/infiniband/hw/amso1100/c2_cm.c b/drivers/infiniband/hw/amso1100/c2_cm.c new file mode 100644 index 000000000000..485254efdd1e --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_cm.c @@ -0,0 +1,452 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include "c2.h" +#include "c2_wr.h" +#include "c2_vq.h" +#include + +int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) +{ + struct c2_dev *c2dev = to_c2dev(cm_id->device); + struct ib_qp *ibqp; + struct c2_qp *qp; + struct c2wr_qp_connect_req *wr; /* variable size needs a malloc. */ + struct c2_vq_req *vq_req; + int err; + + ibqp = c2_get_qp(cm_id->device, iw_param->qpn); + if (!ibqp) + return -EINVAL; + qp = to_c2qp(ibqp); + + /* Associate QP <--> CM_ID */ + cm_id->provider_data = qp; + cm_id->add_ref(cm_id); + qp->cm_id = cm_id; + + /* + * only support the max private_data length + */ + if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) { + err = -EINVAL; + goto bail0; + } + /* + * Set the rdma read limits + */ + err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird); + if (err) + goto bail0; + + /* + * Create and send a WR_QP_CONNECT... + */ + wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); + if (!wr) { + err = -ENOMEM; + goto bail0; + } + + vq_req = vq_req_alloc(c2dev); + if (!vq_req) { + err = -ENOMEM; + goto bail1; + } + + c2_wr_set_id(wr, CCWR_QP_CONNECT); + wr->hdr.context = 0; + wr->rnic_handle = c2dev->adapter_handle; + wr->qp_handle = qp->adapter_handle; + + wr->remote_addr = cm_id->remote_addr.sin_addr.s_addr; + wr->remote_port = cm_id->remote_addr.sin_port; + + /* + * Move any private data from the callers's buf into + * the WR. + */ + if (iw_param->private_data) { + wr->private_data_length = + cpu_to_be32(iw_param->private_data_len); + memcpy(&wr->private_data[0], iw_param->private_data, + iw_param->private_data_len); + } else + wr->private_data_length = 0; + + /* + * Send WR to adapter. NOTE: There is no synch reply from + * the adapter. + */ + err = vq_send_wr(c2dev, (union c2wr *) wr); + vq_req_free(c2dev, vq_req); + + bail1: + kfree(wr); + bail0: + if (err) { + /* + * If we fail, release reference on QP and + * disassociate QP from CM_ID + */ + cm_id->provider_data = NULL; + qp->cm_id = NULL; + cm_id->rem_ref(cm_id); + } + return err; +} + +int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog) +{ + struct c2_dev *c2dev; + struct c2wr_ep_listen_create_req wr; + struct c2wr_ep_listen_create_rep *reply; + struct c2_vq_req *vq_req; + int err; + + c2dev = to_c2dev(cm_id->device); + if (c2dev == NULL) + return -EINVAL; + + /* + * Allocate verbs request. + */ + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + /* + * Build the WR + */ + c2_wr_set_id(&wr, CCWR_EP_LISTEN_CREATE); + wr.hdr.context = (u64) (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.local_addr = cm_id->local_addr.sin_addr.s_addr; + wr.local_port = cm_id->local_addr.sin_port; + wr.backlog = cpu_to_be32(backlog); + wr.user_context = (u64) (unsigned long) cm_id; + + /* + * Reference the request struct. Dereferenced in the int handler. + */ + vq_req_get(c2dev, vq_req); + + /* + * Send WR to adapter + */ + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + /* + * Wait for reply from adapter + */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail0; + + /* + * Process reply + */ + reply = + (struct c2wr_ep_listen_create_rep *) (unsigned long) vq_req->reply_msg; + if (!reply) { + err = -ENOMEM; + goto bail1; + } + + if ((err = c2_errno(reply)) != 0) + goto bail1; + + /* + * Keep the adapter handle. Used in subsequent destroy + */ + cm_id->provider_data = (void*)(unsigned long) reply->ep_handle; + + /* + * free vq stuff + */ + vq_repbuf_free(c2dev, reply); + vq_req_free(c2dev, vq_req); + + return 0; + + bail1: + vq_repbuf_free(c2dev, reply); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + + +int c2_llp_service_destroy(struct iw_cm_id *cm_id) +{ + + struct c2_dev *c2dev; + struct c2wr_ep_listen_destroy_req wr; + struct c2wr_ep_listen_destroy_rep *reply; + struct c2_vq_req *vq_req; + int err; + + c2dev = to_c2dev(cm_id->device); + if (c2dev == NULL) + return -EINVAL; + + /* + * Allocate verbs request. + */ + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + /* + * Build the WR + */ + c2_wr_set_id(&wr, CCWR_EP_LISTEN_DESTROY); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.ep_handle = (u32)(unsigned long)cm_id->provider_data; + + /* + * reference the request struct. dereferenced in the int handler. + */ + vq_req_get(c2dev, vq_req); + + /* + * Send WR to adapter + */ + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + /* + * Wait for reply from adapter + */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail0; + + /* + * Process reply + */ + reply=(struct c2wr_ep_listen_destroy_rep *)(unsigned long)vq_req->reply_msg; + if (!reply) { + err = -ENOMEM; + goto bail0; + } + if ((err = c2_errno(reply)) != 0) + goto bail1; + + bail1: + vq_repbuf_free(c2dev, reply); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + +int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) +{ + struct c2_dev *c2dev = to_c2dev(cm_id->device); + struct c2_qp *qp; + struct ib_qp *ibqp; + struct c2wr_cr_accept_req *wr; /* variable length WR */ + struct c2_vq_req *vq_req; + struct c2wr_cr_accept_rep *reply; /* VQ Reply msg ptr. */ + int err; + + ibqp = c2_get_qp(cm_id->device, iw_param->qpn); + if (!ibqp) + return -EINVAL; + qp = to_c2qp(ibqp); + + /* Set the RDMA read limits */ + err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird); + if (err) + goto bail0; + + /* Allocate verbs request. */ + vq_req = vq_req_alloc(c2dev); + if (!vq_req) { + err = -ENOMEM; + goto bail1; + } + vq_req->qp = qp; + vq_req->cm_id = cm_id; + vq_req->event = IW_CM_EVENT_ESTABLISHED; + + wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); + if (!wr) { + err = -ENOMEM; + goto bail2; + } + + /* Build the WR */ + c2_wr_set_id(wr, CCWR_CR_ACCEPT); + wr->hdr.context = (unsigned long) vq_req; + wr->rnic_handle = c2dev->adapter_handle; + wr->ep_handle = (u32) (unsigned long) cm_id->provider_data; + wr->qp_handle = qp->adapter_handle; + + /* Replace the cr_handle with the QP after accept */ + cm_id->provider_data = qp; + cm_id->add_ref(cm_id); + qp->cm_id = cm_id; + + cm_id->provider_data = qp; + + /* Validate private_data length */ + if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) { + err = -EINVAL; + goto bail2; + } + + if (iw_param->private_data) { + wr->private_data_length = cpu_to_be32(iw_param->private_data_len); + memcpy(&wr->private_data[0], + iw_param->private_data, iw_param->private_data_len); + } else + wr->private_data_length = 0; + + /* Reference the request struct. Dereferenced in the int handler. */ + vq_req_get(c2dev, vq_req); + + /* Send WR to adapter */ + err = vq_send_wr(c2dev, (union c2wr *) wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail2; + } + + /* Wait for reply from adapter */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail2; + + /* Check that reply is present */ + reply = (struct c2wr_cr_accept_rep *) (unsigned long) vq_req->reply_msg; + if (!reply) { + err = -ENOMEM; + goto bail2; + } + + err = c2_errno(reply); + vq_repbuf_free(c2dev, reply); + + if (!err) + c2_set_qp_state(qp, C2_QP_STATE_RTS); + bail2: + kfree(wr); + bail1: + vq_req_free(c2dev, vq_req); + bail0: + if (err) { + /* + * If we fail, release reference on QP and + * disassociate QP from CM_ID + */ + cm_id->provider_data = NULL; + qp->cm_id = NULL; + cm_id->rem_ref(cm_id); + } + return err; +} + +int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) +{ + struct c2_dev *c2dev; + struct c2wr_cr_reject_req wr; + struct c2_vq_req *vq_req; + struct c2wr_cr_reject_rep *reply; + int err; + + c2dev = to_c2dev(cm_id->device); + + /* + * Allocate verbs request. + */ + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + /* + * Build the WR + */ + c2_wr_set_id(&wr, CCWR_CR_REJECT); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.ep_handle = (u32) (unsigned long) cm_id->provider_data; + + /* + * reference the request struct. dereferenced in the int handler. + */ + vq_req_get(c2dev, vq_req); + + /* + * Send WR to adapter + */ + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + /* + * Wait for reply from adapter + */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail0; + + /* + * Process reply + */ + reply = (struct c2wr_cr_reject_rep *) (unsigned long) + vq_req->reply_msg; + if (!reply) { + err = -ENOMEM; + goto bail0; + } + err = c2_errno(reply); + /* + * free vq stuff + */ + vq_repbuf_free(c2dev, reply); + + bail0: + vq_req_free(c2dev, vq_req); + return err; +} diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c new file mode 100644 index 000000000000..9d7bcc5ade93 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_cq.c @@ -0,0 +1,433 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include "c2.h" +#include "c2_vq.h" +#include "c2_status.h" + +#define C2_CQ_MSG_SIZE ((sizeof(struct c2wr_ce) + 32-1) & ~(32-1)) + +static struct c2_cq *c2_cq_get(struct c2_dev *c2dev, int cqn) +{ + struct c2_cq *cq; + unsigned long flags; + + spin_lock_irqsave(&c2dev->lock, flags); + cq = c2dev->qptr_array[cqn]; + if (!cq) { + spin_unlock_irqrestore(&c2dev->lock, flags); + return NULL; + } + atomic_inc(&cq->refcount); + spin_unlock_irqrestore(&c2dev->lock, flags); + return cq; +} + +static void c2_cq_put(struct c2_cq *cq) +{ + if (atomic_dec_and_test(&cq->refcount)) + wake_up(&cq->wait); +} + +void c2_cq_event(struct c2_dev *c2dev, u32 mq_index) +{ + struct c2_cq *cq; + + cq = c2_cq_get(c2dev, mq_index); + if (!cq) { + printk("discarding events on destroyed CQN=%d\n", mq_index); + return; + } + + (*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context); + c2_cq_put(cq); +} + +void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index) +{ + struct c2_cq *cq; + struct c2_mq *q; + + cq = c2_cq_get(c2dev, mq_index); + if (!cq) + return; + + spin_lock_irq(&cq->lock); + q = &cq->mq; + if (q && !c2_mq_empty(q)) { + u16 priv = q->priv; + struct c2wr_ce *msg; + + while (priv != be16_to_cpu(*q->shared)) { + msg = (struct c2wr_ce *) + (q->msg_pool.host + priv * q->msg_size); + if (msg->qp_user_context == (u64) (unsigned long) qp) { + msg->qp_user_context = (u64) 0; + } + priv = (priv + 1) % q->q_size; + } + } + spin_unlock_irq(&cq->lock); + c2_cq_put(cq); +} + +static inline enum ib_wc_status c2_cqe_status_to_openib(u8 status) +{ + switch (status) { + case C2_OK: + return IB_WC_SUCCESS; + case CCERR_FLUSHED: + return IB_WC_WR_FLUSH_ERR; + case CCERR_BASE_AND_BOUNDS_VIOLATION: + return IB_WC_LOC_PROT_ERR; + case CCERR_ACCESS_VIOLATION: + return IB_WC_LOC_ACCESS_ERR; + case CCERR_TOTAL_LENGTH_TOO_BIG: + return IB_WC_LOC_LEN_ERR; + case CCERR_INVALID_WINDOW: + return IB_WC_MW_BIND_ERR; + default: + return IB_WC_GENERAL_ERR; + } +} + + +static inline int c2_poll_one(struct c2_dev *c2dev, + struct c2_cq *cq, struct ib_wc *entry) +{ + struct c2wr_ce *ce; + struct c2_qp *qp; + int is_recv = 0; + + ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq); + if (!ce) { + return -EAGAIN; + } + + /* + * if the qp returned is null then this qp has already + * been freed and we are unable process the completion. + * try pulling the next message + */ + while ((qp = + (struct c2_qp *) (unsigned long) ce->qp_user_context) == NULL) { + c2_mq_free(&cq->mq); + ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq); + if (!ce) + return -EAGAIN; + } + + entry->status = c2_cqe_status_to_openib(c2_wr_get_result(ce)); + entry->wr_id = ce->hdr.context; + entry->qp_num = ce->handle; + entry->wc_flags = 0; + entry->slid = 0; + entry->sl = 0; + entry->src_qp = 0; + entry->dlid_path_bits = 0; + entry->pkey_index = 0; + + switch (c2_wr_get_id(ce)) { + case C2_WR_TYPE_SEND: + entry->opcode = IB_WC_SEND; + break; + case C2_WR_TYPE_RDMA_WRITE: + entry->opcode = IB_WC_RDMA_WRITE; + break; + case C2_WR_TYPE_RDMA_READ: + entry->opcode = IB_WC_RDMA_READ; + break; + case C2_WR_TYPE_BIND_MW: + entry->opcode = IB_WC_BIND_MW; + break; + case C2_WR_TYPE_RECV: + entry->byte_len = be32_to_cpu(ce->bytes_rcvd); + entry->opcode = IB_WC_RECV; + is_recv = 1; + break; + default: + break; + } + + /* consume the WQEs */ + if (is_recv) + c2_mq_lconsume(&qp->rq_mq, 1); + else + c2_mq_lconsume(&qp->sq_mq, + be32_to_cpu(c2_wr_get_wqe_count(ce)) + 1); + + /* free the message */ + c2_mq_free(&cq->mq); + + return 0; +} + +int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) +{ + struct c2_dev *c2dev = to_c2dev(ibcq->device); + struct c2_cq *cq = to_c2cq(ibcq); + unsigned long flags; + int npolled, err; + + spin_lock_irqsave(&cq->lock, flags); + + for (npolled = 0; npolled < num_entries; ++npolled) { + + err = c2_poll_one(c2dev, cq, entry + npolled); + if (err) + break; + } + + spin_unlock_irqrestore(&cq->lock, flags); + + return npolled; +} + +int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) +{ + struct c2_mq_shared __iomem *shared; + struct c2_cq *cq; + + cq = to_c2cq(ibcq); + shared = cq->mq.peer; + + if (notify == IB_CQ_NEXT_COMP) + writeb(C2_CQ_NOTIFICATION_TYPE_NEXT, &shared->notification_type); + else if (notify == IB_CQ_SOLICITED) + writeb(C2_CQ_NOTIFICATION_TYPE_NEXT_SE, &shared->notification_type); + else + return -EINVAL; + + writeb(CQ_WAIT_FOR_DMA | CQ_ARMED, &shared->armed); + + /* + * Now read back shared->armed to make the PCI + * write synchronous. This is necessary for + * correct cq notification semantics. + */ + readb(&shared->armed); + + return 0; +} + +static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq) +{ + + dma_unmap_single(c2dev->ibdev.dma_device, pci_unmap_addr(mq, mapping), + mq->q_size * mq->msg_size, DMA_FROM_DEVICE); + free_pages((unsigned long) mq->msg_pool.host, + get_order(mq->q_size * mq->msg_size)); +} + +static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size, + int msg_size) +{ + unsigned long pool_start; + + pool_start = __get_free_pages(GFP_KERNEL, + get_order(q_size * msg_size)); + if (!pool_start) + return -ENOMEM; + + c2_mq_rep_init(mq, + 0, /* index (currently unknown) */ + q_size, + msg_size, + (u8 *) pool_start, + NULL, /* peer (currently unknown) */ + C2_MQ_HOST_TARGET); + + mq->host_dma = dma_map_single(c2dev->ibdev.dma_device, + (void *)pool_start, + q_size * msg_size, DMA_FROM_DEVICE); + pci_unmap_addr_set(mq, mapping, mq->host_dma); + + return 0; +} + +int c2_init_cq(struct c2_dev *c2dev, int entries, + struct c2_ucontext *ctx, struct c2_cq *cq) +{ + struct c2wr_cq_create_req wr; + struct c2wr_cq_create_rep *reply; + unsigned long peer_pa; + struct c2_vq_req *vq_req; + int err; + + might_sleep(); + + cq->ibcq.cqe = entries - 1; + cq->is_kernel = !ctx; + + /* Allocate a shared pointer */ + cq->mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, + &cq->mq.shared_dma, GFP_KERNEL); + if (!cq->mq.shared) + return -ENOMEM; + + /* Allocate pages for the message pool */ + err = c2_alloc_cq_buf(c2dev, &cq->mq, entries + 1, C2_CQ_MSG_SIZE); + if (err) + goto bail0; + + vq_req = vq_req_alloc(c2dev); + if (!vq_req) { + err = -ENOMEM; + goto bail1; + } + + memset(&wr, 0, sizeof(wr)); + c2_wr_set_id(&wr, CCWR_CQ_CREATE); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.msg_size = cpu_to_be32(cq->mq.msg_size); + wr.depth = cpu_to_be32(cq->mq.q_size); + wr.shared_ht = cpu_to_be64(cq->mq.shared_dma); + wr.msg_pool = cpu_to_be64(cq->mq.host_dma); + wr.user_context = (u64) (unsigned long) (cq); + + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail2; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail2; + + reply = (struct c2wr_cq_create_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) { + err = -ENOMEM; + goto bail2; + } + + if ((err = c2_errno(reply)) != 0) + goto bail3; + + cq->adapter_handle = reply->cq_handle; + cq->mq.index = be32_to_cpu(reply->mq_index); + + peer_pa = c2dev->pa + be32_to_cpu(reply->adapter_shared); + cq->mq.peer = ioremap_nocache(peer_pa, PAGE_SIZE); + if (!cq->mq.peer) { + err = -ENOMEM; + goto bail3; + } + + vq_repbuf_free(c2dev, reply); + vq_req_free(c2dev, vq_req); + + spin_lock_init(&cq->lock); + atomic_set(&cq->refcount, 1); + init_waitqueue_head(&cq->wait); + + /* + * Use the MQ index allocated by the adapter to + * store the CQ in the qptr_array + */ + cq->cqn = cq->mq.index; + c2dev->qptr_array[cq->cqn] = cq; + + return 0; + + bail3: + vq_repbuf_free(c2dev, reply); + bail2: + vq_req_free(c2dev, vq_req); + bail1: + c2_free_cq_buf(c2dev, &cq->mq); + bail0: + c2_free_mqsp(cq->mq.shared); + + return err; +} + +void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq) +{ + int err; + struct c2_vq_req *vq_req; + struct c2wr_cq_destroy_req wr; + struct c2wr_cq_destroy_rep *reply; + + might_sleep(); + + /* Clear CQ from the qptr array */ + spin_lock_irq(&c2dev->lock); + c2dev->qptr_array[cq->mq.index] = NULL; + atomic_dec(&cq->refcount); + spin_unlock_irq(&c2dev->lock); + + wait_event(cq->wait, !atomic_read(&cq->refcount)); + + vq_req = vq_req_alloc(c2dev); + if (!vq_req) { + goto bail0; + } + + memset(&wr, 0, sizeof(wr)); + c2_wr_set_id(&wr, CCWR_CQ_DESTROY); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.cq_handle = cq->adapter_handle; + + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail1; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail1; + + reply = (struct c2wr_cq_destroy_rep *) (unsigned long) (vq_req->reply_msg); + + vq_repbuf_free(c2dev, reply); + bail1: + vq_req_free(c2dev, vq_req); + bail0: + if (cq->is_kernel) { + c2_free_cq_buf(c2dev, &cq->mq); + } + + return; +} diff --git a/drivers/infiniband/hw/amso1100/c2_intr.c b/drivers/infiniband/hw/amso1100/c2_intr.c new file mode 100644 index 000000000000..0d0bc33ca30a --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_intr.c @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "c2.h" +#include +#include "c2_vq.h" + +static void handle_mq(struct c2_dev *c2dev, u32 index); +static void handle_vq(struct c2_dev *c2dev, u32 mq_index); + +/* + * Handle RNIC interrupts + */ +void c2_rnic_interrupt(struct c2_dev *c2dev) +{ + unsigned int mq_index; + + while (c2dev->hints_read != be16_to_cpu(*c2dev->hint_count)) { + mq_index = readl(c2dev->regs + PCI_BAR0_HOST_HINT); + if (mq_index & 0x80000000) { + break; + } + + c2dev->hints_read++; + handle_mq(c2dev, mq_index); + } + +} + +/* + * Top level MQ handler + */ +static void handle_mq(struct c2_dev *c2dev, u32 mq_index) +{ + if (c2dev->qptr_array[mq_index] == NULL) { + pr_debug(KERN_INFO "handle_mq: stray activity for mq_index=%d\n", + mq_index); + return; + } + + switch (mq_index) { + case (0): + /* + * An index of 0 in the activity queue + * indicates the req vq now has messages + * available... + * + * Wake up any waiters waiting on req VQ + * message availability. + */ + wake_up(&c2dev->req_vq_wo); + break; + case (1): + handle_vq(c2dev, mq_index); + break; + case (2): + /* We have to purge the VQ in case there are pending + * accept reply requests that would result in the + * generation of an ESTABLISHED event. If we don't + * generate these first, a CLOSE event could end up + * being delivered before the ESTABLISHED event. + */ + handle_vq(c2dev, 1); + + c2_ae_event(c2dev, mq_index); + break; + default: + /* There is no event synchronization between CQ events + * and AE or CM events. In fact, CQE could be + * delivered for all of the I/O up to and including the + * FLUSH for a peer disconenct prior to the ESTABLISHED + * event being delivered to the app. The reason for this + * is that CM events are delivered on a thread, while AE + * and CM events are delivered on interrupt context. + */ + c2_cq_event(c2dev, mq_index); + break; + } + + return; +} + +/* + * Handles verbs WR replies. + */ +static void handle_vq(struct c2_dev *c2dev, u32 mq_index) +{ + void *adapter_msg, *reply_msg; + struct c2wr_hdr *host_msg; + struct c2wr_hdr tmp; + struct c2_mq *reply_vq; + struct c2_vq_req *req; + struct iw_cm_event cm_event; + int err; + + reply_vq = (struct c2_mq *) c2dev->qptr_array[mq_index]; + + /* + * get next msg from mq_index into adapter_msg. + * don't free it yet. + */ + adapter_msg = c2_mq_consume(reply_vq); + if (adapter_msg == NULL) { + return; + } + + host_msg = vq_repbuf_alloc(c2dev); + + /* + * If we can't get a host buffer, then we'll still + * wakeup the waiter, we just won't give him the msg. + * It is assumed the waiter will deal with this... + */ + if (!host_msg) { + pr_debug("handle_vq: no repbufs!\n"); + + /* + * just copy the WR header into a local variable. + * this allows us to still demux on the context + */ + host_msg = &tmp; + memcpy(host_msg, adapter_msg, sizeof(tmp)); + reply_msg = NULL; + } else { + memcpy(host_msg, adapter_msg, reply_vq->msg_size); + reply_msg = host_msg; + } + + /* + * consume the msg from the MQ + */ + c2_mq_free(reply_vq); + + /* + * wakeup the waiter. + */ + req = (struct c2_vq_req *) (unsigned long) host_msg->context; + if (req == NULL) { + /* + * We should never get here, as the adapter should + * never send us a reply that we're not expecting. + */ + vq_repbuf_free(c2dev, host_msg); + pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n"); + return; + } + + err = c2_errno(reply_msg); + if (!err) switch (req->event) { + case IW_CM_EVENT_ESTABLISHED: + c2_set_qp_state(req->qp, + C2_QP_STATE_RTS); + case IW_CM_EVENT_CLOSE: + + /* + * Move the QP to RTS if this is + * the established event + */ + cm_event.event = req->event; + cm_event.status = 0; + cm_event.local_addr = req->cm_id->local_addr; + cm_event.remote_addr = req->cm_id->remote_addr; + cm_event.private_data = NULL; + cm_event.private_data_len = 0; + req->cm_id->event_handler(req->cm_id, &cm_event); + break; + default: + break; + } + + req->reply_msg = (u64) (unsigned long) (reply_msg); + atomic_set(&req->reply_ready, 1); + wake_up(&req->wait_object); + + /* + * If the request was cancelled, then this put will + * free the vq_req memory...and reply_msg!!! + */ + vq_req_put(c2dev, req); +} diff --git a/drivers/infiniband/hw/amso1100/c2_mm.c b/drivers/infiniband/hw/amso1100/c2_mm.c new file mode 100644 index 000000000000..1e4f46493fcb --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_mm.c @@ -0,0 +1,375 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "c2.h" +#include "c2_vq.h" + +#define PBL_VIRT 1 +#define PBL_PHYS 2 + +/* + * Send all the PBL messages to convey the remainder of the PBL + * Wait for the adapter's reply on the last one. + * This is indicated by setting the MEM_PBL_COMPLETE in the flags. + * + * NOTE: vq_req is _not_ freed by this function. The VQ Host + * Reply buffer _is_ freed by this function. + */ +static int +send_pbl_messages(struct c2_dev *c2dev, u32 stag_index, + unsigned long va, u32 pbl_depth, + struct c2_vq_req *vq_req, int pbl_type) +{ + u32 pbe_count; /* amt that fits in a PBL msg */ + u32 count; /* amt in this PBL MSG. */ + struct c2wr_nsmr_pbl_req *wr; /* PBL WR ptr */ + struct c2wr_nsmr_pbl_rep *reply; /* reply ptr */ + int err, pbl_virt, pbl_index, i; + + switch (pbl_type) { + case PBL_VIRT: + pbl_virt = 1; + break; + case PBL_PHYS: + pbl_virt = 0; + break; + default: + return -EINVAL; + break; + } + + pbe_count = (c2dev->req_vq.msg_size - + sizeof(struct c2wr_nsmr_pbl_req)) / sizeof(u64); + wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); + if (!wr) { + return -ENOMEM; + } + c2_wr_set_id(wr, CCWR_NSMR_PBL); + + /* + * Only the last PBL message will generate a reply from the verbs, + * so we set the context to 0 indicating there is no kernel verbs + * handler blocked awaiting this reply. + */ + wr->hdr.context = 0; + wr->rnic_handle = c2dev->adapter_handle; + wr->stag_index = stag_index; /* already swapped */ + wr->flags = 0; + pbl_index = 0; + while (pbl_depth) { + count = min(pbe_count, pbl_depth); + wr->addrs_length = cpu_to_be32(count); + + /* + * If this is the last message, then reference the + * vq request struct cuz we're gonna wait for a reply. + * also make this PBL msg as the last one. + */ + if (count == pbl_depth) { + /* + * reference the request struct. dereferenced in the + * int handler. + */ + vq_req_get(c2dev, vq_req); + wr->flags = cpu_to_be32(MEM_PBL_COMPLETE); + + /* + * This is the last PBL message. + * Set the context to our VQ Request Object so we can + * wait for the reply. + */ + wr->hdr.context = (unsigned long) vq_req; + } + + /* + * If pbl_virt is set then va is a virtual address + * that describes a virtually contiguous memory + * allocation. The wr needs the start of each virtual page + * to be converted to the corresponding physical address + * of the page. If pbl_virt is not set then va is an array + * of physical addresses and there is no conversion to do. + * Just fill in the wr with what is in the array. + */ + for (i = 0; i < count; i++) { + if (pbl_virt) { + va += PAGE_SIZE; + } else { + wr->paddrs[i] = + cpu_to_be64(((u64 *)va)[pbl_index + i]); + } + } + + /* + * Send WR to adapter + */ + err = vq_send_wr(c2dev, (union c2wr *) wr); + if (err) { + if (count <= pbe_count) { + vq_req_put(c2dev, vq_req); + } + goto bail0; + } + pbl_depth -= count; + pbl_index += count; + } + + /* + * Now wait for the reply... + */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) { + goto bail0; + } + + /* + * Process reply + */ + reply = (struct c2wr_nsmr_pbl_rep *) (unsigned long) vq_req->reply_msg; + if (!reply) { + err = -ENOMEM; + goto bail0; + } + + err = c2_errno(reply); + + vq_repbuf_free(c2dev, reply); + bail0: + kfree(wr); + return err; +} + +#define C2_PBL_MAX_DEPTH 131072 +int +c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list, + int page_size, int pbl_depth, u32 length, + u32 offset, u64 *va, enum c2_acf acf, + struct c2_mr *mr) +{ + struct c2_vq_req *vq_req; + struct c2wr_nsmr_register_req *wr; + struct c2wr_nsmr_register_rep *reply; + u16 flags; + int i, pbe_count, count; + int err; + + if (!va || !length || !addr_list || !pbl_depth) + return -EINTR; + + /* + * Verify PBL depth is within rnic max + */ + if (pbl_depth > C2_PBL_MAX_DEPTH) { + return -EINTR; + } + + /* + * allocate verbs request object + */ + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); + if (!wr) { + err = -ENOMEM; + goto bail0; + } + + /* + * build the WR + */ + c2_wr_set_id(wr, CCWR_NSMR_REGISTER); + wr->hdr.context = (unsigned long) vq_req; + wr->rnic_handle = c2dev->adapter_handle; + + flags = (acf | MEM_VA_BASED | MEM_REMOTE); + + /* + * compute how many pbes can fit in the message + */ + pbe_count = (c2dev->req_vq.msg_size - + sizeof(struct c2wr_nsmr_register_req)) / sizeof(u64); + + if (pbl_depth <= pbe_count) { + flags |= MEM_PBL_COMPLETE; + } + wr->flags = cpu_to_be16(flags); + wr->stag_key = 0; //stag_key; + wr->va = cpu_to_be64(*va); + wr->pd_id = mr->pd->pd_id; + wr->pbe_size = cpu_to_be32(page_size); + wr->length = cpu_to_be32(length); + wr->pbl_depth = cpu_to_be32(pbl_depth); + wr->fbo = cpu_to_be32(offset); + count = min(pbl_depth, pbe_count); + wr->addrs_length = cpu_to_be32(count); + + /* + * fill out the PBL for this message + */ + for (i = 0; i < count; i++) { + wr->paddrs[i] = cpu_to_be64(addr_list[i]); + } + + /* + * regerence the request struct + */ + vq_req_get(c2dev, vq_req); + + /* + * send the WR to the adapter + */ + err = vq_send_wr(c2dev, (union c2wr *) wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail1; + } + + /* + * wait for reply from adapter + */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) { + goto bail1; + } + + /* + * process reply + */ + reply = + (struct c2wr_nsmr_register_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) { + err = -ENOMEM; + goto bail1; + } + if ((err = c2_errno(reply))) { + goto bail2; + } + //*p_pb_entries = be32_to_cpu(reply->pbl_depth); + mr->ibmr.lkey = mr->ibmr.rkey = be32_to_cpu(reply->stag_index); + vq_repbuf_free(c2dev, reply); + + /* + * if there are still more PBEs we need to send them to + * the adapter and wait for a reply on the final one. + * reuse vq_req for this purpose. + */ + pbl_depth -= count; + if (pbl_depth) { + + vq_req->reply_msg = (unsigned long) NULL; + atomic_set(&vq_req->reply_ready, 0); + err = send_pbl_messages(c2dev, + cpu_to_be32(mr->ibmr.lkey), + (unsigned long) &addr_list[i], + pbl_depth, vq_req, PBL_PHYS); + if (err) { + goto bail1; + } + } + + vq_req_free(c2dev, vq_req); + kfree(wr); + + return err; + + bail2: + vq_repbuf_free(c2dev, reply); + bail1: + kfree(wr); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + +int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index) +{ + struct c2_vq_req *vq_req; /* verbs request object */ + struct c2wr_stag_dealloc_req wr; /* work request */ + struct c2wr_stag_dealloc_rep *reply; /* WR reply */ + int err; + + + /* + * allocate verbs request object + */ + vq_req = vq_req_alloc(c2dev); + if (!vq_req) { + return -ENOMEM; + } + + /* + * Build the WR + */ + c2_wr_set_id(&wr, CCWR_STAG_DEALLOC); + wr.hdr.context = (u64) (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.stag_index = cpu_to_be32(stag_index); + + /* + * reference the request struct. dereferenced in the int handler. + */ + vq_req_get(c2dev, vq_req); + + /* + * Send WR to adapter + */ + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + /* + * Wait for reply from adapter + */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) { + goto bail0; + } + + /* + * Process reply + */ + reply = (struct c2wr_stag_dealloc_rep *) (unsigned long) vq_req->reply_msg; + if (!reply) { + err = -ENOMEM; + goto bail0; + } + + err = c2_errno(reply); + + vq_repbuf_free(c2dev, reply); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} diff --git a/drivers/infiniband/hw/amso1100/c2_mq.c b/drivers/infiniband/hw/amso1100/c2_mq.c new file mode 100644 index 000000000000..b88a75592102 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_mq.c @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "c2.h" +#include "c2_mq.h" + +void *c2_mq_alloc(struct c2_mq *q) +{ + BUG_ON(q->magic != C2_MQ_MAGIC); + BUG_ON(q->type != C2_MQ_ADAPTER_TARGET); + + if (c2_mq_full(q)) { + return NULL; + } else { +#ifdef DEBUG + struct c2wr_hdr *m = + (struct c2wr_hdr *) (q->msg_pool.host + q->priv * q->msg_size); +#ifdef CCMSGMAGIC + BUG_ON(m->magic != be32_to_cpu(~CCWR_MAGIC)); + m->magic = cpu_to_be32(CCWR_MAGIC); +#endif + return m; +#else + return q->msg_pool.host + q->priv * q->msg_size; +#endif + } +} + +void c2_mq_produce(struct c2_mq *q) +{ + BUG_ON(q->magic != C2_MQ_MAGIC); + BUG_ON(q->type != C2_MQ_ADAPTER_TARGET); + + if (!c2_mq_full(q)) { + q->priv = (q->priv + 1) % q->q_size; + q->hint_count++; + /* Update peer's offset. */ + __raw_writew(cpu_to_be16(q->priv), &q->peer->shared); + } +} + +void *c2_mq_consume(struct c2_mq *q) +{ + BUG_ON(q->magic != C2_MQ_MAGIC); + BUG_ON(q->type != C2_MQ_HOST_TARGET); + + if (c2_mq_empty(q)) { + return NULL; + } else { +#ifdef DEBUG + struct c2wr_hdr *m = (struct c2wr_hdr *) + (q->msg_pool.host + q->priv * q->msg_size); +#ifdef CCMSGMAGIC + BUG_ON(m->magic != be32_to_cpu(CCWR_MAGIC)); +#endif + return m; +#else + return q->msg_pool.host + q->priv * q->msg_size; +#endif + } +} + +void c2_mq_free(struct c2_mq *q) +{ + BUG_ON(q->magic != C2_MQ_MAGIC); + BUG_ON(q->type != C2_MQ_HOST_TARGET); + + if (!c2_mq_empty(q)) { + +#ifdef CCMSGMAGIC + { + struct c2wr_hdr __iomem *m = (struct c2wr_hdr __iomem *) + (q->msg_pool.adapter + q->priv * q->msg_size); + __raw_writel(cpu_to_be32(~CCWR_MAGIC), &m->magic); + } +#endif + q->priv = (q->priv + 1) % q->q_size; + /* Update peer's offset. */ + __raw_writew(cpu_to_be16(q->priv), &q->peer->shared); + } +} + + +void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count) +{ + BUG_ON(q->magic != C2_MQ_MAGIC); + BUG_ON(q->type != C2_MQ_ADAPTER_TARGET); + + while (wqe_count--) { + BUG_ON(c2_mq_empty(q)); + *q->shared = cpu_to_be16((be16_to_cpu(*q->shared)+1) % q->q_size); + } +} + +#if 0 +u32 c2_mq_count(struct c2_mq *q) +{ + s32 count; + + if (q->type == C2_MQ_HOST_TARGET) + count = be16_to_cpu(*q->shared) - q->priv; + else + count = q->priv - be16_to_cpu(*q->shared); + + if (count < 0) + count += q->q_size; + + return (u32) count; +} +#endif /* 0 */ + +void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size, + u8 __iomem *pool_start, u16 __iomem *peer, u32 type) +{ + BUG_ON(!q->shared); + + /* This code assumes the byte swapping has already been done! */ + q->index = index; + q->q_size = q_size; + q->msg_size = msg_size; + q->msg_pool.adapter = pool_start; + q->peer = (struct c2_mq_shared __iomem *) peer; + q->magic = C2_MQ_MAGIC; + q->type = type; + q->priv = 0; + q->hint_count = 0; + return; +} +void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size, + u8 *pool_start, u16 __iomem *peer, u32 type) +{ + BUG_ON(!q->shared); + + /* This code assumes the byte swapping has already been done! */ + q->index = index; + q->q_size = q_size; + q->msg_size = msg_size; + q->msg_pool.host = pool_start; + q->peer = (struct c2_mq_shared __iomem *) peer; + q->magic = C2_MQ_MAGIC; + q->type = type; + q->priv = 0; + q->hint_count = 0; + return; +} diff --git a/drivers/infiniband/hw/amso1100/c2_mq.h b/drivers/infiniband/hw/amso1100/c2_mq.h new file mode 100644 index 000000000000..9185bbb21658 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_mq.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _C2_MQ_H_ +#define _C2_MQ_H_ +#include +#include +#include "c2_wr.h" + +enum c2_shared_regs { + + C2_SHARED_ARMED = 0x10, + C2_SHARED_NOTIFY = 0x18, + C2_SHARED_SHARED = 0x40, +}; + +struct c2_mq_shared { + u16 unused1; + u8 armed; + u8 notification_type; + u32 unused2; + u16 shared; + /* Pad to 64 bytes. */ + u8 pad[64 - sizeof(u16) - 2 * sizeof(u8) - sizeof(u32) - sizeof(u16)]; +}; + +enum c2_mq_type { + C2_MQ_HOST_TARGET = 1, + C2_MQ_ADAPTER_TARGET = 2, +}; + +/* + * c2_mq_t is for kernel-mode MQs like the VQs Cand the AEQ. + * c2_user_mq_t (which is the same format) is for user-mode MQs... + */ +#define C2_MQ_MAGIC 0x4d512020 /* 'MQ ' */ +struct c2_mq { + u32 magic; + union { + u8 *host; + u8 __iomem *adapter; + } msg_pool; + dma_addr_t host_dma; + DECLARE_PCI_UNMAP_ADDR(mapping); + u16 hint_count; + u16 priv; + struct c2_mq_shared __iomem *peer; + u16 *shared; + dma_addr_t shared_dma; + u32 q_size; + u32 msg_size; + u32 index; + enum c2_mq_type type; +}; + +static __inline__ int c2_mq_empty(struct c2_mq *q) +{ + return q->priv == be16_to_cpu(*q->shared); +} + +static __inline__ int c2_mq_full(struct c2_mq *q) +{ + return q->priv == (be16_to_cpu(*q->shared) + q->q_size - 1) % q->q_size; +} + +extern void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count); +extern void *c2_mq_alloc(struct c2_mq *q); +extern void c2_mq_produce(struct c2_mq *q); +extern void *c2_mq_consume(struct c2_mq *q); +extern void c2_mq_free(struct c2_mq *q); +extern void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size, + u8 __iomem *pool_start, u16 __iomem *peer, u32 type); +extern void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size, + u8 *pool_start, u16 __iomem *peer, u32 type); + +#endif /* _C2_MQ_H_ */ diff --git a/drivers/infiniband/hw/amso1100/c2_pd.c b/drivers/infiniband/hw/amso1100/c2_pd.c new file mode 100644 index 000000000000..00c709926c8d --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_pd.c @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "c2.h" +#include "c2_provider.h" + +int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd) +{ + u32 obj; + int ret = 0; + + spin_lock(&c2dev->pd_table.lock); + obj = find_next_zero_bit(c2dev->pd_table.table, c2dev->pd_table.max, + c2dev->pd_table.last); + if (obj >= c2dev->pd_table.max) + obj = find_first_zero_bit(c2dev->pd_table.table, + c2dev->pd_table.max); + if (obj < c2dev->pd_table.max) { + pd->pd_id = obj; + __set_bit(obj, c2dev->pd_table.table); + c2dev->pd_table.last = obj+1; + if (c2dev->pd_table.last >= c2dev->pd_table.max) + c2dev->pd_table.last = 0; + } else + ret = -ENOMEM; + spin_unlock(&c2dev->pd_table.lock); + return ret; +} + +void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd) +{ + spin_lock(&c2dev->pd_table.lock); + __clear_bit(pd->pd_id, c2dev->pd_table.table); + spin_unlock(&c2dev->pd_table.lock); +} + +int __devinit c2_init_pd_table(struct c2_dev *c2dev) +{ + + c2dev->pd_table.last = 0; + c2dev->pd_table.max = c2dev->props.max_pd; + spin_lock_init(&c2dev->pd_table.lock); + c2dev->pd_table.table = kmalloc(BITS_TO_LONGS(c2dev->props.max_pd) * + sizeof(long), GFP_KERNEL); + if (!c2dev->pd_table.table) + return -ENOMEM; + bitmap_zero(c2dev->pd_table.table, c2dev->props.max_pd); + return 0; +} + +void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev) +{ + kfree(c2dev->pd_table.table); +} diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c new file mode 100644 index 000000000000..8fddc8cccdf3 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -0,0 +1,869 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include "c2.h" +#include "c2_provider.h" +#include "c2_user.h" + +static int c2_query_device(struct ib_device *ibdev, + struct ib_device_attr *props) +{ + struct c2_dev *c2dev = to_c2dev(ibdev); + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + + *props = c2dev->props; + return 0; +} + +static int c2_query_port(struct ib_device *ibdev, + u8 port, struct ib_port_attr *props) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + + props->max_mtu = IB_MTU_4096; + props->lid = 0; + props->lmc = 0; + props->sm_lid = 0; + props->sm_sl = 0; + props->state = IB_PORT_ACTIVE; + props->phys_state = 0; + props->port_cap_flags = + IB_PORT_CM_SUP | + IB_PORT_REINIT_SUP | + IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; + props->gid_tbl_len = 1; + props->pkey_tbl_len = 1; + props->qkey_viol_cntr = 0; + props->active_width = 1; + props->active_speed = 1; + + return 0; +} + +static int c2_modify_port(struct ib_device *ibdev, + u8 port, int port_modify_mask, + struct ib_port_modify *props) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + return 0; +} + +static int c2_query_pkey(struct ib_device *ibdev, + u8 port, u16 index, u16 * pkey) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + *pkey = 0; + return 0; +} + +static int c2_query_gid(struct ib_device *ibdev, u8 port, + int index, union ib_gid *gid) +{ + struct c2_dev *c2dev = to_c2dev(ibdev); + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + memset(&(gid->raw[0]), 0, sizeof(gid->raw)); + memcpy(&(gid->raw[0]), c2dev->pseudo_netdev->dev_addr, 6); + + return 0; +} + +/* Allocate the user context data structure. This keeps track + * of all objects associated with a particular user-mode client. + */ +static struct ib_ucontext *c2_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + struct c2_ucontext *context; + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + context = kmalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return ERR_PTR(-ENOMEM); + + return &context->ibucontext; +} + +static int c2_dealloc_ucontext(struct ib_ucontext *context) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + kfree(context); + return 0; +} + +static int c2_mmap_uar(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + return -ENOSYS; +} + +static struct ib_pd *c2_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct c2_pd *pd; + int err; + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + + pd = kmalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return ERR_PTR(-ENOMEM); + + err = c2_pd_alloc(to_c2dev(ibdev), !context, pd); + if (err) { + kfree(pd); + return ERR_PTR(err); + } + + if (context) { + if (ib_copy_to_udata(udata, &pd->pd_id, sizeof(__u32))) { + c2_pd_free(to_c2dev(ibdev), pd); + kfree(pd); + return ERR_PTR(-EFAULT); + } + } + + return &pd->ibpd; +} + +static int c2_dealloc_pd(struct ib_pd *pd) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + c2_pd_free(to_c2dev(pd->device), to_c2pd(pd)); + kfree(pd); + + return 0; +} + +static struct ib_ah *c2_ah_create(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + return ERR_PTR(-ENOSYS); +} + +static int c2_ah_destroy(struct ib_ah *ah) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + return -ENOSYS; +} + +static void c2_add_ref(struct ib_qp *ibqp) +{ + struct c2_qp *qp; + BUG_ON(!ibqp); + qp = to_c2qp(ibqp); + atomic_inc(&qp->refcount); +} + +static void c2_rem_ref(struct ib_qp *ibqp) +{ + struct c2_qp *qp; + BUG_ON(!ibqp); + qp = to_c2qp(ibqp); + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); +} + +struct ib_qp *c2_get_qp(struct ib_device *device, int qpn) +{ + struct c2_dev* c2dev = to_c2dev(device); + struct c2_qp *qp; + + qp = c2_find_qpn(c2dev, qpn); + pr_debug("%s Returning QP=%p for QPN=%d, device=%p, refcount=%d\n", + __FUNCTION__, qp, qpn, device, + (qp?atomic_read(&qp->refcount):0)); + + return (qp?&qp->ibqp:NULL); +} + +static struct ib_qp *c2_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct c2_qp *qp; + int err; + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + + switch (init_attr->qp_type) { + case IB_QPT_RC: + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) { + pr_debug("%s: Unable to allocate QP\n", __FUNCTION__); + return ERR_PTR(-ENOMEM); + } + spin_lock_init(&qp->lock); + if (pd->uobject) { + /* userspace specific */ + } + + err = c2_alloc_qp(to_c2dev(pd->device), + to_c2pd(pd), init_attr, qp); + + if (err && pd->uobject) { + /* userspace specific */ + } + + break; + default: + pr_debug("%s: Invalid QP type: %d\n", __FUNCTION__, + init_attr->qp_type); + return ERR_PTR(-EINVAL); + break; + } + + if (err) { + kfree(qp); + return ERR_PTR(err); + } + + return &qp->ibqp; +} + +static int c2_destroy_qp(struct ib_qp *ib_qp) +{ + struct c2_qp *qp = to_c2qp(ib_qp); + + pr_debug("%s:%u qp=%p,qp->state=%d\n", + __FUNCTION__, __LINE__,ib_qp,qp->state); + c2_free_qp(to_c2dev(ib_qp->device), qp); + kfree(qp); + return 0; +} + +static struct ib_cq *c2_create_cq(struct ib_device *ibdev, int entries, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct c2_cq *cq; + int err; + + cq = kmalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) { + pr_debug("%s: Unable to allocate CQ\n", __FUNCTION__); + return ERR_PTR(-ENOMEM); + } + + err = c2_init_cq(to_c2dev(ibdev), entries, NULL, cq); + if (err) { + pr_debug("%s: error initializing CQ\n", __FUNCTION__); + kfree(cq); + return ERR_PTR(err); + } + + return &cq->ibcq; +} + +static int c2_destroy_cq(struct ib_cq *ib_cq) +{ + struct c2_cq *cq = to_c2cq(ib_cq); + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + + c2_free_cq(to_c2dev(ib_cq->device), cq); + kfree(cq); + + return 0; +} + +static inline u32 c2_convert_access(int acc) +{ + return (acc & IB_ACCESS_REMOTE_WRITE ? C2_ACF_REMOTE_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? C2_ACF_REMOTE_READ : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? C2_ACF_LOCAL_WRITE : 0) | + C2_ACF_LOCAL_READ | C2_ACF_WINDOW_BIND; +} + +static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd, + struct ib_phys_buf *buffer_list, + int num_phys_buf, int acc, u64 * iova_start) +{ + struct c2_mr *mr; + u64 *page_list; + u32 total_len; + int err, i, j, k, page_shift, pbl_depth; + + pbl_depth = 0; + total_len = 0; + + page_shift = PAGE_SHIFT; + /* + * If there is only 1 buffer we assume this could + * be a map of all phy mem...use a 32k page_shift. + */ + if (num_phys_buf == 1) + page_shift += 3; + + for (i = 0; i < num_phys_buf; i++) { + + if (buffer_list[i].addr & ~PAGE_MASK) { + pr_debug("Unaligned Memory Buffer: 0x%x\n", + (unsigned int) buffer_list[i].addr); + return ERR_PTR(-EINVAL); + } + + if (!buffer_list[i].size) { + pr_debug("Invalid Buffer Size\n"); + return ERR_PTR(-EINVAL); + } + + total_len += buffer_list[i].size; + pbl_depth += ALIGN(buffer_list[i].size, + (1 << page_shift)) >> page_shift; + } + + page_list = vmalloc(sizeof(u64) * pbl_depth); + if (!page_list) { + pr_debug("couldn't vmalloc page_list of size %zd\n", + (sizeof(u64) * pbl_depth)); + return ERR_PTR(-ENOMEM); + } + + for (i = 0, j = 0; i < num_phys_buf; i++) { + + int naddrs; + + naddrs = ALIGN(buffer_list[i].size, + (1 << page_shift)) >> page_shift; + for (k = 0; k < naddrs; k++) + page_list[j++] = (buffer_list[i].addr + + (k << page_shift)); + } + + mr = kmalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + mr->pd = to_c2pd(ib_pd); + pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, " + "*iova_start %llx, first pa %llx, last pa %llx\n", + __FUNCTION__, page_shift, pbl_depth, total_len, + *iova_start, page_list[0], page_list[pbl_depth-1]); + err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list, + (1 << page_shift), pbl_depth, + total_len, 0, iova_start, + c2_convert_access(acc), mr); + vfree(page_list); + if (err) { + kfree(mr); + return ERR_PTR(err); + } + + return &mr->ibmr; +} + +static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc) +{ + struct ib_phys_buf bl; + u64 kva = 0; + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + + /* AMSO1100 limit */ + bl.size = 0xffffffff; + bl.addr = 0; + return c2_reg_phys_mr(pd, &bl, 1, acc, &kva); +} + +static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, + int acc, struct ib_udata *udata) +{ + u64 *pages; + u64 kva = 0; + int shift, n, len; + int i, j, k; + int err = 0; + struct ib_umem_chunk *chunk; + struct c2_pd *c2pd = to_c2pd(pd); + struct c2_mr *c2mr; + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + shift = ffs(region->page_size) - 1; + + c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL); + if (!c2mr) + return ERR_PTR(-ENOMEM); + c2mr->pd = c2pd; + + n = 0; + list_for_each_entry(chunk, ®ion->chunk_list, list) + n += chunk->nents; + + pages = kmalloc(n * sizeof(u64), GFP_KERNEL); + if (!pages) { + err = -ENOMEM; + goto err; + } + + i = 0; + list_for_each_entry(chunk, ®ion->chunk_list, list) { + for (j = 0; j < chunk->nmap; ++j) { + len = sg_dma_len(&chunk->page_list[j]) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = + sg_dma_address(&chunk->page_list[j]) + + (region->page_size * k); + } + } + } + + kva = (u64)region->virt_base; + err = c2_nsmr_register_phys_kern(to_c2dev(pd->device), + pages, + region->page_size, + i, + region->length, + region->offset, + &kva, + c2_convert_access(acc), + c2mr); + kfree(pages); + if (err) { + kfree(c2mr); + return ERR_PTR(err); + } + return &c2mr->ibmr; + +err: + kfree(c2mr); + return ERR_PTR(err); +} + +static int c2_dereg_mr(struct ib_mr *ib_mr) +{ + struct c2_mr *mr = to_c2mr(ib_mr); + int err; + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + + err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey); + if (err) + pr_debug("c2_stag_dealloc failed: %d\n", err); + else + kfree(mr); + + return err; +} + +static ssize_t show_rev(struct class_device *cdev, char *buf) +{ + struct c2_dev *dev = container_of(cdev, struct c2_dev, ibdev.class_dev); + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + return sprintf(buf, "%x\n", dev->props.hw_ver); +} + +static ssize_t show_fw_ver(struct class_device *cdev, char *buf) +{ + struct c2_dev *dev = container_of(cdev, struct c2_dev, ibdev.class_dev); + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + return sprintf(buf, "%x.%x.%x\n", + (int) (dev->props.fw_ver >> 32), + (int) (dev->props.fw_ver >> 16) & 0xffff, + (int) (dev->props.fw_ver & 0xffff)); +} + +static ssize_t show_hca(struct class_device *cdev, char *buf) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + return sprintf(buf, "AMSO1100\n"); +} + +static ssize_t show_board(struct class_device *cdev, char *buf) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + return sprintf(buf, "%.*s\n", 32, "AMSO1100 Board ID"); +} + +static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); +static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); +static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); + +static struct class_device_attribute *c2_class_attributes[] = { + &class_device_attr_hw_rev, + &class_device_attr_fw_ver, + &class_device_attr_hca_type, + &class_device_attr_board_id +}; + +static int c2_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + int err; + + err = + c2_qp_modify(to_c2dev(ibqp->device), to_c2qp(ibqp), attr, + attr_mask); + + return err; +} + +static int c2_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + return -ENOSYS; +} + +static int c2_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + return -ENOSYS; +} + +static int c2_process_mad(struct ib_device *ibdev, + int mad_flags, + u8 port_num, + struct ib_wc *in_wc, + struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + return -ENOSYS; +} + +static int c2_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + + /* Request a connection */ + return c2_llp_connect(cm_id, iw_param); +} + +static int c2_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + + /* Accept the new connection */ + return c2_llp_accept(cm_id, iw_param); +} + +static int c2_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) +{ + int err; + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + + err = c2_llp_reject(cm_id, pdata, pdata_len); + return err; +} + +static int c2_service_create(struct iw_cm_id *cm_id, int backlog) +{ + int err; + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + err = c2_llp_service_create(cm_id, backlog); + pr_debug("%s:%u err=%d\n", + __FUNCTION__, __LINE__, + err); + return err; +} + +static int c2_service_destroy(struct iw_cm_id *cm_id) +{ + int err; + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + + err = c2_llp_service_destroy(cm_id); + + return err; +} + +static int c2_pseudo_up(struct net_device *netdev) +{ + struct in_device *ind; + struct c2_dev *c2dev = netdev->priv; + + ind = in_dev_get(netdev); + if (!ind) + return 0; + + pr_debug("adding...\n"); + for_ifa(ind) { +#ifdef DEBUG + u8 *ip = (u8 *) & ifa->ifa_address; + + pr_debug("%s: %d.%d.%d.%d\n", + ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]); +#endif + c2_add_addr(c2dev, ifa->ifa_address, ifa->ifa_mask); + } + endfor_ifa(ind); + in_dev_put(ind); + + return 0; +} + +static int c2_pseudo_down(struct net_device *netdev) +{ + struct in_device *ind; + struct c2_dev *c2dev = netdev->priv; + + ind = in_dev_get(netdev); + if (!ind) + return 0; + + pr_debug("deleting...\n"); + for_ifa(ind) { +#ifdef DEBUG + u8 *ip = (u8 *) & ifa->ifa_address; + + pr_debug("%s: %d.%d.%d.%d\n", + ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]); +#endif + c2_del_addr(c2dev, ifa->ifa_address, ifa->ifa_mask); + } + endfor_ifa(ind); + in_dev_put(ind); + + return 0; +} + +static int c2_pseudo_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +{ + kfree_skb(skb); + return NETDEV_TX_OK; +} + +static int c2_pseudo_change_mtu(struct net_device *netdev, int new_mtu) +{ + int ret = 0; + + if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU) + return -EINVAL; + + netdev->mtu = new_mtu; + + /* TODO: Tell rnic about new rmda interface mtu */ + return ret; +} + +static void setup(struct net_device *netdev) +{ + SET_MODULE_OWNER(netdev); + netdev->open = c2_pseudo_up; + netdev->stop = c2_pseudo_down; + netdev->hard_start_xmit = c2_pseudo_xmit_frame; + netdev->get_stats = NULL; + netdev->tx_timeout = NULL; + netdev->set_mac_address = NULL; + netdev->change_mtu = c2_pseudo_change_mtu; + netdev->watchdog_timeo = 0; + netdev->type = ARPHRD_ETHER; + netdev->mtu = 1500; + netdev->hard_header_len = ETH_HLEN; + netdev->addr_len = ETH_ALEN; + netdev->tx_queue_len = 0; + netdev->flags |= IFF_NOARP; + return; +} + +static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev) +{ + char name[IFNAMSIZ]; + struct net_device *netdev; + + /* change ethxxx to iwxxx */ + strcpy(name, "iw"); + strcat(name, &c2dev->netdev->name[3]); + netdev = alloc_netdev(sizeof(*netdev), name, setup); + if (!netdev) { + printk(KERN_ERR PFX "%s - etherdev alloc failed", + __FUNCTION__); + return NULL; + } + + netdev->priv = c2dev; + + SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev); + + memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6); + + /* Print out the MAC address */ + pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X\n", + netdev->name, + netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2], + netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5]); + +#if 0 + /* Disable network packets */ + netif_stop_queue(netdev); +#endif + return netdev; +} + +int c2_register_device(struct c2_dev *dev) +{ + int ret; + int i; + + /* Register pseudo network device */ + dev->pseudo_netdev = c2_pseudo_netdev_init(dev); + if (dev->pseudo_netdev) { + ret = register_netdev(dev->pseudo_netdev); + if (ret) { + printk(KERN_ERR PFX + "Unable to register netdev, ret = %d\n", ret); + free_netdev(dev->pseudo_netdev); + return ret; + } + } + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX); + dev->ibdev.owner = THIS_MODULE; + dev->ibdev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_POLL_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_POST_SEND) | + (1ull << IB_USER_VERBS_CMD_POST_RECV); + + dev->ibdev.node_type = RDMA_NODE_RNIC; + memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); + memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6); + dev->ibdev.phys_port_cnt = 1; + dev->ibdev.dma_device = &dev->pcidev->dev; + dev->ibdev.class_dev.dev = &dev->pcidev->dev; + dev->ibdev.query_device = c2_query_device; + dev->ibdev.query_port = c2_query_port; + dev->ibdev.modify_port = c2_modify_port; + dev->ibdev.query_pkey = c2_query_pkey; + dev->ibdev.query_gid = c2_query_gid; + dev->ibdev.alloc_ucontext = c2_alloc_ucontext; + dev->ibdev.dealloc_ucontext = c2_dealloc_ucontext; + dev->ibdev.mmap = c2_mmap_uar; + dev->ibdev.alloc_pd = c2_alloc_pd; + dev->ibdev.dealloc_pd = c2_dealloc_pd; + dev->ibdev.create_ah = c2_ah_create; + dev->ibdev.destroy_ah = c2_ah_destroy; + dev->ibdev.create_qp = c2_create_qp; + dev->ibdev.modify_qp = c2_modify_qp; + dev->ibdev.destroy_qp = c2_destroy_qp; + dev->ibdev.create_cq = c2_create_cq; + dev->ibdev.destroy_cq = c2_destroy_cq; + dev->ibdev.poll_cq = c2_poll_cq; + dev->ibdev.get_dma_mr = c2_get_dma_mr; + dev->ibdev.reg_phys_mr = c2_reg_phys_mr; + dev->ibdev.reg_user_mr = c2_reg_user_mr; + dev->ibdev.dereg_mr = c2_dereg_mr; + + dev->ibdev.alloc_fmr = NULL; + dev->ibdev.unmap_fmr = NULL; + dev->ibdev.dealloc_fmr = NULL; + dev->ibdev.map_phys_fmr = NULL; + + dev->ibdev.attach_mcast = c2_multicast_attach; + dev->ibdev.detach_mcast = c2_multicast_detach; + dev->ibdev.process_mad = c2_process_mad; + + dev->ibdev.req_notify_cq = c2_arm_cq; + dev->ibdev.post_send = c2_post_send; + dev->ibdev.post_recv = c2_post_receive; + + dev->ibdev.iwcm = kmalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL); + dev->ibdev.iwcm->add_ref = c2_add_ref; + dev->ibdev.iwcm->rem_ref = c2_rem_ref; + dev->ibdev.iwcm->get_qp = c2_get_qp; + dev->ibdev.iwcm->connect = c2_connect; + dev->ibdev.iwcm->accept = c2_accept; + dev->ibdev.iwcm->reject = c2_reject; + dev->ibdev.iwcm->create_listen = c2_service_create; + dev->ibdev.iwcm->destroy_listen = c2_service_destroy; + + ret = ib_register_device(&dev->ibdev); + if (ret) + return ret; + + for (i = 0; i < ARRAY_SIZE(c2_class_attributes); ++i) { + ret = class_device_create_file(&dev->ibdev.class_dev, + c2_class_attributes[i]); + if (ret) { + unregister_netdev(dev->pseudo_netdev); + free_netdev(dev->pseudo_netdev); + ib_unregister_device(&dev->ibdev); + return ret; + } + } + + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + return 0; +} + +void c2_unregister_device(struct c2_dev *dev) +{ + pr_debug("%s:%u\n", __FUNCTION__, __LINE__); + unregister_netdev(dev->pseudo_netdev); + free_netdev(dev->pseudo_netdev); + ib_unregister_device(&dev->ibdev); +} diff --git a/drivers/infiniband/hw/amso1100/c2_provider.h b/drivers/infiniband/hw/amso1100/c2_provider.h new file mode 100644 index 000000000000..fc906223220f --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_provider.h @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef C2_PROVIDER_H +#define C2_PROVIDER_H +#include + +#include +#include + +#include "c2_mq.h" +#include + +#define C2_MPT_FLAG_ATOMIC (1 << 14) +#define C2_MPT_FLAG_REMOTE_WRITE (1 << 13) +#define C2_MPT_FLAG_REMOTE_READ (1 << 12) +#define C2_MPT_FLAG_LOCAL_WRITE (1 << 11) +#define C2_MPT_FLAG_LOCAL_READ (1 << 10) + +struct c2_buf_list { + void *buf; + DECLARE_PCI_UNMAP_ADDR(mapping) +}; + + +/* The user context keeps track of objects allocated for a + * particular user-mode client. */ +struct c2_ucontext { + struct ib_ucontext ibucontext; +}; + +struct c2_mtt; + +/* All objects associated with a PD are kept in the + * associated user context if present. + */ +struct c2_pd { + struct ib_pd ibpd; + u32 pd_id; +}; + +struct c2_mr { + struct ib_mr ibmr; + struct c2_pd *pd; +}; + +struct c2_av; + +enum c2_ah_type { + C2_AH_ON_HCA, + C2_AH_PCI_POOL, + C2_AH_KMALLOC +}; + +struct c2_ah { + struct ib_ah ibah; +}; + +struct c2_cq { + struct ib_cq ibcq; + spinlock_t lock; + atomic_t refcount; + int cqn; + int is_kernel; + wait_queue_head_t wait; + + u32 adapter_handle; + struct c2_mq mq; +}; + +struct c2_wq { + spinlock_t lock; +}; +struct iw_cm_id; +struct c2_qp { + struct ib_qp ibqp; + struct iw_cm_id *cm_id; + spinlock_t lock; + atomic_t refcount; + wait_queue_head_t wait; + int qpn; + + u32 adapter_handle; + u32 send_sgl_depth; + u32 recv_sgl_depth; + u32 rdma_write_sgl_depth; + u8 state; + + struct c2_mq sq_mq; + struct c2_mq rq_mq; +}; + +struct c2_cr_query_attrs { + u32 local_addr; + u32 remote_addr; + u16 local_port; + u16 remote_port; +}; + +static inline struct c2_pd *to_c2pd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct c2_pd, ibpd); +} + +static inline struct c2_ucontext *to_c2ucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct c2_ucontext, ibucontext); +} + +static inline struct c2_mr *to_c2mr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct c2_mr, ibmr); +} + + +static inline struct c2_ah *to_c2ah(struct ib_ah *ibah) +{ + return container_of(ibah, struct c2_ah, ibah); +} + +static inline struct c2_cq *to_c2cq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct c2_cq, ibcq); +} + +static inline struct c2_qp *to_c2qp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct c2_qp, ibqp); +} + +static inline int is_rnic_addr(struct net_device *netdev, u32 addr) +{ + struct in_device *ind; + int ret = 0; + + ind = in_dev_get(netdev); + if (!ind) + return 0; + + for_ifa(ind) { + if (ifa->ifa_address == addr) { + ret = 1; + break; + } + } + endfor_ifa(ind); + in_dev_put(ind); + return ret; +} +#endif /* C2_PROVIDER_H */ diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c new file mode 100644 index 000000000000..12261132b077 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_qp.c @@ -0,0 +1,975 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include "c2.h" +#include "c2_vq.h" +#include "c2_status.h" + +#define C2_MAX_ORD_PER_QP 128 +#define C2_MAX_IRD_PER_QP 128 + +#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count) +#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16) +#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF) + +#define NO_SUPPORT -1 +static const u8 c2_opcode[] = { + [IB_WR_SEND] = C2_WR_TYPE_SEND, + [IB_WR_SEND_WITH_IMM] = NO_SUPPORT, + [IB_WR_RDMA_WRITE] = C2_WR_TYPE_RDMA_WRITE, + [IB_WR_RDMA_WRITE_WITH_IMM] = NO_SUPPORT, + [IB_WR_RDMA_READ] = C2_WR_TYPE_RDMA_READ, + [IB_WR_ATOMIC_CMP_AND_SWP] = NO_SUPPORT, + [IB_WR_ATOMIC_FETCH_AND_ADD] = NO_SUPPORT, +}; + +static int to_c2_state(enum ib_qp_state ib_state) +{ + switch (ib_state) { + case IB_QPS_RESET: + return C2_QP_STATE_IDLE; + case IB_QPS_RTS: + return C2_QP_STATE_RTS; + case IB_QPS_SQD: + return C2_QP_STATE_CLOSING; + case IB_QPS_SQE: + return C2_QP_STATE_CLOSING; + case IB_QPS_ERR: + return C2_QP_STATE_ERROR; + default: + return -1; + } +} + +static int to_ib_state(enum c2_qp_state c2_state) +{ + switch (c2_state) { + case C2_QP_STATE_IDLE: + return IB_QPS_RESET; + case C2_QP_STATE_CONNECTING: + return IB_QPS_RTR; + case C2_QP_STATE_RTS: + return IB_QPS_RTS; + case C2_QP_STATE_CLOSING: + return IB_QPS_SQD; + case C2_QP_STATE_ERROR: + return IB_QPS_ERR; + case C2_QP_STATE_TERMINATE: + return IB_QPS_SQE; + default: + return -1; + } +} + +static const char *to_ib_state_str(int ib_state) +{ + static const char *state_str[] = { + "IB_QPS_RESET", + "IB_QPS_INIT", + "IB_QPS_RTR", + "IB_QPS_RTS", + "IB_QPS_SQD", + "IB_QPS_SQE", + "IB_QPS_ERR" + }; + if (ib_state < IB_QPS_RESET || + ib_state > IB_QPS_ERR) + return ""; + + ib_state -= IB_QPS_RESET; + return state_str[ib_state]; +} + +void c2_set_qp_state(struct c2_qp *qp, int c2_state) +{ + int new_state = to_ib_state(c2_state); + + pr_debug("%s: qp[%p] state modify %s --> %s\n", + __FUNCTION__, + qp, + to_ib_state_str(qp->state), + to_ib_state_str(new_state)); + qp->state = new_state; +} + +#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF + +int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp, + struct ib_qp_attr *attr, int attr_mask) +{ + struct c2wr_qp_modify_req wr; + struct c2wr_qp_modify_rep *reply; + struct c2_vq_req *vq_req; + unsigned long flags; + u8 next_state; + int err; + + pr_debug("%s:%d qp=%p, %s --> %s\n", + __FUNCTION__, __LINE__, + qp, + to_ib_state_str(qp->state), + to_ib_state_str(attr->qp_state)); + + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + c2_wr_set_id(&wr, CCWR_QP_MODIFY); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.qp_handle = qp->adapter_handle; + wr.ord = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); + wr.ird = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); + wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); + wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); + + if (attr_mask & IB_QP_STATE) { + /* Ensure the state is valid */ + if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR) + return -EINVAL; + + wr.next_qp_state = cpu_to_be32(to_c2_state(attr->qp_state)); + + if (attr->qp_state == IB_QPS_ERR) { + spin_lock_irqsave(&qp->lock, flags); + if (qp->cm_id && qp->state == IB_QPS_RTS) { + pr_debug("Generating CLOSE event for QP-->ERR, " + "qp=%p, cm_id=%p\n",qp,qp->cm_id); + /* Generate an CLOSE event */ + vq_req->cm_id = qp->cm_id; + vq_req->event = IW_CM_EVENT_CLOSE; + } + spin_unlock_irqrestore(&qp->lock, flags); + } + next_state = attr->qp_state; + + } else if (attr_mask & IB_QP_CUR_STATE) { + + if (attr->cur_qp_state != IB_QPS_RTR && + attr->cur_qp_state != IB_QPS_RTS && + attr->cur_qp_state != IB_QPS_SQD && + attr->cur_qp_state != IB_QPS_SQE) + return -EINVAL; + else + wr.next_qp_state = + cpu_to_be32(to_c2_state(attr->cur_qp_state)); + + next_state = attr->cur_qp_state; + + } else { + err = 0; + goto bail0; + } + + /* reference the request struct */ + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail0; + + reply = (struct c2wr_qp_modify_rep *) (unsigned long) vq_req->reply_msg; + if (!reply) { + err = -ENOMEM; + goto bail0; + } + + err = c2_errno(reply); + if (!err) + qp->state = next_state; +#ifdef DEBUG + else + pr_debug("%s: c2_errno=%d\n", __FUNCTION__, err); +#endif + /* + * If we're going to error and generating the event here, then + * we need to remove the reference because there will be no + * close event generated by the adapter + */ + spin_lock_irqsave(&qp->lock, flags); + if (vq_req->event==IW_CM_EVENT_CLOSE && qp->cm_id) { + qp->cm_id->rem_ref(qp->cm_id); + qp->cm_id = NULL; + } + spin_unlock_irqrestore(&qp->lock, flags); + + vq_repbuf_free(c2dev, reply); + bail0: + vq_req_free(c2dev, vq_req); + + pr_debug("%s:%d qp=%p, cur_state=%s\n", + __FUNCTION__, __LINE__, + qp, + to_ib_state_str(qp->state)); + return err; +} + +int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp, + int ord, int ird) +{ + struct c2wr_qp_modify_req wr; + struct c2wr_qp_modify_rep *reply; + struct c2_vq_req *vq_req; + int err; + + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + c2_wr_set_id(&wr, CCWR_QP_MODIFY); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.qp_handle = qp->adapter_handle; + wr.ord = cpu_to_be32(ord); + wr.ird = cpu_to_be32(ird); + wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); + wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); + wr.next_qp_state = cpu_to_be32(C2_QP_NO_ATTR_CHANGE); + + /* reference the request struct */ + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail0; + + reply = (struct c2wr_qp_modify_rep *) (unsigned long) + vq_req->reply_msg; + if (!reply) { + err = -ENOMEM; + goto bail0; + } + + err = c2_errno(reply); + vq_repbuf_free(c2dev, reply); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + +static int destroy_qp(struct c2_dev *c2dev, struct c2_qp *qp) +{ + struct c2_vq_req *vq_req; + struct c2wr_qp_destroy_req wr; + struct c2wr_qp_destroy_rep *reply; + unsigned long flags; + int err; + + /* + * Allocate a verb request message + */ + vq_req = vq_req_alloc(c2dev); + if (!vq_req) { + return -ENOMEM; + } + + /* + * Initialize the WR + */ + c2_wr_set_id(&wr, CCWR_QP_DESTROY); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.qp_handle = qp->adapter_handle; + + /* + * reference the request struct. dereferenced in the int handler. + */ + vq_req_get(c2dev, vq_req); + + spin_lock_irqsave(&qp->lock, flags); + if (qp->cm_id && qp->state == IB_QPS_RTS) { + pr_debug("destroy_qp: generating CLOSE event for QP-->ERR, " + "qp=%p, cm_id=%p\n",qp,qp->cm_id); + /* Generate an CLOSE event */ + vq_req->qp = qp; + vq_req->cm_id = qp->cm_id; + vq_req->event = IW_CM_EVENT_CLOSE; + } + spin_unlock_irqrestore(&qp->lock, flags); + + /* + * Send WR to adapter + */ + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + /* + * Wait for reply from adapter + */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) { + goto bail0; + } + + /* + * Process reply + */ + reply = (struct c2wr_qp_destroy_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) { + err = -ENOMEM; + goto bail0; + } + + spin_lock_irqsave(&qp->lock, flags); + if (qp->cm_id) { + qp->cm_id->rem_ref(qp->cm_id); + qp->cm_id = NULL; + } + spin_unlock_irqrestore(&qp->lock, flags); + + vq_repbuf_free(c2dev, reply); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + +static int c2_alloc_qpn(struct c2_dev *c2dev, struct c2_qp *qp) +{ + int ret; + + do { + spin_lock_irq(&c2dev->qp_table.lock); + ret = idr_get_new_above(&c2dev->qp_table.idr, qp, + c2dev->qp_table.last++, &qp->qpn); + spin_unlock_irq(&c2dev->qp_table.lock); + } while ((ret == -EAGAIN) && + idr_pre_get(&c2dev->qp_table.idr, GFP_KERNEL)); + return ret; +} + +static void c2_free_qpn(struct c2_dev *c2dev, int qpn) +{ + spin_lock_irq(&c2dev->qp_table.lock); + idr_remove(&c2dev->qp_table.idr, qpn); + spin_unlock_irq(&c2dev->qp_table.lock); +} + +struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn) +{ + unsigned long flags; + struct c2_qp *qp; + + spin_lock_irqsave(&c2dev->qp_table.lock, flags); + qp = idr_find(&c2dev->qp_table.idr, qpn); + spin_unlock_irqrestore(&c2dev->qp_table.lock, flags); + return qp; +} + +int c2_alloc_qp(struct c2_dev *c2dev, + struct c2_pd *pd, + struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp) +{ + struct c2wr_qp_create_req wr; + struct c2wr_qp_create_rep *reply; + struct c2_vq_req *vq_req; + struct c2_cq *send_cq = to_c2cq(qp_attrs->send_cq); + struct c2_cq *recv_cq = to_c2cq(qp_attrs->recv_cq); + unsigned long peer_pa; + u32 q_size, msg_size, mmap_size; + void __iomem *mmap; + int err; + + err = c2_alloc_qpn(c2dev, qp); + if (err) + return err; + qp->ibqp.qp_num = qp->qpn; + qp->ibqp.qp_type = IB_QPT_RC; + + /* Allocate the SQ and RQ shared pointers */ + qp->sq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, + &qp->sq_mq.shared_dma, GFP_KERNEL); + if (!qp->sq_mq.shared) { + err = -ENOMEM; + goto bail0; + } + + qp->rq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, + &qp->rq_mq.shared_dma, GFP_KERNEL); + if (!qp->rq_mq.shared) { + err = -ENOMEM; + goto bail1; + } + + /* Allocate the verbs request */ + vq_req = vq_req_alloc(c2dev); + if (vq_req == NULL) { + err = -ENOMEM; + goto bail2; + } + + /* Initialize the work request */ + memset(&wr, 0, sizeof(wr)); + c2_wr_set_id(&wr, CCWR_QP_CREATE); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + wr.sq_cq_handle = send_cq->adapter_handle; + wr.rq_cq_handle = recv_cq->adapter_handle; + wr.sq_depth = cpu_to_be32(qp_attrs->cap.max_send_wr + 1); + wr.rq_depth = cpu_to_be32(qp_attrs->cap.max_recv_wr + 1); + wr.srq_handle = 0; + wr.flags = cpu_to_be32(QP_RDMA_READ | QP_RDMA_WRITE | QP_MW_BIND | + QP_ZERO_STAG | QP_RDMA_READ_RESPONSE); + wr.send_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge); + wr.recv_sgl_depth = cpu_to_be32(qp_attrs->cap.max_recv_sge); + wr.rdma_write_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge); + wr.shared_sq_ht = cpu_to_be64(qp->sq_mq.shared_dma); + wr.shared_rq_ht = cpu_to_be64(qp->rq_mq.shared_dma); + wr.ord = cpu_to_be32(C2_MAX_ORD_PER_QP); + wr.ird = cpu_to_be32(C2_MAX_IRD_PER_QP); + wr.pd_id = pd->pd_id; + wr.user_context = (unsigned long) qp; + + vq_req_get(c2dev, vq_req); + + /* Send the WR to the adapter */ + err = vq_send_wr(c2dev, (union c2wr *) & wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail3; + } + + /* Wait for the verb reply */ + err = vq_wait_for_reply(c2dev, vq_req); + if (err) { + goto bail3; + } + + /* Process the reply */ + reply = (struct c2wr_qp_create_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) { + err = -ENOMEM; + goto bail3; + } + + if ((err = c2_wr_get_result(reply)) != 0) { + goto bail4; + } + + /* Fill in the kernel QP struct */ + atomic_set(&qp->refcount, 1); + qp->adapter_handle = reply->qp_handle; + qp->state = IB_QPS_RESET; + qp->send_sgl_depth = qp_attrs->cap.max_send_sge; + qp->rdma_write_sgl_depth = qp_attrs->cap.max_send_sge; + qp->recv_sgl_depth = qp_attrs->cap.max_recv_sge; + + /* Initialize the SQ MQ */ + q_size = be32_to_cpu(reply->sq_depth); + msg_size = be32_to_cpu(reply->sq_msg_size); + peer_pa = c2dev->pa + be32_to_cpu(reply->sq_mq_start); + mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size); + mmap = ioremap_nocache(peer_pa, mmap_size); + if (!mmap) { + err = -ENOMEM; + goto bail5; + } + + c2_mq_req_init(&qp->sq_mq, + be32_to_cpu(reply->sq_mq_index), + q_size, + msg_size, + mmap + sizeof(struct c2_mq_shared), /* pool start */ + mmap, /* peer */ + C2_MQ_ADAPTER_TARGET); + + /* Initialize the RQ mq */ + q_size = be32_to_cpu(reply->rq_depth); + msg_size = be32_to_cpu(reply->rq_msg_size); + peer_pa = c2dev->pa + be32_to_cpu(reply->rq_mq_start); + mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size); + mmap = ioremap_nocache(peer_pa, mmap_size); + if (!mmap) { + err = -ENOMEM; + goto bail6; + } + + c2_mq_req_init(&qp->rq_mq, + be32_to_cpu(reply->rq_mq_index), + q_size, + msg_size, + mmap + sizeof(struct c2_mq_shared), /* pool start */ + mmap, /* peer */ + C2_MQ_ADAPTER_TARGET); + + vq_repbuf_free(c2dev, reply); + vq_req_free(c2dev, vq_req); + + return 0; + + bail6: + iounmap(qp->sq_mq.peer); + bail5: + destroy_qp(c2dev, qp); + bail4: + vq_repbuf_free(c2dev, reply); + bail3: + vq_req_free(c2dev, vq_req); + bail2: + c2_free_mqsp(qp->rq_mq.shared); + bail1: + c2_free_mqsp(qp->sq_mq.shared); + bail0: + c2_free_qpn(c2dev, qp->qpn); + return err; +} + +void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp) +{ + struct c2_cq *send_cq; + struct c2_cq *recv_cq; + + send_cq = to_c2cq(qp->ibqp.send_cq); + recv_cq = to_c2cq(qp->ibqp.recv_cq); + + /* + * Lock CQs here, so that CQ polling code can do QP lookup + * without taking a lock. + */ + spin_lock_irq(&send_cq->lock); + if (send_cq != recv_cq) + spin_lock(&recv_cq->lock); + + c2_free_qpn(c2dev, qp->qpn); + + if (send_cq != recv_cq) + spin_unlock(&recv_cq->lock); + spin_unlock_irq(&send_cq->lock); + + /* + * Destory qp in the rnic... + */ + destroy_qp(c2dev, qp); + + /* + * Mark any unreaped CQEs as null and void. + */ + c2_cq_clean(c2dev, qp, send_cq->cqn); + if (send_cq != recv_cq) + c2_cq_clean(c2dev, qp, recv_cq->cqn); + /* + * Unmap the MQs and return the shared pointers + * to the message pool. + */ + iounmap(qp->sq_mq.peer); + iounmap(qp->rq_mq.peer); + c2_free_mqsp(qp->sq_mq.shared); + c2_free_mqsp(qp->rq_mq.shared); + + atomic_dec(&qp->refcount); + wait_event(qp->wait, !atomic_read(&qp->refcount)); +} + +/* + * Function: move_sgl + * + * Description: + * Move an SGL from the user's work request struct into a CCIL Work Request + * message, swapping to WR byte order and ensure the total length doesn't + * overflow. + * + * IN: + * dst - ptr to CCIL Work Request message SGL memory. + * src - ptr to the consumers SGL memory. + * + * OUT: none + * + * Return: + * CCIL status codes. + */ +static int +move_sgl(struct c2_data_addr * dst, struct ib_sge *src, int count, u32 * p_len, + u8 * actual_count) +{ + u32 tot = 0; /* running total */ + u8 acount = 0; /* running total non-0 len sge's */ + + while (count > 0) { + /* + * If the addition of this SGE causes the + * total SGL length to exceed 2^32-1, then + * fail-n-bail. + * + * If the current total plus the next element length + * wraps, then it will go negative and be less than the + * current total... + */ + if ((tot + src->length) < tot) { + return -EINVAL; + } + /* + * Bug: 1456 (as well as 1498 & 1643) + * Skip over any sge's supplied with len=0 + */ + if (src->length) { + tot += src->length; + dst->stag = cpu_to_be32(src->lkey); + dst->to = cpu_to_be64(src->addr); + dst->length = cpu_to_be32(src->length); + dst++; + acount++; + } + src++; + count--; + } + + if (acount == 0) { + /* + * Bug: 1476 (as well as 1498, 1456 and 1643) + * Setup the SGL in the WR to make it easier for the RNIC. + * This way, the FW doesn't have to deal with special cases. + * Setting length=0 should be sufficient. + */ + dst->stag = 0; + dst->to = 0; + dst->length = 0; + } + + *p_len = tot; + *actual_count = acount; + return 0; +} + +/* + * Function: c2_activity (private function) + * + * Description: + * Post an mq index to the host->adapter activity fifo. + * + * IN: + * c2dev - ptr to c2dev structure + * mq_index - mq index to post + * shared - value most recently written to shared + * + * OUT: + * + * Return: + * none + */ +static inline void c2_activity(struct c2_dev *c2dev, u32 mq_index, u16 shared) +{ + /* + * First read the register to see if the FIFO is full, and if so, + * spin until it's not. This isn't perfect -- there is no + * synchronization among the clients of the register, but in + * practice it prevents multiple CPU from hammering the bus + * with PCI RETRY. Note that when this does happen, the card + * cannot get on the bus and the card and system hang in a + * deadlock -- thus the need for this code. [TOT] + */ + while (readl(c2dev->regs + PCI_BAR0_ADAPTER_HINT) & 0x80000000) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(0); + } + + __raw_writel(C2_HINT_MAKE(mq_index, shared), + c2dev->regs + PCI_BAR0_ADAPTER_HINT); +} + +/* + * Function: qp_wr_post + * + * Description: + * This in-line function allocates a MQ msg, then moves the host-copy of + * the completed WR into msg. Then it posts the message. + * + * IN: + * q - ptr to user MQ. + * wr - ptr to host-copy of the WR. + * qp - ptr to user qp + * size - Number of bytes to post. Assumed to be divisible by 4. + * + * OUT: none + * + * Return: + * CCIL status codes. + */ +static int qp_wr_post(struct c2_mq *q, union c2wr * wr, struct c2_qp *qp, u32 size) +{ + union c2wr *msg; + + msg = c2_mq_alloc(q); + if (msg == NULL) { + return -EINVAL; + } +#ifdef CCMSGMAGIC + ((c2wr_hdr_t *) wr)->magic = cpu_to_be32(CCWR_MAGIC); +#endif + + /* + * Since all header fields in the WR are the same as the + * CQE, set the following so the adapter need not. + */ + c2_wr_set_result(wr, CCERR_PENDING); + + /* + * Copy the wr down to the adapter + */ + memcpy((void *) msg, (void *) wr, size); + + c2_mq_produce(q); + return 0; +} + + +int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, + struct ib_send_wr **bad_wr) +{ + struct c2_dev *c2dev = to_c2dev(ibqp->device); + struct c2_qp *qp = to_c2qp(ibqp); + union c2wr wr; + int err = 0; + + u32 flags; + u32 tot_len; + u8 actual_sge_count; + u32 msg_size; + + if (qp->state > IB_QPS_RTS) + return -EINVAL; + + while (ib_wr) { + + flags = 0; + wr.sqwr.sq_hdr.user_hdr.hdr.context = ib_wr->wr_id; + if (ib_wr->send_flags & IB_SEND_SIGNALED) { + flags |= SQ_SIGNALED; + } + + switch (ib_wr->opcode) { + case IB_WR_SEND: + if (ib_wr->send_flags & IB_SEND_SOLICITED) { + c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE); + msg_size = sizeof(struct c2wr_send_req); + } else { + c2_wr_set_id(&wr, C2_WR_TYPE_SEND); + msg_size = sizeof(struct c2wr_send_req); + } + + wr.sqwr.send.remote_stag = 0; + msg_size += sizeof(struct c2_data_addr) * ib_wr->num_sge; + if (ib_wr->num_sge > qp->send_sgl_depth) { + err = -EINVAL; + break; + } + if (ib_wr->send_flags & IB_SEND_FENCE) { + flags |= SQ_READ_FENCE; + } + err = move_sgl((struct c2_data_addr *) & (wr.sqwr.send.data), + ib_wr->sg_list, + ib_wr->num_sge, + &tot_len, &actual_sge_count); + wr.sqwr.send.sge_len = cpu_to_be32(tot_len); + c2_wr_set_sge_count(&wr, actual_sge_count); + break; + case IB_WR_RDMA_WRITE: + c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_WRITE); + msg_size = sizeof(struct c2wr_rdma_write_req) + + (sizeof(struct c2_data_addr) * ib_wr->num_sge); + if (ib_wr->num_sge > qp->rdma_write_sgl_depth) { + err = -EINVAL; + break; + } + if (ib_wr->send_flags & IB_SEND_FENCE) { + flags |= SQ_READ_FENCE; + } + wr.sqwr.rdma_write.remote_stag = + cpu_to_be32(ib_wr->wr.rdma.rkey); + wr.sqwr.rdma_write.remote_to = + cpu_to_be64(ib_wr->wr.rdma.remote_addr); + err = move_sgl((struct c2_data_addr *) + & (wr.sqwr.rdma_write.data), + ib_wr->sg_list, + ib_wr->num_sge, + &tot_len, &actual_sge_count); + wr.sqwr.rdma_write.sge_len = cpu_to_be32(tot_len); + c2_wr_set_sge_count(&wr, actual_sge_count); + break; + case IB_WR_RDMA_READ: + c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_READ); + msg_size = sizeof(struct c2wr_rdma_read_req); + + /* IWarp only suppots 1 sge for RDMA reads */ + if (ib_wr->num_sge > 1) { + err = -EINVAL; + break; + } + + /* + * Move the local and remote stag/to/len into the WR. + */ + wr.sqwr.rdma_read.local_stag = + cpu_to_be32(ib_wr->sg_list->lkey); + wr.sqwr.rdma_read.local_to = + cpu_to_be64(ib_wr->sg_list->addr); + wr.sqwr.rdma_read.remote_stag = + cpu_to_be32(ib_wr->wr.rdma.rkey); + wr.sqwr.rdma_read.remote_to = + cpu_to_be64(ib_wr->wr.rdma.remote_addr); + wr.sqwr.rdma_read.length = + cpu_to_be32(ib_wr->sg_list->length); + break; + default: + /* error */ + msg_size = 0; + err = -EINVAL; + break; + } + + /* + * If we had an error on the last wr build, then + * break out. Possible errors include bogus WR + * type, and a bogus SGL length... + */ + if (err) { + break; + } + + /* + * Store flags + */ + c2_wr_set_flags(&wr, flags); + + /* + * Post the puppy! + */ + err = qp_wr_post(&qp->sq_mq, &wr, qp, msg_size); + if (err) { + break; + } + + /* + * Enqueue mq index to activity FIFO. + */ + c2_activity(c2dev, qp->sq_mq.index, qp->sq_mq.hint_count); + + ib_wr = ib_wr->next; + } + + if (err) + *bad_wr = ib_wr; + return err; +} + +int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr, + struct ib_recv_wr **bad_wr) +{ + struct c2_dev *c2dev = to_c2dev(ibqp->device); + struct c2_qp *qp = to_c2qp(ibqp); + union c2wr wr; + int err = 0; + + if (qp->state > IB_QPS_RTS) + return -EINVAL; + + /* + * Try and post each work request + */ + while (ib_wr) { + u32 tot_len; + u8 actual_sge_count; + + if (ib_wr->num_sge > qp->recv_sgl_depth) { + err = -EINVAL; + break; + } + + /* + * Create local host-copy of the WR + */ + wr.rqwr.rq_hdr.user_hdr.hdr.context = ib_wr->wr_id; + c2_wr_set_id(&wr, CCWR_RECV); + c2_wr_set_flags(&wr, 0); + + /* sge_count is limited to eight bits. */ + BUG_ON(ib_wr->num_sge >= 256); + err = move_sgl((struct c2_data_addr *) & (wr.rqwr.data), + ib_wr->sg_list, + ib_wr->num_sge, &tot_len, &actual_sge_count); + c2_wr_set_sge_count(&wr, actual_sge_count); + + /* + * If we had an error on the last wr build, then + * break out. Possible errors include bogus WR + * type, and a bogus SGL length... + */ + if (err) { + break; + } + + err = qp_wr_post(&qp->rq_mq, &wr, qp, qp->rq_mq.msg_size); + if (err) { + break; + } + + /* + * Enqueue mq index to activity FIFO + */ + c2_activity(c2dev, qp->rq_mq.index, qp->rq_mq.hint_count); + + ib_wr = ib_wr->next; + } + + if (err) + *bad_wr = ib_wr; + return err; +} + +void __devinit c2_init_qp_table(struct c2_dev *c2dev) +{ + spin_lock_init(&c2dev->qp_table.lock); + idr_init(&c2dev->qp_table.idr); +} + +void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev) +{ + idr_destroy(&c2dev->qp_table.idr); +} diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c new file mode 100644 index 000000000000..1c3c9d65ecea --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_rnic.c @@ -0,0 +1,663 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include "c2.h" +#include "c2_vq.h" + +/* Device capabilities */ +#define C2_MIN_PAGESIZE 1024 + +#define C2_MAX_MRS 32768 +#define C2_MAX_QPS 16000 +#define C2_MAX_WQE_SZ 256 +#define C2_MAX_QP_WR ((128*1024)/C2_MAX_WQE_SZ) +#define C2_MAX_SGES 4 +#define C2_MAX_SGE_RD 1 +#define C2_MAX_CQS 32768 +#define C2_MAX_CQES 4096 +#define C2_MAX_PDS 16384 + +/* + * Send the adapter INIT message to the amso1100 + */ +static int c2_adapter_init(struct c2_dev *c2dev) +{ + struct c2wr_init_req wr; + int err; + + memset(&wr, 0, sizeof(wr)); + c2_wr_set_id(&wr, CCWR_INIT); + wr.hdr.context = 0; + wr.hint_count = cpu_to_be64(c2dev->hint_count_dma); + wr.q0_host_shared = cpu_to_be64(c2dev->req_vq.shared_dma); + wr.q1_host_shared = cpu_to_be64(c2dev->rep_vq.shared_dma); + wr.q1_host_msg_pool = cpu_to_be64(c2dev->rep_vq.host_dma); + wr.q2_host_shared = cpu_to_be64(c2dev->aeq.shared_dma); + wr.q2_host_msg_pool = cpu_to_be64(c2dev->aeq.host_dma); + + /* Post the init message */ + err = vq_send_wr(c2dev, (union c2wr *) & wr); + + return err; +} + +/* + * Send the adapter TERM message to the amso1100 + */ +static void c2_adapter_term(struct c2_dev *c2dev) +{ + struct c2wr_init_req wr; + + memset(&wr, 0, sizeof(wr)); + c2_wr_set_id(&wr, CCWR_TERM); + wr.hdr.context = 0; + + /* Post the init message */ + vq_send_wr(c2dev, (union c2wr *) & wr); + c2dev->init = 0; + + return; +} + +/* + * Query the adapter + */ +static int c2_rnic_query(struct c2_dev *c2dev, struct ib_device_attr *props) +{ + struct c2_vq_req *vq_req; + struct c2wr_rnic_query_req wr; + struct c2wr_rnic_query_rep *reply; + int err; + + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + c2_wr_set_id(&wr, CCWR_RNIC_QUERY); + wr.hdr.context = (unsigned long) vq_req; + wr.rnic_handle = c2dev->adapter_handle; + + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, (union c2wr *) &wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail1; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail1; + + reply = + (struct c2wr_rnic_query_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) + err = -ENOMEM; + + err = c2_errno(reply); + if (err) + goto bail2; + + props->fw_ver = + ((u64)be32_to_cpu(reply->fw_ver_major) << 32) | + ((be32_to_cpu(reply->fw_ver_minor) && 0xFFFF) << 16) | + (be32_to_cpu(reply->fw_ver_patch) && 0xFFFF); + memcpy(&props->sys_image_guid, c2dev->netdev->dev_addr, 6); + props->max_mr_size = 0xFFFFFFFF; + props->page_size_cap = ~(C2_MIN_PAGESIZE-1); + props->vendor_id = be32_to_cpu(reply->vendor_id); + props->vendor_part_id = be32_to_cpu(reply->part_number); + props->hw_ver = be32_to_cpu(reply->hw_version); + props->max_qp = be32_to_cpu(reply->max_qps); + props->max_qp_wr = be32_to_cpu(reply->max_qp_depth); + props->device_cap_flags = c2dev->device_cap_flags; + props->max_sge = C2_MAX_SGES; + props->max_sge_rd = C2_MAX_SGE_RD; + props->max_cq = be32_to_cpu(reply->max_cqs); + props->max_cqe = be32_to_cpu(reply->max_cq_depth); + props->max_mr = be32_to_cpu(reply->max_mrs); + props->max_pd = be32_to_cpu(reply->max_pds); + props->max_qp_rd_atom = be32_to_cpu(reply->max_qp_ird); + props->max_ee_rd_atom = 0; + props->max_res_rd_atom = be32_to_cpu(reply->max_global_ird); + props->max_qp_init_rd_atom = be32_to_cpu(reply->max_qp_ord); + props->max_ee_init_rd_atom = 0; + props->atomic_cap = IB_ATOMIC_NONE; + props->max_ee = 0; + props->max_rdd = 0; + props->max_mw = be32_to_cpu(reply->max_mws); + props->max_raw_ipv6_qp = 0; + props->max_raw_ethy_qp = 0; + props->max_mcast_grp = 0; + props->max_mcast_qp_attach = 0; + props->max_total_mcast_qp_attach = 0; + props->max_ah = 0; + props->max_fmr = 0; + props->max_map_per_fmr = 0; + props->max_srq = 0; + props->max_srq_wr = 0; + props->max_srq_sge = 0; + props->max_pkeys = 0; + props->local_ca_ack_delay = 0; + + bail2: + vq_repbuf_free(c2dev, reply); + + bail1: + vq_req_free(c2dev, vq_req); + return err; +} + +/* + * Add an IP address to the RNIC interface + */ +int c2_add_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask) +{ + struct c2_vq_req *vq_req; + struct c2wr_rnic_setconfig_req *wr; + struct c2wr_rnic_setconfig_rep *reply; + struct c2_netaddr netaddr; + int err, len; + + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + len = sizeof(struct c2_netaddr); + wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); + if (!wr) { + err = -ENOMEM; + goto bail0; + } + + c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG); + wr->hdr.context = (unsigned long) vq_req; + wr->rnic_handle = c2dev->adapter_handle; + wr->option = cpu_to_be32(C2_CFG_ADD_ADDR); + + netaddr.ip_addr = inaddr; + netaddr.netmask = inmask; + netaddr.mtu = 0; + + memcpy(wr->data, &netaddr, len); + + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, (union c2wr *) wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail1; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail1; + + reply = + (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) { + err = -ENOMEM; + goto bail1; + } + + err = c2_errno(reply); + vq_repbuf_free(c2dev, reply); + + bail1: + kfree(wr); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + +/* + * Delete an IP address from the RNIC interface + */ +int c2_del_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask) +{ + struct c2_vq_req *vq_req; + struct c2wr_rnic_setconfig_req *wr; + struct c2wr_rnic_setconfig_rep *reply; + struct c2_netaddr netaddr; + int err, len; + + vq_req = vq_req_alloc(c2dev); + if (!vq_req) + return -ENOMEM; + + len = sizeof(struct c2_netaddr); + wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); + if (!wr) { + err = -ENOMEM; + goto bail0; + } + + c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG); + wr->hdr.context = (unsigned long) vq_req; + wr->rnic_handle = c2dev->adapter_handle; + wr->option = cpu_to_be32(C2_CFG_DEL_ADDR); + + netaddr.ip_addr = inaddr; + netaddr.netmask = inmask; + netaddr.mtu = 0; + + memcpy(wr->data, &netaddr, len); + + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, (union c2wr *) wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail1; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) + goto bail1; + + reply = + (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) { + err = -ENOMEM; + goto bail1; + } + + err = c2_errno(reply); + vq_repbuf_free(c2dev, reply); + + bail1: + kfree(wr); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + +/* + * Open a single RNIC instance to use with all + * low level openib calls + */ +static int c2_rnic_open(struct c2_dev *c2dev) +{ + struct c2_vq_req *vq_req; + union c2wr wr; + struct c2wr_rnic_open_rep *reply; + int err; + + vq_req = vq_req_alloc(c2dev); + if (vq_req == NULL) { + return -ENOMEM; + } + + memset(&wr, 0, sizeof(wr)); + c2_wr_set_id(&wr, CCWR_RNIC_OPEN); + wr.rnic_open.req.hdr.context = (unsigned long) (vq_req); + wr.rnic_open.req.flags = cpu_to_be16(RNIC_PRIV_MODE); + wr.rnic_open.req.port_num = cpu_to_be16(0); + wr.rnic_open.req.user_context = (unsigned long) c2dev; + + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, &wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) { + goto bail0; + } + + reply = (struct c2wr_rnic_open_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) { + err = -ENOMEM; + goto bail0; + } + + if ((err = c2_errno(reply)) != 0) { + goto bail1; + } + + c2dev->adapter_handle = reply->rnic_handle; + + bail1: + vq_repbuf_free(c2dev, reply); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + +/* + * Close the RNIC instance + */ +static int c2_rnic_close(struct c2_dev *c2dev) +{ + struct c2_vq_req *vq_req; + union c2wr wr; + struct c2wr_rnic_close_rep *reply; + int err; + + vq_req = vq_req_alloc(c2dev); + if (vq_req == NULL) { + return -ENOMEM; + } + + memset(&wr, 0, sizeof(wr)); + c2_wr_set_id(&wr, CCWR_RNIC_CLOSE); + wr.rnic_close.req.hdr.context = (unsigned long) vq_req; + wr.rnic_close.req.rnic_handle = c2dev->adapter_handle; + + vq_req_get(c2dev, vq_req); + + err = vq_send_wr(c2dev, &wr); + if (err) { + vq_req_put(c2dev, vq_req); + goto bail0; + } + + err = vq_wait_for_reply(c2dev, vq_req); + if (err) { + goto bail0; + } + + reply = (struct c2wr_rnic_close_rep *) (unsigned long) (vq_req->reply_msg); + if (!reply) { + err = -ENOMEM; + goto bail0; + } + + if ((err = c2_errno(reply)) != 0) { + goto bail1; + } + + c2dev->adapter_handle = 0; + + bail1: + vq_repbuf_free(c2dev, reply); + bail0: + vq_req_free(c2dev, vq_req); + return err; +} + +/* + * Called by c2_probe to initialize the RNIC. This principally + * involves initalizing the various limits and resouce pools that + * comprise the RNIC instance. + */ +int c2_rnic_init(struct c2_dev *c2dev) +{ + int err; + u32 qsize, msgsize; + void *q1_pages; + void *q2_pages; + void __iomem *mmio_regs; + + /* Device capabilities */ + c2dev->device_cap_flags = + (IB_DEVICE_RESIZE_MAX_WR | + IB_DEVICE_CURR_QP_STATE_MOD | + IB_DEVICE_SYS_IMAGE_GUID | + IB_DEVICE_ZERO_STAG | + IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW); + + /* Allocate the qptr_array */ + c2dev->qptr_array = vmalloc(C2_MAX_CQS * sizeof(void *)); + if (!c2dev->qptr_array) { + return -ENOMEM; + } + + /* Inialize the qptr_array */ + memset(c2dev->qptr_array, 0, C2_MAX_CQS * sizeof(void *)); + c2dev->qptr_array[0] = (void *) &c2dev->req_vq; + c2dev->qptr_array[1] = (void *) &c2dev->rep_vq; + c2dev->qptr_array[2] = (void *) &c2dev->aeq; + + /* Initialize data structures */ + init_waitqueue_head(&c2dev->req_vq_wo); + spin_lock_init(&c2dev->vqlock); + spin_lock_init(&c2dev->lock); + + /* Allocate MQ shared pointer pool for kernel clients. User + * mode client pools are hung off the user context + */ + err = c2_init_mqsp_pool(c2dev, GFP_KERNEL, &c2dev->kern_mqsp_pool); + if (err) { + goto bail0; + } + + /* Allocate shared pointers for Q0, Q1, and Q2 from + * the shared pointer pool. + */ + + c2dev->hint_count = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, + &c2dev->hint_count_dma, + GFP_KERNEL); + c2dev->req_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, + &c2dev->req_vq.shared_dma, + GFP_KERNEL); + c2dev->rep_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, + &c2dev->rep_vq.shared_dma, + GFP_KERNEL); + c2dev->aeq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool, + &c2dev->aeq.shared_dma, GFP_KERNEL); + if (!c2dev->hint_count || !c2dev->req_vq.shared || + !c2dev->rep_vq.shared || !c2dev->aeq.shared) { + err = -ENOMEM; + goto bail1; + } + + mmio_regs = c2dev->kva; + /* Initialize the Verbs Request Queue */ + c2_mq_req_init(&c2dev->req_vq, 0, + be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_QSIZE)), + be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_MSGSIZE)), + mmio_regs + + be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_POOLSTART)), + mmio_regs + + be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_SHARED)), + C2_MQ_ADAPTER_TARGET); + + /* Initialize the Verbs Reply Queue */ + qsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_QSIZE)); + msgsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_MSGSIZE)); + q1_pages = kmalloc(qsize * msgsize, GFP_KERNEL); + if (!q1_pages) { + err = -ENOMEM; + goto bail1; + } + c2dev->rep_vq.host_dma = dma_map_single(c2dev->ibdev.dma_device, + (void *)q1_pages, qsize * msgsize, + DMA_FROM_DEVICE); + pci_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma); + pr_debug("%s rep_vq va %p dma %llx\n", __FUNCTION__, q1_pages, + (u64)c2dev->rep_vq.host_dma); + c2_mq_rep_init(&c2dev->rep_vq, + 1, + qsize, + msgsize, + q1_pages, + mmio_regs + + be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_SHARED)), + C2_MQ_HOST_TARGET); + + /* Initialize the Asynchronus Event Queue */ + qsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_QSIZE)); + msgsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_MSGSIZE)); + q2_pages = kmalloc(qsize * msgsize, GFP_KERNEL); + if (!q2_pages) { + err = -ENOMEM; + goto bail2; + } + c2dev->aeq.host_dma = dma_map_single(c2dev->ibdev.dma_device, + (void *)q2_pages, qsize * msgsize, + DMA_FROM_DEVICE); + pci_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma); + pr_debug("%s aeq va %p dma %llx\n", __FUNCTION__, q1_pages, + (u64)c2dev->rep_vq.host_dma); + c2_mq_rep_init(&c2dev->aeq, + 2, + qsize, + msgsize, + q2_pages, + mmio_regs + + be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_SHARED)), + C2_MQ_HOST_TARGET); + + /* Initialize the verbs request allocator */ + err = vq_init(c2dev); + if (err) + goto bail3; + + /* Enable interrupts on the adapter */ + writel(0, c2dev->regs + C2_IDIS); + + /* create the WR init message */ + err = c2_adapter_init(c2dev); + if (err) + goto bail4; + c2dev->init++; + + /* open an adapter instance */ + err = c2_rnic_open(c2dev); + if (err) + goto bail4; + + /* Initialize cached the adapter limits */ + if (c2_rnic_query(c2dev, &c2dev->props)) + goto bail5; + + /* Initialize the PD pool */ + err = c2_init_pd_table(c2dev); + if (err) + goto bail5; + + /* Initialize the QP pool */ + c2_init_qp_table(c2dev); + return 0; + + bail5: + c2_rnic_close(c2dev); + bail4: + vq_term(c2dev); + bail3: + dma_unmap_single(c2dev->ibdev.dma_device, + pci_unmap_addr(&c2dev->aeq, mapping), + c2dev->aeq.q_size * c2dev->aeq.msg_size, + DMA_FROM_DEVICE); + kfree(q2_pages); + bail2: + dma_unmap_single(c2dev->ibdev.dma_device, + pci_unmap_addr(&c2dev->rep_vq, mapping), + c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size, + DMA_FROM_DEVICE); + kfree(q1_pages); + bail1: + c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool); + bail0: + vfree(c2dev->qptr_array); + + return err; +} + +/* + * Called by c2_remove to cleanup the RNIC resources. + */ +void c2_rnic_term(struct c2_dev *c2dev) +{ + + /* Close the open adapter instance */ + c2_rnic_close(c2dev); + + /* Send the TERM message to the adapter */ + c2_adapter_term(c2dev); + + /* Disable interrupts on the adapter */ + writel(1, c2dev->regs + C2_IDIS); + + /* Free the QP pool */ + c2_cleanup_qp_table(c2dev); + + /* Free the PD pool */ + c2_cleanup_pd_table(c2dev); + + /* Free the verbs request allocator */ + vq_term(c2dev); + + /* Unmap and free the asynchronus event queue */ + dma_unmap_single(c2dev->ibdev.dma_device, + pci_unmap_addr(&c2dev->aeq, mapping), + c2dev->aeq.q_size * c2dev->aeq.msg_size, + DMA_FROM_DEVICE); + kfree(c2dev->aeq.msg_pool.host); + + /* Unmap and free the verbs reply queue */ + dma_unmap_single(c2dev->ibdev.dma_device, + pci_unmap_addr(&c2dev->rep_vq, mapping), + c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size, + DMA_FROM_DEVICE); + kfree(c2dev->rep_vq.msg_pool.host); + + /* Free the MQ shared pointer pool */ + c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool); + + /* Free the qptr_array */ + vfree(c2dev->qptr_array); + + return; +} diff --git a/drivers/infiniband/hw/amso1100/c2_status.h b/drivers/infiniband/hw/amso1100/c2_status.h new file mode 100644 index 000000000000..6ee4aa92d875 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_status.h @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _C2_STATUS_H_ +#define _C2_STATUS_H_ + +/* + * Verbs Status Codes + */ +enum c2_status { + C2_OK = 0, /* This must be zero */ + CCERR_INSUFFICIENT_RESOURCES = 1, + CCERR_INVALID_MODIFIER = 2, + CCERR_INVALID_MODE = 3, + CCERR_IN_USE = 4, + CCERR_INVALID_RNIC = 5, + CCERR_INTERRUPTED_OPERATION = 6, + CCERR_INVALID_EH = 7, + CCERR_INVALID_CQ = 8, + CCERR_CQ_EMPTY = 9, + CCERR_NOT_IMPLEMENTED = 10, + CCERR_CQ_DEPTH_TOO_SMALL = 11, + CCERR_PD_IN_USE = 12, + CCERR_INVALID_PD = 13, + CCERR_INVALID_SRQ = 14, + CCERR_INVALID_ADDRESS = 15, + CCERR_INVALID_NETMASK = 16, + CCERR_INVALID_QP = 17, + CCERR_INVALID_QP_STATE = 18, + CCERR_TOO_MANY_WRS_POSTED = 19, + CCERR_INVALID_WR_TYPE = 20, + CCERR_INVALID_SGL_LENGTH = 21, + CCERR_INVALID_SQ_DEPTH = 22, + CCERR_INVALID_RQ_DEPTH = 23, + CCERR_INVALID_ORD = 24, + CCERR_INVALID_IRD = 25, + CCERR_QP_ATTR_CANNOT_CHANGE = 26, + CCERR_INVALID_STAG = 27, + CCERR_QP_IN_USE = 28, + CCERR_OUTSTANDING_WRS = 29, + CCERR_STAG_IN_USE = 30, + CCERR_INVALID_STAG_INDEX = 31, + CCERR_INVALID_SGL_FORMAT = 32, + CCERR_ADAPTER_TIMEOUT = 33, + CCERR_INVALID_CQ_DEPTH = 34, + CCERR_INVALID_PRIVATE_DATA_LENGTH = 35, + CCERR_INVALID_EP = 36, + CCERR_MR_IN_USE = CCERR_STAG_IN_USE, + CCERR_FLUSHED = 38, + CCERR_INVALID_WQE = 39, + CCERR_LOCAL_QP_CATASTROPHIC_ERROR = 40, + CCERR_REMOTE_TERMINATION_ERROR = 41, + CCERR_BASE_AND_BOUNDS_VIOLATION = 42, + CCERR_ACCESS_VIOLATION = 43, + CCERR_INVALID_PD_ID = 44, + CCERR_WRAP_ERROR = 45, + CCERR_INV_STAG_ACCESS_ERROR = 46, + CCERR_ZERO_RDMA_READ_RESOURCES = 47, + CCERR_QP_NOT_PRIVILEGED = 48, + CCERR_STAG_STATE_NOT_INVALID = 49, + CCERR_INVALID_PAGE_SIZE = 50, + CCERR_INVALID_BUFFER_SIZE = 51, + CCERR_INVALID_PBE = 52, + CCERR_INVALID_FBO = 53, + CCERR_INVALID_LENGTH = 54, + CCERR_INVALID_ACCESS_RIGHTS = 55, + CCERR_PBL_TOO_BIG = 56, + CCERR_INVALID_VA = 57, + CCERR_INVALID_REGION = 58, + CCERR_INVALID_WINDOW = 59, + CCERR_TOTAL_LENGTH_TOO_BIG = 60, + CCERR_INVALID_QP_ID = 61, + CCERR_ADDR_IN_USE = 62, + CCERR_ADDR_NOT_AVAIL = 63, + CCERR_NET_DOWN = 64, + CCERR_NET_UNREACHABLE = 65, + CCERR_CONN_ABORTED = 66, + CCERR_CONN_RESET = 67, + CCERR_NO_BUFS = 68, + CCERR_CONN_TIMEDOUT = 69, + CCERR_CONN_REFUSED = 70, + CCERR_HOST_UNREACHABLE = 71, + CCERR_INVALID_SEND_SGL_DEPTH = 72, + CCERR_INVALID_RECV_SGL_DEPTH = 73, + CCERR_INVALID_RDMA_WRITE_SGL_DEPTH = 74, + CCERR_INSUFFICIENT_PRIVILEGES = 75, + CCERR_STACK_ERROR = 76, + CCERR_INVALID_VERSION = 77, + CCERR_INVALID_MTU = 78, + CCERR_INVALID_IMAGE = 79, + CCERR_PENDING = 98, /* not an error; user internally by adapter */ + CCERR_DEFER = 99, /* not an error; used internally by adapter */ + CCERR_FAILED_WRITE = 100, + CCERR_FAILED_ERASE = 101, + CCERR_FAILED_VERIFICATION = 102, + CCERR_NOT_FOUND = 103, + +}; + +/* + * CCAE_ACTIVE_CONNECT_RESULTS status result codes. + */ +enum c2_connect_status { + C2_CONN_STATUS_SUCCESS = C2_OK, + C2_CONN_STATUS_NO_MEM = CCERR_INSUFFICIENT_RESOURCES, + C2_CONN_STATUS_TIMEDOUT = CCERR_CONN_TIMEDOUT, + C2_CONN_STATUS_REFUSED = CCERR_CONN_REFUSED, + C2_CONN_STATUS_NETUNREACH = CCERR_NET_UNREACHABLE, + C2_CONN_STATUS_HOSTUNREACH = CCERR_HOST_UNREACHABLE, + C2_CONN_STATUS_INVALID_RNIC = CCERR_INVALID_RNIC, + C2_CONN_STATUS_INVALID_QP = CCERR_INVALID_QP, + C2_CONN_STATUS_INVALID_QP_STATE = CCERR_INVALID_QP_STATE, + C2_CONN_STATUS_REJECTED = CCERR_CONN_RESET, + C2_CONN_STATUS_ADDR_NOT_AVAIL = CCERR_ADDR_NOT_AVAIL, +}; + +/* + * Flash programming status codes. + */ +enum c2_flash_status { + C2_FLASH_STATUS_SUCCESS = 0x0000, + C2_FLASH_STATUS_VERIFY_ERR = 0x0002, + C2_FLASH_STATUS_IMAGE_ERR = 0x0004, + C2_FLASH_STATUS_ECLBS = 0x0400, + C2_FLASH_STATUS_PSLBS = 0x0800, + C2_FLASH_STATUS_VPENS = 0x1000, +}; + +#endif /* _C2_STATUS_H_ */ diff --git a/drivers/infiniband/hw/amso1100/c2_user.h b/drivers/infiniband/hw/amso1100/c2_user.h new file mode 100644 index 000000000000..7e9e7ad65467 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_user.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef C2_USER_H +#define C2_USER_H + +#include + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * In particular do not use pointer types -- pass pointers in __u64 + * instead. + */ + +struct c2_alloc_ucontext_resp { + __u32 qp_tab_size; + __u32 uarc_size; +}; + +struct c2_alloc_pd_resp { + __u32 pdn; + __u32 reserved; +}; + +struct c2_create_cq { + __u32 lkey; + __u32 pdn; + __u64 arm_db_page; + __u64 set_db_page; + __u32 arm_db_index; + __u32 set_db_index; +}; + +struct c2_create_cq_resp { + __u32 cqn; + __u32 reserved; +}; + +struct c2_create_qp { + __u32 lkey; + __u32 reserved; + __u64 sq_db_page; + __u64 rq_db_page; + __u32 sq_db_index; + __u32 rq_db_index; +}; + +#endif /* C2_USER_H */ diff --git a/drivers/infiniband/hw/amso1100/c2_vq.c b/drivers/infiniband/hw/amso1100/c2_vq.c new file mode 100644 index 000000000000..40caeb5f41b4 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_vq.c @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include + +#include "c2_vq.h" +#include "c2_provider.h" + +/* + * Verbs Request Objects: + * + * VQ Request Objects are allocated by the kernel verbs handlers. + * They contain a wait object, a refcnt, an atomic bool indicating that the + * adapter has replied, and a copy of the verb reply work request. + * A pointer to the VQ Request Object is passed down in the context + * field of the work request message, and reflected back by the adapter + * in the verbs reply message. The function handle_vq() in the interrupt + * path will use this pointer to: + * 1) append a copy of the verbs reply message + * 2) mark that the reply is ready + * 3) wake up the kernel verbs handler blocked awaiting the reply. + * + * + * The kernel verbs handlers do a "get" to put a 2nd reference on the + * VQ Request object. If the kernel verbs handler exits before the adapter + * can respond, this extra reference will keep the VQ Request object around + * until the adapter's reply can be processed. The reason we need this is + * because a pointer to this object is stuffed into the context field of + * the verbs work request message, and reflected back in the reply message. + * It is used in the interrupt handler (handle_vq()) to wake up the appropriate + * kernel verb handler that is blocked awaiting the verb reply. + * So handle_vq() will do a "put" on the object when it's done accessing it. + * NOTE: If we guarantee that the kernel verb handler will never bail before + * getting the reply, then we don't need these refcnts. + * + * + * VQ Request objects are freed by the kernel verbs handlers only + * after the verb has been processed, or when the adapter fails and + * does not reply. + * + * + * Verbs Reply Buffers: + * + * VQ Reply bufs are local host memory copies of a + * outstanding Verb Request reply + * message. The are always allocated by the kernel verbs handlers, and _may_ be + * freed by either the kernel verbs handler -or- the interrupt handler. The + * kernel verbs handler _must_ free the repbuf, then free the vq request object + * in that order. + */ + +int vq_init(struct c2_dev *c2dev) +{ + sprintf(c2dev->vq_cache_name, "c2-vq:dev%c", + (char) ('0' + c2dev->devnum)); + c2dev->host_msg_cache = + kmem_cache_create(c2dev->vq_cache_name, c2dev->rep_vq.msg_size, 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (c2dev->host_msg_cache == NULL) { + return -ENOMEM; + } + return 0; +} + +void vq_term(struct c2_dev *c2dev) +{ + kmem_cache_destroy(c2dev->host_msg_cache); +} + +/* vq_req_alloc - allocate a VQ Request Object and initialize it. + * The refcnt is set to 1. + */ +struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev) +{ + struct c2_vq_req *r; + + r = kmalloc(sizeof(struct c2_vq_req), GFP_KERNEL); + if (r) { + init_waitqueue_head(&r->wait_object); + r->reply_msg = (u64) NULL; + r->event = 0; + r->cm_id = NULL; + r->qp = NULL; + atomic_set(&r->refcnt, 1); + atomic_set(&r->reply_ready, 0); + } + return r; +} + + +/* vq_req_free - free the VQ Request Object. It is assumed the verbs handler + * has already free the VQ Reply Buffer if it existed. + */ +void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *r) +{ + r->reply_msg = (u64) NULL; + if (atomic_dec_and_test(&r->refcnt)) { + kfree(r); + } +} + +/* vq_req_get - reference a VQ Request Object. Done + * only in the kernel verbs handlers. + */ +void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *r) +{ + atomic_inc(&r->refcnt); +} + + +/* vq_req_put - dereference and potentially free a VQ Request Object. + * + * This is only called by handle_vq() on the + * interrupt when it is done processing + * a verb reply message. If the associated + * kernel verbs handler has already bailed, + * then this put will actually free the VQ + * Request object _and_ the VQ Reply Buffer + * if it exists. + */ +void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *r) +{ + if (atomic_dec_and_test(&r->refcnt)) { + if (r->reply_msg != (u64) NULL) + vq_repbuf_free(c2dev, + (void *) (unsigned long) r->reply_msg); + kfree(r); + } +} + + +/* + * vq_repbuf_alloc - allocate a VQ Reply Buffer. + */ +void *vq_repbuf_alloc(struct c2_dev *c2dev) +{ + return kmem_cache_alloc(c2dev->host_msg_cache, SLAB_ATOMIC); +} + +/* + * vq_send_wr - post a verbs request message to the Verbs Request Queue. + * If a message is not available in the MQ, then block until one is available. + * NOTE: handle_mq() on the interrupt context will wake up threads blocked here. + * When the adapter drains the Verbs Request Queue, + * it inserts MQ index 0 in to the + * adapter->host activity fifo and interrupts the host. + */ +int vq_send_wr(struct c2_dev *c2dev, union c2wr *wr) +{ + void *msg; + wait_queue_t __wait; + + /* + * grab adapter vq lock + */ + spin_lock(&c2dev->vqlock); + + /* + * allocate msg + */ + msg = c2_mq_alloc(&c2dev->req_vq); + + /* + * If we cannot get a msg, then we'll wait + * When a messages are available, the int handler will wake_up() + * any waiters. + */ + while (msg == NULL) { + pr_debug("%s:%d no available msg in VQ, waiting...\n", + __FUNCTION__, __LINE__); + init_waitqueue_entry(&__wait, current); + add_wait_queue(&c2dev->req_vq_wo, &__wait); + spin_unlock(&c2dev->vqlock); + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (!c2_mq_full(&c2dev->req_vq)) { + break; + } + if (!signal_pending(current)) { + schedule_timeout(1 * HZ); /* 1 second... */ + continue; + } + set_current_state(TASK_RUNNING); + remove_wait_queue(&c2dev->req_vq_wo, &__wait); + return -EINTR; + } + set_current_state(TASK_RUNNING); + remove_wait_queue(&c2dev->req_vq_wo, &__wait); + spin_lock(&c2dev->vqlock); + msg = c2_mq_alloc(&c2dev->req_vq); + } + + /* + * copy wr into adapter msg + */ + memcpy(msg, wr, c2dev->req_vq.msg_size); + + /* + * post msg + */ + c2_mq_produce(&c2dev->req_vq); + + /* + * release adapter vq lock + */ + spin_unlock(&c2dev->vqlock); + return 0; +} + + +/* + * vq_wait_for_reply - block until the adapter posts a Verb Reply Message. + */ +int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req) +{ + if (!wait_event_timeout(req->wait_object, + atomic_read(&req->reply_ready), + 60*HZ)) + return -ETIMEDOUT; + + return 0; +} + +/* + * vq_repbuf_free - Free a Verbs Reply Buffer. + */ +void vq_repbuf_free(struct c2_dev *c2dev, void *reply) +{ + kmem_cache_free(c2dev->host_msg_cache, reply); +} diff --git a/drivers/infiniband/hw/amso1100/c2_vq.h b/drivers/infiniband/hw/amso1100/c2_vq.h new file mode 100644 index 000000000000..33805627a607 --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_vq.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _C2_VQ_H_ +#define _C2_VQ_H_ +#include +#include "c2.h" +#include "c2_wr.h" +#include "c2_provider.h" + +struct c2_vq_req { + u64 reply_msg; /* ptr to reply msg */ + wait_queue_head_t wait_object; /* wait object for vq reqs */ + atomic_t reply_ready; /* set when reply is ready */ + atomic_t refcnt; /* used to cancel WRs... */ + int event; + struct iw_cm_id *cm_id; + struct c2_qp *qp; +}; + +extern int vq_init(struct c2_dev *c2dev); +extern void vq_term(struct c2_dev *c2dev); + +extern struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev); +extern void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *req); +extern void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *req); +extern void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *req); +extern int vq_send_wr(struct c2_dev *c2dev, union c2wr * wr); + +extern void *vq_repbuf_alloc(struct c2_dev *c2dev); +extern void vq_repbuf_free(struct c2_dev *c2dev, void *reply); + +extern int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req); +#endif /* _C2_VQ_H_ */ diff --git a/drivers/infiniband/hw/amso1100/c2_wr.h b/drivers/infiniband/hw/amso1100/c2_wr.h new file mode 100644 index 000000000000..3ec6c43bb0ef --- /dev/null +++ b/drivers/infiniband/hw/amso1100/c2_wr.h @@ -0,0 +1,1520 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _C2_WR_H_ +#define _C2_WR_H_ + +#ifdef CCDEBUG +#define CCWR_MAGIC 0xb07700b0 +#endif + +#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF + +/* Maximum allowed size in bytes of private_data exchange + * on connect. + */ +#define C2_MAX_PRIVATE_DATA_SIZE 200 + +/* + * These types are shared among the adapter, host, and CCIL consumer. + */ +enum c2_cq_notification_type { + C2_CQ_NOTIFICATION_TYPE_NONE = 1, + C2_CQ_NOTIFICATION_TYPE_NEXT, + C2_CQ_NOTIFICATION_TYPE_NEXT_SE +}; + +enum c2_setconfig_cmd { + C2_CFG_ADD_ADDR = 1, + C2_CFG_DEL_ADDR = 2, + C2_CFG_ADD_ROUTE = 3, + C2_CFG_DEL_ROUTE = 4 +}; + +enum c2_getconfig_cmd { + C2_GETCONFIG_ROUTES = 1, + C2_GETCONFIG_ADDRS +}; + +/* + * CCIL Work Request Identifiers + */ +enum c2wr_ids { + CCWR_RNIC_OPEN = 1, + CCWR_RNIC_QUERY, + CCWR_RNIC_SETCONFIG, + CCWR_RNIC_GETCONFIG, + CCWR_RNIC_CLOSE, + CCWR_CQ_CREATE, + CCWR_CQ_QUERY, + CCWR_CQ_MODIFY, + CCWR_CQ_DESTROY, + CCWR_QP_CONNECT, + CCWR_PD_ALLOC, + CCWR_PD_DEALLOC, + CCWR_SRQ_CREATE, + CCWR_SRQ_QUERY, + CCWR_SRQ_MODIFY, + CCWR_SRQ_DESTROY, + CCWR_QP_CREATE, + CCWR_QP_QUERY, + CCWR_QP_MODIFY, + CCWR_QP_DESTROY, + CCWR_NSMR_STAG_ALLOC, + CCWR_NSMR_REGISTER, + CCWR_NSMR_PBL, + CCWR_STAG_DEALLOC, + CCWR_NSMR_REREGISTER, + CCWR_SMR_REGISTER, + CCWR_MR_QUERY, + CCWR_MW_ALLOC, + CCWR_MW_QUERY, + CCWR_EP_CREATE, + CCWR_EP_GETOPT, + CCWR_EP_SETOPT, + CCWR_EP_DESTROY, + CCWR_EP_BIND, + CCWR_EP_CONNECT, + CCWR_EP_LISTEN, + CCWR_EP_SHUTDOWN, + CCWR_EP_LISTEN_CREATE, + CCWR_EP_LISTEN_DESTROY, + CCWR_EP_QUERY, + CCWR_CR_ACCEPT, + CCWR_CR_REJECT, + CCWR_CONSOLE, + CCWR_TERM, + CCWR_FLASH_INIT, + CCWR_FLASH, + CCWR_BUF_ALLOC, + CCWR_BUF_FREE, + CCWR_FLASH_WRITE, + CCWR_INIT, /* WARNING: Don't move this ever again! */ + + + + /* Add new IDs here */ + + + + /* + * WARNING: CCWR_LAST must always be the last verbs id defined! + * All the preceding IDs are fixed, and must not change. + * You can add new IDs, but must not remove or reorder + * any IDs. If you do, YOU will ruin any hope of + * compatability between versions. + */ + CCWR_LAST, + + /* + * Start over at 1 so that arrays indexed by user wr id's + * begin at 1. This is OK since the verbs and user wr id's + * are always used on disjoint sets of queues. + */ + /* + * The order of the CCWR_SEND_XX verbs must + * match the order of the RDMA_OPs + */ + CCWR_SEND = 1, + CCWR_SEND_INV, + CCWR_SEND_SE, + CCWR_SEND_SE_INV, + CCWR_RDMA_WRITE, + CCWR_RDMA_READ, + CCWR_RDMA_READ_INV, + CCWR_MW_BIND, + CCWR_NSMR_FASTREG, + CCWR_STAG_INVALIDATE, + CCWR_RECV, + CCWR_NOP, + CCWR_UNIMPL, +/* WARNING: This must always be the last user wr id defined! */ +}; +#define RDMA_SEND_OPCODE_FROM_WR_ID(x) (x+2) + +/* + * SQ/RQ Work Request Types + */ +enum c2_wr_type { + C2_WR_TYPE_SEND = CCWR_SEND, + C2_WR_TYPE_SEND_SE = CCWR_SEND_SE, + C2_WR_TYPE_SEND_INV = CCWR_SEND_INV, + C2_WR_TYPE_SEND_SE_INV = CCWR_SEND_SE_INV, + C2_WR_TYPE_RDMA_WRITE = CCWR_RDMA_WRITE, + C2_WR_TYPE_RDMA_READ = CCWR_RDMA_READ, + C2_WR_TYPE_RDMA_READ_INV_STAG = CCWR_RDMA_READ_INV, + C2_WR_TYPE_BIND_MW = CCWR_MW_BIND, + C2_WR_TYPE_FASTREG_NSMR = CCWR_NSMR_FASTREG, + C2_WR_TYPE_INV_STAG = CCWR_STAG_INVALIDATE, + C2_WR_TYPE_RECV = CCWR_RECV, + C2_WR_TYPE_NOP = CCWR_NOP, +}; + +struct c2_netaddr { + u32 ip_addr; + u32 netmask; + u32 mtu; +}; + +struct c2_route { + u32 ip_addr; /* 0 indicates the default route */ + u32 netmask; /* netmask associated with dst */ + u32 flags; + union { + u32 ipaddr; /* address of the nexthop interface */ + u8 enaddr[6]; + } nexthop; +}; + +/* + * A Scatter Gather Entry. + */ +struct c2_data_addr { + u32 stag; + u32 length; + u64 to; +}; + +/* + * MR and MW flags used by the consumer, RI, and RNIC. + */ +enum c2_mm_flags { + MEM_REMOTE = 0x0001, /* allow mw binds with remote access. */ + MEM_VA_BASED = 0x0002, /* Not Zero-based */ + MEM_PBL_COMPLETE = 0x0004, /* PBL array is complete in this msg */ + MEM_LOCAL_READ = 0x0008, /* allow local reads */ + MEM_LOCAL_WRITE = 0x0010, /* allow local writes */ + MEM_REMOTE_READ = 0x0020, /* allow remote reads */ + MEM_REMOTE_WRITE = 0x0040, /* allow remote writes */ + MEM_WINDOW_BIND = 0x0080, /* binds allowed */ + MEM_SHARED = 0x0100, /* set if MR is shared */ + MEM_STAG_VALID = 0x0200 /* set if STAG is in valid state */ +}; + +/* + * CCIL API ACF flags defined in terms of the low level mem flags. + * This minimizes translation needed in the user API + */ +enum c2_acf { + C2_ACF_LOCAL_READ = MEM_LOCAL_READ, + C2_ACF_LOCAL_WRITE = MEM_LOCAL_WRITE, + C2_ACF_REMOTE_READ = MEM_REMOTE_READ, + C2_ACF_REMOTE_WRITE = MEM_REMOTE_WRITE, + C2_ACF_WINDOW_BIND = MEM_WINDOW_BIND +}; + +/* + * Image types of objects written to flash + */ +#define C2_FLASH_IMG_BITFILE 1 +#define C2_FLASH_IMG_OPTION_ROM 2 +#define C2_FLASH_IMG_VPD 3 + +/* + * to fix bug 1815 we define the max size allowable of the + * terminate message (per the IETF spec).Refer to the IETF + * protocal specification, section 12.1.6, page 64) + * The message is prefixed by 20 types of DDP info. + * + * Then the message has 6 bytes for the terminate control + * and DDP segment length info plus a DDP header (either + * 14 or 18 byts) plus 28 bytes for the RDMA header. + * Thus the max size in: + * 20 + (6 + 18 + 28) = 72 + */ +#define C2_MAX_TERMINATE_MESSAGE_SIZE (72) + +/* + * Build String Length. It must be the same as C2_BUILD_STR_LEN in ccil_api.h + */ +#define WR_BUILD_STR_LEN 64 + +/* + * WARNING: All of these structs need to align any 64bit types on + * 64 bit boundaries! 64bit types include u64 and u64. + */ + +/* + * Clustercore Work Request Header. Be sensitive to field layout + * and alignment. + */ +struct c2wr_hdr { + /* wqe_count is part of the cqe. It is put here so the + * adapter can write to it while the wr is pending without + * clobbering part of the wr. This word need not be dma'd + * from the host to adapter by libccil, but we copy it anyway + * to make the memcpy to the adapter better aligned. + */ + u32 wqe_count; + + /* Put these fields next so that later 32- and 64-bit + * quantities are naturally aligned. + */ + u8 id; + u8 result; /* adapter -> host */ + u8 sge_count; /* host -> adapter */ + u8 flags; /* host -> adapter */ + + u64 context; +#ifdef CCMSGMAGIC + u32 magic; + u32 pad; +#endif +} __attribute__((packed)); + +/* + *------------------------ RNIC ------------------------ + */ + +/* + * WR_RNIC_OPEN + */ + +/* + * Flags for the RNIC WRs + */ +enum c2_rnic_flags { + RNIC_IRD_STATIC = 0x0001, + RNIC_ORD_STATIC = 0x0002, + RNIC_QP_STATIC = 0x0004, + RNIC_SRQ_SUPPORTED = 0x0008, + RNIC_PBL_BLOCK_MODE = 0x0010, + RNIC_SRQ_MODEL_ARRIVAL = 0x0020, + RNIC_CQ_OVF_DETECTED = 0x0040, + RNIC_PRIV_MODE = 0x0080 +}; + +struct c2wr_rnic_open_req { + struct c2wr_hdr hdr; + u64 user_context; + u16 flags; /* See enum c2_rnic_flags */ + u16 port_num; +} __attribute__((packed)); + +struct c2wr_rnic_open_rep { + struct c2wr_hdr hdr; + u32 rnic_handle; +} __attribute__((packed)); + +union c2wr_rnic_open { + struct c2wr_rnic_open_req req; + struct c2wr_rnic_open_rep rep; +} __attribute__((packed)); + +struct c2wr_rnic_query_req { + struct c2wr_hdr hdr; + u32 rnic_handle; +} __attribute__((packed)); + +/* + * WR_RNIC_QUERY + */ +struct c2wr_rnic_query_rep { + struct c2wr_hdr hdr; + u64 user_context; + u32 vendor_id; + u32 part_number; + u32 hw_version; + u32 fw_ver_major; + u32 fw_ver_minor; + u32 fw_ver_patch; + char fw_ver_build_str[WR_BUILD_STR_LEN]; + u32 max_qps; + u32 max_qp_depth; + u32 max_srq_depth; + u32 max_send_sgl_depth; + u32 max_rdma_sgl_depth; + u32 max_cqs; + u32 max_cq_depth; + u32 max_cq_event_handlers; + u32 max_mrs; + u32 max_pbl_depth; + u32 max_pds; + u32 max_global_ird; + u32 max_global_ord; + u32 max_qp_ird; + u32 max_qp_ord; + u32 flags; + u32 max_mws; + u32 pbe_range_low; + u32 pbe_range_high; + u32 max_srqs; + u32 page_size; +} __attribute__((packed)); + +union c2wr_rnic_query { + struct c2wr_rnic_query_req req; + struct c2wr_rnic_query_rep rep; +} __attribute__((packed)); + +/* + * WR_RNIC_GETCONFIG + */ + +struct c2wr_rnic_getconfig_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 option; /* see c2_getconfig_cmd_t */ + u64 reply_buf; + u32 reply_buf_len; +} __attribute__((packed)) ; + +struct c2wr_rnic_getconfig_rep { + struct c2wr_hdr hdr; + u32 option; /* see c2_getconfig_cmd_t */ + u32 count_len; /* length of the number of addresses configured */ +} __attribute__((packed)) ; + +union c2wr_rnic_getconfig { + struct c2wr_rnic_getconfig_req req; + struct c2wr_rnic_getconfig_rep rep; +} __attribute__((packed)) ; + +/* + * WR_RNIC_SETCONFIG + */ +struct c2wr_rnic_setconfig_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 option; /* See c2_setconfig_cmd_t */ + /* variable data and pad. See c2_netaddr and c2_route */ + u8 data[0]; +} __attribute__((packed)) ; + +struct c2wr_rnic_setconfig_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_rnic_setconfig { + struct c2wr_rnic_setconfig_req req; + struct c2wr_rnic_setconfig_rep rep; +} __attribute__((packed)) ; + +/* + * WR_RNIC_CLOSE + */ +struct c2wr_rnic_close_req { + struct c2wr_hdr hdr; + u32 rnic_handle; +} __attribute__((packed)) ; + +struct c2wr_rnic_close_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_rnic_close { + struct c2wr_rnic_close_req req; + struct c2wr_rnic_close_rep rep; +} __attribute__((packed)) ; + +/* + *------------------------ CQ ------------------------ + */ +struct c2wr_cq_create_req { + struct c2wr_hdr hdr; + u64 shared_ht; + u64 user_context; + u64 msg_pool; + u32 rnic_handle; + u32 msg_size; + u32 depth; +} __attribute__((packed)) ; + +struct c2wr_cq_create_rep { + struct c2wr_hdr hdr; + u32 mq_index; + u32 adapter_shared; + u32 cq_handle; +} __attribute__((packed)) ; + +union c2wr_cq_create { + struct c2wr_cq_create_req req; + struct c2wr_cq_create_rep rep; +} __attribute__((packed)) ; + +struct c2wr_cq_modify_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 cq_handle; + u32 new_depth; + u64 new_msg_pool; +} __attribute__((packed)) ; + +struct c2wr_cq_modify_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_cq_modify { + struct c2wr_cq_modify_req req; + struct c2wr_cq_modify_rep rep; +} __attribute__((packed)) ; + +struct c2wr_cq_destroy_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 cq_handle; +} __attribute__((packed)) ; + +struct c2wr_cq_destroy_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_cq_destroy { + struct c2wr_cq_destroy_req req; + struct c2wr_cq_destroy_rep rep; +} __attribute__((packed)) ; + +/* + *------------------------ PD ------------------------ + */ +struct c2wr_pd_alloc_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 pd_id; +} __attribute__((packed)) ; + +struct c2wr_pd_alloc_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_pd_alloc { + struct c2wr_pd_alloc_req req; + struct c2wr_pd_alloc_rep rep; +} __attribute__((packed)) ; + +struct c2wr_pd_dealloc_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 pd_id; +} __attribute__((packed)) ; + +struct c2wr_pd_dealloc_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_pd_dealloc { + struct c2wr_pd_dealloc_req req; + struct c2wr_pd_dealloc_rep rep; +} __attribute__((packed)) ; + +/* + *------------------------ SRQ ------------------------ + */ +struct c2wr_srq_create_req { + struct c2wr_hdr hdr; + u64 shared_ht; + u64 user_context; + u32 rnic_handle; + u32 srq_depth; + u32 srq_limit; + u32 sgl_depth; + u32 pd_id; +} __attribute__((packed)) ; + +struct c2wr_srq_create_rep { + struct c2wr_hdr hdr; + u32 srq_depth; + u32 sgl_depth; + u32 msg_size; + u32 mq_index; + u32 mq_start; + u32 srq_handle; +} __attribute__((packed)) ; + +union c2wr_srq_create { + struct c2wr_srq_create_req req; + struct c2wr_srq_create_rep rep; +} __attribute__((packed)) ; + +struct c2wr_srq_destroy_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 srq_handle; +} __attribute__((packed)) ; + +struct c2wr_srq_destroy_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_srq_destroy { + struct c2wr_srq_destroy_req req; + struct c2wr_srq_destroy_rep rep; +} __attribute__((packed)) ; + +/* + *------------------------ QP ------------------------ + */ +enum c2wr_qp_flags { + QP_RDMA_READ = 0x00000001, /* RDMA read enabled? */ + QP_RDMA_WRITE = 0x00000002, /* RDMA write enabled? */ + QP_MW_BIND = 0x00000004, /* MWs enabled */ + QP_ZERO_STAG = 0x00000008, /* enabled? */ + QP_REMOTE_TERMINATION = 0x00000010, /* remote end terminated */ + QP_RDMA_READ_RESPONSE = 0x00000020 /* Remote RDMA read */ + /* enabled? */ +}; + +struct c2wr_qp_create_req { + struct c2wr_hdr hdr; + u64 shared_sq_ht; + u64 shared_rq_ht; + u64 user_context; + u32 rnic_handle; + u32 sq_cq_handle; + u32 rq_cq_handle; + u32 sq_depth; + u32 rq_depth; + u32 srq_handle; + u32 srq_limit; + u32 flags; /* see enum c2wr_qp_flags */ + u32 send_sgl_depth; + u32 recv_sgl_depth; + u32 rdma_write_sgl_depth; + u32 ord; + u32 ird; + u32 pd_id; +} __attribute__((packed)) ; + +struct c2wr_qp_create_rep { + struct c2wr_hdr hdr; + u32 sq_depth; + u32 rq_depth; + u32 send_sgl_depth; + u32 recv_sgl_depth; + u32 rdma_write_sgl_depth; + u32 ord; + u32 ird; + u32 sq_msg_size; + u32 sq_mq_index; + u32 sq_mq_start; + u32 rq_msg_size; + u32 rq_mq_index; + u32 rq_mq_start; + u32 qp_handle; +} __attribute__((packed)) ; + +union c2wr_qp_create { + struct c2wr_qp_create_req req; + struct c2wr_qp_create_rep rep; +} __attribute__((packed)) ; + +struct c2wr_qp_query_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 qp_handle; +} __attribute__((packed)) ; + +struct c2wr_qp_query_rep { + struct c2wr_hdr hdr; + u64 user_context; + u32 rnic_handle; + u32 sq_depth; + u32 rq_depth; + u32 send_sgl_depth; + u32 rdma_write_sgl_depth; + u32 recv_sgl_depth; + u32 ord; + u32 ird; + u16 qp_state; + u16 flags; /* see c2wr_qp_flags_t */ + u32 qp_id; + u32 local_addr; + u32 remote_addr; + u16 local_port; + u16 remote_port; + u32 terminate_msg_length; /* 0 if not present */ + u8 data[0]; + /* Terminate Message in-line here. */ +} __attribute__((packed)) ; + +union c2wr_qp_query { + struct c2wr_qp_query_req req; + struct c2wr_qp_query_rep rep; +} __attribute__((packed)) ; + +struct c2wr_qp_modify_req { + struct c2wr_hdr hdr; + u64 stream_msg; + u32 stream_msg_length; + u32 rnic_handle; + u32 qp_handle; + u32 next_qp_state; + u32 ord; + u32 ird; + u32 sq_depth; + u32 rq_depth; + u32 llp_ep_handle; +} __attribute__((packed)) ; + +struct c2wr_qp_modify_rep { + struct c2wr_hdr hdr; + u32 ord; + u32 ird; + u32 sq_depth; + u32 rq_depth; + u32 sq_msg_size; + u32 sq_mq_index; + u32 sq_mq_start; + u32 rq_msg_size; + u32 rq_mq_index; + u32 rq_mq_start; +} __attribute__((packed)) ; + +union c2wr_qp_modify { + struct c2wr_qp_modify_req req; + struct c2wr_qp_modify_rep rep; +} __attribute__((packed)) ; + +struct c2wr_qp_destroy_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 qp_handle; +} __attribute__((packed)) ; + +struct c2wr_qp_destroy_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_qp_destroy { + struct c2wr_qp_destroy_req req; + struct c2wr_qp_destroy_rep rep; +} __attribute__((packed)) ; + +/* + * The CCWR_QP_CONNECT msg is posted on the verbs request queue. It can + * only be posted when a QP is in IDLE state. After the connect request is + * submitted to the LLP, the adapter moves the QP to CONNECT_PENDING state. + * No synchronous reply from adapter to this WR. The results of + * connection are passed back in an async event CCAE_ACTIVE_CONNECT_RESULTS + * See c2wr_ae_active_connect_results_t + */ +struct c2wr_qp_connect_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 qp_handle; + u32 remote_addr; + u16 remote_port; + u16 pad; + u32 private_data_length; + u8 private_data[0]; /* Private data in-line. */ +} __attribute__((packed)) ; + +struct c2wr_qp_connect { + struct c2wr_qp_connect_req req; + /* no synchronous reply. */ +} __attribute__((packed)) ; + + +/* + *------------------------ MM ------------------------ + */ + +struct c2wr_nsmr_stag_alloc_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 pbl_depth; + u32 pd_id; + u32 flags; +} __attribute__((packed)) ; + +struct c2wr_nsmr_stag_alloc_rep { + struct c2wr_hdr hdr; + u32 pbl_depth; + u32 stag_index; +} __attribute__((packed)) ; + +union c2wr_nsmr_stag_alloc { + struct c2wr_nsmr_stag_alloc_req req; + struct c2wr_nsmr_stag_alloc_rep rep; +} __attribute__((packed)) ; + +struct c2wr_nsmr_register_req { + struct c2wr_hdr hdr; + u64 va; + u32 rnic_handle; + u16 flags; + u8 stag_key; + u8 pad; + u32 pd_id; + u32 pbl_depth; + u32 pbe_size; + u32 fbo; + u32 length; + u32 addrs_length; + /* array of paddrs (must be aligned on a 64bit boundary) */ + u64 paddrs[0]; +} __attribute__((packed)) ; + +struct c2wr_nsmr_register_rep { + struct c2wr_hdr hdr; + u32 pbl_depth; + u32 stag_index; +} __attribute__((packed)) ; + +union c2wr_nsmr_register { + struct c2wr_nsmr_register_req req; + struct c2wr_nsmr_register_rep rep; +} __attribute__((packed)) ; + +struct c2wr_nsmr_pbl_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 flags; + u32 stag_index; + u32 addrs_length; + /* array of paddrs (must be aligned on a 64bit boundary) */ + u64 paddrs[0]; +} __attribute__((packed)) ; + +struct c2wr_nsmr_pbl_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_nsmr_pbl { + struct c2wr_nsmr_pbl_req req; + struct c2wr_nsmr_pbl_rep rep; +} __attribute__((packed)) ; + +struct c2wr_mr_query_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 stag_index; +} __attribute__((packed)) ; + +struct c2wr_mr_query_rep { + struct c2wr_hdr hdr; + u8 stag_key; + u8 pad[3]; + u32 pd_id; + u32 flags; + u32 pbl_depth; +} __attribute__((packed)) ; + +union c2wr_mr_query { + struct c2wr_mr_query_req req; + struct c2wr_mr_query_rep rep; +} __attribute__((packed)) ; + +struct c2wr_mw_query_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 stag_index; +} __attribute__((packed)) ; + +struct c2wr_mw_query_rep { + struct c2wr_hdr hdr; + u8 stag_key; + u8 pad[3]; + u32 pd_id; + u32 flags; +} __attribute__((packed)) ; + +union c2wr_mw_query { + struct c2wr_mw_query_req req; + struct c2wr_mw_query_rep rep; +} __attribute__((packed)) ; + + +struct c2wr_stag_dealloc_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 stag_index; +} __attribute__((packed)) ; + +struct c2wr_stag_dealloc_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)) ; + +union c2wr_stag_dealloc { + struct c2wr_stag_dealloc_req req; + struct c2wr_stag_dealloc_rep rep; +} __attribute__((packed)) ; + +struct c2wr_nsmr_reregister_req { + struct c2wr_hdr hdr; + u64 va; + u32 rnic_handle; + u16 flags; + u8 stag_key; + u8 pad; + u32 stag_index; + u32 pd_id; + u32 pbl_depth; + u32 pbe_size; + u32 fbo; + u32 length; + u32 addrs_length; + u32 pad1; + /* array of paddrs (must be aligned on a 64bit boundary) */ + u64 paddrs[0]; +} __attribute__((packed)) ; + +struct c2wr_nsmr_reregister_rep { + struct c2wr_hdr hdr; + u32 pbl_depth; + u32 stag_index; +} __attribute__((packed)) ; + +union c2wr_nsmr_reregister { + struct c2wr_nsmr_reregister_req req; + struct c2wr_nsmr_reregister_rep rep; +} __attribute__((packed)) ; + +struct c2wr_smr_register_req { + struct c2wr_hdr hdr; + u64 va; + u32 rnic_handle; + u16 flags; + u8 stag_key; + u8 pad; + u32 stag_index; + u32 pd_id; +} __attribute__((packed)) ; + +struct c2wr_smr_register_rep { + struct c2wr_hdr hdr; + u32 stag_index; +} __attribute__((packed)) ; + +union c2wr_smr_register { + struct c2wr_smr_register_req req; + struct c2wr_smr_register_rep rep; +} __attribute__((packed)) ; + +struct c2wr_mw_alloc_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 pd_id; +} __attribute__((packed)) ; + +struct c2wr_mw_alloc_rep { + struct c2wr_hdr hdr; + u32 stag_index; +} __attribute__((packed)) ; + +union c2wr_mw_alloc { + struct c2wr_mw_alloc_req req; + struct c2wr_mw_alloc_rep rep; +} __attribute__((packed)) ; + +/* + *------------------------ WRs ----------------------- + */ + +struct c2wr_user_hdr { + struct c2wr_hdr hdr; /* Has status and WR Type */ +} __attribute__((packed)) ; + +enum c2_qp_state { + C2_QP_STATE_IDLE = 0x01, + C2_QP_STATE_CONNECTING = 0x02, + C2_QP_STATE_RTS = 0x04, + C2_QP_STATE_CLOSING = 0x08, + C2_QP_STATE_TERMINATE = 0x10, + C2_QP_STATE_ERROR = 0x20, +}; + +/* Completion queue entry. */ +struct c2wr_ce { + struct c2wr_hdr hdr; /* Has status and WR Type */ + u64 qp_user_context; /* c2_user_qp_t * */ + u32 qp_state; /* Current QP State */ + u32 handle; /* QPID or EP Handle */ + u32 bytes_rcvd; /* valid for RECV WCs */ + u32 stag; +} __attribute__((packed)) ; + + +/* + * Flags used for all post-sq WRs. These must fit in the flags + * field of the struct c2wr_hdr (eight bits). + */ +enum { + SQ_SIGNALED = 0x01, + SQ_READ_FENCE = 0x02, + SQ_FENCE = 0x04, +}; + +/* + * Common fields for all post-sq WRs. Namely the standard header and a + * secondary header with fields common to all post-sq WRs. + */ +struct c2_sq_hdr { + struct c2wr_user_hdr user_hdr; +} __attribute__((packed)); + +/* + * Same as above but for post-rq WRs. + */ +struct c2_rq_hdr { + struct c2wr_user_hdr user_hdr; +} __attribute__((packed)); + +/* + * use the same struct for all sends. + */ +struct c2wr_send_req { + struct c2_sq_hdr sq_hdr; + u32 sge_len; + u32 remote_stag; + u8 data[0]; /* SGE array */ +} __attribute__((packed)); + +union c2wr_send { + struct c2wr_send_req req; + struct c2wr_ce rep; +} __attribute__((packed)); + +struct c2wr_rdma_write_req { + struct c2_sq_hdr sq_hdr; + u64 remote_to; + u32 remote_stag; + u32 sge_len; + u8 data[0]; /* SGE array */ +} __attribute__((packed)); + +union c2wr_rdma_write { + struct c2wr_rdma_write_req req; + struct c2wr_ce rep; +} __attribute__((packed)); + +struct c2wr_rdma_read_req { + struct c2_sq_hdr sq_hdr; + u64 local_to; + u64 remote_to; + u32 local_stag; + u32 remote_stag; + u32 length; +} __attribute__((packed)); + +union c2wr_rdma_read { + struct c2wr_rdma_read_req req; + struct c2wr_ce rep; +} __attribute__((packed)); + +struct c2wr_mw_bind_req { + struct c2_sq_hdr sq_hdr; + u64 va; + u8 stag_key; + u8 pad[3]; + u32 mw_stag_index; + u32 mr_stag_index; + u32 length; + u32 flags; +} __attribute__((packed)); + +union c2wr_mw_bind { + struct c2wr_mw_bind_req req; + struct c2wr_ce rep; +} __attribute__((packed)); + +struct c2wr_nsmr_fastreg_req { + struct c2_sq_hdr sq_hdr; + u64 va; + u8 stag_key; + u8 pad[3]; + u32 stag_index; + u32 pbe_size; + u32 fbo; + u32 length; + u32 addrs_length; + /* array of paddrs (must be aligned on a 64bit boundary) */ + u64 paddrs[0]; +} __attribute__((packed)); + +union c2wr_nsmr_fastreg { + struct c2wr_nsmr_fastreg_req req; + struct c2wr_ce rep; +} __attribute__((packed)); + +struct c2wr_stag_invalidate_req { + struct c2_sq_hdr sq_hdr; + u8 stag_key; + u8 pad[3]; + u32 stag_index; +} __attribute__((packed)); + +union c2wr_stag_invalidate { + struct c2wr_stag_invalidate_req req; + struct c2wr_ce rep; +} __attribute__((packed)); + +union c2wr_sqwr { + struct c2_sq_hdr sq_hdr; + struct c2wr_send_req send; + struct c2wr_send_req send_se; + struct c2wr_send_req send_inv; + struct c2wr_send_req send_se_inv; + struct c2wr_rdma_write_req rdma_write; + struct c2wr_rdma_read_req rdma_read; + struct c2wr_mw_bind_req mw_bind; + struct c2wr_nsmr_fastreg_req nsmr_fastreg; + struct c2wr_stag_invalidate_req stag_inv; +} __attribute__((packed)); + + +/* + * RQ WRs + */ +struct c2wr_rqwr { + struct c2_rq_hdr rq_hdr; + u8 data[0]; /* array of SGEs */ +} __attribute__((packed)); + +union c2wr_recv { + struct c2wr_rqwr req; + struct c2wr_ce rep; +} __attribute__((packed)); + +/* + * All AEs start with this header. Most AEs only need to convey the + * information in the header. Some, like LLP connection events, need + * more info. The union typdef c2wr_ae_t has all the possible AEs. + * + * hdr.context is the user_context from the rnic_open WR. NULL If this + * is not affiliated with an rnic + * + * hdr.id is the AE identifier (eg; CCAE_REMOTE_SHUTDOWN, + * CCAE_LLP_CLOSE_COMPLETE) + * + * resource_type is one of: C2_RES_IND_QP, C2_RES_IND_CQ, C2_RES_IND_SRQ + * + * user_context is the context passed down when the host created the resource. + */ +struct c2wr_ae_hdr { + struct c2wr_hdr hdr; + u64 user_context; /* user context for this res. */ + u32 resource_type; /* see enum c2_resource_indicator */ + u32 resource; /* handle for resource */ + u32 qp_state; /* current QP State */ +} __attribute__((packed)); + +/* + * After submitting the CCAE_ACTIVE_CONNECT_RESULTS message on the AEQ, + * the adapter moves the QP into RTS state + */ +struct c2wr_ae_active_connect_results { + struct c2wr_ae_hdr ae_hdr; + u32 laddr; + u32 raddr; + u16 lport; + u16 rport; + u32 private_data_length; + u8 private_data[0]; /* data is in-line in the msg. */ +} __attribute__((packed)); + +/* + * When connections are established by the stack (and the private data + * MPA frame is received), the adapter will generate an event to the host. + * The details of the connection, any private data, and the new connection + * request handle is passed up via the CCAE_CONNECTION_REQUEST msg on the + * AE queue: + */ +struct c2wr_ae_connection_request { + struct c2wr_ae_hdr ae_hdr; + u32 cr_handle; /* connreq handle (sock ptr) */ + u32 laddr; + u32 raddr; + u16 lport; + u16 rport; + u32 private_data_length; + u8 private_data[0]; /* data is in-line in the msg. */ +} __attribute__((packed)); + +union c2wr_ae { + struct c2wr_ae_hdr ae_generic; + struct c2wr_ae_active_connect_results ae_active_connect_results; + struct c2wr_ae_connection_request ae_connection_request; +} __attribute__((packed)); + +struct c2wr_init_req { + struct c2wr_hdr hdr; + u64 hint_count; + u64 q0_host_shared; + u64 q1_host_shared; + u64 q1_host_msg_pool; + u64 q2_host_shared; + u64 q2_host_msg_pool; +} __attribute__((packed)); + +struct c2wr_init_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)); + +union c2wr_init { + struct c2wr_init_req req; + struct c2wr_init_rep rep; +} __attribute__((packed)); + +/* + * For upgrading flash. + */ + +struct c2wr_flash_init_req { + struct c2wr_hdr hdr; + u32 rnic_handle; +} __attribute__((packed)); + +struct c2wr_flash_init_rep { + struct c2wr_hdr hdr; + u32 adapter_flash_buf_offset; + u32 adapter_flash_len; +} __attribute__((packed)); + +union c2wr_flash_init { + struct c2wr_flash_init_req req; + struct c2wr_flash_init_rep rep; +} __attribute__((packed)); + +struct c2wr_flash_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 len; +} __attribute__((packed)); + +struct c2wr_flash_rep { + struct c2wr_hdr hdr; + u32 status; +} __attribute__((packed)); + +union c2wr_flash { + struct c2wr_flash_req req; + struct c2wr_flash_rep rep; +} __attribute__((packed)); + +struct c2wr_buf_alloc_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 size; +} __attribute__((packed)); + +struct c2wr_buf_alloc_rep { + struct c2wr_hdr hdr; + u32 offset; /* 0 if mem not available */ + u32 size; /* 0 if mem not available */ +} __attribute__((packed)); + +union c2wr_buf_alloc { + struct c2wr_buf_alloc_req req; + struct c2wr_buf_alloc_rep rep; +} __attribute__((packed)); + +struct c2wr_buf_free_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 offset; /* Must match value from alloc */ + u32 size; /* Must match value from alloc */ +} __attribute__((packed)); + +struct c2wr_buf_free_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)); + +union c2wr_buf_free { + struct c2wr_buf_free_req req; + struct c2wr_ce rep; +} __attribute__((packed)); + +struct c2wr_flash_write_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 offset; + u32 size; + u32 type; + u32 flags; +} __attribute__((packed)); + +struct c2wr_flash_write_rep { + struct c2wr_hdr hdr; + u32 status; +} __attribute__((packed)); + +union c2wr_flash_write { + struct c2wr_flash_write_req req; + struct c2wr_flash_write_rep rep; +} __attribute__((packed)); + +/* + * Messages for LLP connection setup. + */ + +/* + * Listen Request. This allocates a listening endpoint to allow passive + * connection setup. Newly established LLP connections are passed up + * via an AE. See c2wr_ae_connection_request_t + */ +struct c2wr_ep_listen_create_req { + struct c2wr_hdr hdr; + u64 user_context; /* returned in AEs. */ + u32 rnic_handle; + u32 local_addr; /* local addr, or 0 */ + u16 local_port; /* 0 means "pick one" */ + u16 pad; + u32 backlog; /* tradional tcp listen bl */ +} __attribute__((packed)); + +struct c2wr_ep_listen_create_rep { + struct c2wr_hdr hdr; + u32 ep_handle; /* handle to new listening ep */ + u16 local_port; /* resulting port... */ + u16 pad; +} __attribute__((packed)); + +union c2wr_ep_listen_create { + struct c2wr_ep_listen_create_req req; + struct c2wr_ep_listen_create_rep rep; +} __attribute__((packed)); + +struct c2wr_ep_listen_destroy_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 ep_handle; +} __attribute__((packed)); + +struct c2wr_ep_listen_destroy_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)); + +union c2wr_ep_listen_destroy { + struct c2wr_ep_listen_destroy_req req; + struct c2wr_ep_listen_destroy_rep rep; +} __attribute__((packed)); + +struct c2wr_ep_query_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 ep_handle; +} __attribute__((packed)); + +struct c2wr_ep_query_rep { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 local_addr; + u32 remote_addr; + u16 local_port; + u16 remote_port; +} __attribute__((packed)); + +union c2wr_ep_query { + struct c2wr_ep_query_req req; + struct c2wr_ep_query_rep rep; +} __attribute__((packed)); + + +/* + * The host passes this down to indicate acceptance of a pending iWARP + * connection. The cr_handle was obtained from the CONNECTION_REQUEST + * AE passed up by the adapter. See c2wr_ae_connection_request_t. + */ +struct c2wr_cr_accept_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 qp_handle; /* QP to bind to this LLP conn */ + u32 ep_handle; /* LLP handle to accept */ + u32 private_data_length; + u8 private_data[0]; /* data in-line in msg. */ +} __attribute__((packed)); + +/* + * adapter sends reply when private data is successfully submitted to + * the LLP. + */ +struct c2wr_cr_accept_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)); + +union c2wr_cr_accept { + struct c2wr_cr_accept_req req; + struct c2wr_cr_accept_rep rep; +} __attribute__((packed)); + +/* + * The host sends this down if a given iWARP connection request was + * rejected by the consumer. The cr_handle was obtained from a + * previous c2wr_ae_connection_request_t AE sent by the adapter. + */ +struct c2wr_cr_reject_req { + struct c2wr_hdr hdr; + u32 rnic_handle; + u32 ep_handle; /* LLP handle to reject */ +} __attribute__((packed)); + +/* + * Dunno if this is needed, but we'll add it for now. The adapter will + * send the reject_reply after the LLP endpoint has been destroyed. + */ +struct c2wr_cr_reject_rep { + struct c2wr_hdr hdr; +} __attribute__((packed)); + +union c2wr_cr_reject { + struct c2wr_cr_reject_req req; + struct c2wr_cr_reject_rep rep; +} __attribute__((packed)); + +/* + * console command. Used to implement a debug console over the verbs + * request and reply queues. + */ + +/* + * Console request message. It contains: + * - message hdr with id = CCWR_CONSOLE + * - the physaddr/len of host memory to be used for the reply. + * - the command string. eg: "netstat -s" or "zoneinfo" + */ +struct c2wr_console_req { + struct c2wr_hdr hdr; /* id = CCWR_CONSOLE */ + u64 reply_buf; /* pinned host buf for reply */ + u32 reply_buf_len; /* length of reply buffer */ + u8 command[0]; /* NUL terminated ascii string */ + /* containing the command req */ +} __attribute__((packed)); + +/* + * flags used in the console reply. + */ +enum c2_console_flags { + CONS_REPLY_TRUNCATED = 0x00000001 /* reply was truncated */ +} __attribute__((packed)); + +/* + * Console reply message. + * hdr.result contains the c2_status_t error if the reply was _not_ generated, + * or C2_OK if the reply was generated. + */ +struct c2wr_console_rep { + struct c2wr_hdr hdr; /* id = CCWR_CONSOLE */ + u32 flags; +} __attribute__((packed)); + +union c2wr_console { + struct c2wr_console_req req; + struct c2wr_console_rep rep; +} __attribute__((packed)); + + +/* + * Giant union with all WRs. Makes life easier... + */ +union c2wr { + struct c2wr_hdr hdr; + struct c2wr_user_hdr user_hdr; + union c2wr_rnic_open rnic_open; + union c2wr_rnic_query rnic_query; + union c2wr_rnic_getconfig rnic_getconfig; + union c2wr_rnic_setconfig rnic_setconfig; + union c2wr_rnic_close rnic_close; + union c2wr_cq_create cq_create; + union c2wr_cq_modify cq_modify; + union c2wr_cq_destroy cq_destroy; + union c2wr_pd_alloc pd_alloc; + union c2wr_pd_dealloc pd_dealloc; + union c2wr_srq_create srq_create; + union c2wr_srq_destroy srq_destroy; + union c2wr_qp_create qp_create; + union c2wr_qp_query qp_query; + union c2wr_qp_modify qp_modify; + union c2wr_qp_destroy qp_destroy; + struct c2wr_qp_connect qp_connect; + union c2wr_nsmr_stag_alloc nsmr_stag_alloc; + union c2wr_nsmr_register nsmr_register; + union c2wr_nsmr_pbl nsmr_pbl; + union c2wr_mr_query mr_query; + union c2wr_mw_query mw_query; + union c2wr_stag_dealloc stag_dealloc; + union c2wr_sqwr sqwr; + struct c2wr_rqwr rqwr; + struct c2wr_ce ce; + union c2wr_ae ae; + union c2wr_init init; + union c2wr_ep_listen_create ep_listen_create; + union c2wr_ep_listen_destroy ep_listen_destroy; + union c2wr_cr_accept cr_accept; + union c2wr_cr_reject cr_reject; + union c2wr_console console; + union c2wr_flash_init flash_init; + union c2wr_flash flash; + union c2wr_buf_alloc buf_alloc; + union c2wr_buf_free buf_free; + union c2wr_flash_write flash_write; +} __attribute__((packed)); + + +/* + * Accessors for the wr fields that are packed together tightly to + * reduce the wr message size. The wr arguments are void* so that + * either a struct c2wr*, a struct c2wr_hdr*, or a pointer to any of the types + * in the struct c2wr union can be passed in. + */ +static __inline__ u8 c2_wr_get_id(void *wr) +{ + return ((struct c2wr_hdr *) wr)->id; +} +static __inline__ void c2_wr_set_id(void *wr, u8 id) +{ + ((struct c2wr_hdr *) wr)->id = id; +} +static __inline__ u8 c2_wr_get_result(void *wr) +{ + return ((struct c2wr_hdr *) wr)->result; +} +static __inline__ void c2_wr_set_result(void *wr, u8 result) +{ + ((struct c2wr_hdr *) wr)->result = result; +} +static __inline__ u8 c2_wr_get_flags(void *wr) +{ + return ((struct c2wr_hdr *) wr)->flags; +} +static __inline__ void c2_wr_set_flags(void *wr, u8 flags) +{ + ((struct c2wr_hdr *) wr)->flags = flags; +} +static __inline__ u8 c2_wr_get_sge_count(void *wr) +{ + return ((struct c2wr_hdr *) wr)->sge_count; +} +static __inline__ void c2_wr_set_sge_count(void *wr, u8 sge_count) +{ + ((struct c2wr_hdr *) wr)->sge_count = sge_count; +} +static __inline__ u32 c2_wr_get_wqe_count(void *wr) +{ + return ((struct c2wr_hdr *) wr)->wqe_count; +} +static __inline__ void c2_wr_set_wqe_count(void *wr, u32 wqe_count) +{ + ((struct c2wr_hdr *) wr)->wqe_count = wqe_count; +} + +#endif /* _C2_WR_H_ */ From 61a73c708f37295892176bc911b178278df6a091 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Fri, 1 Sep 2006 15:33:55 -0700 Subject: [PATCH 0775/1063] RDMA/cma: Protect against adding device during destruction Closes a window where address resolution can attach an rdma_cm_id to a device during destruction of the rdma_cm_id. This can result in the rdma_cm_id remaining in the device list after its memory has been freed. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e88a7c652ca0..488fa1d642a7 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -279,7 +279,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv) default: return -ENODEV; } - mutex_lock(&lock); + list_for_each_entry(cma_dev, &dev_list, list) { ret = ib_find_cached_gid(cma_dev->device, &gid, &id_priv->id.port_num, NULL); @@ -288,7 +288,6 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv) break; } } - mutex_unlock(&lock); return ret; } @@ -712,7 +711,9 @@ void rdma_destroy_id(struct rdma_cm_id *id) state = cma_exch(id_priv, CMA_DESTROYING); cma_cancel_operation(id_priv, state); + mutex_lock(&lock); if (id_priv->cma_dev) { + mutex_unlock(&lock); switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) @@ -727,8 +728,8 @@ void rdma_destroy_id(struct rdma_cm_id *id) } mutex_lock(&lock); cma_detach_from_dev(id_priv); - mutex_unlock(&lock); } + mutex_unlock(&lock); cma_release_port(id_priv); cma_deref_id(id_priv); @@ -925,7 +926,9 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) } atomic_inc(&conn_id->dev_remove); + mutex_lock(&lock); ret = cma_acquire_dev(conn_id); + mutex_unlock(&lock); if (ret) { ret = -ENODEV; cma_release_remove(conn_id); @@ -1097,7 +1100,9 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, goto out; } + mutex_lock(&lock); ret = cma_acquire_dev(conn_id); + mutex_unlock(&lock); if (ret) { cma_release_remove(conn_id); rdma_destroy_id(new_cm_id); @@ -1507,16 +1512,26 @@ static void addr_handler(int status, struct sockaddr *src_addr, enum rdma_cm_event_type event; atomic_inc(&id_priv->dev_remove); - if (!id_priv->cma_dev && !status) + + /* + * Grab mutex to block rdma_destroy_id() from removing the device while + * we're trying to acquire it. + */ + mutex_lock(&lock); + if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) { + mutex_unlock(&lock); + goto out; + } + + if (!status && !id_priv->cma_dev) status = cma_acquire_dev(id_priv); + mutex_unlock(&lock); if (status) { - if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND)) + if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND)) goto out; event = RDMA_CM_EVENT_ADDR_ERROR; } else { - if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) - goto out; memcpy(&id_priv->id.route.addr.src_addr, src_addr, ip_addr_size(src_addr)); event = RDMA_CM_EVENT_ADDR_RESOLVED; @@ -1740,8 +1755,11 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) if (!cma_any_addr(addr)) { ret = rdma_translate_ip(addr, &id->route.addr.dev_addr); - if (!ret) + if (!ret) { + mutex_lock(&lock); ret = cma_acquire_dev(id_priv); + mutex_unlock(&lock); + } if (ret) goto err; } From 777a71dd4d901f055967ddbd038d2a74ffce0eb8 Mon Sep 17 00:00:00 2001 From: Erez Zilber Date: Mon, 11 Sep 2006 12:19:17 +0300 Subject: [PATCH 0776/1063] IB/iser: fix a check of SG alignment for RDMA dma mapping may include a "compaction" of the sg associated with scsi command. Hence, the size of the maximal prefix of the SG which is aligned for rdma must be compared against the length of the dma mapped sg (mem->dma_nents) and not against the size of it before it was mapped (mem->size). Signed-off-by: Erez Zilber Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 31950a522a1c..53af9567632e 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -378,7 +378,7 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, regd_buf = &iser_ctask->rdma_regd[cmd_dir]; aligned_len = iser_data_buf_aligned_len(mem); - if (aligned_len != mem->size) { + if (aligned_len != mem->dma_nents) { iser_err("rdma alignment violation %d/%d aligned\n", aligned_len, mem->size); iser_data_buf_dump(mem); From 8072ec2f8f6790df91e85d833e672c9c30a7ab3c Mon Sep 17 00:00:00 2001 From: Erez Zilber Date: Mon, 11 Sep 2006 12:20:54 +0300 Subject: [PATCH 0777/1063] IB/iser: Limit the max size of a scsi command Currently, the data length of a command coming down from scsi-ml is limited only by the size of its sg list (sg_tablesize). The max data length may be different for different page size values. By setting max_sectors, we limit the data length to max_sectors*512 bytes. Signed-off-by: Erez Zilber Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 1437d7ee3b19..e9cf1a9f1e1c 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -555,6 +555,7 @@ static struct scsi_host_template iscsi_iser_sht = { .queuecommand = iscsi_queuecommand, .can_queue = ISCSI_XMIT_CMDS_MAX - 1, .sg_tablesize = ISCSI_ISER_SG_TABLESIZE, + .max_sectors = 1024, .cmd_per_lun = ISCSI_MAX_CMD_PER_LUN, .eh_abort_handler = iscsi_eh_abort, .eh_host_reset_handler = iscsi_eh_host_reset, From 8dfa0876d3dde5f9c1818a4c35caaabc3ddba78b Mon Sep 17 00:00:00 2001 From: Erez Zilber Date: Mon, 11 Sep 2006 12:22:30 +0300 Subject: [PATCH 0778/1063] IB/iser: make FMR "page size" be 4K and not PAGE_SIZE As iser is able to use at most one rdma operation for the execution of a scsi command, and registration of the sg associated with scsi command has its restrictions, the code checks if an sg is "aligned for rdma". Alignment for rdma is measured in "fmr page" units whose possible resolutions are different between HCAs and can be smaller, equal or bigger to the system page size. When the system page size is bigger than 4KB (eg the default with ia64 kernels) there a bigger chance that an sg would be aligned for rdma if the fmr page size is 4KB. Change the code to create FMR whose pages are of size 4KB and to take that into account when processing the sg. Signed-off-by: Erez Zilber Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 6 ++++- drivers/infiniband/ulp/iser/iser_memory.c | 29 +++++++++++++++-------- drivers/infiniband/ulp/iser/iser_verbs.c | 4 ++-- 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 3350ba690cfe..0ba02abb0414 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -82,8 +82,12 @@ __func__ , ## arg); \ } while (0) +#define SHIFT_4K 12 +#define SIZE_4K (1UL << SHIFT_4K) +#define MASK_4K (~(SIZE_4K-1)) + /* support upto 512KB in one RDMA */ -#define ISCSI_ISER_SG_TABLESIZE (0x80000 >> PAGE_SHIFT) +#define ISCSI_ISER_SG_TABLESIZE (0x80000 >> SHIFT_4K) #define ISCSI_ISER_MAX_LUN 256 #define ISCSI_ISER_MAX_CMD_LEN 16 diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 53af9567632e..bcef0d31f756 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -42,6 +42,7 @@ #include "iscsi_iser.h" #define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */ + /** * Decrements the reference count for the * registered buffer & releases it @@ -239,7 +240,7 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data, int i; /* compute the offset of first element */ - page_vec->offset = (u64) sg[0].offset; + page_vec->offset = (u64) sg[0].offset & ~MASK_4K; for (i = 0; i < data->dma_nents; i++) { total_sz += sg_dma_len(&sg[i]); @@ -247,21 +248,30 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data, first_addr = sg_dma_address(&sg[i]); last_addr = first_addr + sg_dma_len(&sg[i]); - start_aligned = !(first_addr & ~PAGE_MASK); - end_aligned = !(last_addr & ~PAGE_MASK); + start_aligned = !(first_addr & ~MASK_4K); + end_aligned = !(last_addr & ~MASK_4K); /* continue to collect page fragments till aligned or SG ends */ while (!end_aligned && (i + 1 < data->dma_nents)) { i++; total_sz += sg_dma_len(&sg[i]); last_addr = sg_dma_address(&sg[i]) + sg_dma_len(&sg[i]); - end_aligned = !(last_addr & ~PAGE_MASK); + end_aligned = !(last_addr & ~MASK_4K); } - first_addr = first_addr & PAGE_MASK; + /* handle the 1st page in the 1st DMA element */ + if (cur_page == 0) { + page = first_addr & MASK_4K; + page_vec->pages[cur_page] = page; + cur_page++; + page += SIZE_4K; + } else + page = first_addr; - for (page = first_addr; page < last_addr; page += PAGE_SIZE) - page_vec->pages[cur_page++] = page; + for (; page < last_addr; page += SIZE_4K) { + page_vec->pages[cur_page] = page; + cur_page++; + } } page_vec->data_size = total_sz; @@ -269,8 +279,7 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data, return cur_page; } -#define MASK_4K ((1UL << 12) - 1) /* 0xFFF */ -#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & MASK_4K) == 0) +#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0) /** * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned @@ -352,7 +361,7 @@ static void iser_page_vec_build(struct iser_data_buf *data, page_vec->length = page_vec_len; - if (page_vec_len * PAGE_SIZE < page_vec->data_size) { + if (page_vec_len * SIZE_4K < page_vec->data_size) { iser_err("page_vec too short to hold this SG\n"); iser_data_buf_dump(data); iser_dump_page_vec(page_vec); diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 72febf1f8ff8..9b27a7c26aa8 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -150,7 +150,7 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn) } ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1); - params.page_shift = PAGE_SHIFT; + params.page_shift = SHIFT_4K; /* when the first/last SG element are not start/end * * page aligned, the map whould be of N+1 pages */ params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1; @@ -604,7 +604,7 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, mem_reg->lkey = mem->fmr->lkey; mem_reg->rkey = mem->fmr->rkey; - mem_reg->len = page_vec->length * PAGE_SIZE; + mem_reg->len = page_vec->length * SIZE_4K; mem_reg->va = io_addr; mem_reg->mem_h = (void *)mem; From e981f1d4b8288072ba7cf6b7141cd4aefb404383 Mon Sep 17 00:00:00 2001 From: Erez Zilber Date: Mon, 11 Sep 2006 12:24:00 +0300 Subject: [PATCH 0779/1063] IB/iser: fix some debug prints fix and add some debug prints related to iser handling of memory for rdma. Signed-off-by: Erez Zilber Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_memory.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index bcef0d31f756..8fea0bce5042 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -329,9 +329,9 @@ static void iser_data_buf_dump(struct iser_data_buf *data) struct scatterlist *sg = (struct scatterlist *)data->buf; int i; - for (i = 0; i < data->size; i++) + for (i = 0; i < data->dma_nents; i++) iser_err("sg[%d] dma_addr:0x%lX page:0x%p " - "off:%d sz:%d dma_len:%d\n", + "off:0x%x sz:0x%x dma_len:0x%x\n", i, (unsigned long)sg_dma_address(&sg[i]), sg[i].page, sg[i].offset, sg[i].length,sg_dma_len(&sg[i])); @@ -383,6 +383,7 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, struct iser_regd_buf *regd_buf; int aligned_len; int err; + int i; regd_buf = &iser_ctask->rdma_regd[cmd_dir]; @@ -400,8 +401,18 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, iser_page_vec_build(mem, ib_conn->page_vec); err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, ®d_buf->reg); - if (err) + if (err) { + iser_data_buf_dump(mem); + iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", mem->dma_nents, + ntoh24(iser_ctask->desc.iscsi_header.dlength)); + iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n", + ib_conn->page_vec->data_size, ib_conn->page_vec->length, + ib_conn->page_vec->offset); + for (i=0 ; ipage_vec->length ; i++) { + iser_err("page_vec[%d] = 0x%lx\n", i, ib_conn->page_vec->pages[i]); + } return err; + } /* take a reference on this regd buf such that it will not be released * * (eg in send dto completion) before we get the scsi response */ From d81110285f7f6c07a0ce8f99a5ff158a647cd649 Mon Sep 17 00:00:00 2001 From: Erez Zilber Date: Mon, 11 Sep 2006 12:26:33 +0300 Subject: [PATCH 0780/1063] IB/iser: Do not use FMR for a single dma entry sg Fast Memory Registration (fmr) is used to register for rdma an sg whose elements are not linearly sequential after dma mapping. The IB verbs layer provides an "all dma memory MR (memory region)" which can be used for RDMA-ing a dma linearly sequential buffer. Change the code to use the dma mr instead of doing fmr when dma mapping produces a single dma entry sg. Signed-off-by: Erez Zilber Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 1 + drivers/infiniband/ulp/iser/iser_memory.c | 48 ++++++++++++++++------- drivers/infiniband/ulp/iser/iser_verbs.c | 6 ++- 3 files changed, 39 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 0ba02abb0414..7e1a411db2a3 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -175,6 +175,7 @@ struct iser_mem_reg { u64 va; u64 len; void *mem_h; + int is_fmr; }; struct iser_regd_buf { diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 8fea0bce5042..d0b03f426581 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -56,7 +56,7 @@ int iser_regd_buff_release(struct iser_regd_buf *regd_buf) if ((atomic_read(®d_buf->ref_count) == 0) || atomic_dec_and_test(®d_buf->ref_count)) { /* if we used the dma mr, unreg is just NOP */ - if (regd_buf->reg.rkey != 0) + if (regd_buf->reg.is_fmr) iser_unreg_mem(®d_buf->reg); if (regd_buf->dma_addr) { @@ -91,9 +91,9 @@ void iser_reg_single(struct iser_device *device, BUG_ON(dma_mapping_error(dma_addr)); regd_buf->reg.lkey = device->mr->lkey; - regd_buf->reg.rkey = 0; /* indicate there's no need to unreg */ regd_buf->reg.len = regd_buf->data_size; regd_buf->reg.va = dma_addr; + regd_buf->reg.is_fmr = 0; regd_buf->dma_addr = dma_addr; regd_buf->direction = direction; @@ -379,11 +379,13 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, enum iser_data_dir cmd_dir) { struct iser_conn *ib_conn = iser_ctask->iser_conn->ib_conn; + struct iser_device *device = ib_conn->device; struct iser_data_buf *mem = &iser_ctask->data[cmd_dir]; struct iser_regd_buf *regd_buf; int aligned_len; int err; int i; + struct scatterlist *sg; regd_buf = &iser_ctask->rdma_regd[cmd_dir]; @@ -399,19 +401,37 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, mem = &iser_ctask->data_copy[cmd_dir]; } - iser_page_vec_build(mem, ib_conn->page_vec); - err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, ®d_buf->reg); - if (err) { - iser_data_buf_dump(mem); - iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", mem->dma_nents, - ntoh24(iser_ctask->desc.iscsi_header.dlength)); - iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n", - ib_conn->page_vec->data_size, ib_conn->page_vec->length, - ib_conn->page_vec->offset); - for (i=0 ; ipage_vec->length ; i++) { - iser_err("page_vec[%d] = 0x%lx\n", i, ib_conn->page_vec->pages[i]); + /* if there a single dma entry, FMR is not needed */ + if (mem->dma_nents == 1) { + sg = (struct scatterlist *)mem->buf; + + regd_buf->reg.lkey = device->mr->lkey; + regd_buf->reg.rkey = device->mr->rkey; + regd_buf->reg.len = sg_dma_len(&sg[0]); + regd_buf->reg.va = sg_dma_address(&sg[0]); + regd_buf->reg.is_fmr = 0; + + iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X " + "va: 0x%08lX sz: %ld]\n", + (unsigned int)regd_buf->reg.lkey, + (unsigned int)regd_buf->reg.rkey, + (unsigned long)regd_buf->reg.va, + (unsigned long)regd_buf->reg.len); + } else { /* use FMR for multiple dma entries */ + iser_page_vec_build(mem, ib_conn->page_vec); + err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, ®d_buf->reg); + if (err) { + iser_data_buf_dump(mem); + iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", mem->dma_nents, + ntoh24(iser_ctask->desc.iscsi_header.dlength)); + iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n", + ib_conn->page_vec->data_size, ib_conn->page_vec->length, + ib_conn->page_vec->offset); + for (i=0 ; ipage_vec->length ; i++) + iser_err("page_vec[%d] = 0x%llx\n", i, + (unsigned long long) ib_conn->page_vec->pages[i]); + return err; } - return err; } /* take a reference on this regd buf such that it will not be released * diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 9b27a7c26aa8..ecdca7fc1e4c 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -88,8 +88,9 @@ static int iser_create_device_ib_res(struct iser_device *device) iser_cq_tasklet_fn, (unsigned long)device); - device->mr = ib_get_dma_mr(device->pd, - IB_ACCESS_LOCAL_WRITE); + device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ); if (IS_ERR(device->mr)) goto dma_mr_err; @@ -606,6 +607,7 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, mem_reg->rkey = mem->fmr->rkey; mem_reg->len = page_vec->length * SIZE_4K; mem_reg->va = io_addr; + mem_reg->is_fmr = 1; mem_reg->mem_h = (void *)mem; mem_reg->va += page_vec->offset; From 2439a6e65ff09729c3b4215f134dc5cd4e8a30c0 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 22 Sep 2006 15:22:52 -0700 Subject: [PATCH 0781/1063] IPoIB: Refactor completion handling Split up ipoib_ib_handle_wc() into ipoib_ib_handle_rx_wc() and ipoib_ib_handle_tx_wc() to make the code easier to read. This will also help implement NAPI in the future. Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 220 +++++++++++++----------- 1 file changed, 116 insertions(+), 104 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 5033666b1481..722177ea069b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -169,117 +169,129 @@ static int ipoib_ib_post_receives(struct net_device *dev) return 0; } -static void ipoib_ib_handle_wc(struct net_device *dev, - struct ib_wc *wc) +static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV; + struct sk_buff *skb; + dma_addr_t addr; + + ipoib_dbg_data(priv, "recv completion: id %d, op %d, status: %d\n", + wr_id, wc->opcode, wc->status); + + if (unlikely(wr_id >= ipoib_recvq_size)) { + ipoib_warn(priv, "recv completion event with wrid %d (> %d)\n", + wr_id, ipoib_recvq_size); + return; + } + + skb = priv->rx_ring[wr_id].skb; + addr = priv->rx_ring[wr_id].mapping; + + if (unlikely(wc->status != IB_WC_SUCCESS)) { + if (wc->status != IB_WC_WR_FLUSH_ERR) + ipoib_warn(priv, "failed recv event " + "(status=%d, wrid=%d vend_err %x)\n", + wc->status, wr_id, wc->vendor_err); + dma_unmap_single(priv->ca->dma_device, addr, + IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + dev_kfree_skb_any(skb); + priv->rx_ring[wr_id].skb = NULL; + return; + } + + /* + * If we can't allocate a new RX buffer, dump + * this packet and reuse the old buffer. + */ + if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) { + ++priv->stats.rx_dropped; + goto repost; + } + + ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", + wc->byte_len, wc->slid); + + dma_unmap_single(priv->ca->dma_device, addr, + IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + + skb_put(skb, wc->byte_len); + skb_pull(skb, IB_GRH_BYTES); + + if (wc->slid != priv->local_lid || + wc->src_qp != priv->qp->qp_num) { + skb->protocol = ((struct ipoib_header *) skb->data)->proto; + skb->mac.raw = skb->data; + skb_pull(skb, IPOIB_ENCAP_LEN); + + dev->last_rx = jiffies; + ++priv->stats.rx_packets; + priv->stats.rx_bytes += skb->len; + + skb->dev = dev; + /* XXX get correct PACKET_ type here */ + skb->pkt_type = PACKET_HOST; + netif_rx_ni(skb); + } else { + ipoib_dbg_data(priv, "dropping loopback packet\n"); + dev_kfree_skb_any(skb); + } + +repost: + if (unlikely(ipoib_ib_post_receive(dev, wr_id))) + ipoib_warn(priv, "ipoib_ib_post_receive failed " + "for buf %d\n", wr_id); +} + +static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) { struct ipoib_dev_priv *priv = netdev_priv(dev); unsigned int wr_id = wc->wr_id; + struct ipoib_tx_buf *tx_req; + unsigned long flags; - ipoib_dbg_data(priv, "called: id %d, op %d, status: %d\n", + ipoib_dbg_data(priv, "send completion: id %d, op %d, status: %d\n", wr_id, wc->opcode, wc->status); - if (wr_id & IPOIB_OP_RECV) { - wr_id &= ~IPOIB_OP_RECV; - - if (wr_id < ipoib_recvq_size) { - struct sk_buff *skb = priv->rx_ring[wr_id].skb; - dma_addr_t addr = priv->rx_ring[wr_id].mapping; - - if (unlikely(wc->status != IB_WC_SUCCESS)) { - if (wc->status != IB_WC_WR_FLUSH_ERR) - ipoib_warn(priv, "failed recv event " - "(status=%d, wrid=%d vend_err %x)\n", - wc->status, wr_id, wc->vendor_err); - dma_unmap_single(priv->ca->dma_device, addr, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); - dev_kfree_skb_any(skb); - priv->rx_ring[wr_id].skb = NULL; - return; - } - - /* - * If we can't allocate a new RX buffer, dump - * this packet and reuse the old buffer. - */ - if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) { - ++priv->stats.rx_dropped; - goto repost; - } - - ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", - wc->byte_len, wc->slid); - - dma_unmap_single(priv->ca->dma_device, addr, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); - - skb_put(skb, wc->byte_len); - skb_pull(skb, IB_GRH_BYTES); - - if (wc->slid != priv->local_lid || - wc->src_qp != priv->qp->qp_num) { - skb->protocol = ((struct ipoib_header *) skb->data)->proto; - skb->mac.raw = skb->data; - skb_pull(skb, IPOIB_ENCAP_LEN); - - dev->last_rx = jiffies; - ++priv->stats.rx_packets; - priv->stats.rx_bytes += skb->len; - - skb->dev = dev; - /* XXX get correct PACKET_ type here */ - skb->pkt_type = PACKET_HOST; - netif_rx_ni(skb); - } else { - ipoib_dbg_data(priv, "dropping loopback packet\n"); - dev_kfree_skb_any(skb); - } - - repost: - if (unlikely(ipoib_ib_post_receive(dev, wr_id))) - ipoib_warn(priv, "ipoib_ib_post_receive failed " - "for buf %d\n", wr_id); - } else - ipoib_warn(priv, "completion event with wrid %d\n", - wr_id); - - } else { - struct ipoib_tx_buf *tx_req; - unsigned long flags; - - if (wr_id >= ipoib_sendq_size) { - ipoib_warn(priv, "completion event with wrid %d (> %d)\n", - wr_id, ipoib_sendq_size); - return; - } - - ipoib_dbg_data(priv, "send complete, wrid %d\n", wr_id); - - tx_req = &priv->tx_ring[wr_id]; - - dma_unmap_single(priv->ca->dma_device, - pci_unmap_addr(tx_req, mapping), - tx_req->skb->len, - DMA_TO_DEVICE); - - ++priv->stats.tx_packets; - priv->stats.tx_bytes += tx_req->skb->len; - - dev_kfree_skb_any(tx_req->skb); - - spin_lock_irqsave(&priv->tx_lock, flags); - ++priv->tx_tail; - if (netif_queue_stopped(dev) && - test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) && - priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) - netif_wake_queue(dev); - spin_unlock_irqrestore(&priv->tx_lock, flags); - - if (wc->status != IB_WC_SUCCESS && - wc->status != IB_WC_WR_FLUSH_ERR) - ipoib_warn(priv, "failed send event " - "(status=%d, wrid=%d vend_err %x)\n", - wc->status, wr_id, wc->vendor_err); + if (unlikely(wr_id >= ipoib_sendq_size)) { + ipoib_warn(priv, "send completion event with wrid %d (> %d)\n", + wr_id, ipoib_sendq_size); + return; } + + tx_req = &priv->tx_ring[wr_id]; + + dma_unmap_single(priv->ca->dma_device, + pci_unmap_addr(tx_req, mapping), + tx_req->skb->len, + DMA_TO_DEVICE); + + ++priv->stats.tx_packets; + priv->stats.tx_bytes += tx_req->skb->len; + + dev_kfree_skb_any(tx_req->skb); + + spin_lock_irqsave(&priv->tx_lock, flags); + ++priv->tx_tail; + if (netif_queue_stopped(dev) && + test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) && + priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) + netif_wake_queue(dev); + spin_unlock_irqrestore(&priv->tx_lock, flags); + + if (wc->status != IB_WC_SUCCESS && + wc->status != IB_WC_WR_FLUSH_ERR) + ipoib_warn(priv, "failed send event " + "(status=%d, wrid=%d vend_err %x)\n", + wc->status, wr_id, wc->vendor_err); +} + +static void ipoib_ib_handle_wc(struct net_device *dev, struct ib_wc *wc) +{ + if (wc->wr_id & IPOIB_OP_RECV) + ipoib_ib_handle_rx_wc(dev, wc); + else + ipoib_ib_handle_tx_wc(dev, wc); } void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr) From c1a0b23bf477c2e1068905f4e2b5c3cee139e853 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 21 Aug 2006 16:40:12 -0700 Subject: [PATCH 0782/1063] IB/sa: Require SA registration Require users to register with SA module, to prevent the sa_query module text from going away while an SA query callback is still running. Update all in-tree users for the new interface. Signed-off-by: Michael S. Tsirkin Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 7 ++- drivers/infiniband/core/sa_query.c | 60 ++++++++++++++++--- drivers/infiniband/ulp/ipoib/ipoib.h | 2 + drivers/infiniband/ulp/ipoib/ipoib_main.c | 12 +++- .../infiniband/ulp/ipoib/ipoib_multicast.c | 12 ++-- drivers/infiniband/ulp/srp/ib_srp.c | 9 ++- include/rdma/ib_sa.h | 41 ++++++++++--- 7 files changed, 116 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 488fa1d642a7..1178bd434d1b 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -62,6 +62,7 @@ static struct ib_client cma_client = { .remove = cma_remove_one }; +static struct ib_sa_client sa_client; static LIST_HEAD(dev_list); static LIST_HEAD(listen_any_list); static DEFINE_MUTEX(lock); @@ -1323,7 +1324,7 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr)); path_rec.numb_path = 1; - id_priv->query_id = ib_sa_path_rec_get(id_priv->id.device, + id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, id_priv->id.port_num, &path_rec, IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH, @@ -2199,12 +2200,15 @@ static int cma_init(void) if (!cma_wq) return -ENOMEM; + ib_sa_register_client(&sa_client); + ret = ib_register_client(&cma_client); if (ret) goto err; return 0; err: + ib_sa_unregister_client(&sa_client); destroy_workqueue(cma_wq); return ret; } @@ -2212,6 +2216,7 @@ err: static void cma_cleanup(void) { ib_unregister_client(&cma_client); + ib_sa_unregister_client(&sa_client); destroy_workqueue(cma_wq); idr_destroy(&sdp_ps); idr_destroy(&tcp_ps); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index ca8760a7d88c..1706d3c7e95e 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Voltaire, Inc.  All rights reserved. + * Copyright (c) 2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -75,6 +76,7 @@ struct ib_sa_device { struct ib_sa_query { void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *); void (*release)(struct ib_sa_query *); + struct ib_sa_client *client; struct ib_sa_port *port; struct ib_mad_send_buf *mad_buf; struct ib_sa_sm_ah *sm_ah; @@ -415,6 +417,31 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event } } +void ib_sa_register_client(struct ib_sa_client *client) +{ + atomic_set(&client->users, 1); + init_completion(&client->comp); +} +EXPORT_SYMBOL(ib_sa_register_client); + +static inline void ib_sa_client_get(struct ib_sa_client *client) +{ + atomic_inc(&client->users); +} + +static inline void ib_sa_client_put(struct ib_sa_client *client) +{ + if (atomic_dec_and_test(&client->users)) + complete(&client->comp); +} + +void ib_sa_unregister_client(struct ib_sa_client *client) +{ + ib_sa_client_put(client); + wait_for_completion(&client->comp); +} +EXPORT_SYMBOL(ib_sa_unregister_client); + /** * ib_sa_cancel_query - try to cancel an SA query * @id:ID of query to cancel @@ -557,6 +584,7 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) /** * ib_sa_path_rec_get - Start a Path get query + * @client:SA client * @device:device to send query on * @port_num: port number to send query on * @rec:Path Record to send in query @@ -579,7 +607,8 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) * error code. Otherwise it is a query ID that can be used to cancel * the query. */ -int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, +int ib_sa_path_rec_get(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, struct ib_sa_path_rec *rec, ib_sa_comp_mask comp_mask, int timeout_ms, gfp_t gfp_mask, @@ -614,8 +643,10 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, goto err1; } - query->callback = callback; - query->context = context; + ib_sa_client_get(client); + query->sa_query.client = client; + query->callback = callback; + query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(mad, agent); @@ -639,6 +670,7 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, err2: *sa_query = NULL; + ib_sa_client_put(query->sa_query.client); ib_free_send_mad(query->sa_query.mad_buf); err1: @@ -671,6 +703,7 @@ static void ib_sa_service_rec_release(struct ib_sa_query *sa_query) /** * ib_sa_service_rec_query - Start Service Record operation + * @client:SA client * @device:device to send request on * @port_num: port number to send request on * @method:SA method - should be get, set, or delete @@ -695,7 +728,8 @@ static void ib_sa_service_rec_release(struct ib_sa_query *sa_query) * error code. Otherwise it is a request ID that can be used to cancel * the query. */ -int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, +int ib_sa_service_rec_query(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, u8 method, struct ib_sa_service_rec *rec, ib_sa_comp_mask comp_mask, int timeout_ms, gfp_t gfp_mask, @@ -735,8 +769,10 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, goto err1; } - query->callback = callback; - query->context = context; + ib_sa_client_get(client); + query->sa_query.client = client; + query->callback = callback; + query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(mad, agent); @@ -761,6 +797,7 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, err2: *sa_query = NULL; + ib_sa_client_put(query->sa_query.client); ib_free_send_mad(query->sa_query.mad_buf); err1: @@ -791,7 +828,8 @@ static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query) kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query)); } -int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, +int ib_sa_mcmember_rec_query(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, u8 method, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, @@ -827,8 +865,10 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, goto err1; } - query->callback = callback; - query->context = context; + ib_sa_client_get(client); + query->sa_query.client = client; + query->callback = callback; + query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(mad, agent); @@ -853,6 +893,7 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, err2: *sa_query = NULL; + ib_sa_client_put(query->sa_query.client); ib_free_send_mad(query->sa_query.mad_buf); err1: @@ -889,6 +930,7 @@ static void send_handler(struct ib_mad_agent *agent, ib_free_send_mad(mad_send_wc->send_buf); kref_put(&query->sm_ah->ref, free_sm_ah); + ib_sa_client_put(query->client); query->release(query); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 474aa214ab57..0b8a79d53a00 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -336,6 +336,8 @@ static inline void ipoib_unregister_debugfs(void) { } extern int ipoib_sendq_size; extern int ipoib_recvq_size; +extern struct ib_sa_client ipoib_sa_client; + #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG extern int ipoib_debug_level; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index e9a7659eb1d7..ae3a4982cddb 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -82,6 +82,8 @@ static const u8 ipv4_bcast_addr[] = { struct workqueue_struct *ipoib_workqueue; +struct ib_sa_client ipoib_sa_client; + static void ipoib_add_one(struct ib_device *device); static void ipoib_remove_one(struct ib_device *device); @@ -463,7 +465,7 @@ static int path_rec_start(struct net_device *dev, init_completion(&path->done); path->query_id = - ib_sa_path_rec_get(priv->ca, priv->port, + ib_sa_path_rec_get(&ipoib_sa_client, priv->ca, priv->port, &path->pathrec, IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | @@ -1191,13 +1193,16 @@ static int __init ipoib_init_module(void) goto err_fs; } + ib_sa_register_client(&ipoib_sa_client); + ret = ib_register_client(&ipoib_client); if (ret) - goto err_wq; + goto err_sa; return 0; -err_wq: +err_sa: + ib_sa_unregister_client(&ipoib_sa_client); destroy_workqueue(ipoib_workqueue); err_fs: @@ -1209,6 +1214,7 @@ err_fs: static void __exit ipoib_cleanup_module(void) { ib_unregister_client(&ipoib_client); + ib_sa_unregister_client(&ipoib_sa_client); ipoib_unregister_debugfs(); destroy_workqueue(ipoib_workqueue); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 60b09f5cb347..fb3e4875a46d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -361,7 +361,7 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) init_completion(&mcast->done); - ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, + ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port, &rec, IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_PKEY | @@ -485,9 +485,9 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, init_completion(&mcast->done); - ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, comp_mask, - mcast->backoff * 1000, GFP_ATOMIC, - ipoib_mcast_join_complete, + ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port, + &rec, comp_mask, mcast->backoff * 1000, + GFP_ATOMIC, ipoib_mcast_join_complete, mcast, &mcast->query); if (ret < 0) { @@ -528,7 +528,7 @@ void ipoib_mcast_join_task(void *dev_ptr) priv->local_rate = attr.active_speed * ib_width_enum_to_int(attr.active_width); } else - ipoib_warn(priv, "ib_query_port failed\n"); + ipoib_warn(priv, "ib_query_port failed\n"); } if (!priv->broadcast) { @@ -681,7 +681,7 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) * Just make one shot at leaving and don't wait for a reply; * if we fail, too bad. */ - ret = ib_sa_mcmember_rec_delete(priv->ca, priv->port, &rec, + ret = ib_sa_mcmember_rec_delete(&ipoib_sa_client, priv->ca, priv->port, &rec, IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_PKEY | diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index feb1fcd0f2fb..44b9e5be6687 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -96,6 +96,8 @@ static struct ib_client srp_client = { .remove = srp_remove_one }; +static struct ib_sa_client srp_sa_client; + static inline struct srp_target_port *host_to_target(struct Scsi_Host *host) { return (struct srp_target_port *) host->hostdata; @@ -267,7 +269,8 @@ static int srp_lookup_path(struct srp_target_port *target) init_completion(&target->done); - target->path_query_id = ib_sa_path_rec_get(target->srp_host->dev->dev, + target->path_query_id = ib_sa_path_rec_get(&srp_sa_client, + target->srp_host->dev->dev, target->srp_host->port, &target->path, IB_SA_PATH_REC_DGID | @@ -1998,9 +2001,12 @@ static int __init srp_init_module(void) return ret; } + ib_sa_register_client(&srp_sa_client); + ret = ib_register_client(&srp_client); if (ret) { printk(KERN_ERR PFX "couldn't register IB client\n"); + ib_sa_unregister_client(&srp_sa_client); class_unregister(&srp_class); return ret; } @@ -2011,6 +2017,7 @@ static int __init srp_init_module(void) static void __exit srp_cleanup_module(void) { ib_unregister_client(&srp_client); + ib_sa_unregister_client(&srp_sa_client); class_unregister(&srp_class); } diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index c99e4420fd7e..58bb5f716fe3 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -1,6 +1,7 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -36,8 +37,11 @@ #ifndef IB_SA_H #define IB_SA_H +#include #include +#include + #include #include @@ -250,11 +254,28 @@ struct ib_sa_service_rec { u64 data64[2]; }; +struct ib_sa_client { + atomic_t users; + struct completion comp; +}; + +/** + * ib_sa_register_client - Register an SA client. + */ +void ib_sa_register_client(struct ib_sa_client *client); + +/** + * ib_sa_unregister_client - Deregister an SA client. + * @client: Client object to deregister. + */ +void ib_sa_unregister_client(struct ib_sa_client *client); + struct ib_sa_query; void ib_sa_cancel_query(int id, struct ib_sa_query *query); -int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, +int ib_sa_path_rec_get(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, struct ib_sa_path_rec *rec, ib_sa_comp_mask comp_mask, int timeout_ms, gfp_t gfp_mask, @@ -264,7 +285,8 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, void *context, struct ib_sa_query **query); -int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, +int ib_sa_mcmember_rec_query(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, u8 method, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, @@ -275,7 +297,8 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, void *context, struct ib_sa_query **query); -int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, +int ib_sa_service_rec_query(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, u8 method, struct ib_sa_service_rec *rec, ib_sa_comp_mask comp_mask, @@ -288,6 +311,7 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, /** * ib_sa_mcmember_rec_set - Start an MCMember set query + * @client:SA client * @device:device to send query on * @port_num: port number to send query on * @rec:MCMember Record to send in query @@ -311,7 +335,8 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, * cancel the query. */ static inline int -ib_sa_mcmember_rec_set(struct ib_device *device, u8 port_num, +ib_sa_mcmember_rec_set(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, int timeout_ms, gfp_t gfp_mask, @@ -321,7 +346,7 @@ ib_sa_mcmember_rec_set(struct ib_device *device, u8 port_num, void *context, struct ib_sa_query **query) { - return ib_sa_mcmember_rec_query(device, port_num, + return ib_sa_mcmember_rec_query(client, device, port_num, IB_MGMT_METHOD_SET, rec, comp_mask, timeout_ms, gfp_mask, callback, @@ -330,6 +355,7 @@ ib_sa_mcmember_rec_set(struct ib_device *device, u8 port_num, /** * ib_sa_mcmember_rec_delete - Start an MCMember delete query + * @client:SA client * @device:device to send query on * @port_num: port number to send query on * @rec:MCMember Record to send in query @@ -353,7 +379,8 @@ ib_sa_mcmember_rec_set(struct ib_device *device, u8 port_num, * cancel the query. */ static inline int -ib_sa_mcmember_rec_delete(struct ib_device *device, u8 port_num, +ib_sa_mcmember_rec_delete(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, int timeout_ms, gfp_t gfp_mask, @@ -363,7 +390,7 @@ ib_sa_mcmember_rec_delete(struct ib_device *device, u8 port_num, void *context, struct ib_sa_query **query) { - return ib_sa_mcmember_rec_query(device, port_num, + return ib_sa_mcmember_rec_query(client, device, port_num, IB_SA_METHOD_DELETE, rec, comp_mask, timeout_ms, gfp_mask, callback, From a70d059009f4a207e2a9c794f40fc8c870096d54 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 28 Aug 2006 16:32:50 +0300 Subject: [PATCH 0783/1063] IB/cm: Do not track remote QPN in timewait state Do not track remote QPN in TimeWait state, since QP is not connected. Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/core/cm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index e130d2e89515..f35fcc4c0638 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -685,6 +685,8 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv) { int wait_time; + cm_cleanup_timewait(cm_id_priv->timewait_info); + /* * The cm_id could be destroyed by the user before we exit timewait. * To protect against this, we search for the cm_id after exiting From 07eeec0627e93a1a753c4df004a97a4d0a7b9ceb Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 12 Sep 2006 09:03:33 -0700 Subject: [PATCH 0784/1063] RDMA/cma: Document rdma_destroy_id() function Clarify that rdma_destroy_id cancels outstanding asynchronous operations on the Associated id. Signed-off-by: Or Gerlitz Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- include/rdma/rdma_cm.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 402c63d7226b..1566be568ab6 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -117,6 +117,14 @@ struct rdma_cm_id { struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, void *context, enum rdma_port_space ps); +/** + * rdma_destroy_id - Destroys an RDMA identifier. + * + * @id: RDMA identifier. + * + * Note: calling this function has the effect of canceling in-flight + * asynchronous operations associated with the id. + */ void rdma_destroy_id(struct rdma_cm_id *id); /** From b3b30f5e8a0c50db3d76b6f7c7cc50245aeb57fd Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 15 Aug 2006 21:11:18 +0300 Subject: [PATCH 0785/1063] IB/mthca: Recover from catastrophic errors Trigger device remove and then add when a catastrophic error is detected in hardware. This, in turn, will cause a device reset, which we hope will recover from the catastrophic condition. Since this might interefere with debugging the root cause, add a module option to suppress this behaviour. Signed-off-by: Jack Morgenstein Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_catas.c | 62 ++++++++++++++++ drivers/infiniband/hw/mthca/mthca_dev.h | 7 ++ drivers/infiniband/hw/mthca/mthca_main.c | 88 +++++++++++++++++------ 3 files changed, 136 insertions(+), 21 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c index c3bec7490f52..cd044ea2dfa4 100644 --- a/drivers/infiniband/hw/mthca/mthca_catas.c +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -34,6 +34,7 @@ #include #include +#include #include "mthca_dev.h" @@ -48,9 +49,41 @@ enum { static DEFINE_SPINLOCK(catas_lock); +static LIST_HEAD(catas_list); +static struct workqueue_struct *catas_wq; +static struct work_struct catas_work; + +static int catas_reset_disable; +module_param_named(catas_reset_disable, catas_reset_disable, int, 0644); +MODULE_PARM_DESC(catas_reset_disable, "disable reset on catastrophic event if nonzero"); + +static void catas_reset(void *work_ptr) +{ + struct mthca_dev *dev, *tmpdev; + LIST_HEAD(tlist); + int ret; + + mutex_lock(&mthca_device_mutex); + + spin_lock_irq(&catas_lock); + list_splice_init(&catas_list, &tlist); + spin_unlock_irq(&catas_lock); + + list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) { + ret = __mthca_restart_one(dev->pdev); + if (ret) + mthca_err(dev, "Reset failed (%d)\n", ret); + else + mthca_dbg(dev, "Reset succeeded\n"); + } + + mutex_unlock(&mthca_device_mutex); +} + static void handle_catas(struct mthca_dev *dev) { struct ib_event event; + unsigned long flags; const char *type; int i; @@ -82,6 +115,14 @@ static void handle_catas(struct mthca_dev *dev) for (i = 0; i < dev->catas_err.size; ++i) mthca_err(dev, " buf[%02x]: %08x\n", i, swab32(readl(dev->catas_err.map + i))); + + if (catas_reset_disable) + return; + + spin_lock_irqsave(&catas_lock, flags); + list_add(&dev->catas_err.list, &catas_list); + queue_work(catas_wq, &catas_work); + spin_unlock_irqrestore(&catas_lock, flags); } static void poll_catas(unsigned long dev_ptr) @@ -135,6 +176,7 @@ void mthca_start_catas_poll(struct mthca_dev *dev) dev->catas_err.timer.data = (unsigned long) dev; dev->catas_err.timer.function = poll_catas; dev->catas_err.timer.expires = jiffies + MTHCA_CATAS_POLL_INTERVAL; + INIT_LIST_HEAD(&dev->catas_err.list); add_timer(&dev->catas_err.timer); } @@ -153,4 +195,24 @@ void mthca_stop_catas_poll(struct mthca_dev *dev) dev->catas_err.addr), dev->catas_err.size * 4); } + + spin_lock_irq(&catas_lock); + list_del(&dev->catas_err.list); + spin_unlock_irq(&catas_lock); +} + +int __init mthca_catas_init(void) +{ + INIT_WORK(&catas_work, catas_reset, NULL); + + catas_wq = create_singlethread_workqueue("mthca_catas"); + if (!catas_wq) + return -ENOMEM; + + return 0; +} + +void mthca_catas_cleanup(void) +{ + destroy_workqueue(catas_wq); } diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 33bd0b8bfd13..fe5cecf70fed 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -45,6 +45,7 @@ #include #include #include +#include #include @@ -283,8 +284,11 @@ struct mthca_catas_err { unsigned long stop; u32 size; struct timer_list timer; + struct list_head list; }; +extern struct mutex mthca_device_mutex; + struct mthca_dev { struct ib_device ib_dev; struct pci_dev *pdev; @@ -450,6 +454,9 @@ void mthca_unregister_device(struct mthca_dev *dev); void mthca_start_catas_poll(struct mthca_dev *dev); void mthca_stop_catas_poll(struct mthca_dev *dev); +int __mthca_restart_one(struct pci_dev *pdev); +int mthca_catas_init(void); +void mthca_catas_cleanup(void); int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar); void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar); diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 7b82c1907f04..47ea02148368 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -80,6 +80,8 @@ static int tune_pci = 0; module_param(tune_pci, int, 0444); MODULE_PARM_DESC(tune_pci, "increase PCI burst from the default set by BIOS if nonzero"); +struct mutex mthca_device_mutex; + static const char mthca_version[] __devinitdata = DRV_NAME ": Mellanox InfiniBand HCA driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; @@ -978,28 +980,15 @@ static struct { MTHCA_FLAG_SINAI_OPT } }; -static int __devinit mthca_init_one(struct pci_dev *pdev, - const struct pci_device_id *id) +static int __mthca_init_one(struct pci_dev *pdev, int hca_type) { - static int mthca_version_printed = 0; int ddr_hidden = 0; int err; struct mthca_dev *mdev; - if (!mthca_version_printed) { - printk(KERN_INFO "%s", mthca_version); - ++mthca_version_printed; - } - printk(KERN_INFO PFX "Initializing %s\n", pci_name(pdev)); - if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) { - printk(KERN_ERR PFX "%s has invalid driver data %lx\n", - pci_name(pdev), id->driver_data); - return -ENODEV; - } - err = pci_enable_device(pdev); if (err) { dev_err(&pdev->dev, "Cannot enable PCI device, " @@ -1065,7 +1054,7 @@ static int __devinit mthca_init_one(struct pci_dev *pdev, mdev->pdev = pdev; - mdev->mthca_flags = mthca_hca_table[id->driver_data].flags; + mdev->mthca_flags = mthca_hca_table[hca_type].flags; if (ddr_hidden) mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN; @@ -1099,13 +1088,13 @@ static int __devinit mthca_init_one(struct pci_dev *pdev, if (err) goto err_cmd; - if (mdev->fw_ver < mthca_hca_table[id->driver_data].latest_fw) { + if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) { mthca_warn(mdev, "HCA FW version %d.%d.%d is old (%d.%d.%d is current).\n", (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff, (int) (mdev->fw_ver & 0xffff), - (int) (mthca_hca_table[id->driver_data].latest_fw >> 32), - (int) (mthca_hca_table[id->driver_data].latest_fw >> 16) & 0xffff, - (int) (mthca_hca_table[id->driver_data].latest_fw & 0xffff)); + (int) (mthca_hca_table[hca_type].latest_fw >> 32), + (int) (mthca_hca_table[hca_type].latest_fw >> 16) & 0xffff, + (int) (mthca_hca_table[hca_type].latest_fw & 0xffff)); mthca_warn(mdev, "If you have problems, try updating your HCA FW.\n"); } @@ -1122,6 +1111,7 @@ static int __devinit mthca_init_one(struct pci_dev *pdev, goto err_unregister; pci_set_drvdata(pdev, mdev); + mdev->hca_type = hca_type; return 0; @@ -1166,7 +1156,7 @@ err_disable_pdev: return err; } -static void __devexit mthca_remove_one(struct pci_dev *pdev) +static void __mthca_remove_one(struct pci_dev *pdev) { struct mthca_dev *mdev = pci_get_drvdata(pdev); u8 status; @@ -1211,6 +1201,51 @@ static void __devexit mthca_remove_one(struct pci_dev *pdev) } } +int __mthca_restart_one(struct pci_dev *pdev) +{ + struct mthca_dev *mdev; + + mdev = pci_get_drvdata(pdev); + if (!mdev) + return -ENODEV; + __mthca_remove_one(pdev); + return __mthca_init_one(pdev, mdev->hca_type); +} + +static int __devinit mthca_init_one(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + static int mthca_version_printed = 0; + int ret; + + mutex_lock(&mthca_device_mutex); + + if (!mthca_version_printed) { + printk(KERN_INFO "%s", mthca_version); + ++mthca_version_printed; + } + + if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) { + printk(KERN_ERR PFX "%s has invalid driver data %lx\n", + pci_name(pdev), id->driver_data); + mutex_unlock(&mthca_device_mutex); + return -ENODEV; + } + + ret = __mthca_init_one(pdev, id->driver_data); + + mutex_unlock(&mthca_device_mutex); + + return ret; +} + +static void __devexit mthca_remove_one(struct pci_dev *pdev) +{ + mutex_lock(&mthca_device_mutex); + __mthca_remove_one(pdev); + mutex_unlock(&mthca_device_mutex); +} + static struct pci_device_id mthca_pci_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR), .driver_data = TAVOR }, @@ -1248,13 +1283,24 @@ static int __init mthca_init(void) { int ret; + mutex_init(&mthca_device_mutex); + ret = mthca_catas_init(); + if (ret) + return ret; + ret = pci_register_driver(&mthca_driver); - return ret < 0 ? ret : 0; + if (ret < 0) { + mthca_catas_cleanup(); + return ret; + } + + return 0; } static void __exit mthca_cleanup(void) { pci_unregister_driver(&mthca_driver); + mthca_catas_cleanup(); } module_init(mthca_init); From 951f7fc1372da3d826b1d975b3cc5e3db92af5d0 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 22 Sep 2006 15:22:54 -0700 Subject: [PATCH 0786/1063] RDMA/cma: Document rdma_accept() error handling Document the reject sending and modifying QP to error done in rdma_accept(). Signed-off-by: Or Gerlitz Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- include/rdma/rdma_cm.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 1566be568ab6..deb5a0a4cee5 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -245,6 +245,10 @@ int rdma_listen(struct rdma_cm_id *id, int backlog); * Typically, this routine is only called by the listener to accept a connection * request. It must also be called on the active side of a connection if the * user is performing their own QP transitions. + * + * In the case of error, a reject message is sent to the remote side and the + * state of the qp associated with the id is modified to error, such that any + * previously posted receive buffers would be flushed. */ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param); From d35cc330a2058a32410ef42784b8d3b942f37b8b Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 22 Sep 2006 15:22:55 -0700 Subject: [PATCH 0787/1063] IB/mthca: Simplify calls to mthca_cq_clean() If a QP has separate send and receive CQs, then the send CQ will never have receive completions from that QP in it. So when cleaning the send CQ, there's no need to pass in an SRQ pointer, even if the QP is attached to an SRQ. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_qp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 9324b6204ac5..5e5c58b9920b 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -845,11 +845,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, * entries and reinitialize the QP. */ if (new_state == IB_QPS_RESET && !qp->ibqp.uobject) { - mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn, + mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn, qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); if (qp->ibqp.send_cq != qp->ibqp.recv_cq) - mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn, - qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); + mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn, NULL); mthca_wq_reset(&qp->sq); qp->sq.last = get_send_wqe(qp, qp->sq.max - 1); From 5755d6dad95808a24a65dd9e61e23c305f9b077c Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 22 Sep 2006 15:22:55 -0700 Subject: [PATCH 0788/1063] IB/iser: INFINIBAND_ISER depends on INET iSER won't build without CONFIG_INET enabled, so make Kconfig reflect that. Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/iser/Kconfig b/drivers/infiniband/ulp/iser/Kconfig index fead87d1eff9..365a1b5f19e0 100644 --- a/drivers/infiniband/ulp/iser/Kconfig +++ b/drivers/infiniband/ulp/iser/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_ISER tristate "ISCSI RDMA Protocol" - depends on INFINIBAND && SCSI + depends on INFINIBAND && SCSI && INET select SCSI_ISCSI_ATTRS ---help--- Support for the ISCSI RDMA Protocol over InfiniBand. This From aec79fcc3ea3b536a2788b4e22b7ebabbb176485 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 18 Sep 2006 22:17:08 +0300 Subject: [PATCH 0789/1063] IB/sa: fix ib_sa_selector names Relevant SA queries are actually "greater than" / "less than", not "greater than or equal" / "less than or equal" as the names imply. (See IB spec 1.2 Vol 1, 15.2.5.16 PATHRECORD/Table 205 PathRecord) Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- include/rdma/ib_sa.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 58bb5f716fe3..97715b0c20b6 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -83,8 +83,8 @@ enum { }; enum ib_sa_selector { - IB_SA_GTE = 0, - IB_SA_LTE = 1, + IB_SA_GT = 0, + IB_SA_LT = 1, IB_SA_EQ = 2, /* * The meaning of "best" depends on the attribute: for From d0df6d6d4539241179a1ef5394787825bf05bbce Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 22 Sep 2006 15:22:56 -0700 Subject: [PATCH 0790/1063] IPoIB: Create MCGs with all attributes required by RFC RFC 4391 ("Transmission of IP over InfiniBand (IPoIB)") says: If the IB multicast group does not already exist, one must be created first with the IPoIB link MTU. The MGID MUST use the same P_Key, Q_Key, SL, MTU, and HopLimit as those used in the broadcast-GID. The rest of attributes SHOULD follow the values used in the broadcast-GID as well. However, the current IPoIB driver is only setting the attributes required by the InfiniBand spec to create a multicast group, so in particular the MTU and HopLimit are not being set. Add these attributes when creating MCGs, and also set the Rate attribute, since IPoIB pays attention to that attribute as well. Signed-off-by: Roland Dreier --- .../infiniband/ulp/ipoib/ipoib_multicast.c | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index fb3e4875a46d..3faa1820f0e9 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -472,15 +472,25 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, if (create) { comp_mask |= - IB_SA_MCMEMBER_REC_QKEY | - IB_SA_MCMEMBER_REC_SL | - IB_SA_MCMEMBER_REC_FLOW_LABEL | - IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; + IB_SA_MCMEMBER_REC_QKEY | + IB_SA_MCMEMBER_REC_MTU_SELECTOR | + IB_SA_MCMEMBER_REC_MTU | + IB_SA_MCMEMBER_REC_TRAFFIC_CLASS | + IB_SA_MCMEMBER_REC_RATE_SELECTOR | + IB_SA_MCMEMBER_REC_RATE | + IB_SA_MCMEMBER_REC_SL | + IB_SA_MCMEMBER_REC_FLOW_LABEL | + IB_SA_MCMEMBER_REC_HOP_LIMIT; rec.qkey = priv->broadcast->mcmember.qkey; + rec.mtu_selector = IB_SA_EQ; + rec.mtu = priv->broadcast->mcmember.mtu; + rec.traffic_class = priv->broadcast->mcmember.traffic_class; + rec.rate_selector = IB_SA_EQ; + rec.rate = priv->broadcast->mcmember.rate; rec.sl = priv->broadcast->mcmember.sl; rec.flow_label = priv->broadcast->mcmember.flow_label; - rec.traffic_class = priv->broadcast->mcmember.traffic_class; + rec.hop_limit = priv->broadcast->mcmember.hop_limit; } init_completion(&mcast->done); From 5ccd025553d73e523212ee0860b7f4a75e886bfa Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 22 Sep 2006 15:22:56 -0700 Subject: [PATCH 0791/1063] IPoIB: Rejoin all multicast groups after a port event When ipoib_ib_dev_flush() is called because of a port event, the driver needs to rejoin all multicast groups, since the flush will call ipoib_mcast_dev_flush() (via ipoib_ib_dev_down()). Otherwise no (non-broadcast) multicast groups will be rejoined until the networking core calls ->set_multicast_list again, and so multicast reception will be broken for potentially a long time. Signed-off-by: Eli Cohen Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 722177ea069b..240befdf90dc 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -631,8 +631,10 @@ void ipoib_ib_dev_flush(void *_dev) * The device could have been brought down between the start and when * we get here, don't bring it back up if it's not configured up */ - if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) + if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) { ipoib_ib_dev_up(dev); + ipoib_mcast_restart_task(dev); + } mutex_lock(&priv->vlan_mutex); From 507c33504686e733a14ef0b2dc9db0c20fae4653 Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Thu, 21 Sep 2006 18:26:43 +0300 Subject: [PATCH 0792/1063] IPoIB: Remove unused include of vmalloc.h IPoIB doesn't use anything from , so don't include it. Signed-off-by: Dotan Barak Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index ae3a4982cddb..867d62742054 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -40,7 +40,6 @@ #include #include -#include #include #include /* For ARPHRD_xxx */ From a8bfca024326560d86c6323b0504288ca55a75fc Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 22 Sep 2006 15:22:58 -0700 Subject: [PATCH 0793/1063] IPoIB: Add some likely/unlikely annotations in hot path Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_main.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 240befdf90dc..f426a69d9a43 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -332,7 +332,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_tx_buf *tx_req; dma_addr_t addr; - if (skb->len > dev->mtu + INFINIBAND_ALEN) { + if (unlikely(skb->len > dev->mtu + INFINIBAND_ALEN)) { ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", skb->len, dev->mtu + INFINIBAND_ALEN); ++priv->stats.tx_dropped; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 867d62742054..1eaf00e9862c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -620,7 +620,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) struct ipoib_neigh *neigh; unsigned long flags; - if (!spin_trylock_irqsave(&priv->tx_lock, flags)) + if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags))) return NETDEV_TX_LOCKED; /* @@ -633,7 +633,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_BUSY; } - if (skb->dst && skb->dst->neighbour) { + if (likely(skb->dst && skb->dst->neighbour)) { if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) { ipoib_path_lookup(skb, dev); goto out; From 9cd330d36b32ed48d49561b165842db20bd153cc Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Fri, 22 Sep 2006 15:22:58 -0700 Subject: [PATCH 0794/1063] IB: Fix typo in kerneldoc for ib_set_client_data() Signed-off-by: Krishna Kumar Signed-off-by: Roland Dreier --- drivers/infiniband/core/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index d978fbe97535..63d2a39fb82c 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -385,7 +385,7 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client) EXPORT_SYMBOL(ib_get_client_data); /** - * ib_set_client_data - Get IB client context + * ib_set_client_data - Set IB client context * @device:Device to set context for * @client:Client to set context for * @data:Context to set From ddad65df0048e210c93640b59b3bad12701febb6 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 22 Sep 2006 19:15:23 -0400 Subject: [PATCH 0795/1063] [CPUFREQ] Fix some more CPU hotplug locking. Lukewarm IQ detected in hotplug locking BUG: warning at kernel/cpu.c:38/lock_cpu_hotplug() [] lock_cpu_hotplug+0x42/0x65 [] cpufreq_update_policy+0x25/0xad [] kprobe_flush_task+0x18/0x40 [] schedule+0x63f/0x68b [] __link_module+0x0/0x1f [] __cond_resched+0x16/0x34 [] cond_resched+0x26/0x31 [] wait_for_completion+0x17/0xb1 [] cpufreq_stat_cpu_callback+0x13/0x20 [cpufreq_stats] [] cpufreq_stats_init+0x74/0x8b [cpufreq_stats] [] sys_init_module+0x91/0x174 [] sysenter_past_esp+0x56/0x79 As there are other places that call cpufreq_update_policy without the hotplug lock, it seems better to keep the hotplug locking at the lower level for the time being until this is revamped. Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq_stats.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 25eee5394201..c2ecc599dc5f 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -350,12 +350,10 @@ __init cpufreq_stats_init(void) } register_hotcpu_notifier(&cpufreq_stat_cpu_notifier); - lock_cpu_hotplug(); for_each_online_cpu(cpu) { cpufreq_stat_cpu_callback(&cpufreq_stat_cpu_notifier, CPU_ONLINE, (void *)(long)cpu); } - unlock_cpu_hotplug(); return 0; } static void From 24669f7d00d387799fc6a39452ab22d7f078f043 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 12 Sep 2006 18:55:53 -0700 Subject: [PATCH 0796/1063] [CPUFREQ] sw_any_bug_dmi_table can be used on resume, so it isn't initdata sw_any_bug_dmi_table can be used on resume, so it isn't initdata. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index dba6bb28d298..7a9325349e94 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -393,7 +393,7 @@ static int __init sw_any_bug_found(struct dmi_system_id *d) } -static struct dmi_system_id __initdata sw_any_bug_dmi_table[] = { +static struct dmi_system_id sw_any_bug_dmi_table[] = { { .callback = sw_any_bug_found, .ident = "Supermicro Server X6DLP", From a83fbf635992442edf6aa3252e4008d4a08edf12 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Sep 2006 00:10:18 +0100 Subject: [PATCH 0797/1063] [PATCH] fix missing ifdefs in syscall classes hookup for generic targets several targets have no ....at() family and m32r calls its only chown variant chown32(), with __NR_chown being undefined. creat(2) is also absent in some targets. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/asm-generic/audit_change_attr.h | 4 ++++ include/asm-generic/audit_dir_write.h | 4 ++++ lib/audit.c | 2 ++ 3 files changed, 10 insertions(+) diff --git a/include/asm-generic/audit_change_attr.h b/include/asm-generic/audit_change_attr.h index cb05bf69745a..50764550a60c 100644 --- a/include/asm-generic/audit_change_attr.h +++ b/include/asm-generic/audit_change_attr.h @@ -1,16 +1,20 @@ __NR_chmod, __NR_fchmod, +#ifdef __NR_chown __NR_chown, __NR_fchown, __NR_lchown, +#endif __NR_setxattr, __NR_lsetxattr, __NR_fsetxattr, __NR_removexattr, __NR_lremovexattr, __NR_fremovexattr, +#ifdef __NR_fchownat __NR_fchownat, __NR_fchmodat, +#endif #ifdef __NR_chown32 __NR_chown32, __NR_fchown32, diff --git a/include/asm-generic/audit_dir_write.h b/include/asm-generic/audit_dir_write.h index 161a7a58fbab..6621bd82cbe8 100644 --- a/include/asm-generic/audit_dir_write.h +++ b/include/asm-generic/audit_dir_write.h @@ -1,14 +1,18 @@ __NR_rename, __NR_mkdir, __NR_rmdir, +#ifdef __NR_creat __NR_creat, +#endif __NR_link, __NR_unlink, __NR_symlink, __NR_mknod, +#ifdef __NR_mkdirat __NR_mkdirat, __NR_mknodat, __NR_unlinkat, __NR_renameat, __NR_linkat, __NR_symlinkat, +#endif diff --git a/lib/audit.c b/lib/audit.c index 8c21625ef938..3b1289fadf06 100644 --- a/lib/audit.c +++ b/lib/audit.c @@ -28,8 +28,10 @@ int audit_classify_syscall(int abi, unsigned syscall) switch(syscall) { case __NR_open: return 2; +#ifdef __NR_openat case __NR_openat: return 3; +#endif #ifdef __NR_socketcall case __NR_socketcall: return 4; From cc9bd99e9adfa4f44ea050a63fb41a3f764acf84 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Sep 2006 01:18:41 +0100 Subject: [PATCH 0798/1063] [PATCH] fix ancient breakage in ebus_init() Back when pci_dev had base_address[], loop of form base = &...->base_address[0]; for (.....) { ... *base++ = addr; } was fine, but when that array got spread in ->resource[...].start replacing the initialization with base = &...->resource[0].start; was not a sufficient modification. IOW this code got broken for cases when there had been more than one resource to fill. All way back in 2.3.41-pre3... Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/sparc/kernel/ebus.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/sparc/kernel/ebus.c b/arch/sparc/kernel/ebus.c index 81c0cbd96ff0..75ac24d229b1 100644 --- a/arch/sparc/kernel/ebus.c +++ b/arch/sparc/kernel/ebus.c @@ -277,7 +277,7 @@ void __init ebus_init(void) struct pci_dev *pdev; struct pcidev_cookie *cookie; struct device_node *dp; - unsigned long addr, *base; + struct resource *p; unsigned short pci_command; int len, reg, nreg; int num_ebus = 0; @@ -321,13 +321,12 @@ void __init ebus_init(void) } nreg = len / sizeof(struct linux_prom_pci_registers); - base = &ebus->self->resource[0].start; + p = &ebus->self->resource[0]; for (reg = 0; reg < nreg; reg++) { if (!(regs[reg].which_io & 0x03000000)) continue; - addr = regs[reg].phys_lo; - *base++ = addr; + (p++)->start = regs[reg].phys_lo; } ebus->ofdev.node = dp; From 634965f5cfda1763f51e7916cfa49265b70e2a8d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Sep 2006 01:20:31 +0100 Subject: [PATCH 0799/1063] [PATCH] memcpy_fromio() missing in istallion memcpy() from iomem is a bad thing... Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- drivers/char/istallion.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c index 84dfc4278139..8c09997cc3d6 100644 --- a/drivers/char/istallion.c +++ b/drivers/char/istallion.c @@ -3488,7 +3488,7 @@ static int stli_initecp(stlibrd_t *brdp) */ EBRDENABLE(brdp); sigsp = (cdkecpsig_t __iomem *) EBRDGETMEMPTR(brdp, CDK_SIGADDR); - memcpy(&sig, sigsp, sizeof(cdkecpsig_t)); + memcpy_fromio(&sig, sigsp, sizeof(cdkecpsig_t)); EBRDDISABLE(brdp); if (sig.magic != cpu_to_le32(ECP_MAGIC)) From 55ae922323c90fdcb733c13ccf0da2ee72763913 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Sep 2006 01:24:25 +0100 Subject: [PATCH 0800/1063] [PATCH] aoa is pmac-only Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- sound/aoa/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/aoa/Kconfig b/sound/aoa/Kconfig index 2f4334d19ccd..5d5813cec4c8 100644 --- a/sound/aoa/Kconfig +++ b/sound/aoa/Kconfig @@ -1,5 +1,5 @@ menu "Apple Onboard Audio driver" - depends on SND!=n && PPC + depends on SND!=n && PPC_PMAC config SND_AOA tristate "Apple Onboard Audio driver" From 00ddaf20b0049c65ddd0c2b1cbed16c7a433e47c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Sep 2006 01:22:46 +0100 Subject: [PATCH 0801/1063] [PATCH] sanitize frv archclean Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/frv/Makefile | 5 +---- arch/frv/boot/Makefile | 3 ++- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/frv/Makefile b/arch/frv/Makefile index d163747d17c0..038e3a8457e0 100644 --- a/arch/frv/Makefile +++ b/arch/frv/Makefile @@ -108,11 +108,8 @@ Image: vmlinux bootstrap: $(Q)$(MAKEBOOT) bootstrap -archmrproper: - $(Q)$(MAKE) $(build)=arch/frv/boot mrproper - archclean: - $(Q)$(MAKE) $(build)=arch/frv/boot clean + $(Q)$(MAKE) $(clean)=arch/frv/boot archdep: scripts/mkdep symlinks $(Q)$(MAKE) $(build)=arch/frv/boot dep diff --git a/arch/frv/boot/Makefile b/arch/frv/boot/Makefile index 5dfc93fd945a..dc6f03824423 100644 --- a/arch/frv/boot/Makefile +++ b/arch/frv/boot/Makefile @@ -8,6 +8,8 @@ # Copyright (C) 1995-2000 Russell King # +targets := Image zImage bootpImage + SYSTEM =$(TOPDIR)/$(LINUX) ZTEXTADDR = 0x02080000 @@ -66,7 +68,6 @@ zinstall: $(CONFIGURE) zImage # miscellany # mrproper clean: - $(RM) Image zImage bootpImage # @$(MAKE) -C compressed clean # @$(MAKE) -C bootp clean From a07562e03a3f4a1276931e3fb3cb532622a6c616 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Sep 2006 01:25:18 +0100 Subject: [PATCH 0802/1063] [PATCH] asm/backlight.h is ppc-only Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- drivers/macintosh/adbhid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/macintosh/adbhid.c b/drivers/macintosh/adbhid.c index c69d23bb255e..efd51e01c06e 100644 --- a/drivers/macintosh/adbhid.c +++ b/drivers/macintosh/adbhid.c @@ -45,8 +45,8 @@ #include #include -#include #ifdef CONFIG_PPC_PMAC +#include #include #endif From 5932ef077716e3e798eaba6738ef874849f62a17 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Sep 2006 01:26:02 +0100 Subject: [PATCH 0803/1063] [PATCH] sun4: fix sbus_setup_iommu() iommu_init() and iounit_init() are never called for sun4, but that's not enough - these calls should be ifdefed out since the functions in question simply do not exist for CONFIG_SUN4 kernel. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/sparc/kernel/ioport.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 8654b446ac9e..d33f8a07ccac 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -508,6 +508,7 @@ void __init sbus_arch_bus_ranges_init(struct device_node *pn, struct sbus_bus *s void __init sbus_setup_iommu(struct sbus_bus *sbus, struct device_node *dp) { +#ifndef CONFIG_SUN4 struct device_node *parent = dp->parent; if (sparc_cpu_model != sun4d && @@ -524,6 +525,7 @@ void __init sbus_setup_iommu(struct sbus_bus *sbus, struct device_node *dp) iounit_init(dp->node, parent->node, sbus); } +#endif } void __init sbus_setup_arch_props(struct sbus_bus *sbus, struct device_node *dp) From 956295d50dc5462722f029de64d44a7ecba54e69 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Sep 2006 01:27:30 +0100 Subject: [PATCH 0804/1063] [PATCH] fix the survivors of fbcon_vbl_handler() renaming In |Author: James Simmons |Date: Thu Mar 13 22:37:08 2003 -0800 | | [FBCON] Cursor handling clean up. I nuked several static variables. we have -static void fbcon_vbl_handler(int irq, void *dummy, struct pt_regs *fp) +static void fb_vbl_handler(int irq, void *dev_id, struct pt_regs *fp) and 3 years later a couple of instances missed back then still remains there. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- drivers/video/console/fbcon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c index 390439b3d899..1b4f75d1f8a9 100644 --- a/drivers/video/console/fbcon.c +++ b/drivers/video/console/fbcon.c @@ -3197,11 +3197,11 @@ static void fbcon_exit(void) return; #ifdef CONFIG_ATARI - free_irq(IRQ_AUTO_4, fbcon_vbl_handler); + free_irq(IRQ_AUTO_4, fb_vbl_handler); #endif #ifdef CONFIG_MAC if (MACH_IS_MAC && vbl_detected) - free_irq(IRQ_MAC_VBL, fbcon_vbl_handler); + free_irq(IRQ_MAC_VBL, fb_vbl_handler); #endif kfree((void *)softback_buf); From c03efdb202a4882f426ce49766859af4058c9b8a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Sep 2006 01:29:34 +0100 Subject: [PATCH 0805/1063] [PATCH] fallout from hcd-core patch missing le16_to_cpu() Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- drivers/usb/input/hid-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/input/hid-core.c b/drivers/usb/input/hid-core.c index a2c56b2de589..3305fb6079eb 100644 --- a/drivers/usb/input/hid-core.c +++ b/drivers/usb/input/hid-core.c @@ -1818,7 +1818,7 @@ static struct hid_device *usb_hid_configure(struct usb_interface *intf) int n, len, insize = 0; /* Ignore all Wacom devices */ - if (dev->descriptor.idVendor == USB_VENDOR_ID_WACOM) + if (le16_to_cpu(dev->descriptor.idVendor) == USB_VENDOR_ID_WACOM) return NULL; for (n = 0; hid_blacklist[n].idVendor; n++) From 1c3c07e9f6cc50dab2aeb8051325e317d4f6c70e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 25 Jul 2006 11:28:18 -0400 Subject: [PATCH 0806/1063] NFS: Add a new ACCESS rpc call cache to the linux nfs client The current access cache only allows one entry at a time to be cached for each inode. Add a per-inode red-black tree in order to allow more than one to be cached at a time. Should significantly cut down the time spent in path traversal for shared directories such as ${PATH}, /usr/share, etc. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 137 +++++++++++++++++++++++++++++++++++------ fs/nfs/inode.c | 13 ++-- include/linux/nfs_fs.h | 5 +- 3 files changed, 126 insertions(+), 29 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e7ffb4deb3e5..094afded2b11 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1638,35 +1638,134 @@ out: return error; } +static void nfs_access_free_entry(struct nfs_access_entry *entry) +{ + put_rpccred(entry->cred); + kfree(entry); +} + +static void __nfs_access_zap_cache(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct rb_root *root_node = &nfsi->access_cache; + struct rb_node *n, *dispose = NULL; + struct nfs_access_entry *entry; + + /* Unhook entries from the cache */ + while ((n = rb_first(root_node)) != NULL) { + entry = rb_entry(n, struct nfs_access_entry, rb_node); + rb_erase(n, root_node); + n->rb_left = dispose; + dispose = n; + } + nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS; + spin_unlock(&inode->i_lock); + + /* Now kill them all! */ + while (dispose != NULL) { + n = dispose; + dispose = n->rb_left; + nfs_access_free_entry(rb_entry(n, struct nfs_access_entry, rb_node)); + } +} + +void nfs_access_zap_cache(struct inode *inode) +{ + spin_lock(&inode->i_lock); + /* This will release the spinlock */ + __nfs_access_zap_cache(inode); +} + +static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred) +{ + struct rb_node *n = NFS_I(inode)->access_cache.rb_node; + struct nfs_access_entry *entry; + + while (n != NULL) { + entry = rb_entry(n, struct nfs_access_entry, rb_node); + + if (cred < entry->cred) + n = n->rb_left; + else if (cred > entry->cred) + n = n->rb_right; + else + return entry; + } + return NULL; +} + int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) { struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_access_entry *cache = &nfsi->cache_access; + struct nfs_access_entry *cache; + int err = -ENOENT; - if (cache->cred != cred - || time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)) - || (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)) - return -ENOENT; - memcpy(res, cache, sizeof(*res)); - return 0; + spin_lock(&inode->i_lock); + if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS) + goto out_zap; + cache = nfs_access_search_rbtree(inode, cred); + if (cache == NULL) + goto out; + if (time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))) + goto out_stale; + res->jiffies = cache->jiffies; + res->cred = cache->cred; + res->mask = cache->mask; + err = 0; +out: + spin_unlock(&inode->i_lock); + return err; +out_stale: + rb_erase(&cache->rb_node, &nfsi->access_cache); + spin_unlock(&inode->i_lock); + nfs_access_free_entry(cache); + return -ENOENT; +out_zap: + /* This will release the spinlock */ + __nfs_access_zap_cache(inode); + return -ENOENT; +} + +static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set) +{ + struct rb_root *root_node = &NFS_I(inode)->access_cache; + struct rb_node **p = &root_node->rb_node; + struct rb_node *parent = NULL; + struct nfs_access_entry *entry; + + spin_lock(&inode->i_lock); + while (*p != NULL) { + parent = *p; + entry = rb_entry(parent, struct nfs_access_entry, rb_node); + + if (set->cred < entry->cred) + p = &parent->rb_left; + else if (set->cred > entry->cred) + p = &parent->rb_right; + else + goto found; + } + rb_link_node(&set->rb_node, parent, p); + rb_insert_color(&set->rb_node, root_node); + spin_unlock(&inode->i_lock); + return; +found: + rb_replace_node(parent, &set->rb_node, root_node); + spin_unlock(&inode->i_lock); + nfs_access_free_entry(entry); } void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) { - struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_access_entry *cache = &nfsi->cache_access; - - if (cache->cred != set->cred) { - if (cache->cred) - put_rpccred(cache->cred); - cache->cred = get_rpccred(set->cred); - } - /* FIXME: replace current access_cache BKL reliance with inode->i_lock */ - spin_lock(&inode->i_lock); - nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS; - spin_unlock(&inode->i_lock); + struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL); + if (cache == NULL) + return; + RB_CLEAR_NODE(&cache->rb_node); cache->jiffies = set->jiffies; + cache->cred = get_rpccred(set->cred); cache->mask = set->mask; + + nfs_access_add_rbtree(inode, cache); } static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d349fb2245da..b94ab060bb1e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -76,19 +76,14 @@ int nfs_write_inode(struct inode *inode, int sync) void nfs_clear_inode(struct inode *inode) { - struct nfs_inode *nfsi = NFS_I(inode); - struct rpc_cred *cred; - /* * The following should never happen... */ BUG_ON(nfs_have_writebacks(inode)); - BUG_ON (!list_empty(&nfsi->open_files)); + BUG_ON(!list_empty(&NFS_I(inode)->open_files)); + BUG_ON(atomic_read(&NFS_I(inode)->data_updates) != 0); nfs_zap_acl_cache(inode); - cred = nfsi->cache_access.cred; - if (cred) - put_rpccred(cred); - BUG_ON(atomic_read(&nfsi->data_updates) != 0); + nfs_access_zap_cache(inode); } /** @@ -290,7 +285,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); - nfsi->cache_access.cred = NULL; + nfsi->access_cache = RB_ROOT; unlock_new_inode(inode); } else diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 6c2066caeaab..cc013ed2e52e 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -69,6 +70,7 @@ * NFSv3/v4 Access mode cache entry */ struct nfs_access_entry { + struct rb_node rb_node; unsigned long jiffies; struct rpc_cred * cred; int mask; @@ -145,7 +147,7 @@ struct nfs_inode { */ atomic_t data_updates; - struct nfs_access_entry cache_access; + struct rb_root access_cache; #ifdef CONFIG_NFS_V3_ACL struct posix_acl *acl_access; struct posix_acl *acl_default; @@ -297,6 +299,7 @@ extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int nfs_permission(struct inode *, int, struct nameidata *); extern int nfs_access_get_cached(struct inode *, struct rpc_cred *, struct nfs_access_entry *); extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); +extern void nfs_access_zap_cache(struct inode *inode); extern int nfs_open(struct inode *, struct file *); extern int nfs_release(struct inode *, struct file *); extern int nfs_attribute_timeout(struct inode *inode); From cfcea3e8c66c2dcde98d5c2693d4bff50b5cac97 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 25 Jul 2006 11:28:18 -0400 Subject: [PATCH 0807/1063] NFS: Add a global LRU list for the ACCESS cache ...in order to allow the addition of a memory shrinker. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 35 ++++++++++++++++++++++++++++++++++- fs/nfs/inode.c | 2 ++ include/linux/nfs_fs.h | 4 ++++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 094afded2b11..bf4f5ffda703 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1638,10 +1638,17 @@ out: return error; } +static DEFINE_SPINLOCK(nfs_access_lru_lock); +static LIST_HEAD(nfs_access_lru_list); +static atomic_long_t nfs_access_nr_entries; + static void nfs_access_free_entry(struct nfs_access_entry *entry) { put_rpccred(entry->cred); kfree(entry); + smp_mb__before_atomic_dec(); + atomic_long_dec(&nfs_access_nr_entries); + smp_mb__after_atomic_dec(); } static void __nfs_access_zap_cache(struct inode *inode) @@ -1655,6 +1662,7 @@ static void __nfs_access_zap_cache(struct inode *inode) while ((n = rb_first(root_node)) != NULL) { entry = rb_entry(n, struct nfs_access_entry, rb_node); rb_erase(n, root_node); + list_del(&entry->lru); n->rb_left = dispose; dispose = n; } @@ -1671,6 +1679,13 @@ static void __nfs_access_zap_cache(struct inode *inode) void nfs_access_zap_cache(struct inode *inode) { + /* Remove from global LRU init */ + if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) { + spin_lock(&nfs_access_lru_lock); + list_del_init(&NFS_I(inode)->access_cache_inode_lru); + spin_unlock(&nfs_access_lru_lock); + } + spin_lock(&inode->i_lock); /* This will release the spinlock */ __nfs_access_zap_cache(inode); @@ -1711,12 +1726,14 @@ int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs res->jiffies = cache->jiffies; res->cred = cache->cred; res->mask = cache->mask; + list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru); err = 0; out: spin_unlock(&inode->i_lock); return err; out_stale: rb_erase(&cache->rb_node, &nfsi->access_cache); + list_del(&cache->lru); spin_unlock(&inode->i_lock); nfs_access_free_entry(cache); return -ENOENT; @@ -1728,7 +1745,8 @@ out_zap: static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set) { - struct rb_root *root_node = &NFS_I(inode)->access_cache; + struct nfs_inode *nfsi = NFS_I(inode); + struct rb_root *root_node = &nfsi->access_cache; struct rb_node **p = &root_node->rb_node; struct rb_node *parent = NULL; struct nfs_access_entry *entry; @@ -1747,10 +1765,13 @@ static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry * } rb_link_node(&set->rb_node, parent, p); rb_insert_color(&set->rb_node, root_node); + list_add_tail(&set->lru, &nfsi->access_cache_entry_lru); spin_unlock(&inode->i_lock); return; found: rb_replace_node(parent, &set->rb_node, root_node); + list_add_tail(&set->lru, &nfsi->access_cache_entry_lru); + list_del(&entry->lru); spin_unlock(&inode->i_lock); nfs_access_free_entry(entry); } @@ -1766,6 +1787,18 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) cache->mask = set->mask; nfs_access_add_rbtree(inode, cache); + + /* Update accounting */ + smp_mb__before_atomic_inc(); + atomic_long_inc(&nfs_access_nr_entries); + smp_mb__after_atomic_inc(); + + /* Add inode to global LRU list */ + if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) { + spin_lock(&nfs_access_lru_lock); + list_add_tail(&NFS_I(inode)->access_cache_inode_lru, &nfs_access_lru_list); + spin_unlock(&nfs_access_lru_lock); + } } static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b94ab060bb1e..6ed018c9aad2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1104,6 +1104,8 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) INIT_LIST_HEAD(&nfsi->dirty); INIT_LIST_HEAD(&nfsi->commit); INIT_LIST_HEAD(&nfsi->open_files); + INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); + INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); atomic_set(&nfsi->data_updates, 0); nfsi->ndirty = 0; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index cc013ed2e52e..a36e01cd6321 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -71,6 +71,7 @@ */ struct nfs_access_entry { struct rb_node rb_node; + struct list_head lru; unsigned long jiffies; struct rpc_cred * cred; int mask; @@ -148,6 +149,8 @@ struct nfs_inode { atomic_t data_updates; struct rb_root access_cache; + struct list_head access_cache_entry_lru; + struct list_head access_cache_inode_lru; #ifdef CONFIG_NFS_V3_ACL struct posix_acl *acl_access; struct posix_acl *acl_default; @@ -201,6 +204,7 @@ struct nfs_inode { #define NFS_INO_REVALIDATING (0) /* revalidating attrs */ #define NFS_INO_ADVISE_RDPLUS (1) /* advise readdirplus */ #define NFS_INO_STALE (2) /* possible stale inode */ +#define NFS_INO_ACL_LRU_SET (3) /* Inode is on the LRU list */ static inline struct nfs_inode *NFS_I(struct inode *inode) { From 979df72e6f963b42ee484f2eca049c3344da0ba7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 25 Jul 2006 11:28:19 -0400 Subject: [PATCH 0808/1063] NFS: Add an ACCESS cache memory shrinker A pinned inode may in theory end up filling memory with cached ACCESS calls. This patch ensures that the VM may shrink away the cache in these particular cases. The shrinker works by iterating through the list of inodes on the global nfs_access_lru_list, and removing the least recently used access cache entry until it is done (or until the entire cache is empty). Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/internal.h | 3 +++ fs/nfs/super.c | 5 +++++ 3 files changed, 52 insertions(+) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index bf4f5ffda703..067d144d141b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1651,6 +1651,50 @@ static void nfs_access_free_entry(struct nfs_access_entry *entry) smp_mb__after_atomic_dec(); } +int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) +{ + LIST_HEAD(head); + struct nfs_inode *nfsi; + struct nfs_access_entry *cache; + + spin_lock(&nfs_access_lru_lock); +restart: + list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) { + struct inode *inode; + + if (nr_to_scan-- == 0) + break; + inode = igrab(&nfsi->vfs_inode); + if (inode == NULL) + continue; + spin_lock(&inode->i_lock); + if (list_empty(&nfsi->access_cache_entry_lru)) + goto remove_lru_entry; + cache = list_entry(nfsi->access_cache_entry_lru.next, + struct nfs_access_entry, lru); + list_move(&cache->lru, &head); + rb_erase(&cache->rb_node, &nfsi->access_cache); + if (!list_empty(&nfsi->access_cache_entry_lru)) + list_move_tail(&nfsi->access_cache_inode_lru, + &nfs_access_lru_list); + else { +remove_lru_entry: + list_del_init(&nfsi->access_cache_inode_lru); + clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags); + } + spin_unlock(&inode->i_lock); + iput(inode); + goto restart; + } + spin_unlock(&nfs_access_lru_lock); + while (!list_empty(&head)) { + cache = list_entry(head.next, struct nfs_access_entry, lru); + list_del(&cache->lru); + nfs_access_free_entry(cache); + } + return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure; +} + static void __nfs_access_zap_cache(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index e4f4e5def0fc..660e9ff5341c 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -66,6 +66,9 @@ extern int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry, struct page *page); #endif +/* dir.c */ +extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask); + /* inode.c */ extern struct inode *nfs_alloc_inode(struct super_block *sb); extern void nfs_destroy_inode(struct inode *); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e8a9bee74d9d..06c321beacfe 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -221,6 +221,8 @@ module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int, &nfs_idmap_cache_timeout, 0644); #endif +static struct shrinker *acl_shrinker; + /* * Register the NFS filesystems */ @@ -240,6 +242,7 @@ int __init register_nfs_fs(void) if (ret < 0) goto error_2; #endif + acl_shrinker = set_shrinker(DEFAULT_SEEKS, nfs_access_cache_shrinker); return 0; #ifdef CONFIG_NFS_V4 @@ -257,6 +260,8 @@ error_0: */ void __exit unregister_nfs_fs(void) { + if (acl_shrinker != NULL) + remove_shrinker(acl_shrinker); #ifdef CONFIG_NFS_V4 unregister_filesystem(&nfs4_fs_type); nfs_unregister_sysctl(); From 770bfad846ab6628444428467b11fa6773ae9ea1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:07 -0400 Subject: [PATCH 0809/1063] NFS: Add dentry materialisation op The attached patch adds a new directory cache management function that prepares a disconnected anonymous function to be connected into the dentry tree. The anonymous dentry is transferred the name and parentage from another dentry. The following changes were made in [try #2]: (*) d_materialise_dentry() now switches the parentage of the two nodes around correctly when one or other of them is self-referential. The following changes were made in [try #7]: (*) d_instantiate_unique() has had the interior part split out as function __d_instantiate_unique(). Callers of this latter function must be holding the appropriate locks. (*) _d_rehash() has been added as a wrapper around __d_rehash() to call it with the most obvious hash list (the one from the name). d_rehash() now calls _d_rehash(). (*) d_materialise_dentry() is now __d_materialise_dentry() and is static. (*) d_materialise_unique() added to perform the combination of d_find_alias(), d_materialise_dentry() and d_add_unique() that the NFS client was doing twice, all within a single dcache_lock critical section. This reduces the number of times two different spinlocks were being accessed. The following further changes were made: (*) Add the dentries onto their parents d_subdirs lists. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/dcache.c | 164 +++++++++++++++++++++++++++++++++++++---- include/linux/dcache.h | 1 + 2 files changed, 151 insertions(+), 14 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 1b4a3a34ec57..17b392a2049e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -828,17 +828,19 @@ void d_instantiate(struct dentry *entry, struct inode * inode) * (or otherwise set) by the caller to indicate that it is now * in use by the dcache. */ -struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) +static struct dentry *__d_instantiate_unique(struct dentry *entry, + struct inode *inode) { struct dentry *alias; int len = entry->d_name.len; const char *name = entry->d_name.name; unsigned int hash = entry->d_name.hash; - BUG_ON(!list_empty(&entry->d_alias)); - spin_lock(&dcache_lock); - if (!inode) - goto do_negative; + if (!inode) { + entry->d_inode = NULL; + return NULL; + } + list_for_each_entry(alias, &inode->i_dentry, d_alias) { struct qstr *qstr = &alias->d_name; @@ -851,19 +853,35 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) if (memcmp(qstr->name, name, len)) continue; dget_locked(alias); - spin_unlock(&dcache_lock); - BUG_ON(!d_unhashed(alias)); - iput(inode); return alias; } + list_add(&entry->d_alias, &inode->i_dentry); -do_negative: entry->d_inode = inode; fsnotify_d_instantiate(entry, inode); - spin_unlock(&dcache_lock); - security_d_instantiate(entry, inode); return NULL; } + +struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) +{ + struct dentry *result; + + BUG_ON(!list_empty(&entry->d_alias)); + + spin_lock(&dcache_lock); + result = __d_instantiate_unique(entry, inode); + spin_unlock(&dcache_lock); + + if (!result) { + security_d_instantiate(entry, inode); + return NULL; + } + + BUG_ON(!d_unhashed(result)); + iput(inode); + return result; +} + EXPORT_SYMBOL(d_instantiate_unique); /** @@ -1235,6 +1253,11 @@ static void __d_rehash(struct dentry * entry, struct hlist_head *list) hlist_add_head_rcu(&entry->d_hash, list); } +static void _d_rehash(struct dentry * entry) +{ + __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash)); +} + /** * d_rehash - add an entry back to the hash * @entry: dentry to add to the hash @@ -1244,11 +1267,9 @@ static void __d_rehash(struct dentry * entry, struct hlist_head *list) void d_rehash(struct dentry * entry) { - struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash); - spin_lock(&dcache_lock); spin_lock(&entry->d_lock); - __d_rehash(entry, list); + _d_rehash(entry); spin_unlock(&entry->d_lock); spin_unlock(&dcache_lock); } @@ -1386,6 +1407,120 @@ already_unhashed: spin_unlock(&dcache_lock); } +/* + * Prepare an anonymous dentry for life in the superblock's dentry tree as a + * named dentry in place of the dentry to be replaced. + */ +static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) +{ + struct dentry *dparent, *aparent; + + switch_names(dentry, anon); + do_switch(dentry->d_name.len, anon->d_name.len); + do_switch(dentry->d_name.hash, anon->d_name.hash); + + dparent = dentry->d_parent; + aparent = anon->d_parent; + + dentry->d_parent = (aparent == anon) ? dentry : aparent; + list_del(&dentry->d_u.d_child); + if (!IS_ROOT(dentry)) + list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); + else + INIT_LIST_HEAD(&dentry->d_u.d_child); + + anon->d_parent = (dparent == dentry) ? anon : dparent; + list_del(&anon->d_u.d_child); + if (!IS_ROOT(anon)) + list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs); + else + INIT_LIST_HEAD(&anon->d_u.d_child); + + anon->d_flags &= ~DCACHE_DISCONNECTED; +} + +/** + * d_materialise_unique - introduce an inode into the tree + * @dentry: candidate dentry + * @inode: inode to bind to the dentry, to which aliases may be attached + * + * Introduces an dentry into the tree, substituting an extant disconnected + * root directory alias in its place if there is one + */ +struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) +{ + struct dentry *alias, *actual; + + BUG_ON(!d_unhashed(dentry)); + + spin_lock(&dcache_lock); + + if (!inode) { + actual = dentry; + dentry->d_inode = NULL; + goto found_lock; + } + + /* See if a disconnected directory already exists as an anonymous root + * that we should splice into the tree instead */ + if (S_ISDIR(inode->i_mode) && (alias = __d_find_alias(inode, 1))) { + spin_lock(&alias->d_lock); + + /* Is this a mountpoint that we could splice into our tree? */ + if (IS_ROOT(alias)) + goto connect_mountpoint; + + if (alias->d_name.len == dentry->d_name.len && + alias->d_parent == dentry->d_parent && + memcmp(alias->d_name.name, + dentry->d_name.name, + dentry->d_name.len) == 0) + goto replace_with_alias; + + spin_unlock(&alias->d_lock); + + /* Doh! Seem to be aliasing directories for some reason... */ + dput(alias); + } + + /* Add a unique reference */ + actual = __d_instantiate_unique(dentry, inode); + if (!actual) + actual = dentry; + else if (unlikely(!d_unhashed(actual))) + goto shouldnt_be_hashed; + +found_lock: + spin_lock(&actual->d_lock); +found: + _d_rehash(actual); + spin_unlock(&actual->d_lock); + spin_unlock(&dcache_lock); + + if (actual == dentry) { + security_d_instantiate(dentry, inode); + return NULL; + } + + iput(inode); + return actual; + + /* Convert the anonymous/root alias into an ordinary dentry */ +connect_mountpoint: + __d_materialise_dentry(dentry, alias); + + /* Replace the candidate dentry with the alias in the tree */ +replace_with_alias: + __d_drop(alias); + actual = alias; + goto found; + +shouldnt_be_hashed: + spin_unlock(&dcache_lock); + BUG(); + goto shouldnt_be_hashed; +} + /** * d_path - return the path of a dentry * @dentry: dentry to report @@ -1784,6 +1919,7 @@ EXPORT_SYMBOL(d_instantiate); EXPORT_SYMBOL(d_invalidate); EXPORT_SYMBOL(d_lookup); EXPORT_SYMBOL(d_move); +EXPORT_SYMBOL_GPL(d_materialise_unique); EXPORT_SYMBOL(d_path); EXPORT_SYMBOL(d_prune_aliases); EXPORT_SYMBOL(d_rehash); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 471781ffeab1..44605be59409 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -221,6 +221,7 @@ static inline int dname_external(struct dentry *dentry) */ extern void d_instantiate(struct dentry *, struct inode *); extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *); +extern struct dentry * d_materialise_unique(struct dentry *, struct inode *); extern void d_delete(struct dentry *); /* allocate/de-allocate */ From 7d4e2747a0412583526a162fbbd6edeeafcceb08 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:07 -0400 Subject: [PATCH 0810/1063] NFS: Fix up split of fs/nfs/inode.c Fix ups for the splitting of the superblock stuff out of fs/nfs/inode.c, including: (*) Move the callback tcpport module param into callback.c. (*) Move the idmap cache timeout module param into idmap.c. (*) Changes to internal.h: (*) namespace-nfs4.c was renamed to nfs4namespace.c. (*) nfs_stat_to_errno() is in nfs2xdr.c, not nfs4xdr.c. (*) nfs4xdr.c is contingent on CONFIG_NFS_V4. (*) nfs4_path() is only uses if CONFIG_NFS_V4 is set. Plus also: (*) The sec_flavours[] table should really be const. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 15 +++++++++++++++ fs/nfs/idmap.c | 14 ++++++++++++++ fs/nfs/internal.h | 12 ++++++------ fs/nfs/super.c | 40 ++++------------------------------------ 4 files changed, 39 insertions(+), 42 deletions(-) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index fe0a6b8ac149..d6c4bae14bb9 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -36,6 +36,21 @@ static struct svc_program nfs4_callback_program; unsigned int nfs_callback_set_tcpport; unsigned short nfs_callback_tcpport; +static const int nfs_set_port_min = 0; +static const int nfs_set_port_max = 65535; + +static int param_set_port(const char *val, struct kernel_param *kp) +{ + char *endp; + int num = simple_strtol(val, &endp, 0); + if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max) + return -EINVAL; + *((int *)kp->arg) = num; + return 0; +} + +module_param_call(callback_tcpport, param_set_port, param_get_int, + &nfs_callback_set_tcpport, 0644); /* * This is the callback kernel thread. diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 07a5dd57646e..873deb96a6cd 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -57,6 +57,20 @@ /* Default cache timeout is 10 minutes */ unsigned int nfs_idmap_cache_timeout = 600 * HZ; +static int param_set_idmap_timeout(const char *val, struct kernel_param *kp) +{ + char *endp; + int num = simple_strtol(val, &endp, 0); + int jif = num * HZ; + if (endp == val || *endp || num < 0 || jif < num) + return -EINVAL; + *((int *)kp->arg) = jif; + return 0; +} + +module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int, + &nfs_idmap_cache_timeout, 0644); + struct idmap_hashent { unsigned long ih_expires; __u32 ih_id; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 660e9ff5341c..4802157963f8 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -15,7 +15,7 @@ struct nfs_clone_mount { rpc_authflavor_t authflavor; }; -/* namespace-nfs4.c */ +/* nfs4namespace.c */ #ifdef CONFIG_NFS_V4 extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry); #else @@ -46,6 +46,7 @@ extern void nfs_destroy_directcache(void); #endif /* nfs2xdr.c */ +extern int nfs_stat_to_errno(int); extern struct rpc_procinfo nfs_procedures[]; extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int); @@ -54,8 +55,9 @@ extern struct rpc_procinfo nfs3_procedures[]; extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int); /* nfs4xdr.c */ -extern int nfs_stat_to_errno(int); +#ifdef CONFIG_NFS_V4 extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); +#endif /* nfs4proc.c */ #ifdef CONFIG_NFS_V4 @@ -97,15 +99,13 @@ extern char *nfs_path(const char *base, const struct dentry *dentry, /* * Determine the mount path as a string */ +#ifdef CONFIG_NFS_V4 static inline char * nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen) { -#ifdef CONFIG_NFS_V4 return nfs_path(NFS_SB(dentry->d_sb)->mnt_path, dentry, buffer, buflen); -#else - return NULL; -#endif } +#endif /* * Determine the device name as a string diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 06c321beacfe..63497345806b 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -187,40 +187,6 @@ static struct super_operations nfs4_sops = { }; #endif -#ifdef CONFIG_NFS_V4 -static const int nfs_set_port_min = 0; -static const int nfs_set_port_max = 65535; - -static int param_set_port(const char *val, struct kernel_param *kp) -{ - char *endp; - int num = simple_strtol(val, &endp, 0); - if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max) - return -EINVAL; - *((int *)kp->arg) = num; - return 0; -} - -module_param_call(callback_tcpport, param_set_port, param_get_int, - &nfs_callback_set_tcpport, 0644); -#endif - -#ifdef CONFIG_NFS_V4 -static int param_set_idmap_timeout(const char *val, struct kernel_param *kp) -{ - char *endp; - int num = simple_strtol(val, &endp, 0); - int jif = num * HZ; - if (endp == val || *endp || num < 0 || jif < num) - return -EINVAL; - *((int *)kp->arg) = jif; - return 0; -} - -module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int, - &nfs_idmap_cache_timeout, 0644); -#endif - static struct shrinker *acl_shrinker; /* @@ -328,9 +294,12 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) } +/* + * Map the security flavour number to a name + */ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour) { - static struct { + static const struct { rpc_authflavor_t flavour; const char *str; } sec_flavours[] = { @@ -1368,7 +1337,6 @@ static int nfs4_get_sb(struct file_system_type *fs_type, } s = sget(fs_type, nfs4_compare_super, nfs_set_super, server); - if (IS_ERR(s)) { error = PTR_ERR(s); goto out_free; From 0a8ea4372b2868842986118ca90912f3382e6c5a Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:08 -0400 Subject: [PATCH 0811/1063] NFS: Disambiguate nfs_stat_to_errno() Rename the NFS4 version of nfs_stat_to_errno() so that it doesn't conflict with the common one used by NFS2 and NFS3. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 730ec8fb31c6..1dee6ef7e5a9 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -58,7 +58,7 @@ /* Mapping from NFS error code to "errno" error code. */ #define errno_NFSERR_IO EIO -static int nfs_stat_to_errno(int); +static int nfs4_stat_to_errno(int); /* NFSv4 COMPOUND tags are only wanted for debugging purposes */ #ifdef DEBUG @@ -2127,7 +2127,7 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) } READ32(nfserr); if (nfserr != NFS_OK) - return -nfs_stat_to_errno(nfserr); + return -nfs4_stat_to_errno(nfserr); return 0; } @@ -3598,7 +3598,7 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp) READ_BUF(len); return -NFSERR_CLID_INUSE; } else - return -nfs_stat_to_errno(nfserr); + return -nfs4_stat_to_errno(nfserr); return 0; } @@ -4256,7 +4256,7 @@ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs_fsi if (!status) status = decode_fsinfo(&xdr, fsinfo); if (!status) - status = -nfs_stat_to_errno(hdr.status); + status = -nfs4_stat_to_errno(hdr.status); return status; } @@ -4346,7 +4346,7 @@ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p, if (!status) status = decode_setclientid(&xdr, clp); if (!status) - status = -nfs_stat_to_errno(hdr.status); + status = -nfs4_stat_to_errno(hdr.status); return status; } @@ -4368,7 +4368,7 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, s if (!status) status = decode_fsinfo(&xdr, fsinfo); if (!status) - status = -nfs_stat_to_errno(hdr.status); + status = -nfs4_stat_to_errno(hdr.status); return status; } @@ -4521,7 +4521,7 @@ static struct { * This one is used jointly by NFSv2 and NFSv3. */ static int -nfs_stat_to_errno(int stat) +nfs4_stat_to_errno(int stat) { int i; for (i = 0; nfs_errtbl[i].stat != -1; i++) { From 5ae1fbce142b67bf59e15fb1af96e88a96abde7b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:08 -0400 Subject: [PATCH 0812/1063] NFS: Fix NFS4 callback up/down prototypes Make the nfs_callback_up()/down() prototypes just do nothing if NFS4 is not enabled. Also make the down function void type since we can't really do anything if it fails. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 5 +---- fs/nfs/callback.h | 7 ++++++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index d6c4bae14bb9..b1f7dc415392 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -149,10 +149,8 @@ out_err: /* * Kill the server process if it is not already up. */ -int nfs_callback_down(void) +void nfs_callback_down(void) { - int ret = 0; - lock_kernel(); mutex_lock(&nfs_callback_mutex); nfs_callback_info.users--; @@ -164,7 +162,6 @@ int nfs_callback_down(void) } while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0); mutex_unlock(&nfs_callback_mutex); unlock_kernel(); - return ret; } static int nfs_callback_authenticate(struct svc_rqst *rqstp) diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index b252e7fe53a5..5676163d26e8 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -62,8 +62,13 @@ struct cb_recallargs { extern unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res); extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy); +#ifdef CONFIG_NFS_V4 extern int nfs_callback_up(void); -extern int nfs_callback_down(void); +extern void nfs_callback_down(void); +#else +#define nfs_callback_up() (0) +#define nfs_callback_down() do {} while(0) +#endif extern unsigned int nfs_callback_set_tcpport; extern unsigned short nfs_callback_tcpport; From adfa6f980bd46974e6b32b22dd0c45e3f52063f4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:08 -0400 Subject: [PATCH 0813/1063] NFS: Rename struct nfs4_client to struct nfs_client Rename struct nfs4_client to struct nfs_client so that it can become the basis for a general client record for NFS2 and NFS3 in addition to NFS4. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 2 +- fs/nfs/callback_proc.c | 4 ++-- fs/nfs/delegation.c | 24 +++++++++---------- fs/nfs/delegation.h | 10 ++++---- fs/nfs/idmap.c | 12 +++++----- fs/nfs/nfs4_fs.h | 30 +++++++++++------------ fs/nfs/nfs4proc.c | 32 ++++++++++++------------- fs/nfs/nfs4renewd.c | 8 +++---- fs/nfs/nfs4state.c | 50 +++++++++++++++++++-------------------- fs/nfs/nfs4xdr.c | 18 +++++++------- fs/nfs/super.c | 4 ++-- include/linux/nfs_fs_sb.h | 2 +- include/linux/nfs_idmap.h | 14 +++++------ 13 files changed, 105 insertions(+), 105 deletions(-) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index b1f7dc415392..1b596b6d9dc2 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -167,7 +167,7 @@ void nfs_callback_down(void) static int nfs_callback_authenticate(struct svc_rqst *rqstp) { struct in_addr *addr = &rqstp->rq_addr.sin_addr; - struct nfs4_client *clp; + struct nfs_client *clp; /* Don't talk to strangers */ clp = nfs4_find_client(addr); diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 7719483ecdfc..55d6e2ec157f 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -15,7 +15,7 @@ unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res) { - struct nfs4_client *clp; + struct nfs_client *clp; struct nfs_delegation *delegation; struct nfs_inode *nfsi; struct inode *inode; @@ -56,7 +56,7 @@ out: unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy) { - struct nfs4_client *clp; + struct nfs_client *clp; struct inode *inode; unsigned res; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 9540a316c05e..5a1105c258bd 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -114,7 +114,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st */ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) { - struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; int status = 0; @@ -176,7 +176,7 @@ static void nfs_msync_inode(struct inode *inode) */ int __nfs_inode_return_delegation(struct inode *inode) { - struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; int res = 0; @@ -208,7 +208,7 @@ int __nfs_inode_return_delegation(struct inode *inode) */ void nfs_return_all_delegations(struct super_block *sb) { - struct nfs4_client *clp = NFS_SB(sb)->nfs4_state; + struct nfs_client *clp = NFS_SB(sb)->nfs4_state; struct nfs_delegation *delegation; struct inode *inode; @@ -232,7 +232,7 @@ restart: int nfs_do_expire_all_delegations(void *ptr) { - struct nfs4_client *clp = ptr; + struct nfs_client *clp = ptr; struct nfs_delegation *delegation; struct inode *inode; @@ -258,7 +258,7 @@ out: module_put_and_exit(0); } -void nfs_expire_all_delegations(struct nfs4_client *clp) +void nfs_expire_all_delegations(struct nfs_client *clp) { struct task_struct *task; @@ -276,7 +276,7 @@ void nfs_expire_all_delegations(struct nfs4_client *clp) /* * Return all delegations following an NFS4ERR_CB_PATH_DOWN error. */ -void nfs_handle_cb_pathdown(struct nfs4_client *clp) +void nfs_handle_cb_pathdown(struct nfs_client *clp) { struct nfs_delegation *delegation; struct inode *inode; @@ -299,7 +299,7 @@ restart: struct recall_threadargs { struct inode *inode; - struct nfs4_client *clp; + struct nfs_client *clp; const nfs4_stateid *stateid; struct completion started; @@ -310,7 +310,7 @@ static int recall_thread(void *data) { struct recall_threadargs *args = (struct recall_threadargs *)data; struct inode *inode = igrab(args->inode); - struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; @@ -371,7 +371,7 @@ out_module_put: /* * Retrieve the inode associated with a delegation */ -struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle) +struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle) { struct nfs_delegation *delegation; struct inode *res = NULL; @@ -389,7 +389,7 @@ struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nf /* * Mark all delegations as needing to be reclaimed */ -void nfs_delegation_mark_reclaim(struct nfs4_client *clp) +void nfs_delegation_mark_reclaim(struct nfs_client *clp) { struct nfs_delegation *delegation; spin_lock(&clp->cl_lock); @@ -401,7 +401,7 @@ void nfs_delegation_mark_reclaim(struct nfs4_client *clp) /* * Reap all unclaimed delegations after reboot recovery is done */ -void nfs_delegation_reap_unclaimed(struct nfs4_client *clp) +void nfs_delegation_reap_unclaimed(struct nfs_client *clp) { struct nfs_delegation *delegation, *n; LIST_HEAD(head); @@ -423,7 +423,7 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp) int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) { - struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; int res = 0; diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 3858694652fa..2cfd4b24c7fe 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -29,13 +29,13 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st int __nfs_inode_return_delegation(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); -struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle); +struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); void nfs_return_all_delegations(struct super_block *sb); -void nfs_expire_all_delegations(struct nfs4_client *clp); -void nfs_handle_cb_pathdown(struct nfs4_client *clp); +void nfs_expire_all_delegations(struct nfs_client *clp); +void nfs_handle_cb_pathdown(struct nfs_client *clp); -void nfs_delegation_mark_reclaim(struct nfs4_client *clp); -void nfs_delegation_reap_unclaimed(struct nfs4_client *clp); +void nfs_delegation_mark_reclaim(struct nfs_client *clp); +void nfs_delegation_reap_unclaimed(struct nfs_client *clp); /* NFSv4 delegation-related procedures */ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid); diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 873deb96a6cd..d05148ec9414 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -109,7 +109,7 @@ static struct rpc_pipe_ops idmap_upcall_ops = { }; void -nfs_idmap_new(struct nfs4_client *clp) +nfs_idmap_new(struct nfs_client *clp) { struct idmap *idmap; @@ -138,7 +138,7 @@ nfs_idmap_new(struct nfs4_client *clp) } void -nfs_idmap_delete(struct nfs4_client *clp) +nfs_idmap_delete(struct nfs_client *clp) { struct idmap *idmap = clp->cl_idmap; @@ -491,27 +491,27 @@ static unsigned int fnvhash32(const void *buf, size_t buflen) return (hash); } -int nfs_map_name_to_uid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid) +int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) { struct idmap *idmap = clp->cl_idmap; return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); } -int nfs_map_group_to_gid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid) +int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) { struct idmap *idmap = clp->cl_idmap; return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); } -int nfs_map_uid_to_name(struct nfs4_client *clp, __u32 uid, char *buf) +int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf) { struct idmap *idmap = clp->cl_idmap; return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); } -int nfs_map_gid_to_group(struct nfs4_client *clp, __u32 uid, char *buf) +int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf) { struct idmap *idmap = clp->cl_idmap; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 9a102860df37..4e334cb48498 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -43,9 +43,9 @@ enum nfs4_client_state { }; /* - * The nfs4_client identifies our client state to the server. + * The nfs_client identifies our client state to the server. */ -struct nfs4_client { +struct nfs_client { struct list_head cl_servers; /* Global list of servers */ struct in_addr cl_addr; /* Server identifier */ u64 cl_clientid; /* constant */ @@ -127,7 +127,7 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status struct nfs4_state_owner { spinlock_t so_lock; struct list_head so_list; /* per-clientid list of state_owners */ - struct nfs4_client *so_client; + struct nfs_client *so_client; u32 so_id; /* 32-bit identifier, unique */ atomic_t so_count; @@ -210,10 +210,10 @@ extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t); /* nfs4proc.c */ extern int nfs4_map_errors(int err); -extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short, struct rpc_cred *); -extern int nfs4_proc_setclientid_confirm(struct nfs4_client *, struct rpc_cred *); -extern int nfs4_proc_async_renew(struct nfs4_client *, struct rpc_cred *); -extern int nfs4_proc_renew(struct nfs4_client *, struct rpc_cred *); +extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *); +extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *); +extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); +extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state); extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); @@ -231,19 +231,19 @@ extern const u32 nfs4_fsinfo_bitmap[2]; extern const u32 nfs4_fs_locations_bitmap[2]; /* nfs4renewd.c */ -extern void nfs4_schedule_state_renewal(struct nfs4_client *); +extern void nfs4_schedule_state_renewal(struct nfs_client *); extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); -extern void nfs4_kill_renewd(struct nfs4_client *); +extern void nfs4_kill_renewd(struct nfs_client *); extern void nfs4_renew_state(void *); /* nfs4state.c */ extern void init_nfsv4_state(struct nfs_server *); extern void destroy_nfsv4_state(struct nfs_server *); -extern struct nfs4_client *nfs4_get_client(struct in_addr *); -extern void nfs4_put_client(struct nfs4_client *clp); -extern struct nfs4_client *nfs4_find_client(struct in_addr *); -struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp); -extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *); +extern struct nfs_client *nfs4_get_client(struct in_addr *); +extern void nfs4_put_client(struct nfs_client *clp); +extern struct nfs_client *nfs4_find_client(struct in_addr *); +struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp); +extern u32 nfs4_alloc_lockowner_id(struct nfs_client *); extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); extern void nfs4_put_state_owner(struct nfs4_state_owner *); @@ -252,7 +252,7 @@ extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_close_state(struct nfs4_state *, mode_t); extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t); -extern void nfs4_schedule_state_recovery(struct nfs4_client *); +extern void nfs4_schedule_state_recovery(struct nfs_client *); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b14145b7b87f..168f3ffb059f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -64,7 +64,7 @@ static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinf static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *); static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); -static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp); +static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp); /* Prevent leaks of NFSv4 errors into userland */ int nfs4_map_errors(int err) @@ -195,7 +195,7 @@ static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry, static void renew_lease(const struct nfs_server *server, unsigned long timestamp) { - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs4_state; spin_lock(&clp->cl_lock); if (time_before(clp->cl_last_renewal,timestamp)) clp->cl_last_renewal = timestamp; @@ -792,7 +792,7 @@ out: int nfs4_recover_expired_lease(struct nfs_server *server) { - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs4_state; if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) nfs4_schedule_state_recovery(clp); @@ -867,7 +867,7 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred { struct nfs_delegation *delegation; struct nfs_server *server = NFS_SERVER(inode); - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs4_state; struct nfs_inode *nfsi = NFS_I(inode); struct nfs4_state_owner *sp = NULL; struct nfs4_state *state = NULL; @@ -953,7 +953,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st struct nfs4_state_owner *sp; struct nfs4_state *state = NULL; struct nfs_server *server = NFS_SERVER(dir); - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs4_state; struct nfs4_opendata *opendata; int status; @@ -2521,7 +2521,7 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how) */ static void nfs4_renew_done(struct rpc_task *task, void *data) { - struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp; + struct nfs_client *clp = (struct nfs_client *)task->tk_msg.rpc_argp; unsigned long timestamp = (unsigned long)data; if (task->tk_status < 0) { @@ -2543,7 +2543,7 @@ static const struct rpc_call_ops nfs4_renew_ops = { .rpc_call_done = nfs4_renew_done, }; -int nfs4_proc_async_renew(struct nfs4_client *clp, struct rpc_cred *cred) +int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], @@ -2555,7 +2555,7 @@ int nfs4_proc_async_renew(struct nfs4_client *clp, struct rpc_cred *cred) &nfs4_renew_ops, (void *)jiffies); } -int nfs4_proc_renew(struct nfs4_client *clp, struct rpc_cred *cred) +int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], @@ -2791,7 +2791,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen static int nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) { - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs4_state; if (!clp || task->tk_status >= 0) return 0; @@ -2828,7 +2828,7 @@ static int nfs4_wait_bit_interruptible(void *word) return 0; } -static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp) +static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp) { sigset_t oldset; int res; @@ -2871,7 +2871,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) */ int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) { - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs4_state; int ret = errorcode; exception->retry = 0; @@ -2898,7 +2898,7 @@ int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct return nfs4_map_errors(ret); } -int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short port, struct rpc_cred *cred) +int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred) { nfs4_verifier sc_verifier; struct nfs4_setclientid setclientid = { @@ -2945,7 +2945,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p return status; } -static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) +static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred) { struct nfs_fsinfo fsinfo; struct rpc_message msg = { @@ -2969,7 +2969,7 @@ static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cr return status; } -int nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) +int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred) { long timeout; int err; @@ -3106,7 +3106,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock { struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs4_state; struct nfs_lockt_args arg = { .fh = NFS_FH(inode), .fl = request, @@ -3513,7 +3513,7 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) { - struct nfs4_client *clp = state->owner->so_client; + struct nfs_client *clp = state->owner->so_client; unsigned char fl_flags = request->fl_flags; int status; diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 5d764d8e6d8a..208764069f61 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -61,7 +61,7 @@ void nfs4_renew_state(void *data) { - struct nfs4_client *clp = (struct nfs4_client *)data; + struct nfs_client *clp = (struct nfs_client *)data; struct rpc_cred *cred; long lease, timeout; unsigned long last, now; @@ -108,7 +108,7 @@ out: /* Must be called with clp->cl_sem locked for writes */ void -nfs4_schedule_state_renewal(struct nfs4_client *clp) +nfs4_schedule_state_renewal(struct nfs_client *clp) { long timeout; @@ -127,7 +127,7 @@ nfs4_schedule_state_renewal(struct nfs4_client *clp) void nfs4_renewd_prepare_shutdown(struct nfs_server *server) { - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs4_state; if (!clp) return; @@ -140,7 +140,7 @@ nfs4_renewd_prepare_shutdown(struct nfs_server *server) /* Must be called with clp->cl_sem locked for writes */ void -nfs4_kill_renewd(struct nfs4_client *clp) +nfs4_kill_renewd(struct nfs_client *clp) { down_read(&clp->cl_sem); if (!list_empty(&clp->cl_superblocks)) { diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 090a36b07a22..c0b6439f1f71 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -83,10 +83,10 @@ destroy_nfsv4_state(struct nfs_server *server) * Since these are allocated/deallocated very rarely, we don't * bother putting them in a slab cache... */ -static struct nfs4_client * +static struct nfs_client * nfs4_alloc_client(struct in_addr *addr) { - struct nfs4_client *clp; + struct nfs_client *clp; if (nfs_callback_up() < 0) return NULL; @@ -111,7 +111,7 @@ nfs4_alloc_client(struct in_addr *addr) } static void -nfs4_free_client(struct nfs4_client *clp) +nfs4_free_client(struct nfs_client *clp) { struct nfs4_state_owner *sp; @@ -130,9 +130,9 @@ nfs4_free_client(struct nfs4_client *clp) nfs_callback_down(); } -static struct nfs4_client *__nfs4_find_client(struct in_addr *addr) +static struct nfs_client *__nfs4_find_client(struct in_addr *addr) { - struct nfs4_client *clp; + struct nfs_client *clp; list_for_each_entry(clp, &nfs4_clientid_list, cl_servers) { if (memcmp(&clp->cl_addr, addr, sizeof(clp->cl_addr)) == 0) { atomic_inc(&clp->cl_count); @@ -142,19 +142,19 @@ static struct nfs4_client *__nfs4_find_client(struct in_addr *addr) return NULL; } -struct nfs4_client *nfs4_find_client(struct in_addr *addr) +struct nfs_client *nfs4_find_client(struct in_addr *addr) { - struct nfs4_client *clp; + struct nfs_client *clp; spin_lock(&state_spinlock); clp = __nfs4_find_client(addr); spin_unlock(&state_spinlock); return clp; } -struct nfs4_client * +struct nfs_client * nfs4_get_client(struct in_addr *addr) { - struct nfs4_client *clp, *new = NULL; + struct nfs_client *clp, *new = NULL; spin_lock(&state_spinlock); for (;;) { @@ -180,7 +180,7 @@ nfs4_get_client(struct in_addr *addr) } void -nfs4_put_client(struct nfs4_client *clp) +nfs4_put_client(struct nfs_client *clp) { if (!atomic_dec_and_lock(&clp->cl_count, &state_spinlock)) return; @@ -192,7 +192,7 @@ nfs4_put_client(struct nfs4_client *clp) nfs4_free_client(clp); } -static int nfs4_init_client(struct nfs4_client *clp, struct rpc_cred *cred) +static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred) { int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, nfs_callback_tcpport, cred); @@ -204,13 +204,13 @@ static int nfs4_init_client(struct nfs4_client *clp, struct rpc_cred *cred) } u32 -nfs4_alloc_lockowner_id(struct nfs4_client *clp) +nfs4_alloc_lockowner_id(struct nfs_client *clp) { return clp->cl_lockowner_id ++; } static struct nfs4_state_owner * -nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred) +nfs4_client_grab_unused(struct nfs_client *clp, struct rpc_cred *cred) { struct nfs4_state_owner *sp = NULL; @@ -224,7 +224,7 @@ nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred) return sp; } -struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp) +struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) { struct nfs4_state_owner *sp; struct rpc_cred *cred = NULL; @@ -238,7 +238,7 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp) return cred; } -struct rpc_cred *nfs4_get_setclientid_cred(struct nfs4_client *clp) +struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) { struct nfs4_state_owner *sp; @@ -251,7 +251,7 @@ struct rpc_cred *nfs4_get_setclientid_cred(struct nfs4_client *clp) } static struct nfs4_state_owner * -nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred) +nfs4_find_state_owner(struct nfs_client *clp, struct rpc_cred *cred) { struct nfs4_state_owner *sp, *res = NULL; @@ -294,7 +294,7 @@ nfs4_alloc_state_owner(void) void nfs4_drop_state_owner(struct nfs4_state_owner *sp) { - struct nfs4_client *clp = sp->so_client; + struct nfs_client *clp = sp->so_client; spin_lock(&clp->cl_lock); list_del_init(&sp->so_list); spin_unlock(&clp->cl_lock); @@ -306,7 +306,7 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp) */ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred) { - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs4_state; struct nfs4_state_owner *sp, *new; get_rpccred(cred); @@ -337,7 +337,7 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct */ void nfs4_put_state_owner(struct nfs4_state_owner *sp) { - struct nfs4_client *clp = sp->so_client; + struct nfs_client *clp = sp->so_client; struct rpc_cred *cred = sp->so_cred; if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) @@ -540,7 +540,7 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) { struct nfs4_lock_state *lsp; - struct nfs4_client *clp = state->owner->so_client; + struct nfs_client *clp = state->owner->so_client; lsp = kzalloc(sizeof(*lsp), GFP_KERNEL); if (lsp == NULL) @@ -752,7 +752,7 @@ out: static int reclaimer(void *); -static inline void nfs4_clear_recover_bit(struct nfs4_client *clp) +static inline void nfs4_clear_recover_bit(struct nfs_client *clp) { smp_mb__before_clear_bit(); clear_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state); @@ -764,7 +764,7 @@ static inline void nfs4_clear_recover_bit(struct nfs4_client *clp) /* * State recovery routine */ -static void nfs4_recover_state(struct nfs4_client *clp) +static void nfs4_recover_state(struct nfs_client *clp) { struct task_struct *task; @@ -782,7 +782,7 @@ static void nfs4_recover_state(struct nfs4_client *clp) /* * Schedule a state recovery attempt */ -void nfs4_schedule_state_recovery(struct nfs4_client *clp) +void nfs4_schedule_state_recovery(struct nfs_client *clp) { if (!clp) return; @@ -879,7 +879,7 @@ out_err: return status; } -static void nfs4_state_mark_reclaim(struct nfs4_client *clp) +static void nfs4_state_mark_reclaim(struct nfs_client *clp) { struct nfs4_state_owner *sp; struct nfs4_state *state; @@ -903,7 +903,7 @@ static void nfs4_state_mark_reclaim(struct nfs4_client *clp) static int reclaimer(void *ptr) { - struct nfs4_client *clp = ptr; + struct nfs_client *clp = ptr; struct nfs4_state_owner *sp; struct nfs4_state_recovery_ops *ops; struct rpc_cred *cred; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1dee6ef7e5a9..04748ab9ed55 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1160,7 +1160,7 @@ static int encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, con return 0; } -static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client_stateid) +static int encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid) { uint32_t *p; @@ -1246,7 +1246,7 @@ static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclien return 0; } -static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_client *client_state) +static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state) { uint32_t *p; @@ -1945,7 +1945,7 @@ static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, uint32_t *p, const str /* * a RENEW request */ -static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp) +static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs_client *clp) { struct xdr_stream xdr; struct compound_hdr hdr = { @@ -1975,7 +1975,7 @@ static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, uint32_t *p, struct nf /* * a SETCLIENTID_CONFIRM request */ -static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp) +static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_client *clp) { struct xdr_stream xdr; struct compound_hdr hdr = { @@ -2132,7 +2132,7 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) } /* Dummy routine */ -static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs4_client *clp) +static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs_client *clp) { uint32_t *p; unsigned int strlen; @@ -2636,7 +2636,7 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t return 0; } -static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *uid) +static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *uid) { uint32_t len, *p; @@ -2660,7 +2660,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf return 0; } -static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *gid) +static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *gid) { uint32_t len, *p; @@ -3565,7 +3565,7 @@ static int decode_setattr(struct xdr_stream *xdr, struct nfs_setattrres *res) return 0; } -static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp) +static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp) { uint32_t *p; uint32_t opnum; @@ -4335,7 +4335,7 @@ static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, uint32_t *p, void *dummy) * a SETCLIENTID request */ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p, - struct nfs4_client *clp) + struct nfs_client *clp) { struct xdr_stream xdr; struct compound_hdr hdr; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 63497345806b..d03ede5b1aca 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1099,7 +1099,7 @@ static int nfs_clone_nfs_sb(struct file_system_type *fs_type, static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, struct rpc_timeout *timeparms, int proto, rpc_authflavor_t flavor) { - struct nfs4_client *clp; + struct nfs_client *clp; struct rpc_xprt *xprt = NULL; struct rpc_clnt *clnt = NULL; int err = -EIO; @@ -1416,7 +1416,7 @@ static inline char *nfs4_dup_path(const struct dentry *dentry) static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data) { const struct dentry *dentry = data->dentry; - struct nfs4_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs4_state; struct super_block *sb; server->fsid = data->fattr->fsid; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 6b4a13c79474..4db90df2aed0 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -43,7 +43,7 @@ struct nfs_server { */ char ip_addr[16]; char * mnt_path; - struct nfs4_client * nfs4_state; /* all NFSv4 state starts here */ + struct nfs_client * nfs4_state; /* all NFSv4 state starts here */ struct list_head nfs4_siblings; /* List of other nfs_server structs * that share the same clientid */ diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h index 102e56094296..678fe68982ef 100644 --- a/include/linux/nfs_idmap.h +++ b/include/linux/nfs_idmap.h @@ -62,15 +62,15 @@ struct idmap_msg { #ifdef __KERNEL__ /* Forward declaration to make this header independent of others */ -struct nfs4_client; +struct nfs_client; -void nfs_idmap_new(struct nfs4_client *); -void nfs_idmap_delete(struct nfs4_client *); +void nfs_idmap_new(struct nfs_client *); +void nfs_idmap_delete(struct nfs_client *); -int nfs_map_name_to_uid(struct nfs4_client *, const char *, size_t, __u32 *); -int nfs_map_group_to_gid(struct nfs4_client *, const char *, size_t, __u32 *); -int nfs_map_uid_to_name(struct nfs4_client *, __u32, char *); -int nfs_map_gid_to_group(struct nfs4_client *, __u32, char *); +int nfs_map_name_to_uid(struct nfs_client *, const char *, size_t, __u32 *); +int nfs_map_group_to_gid(struct nfs_client *, const char *, size_t, __u32 *); +int nfs_map_uid_to_name(struct nfs_client *, __u32, char *); +int nfs_map_gid_to_group(struct nfs_client *, __u32, char *); extern unsigned int nfs_idmap_cache_timeout; #endif /* __KERNEL__ */ From 7539bbab8062aadc1db95a22b377146843cfa88f Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:09 -0400 Subject: [PATCH 0814/1063] NFS: Rename nfs_server::nfs4_state Rename nfs_server::nfs4_state to nfs_client as it will be used to represent the client state for NFS2 and NFS3 also. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 12 ++++++------ fs/nfs/nfs4proc.c | 26 +++++++++++++------------- fs/nfs/nfs4renewd.c | 2 +- fs/nfs/nfs4state.c | 10 +++++----- fs/nfs/nfs4xdr.c | 10 +++++----- fs/nfs/super.c | 6 +++--- include/linux/nfs_fs_sb.h | 2 +- 7 files changed, 34 insertions(+), 34 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 5a1105c258bd..cfe239736ac0 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -52,7 +52,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ case -NFS4ERR_EXPIRED: /* kill_proc(fl->fl_pid, SIGLOST, 1); */ case -NFS4ERR_STALE_CLIENTID: - nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs4_state); + nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs_client); goto out_err; } } @@ -114,7 +114,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st */ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) { - struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; int status = 0; @@ -176,7 +176,7 @@ static void nfs_msync_inode(struct inode *inode) */ int __nfs_inode_return_delegation(struct inode *inode) { - struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; int res = 0; @@ -208,7 +208,7 @@ int __nfs_inode_return_delegation(struct inode *inode) */ void nfs_return_all_delegations(struct super_block *sb) { - struct nfs_client *clp = NFS_SB(sb)->nfs4_state; + struct nfs_client *clp = NFS_SB(sb)->nfs_client; struct nfs_delegation *delegation; struct inode *inode; @@ -310,7 +310,7 @@ static int recall_thread(void *data) { struct recall_threadargs *args = (struct recall_threadargs *)data; struct inode *inode = igrab(args->inode); - struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; @@ -423,7 +423,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp) int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) { - struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; int res = 0; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 168f3ffb059f..b46597fc81e1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -195,7 +195,7 @@ static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry, static void renew_lease(const struct nfs_server *server, unsigned long timestamp) { - struct nfs_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; spin_lock(&clp->cl_lock); if (time_before(clp->cl_last_renewal,timestamp)) clp->cl_last_renewal = timestamp; @@ -252,7 +252,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, atomic_inc(&sp->so_count); p->o_arg.fh = NFS_FH(dir); p->o_arg.open_flags = flags, - p->o_arg.clientid = server->nfs4_state->cl_clientid; + p->o_arg.clientid = server->nfs_client->cl_clientid; p->o_arg.id = sp->so_id; p->o_arg.name = &dentry->d_name; p->o_arg.server = server; @@ -550,7 +550,7 @@ int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: /* Don't recall a delegation if it was lost */ - nfs4_schedule_state_recovery(server->nfs4_state); + nfs4_schedule_state_recovery(server->nfs_client); return err; } err = nfs4_handle_exception(server, err, &exception); @@ -792,7 +792,7 @@ out: int nfs4_recover_expired_lease(struct nfs_server *server) { - struct nfs_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) nfs4_schedule_state_recovery(clp); @@ -867,7 +867,7 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred { struct nfs_delegation *delegation; struct nfs_server *server = NFS_SERVER(inode); - struct nfs_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); struct nfs4_state_owner *sp = NULL; struct nfs4_state *state = NULL; @@ -953,7 +953,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st struct nfs4_state_owner *sp; struct nfs4_state *state = NULL; struct nfs_server *server = NFS_SERVER(dir); - struct nfs_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; struct nfs4_opendata *opendata; int status; @@ -1133,7 +1133,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) break; case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: - nfs4_schedule_state_recovery(server->nfs4_state); + nfs4_schedule_state_recovery(server->nfs_client); break; default: if (nfs4_async_handle_error(task, server) == -EAGAIN) { @@ -2791,7 +2791,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen static int nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) { - struct nfs_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; if (!clp || task->tk_status >= 0) return 0; @@ -2871,7 +2871,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) */ int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) { - struct nfs_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; int ret = errorcode; exception->retry = 0; @@ -3077,7 +3077,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4 switch (err) { case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: - nfs4_schedule_state_recovery(server->nfs4_state); + nfs4_schedule_state_recovery(server->nfs_client); case 0: return 0; } @@ -3106,7 +3106,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock { struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); - struct nfs_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; struct nfs_lockt_args arg = { .fh = NFS_FH(inode), .fl = request, @@ -3231,7 +3231,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) break; case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: - nfs4_schedule_state_recovery(calldata->server->nfs4_state); + nfs4_schedule_state_recovery(calldata->server->nfs_client); break; default: if (nfs4_async_handle_error(task, calldata->server) == -EAGAIN) { @@ -3343,7 +3343,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, if (p->arg.lock_seqid == NULL) goto out_free; p->arg.lock_stateid = &lsp->ls_stateid; - p->arg.lock_owner.clientid = server->nfs4_state->cl_clientid; + p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; p->arg.lock_owner.id = lsp->ls_id; p->lsp = lsp; atomic_inc(&lsp->ls_count); diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 208764069f61..ff947ecb8b81 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -127,7 +127,7 @@ nfs4_schedule_state_renewal(struct nfs_client *clp) void nfs4_renewd_prepare_shutdown(struct nfs_server *server) { - struct nfs_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; if (!clp) return; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index c0b6439f1f71..fa51a7d4c022 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -61,7 +61,7 @@ static LIST_HEAD(nfs4_clientid_list); void init_nfsv4_state(struct nfs_server *server) { - server->nfs4_state = NULL; + server->nfs_client = NULL; INIT_LIST_HEAD(&server->nfs4_siblings); } @@ -70,9 +70,9 @@ destroy_nfsv4_state(struct nfs_server *server) { kfree(server->mnt_path); server->mnt_path = NULL; - if (server->nfs4_state) { - nfs4_put_client(server->nfs4_state); - server->nfs4_state = NULL; + if (server->nfs_client) { + nfs4_put_client(server->nfs_client); + server->nfs_client = NULL; } } @@ -306,7 +306,7 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp) */ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred) { - struct nfs_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; struct nfs4_state_owner *sp, *new; get_rpccred(cred); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 04748ab9ed55..99926067eca4 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -529,7 +529,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s if (iap->ia_valid & ATTR_MODE) len += 4; if (iap->ia_valid & ATTR_UID) { - owner_namelen = nfs_map_uid_to_name(server->nfs4_state, iap->ia_uid, owner_name); + owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name); if (owner_namelen < 0) { printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n", iap->ia_uid); @@ -541,7 +541,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s len += 4 + (XDR_QUADLEN(owner_namelen) << 2); } if (iap->ia_valid & ATTR_GID) { - owner_grouplen = nfs_map_gid_to_group(server->nfs4_state, iap->ia_gid, owner_group); + owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group); if (owner_grouplen < 0) { printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n", iap->ia_gid); @@ -3051,9 +3051,9 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons fattr->mode |= fmode; if ((status = decode_attr_nlink(xdr, bitmap, &fattr->nlink)) != 0) goto xdr_error; - if ((status = decode_attr_owner(xdr, bitmap, server->nfs4_state, &fattr->uid)) != 0) + if ((status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid)) != 0) goto xdr_error; - if ((status = decode_attr_group(xdr, bitmap, server->nfs4_state, &fattr->gid)) != 0) + if ((status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid)) != 0) goto xdr_error; if ((status = decode_attr_rdev(xdr, bitmap, &fattr->rdev)) != 0) goto xdr_error; @@ -3254,7 +3254,7 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) if (decode_space_limit(xdr, &res->maxsize) < 0) return -EIO; } - return decode_ace(xdr, NULL, res->server->nfs4_state); + return decode_ace(xdr, NULL, res->server->nfs_client); } static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index d03ede5b1aca..ab4c78ee840c 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1141,7 +1141,7 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); clnt = rpc_clone_client(clp->cl_rpcclient); if (!IS_ERR(clnt)) - server->nfs4_state = clp; + server->nfs_client = clp; up_write(&clp->cl_sem); clp = NULL; @@ -1151,7 +1151,7 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, return clnt; } - if (server->nfs4_state->cl_idmap == NULL) { + if (server->nfs_client->cl_idmap == NULL) { dprintk("%s: failed to create idmapper.\n", __FUNCTION__); return ERR_PTR(-ENOMEM); } @@ -1416,7 +1416,7 @@ static inline char *nfs4_dup_path(const struct dentry *dentry) static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data) { const struct dentry *dentry = data->dentry; - struct nfs_client *clp = server->nfs4_state; + struct nfs_client *clp = server->nfs_client; struct super_block *sb; server->fsid = data->fattr->fsid; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 4db90df2aed0..fc20d6b934fb 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -43,7 +43,7 @@ struct nfs_server { */ char ip_addr[16]; char * mnt_path; - struct nfs_client * nfs4_state; /* all NFSv4 state starts here */ + struct nfs_client * nfs_client; /* all NFSv4 state starts here */ struct list_head nfs4_siblings; /* List of other nfs_server structs * that share the same clientid */ From b7162792b5c0e0f6e91b8997f8e6bbc76ec5420a Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:09 -0400 Subject: [PATCH 0815/1063] NFS: Return an error when starting the idmapping pipe Return an error when starting the idmapping pipe so that we can detect it failing. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 12 ++++++++---- fs/nfs/super.c | 3 ++- include/linux/nfs_idmap.h | 2 +- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index d05148ec9414..231c20ffc0ff 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -108,15 +108,17 @@ static struct rpc_pipe_ops idmap_upcall_ops = { .destroy_msg = idmap_pipe_destroy_msg, }; -void +int nfs_idmap_new(struct nfs_client *clp) { struct idmap *idmap; + int error; if (clp->cl_idmap != NULL) - return; + return 0; + if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL) - return; + return -ENOMEM; snprintf(idmap->idmap_path, sizeof(idmap->idmap_path), "%s/idmap", clp->cl_rpcclient->cl_pathname); @@ -124,8 +126,9 @@ nfs_idmap_new(struct nfs_client *clp) idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path, idmap, &idmap_upcall_ops, 0); if (IS_ERR(idmap->idmap_dentry)) { + error = PTR_ERR(idmap->idmap_dentry); kfree(idmap); - return; + return error; } mutex_init(&idmap->idmap_lock); @@ -135,6 +138,7 @@ nfs_idmap_new(struct nfs_client *clp) idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP; clp->cl_idmap = idmap; + return 0; } void diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ab4c78ee840c..3ee85c4e65d8 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1136,7 +1136,8 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, clnt->cl_softrtry = 1; clp->cl_rpcclient = clnt; memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); - nfs_idmap_new(clp); + if (nfs_idmap_new(clp) < 0) + goto out_fail; } list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); clnt = rpc_clone_client(clp->cl_rpcclient); diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h index 678fe68982ef..15a9f3b7289a 100644 --- a/include/linux/nfs_idmap.h +++ b/include/linux/nfs_idmap.h @@ -64,7 +64,7 @@ struct idmap_msg { /* Forward declaration to make this header independent of others */ struct nfs_client; -void nfs_idmap_new(struct nfs_client *); +int nfs_idmap_new(struct nfs_client *); void nfs_idmap_delete(struct nfs_client *); int nfs_map_name_to_uid(struct nfs_client *, const char *, size_t, __u32 *); From 2b3de4411b3ccaeb00018c99d1bbe7203554cf7f Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:09 -0400 Subject: [PATCH 0816/1063] NFS: Add a lookupfh NFS RPC op Add a lookup filehandle NFS RPC op so that a file handle can be looked up without requiring dentries and inodes and other VFS stuff when doing an NFS4 pathwalk during mounting. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 47 +++++++++++++++++++++++++++++++++++++++++ include/linux/nfs_xdr.h | 3 +++ 2 files changed, 50 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b46597fc81e1..de2006f754ef 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1583,6 +1583,52 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, return status; } +static int _nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh, + struct qstr *name, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + int status; + struct nfs4_lookup_arg args = { + .bitmask = server->attr_bitmask, + .dir_fh = dirfh, + .name = name, + }; + struct nfs4_lookup_res res = { + .server = server, + .fattr = fattr, + .fh = fhandle, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP], + .rpc_argp = &args, + .rpc_resp = &res, + }; + + nfs_fattr_init(fattr); + + dprintk("NFS call lookupfh %s\n", name->name); + status = rpc_call_sync(server->client, &msg, 0); + dprintk("NFS reply lookupfh: %d\n", status); + if (status == -NFS4ERR_MOVED) + status = -EREMOTE; + return status; +} + +static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh, + struct qstr *name, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs4_proc_lookupfh(server, dirfh, name, + fhandle, fattr), + &exception); + } while (exception.retry); + return err; +} + static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { @@ -3723,6 +3769,7 @@ struct nfs_rpc_ops nfs_v4_clientops = { .getroot = nfs4_proc_get_root, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, + .lookupfh = nfs4_proc_lookupfh, .lookup = nfs4_proc_lookup, .access = nfs4_proc_access, .readlink = nfs4_proc_readlink, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 41e5a19199e9..26879771831d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -770,6 +770,9 @@ struct nfs_rpc_ops { int (*getroot) (struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); + int (*lookupfh)(struct nfs_server *, struct nfs_fh *, + struct qstr *, struct nfs_fh *, + struct nfs_fattr *); int (*getattr) (struct nfs_server *, struct nfs_fh *, struct nfs_fattr *); int (*setattr) (struct dentry *, struct nfs_fattr *, From e9326dcab413848e70ab746c7c5363da13e5f801 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:10 -0400 Subject: [PATCH 0817/1063] NFS: Add a server capabilities NFS RPC op Add a set_capabilities NFS RPC op so that the server capabilities can be set. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 1 + include/linux/nfs_xdr.h | 1 + 2 files changed, 2 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index de2006f754ef..850f0851023a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3790,6 +3790,7 @@ struct nfs_rpc_ops nfs_v4_clientops = { .statfs = nfs4_proc_statfs, .fsinfo = nfs4_proc_fsinfo, .pathconf = nfs4_proc_pathconf, + .set_capabilities = nfs4_server_capabilities, .decode_dirent = nfs4_decode_dirent, .read_setup = nfs4_proc_read_setup, .read_done = nfs4_read_done, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 26879771831d..dd9ae6761f71 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -809,6 +809,7 @@ struct nfs_rpc_ops { struct nfs_fsinfo *); int (*pathconf) (struct nfs_server *, struct nfs_fh *, struct nfs_pathconf *); + int (*set_capabilities)(struct nfs_server *, struct nfs_fh *); u32 * (*decode_dirent)(u32 *, struct nfs_entry *, int plus); void (*read_setup) (struct nfs_read_data *); int (*read_done) (struct rpc_task *, struct nfs_read_data *); From 24c8dbbb5f777187d660393599641ab3307b4b97 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:10 -0400 Subject: [PATCH 0818/1063] NFS: Generalise the nfs_client structure Generalise the nfs_client structure by: (1) Moving nfs_client to a more general place (nfs_fs_sb.h). (2) Renaming its maintenance routines to be non-NFS4 specific. (3) Move those maintenance routines to a new non-NFS4 specific file (client.c) and move the declarations to internal.h. (4) Make nfs_find/get_client() take a full sockaddr_in to include the port number (will be required for NFS2/3). (5) Make nfs_find/get_client() take the NFS protocol version (again will be required to differentiate NFS2, 3 & 4 client records). Also: (6) Make nfs_client construction proceed akin to inodes, marking them as under construction and providing a function to indicate completion. (7) Make nfs_get_client() wait interruptibly if it finds a client that it can share, but that client is currently being constructed. (8) Make nfs4_create_client() use (6) and (7) instead of locking cl_sem. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 6 +- fs/nfs/callback.c | 9 +- fs/nfs/callback_proc.c | 9 +- fs/nfs/client.c | 312 ++++++++++++++++++++++++++++++++++++++ fs/nfs/delegation.c | 9 +- fs/nfs/internal.h | 6 + fs/nfs/nfs4_fs.h | 52 ------- fs/nfs/nfs4proc.c | 2 +- fs/nfs/nfs4state.c | 128 +--------------- fs/nfs/super.c | 53 +++---- include/linux/nfs_fs.h | 1 + include/linux/nfs_fs_sb.h | 60 ++++++++ 12 files changed, 425 insertions(+), 222 deletions(-) create mode 100644 fs/nfs/client.c diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 0b572a0c1967..3b993a6f8163 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -4,9 +4,9 @@ obj-$(CONFIG_NFS_FS) += nfs.o -nfs-y := dir.o file.o inode.o super.o nfs2xdr.o pagelist.o \ - proc.o read.o symlink.o unlink.o write.o \ - namespace.o +nfs-y := client.o dir.o file.o inode.o super.o nfs2xdr.o \ + pagelist.o proc.o read.o symlink.o unlink.o \ + write.o namespace.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 1b596b6d9dc2..a3ee11364db0 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -19,6 +19,7 @@ #include "nfs4_fs.h" #include "callback.h" +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_CALLBACK @@ -166,15 +167,15 @@ void nfs_callback_down(void) static int nfs_callback_authenticate(struct svc_rqst *rqstp) { - struct in_addr *addr = &rqstp->rq_addr.sin_addr; + struct sockaddr_in *addr = &rqstp->rq_addr; struct nfs_client *clp; /* Don't talk to strangers */ - clp = nfs4_find_client(addr); + clp = nfs_find_client(addr, 4); if (clp == NULL) return SVC_DROP; - dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr)); - nfs4_put_client(clp); + dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr->sin_addr)); + nfs_put_client(clp); switch (rqstp->rq_authop->flavour) { case RPC_AUTH_NULL: if (rqstp->rq_proc != CB_NULL) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 55d6e2ec157f..97cf8f71451f 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -10,6 +10,7 @@ #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_CALLBACK @@ -22,7 +23,7 @@ unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres res->bitmap[0] = res->bitmap[1] = 0; res->status = htonl(NFS4ERR_BADHANDLE); - clp = nfs4_find_client(&args->addr->sin_addr); + clp = nfs_find_client(args->addr, 4); if (clp == NULL) goto out; inode = nfs_delegation_find_inode(clp, &args->fh); @@ -48,7 +49,7 @@ out_iput: up_read(&nfsi->rwsem); iput(inode); out_putclient: - nfs4_put_client(clp); + nfs_put_client(clp); out: dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res->status)); return res->status; @@ -61,7 +62,7 @@ unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy) unsigned res; res = htonl(NFS4ERR_BADHANDLE); - clp = nfs4_find_client(&args->addr->sin_addr); + clp = nfs_find_client(args->addr, 4); if (clp == NULL) goto out; inode = nfs_delegation_find_inode(clp, &args->fh); @@ -80,7 +81,7 @@ unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy) } iput(inode); out_putclient: - nfs4_put_client(clp); + nfs_put_client(clp); out: dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res)); return res; diff --git a/fs/nfs/client.c b/fs/nfs/client.c new file mode 100644 index 000000000000..cb5e92463bdb --- /dev/null +++ b/fs/nfs/client.c @@ -0,0 +1,312 @@ +/* client.c: NFS client sharing and management code + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "nfs4_fs.h" +#include "callback.h" +#include "delegation.h" +#include "iostat.h" +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_CLIENT + +static DEFINE_SPINLOCK(nfs_client_lock); +static LIST_HEAD(nfs_client_list); +static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); + +/* + * Allocate a shared client record + * + * Since these are allocated/deallocated very rarely, we don't + * bother putting them in a slab cache... + */ +static struct nfs_client *nfs_alloc_client(const char *hostname, + const struct sockaddr_in *addr, + int nfsversion) +{ + struct nfs_client *clp; + int error; + + if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) + goto error_0; + + error = rpciod_up(); + if (error < 0) { + dprintk("%s: couldn't start rpciod! Error = %d\n", + __FUNCTION__, error); + __set_bit(NFS_CS_RPCIOD, &clp->cl_res_state); + goto error_1; + } + + if (nfsversion == 4) { + if (nfs_callback_up() < 0) + goto error_2; + __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); + } + + atomic_set(&clp->cl_count, 1); + clp->cl_cons_state = NFS_CS_INITING; + + clp->cl_nfsversion = nfsversion; + memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr)); + + if (hostname) { + clp->cl_hostname = kstrdup(hostname, GFP_KERNEL); + if (!clp->cl_hostname) + goto error_3; + } + + INIT_LIST_HEAD(&clp->cl_superblocks); + clp->cl_rpcclient = ERR_PTR(-EINVAL); + +#ifdef CONFIG_NFS_V4 + init_rwsem(&clp->cl_sem); + INIT_LIST_HEAD(&clp->cl_delegations); + INIT_LIST_HEAD(&clp->cl_state_owners); + INIT_LIST_HEAD(&clp->cl_unused); + spin_lock_init(&clp->cl_lock); + INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp); + rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); + clp->cl_boot_time = CURRENT_TIME; + clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; +#endif + + return clp; + +error_3: + nfs_callback_down(); + __clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state); +error_2: + rpciod_down(); + __clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state); +error_1: + kfree(clp); +error_0: + return NULL; +} + +/* + * Destroy a shared client record + */ +static void nfs_free_client(struct nfs_client *clp) +{ + dprintk("--> nfs_free_client(%d)\n", clp->cl_nfsversion); + +#ifdef CONFIG_NFS_V4 + if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) { + while (!list_empty(&clp->cl_unused)) { + struct nfs4_state_owner *sp; + + sp = list_entry(clp->cl_unused.next, + struct nfs4_state_owner, + so_list); + list_del(&sp->so_list); + kfree(sp); + } + BUG_ON(!list_empty(&clp->cl_state_owners)); + nfs_idmap_delete(clp); + } +#endif + + /* -EIO all pending I/O */ + if (!IS_ERR(clp->cl_rpcclient)) + rpc_shutdown_client(clp->cl_rpcclient); + + if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) + nfs_callback_down(); + + if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state)) + rpciod_down(); + + kfree(clp->cl_hostname); + kfree(clp); + + dprintk("<-- nfs_free_client()\n"); +} + +/* + * Release a reference to a shared client record + */ +void nfs_put_client(struct nfs_client *clp) +{ + dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count)); + + if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) { + list_del(&clp->cl_share_link); + spin_unlock(&nfs_client_lock); + + BUG_ON(!list_empty(&clp->cl_superblocks)); + + nfs_free_client(clp); + } +} + +/* + * Find a client by address + * - caller must hold nfs_client_lock + */ +static struct nfs_client *__nfs_find_client(const struct sockaddr_in *addr, int nfsversion) +{ + struct nfs_client *clp; + + list_for_each_entry(clp, &nfs_client_list, cl_share_link) { + /* Different NFS versions cannot share the same nfs_client */ + if (clp->cl_nfsversion != nfsversion) + continue; + + if (memcmp(&clp->cl_addr.sin_addr, &addr->sin_addr, + sizeof(clp->cl_addr.sin_addr)) != 0) + continue; + + if (clp->cl_addr.sin_port == addr->sin_port) + goto found; + } + + return NULL; + +found: + atomic_inc(&clp->cl_count); + return clp; +} + +/* + * Find a client by IP address and protocol version + * - returns NULL if no such client + */ +struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversion) +{ + struct nfs_client *clp; + + spin_lock(&nfs_client_lock); + clp = __nfs_find_client(addr, nfsversion); + spin_unlock(&nfs_client_lock); + + BUG_ON(clp->cl_cons_state == 0); + + return clp; +} + +/* + * Look up a client by IP address and protocol version + * - creates a new record if one doesn't yet exist + */ +struct nfs_client *nfs_get_client(const char *hostname, + const struct sockaddr_in *addr, + int nfsversion) +{ + struct nfs_client *clp, *new = NULL; + int error; + + dprintk("--> nfs_get_client(%s,"NIPQUAD_FMT":%d,%d)\n", + hostname ?: "", NIPQUAD(addr->sin_addr), + addr->sin_port, nfsversion); + + /* see if the client already exists */ + do { + spin_lock(&nfs_client_lock); + + clp = __nfs_find_client(addr, nfsversion); + if (clp) + goto found_client; + if (new) + goto install_client; + + spin_unlock(&nfs_client_lock); + + new = nfs_alloc_client(hostname, addr, nfsversion); + } while (new); + + return ERR_PTR(-ENOMEM); + + /* install a new client and return with it unready */ +install_client: + clp = new; + list_add(&clp->cl_share_link, &nfs_client_list); + spin_unlock(&nfs_client_lock); + dprintk("--> nfs_get_client() = %p [new]\n", clp); + return clp; + + /* found an existing client + * - make sure it's ready before returning + */ +found_client: + spin_unlock(&nfs_client_lock); + + if (new) + nfs_free_client(new); + + if (clp->cl_cons_state == NFS_CS_INITING) { + DECLARE_WAITQUEUE(myself, current); + + add_wait_queue(&nfs_client_active_wq, &myself); + + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (signal_pending(current) || + clp->cl_cons_state > NFS_CS_READY) + break; + schedule(); + } + + remove_wait_queue(&nfs_client_active_wq, &myself); + + if (signal_pending(current)) { + nfs_put_client(clp); + return ERR_PTR(-ERESTARTSYS); + } + } + + if (clp->cl_cons_state < NFS_CS_READY) { + error = clp->cl_cons_state; + nfs_put_client(clp); + return ERR_PTR(error); + } + + dprintk("--> nfs_get_client() = %p [share]\n", clp); + return clp; +} + +/* + * Mark a server as ready or failed + */ +void nfs_mark_client_ready(struct nfs_client *clp, int state) +{ + clp->cl_cons_state = state; + wake_up_all(&nfs_client_active_wq); +} diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index cfe239736ac0..57133678db16 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -18,6 +18,7 @@ #include "nfs4_fs.h" #include "delegation.h" +#include "internal.h" static struct nfs_delegation *nfs_alloc_delegation(void) { @@ -145,7 +146,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct sizeof(delegation->stateid)) != 0 || delegation->type != nfsi->delegation->type) { printk("%s: server %u.%u.%u.%u, handed out a duplicate delegation!\n", - __FUNCTION__, NIPQUAD(clp->cl_addr)); + __FUNCTION__, NIPQUAD(clp->cl_addr.sin_addr)); status = -EIO; } } @@ -254,7 +255,7 @@ restart: } out: spin_unlock(&clp->cl_lock); - nfs4_put_client(clp); + nfs_put_client(clp); module_put_and_exit(0); } @@ -266,10 +267,10 @@ void nfs_expire_all_delegations(struct nfs_client *clp) atomic_inc(&clp->cl_count); task = kthread_run(nfs_do_expire_all_delegations, clp, "%u.%u.%u.%u-delegreturn", - NIPQUAD(clp->cl_addr)); + NIPQUAD(clp->cl_addr.sin_addr)); if (!IS_ERR(task)) return; - nfs4_put_client(clp); + nfs_put_client(clp); module_put(THIS_MODULE); } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 4802157963f8..ac370d5d4494 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -15,6 +15,12 @@ struct nfs_clone_mount { rpc_authflavor_t authflavor; }; +/* client.c */ +extern void nfs_put_client(struct nfs_client *); +extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int); +extern struct nfs_client *nfs_get_client(const char *, const struct sockaddr_in *, int); +extern void nfs_mark_client_ready(struct nfs_client *, int); + /* nfs4namespace.c */ #ifdef CONFIG_NFS_V4 extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 4e334cb48498..e7879245361e 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -42,55 +42,6 @@ enum nfs4_client_state { NFS4CLNT_LEASE_EXPIRED, }; -/* - * The nfs_client identifies our client state to the server. - */ -struct nfs_client { - struct list_head cl_servers; /* Global list of servers */ - struct in_addr cl_addr; /* Server identifier */ - u64 cl_clientid; /* constant */ - nfs4_verifier cl_confirm; - unsigned long cl_state; - - u32 cl_lockowner_id; - - /* - * The following rwsem ensures exclusive access to the server - * while we recover the state following a lease expiration. - */ - struct rw_semaphore cl_sem; - - struct list_head cl_delegations; - struct list_head cl_state_owners; - struct list_head cl_unused; - int cl_nunused; - spinlock_t cl_lock; - atomic_t cl_count; - - struct rpc_clnt * cl_rpcclient; - - struct list_head cl_superblocks; /* List of nfs_server structs */ - - unsigned long cl_lease_time; - unsigned long cl_last_renewal; - struct work_struct cl_renewd; - struct work_struct cl_recoverd; - - struct rpc_wait_queue cl_rpcwaitq; - - /* used for the setclientid verifier */ - struct timespec cl_boot_time; - - /* idmapper */ - struct idmap * cl_idmap; - - /* Our own IP address, as a null-terminated string. - * This is used to generate the clientid, and the callback address. - */ - char cl_ipaddr[16]; - unsigned char cl_id_uniquifier; -}; - /* * struct rpc_sequence ensures that RPC calls are sent in the exact * order that they appear on the list. @@ -239,9 +190,6 @@ extern void nfs4_renew_state(void *); /* nfs4state.c */ extern void init_nfsv4_state(struct nfs_server *); extern void destroy_nfsv4_state(struct nfs_server *); -extern struct nfs_client *nfs4_get_client(struct in_addr *); -extern void nfs4_put_client(struct nfs_client *clp); -extern struct nfs_client *nfs4_find_client(struct in_addr *); struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp); extern u32 nfs4_alloc_lockowner_id(struct nfs_client *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 850f0851023a..803c31b88bb5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2968,7 +2968,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short po for(;;) { setclientid.sc_name_len = scnprintf(setclientid.sc_name, sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u", - clp->cl_ipaddr, NIPQUAD(clp->cl_addr.s_addr), + clp->cl_ipaddr, NIPQUAD(clp->cl_addr.sin_addr), cred->cr_ops->cr_name, clp->cl_id_uniquifier); setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index fa51a7d4c022..058811e39555 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -50,12 +50,12 @@ #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" +#include "internal.h" #define OPENOWNER_POOL_SIZE 8 const nfs4_stateid zero_stateid; -static DEFINE_SPINLOCK(state_spinlock); static LIST_HEAD(nfs4_clientid_list); void @@ -71,127 +71,11 @@ destroy_nfsv4_state(struct nfs_server *server) kfree(server->mnt_path); server->mnt_path = NULL; if (server->nfs_client) { - nfs4_put_client(server->nfs_client); + nfs_put_client(server->nfs_client); server->nfs_client = NULL; } } -/* - * nfs4_get_client(): returns an empty client structure - * nfs4_put_client(): drops reference to client structure - * - * Since these are allocated/deallocated very rarely, we don't - * bother putting them in a slab cache... - */ -static struct nfs_client * -nfs4_alloc_client(struct in_addr *addr) -{ - struct nfs_client *clp; - - if (nfs_callback_up() < 0) - return NULL; - if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) { - nfs_callback_down(); - return NULL; - } - memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr)); - init_rwsem(&clp->cl_sem); - INIT_LIST_HEAD(&clp->cl_delegations); - INIT_LIST_HEAD(&clp->cl_state_owners); - INIT_LIST_HEAD(&clp->cl_unused); - spin_lock_init(&clp->cl_lock); - atomic_set(&clp->cl_count, 1); - INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp); - INIT_LIST_HEAD(&clp->cl_superblocks); - rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client"); - clp->cl_rpcclient = ERR_PTR(-EINVAL); - clp->cl_boot_time = CURRENT_TIME; - clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; - return clp; -} - -static void -nfs4_free_client(struct nfs_client *clp) -{ - struct nfs4_state_owner *sp; - - while (!list_empty(&clp->cl_unused)) { - sp = list_entry(clp->cl_unused.next, - struct nfs4_state_owner, - so_list); - list_del(&sp->so_list); - kfree(sp); - } - BUG_ON(!list_empty(&clp->cl_state_owners)); - nfs_idmap_delete(clp); - if (!IS_ERR(clp->cl_rpcclient)) - rpc_shutdown_client(clp->cl_rpcclient); - kfree(clp); - nfs_callback_down(); -} - -static struct nfs_client *__nfs4_find_client(struct in_addr *addr) -{ - struct nfs_client *clp; - list_for_each_entry(clp, &nfs4_clientid_list, cl_servers) { - if (memcmp(&clp->cl_addr, addr, sizeof(clp->cl_addr)) == 0) { - atomic_inc(&clp->cl_count); - return clp; - } - } - return NULL; -} - -struct nfs_client *nfs4_find_client(struct in_addr *addr) -{ - struct nfs_client *clp; - spin_lock(&state_spinlock); - clp = __nfs4_find_client(addr); - spin_unlock(&state_spinlock); - return clp; -} - -struct nfs_client * -nfs4_get_client(struct in_addr *addr) -{ - struct nfs_client *clp, *new = NULL; - - spin_lock(&state_spinlock); - for (;;) { - clp = __nfs4_find_client(addr); - if (clp != NULL) - break; - clp = new; - if (clp != NULL) { - list_add(&clp->cl_servers, &nfs4_clientid_list); - new = NULL; - break; - } - spin_unlock(&state_spinlock); - new = nfs4_alloc_client(addr); - spin_lock(&state_spinlock); - if (new == NULL) - break; - } - spin_unlock(&state_spinlock); - if (new) - nfs4_free_client(new); - return clp; -} - -void -nfs4_put_client(struct nfs_client *clp) -{ - if (!atomic_dec_and_lock(&clp->cl_count, &state_spinlock)) - return; - list_del(&clp->cl_servers); - spin_unlock(&state_spinlock); - BUG_ON(!list_empty(&clp->cl_superblocks)); - rpc_wake_up(&clp->cl_rpcwaitq); - nfs4_kill_renewd(clp); - nfs4_free_client(clp); -} - static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred) { int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, @@ -771,11 +655,11 @@ static void nfs4_recover_state(struct nfs_client *clp) __module_get(THIS_MODULE); atomic_inc(&clp->cl_count); task = kthread_run(reclaimer, clp, "%u.%u.%u.%u-reclaim", - NIPQUAD(clp->cl_addr)); + NIPQUAD(clp->cl_addr.sin_addr)); if (!IS_ERR(task)) return; nfs4_clear_recover_bit(clp); - nfs4_put_client(clp); + nfs_put_client(clp); module_put(THIS_MODULE); } @@ -970,12 +854,12 @@ out: if (status == -NFS4ERR_CB_PATH_DOWN) nfs_handle_cb_pathdown(clp); nfs4_clear_recover_bit(clp); - nfs4_put_client(clp); + nfs_put_client(clp); module_put_and_exit(0); return 0; out_error: printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n", - NIPQUAD(clp->cl_addr.s_addr), -status); + NIPQUAD(clp->cl_addr.sin_addr), -status); set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); goto out; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 3ee85c4e65d8..f97d7d9c5c32 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1104,47 +1104,46 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, struct rpc_clnt *clnt = NULL; int err = -EIO; - clp = nfs4_get_client(&server->addr.sin_addr); + clp = nfs_get_client(server->hostname, &server->addr, 4); if (!clp) { dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); return ERR_PTR(err); } /* Now create transport and client */ - down_write(&clp->cl_sem); - if (IS_ERR(clp->cl_rpcclient)) { + if (clp->cl_cons_state == NFS_CS_INITING) { xprt = xprt_create_proto(proto, &server->addr, timeparms); if (IS_ERR(xprt)) { - up_write(&clp->cl_sem); err = PTR_ERR(xprt); dprintk("%s: cannot create RPC transport. Error = %d\n", __FUNCTION__, err); - goto out_fail; + goto client_init_error; } /* Bind to a reserved port! */ xprt->resvport = 1; clnt = rpc_create_client(xprt, server->hostname, &nfs_program, server->rpc_ops->version, flavor); if (IS_ERR(clnt)) { - up_write(&clp->cl_sem); err = PTR_ERR(clnt); dprintk("%s: cannot create RPC client. Error = %d\n", __FUNCTION__, err); - goto out_fail; + goto client_init_error; } clnt->cl_intr = 1; clnt->cl_softrtry = 1; clp->cl_rpcclient = clnt; memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); - if (nfs_idmap_new(clp) < 0) - goto out_fail; + err = nfs_idmap_new(clp); + if (err < 0) { + dprintk("%s: failed to create idmapper.\n", + __FUNCTION__); + goto client_init_error; + } + __set_bit(NFS_CS_IDMAP, &clp->cl_res_state); + nfs_mark_client_ready(clp, 0); } - list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); + clnt = rpc_clone_client(clp->cl_rpcclient); - if (!IS_ERR(clnt)) - server->nfs_client = clp; - up_write(&clp->cl_sem); - clp = NULL; if (IS_ERR(clnt)) { dprintk("%s: cannot create RPC client. Error = %d\n", @@ -1152,11 +1151,6 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, return clnt; } - if (server->nfs_client->cl_idmap == NULL) { - dprintk("%s: failed to create idmapper.\n", __FUNCTION__); - return ERR_PTR(-ENOMEM); - } - if (clnt->cl_auth->au_flavor != flavor) { struct rpc_auth *auth; @@ -1166,11 +1160,16 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, return (struct rpc_clnt *)auth; } } + + server->nfs_client = clp; + down_write(&clp->cl_sem); + list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); + up_write(&clp->cl_sem); return clnt; - out_fail: - if (clp) - nfs4_put_client(clp); +client_init_error: + nfs_mark_client_ready(clp, err); + nfs_put_client(clp); return ERR_PTR(err); } @@ -1329,14 +1328,6 @@ static int nfs4_get_sb(struct file_system_type *fs_type, goto out_free; } - /* Fire up rpciod if not yet running */ - error = rpciod_up(); - if (error < 0) { - dprintk("%s: couldn't start rpciod! Error = %d\n", - __FUNCTION__, error); - goto out_free; - } - s = sget(fs_type, nfs4_compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); @@ -1383,8 +1374,6 @@ static void nfs4_kill_super(struct super_block *sb) destroy_nfsv4_state(server); - rpciod_down(); - nfs_free_iostats(server->io_stats); kfree(server->hostname); kfree(server); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index a36e01cd6321..70e1dc9162e2 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -586,6 +586,7 @@ extern void * nfs_root_data(void); #define NFSDBG_FILE 0x0040 #define NFSDBG_ROOT 0x0080 #define NFSDBG_CALLBACK 0x0100 +#define NFSDBG_CLIENT 0x0200 #define NFSDBG_ALL 0xFFFF #ifdef __KERNEL__ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index fc20d6b934fb..a727657e0ad3 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -6,6 +6,66 @@ struct nfs_iostats; +/* + * The nfs_client identifies our client state to the server. + */ +struct nfs_client { + atomic_t cl_count; + int cl_cons_state; /* current construction state (-ve: init error) */ +#define NFS_CS_READY 0 /* ready to be used */ +#define NFS_CS_INITING 1 /* busy initialising */ + int cl_nfsversion; /* NFS protocol version */ + unsigned long cl_res_state; /* NFS resources state */ +#define NFS_CS_RPCIOD 0 /* - rpciod started */ +#define NFS_CS_CALLBACK 1 /* - callback started */ +#define NFS_CS_IDMAP 2 /* - idmap started */ + struct sockaddr_in cl_addr; /* server identifier */ + char * cl_hostname; /* hostname of server */ + struct list_head cl_share_link; /* link in global client list */ + struct list_head cl_superblocks; /* List of nfs_server structs */ + + struct rpc_clnt * cl_rpcclient; + +#ifdef CONFIG_NFS_V4 + u64 cl_clientid; /* constant */ + nfs4_verifier cl_confirm; + unsigned long cl_state; + + u32 cl_lockowner_id; + + /* + * The following rwsem ensures exclusive access to the server + * while we recover the state following a lease expiration. + */ + struct rw_semaphore cl_sem; + + struct list_head cl_delegations; + struct list_head cl_state_owners; + struct list_head cl_unused; + int cl_nunused; + spinlock_t cl_lock; + + unsigned long cl_lease_time; + unsigned long cl_last_renewal; + struct work_struct cl_renewd; + struct work_struct cl_recoverd; + + struct rpc_wait_queue cl_rpcwaitq; + + /* used for the setclientid verifier */ + struct timespec cl_boot_time; + + /* idmapper */ + struct idmap * cl_idmap; + + /* Our own IP address, as a null-terminated string. + * This is used to generate the clientid, and the callback address. + */ + char cl_ipaddr[16]; + unsigned char cl_id_uniquifier; +#endif +}; + /* * NFS client parameters stored in the superblock. */ From 0c7d90cfed91a283228017ba6faf37ee0bcd32b1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:10 -0400 Subject: [PATCH 0819/1063] NFS: Use the dentry superblock directly in nfs_statfs() Use the nominated dentry's superblock directly in the NFS statfs() op to get a file handle, rather than using s_root (which will become a dummy dentry in a future patch). Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f97d7d9c5c32..a41d516ed595 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -240,11 +240,10 @@ void __exit unregister_nfs_fs(void) */ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) { - struct super_block *sb = dentry->d_sb; - struct nfs_server *server = NFS_SB(sb); + struct nfs_server *server = NFS_SB(dentry->d_sb); unsigned char blockbits; unsigned long blockres; - struct nfs_fh *rootfh = NFS_FH(sb->s_root->d_inode); + struct nfs_fh *fh = NFS_FH(dentry->d_inode); struct nfs_fattr fattr; struct nfs_fsstat res = { .fattr = &fattr, @@ -253,7 +252,7 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) lock_kernel(); - error = server->rpc_ops->statfs(server, rootfh, &res); + error = server->rpc_ops->statfs(server, fh, &res); buf->f_type = NFS_SUPER_MAGIC; if (error < 0) goto out_err; @@ -263,7 +262,7 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) * case where f_frsize != f_bsize. Eventually we want to * report the value of wtmult in this field. */ - buf->f_frsize = sb->s_blocksize; + buf->f_frsize = dentry->d_sb->s_blocksize; /* * On most *nix systems, f_blocks, f_bfree, and f_bavail @@ -272,8 +271,8 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) * thus historically Linux's sys_statfs reports these * fields in units of f_bsize. */ - buf->f_bsize = sb->s_blocksize; - blockbits = sb->s_blocksize_bits; + buf->f_bsize = dentry->d_sb->s_blocksize; + blockbits = dentry->d_sb->s_blocksize_bits; blockres = (1 << blockbits) - 1; buf->f_blocks = (res.tbytes + blockres) >> blockbits; buf->f_bfree = (res.fbytes + blockres) >> blockbits; From 509de8111656a7d89b4a1a5f430f4460ce510f0f Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:11 -0400 Subject: [PATCH 0820/1063] NFS: Add extra const qualifiers Add some extra const qualifiers into NFS. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/namespace.c | 3 ++- fs/nfs/nfs3proc.c | 2 +- fs/nfs/nfs4namespace.c | 8 ++++---- fs/nfs/nfs4proc.c | 2 +- fs/nfs/proc.c | 2 +- fs/nfs/super.c | 10 +++++----- include/linux/nfs_fs_sb.h | 2 +- include/linux/nfs_xdr.h | 6 +++--- 8 files changed, 18 insertions(+), 17 deletions(-) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 86b3169c8cac..85d9ed1dcf42 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -172,7 +172,8 @@ void nfs_release_automount_timer(void) /* * Clone a mountpoint of the appropriate type */ -static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, char *devname, +static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, + const char *devname, struct nfs_clone_mount *mountdata) { #ifdef CONFIG_NFS_V4 diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 7143b1f82cea..3e5371241cea 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -886,7 +886,7 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl); } -struct nfs_rpc_ops nfs_v3_clientops = { +const struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ .dentry_ops = &nfs_dentry_operations, .dir_inode_ops = &nfs3_dir_inode_operations, diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index ea38d27b74e6..faed9bcba50f 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -23,7 +23,7 @@ /* * Check if fs_root is valid */ -static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname, +static inline char *nfs4_pathname_string(const struct nfs4_pathname *pathname, char *buffer, ssize_t buflen) { char *end = buffer + buflen; @@ -34,7 +34,7 @@ static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname, n = pathname->ncomponents; while (--n >= 0) { - struct nfs4_string *component = &pathname->components[n]; + const struct nfs4_string *component = &pathname->components[n]; buflen -= component->len + 1; if (buflen < 0) goto Elong; @@ -60,7 +60,7 @@ Elong: */ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, const struct dentry *dentry, - struct nfs4_fs_locations *locations) + const struct nfs4_fs_locations *locations) { struct vfsmount *mnt = ERR_PTR(-ENOENT); struct nfs_clone_mount mountdata = { @@ -108,7 +108,7 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, loc = 0; while (loc < locations->nlocations && IS_ERR(mnt)) { - struct nfs4_fs_location *location = &locations->locations[loc]; + const struct nfs4_fs_location *location = &locations->locations[loc]; char *mnt_path; if (location == NULL || location->nservers <= 0 || diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 803c31b88bb5..061be713b206 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3761,7 +3761,7 @@ static struct inode_operations nfs4_file_inode_operations = { .listxattr = nfs4_listxattr, }; -struct nfs_rpc_ops nfs_v4_clientops = { +const struct nfs_rpc_ops nfs_v4_clientops = { .version = 4, /* protocol version */ .dentry_ops = &nfs4_dentry_operations, .dir_inode_ops = &nfs4_dir_inode_operations, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index b3899ea3229e..77676903e0f5 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -671,7 +671,7 @@ nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl) } -struct nfs_rpc_ops nfs_v2_clientops = { +const struct nfs_rpc_ops nfs_v2_clientops = { .version = 2, /* protocol version */ .dentry_ops = &nfs_dentry_operations, .dir_inode_ops = &nfs_dir_inode_operations, diff --git a/fs/nfs/super.c b/fs/nfs/super.c index a41d516ed595..c97f30967955 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -329,10 +329,10 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour) */ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) { - static struct proc_nfs_info { + static const struct proc_nfs_info { int flag; - char *str; - char *nostr; + const char *str; + const char *nostr; } nfs_info[] = { { NFS_MOUNT_SOFT, ",soft", ",hard" }, { NFS_MOUNT_INTR, ",intr", "" }, @@ -342,9 +342,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, { NFS_MOUNT_NOACL, ",noacl", "" }, { 0, NULL, NULL } }; - struct proc_nfs_info *nfs_infop; + const struct proc_nfs_info *nfs_infop; char buf[12]; - char *proto; + const char *proto; seq_printf(m, ",vers=%d", nfss->rpc_ops->version); seq_printf(m, ",rsize=%d", nfss->rsize); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a727657e0ad3..95f32d5f6e9c 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -73,7 +73,7 @@ struct nfs_server { struct rpc_clnt * client; /* RPC client handle */ struct rpc_clnt * client_sys; /* 2nd handle for FSINFO */ struct rpc_clnt * client_acl; /* ACL RPC client handle */ - struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */ + const struct nfs_rpc_ops *rpc_ops; /* NFS protocol vector */ struct nfs_iostats * io_stats; /* I/O statistics */ struct backing_dev_info backing_dev_info; int flags; /* various flags */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index dd9ae6761f71..2426b11b6cce 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -833,9 +833,9 @@ struct nfs_rpc_ops { /* * Function vectors etc. for the NFS client */ -extern struct nfs_rpc_ops nfs_v2_clientops; -extern struct nfs_rpc_ops nfs_v3_clientops; -extern struct nfs_rpc_ops nfs_v4_clientops; +extern const struct nfs_rpc_ops nfs_v2_clientops; +extern const struct nfs_rpc_ops nfs_v3_clientops; +extern const struct nfs_rpc_ops nfs_v4_clientops; extern struct rpc_version nfs_version2; extern struct rpc_version nfs_version3; extern struct rpc_version nfs_version4; From 27951bd26031f6c27d38df9e94623bbe208a2464 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:11 -0400 Subject: [PATCH 0821/1063] NFS: Maintain a common server record for NFS2/3 as well as for NFS4 Maintain a common server record for NFS2/3 as well as for NFS4 so that common stuff can be moved there from struct nfs_server. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 21 ++++++++++++++++++++- include/linux/nfs_fs_sb.h | 2 +- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index c97f30967955..d1b4a5b36e33 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -658,11 +658,19 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned static struct rpc_clnt * nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) { + struct nfs_client *clp; struct rpc_timeout timeparms; struct rpc_xprt *xprt = NULL; struct rpc_clnt *clnt = NULL; int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP; + clp = nfs_get_client(server->hostname, &server->addr, + server->rpc_ops->version); + if (!clp) { + dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); + return ERR_PTR(PTR_ERR(clp)); + } + nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans); server->retrans_timeo = timeparms.to_initval; @@ -673,6 +681,8 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) if (IS_ERR(xprt)) { dprintk("%s: cannot create RPC transport. Error = %ld\n", __FUNCTION__, PTR_ERR(xprt)); + nfs_mark_client_ready(clp, PTR_ERR(xprt)); + nfs_put_client(clp); return (struct rpc_clnt *)xprt; } clnt = rpc_create_client(xprt, server->hostname, &nfs_program, @@ -686,9 +696,13 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) clnt->cl_intr = 1; clnt->cl_softrtry = 1; + nfs_mark_client_ready(clp, 0); + server->nfs_client = clp; return clnt; out_fail: + nfs_mark_client_ready(clp, PTR_ERR(xprt)); + nfs_put_client(clp); return clnt; } @@ -764,6 +778,7 @@ static int nfs_clone_generic_sb(struct nfs_clone_mount *data, if (server == NULL) goto out_err; memcpy(server, parent, sizeof(*server)); + atomic_inc(&server->nfs_client->cl_count); hostname = (data->hostname != NULL) ? data->hostname : parent->hostname; len = strlen(hostname) + 1; server->hostname = kmalloc(len, GFP_KERNEL); @@ -796,6 +811,7 @@ out_deactivate: out_rpciod_down: rpciod_down(); kfree(server->hostname); + nfs_put_client(server->nfs_client); kfree(server); return simple_set_mnt(mnt, sb); kill_rpciod: @@ -803,6 +819,7 @@ kill_rpciod: free_hostname: kfree(server->hostname); free_server: + nfs_put_client(server->nfs_client); kfree(server); out_err: return error; @@ -1071,6 +1088,7 @@ static void nfs_kill_super(struct super_block *s) nfs_free_iostats(server->io_stats); kfree(server->hostname); + nfs_put_client(server->nfs_client); kfree(server); nfs_release_automount_timer(); } @@ -1421,7 +1439,6 @@ static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_c nfs4_server_capabilities(server, &server->fh); down_write(&clp->cl_sem); - atomic_inc(&clp->cl_count); list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); up_write(&clp->cl_sem); return sb; @@ -1476,6 +1493,8 @@ static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nf retrans = 1; nfs_init_timeout_values(&timeparms, proto, timeo, retrans); + nfs_put_client(server->nfs_client); + server->nfs_client = NULL; server->client = nfs4_create_client(server, &timeparms, proto, data->authflavor); if (IS_ERR((err = server->client))) goto out_err; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 95f32d5f6e9c..e7d7662f51fd 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -70,6 +70,7 @@ struct nfs_client { * NFS client parameters stored in the superblock. */ struct nfs_server { + struct nfs_client * nfs_client; /* shared client and NFS4 state */ struct rpc_clnt * client; /* RPC client handle */ struct rpc_clnt * client_sys; /* 2nd handle for FSINFO */ struct rpc_clnt * client_acl; /* ACL RPC client handle */ @@ -103,7 +104,6 @@ struct nfs_server { */ char ip_addr[16]; char * mnt_path; - struct nfs_client * nfs_client; /* all NFSv4 state starts here */ struct list_head nfs4_siblings; /* List of other nfs_server structs * that share the same clientid */ From 1f163415dc05983830bcc47b33c155b2528b1574 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:11 -0400 Subject: [PATCH 0822/1063] NFS: Make better use of inode* dereferencing macros Make better use of inode* dereferencing macros to hide dereferencing chains (including NFS_PROTO and NFS_CLIENT). Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 2 +- fs/nfs/nfs4proc.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 48e892880d5b..a146ed338534 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -111,7 +111,7 @@ nfs_file_open(struct inode *inode, struct file *filp) nfs_inc_stats(inode, NFSIOS_VFSOPEN); lock_kernel(); - res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp); + res = NFS_PROTO(inode)->file_open(inode, filp); unlock_kernel(); return res; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 061be713b206..b731b1945270 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1268,7 +1268,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) BUG_ON(nd->intent.open.flags & O_CREAT); } - cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); if (IS_ERR(cred)) return (struct dentry *)cred; state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); @@ -1291,7 +1291,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st struct rpc_cred *cred; struct nfs4_state *state; - cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); if (IS_ERR(cred)) return PTR_ERR(cred); state = nfs4_open_delegated(dentry->d_inode, openflags, cred); @@ -1565,7 +1565,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, nfs_fattr_init(fattr); - cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); + cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); if (IS_ERR(cred)) return PTR_ERR(cred); @@ -1927,7 +1927,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, struct rpc_cred *cred; int status = 0; - cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); if (IS_ERR(cred)) { status = PTR_ERR(cred); goto out; @@ -2816,7 +2816,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl return -EOPNOTSUPP; nfs_inode_return_delegation(inode); buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); - ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); + ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (ret == 0) nfs4_write_cached_acl(inode, buf, buflen); return ret; From 8fa5c000d7f986ef9cdc6d95f9f7fcee20e0a7d6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:12 -0400 Subject: [PATCH 0823/1063] NFS: Move rpc_ops from nfs_server to nfs_client Move the rpc_ops from the nfs_server struct to the nfs_client struct as they're common to all server records of a particular NFS protocol version. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 +- fs/nfs/inode.c | 4 +-- fs/nfs/namespace.c | 6 ++-- fs/nfs/nfs4proc.c | 2 +- fs/nfs/super.c | 59 ++++++++++++++++++++++----------------- include/linux/nfs_fs.h | 2 +- include/linux/nfs_fs_sb.h | 2 +- 7 files changed, 43 insertions(+), 34 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 067d144d141b..19362712452f 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1147,7 +1147,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, } if (!(fattr->valid & NFS_ATTR_FATTR)) { struct nfs_server *server = NFS_SB(dentry->d_sb); - error = server->rpc_ops->getattr(server, fhandle, fattr); + error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr); if (error < 0) goto out_err; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6ed018c9aad2..771c3b833757 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -237,13 +237,13 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. */ - inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops; + inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops; if (S_ISREG(inode->i_mode)) { inode->i_fop = &nfs_file_operations; inode->i_data.a_ops = &nfs_file_aops; inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info; } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = NFS_SB(sb)->rpc_ops->dir_inode_ops; + inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; inode->i_fop = &nfs_dir_operations; if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) && fattr->size <= NFS_LIMIT_READDIRPLUS) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 85d9ed1dcf42..d8b8d56266cb 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -104,7 +104,9 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) goto out_follow; /* Look it up again */ parent = dget_parent(nd->dentry); - err = server->rpc_ops->lookup(parent->d_inode, &nd->dentry->d_name, &fh, &fattr); + err = server->nfs_client->rpc_ops->lookup(parent->d_inode, + &nd->dentry->d_name, + &fh, &fattr); dput(parent); if (err != 0) goto out_err; @@ -178,7 +180,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, { #ifdef CONFIG_NFS_V4 struct vfsmount *mnt = NULL; - switch (server->rpc_ops->version) { + switch (server->nfs_client->cl_nfsversion) { case 2: case 3: mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b731b1945270..1573eeb07ce1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -758,7 +758,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) } nfs_confirm_seqid(&data->owner->so_seqid, 0); if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) - return server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); + return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); return 0; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index d1b4a5b36e33..e1e5eab0259b 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -252,7 +252,7 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) lock_kernel(); - error = server->rpc_ops->statfs(server, fh, &res); + error = server->nfs_client->rpc_ops->statfs(server, fh, &res); buf->f_type = NFS_SUPER_MAGIC; if (error < 0) goto out_err; @@ -343,10 +343,11 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, { 0, NULL, NULL } }; const struct proc_nfs_info *nfs_infop; + struct nfs_client *clp = nfss->nfs_client; char buf[12]; const char *proto; - seq_printf(m, ",vers=%d", nfss->rpc_ops->version); + seq_printf(m, ",vers=%d", clp->rpc_ops->version); seq_printf(m, ",rsize=%d", nfss->rsize); seq_printf(m, ",wsize=%d", nfss->wsize); if (nfss->acregmin != 3*HZ || showdefaults) @@ -427,7 +428,7 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) seq_printf(m, ",namelen=%d", nfss->namelen); #ifdef CONFIG_NFS_V4 - if (nfss->rpc_ops->version == 4) { + if (nfss->nfs_client->cl_nfsversion == 4) { seq_printf(m, "\n\tnfsv4:\t"); seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); @@ -503,7 +504,7 @@ nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *f struct nfs_server *server = NFS_SB(sb); int error; - error = server->rpc_ops->getroot(server, rootfh, fsinfo); + error = server->nfs_client->rpc_ops->getroot(server, rootfh, fsinfo); if (error < 0) { dprintk("nfs_get_root: getattr error = %d\n", -error); return ERR_PTR(error); @@ -553,14 +554,14 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) no_root_error = -ENOMEM; goto out_no_root; } - sb->s_root->d_op = server->rpc_ops->dentry_ops; + sb->s_root->d_op = server->nfs_client->rpc_ops->dentry_ops; /* mount time stamp, in seconds */ server->mount_time = jiffies; /* Get some general file system info */ if (server->namelen == 0 && - server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) + server->nfs_client->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) server->namelen = pathinfo.max_namelen; /* Work out a lot of parameters */ if (server->rsize == 0) @@ -663,9 +664,14 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) struct rpc_xprt *xprt = NULL; struct rpc_clnt *clnt = NULL; int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP; + int nfsversion = 2; - clp = nfs_get_client(server->hostname, &server->addr, - server->rpc_ops->version); +#ifdef CONFIG_NFS_V3 + if (server->flags & NFS_MOUNT_VER3) + nfsversion = 3; +#endif + + clp = nfs_get_client(server->hostname, &server->addr, nfsversion); if (!clp) { dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); return ERR_PTR(PTR_ERR(clp)); @@ -676,6 +682,19 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) server->retrans_timeo = timeparms.to_initval; server->retrans_count = timeparms.to_retries; + /* Check NFS protocol revision and initialize RPC op vector + * and file handle pool. */ +#ifdef CONFIG_NFS_V3 + if (nfsversion == 3) { + clp->rpc_ops = &nfs_v3_clientops; + server->caps |= NFS_CAP_READDIRPLUS; + } else { + clp->rpc_ops = &nfs_v2_clientops; + } +#else + clp->rpc_ops = &nfs_v2_clientops; +#endif + /* create transport and client */ xprt = xprt_create_proto(proto, &server->addr, &timeparms); if (IS_ERR(xprt)) { @@ -686,7 +705,7 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) return (struct rpc_clnt *)xprt; } clnt = rpc_create_client(xprt, server->hostname, &nfs_program, - server->rpc_ops->version, data->pseudoflavor); + clp->cl_nfsversion, data->pseudoflavor); if (IS_ERR(clnt)) { dprintk("%s: cannot create RPC client. Error = %ld\n", __FUNCTION__, PTR_ERR(xprt)); @@ -750,7 +769,7 @@ static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_cl fsinfo.fattr = data->fattr; if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0) nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); - sb->s_root->d_op = server->rpc_ops->dentry_ops; + sb->s_root->d_op = server->nfs_client->rpc_ops->dentry_ops; sb->s_flags |= MS_ACTIVE; return server; out_put_root: @@ -865,19 +884,6 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) return -ENOMEM; strcpy(server->hostname, data->hostname); - /* Check NFS protocol revision and initialize RPC op vector - * and file handle pool. */ -#ifdef CONFIG_NFS_V3 - if (server->flags & NFS_MOUNT_VER3) { - server->rpc_ops = &nfs_v3_clientops; - server->caps |= NFS_CAP_READDIRPLUS; - } else { - server->rpc_ops = &nfs_v2_clientops; - } -#else - server->rpc_ops = &nfs_v2_clientops; -#endif - /* Fill in pseudoflavor for mount version < 5 */ if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) data->pseudoflavor = RPC_AUTH_UNIX; @@ -888,6 +894,7 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) server->client = nfs_create_client(server, data); if (IS_ERR(server->client)) return PTR_ERR(server->client); + /* RFC 2623, sec 2.3.2 */ if (authflavor != RPC_AUTH_UNIX) { struct rpc_auth *auth; @@ -1129,6 +1136,8 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, /* Now create transport and client */ if (clp->cl_cons_state == NFS_CS_INITING) { + clp->rpc_ops = &nfs_v4_clientops; + xprt = xprt_create_proto(proto, &server->addr, timeparms); if (IS_ERR(xprt)) { err = PTR_ERR(xprt); @@ -1139,7 +1148,7 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, /* Bind to a reserved port! */ xprt->resvport = 1; clnt = rpc_create_client(xprt, server->hostname, &nfs_program, - server->rpc_ops->version, flavor); + clp->cl_nfsversion, flavor); if (IS_ERR(clnt)) { err = PTR_ERR(clnt); dprintk("%s: cannot create RPC client. Error = %d\n", @@ -1215,8 +1224,6 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, server->acdirmin = data->acdirmin*HZ; server->acdirmax = data->acdirmax*HZ; - server->rpc_ops = &nfs_v4_clientops; - nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans); server->retrans_timeo = timeparms.to_initval; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 70e1dc9162e2..51e9bd90dedc 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -215,7 +215,7 @@ static inline struct nfs_inode *NFS_I(struct inode *inode) #define NFS_FH(inode) (&NFS_I(inode)->fh) #define NFS_SERVER(inode) (NFS_SB(inode->i_sb)) #define NFS_CLIENT(inode) (NFS_SERVER(inode)->client) -#define NFS_PROTO(inode) (NFS_SERVER(inode)->rpc_ops) +#define NFS_PROTO(inode) (NFS_SERVER(inode)->nfs_client->rpc_ops) #define NFS_ADDR(inode) (RPC_PEERADDR(NFS_CLIENT(inode))) #define NFS_COOKIEVERF(inode) (NFS_I(inode)->cookieverf) #define NFS_READTIME(inode) (NFS_I(inode)->read_cache_jiffies) diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index e7d7662f51fd..aae7c117597a 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -25,6 +25,7 @@ struct nfs_client { struct list_head cl_superblocks; /* List of nfs_server structs */ struct rpc_clnt * cl_rpcclient; + const struct nfs_rpc_ops *rpc_ops; /* NFS protocol vector */ #ifdef CONFIG_NFS_V4 u64 cl_clientid; /* constant */ @@ -74,7 +75,6 @@ struct nfs_server { struct rpc_clnt * client; /* RPC client handle */ struct rpc_clnt * client_sys; /* 2nd handle for FSINFO */ struct rpc_clnt * client_acl; /* ACL RPC client handle */ - const struct nfs_rpc_ops *rpc_ops; /* NFS protocol vector */ struct nfs_iostats * io_stats; /* I/O statistics */ struct backing_dev_info backing_dev_info; int flags; /* various flags */ From 5006a76cca8f86c6975c16fcf67e83b8b0eee2b6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:12 -0400 Subject: [PATCH 0824/1063] NFS: Eliminate client_sys in favour of cl_rpcclient Eliminate nfs_server::client_sys in favour of nfs_client::cl_rpcclient as we only really need one per server that we're talking to since it doesn't have any security on it. The retransmission management variables are also moved to the common struct as they're required to set up the cl_rpcclient connection. The NFS2/3 client and client_acl connections are thenceforth derived by cloning the cl_rpcclient connection and post-applying the authorisation flavour. The code for setting up the initial common connection has been moved to client.c as nfs_create_rpc_client(). All the NFS program definition tables are also moved there as that's where they're now required rather than super.c. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 119 ++++++++++++++++++++ fs/nfs/internal.h | 2 + fs/nfs/nfs3proc.c | 6 +- fs/nfs/proc.c | 4 +- fs/nfs/super.c | 222 +++++++++----------------------------- include/linux/nfs_fs_sb.h | 5 +- 6 files changed, 179 insertions(+), 179 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index cb5e92463bdb..c08cab935ad5 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -50,6 +50,48 @@ static DEFINE_SPINLOCK(nfs_client_lock); static LIST_HEAD(nfs_client_list); static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); +/* + * RPC cruft for NFS + */ +static struct rpc_version *nfs_version[5] = { + [2] = &nfs_version2, +#ifdef CONFIG_NFS_V3 + [3] = &nfs_version3, +#endif +#ifdef CONFIG_NFS_V4 + [4] = &nfs_version4, +#endif +}; + +struct rpc_program nfs_program = { + .name = "nfs", + .number = NFS_PROGRAM, + .nrvers = ARRAY_SIZE(nfs_version), + .version = nfs_version, + .stats = &nfs_rpcstat, + .pipe_dir_name = "/nfs", +}; + +struct rpc_stat nfs_rpcstat = { + .program = &nfs_program +}; + + +#ifdef CONFIG_NFS_V3_ACL +static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; +static struct rpc_version * nfsacl_version[] = { + [3] = &nfsacl_version3, +}; + +struct rpc_program nfsacl_program = { + .name = "nfsacl", + .number = NFS_ACL_PROGRAM, + .nrvers = ARRAY_SIZE(nfsacl_version), + .version = nfsacl_version, + .stats = &nfsacl_rpcstat, +}; +#endif /* CONFIG_NFS_V3_ACL */ + /* * Allocate a shared client record * @@ -310,3 +352,80 @@ void nfs_mark_client_ready(struct nfs_client *clp, int state) clp->cl_cons_state = state; wake_up_all(&nfs_client_active_wq); } + +/* + * Initialise the timeout values for a connection + */ +static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, + unsigned int timeo, unsigned int retrans) +{ + to->to_initval = timeo * HZ / 10; + to->to_retries = retrans; + if (!to->to_retries) + to->to_retries = 2; + + switch (proto) { + case IPPROTO_TCP: + if (!to->to_initval) + to->to_initval = 60 * HZ; + if (to->to_initval > NFS_MAX_TCP_TIMEOUT) + to->to_initval = NFS_MAX_TCP_TIMEOUT; + to->to_increment = to->to_initval; + to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); + to->to_exponential = 0; + break; + case IPPROTO_UDP: + default: + if (!to->to_initval) + to->to_initval = 11 * HZ / 10; + if (to->to_initval > NFS_MAX_UDP_TIMEOUT) + to->to_initval = NFS_MAX_UDP_TIMEOUT; + to->to_maxval = NFS_MAX_UDP_TIMEOUT; + to->to_exponential = 1; + break; + } +} + +/* + * Create an RPC client handle + */ +int nfs_create_rpc_client(struct nfs_client *clp, int proto, + unsigned int timeo, + unsigned int retrans, + rpc_authflavor_t flavor) +{ + struct rpc_timeout timeparms; + struct rpc_xprt *xprt = NULL; + struct rpc_clnt *clnt = NULL; + + if (!IS_ERR(clp->cl_rpcclient)) + return 0; + + nfs_init_timeout_values(&timeparms, proto, timeo, retrans); + clp->retrans_timeo = timeparms.to_initval; + clp->retrans_count = timeparms.to_retries; + + /* create transport and client */ + xprt = xprt_create_proto(proto, &clp->cl_addr, &timeparms); + if (IS_ERR(xprt)) { + dprintk("%s: cannot create RPC transport. Error = %ld\n", + __FUNCTION__, PTR_ERR(xprt)); + return PTR_ERR(xprt); + } + + /* Bind to a reserved port! */ + xprt->resvport = 1; + /* Create the client RPC handle */ + clnt = rpc_create_client(xprt, clp->cl_hostname, &nfs_program, + clp->rpc_ops->version, RPC_AUTH_UNIX); + if (IS_ERR(clnt)) { + dprintk("%s: cannot create RPC client. Error = %ld\n", + __FUNCTION__, PTR_ERR(clnt)); + return PTR_ERR(clnt); + } + + clnt->cl_intr = 1; + clnt->cl_softrtry = 1; + clp->cl_rpcclient = clnt; + return 0; +} diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ac370d5d4494..2f3aa52fbefc 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -20,6 +20,8 @@ extern void nfs_put_client(struct nfs_client *); extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int); extern struct nfs_client *nfs_get_client(const char *, const struct sockaddr_in *, int); extern void nfs_mark_client_ready(struct nfs_client *, int); +extern int nfs_create_rpc_client(struct nfs_client *, int, unsigned int, + unsigned int, rpc_authflavor_t); /* nfs4namespace.c */ #ifdef CONFIG_NFS_V4 diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 3e5371241cea..0622af0122be 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -90,8 +90,8 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, int status; status = do_proc_get_root(server->client, fhandle, info); - if (status && server->client_sys != server->client) - status = do_proc_get_root(server->client_sys, fhandle, info); + if (status && server->nfs_client->cl_rpcclient != server->client) + status = do_proc_get_root(server->nfs_client->cl_rpcclient, fhandle, info); return status; } @@ -785,7 +785,7 @@ nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, dprintk("NFS call fsinfo\n"); nfs_fattr_init(info->fattr); - status = rpc_call_sync(server->client_sys, &msg, 0); + status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); dprintk("NFS reply fsinfo: %d\n", status); return status; } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 77676903e0f5..5a8b9407ee9a 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -66,14 +66,14 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, dprintk("%s: call getattr\n", __FUNCTION__); nfs_fattr_init(fattr); - status = rpc_call_sync(server->client_sys, &msg, 0); + status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); dprintk("%s: reply getattr: %d\n", __FUNCTION__, status); if (status) return status; dprintk("%s: call statfs\n", __FUNCTION__); msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS]; msg.rpc_resp = &fsinfo; - status = rpc_call_sync(server->client_sys, &msg, 0); + status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); dprintk("%s: reply statfs: %d\n", __FUNCTION__, status); if (status) return status; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e1e5eab0259b..85583414a3ca 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -60,52 +60,6 @@ */ #define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) -/* - * RPC cruft for NFS - */ -static struct rpc_version * nfs_version[] = { - NULL, - NULL, - &nfs_version2, -#if defined(CONFIG_NFS_V3) - &nfs_version3, -#elif defined(CONFIG_NFS_V4) - NULL, -#endif -#if defined(CONFIG_NFS_V4) - &nfs_version4, -#endif -}; - -static struct rpc_program nfs_program = { - .name = "nfs", - .number = NFS_PROGRAM, - .nrvers = ARRAY_SIZE(nfs_version), - .version = nfs_version, - .stats = &nfs_rpcstat, - .pipe_dir_name = "/nfs", -}; - -struct rpc_stat nfs_rpcstat = { - .program = &nfs_program -}; - - -#ifdef CONFIG_NFS_V3_ACL -static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; -static struct rpc_version * nfsacl_version[] = { - [3] = &nfsacl_version3, -}; - -struct rpc_program nfsacl_program = { - .name = "nfsacl", - .number = NFS_ACL_PROGRAM, - .nrvers = ARRAY_SIZE(nfsacl_version), - .version = nfsacl_version, - .stats = &nfsacl_rpcstat, -}; -#endif /* CONFIG_NFS_V3_ACL */ - static void nfs_umount_begin(struct vfsmount *, int); static int nfs_statfs(struct dentry *, struct kstatfs *); static int nfs_show_options(struct seq_file *, struct vfsmount *); @@ -376,8 +330,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, proto = buf; } seq_printf(m, ",proto=%s", proto); - seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ); - seq_printf(m, ",retrans=%u", nfss->retrans_count); + seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ); + seq_printf(m, ",retrans=%u", clp->retrans_count); seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor)); } @@ -621,38 +575,6 @@ out_no_root: return no_root_error; } -/* - * Initialise the timeout values for a connection - */ -static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans) -{ - to->to_initval = timeo * HZ / 10; - to->to_retries = retrans; - if (!to->to_retries) - to->to_retries = 2; - - switch (proto) { - case IPPROTO_TCP: - if (!to->to_initval) - to->to_initval = 60 * HZ; - if (to->to_initval > NFS_MAX_TCP_TIMEOUT) - to->to_initval = NFS_MAX_TCP_TIMEOUT; - to->to_increment = to->to_initval; - to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); - to->to_exponential = 0; - break; - case IPPROTO_UDP: - default: - if (!to->to_initval) - to->to_initval = 11 * HZ / 10; - if (to->to_initval > NFS_MAX_UDP_TIMEOUT) - to->to_initval = NFS_MAX_UDP_TIMEOUT; - to->to_maxval = NFS_MAX_UDP_TIMEOUT; - to->to_exponential = 1; - break; - } -} - /* * Create an RPC client handle. */ @@ -660,11 +582,10 @@ static struct rpc_clnt * nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) { struct nfs_client *clp; - struct rpc_timeout timeparms; - struct rpc_xprt *xprt = NULL; - struct rpc_clnt *clnt = NULL; + struct rpc_clnt *clnt; int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP; int nfsversion = 2; + int err; #ifdef CONFIG_NFS_V3 if (server->flags & NFS_MOUNT_VER3) @@ -677,52 +598,54 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) return ERR_PTR(PTR_ERR(clp)); } - nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans); - - server->retrans_timeo = timeparms.to_initval; - server->retrans_count = timeparms.to_retries; - - /* Check NFS protocol revision and initialize RPC op vector - * and file handle pool. */ + if (clp->cl_cons_state == NFS_CS_INITING) { + /* Check NFS protocol revision and initialize RPC op + * vector and file handle pool. */ #ifdef CONFIG_NFS_V3 - if (nfsversion == 3) { - clp->rpc_ops = &nfs_v3_clientops; - server->caps |= NFS_CAP_READDIRPLUS; - } else { - clp->rpc_ops = &nfs_v2_clientops; - } + if (nfsversion == 3) { + clp->rpc_ops = &nfs_v3_clientops; + server->caps |= NFS_CAP_READDIRPLUS; + } else { + clp->rpc_ops = &nfs_v2_clientops; + } #else - clp->rpc_ops = &nfs_v2_clientops; + clp->rpc_ops = &nfs_v2_clientops; #endif - /* create transport and client */ - xprt = xprt_create_proto(proto, &server->addr, &timeparms); - if (IS_ERR(xprt)) { - dprintk("%s: cannot create RPC transport. Error = %ld\n", - __FUNCTION__, PTR_ERR(xprt)); - nfs_mark_client_ready(clp, PTR_ERR(xprt)); - nfs_put_client(clp); - return (struct rpc_clnt *)xprt; + /* create transport and client */ + err = nfs_create_rpc_client(clp, proto, data->timeo, + data->retrans, RPC_AUTH_UNIX); + if (err < 0) + goto client_init_error; + + nfs_mark_client_ready(clp, 0); } - clnt = rpc_create_client(xprt, server->hostname, &nfs_program, - clp->cl_nfsversion, data->pseudoflavor); + + /* create an nfs_server-specific client */ + clnt = rpc_clone_client(clp->cl_rpcclient); if (IS_ERR(clnt)) { - dprintk("%s: cannot create RPC client. Error = %ld\n", - __FUNCTION__, PTR_ERR(xprt)); - goto out_fail; + dprintk("%s: couldn't create rpc_client!\n", __FUNCTION__); + nfs_put_client(clp); + return ERR_PTR(PTR_ERR(clnt)); } - clnt->cl_intr = 1; - clnt->cl_softrtry = 1; + if (data->pseudoflavor != clp->cl_rpcclient->cl_auth->au_flavor) { + struct rpc_auth *auth; + + auth = rpcauth_create(data->pseudoflavor, server->client); + if (IS_ERR(auth)) { + dprintk("%s: couldn't create credcache!\n", __FUNCTION__); + return ERR_PTR(PTR_ERR(auth)); + } + } - nfs_mark_client_ready(clp, 0); server->nfs_client = clp; return clnt; -out_fail: - nfs_mark_client_ready(clp, PTR_ERR(xprt)); +client_init_error: + nfs_mark_client_ready(clp, err); nfs_put_client(clp); - return clnt; + return ERR_PTR(err); } /* @@ -741,7 +664,7 @@ static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_cl sb->s_blocksize_bits = data->sb->s_blocksize_bits; sb->s_maxbytes = data->sb->s_maxbytes; - server->client_sys = server->client_acl = ERR_PTR(-EINVAL); + server->client_acl = ERR_PTR(-EINVAL); server->io_stats = nfs_alloc_iostats(); if (server->io_stats == NULL) goto out; @@ -750,11 +673,6 @@ static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_cl if (IS_ERR((err = server->client))) goto out; - if (!IS_ERR(parent->client_sys)) { - server->client_sys = rpc_clone_client(parent->client_sys); - if (IS_ERR((err = server->client_sys))) - goto out; - } if (!IS_ERR(parent->client_acl)) { server->client_acl = rpc_clone_client(parent->client_acl); if (IS_ERR((err = server->client_acl))) @@ -813,7 +731,7 @@ static int nfs_clone_generic_sb(struct nfs_clone_mount *data, error = PTR_ERR(sb); goto kill_rpciod; } - + if (sb->s_root) goto out_rpciod_down; @@ -896,19 +814,6 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) return PTR_ERR(server->client); /* RFC 2623, sec 2.3.2 */ - if (authflavor != RPC_AUTH_UNIX) { - struct rpc_auth *auth; - - server->client_sys = rpc_clone_client(server->client); - if (IS_ERR(server->client_sys)) - return PTR_ERR(server->client_sys); - auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys); - if (IS_ERR(auth)) - return PTR_ERR(auth); - } else { - atomic_inc(&server->client->cl_count); - server->client_sys = server->client; - } if (server->flags & NFS_MOUNT_VER3) { #ifdef CONFIG_NFS_V3_ACL if (!(server->flags & NFS_MOUNT_NOACL)) { @@ -1012,7 +917,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, goto out_err_noserver; /* Zero out the NFS state stuff */ init_nfsv4_state(server); - server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); + server->client = server->client_acl = ERR_PTR(-EINVAL); root = &server->fh; if (data->flags & NFS_MOUNT_VER3) @@ -1083,8 +988,6 @@ static void nfs_kill_super(struct super_block *s) if (!IS_ERR(server->client)) rpc_shutdown_client(server->client); - if (!IS_ERR(server->client_sys)) - rpc_shutdown_client(server->client_sys); if (!IS_ERR(server->client_acl)) rpc_shutdown_client(server->client_acl); @@ -1121,10 +1024,9 @@ static int nfs_clone_nfs_sb(struct file_system_type *fs_type, #ifdef CONFIG_NFS_V4 static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, - struct rpc_timeout *timeparms, int proto, rpc_authflavor_t flavor) + int timeo, int retrans, int proto, rpc_authflavor_t flavor) { struct nfs_client *clp; - struct rpc_xprt *xprt = NULL; struct rpc_clnt *clnt = NULL; int err = -EIO; @@ -1138,26 +1040,10 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, if (clp->cl_cons_state == NFS_CS_INITING) { clp->rpc_ops = &nfs_v4_clientops; - xprt = xprt_create_proto(proto, &server->addr, timeparms); - if (IS_ERR(xprt)) { - err = PTR_ERR(xprt); - dprintk("%s: cannot create RPC transport. Error = %d\n", - __FUNCTION__, err); + err = nfs_create_rpc_client(clp, proto, timeo, retrans, flavor); + if (err < 0) goto client_init_error; - } - /* Bind to a reserved port! */ - xprt->resvport = 1; - clnt = rpc_create_client(xprt, server->hostname, &nfs_program, - clp->cl_nfsversion, flavor); - if (IS_ERR(clnt)) { - err = PTR_ERR(clnt); - dprintk("%s: cannot create RPC client. Error = %d\n", - __FUNCTION__, err); - goto client_init_error; - } - clnt->cl_intr = 1; - clnt->cl_softrtry = 1; - clp->cl_rpcclient = clnt; + memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); err = nfs_idmap_new(clp); if (err < 0) { @@ -1205,7 +1091,6 @@ client_init_error: static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent) { struct nfs_server *server; - struct rpc_timeout timeparms; rpc_authflavor_t authflavour; int err = -EIO; @@ -1224,11 +1109,6 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, server->acdirmin = data->acdirmin*HZ; server->acdirmax = data->acdirmax*HZ; - nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans); - - server->retrans_timeo = timeparms.to_initval; - server->retrans_count = timeparms.to_retries; - /* Now create transport and client */ authflavour = RPC_AUTH_UNIX; if (data->auth_flavourlen != 0) { @@ -1244,7 +1124,8 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, } } - server->client = nfs4_create_client(server, &timeparms, data->proto, authflavour); + server->client = nfs4_create_client(server, data->timeo, data->retrans, + data->proto, authflavour); if (IS_ERR(server->client)) { err = PTR_ERR(server->client); dprintk("%s: cannot create RPC client. Error = %d\n", @@ -1318,7 +1199,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type, return -ENOMEM; /* Zero out the NFS state stuff */ init_nfsv4_state(server); - server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); + server->client = server->client_acl = ERR_PTR(-EINVAL); p = nfs_copy_user_string(NULL, &data->hostname, 256); if (IS_ERR(p)) @@ -1489,7 +1370,6 @@ err: static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nfs_clone_mount *data) { struct nfs_server *server = NFS_SB(sb); - struct rpc_timeout timeparms; int proto, timeo, retrans; void *err; @@ -1498,11 +1378,11 @@ static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nf set the timeouts and retries to low values */ timeo = 2; retrans = 1; - nfs_init_timeout_values(&timeparms, proto, timeo, retrans); nfs_put_client(server->nfs_client); server->nfs_client = NULL; - server->client = nfs4_create_client(server, &timeparms, proto, data->authflavor); + server->client = nfs4_create_client(server, timeo, retrans, proto, + data->authflavor); if (IS_ERR((err = server->client))) goto out_err; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index aae7c117597a..d404ceca9168 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -26,6 +26,8 @@ struct nfs_client { struct rpc_clnt * cl_rpcclient; const struct nfs_rpc_ops *rpc_ops; /* NFS protocol vector */ + unsigned long retrans_timeo; /* retransmit timeout */ + unsigned int retrans_count; /* number of retransmit tries */ #ifdef CONFIG_NFS_V4 u64 cl_clientid; /* constant */ @@ -73,7 +75,6 @@ struct nfs_client { struct nfs_server { struct nfs_client * nfs_client; /* shared client and NFS4 state */ struct rpc_clnt * client; /* RPC client handle */ - struct rpc_clnt * client_sys; /* 2nd handle for FSINFO */ struct rpc_clnt * client_acl; /* ACL RPC client handle */ struct nfs_iostats * io_stats; /* I/O statistics */ struct backing_dev_info backing_dev_info; @@ -90,8 +91,6 @@ struct nfs_server { unsigned int acregmax; unsigned int acdirmin; unsigned int acdirmax; - unsigned long retrans_timeo; /* retransmit timeout */ - unsigned int retrans_count; /* number of retransmit tries */ unsigned int namelen; char * hostname; /* remote hostname */ struct nfs_fh fh; From cf6d7b5de8535a9f0088c5cc28ee2dae87371b4a Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:12 -0400 Subject: [PATCH 0825/1063] NFS: Start rpciod in server common management Start rpciod in the server common (nfs_client struct) management code rather than in the superblock management code. This means we only need to "start" it once per server instead of once per superblock. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 31 ++++++------------------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 85583414a3ca..5842d510d732 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -722,18 +722,15 @@ static int nfs_clone_generic_sb(struct nfs_clone_mount *data, if (server->hostname == NULL) goto free_server; memcpy(server->hostname, hostname, len); - error = rpciod_up(); - if (error != 0) - goto free_hostname; sb = fill_sb(server, data); if (IS_ERR(sb)) { error = PTR_ERR(sb); - goto kill_rpciod; + goto free_hostname; } if (sb->s_root) - goto out_rpciod_down; + goto out_share; server = fill_server(sb, data); if (IS_ERR(server)) { @@ -745,14 +742,11 @@ out_deactivate: up_write(&sb->s_umount); deactivate_super(sb); return error; -out_rpciod_down: - rpciod_down(); +out_share: kfree(server->hostname); nfs_put_client(server->nfs_client); kfree(server); return simple_set_mnt(mnt, sb); -kill_rpciod: - rpciod_down(); free_hostname: kfree(server->hostname); free_server: @@ -939,22 +933,14 @@ static int nfs_get_sb(struct file_system_type *fs_type, goto out_err; } - /* Fire up rpciod if not yet running */ - error = rpciod_up(); - if (error < 0) { - dprintk("%s: couldn't start rpciod! Error = %d\n", - __FUNCTION__, error); - goto out_err; - } - s = sget(fs_type, nfs_compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); - goto out_err_rpciod; + goto out_err; } if (s->s_root) - goto out_rpciod_down; + goto out_share; s->s_flags = flags; @@ -967,13 +953,10 @@ static int nfs_get_sb(struct file_system_type *fs_type, s->s_flags |= MS_ACTIVE; return simple_set_mnt(mnt, s); -out_rpciod_down: - rpciod_down(); +out_share: kfree(server); return simple_set_mnt(mnt, s); -out_err_rpciod: - rpciod_down(); out_err: kfree(server); out_err_noserver: @@ -994,8 +977,6 @@ static void nfs_kill_super(struct super_block *s) if (!(server->flags & NFS_MOUNT_NONLM)) lockd_down(); /* release rpc.lockd */ - rpciod_down(); /* release rpciod */ - nfs_free_iostats(server->io_stats); kfree(server->hostname); nfs_put_client(server->nfs_client); From 54ceac4515986030c2502960be620198dd8fe25b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:13 -0400 Subject: [PATCH 0826/1063] NFS: Share NFS superblocks per-protocol per-server per-FSID The attached patch makes NFS share superblocks between mounts from the same server and FSID over the same protocol. It does this by creating each superblock with a false root and returning the real root dentry in the vfsmount presented by get_sb(). The root dentry set starts off as an anonymous dentry if we don't already have the dentry for its inode, otherwise it simply returns the dentry we already have. We may thus end up with several trees of dentries in the superblock, and if at some later point one of anonymous tree roots is discovered by normal filesystem activity to be located in another tree within the superblock, the anonymous root is named and materialises attached to the second tree at the appropriate point. Why do it this way? Why not pass an extra argument to the mount() syscall to indicate the subpath and then pathwalk from the server root to the desired directory? You can't guarantee this will work for two reasons: (1) The root and intervening nodes may not be accessible to the client. With NFS2 and NFS3, for instance, mountd is called on the server to get the filehandle for the tip of a path. mountd won't give us handles for anything we don't have permission to access, and so we can't set up NFS inodes for such nodes, and so can't easily set up dentries (we'd have to have ghost inodes or something). With this patch we don't actually create dentries until we get handles from the server that we can use to set up their inodes, and we don't actually bind them into the tree until we know for sure where they go. (2) Inaccessible symbolic links. If we're asked to mount two exports from the server, eg: mount warthog:/warthog/aaa/xxx /mmm mount warthog:/warthog/bbb/yyy /nnn We may not be able to access anything nearer the root than xxx and yyy, but we may find out later that /mmm/www/yyy, say, is actually the same directory as the one mounted on /nnn. What we might then find out, for example, is that /warthog/bbb was actually a symbolic link to /warthog/aaa/xxx/www, but we can't actually determine that by talking to the server until /warthog is made available by NFS. This would lead to having constructed an errneous dentry tree which we can't easily fix. We can end up with a dentry marked as a directory when it should actually be a symlink, or we could end up with an apparently hardlinked directory. With this patch we need not make assumptions about the type of a dentry for which we can't retrieve information, nor need we assume we know its place in the grand scheme of things until we actually see that place. This patch reduces the possibility of aliasing in the inode and page caches for inodes that may be accessed by more than one NFS export. It also reduces the number of superblocks required for NFS where there are many NFS exports being used from a server (home directory server + autofs for example). This in turn makes it simpler to do local caching of network filesystems, as it can then be guaranteed that there won't be links from multiple inodes in separate superblocks to the same cache file. Obviously, cache aliasing between different levels of NFS protocol could still be a problem, but at least that gives us another key to use when indexing the cache. This patch makes the following changes: (1) The server record construction/destruction has been abstracted out into its own set of functions to make things easier to get right. These have been moved into fs/nfs/client.c. All the code in fs/nfs/client.c has to do with the management of connections to servers, and doesn't touch superblocks in any way; the remaining code in fs/nfs/super.c has to do with VFS superblock management. (2) The sequence of events undertaken by NFS mount is now reordered: (a) A volume representation (struct nfs_server) is allocated. (b) A server representation (struct nfs_client) is acquired. This may be allocated or shared, and is keyed on server address, port and NFS version. (c) If allocated, the client representation is initialised. The state member variable of nfs_client is used to prevent a race during initialisation from two mounts. (d) For NFS4 a simple pathwalk is performed, walking from FH to FH to find the root filehandle for the mount (fs/nfs/getroot.c). For NFS2/3 we are given the root FH in advance. (e) The volume FSID is probed for on the root FH. (f) The volume representation is initialised from the FSINFO record retrieved on the root FH. (g) sget() is called to acquire a superblock. This may be allocated or shared, keyed on client pointer and FSID. (h) If allocated, the superblock is initialised. (i) If the superblock is shared, then the new nfs_server record is discarded. (j) The root dentry for this mount is looked up from the root FH. (k) The root dentry for this mount is assigned to the vfsmount. (3) nfs_readdir_lookup() creates dentries for each of the entries readdir() returns; this function now attaches disconnected trees from alternate roots that happen to be discovered attached to a directory being read (in the same way nfs_lookup() is made to do for lookup ops). The new d_materialise_unique() function is now used to do this, thus permitting the whole thing to be done under one set of locks, and thus avoiding any race between mount and lookup operations on the same directory. (4) The client management code uses a new debug facility: NFSDBG_CLIENT which is set by echoing 1024 to /proc/net/sunrpc/nfs_debug. (5) Clone mounts are now called xdev mounts. (6) Use the dentry passed to the statfs() op as the handle for retrieving fs statistics rather than the root dentry of the superblock (which is now a dummy). Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 2 +- fs/nfs/client.c | 735 ++++++++++++++++++++- fs/nfs/dir.c | 16 +- fs/nfs/getroot.c | 306 +++++++++ fs/nfs/idmap.c | 3 +- fs/nfs/inode.c | 2 +- fs/nfs/internal.h | 82 ++- fs/nfs/namespace.c | 25 +- fs/nfs/nfs3proc.c | 2 +- fs/nfs/nfs4_fs.h | 6 - fs/nfs/nfs4namespace.c | 110 +++- fs/nfs/nfs4proc.c | 59 +- fs/nfs/nfs4renewd.c | 13 - fs/nfs/nfs4state.c | 18 - fs/nfs/read.c | 2 +- fs/nfs/super.c | 1297 ++++++++++++++----------------------- fs/nfs/write.c | 2 +- include/linux/nfs_fs_sb.h | 21 +- 18 files changed, 1700 insertions(+), 1001 deletions(-) create mode 100644 fs/nfs/getroot.c diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 3b993a6f8163..f4580b44eef4 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_NFS_FS) += nfs.o -nfs-y := client.o dir.o file.o inode.o super.o nfs2xdr.o \ +nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \ pagelist.o proc.o read.o symlink.o unlink.o \ write.o namespace.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o diff --git a/fs/nfs/client.c b/fs/nfs/client.c index c08cab935ad5..dafba608c0a0 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -48,6 +48,7 @@ static DEFINE_SPINLOCK(nfs_client_lock); static LIST_HEAD(nfs_client_list); +static LIST_HEAD(nfs_volume_list); static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); /* @@ -268,9 +269,9 @@ struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversio * Look up a client by IP address and protocol version * - creates a new record if one doesn't yet exist */ -struct nfs_client *nfs_get_client(const char *hostname, - const struct sockaddr_in *addr, - int nfsversion) +static struct nfs_client *nfs_get_client(const char *hostname, + const struct sockaddr_in *addr, + int nfsversion) { struct nfs_client *clp, *new = NULL; int error; @@ -340,6 +341,8 @@ found_client: return ERR_PTR(error); } + BUG_ON(clp->cl_cons_state != NFS_CS_READY); + dprintk("--> nfs_get_client() = %p [share]\n", clp); return clp; } @@ -347,7 +350,7 @@ found_client: /* * Mark a server as ready or failed */ -void nfs_mark_client_ready(struct nfs_client *clp, int state) +static void nfs_mark_client_ready(struct nfs_client *clp, int state) { clp->cl_cons_state = state; wake_up_all(&nfs_client_active_wq); @@ -389,10 +392,10 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, /* * Create an RPC client handle */ -int nfs_create_rpc_client(struct nfs_client *clp, int proto, - unsigned int timeo, - unsigned int retrans, - rpc_authflavor_t flavor) +static int nfs_create_rpc_client(struct nfs_client *clp, int proto, + unsigned int timeo, + unsigned int retrans, + rpc_authflavor_t flavor) { struct rpc_timeout timeparms; struct rpc_xprt *xprt = NULL; @@ -429,3 +432,719 @@ int nfs_create_rpc_client(struct nfs_client *clp, int proto, clp->cl_rpcclient = clnt; return 0; } + +/* + * Version 2 or 3 client destruction + */ +static void nfs_destroy_server(struct nfs_server *server) +{ + if (!IS_ERR(server->client_acl)) + rpc_shutdown_client(server->client_acl); + + if (!(server->flags & NFS_MOUNT_NONLM)) + lockd_down(); /* release rpc.lockd */ +} + +/* + * Version 2 or 3 lockd setup + */ +static int nfs_start_lockd(struct nfs_server *server) +{ + int error = 0; + + if (server->nfs_client->cl_nfsversion > 3) + goto out; + if (server->flags & NFS_MOUNT_NONLM) + goto out; + error = lockd_up(); + if (error < 0) + server->flags |= NFS_MOUNT_NONLM; + else + server->destroy = nfs_destroy_server; +out: + return error; +} + +/* + * Initialise an NFSv3 ACL client connection + */ +#ifdef CONFIG_NFS_V3_ACL +static void nfs_init_server_aclclient(struct nfs_server *server) +{ + if (server->nfs_client->cl_nfsversion != 3) + goto out_noacl; + if (server->flags & NFS_MOUNT_NOACL) + goto out_noacl; + + server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); + if (IS_ERR(server->client_acl)) + goto out_noacl; + + /* No errors! Assume that Sun nfsacls are supported */ + server->caps |= NFS_CAP_ACLS; + return; + +out_noacl: + server->caps &= ~NFS_CAP_ACLS; +} +#else +static inline void nfs_init_server_aclclient(struct nfs_server *server) +{ + server->flags &= ~NFS_MOUNT_NOACL; + server->caps &= ~NFS_CAP_ACLS; +} +#endif + +/* + * Create a general RPC client + */ +static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t pseudoflavour) +{ + struct nfs_client *clp = server->nfs_client; + + server->client = rpc_clone_client(clp->cl_rpcclient); + if (IS_ERR(server->client)) { + dprintk("%s: couldn't create rpc_client!\n", __FUNCTION__); + return PTR_ERR(server->client); + } + + if (pseudoflavour != clp->cl_rpcclient->cl_auth->au_flavor) { + struct rpc_auth *auth; + + auth = rpcauth_create(pseudoflavour, server->client); + if (IS_ERR(auth)) { + dprintk("%s: couldn't create credcache!\n", __FUNCTION__); + return PTR_ERR(auth); + } + } + server->client->cl_softrtry = 0; + if (server->flags & NFS_MOUNT_SOFT) + server->client->cl_softrtry = 1; + + server->client->cl_intr = 0; + if (server->flags & NFS4_MOUNT_INTR) + server->client->cl_intr = 1; + + return 0; +} + +/* + * Initialise an NFS2 or NFS3 client + */ +static int nfs_init_client(struct nfs_client *clp, const struct nfs_mount_data *data) +{ + int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP; + int error; + + if (clp->cl_cons_state == NFS_CS_READY) { + /* the client is already initialised */ + dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp); + return 0; + } + + /* Check NFS protocol revision and initialize RPC op vector */ + clp->rpc_ops = &nfs_v2_clientops; +#ifdef CONFIG_NFS_V3 + if (clp->cl_nfsversion == 3) + clp->rpc_ops = &nfs_v3_clientops; +#endif + /* + * Create a client RPC handle for doing FSSTAT with UNIX auth only + * - RFC 2623, sec 2.3.2 + */ + error = nfs_create_rpc_client(clp, proto, data->timeo, data->retrans, + RPC_AUTH_UNIX); + if (error < 0) + goto error; + nfs_mark_client_ready(clp, NFS_CS_READY); + return 0; + +error: + nfs_mark_client_ready(clp, error); + dprintk("<-- nfs_init_client() = xerror %d\n", error); + return error; +} + +/* + * Create a version 2 or 3 client + */ +static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_data *data) +{ + struct nfs_client *clp; + int error, nfsvers = 2; + + dprintk("--> nfs_init_server()\n"); + +#ifdef CONFIG_NFS_V3 + if (data->flags & NFS_MOUNT_VER3) + nfsvers = 3; +#endif + + /* Allocate or find a client reference we can use */ + clp = nfs_get_client(data->hostname, &data->addr, nfsvers); + if (IS_ERR(clp)) { + dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); + return PTR_ERR(clp); + } + + error = nfs_init_client(clp, data); + if (error < 0) + goto error; + + server->nfs_client = clp; + + /* Initialise the client representation from the mount data */ + server->flags = data->flags & NFS_MOUNT_FLAGMASK; + + if (data->rsize) + server->rsize = nfs_block_size(data->rsize, NULL); + if (data->wsize) + server->wsize = nfs_block_size(data->wsize, NULL); + + server->acregmin = data->acregmin * HZ; + server->acregmax = data->acregmax * HZ; + server->acdirmin = data->acdirmin * HZ; + server->acdirmax = data->acdirmax * HZ; + + /* Start lockd here, before we might error out */ + error = nfs_start_lockd(server); + if (error < 0) + goto error; + + error = nfs_init_server_rpcclient(server, data->pseudoflavor); + if (error < 0) + goto error; + + server->namelen = data->namlen; + /* Create a client RPC handle for the NFSv3 ACL management interface */ + nfs_init_server_aclclient(server); + if (clp->cl_nfsversion == 3) { + if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) + server->namelen = NFS3_MAXNAMLEN; + server->caps |= NFS_CAP_READDIRPLUS; + } else { + if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) + server->namelen = NFS2_MAXNAMLEN; + } + + dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp); + return 0; + +error: + server->nfs_client = NULL; + nfs_put_client(clp); + dprintk("<-- nfs_init_server() = xerror %d\n", error); + return error; +} + +/* + * Load up the server record from information gained in an fsinfo record + */ +static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *fsinfo) +{ + unsigned long max_rpc_payload; + + /* Work out a lot of parameters */ + if (server->rsize == 0) + server->rsize = nfs_block_size(fsinfo->rtpref, NULL); + if (server->wsize == 0) + server->wsize = nfs_block_size(fsinfo->wtpref, NULL); + + if (fsinfo->rtmax >= 512 && server->rsize > fsinfo->rtmax) + server->rsize = nfs_block_size(fsinfo->rtmax, NULL); + if (fsinfo->wtmax >= 512 && server->wsize > fsinfo->wtmax) + server->wsize = nfs_block_size(fsinfo->wtmax, NULL); + + max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); + if (server->rsize > max_rpc_payload) + server->rsize = max_rpc_payload; + if (server->rsize > NFS_MAX_FILE_IO_SIZE) + server->rsize = NFS_MAX_FILE_IO_SIZE; + server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; + + if (server->wsize > max_rpc_payload) + server->wsize = max_rpc_payload; + if (server->wsize > NFS_MAX_FILE_IO_SIZE) + server->wsize = NFS_MAX_FILE_IO_SIZE; + server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); + + server->dtsize = nfs_block_size(fsinfo->dtpref, NULL); + if (server->dtsize > PAGE_CACHE_SIZE) + server->dtsize = PAGE_CACHE_SIZE; + if (server->dtsize > server->rsize) + server->dtsize = server->rsize; + + if (server->flags & NFS_MOUNT_NOAC) { + server->acregmin = server->acregmax = 0; + server->acdirmin = server->acdirmax = 0; + } + + server->maxfilesize = fsinfo->maxfilesize; + + /* We're airborne Set socket buffersize */ + rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); +} + +/* + * Probe filesystem information, including the FSID on v2/v3 + */ +static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr) +{ + struct nfs_fsinfo fsinfo; + struct nfs_client *clp = server->nfs_client; + int error; + + dprintk("--> nfs_probe_fsinfo()\n"); + + if (clp->rpc_ops->set_capabilities != NULL) { + error = clp->rpc_ops->set_capabilities(server, mntfh); + if (error < 0) + goto out_error; + } + + fsinfo.fattr = fattr; + nfs_fattr_init(fattr); + error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo); + if (error < 0) + goto out_error; + + nfs_server_set_fsinfo(server, &fsinfo); + + /* Get some general file system info */ + if (server->namelen == 0) { + struct nfs_pathconf pathinfo; + + pathinfo.fattr = fattr; + nfs_fattr_init(fattr); + + if (clp->rpc_ops->pathconf(server, mntfh, &pathinfo) >= 0) + server->namelen = pathinfo.max_namelen; + } + + dprintk("<-- nfs_probe_fsinfo() = 0\n"); + return 0; + +out_error: + dprintk("nfs_probe_fsinfo: error = %d\n", -error); + return error; +} + +/* + * Copy useful information when duplicating a server record + */ +static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source) +{ + target->flags = source->flags; + target->acregmin = source->acregmin; + target->acregmax = source->acregmax; + target->acdirmin = source->acdirmin; + target->acdirmax = source->acdirmax; + target->caps = source->caps; +} + +/* + * Allocate and initialise a server record + */ +static struct nfs_server *nfs_alloc_server(void) +{ + struct nfs_server *server; + + server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); + if (!server) + return NULL; + + server->client = server->client_acl = ERR_PTR(-EINVAL); + + /* Zero out the NFS state stuff */ + INIT_LIST_HEAD(&server->client_link); + INIT_LIST_HEAD(&server->master_link); + + server->io_stats = nfs_alloc_iostats(); + if (!server->io_stats) { + kfree(server); + return NULL; + } + + return server; +} + +/* + * Free up a server record + */ +void nfs_free_server(struct nfs_server *server) +{ + dprintk("--> nfs_free_server()\n"); + + spin_lock(&nfs_client_lock); + list_del(&server->client_link); + list_del(&server->master_link); + spin_unlock(&nfs_client_lock); + + if (server->destroy != NULL) + server->destroy(server); + if (!IS_ERR(server->client)) + rpc_shutdown_client(server->client); + + nfs_put_client(server->nfs_client); + + nfs_free_iostats(server->io_stats); + kfree(server); + nfs_release_automount_timer(); + dprintk("<-- nfs_free_server()\n"); +} + +/* + * Create a version 2 or 3 volume record + * - keyed on server and FSID + */ +struct nfs_server *nfs_create_server(const struct nfs_mount_data *data, + struct nfs_fh *mntfh) +{ + struct nfs_server *server; + struct nfs_fattr fattr; + int error; + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + /* Get a client representation */ + error = nfs_init_server(server, data); + if (error < 0) + goto error; + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + /* Probe the root fh to retrieve its FSID */ + error = nfs_probe_fsinfo(server, mntfh, &fattr); + if (error < 0) + goto error; + if (!(fattr.valid & NFS_ATTR_FATTR)) { + error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr); + if (error < 0) { + dprintk("nfs_create_server: getattr error = %d\n", -error); + goto error; + } + } + memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid)); + + dprintk("Server FSID: %llx:%llx\n", server->fsid.major, server->fsid.minor); + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + spin_lock(&nfs_client_lock); + list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); + list_add_tail(&server->master_link, &nfs_volume_list); + spin_unlock(&nfs_client_lock); + + server->mount_time = jiffies; + return server; + +error: + nfs_free_server(server); + return ERR_PTR(error); +} + +#ifdef CONFIG_NFS_V4 +/* + * Initialise an NFS4 client record + */ +static int nfs4_init_client(struct nfs_client *clp, + int proto, int timeo, int retrans, + rpc_authflavor_t authflavour) +{ + int error; + + if (clp->cl_cons_state == NFS_CS_READY) { + /* the client is initialised already */ + dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp); + return 0; + } + + /* Check NFS protocol revision and initialize RPC op vector */ + clp->rpc_ops = &nfs_v4_clientops; + + error = nfs_create_rpc_client(clp, proto, timeo, retrans, authflavour); + if (error < 0) + goto error; + + error = nfs_idmap_new(clp); + if (error < 0) { + dprintk("%s: failed to create idmapper. Error = %d\n", + __FUNCTION__, error); + __set_bit(NFS_CS_IDMAP, &clp->cl_res_state); + goto error; + } + + nfs_mark_client_ready(clp, NFS_CS_READY); + return 0; + +error: + nfs_mark_client_ready(clp, error); + dprintk("<-- nfs4_init_client() = xerror %d\n", error); + return error; +} + +/* + * Set up an NFS4 client + */ +static int nfs4_set_client(struct nfs_server *server, + const char *hostname, const struct sockaddr_in *addr, + rpc_authflavor_t authflavour, + int proto, int timeo, int retrans) +{ + struct nfs_client *clp; + int error; + + dprintk("--> nfs4_set_client()\n"); + + /* Allocate or find a client reference we can use */ + clp = nfs_get_client(hostname, addr, 4); + if (IS_ERR(clp)) { + error = PTR_ERR(clp); + goto error; + } + error = nfs4_init_client(clp, proto, timeo, retrans, authflavour); + if (error < 0) + goto error_put; + + server->nfs_client = clp; + dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp); + return 0; + +error_put: + nfs_put_client(clp); +error: + dprintk("<-- nfs4_set_client() = xerror %d\n", error); + return error; +} + +/* + * Create a version 4 volume record + */ +static int nfs4_init_server(struct nfs_server *server, + const struct nfs4_mount_data *data, rpc_authflavor_t authflavour) +{ + int error; + + dprintk("--> nfs4_init_server()\n"); + + /* Initialise the client representation from the mount data */ + server->flags = data->flags & NFS_MOUNT_FLAGMASK; + server->caps |= NFS_CAP_ATOMIC_OPEN; + + if (data->rsize) + server->rsize = nfs_block_size(data->rsize, NULL); + if (data->wsize) + server->wsize = nfs_block_size(data->wsize, NULL); + + server->acregmin = data->acregmin * HZ; + server->acregmax = data->acregmax * HZ; + server->acdirmin = data->acdirmin * HZ; + server->acdirmax = data->acdirmax * HZ; + + error = nfs_init_server_rpcclient(server, authflavour); + + /* Done */ + dprintk("<-- nfs4_init_server() = %d\n", error); + return error; +} + +/* + * Create a version 4 volume record + * - keyed on server and FSID + */ +struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data, + const char *hostname, + const struct sockaddr_in *addr, + const char *mntpath, + const char *ip_addr, + rpc_authflavor_t authflavour, + struct nfs_fh *mntfh) +{ + struct nfs_fattr fattr; + struct nfs_server *server; + int error; + + dprintk("--> nfs4_create_server()\n"); + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + /* Get a client record */ + error = nfs4_set_client(server, hostname, addr, authflavour, + data->proto, data->timeo, data->retrans); + if (error < 0) + goto error; + + /* set up the general RPC client */ + error = nfs4_init_server(server, data, authflavour); + if (error < 0) + goto error; + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + /* Probe the root fh to retrieve its FSID */ + error = nfs4_path_walk(server, mntfh, mntpath); + if (error < 0) + goto error; + + dprintk("Server FSID: %llx:%llx\n", server->fsid.major, server->fsid.minor); + dprintk("Mount FH: %d\n", mntfh->size); + + error = nfs_probe_fsinfo(server, mntfh, &fattr); + if (error < 0) + goto error; + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + spin_lock(&nfs_client_lock); + list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); + list_add_tail(&server->master_link, &nfs_volume_list); + spin_unlock(&nfs_client_lock); + + server->mount_time = jiffies; + dprintk("<-- nfs4_create_server() = %p\n", server); + return server; + +error: + nfs_free_server(server); + dprintk("<-- nfs4_create_server() = error %d\n", error); + return ERR_PTR(error); +} + +/* + * Create an NFS4 referral server record + */ +struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, + struct nfs_fh *fh) +{ + struct nfs_client *parent_client; + struct nfs_server *server, *parent_server; + struct nfs_fattr fattr; + int error; + + dprintk("--> nfs4_create_referral_server()\n"); + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + parent_server = NFS_SB(data->sb); + parent_client = parent_server->nfs_client; + + /* Get a client representation. + * Note: NFSv4 always uses TCP, */ + error = nfs4_set_client(server, data->hostname, data->addr, + data->authflavor, + parent_server->client->cl_xprt->prot, + parent_client->retrans_timeo, + parent_client->retrans_count); + + /* Initialise the client representation from the parent server */ + nfs_server_copy_userdata(server, parent_server); + server->caps |= NFS_CAP_ATOMIC_OPEN; + + error = nfs_init_server_rpcclient(server, data->authflavor); + if (error < 0) + goto error; + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + /* probe the filesystem info for this server filesystem */ + error = nfs_probe_fsinfo(server, fh, &fattr); + if (error < 0) + goto error; + + dprintk("Referral FSID: %llx:%llx\n", + server->fsid.major, server->fsid.minor); + + spin_lock(&nfs_client_lock); + list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); + list_add_tail(&server->master_link, &nfs_volume_list); + spin_unlock(&nfs_client_lock); + + server->mount_time = jiffies; + + dprintk("<-- nfs_create_referral_server() = %p\n", server); + return server; + +error: + nfs_free_server(server); + dprintk("<-- nfs4_create_referral_server() = error %d\n", error); + return ERR_PTR(error); +} + +#endif /* CONFIG_NFS_V4 */ + +/* + * Clone an NFS2, NFS3 or NFS4 server record + */ +struct nfs_server *nfs_clone_server(struct nfs_server *source, + struct nfs_fh *fh, + struct nfs_fattr *fattr) +{ + struct nfs_server *server; + struct nfs_fattr fattr_fsinfo; + int error; + + dprintk("--> nfs_clone_server(,%llx:%llx,)\n", + fattr->fsid.major, fattr->fsid.minor); + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + /* Copy data from the source */ + server->nfs_client = source->nfs_client; + atomic_inc(&server->nfs_client->cl_count); + nfs_server_copy_userdata(server, source); + + server->fsid = fattr->fsid; + + error = nfs_init_server_rpcclient(server, source->client->cl_auth->au_flavor); + if (error < 0) + goto out_free_server; + if (!IS_ERR(source->client_acl)) + nfs_init_server_aclclient(server); + + /* probe the filesystem info for this server filesystem */ + error = nfs_probe_fsinfo(server, fh, &fattr_fsinfo); + if (error < 0) + goto out_free_server; + + dprintk("Cloned FSID: %llx:%llx\n", + server->fsid.major, server->fsid.minor); + + error = nfs_start_lockd(server); + if (error < 0) + goto out_free_server; + + spin_lock(&nfs_client_lock); + list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); + list_add_tail(&server->master_link, &nfs_volume_list); + spin_unlock(&nfs_client_lock); + + server->mount_time = jiffies; + + dprintk("<-- nfs_clone_server() = %p\n", server); + return server; + +out_free_server: + nfs_free_server(server); + dprintk("<-- nfs_clone_server() = error %d\n", error); + return ERR_PTR(error); +} diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 19362712452f..9b496ef4abea 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "nfs4_fs.h" #include "delegation.h" @@ -870,14 +871,14 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) return (nd->intent.open.flags & O_EXCL) != 0; } -static inline int nfs_reval_fsid(struct inode *dir, - struct nfs_fh *fh, struct nfs_fattr *fattr) +static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir, + struct nfs_fh *fh, struct nfs_fattr *fattr) { struct nfs_server *server = NFS_SERVER(dir); if (!nfs_fsid_equal(&server->fsid, &fattr->fsid)) /* Revalidate fsid on root dir */ - return __nfs_revalidate_inode(server, dir->i_sb->s_root->d_inode); + return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode); return 0; } @@ -913,7 +914,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru res = ERR_PTR(error); goto out_unlock; } - error = nfs_reval_fsid(dir, &fhandle, &fattr); + error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr); if (error < 0) { res = ERR_PTR(error); goto out_unlock; @@ -922,8 +923,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru res = (struct dentry *)inode; if (IS_ERR(res)) goto out_unlock; + no_entry: - res = d_add_unique(dentry, inode); + res = d_materialise_unique(dentry, inode); if (res != NULL) dentry = res; nfs_renew_times(dentry); @@ -1117,11 +1119,13 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) dput(dentry); return NULL; } - alias = d_add_unique(dentry, inode); + + alias = d_materialise_unique(dentry, inode); if (alias != NULL) { dput(dentry); dentry = alias; } + nfs_renew_times(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); return dentry; diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c new file mode 100644 index 000000000000..977e59088eeb --- /dev/null +++ b/fs/nfs/getroot.c @@ -0,0 +1,306 @@ +/* getroot.c: get the root dentry for an NFS mount + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "nfs4_fs.h" +#include "delegation.h" +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_CLIENT +#define NFS_PARANOIA 1 + +/* + * get an NFS2/NFS3 root dentry from the root filehandle + */ +struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh) +{ + struct nfs_server *server = NFS_SB(sb); + struct nfs_fsinfo fsinfo; + struct nfs_fattr fattr; + struct dentry *mntroot; + struct inode *inode; + int error; + + /* create a dummy root dentry with dummy inode for this superblock */ + if (!sb->s_root) { + struct nfs_fh dummyfh; + struct dentry *root; + struct inode *iroot; + + memset(&dummyfh, 0, sizeof(dummyfh)); + memset(&fattr, 0, sizeof(fattr)); + nfs_fattr_init(&fattr); + fattr.valid = NFS_ATTR_FATTR; + fattr.type = NFDIR; + fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR; + fattr.nlink = 2; + + iroot = nfs_fhget(sb, &dummyfh, &fattr); + if (IS_ERR(iroot)) + return ERR_PTR(PTR_ERR(iroot)); + + root = d_alloc_root(iroot); + if (!root) { + iput(iroot); + return ERR_PTR(-ENOMEM); + } + + sb->s_root = root; + } + + /* get the actual root for this mount */ + fsinfo.fattr = &fattr; + + error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); + if (error < 0) { + dprintk("nfs_get_root: getattr error = %d\n", -error); + return ERR_PTR(error); + } + + inode = nfs_fhget(sb, mntfh, fsinfo.fattr); + if (IS_ERR(inode)) { + dprintk("nfs_get_root: get root inode failed\n"); + return ERR_PTR(PTR_ERR(inode)); + } + + /* root dentries normally start off anonymous and get spliced in later + * if the dentry tree reaches them; however if the dentry already + * exists, we'll pick it up at this point and use it as the root + */ + mntroot = d_alloc_anon(inode); + if (!mntroot) { + iput(inode); + dprintk("nfs_get_root: get root dentry failed\n"); + return ERR_PTR(-ENOMEM); + } + + if (!mntroot->d_op) + mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops; + + return mntroot; +} + +#ifdef CONFIG_NFS_V4 + +/* + * Do a simple pathwalk from the root FH of the server to the nominated target + * of the mountpoint + * - give error on symlinks + * - give error on ".." occurring in the path + * - follow traversals + */ +int nfs4_path_walk(struct nfs_server *server, + struct nfs_fh *mntfh, + const char *path) +{ + struct nfs_fsinfo fsinfo; + struct nfs_fattr fattr; + struct nfs_fh lastfh; + struct qstr name; + int ret; + //int referral_count = 0; + + dprintk("--> nfs4_path_walk(,,%s)\n", path); + + fsinfo.fattr = &fattr; + nfs_fattr_init(&fattr); + + if (*path++ != '/') { + dprintk("nfs4_get_root: Path does not begin with a slash\n"); + return -EINVAL; + } + + /* Start by getting the root filehandle from the server */ + ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); + if (ret < 0) { + dprintk("nfs4_get_root: getroot error = %d\n", -ret); + return ret; + } + + if (fattr.type != NFDIR) { + printk(KERN_ERR "nfs4_get_root:" + " getroot encountered non-directory\n"); + return -ENOTDIR; + } + + if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) { + printk(KERN_ERR "nfs4_get_root:" + " getroot obtained referral\n"); + return -EREMOTE; + } + +next_component: + dprintk("Next: %s\n", path); + + /* extract the next bit of the path */ + if (!*path) + goto path_walk_complete; + + name.name = path; + while (*path && *path != '/') + path++; + name.len = path - (const char *) name.name; + +eat_dot_dir: + while (*path == '/') + path++; + + if (path[0] == '.' && (path[1] == '/' || !path[1])) { + path += 2; + goto eat_dot_dir; + } + + if (path[0] == '.' && path[1] == '.' && (path[2] == '/' || !path[2]) + ) { + printk(KERN_ERR "nfs4_get_root:" + " Mount path contains reference to \"..\"\n"); + return -EINVAL; + } + + /* lookup the next FH in the sequence */ + memcpy(&lastfh, mntfh, sizeof(lastfh)); + + dprintk("LookupFH: %*.*s [%s]\n", name.len, name.len, name.name, path); + + ret = server->nfs_client->rpc_ops->lookupfh(server, &lastfh, &name, + mntfh, &fattr); + if (ret < 0) { + dprintk("nfs4_get_root: getroot error = %d\n", -ret); + return ret; + } + + if (fattr.type != NFDIR) { + printk(KERN_ERR "nfs4_get_root:" + " lookupfh encountered non-directory\n"); + return -ENOTDIR; + } + + if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) { + printk(KERN_ERR "nfs4_get_root:" + " lookupfh obtained referral\n"); + return -EREMOTE; + } + + goto next_component; + +path_walk_complete: + memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid)); + dprintk("<-- nfs4_path_walk() = 0\n"); + return 0; +} + +/* + * get an NFS4 root dentry from the root filehandle + */ +struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh) +{ + struct nfs_server *server = NFS_SB(sb); + struct nfs_fattr fattr; + struct dentry *mntroot; + struct inode *inode; + int error; + + dprintk("--> nfs4_get_root()\n"); + + /* create a dummy root dentry with dummy inode for this superblock */ + if (!sb->s_root) { + struct nfs_fh dummyfh; + struct dentry *root; + struct inode *iroot; + + memset(&dummyfh, 0, sizeof(dummyfh)); + memset(&fattr, 0, sizeof(fattr)); + nfs_fattr_init(&fattr); + fattr.valid = NFS_ATTR_FATTR; + fattr.type = NFDIR; + fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR; + fattr.nlink = 2; + + iroot = nfs_fhget(sb, &dummyfh, &fattr); + if (IS_ERR(iroot)) + return ERR_PTR(PTR_ERR(iroot)); + + root = d_alloc_root(iroot); + if (!root) { + iput(iroot); + return ERR_PTR(-ENOMEM); + } + + sb->s_root = root; + } + + /* get the info about the server and filesystem */ + error = nfs4_server_capabilities(server, mntfh); + if (error < 0) { + dprintk("nfs_get_root: getcaps error = %d\n", + -error); + return ERR_PTR(error); + } + + /* get the actual root for this mount */ + error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr); + if (error < 0) { + dprintk("nfs_get_root: getattr error = %d\n", -error); + return ERR_PTR(error); + } + + inode = nfs_fhget(sb, mntfh, &fattr); + if (IS_ERR(inode)) { + dprintk("nfs_get_root: get root inode failed\n"); + return ERR_PTR(PTR_ERR(inode)); + } + + /* root dentries normally start off anonymous and get spliced in later + * if the dentry tree reaches them; however if the dentry already + * exists, we'll pick it up at this point and use it as the root + */ + mntroot = d_alloc_anon(inode); + if (!mntroot) { + iput(inode); + dprintk("nfs_get_root: get root dentry failed\n"); + return ERR_PTR(-ENOMEM); + } + + if (!mntroot->d_op) + mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops; + + dprintk("<-- nfs4_get_root()\n"); + return mntroot; +} + +#endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 231c20ffc0ff..f96dfac7dc9a 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -114,8 +114,7 @@ nfs_idmap_new(struct nfs_client *clp) struct idmap *idmap; int error; - if (clp->cl_idmap != NULL) - return 0; + BUG_ON(clp->cl_idmap != NULL); if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL) return -ENOMEM; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 771c3b833757..a547c58a83e6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1020,7 +1020,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) out_fileid: printk(KERN_ERR "NFS: server %s error: fileid changed\n" "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n", - NFS_SERVER(inode)->hostname, inode->i_sb->s_id, + NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id, (long long)nfsi->fileid, (long long)fattr->fileid); goto out_err; } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2f3aa52fbefc..e73ba4f1052a 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -4,6 +4,18 @@ #include +struct nfs_string; +struct nfs_mount_data; +struct nfs4_mount_data; + +/* Maximum number of readahead requests + * FIXME: this should really be a sysctl so that users may tune it to suit + * their needs. People that do NFS over a slow network, might for + * instance want to reduce it to something closer to 1 for improved + * interactive response. + */ +#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) + struct nfs_clone_mount { const struct super_block *sb; const struct dentry *dentry; @@ -16,12 +28,25 @@ struct nfs_clone_mount { }; /* client.c */ +extern struct rpc_program nfs_program; + extern void nfs_put_client(struct nfs_client *); extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int); -extern struct nfs_client *nfs_get_client(const char *, const struct sockaddr_in *, int); -extern void nfs_mark_client_ready(struct nfs_client *, int); -extern int nfs_create_rpc_client(struct nfs_client *, int, unsigned int, - unsigned int, rpc_authflavor_t); +extern struct nfs_server *nfs_create_server(const struct nfs_mount_data *, + struct nfs_fh *); +extern struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *, + const char *, + const struct sockaddr_in *, + const char *, + const char *, + rpc_authflavor_t, + struct nfs_fh *); +extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, + struct nfs_fh *); +extern void nfs_free_server(struct nfs_server *server); +extern struct nfs_server *nfs_clone_server(struct nfs_server *, + struct nfs_fh *, + struct nfs_fattr *); /* nfs4namespace.c */ #ifdef CONFIG_NFS_V4 @@ -89,10 +114,10 @@ extern void nfs4_clear_inode(struct inode *); #endif /* super.c */ -extern struct file_system_type nfs_referral_nfs4_fs_type; -extern struct file_system_type clone_nfs_fs_type; +extern struct file_system_type nfs_xdev_fs_type; #ifdef CONFIG_NFS_V4 -extern struct file_system_type clone_nfs4_fs_type; +extern struct file_system_type nfs4_xdev_fs_type; +extern struct file_system_type nfs4_referral_fs_type; #endif extern struct rpc_stat nfs_rpcstat; @@ -101,28 +126,30 @@ extern int __init register_nfs_fs(void); extern void __exit unregister_nfs_fs(void); /* namespace.c */ -extern char *nfs_path(const char *base, const struct dentry *dentry, +extern char *nfs_path(const char *base, + const struct dentry *droot, + const struct dentry *dentry, char *buffer, ssize_t buflen); -/* - * Determine the mount path as a string - */ +/* getroot.c */ +extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *); #ifdef CONFIG_NFS_V4 -static inline char * -nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen) -{ - return nfs_path(NFS_SB(dentry->d_sb)->mnt_path, dentry, buffer, buflen); -} +extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *); + +extern int nfs4_path_walk(struct nfs_server *server, + struct nfs_fh *mntfh, + const char *path); #endif /* * Determine the device name as a string */ static inline char *nfs_devname(const struct vfsmount *mnt_parent, - const struct dentry *dentry, - char *buffer, ssize_t buflen) + const struct dentry *dentry, + char *buffer, ssize_t buflen) { - return nfs_path(mnt_parent->mnt_devname, dentry, buffer, buflen); + return nfs_path(mnt_parent->mnt_devname, mnt_parent->mnt_root, + dentry, buffer, buflen); } /* @@ -178,20 +205,3 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize) if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0) sb->s_maxbytes = MAX_LFS_FILESIZE; } - -/* - * Check if the string represents a "valid" IPv4 address - */ -static inline int valid_ipaddr4(const char *buf) -{ - int rc, count, in[4]; - - rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); - if (rc != 4) - return -EINVAL; - for (count = 0; count < 4; count++) { - if (in[count] > 255) - return -EINVAL; - } - return 0; -} diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index d8b8d56266cb..77b00684894d 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -2,6 +2,7 @@ * linux/fs/nfs/namespace.c * * Copyright (C) 2005 Trond Myklebust + * - Modified by David Howells * * NFS namespace */ @@ -28,6 +29,7 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; /* * nfs_path - reconstruct the path given an arbitrary dentry * @base - arbitrary string to prepend to the path + * @droot - pointer to root dentry for mountpoint * @dentry - pointer to dentry * @buffer - result buffer * @buflen - length of buffer @@ -38,7 +40,9 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; * This is mainly for use in figuring out the path on the * server side when automounting on top of an existing partition. */ -char *nfs_path(const char *base, const struct dentry *dentry, +char *nfs_path(const char *base, + const struct dentry *droot, + const struct dentry *dentry, char *buffer, ssize_t buflen) { char *end = buffer+buflen; @@ -47,7 +51,7 @@ char *nfs_path(const char *base, const struct dentry *dentry, *--end = '\0'; buflen--; spin_lock(&dcache_lock); - while (!IS_ROOT(dentry)) { + while (!IS_ROOT(dentry) && dentry != droot) { namelen = dentry->d_name.len; buflen -= namelen + 1; if (buflen < 0) @@ -96,12 +100,13 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) struct nfs_fattr fattr; int err; + dprintk("--> nfs_follow_mountpoint()\n"); + BUG_ON(IS_ROOT(dentry)); dprintk("%s: enter\n", __FUNCTION__); dput(nd->dentry); nd->dentry = dget(dentry); - if (d_mountpoint(nd->dentry)) - goto out_follow; + /* Look it up again */ parent = dget_parent(nd->dentry); err = server->nfs_client->rpc_ops->lookup(parent->d_inode, @@ -134,6 +139,8 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); out: dprintk("%s: done, returned %d\n", __FUNCTION__, err); + + dprintk("<-- nfs_follow_mountpoint() = %d\n", err); return ERR_PTR(err); out_err: path_release(nd); @@ -183,14 +190,14 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, switch (server->nfs_client->cl_nfsversion) { case 2: case 3: - mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata); + mnt = vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata); break; case 4: - mnt = vfs_kern_mount(&clone_nfs4_fs_type, 0, devname, mountdata); + mnt = vfs_kern_mount(&nfs4_xdev_fs_type, 0, devname, mountdata); } return mnt; #else - return vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata); + return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata); #endif } @@ -216,6 +223,8 @@ struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, char *page = (char *) __get_free_page(GFP_USER); char *devname; + dprintk("--> nfs_do_submount()\n"); + dprintk("%s: submounting on %s/%s\n", __FUNCTION__, dentry->d_parent->d_name.name, dentry->d_name.name); @@ -230,5 +239,7 @@ free_page: free_page((unsigned long)page); out: dprintk("%s: done\n", __FUNCTION__); + + dprintk("<-- nfs_do_submount() = %p\n", mnt); return mnt; } diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 0622af0122be..9e8258ece6fd 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -81,7 +81,7 @@ do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle, } /* - * Bare-bones access to getattr: this is for nfs_read_super. + * Bare-bones access to getattr: this is for nfs_get_root/nfs_get_sb */ static int nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index e7879245361e..61095fe4b5ca 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -188,8 +188,6 @@ extern void nfs4_kill_renewd(struct nfs_client *); extern void nfs4_renew_state(void *); /* nfs4state.c */ -extern void init_nfsv4_state(struct nfs_server *); -extern void destroy_nfsv4_state(struct nfs_server *); struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp); extern u32 nfs4_alloc_lockowner_id(struct nfs_client *); @@ -224,10 +222,6 @@ extern struct svc_version nfs4_callback_version1; #else -#define init_nfsv4_state(server) do { } while (0) -#define destroy_nfsv4_state(server) do { } while (0) -#define nfs4_put_state_owner(inode, owner) do { } while (0) -#define nfs4_put_open_state(state) do { } while (0) #define nfs4_close_state(a, b) do { } while (0) #endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index faed9bcba50f..24e47f3bbd17 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -2,6 +2,7 @@ * linux/fs/nfs/nfs4namespace.c * * Copyright (C) 2005 Trond Myklebust + * - Modified by David Howells * * NFSv4 namespace */ @@ -47,6 +48,68 @@ Elong: return ERR_PTR(-ENAMETOOLONG); } +/* + * Determine the mount path as a string + */ +static char *nfs4_path(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + char *buffer, ssize_t buflen) +{ + const char *srvpath; + + srvpath = strchr(mnt_parent->mnt_devname, ':'); + if (srvpath) + srvpath++; + else + srvpath = mnt_parent->mnt_devname; + + return nfs_path(srvpath, mnt_parent->mnt_root, dentry, buffer, buflen); +} + +/* + * Check that fs_locations::fs_root [RFC3530 6.3] is a prefix for what we + * believe to be the server path to this dentry + */ +static int nfs4_validate_fspath(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + const struct nfs4_fs_locations *locations, + char *page, char *page2) +{ + const char *path, *fs_path; + + path = nfs4_path(mnt_parent, dentry, page, PAGE_SIZE); + if (IS_ERR(path)) + return PTR_ERR(path); + + fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE); + if (IS_ERR(fs_path)) + return PTR_ERR(fs_path); + + if (strncmp(path, fs_path, strlen(fs_path)) != 0) { + dprintk("%s: path %s does not begin with fsroot %s\n", + __FUNCTION__, path, fs_path); + return -ENOENT; + } + + return 0; +} + +/* + * Check if the string represents a "valid" IPv4 address + */ +static inline int valid_ipaddr4(const char *buf) +{ + int rc, count, in[4]; + + rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); + if (rc != 4) + return -EINVAL; + for (count = 0; count < 4; count++) { + if (in[count] > 255) + return -EINVAL; + } + return 0; +} /** * nfs_follow_referral - set up mountpoint when hitting a referral on moved error @@ -68,10 +131,9 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, .dentry = dentry, .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, }; - char *page, *page2; - char *path, *fs_path; + char *page = NULL, *page2 = NULL; char *devname; - int loc, s; + int loc, s, error; if (locations == NULL || locations->nlocations <= 0) goto out; @@ -79,31 +141,25 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, dprintk("%s: referral at %s/%s\n", __FUNCTION__, dentry->d_parent->d_name.name, dentry->d_name.name); - /* Ensure fs path is a prefix of current dentry path */ page = (char *) __get_free_page(GFP_USER); - if (page == NULL) + if (!page) goto out; + page2 = (char *) __get_free_page(GFP_USER); - if (page2 == NULL) + if (!page2) goto out; - path = nfs4_path(dentry, page, PAGE_SIZE); - if (IS_ERR(path)) - goto out_free; - - fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE); - if (IS_ERR(fs_path)) - goto out_free; - - if (strncmp(path, fs_path, strlen(fs_path)) != 0) { - dprintk("%s: path %s does not begin with fsroot %s\n", __FUNCTION__, path, fs_path); - goto out_free; + /* Ensure fs path is a prefix of current dentry path */ + error = nfs4_validate_fspath(mnt_parent, dentry, locations, page, page2); + if (error < 0) { + mnt = ERR_PTR(error); + goto out; } devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE); if (IS_ERR(devname)) { mnt = (struct vfsmount *)devname; - goto out_free; + goto out; } loc = 0; @@ -140,7 +196,7 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, addr.sin_port = htons(NFS_PORT); mountdata.addr = &addr; - mnt = vfs_kern_mount(&nfs_referral_nfs4_fs_type, 0, devname, &mountdata); + mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, devname, &mountdata); if (!IS_ERR(mnt)) { break; } @@ -149,10 +205,9 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, loc++; } -out_free: - free_page((unsigned long)page); - free_page((unsigned long)page2); out: + free_page((unsigned long) page); + free_page((unsigned long) page2); dprintk("%s: done\n", __FUNCTION__); return mnt; } @@ -165,7 +220,7 @@ out: */ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) { - struct vfsmount *mnt = ERR_PTR(-ENOENT); + struct vfsmount *mnt = ERR_PTR(-ENOMEM); struct dentry *parent; struct nfs4_fs_locations *fs_locations = NULL; struct page *page; @@ -183,11 +238,16 @@ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentr goto out_free; /* Get locations */ + mnt = ERR_PTR(-ENOENT); + parent = dget_parent(dentry); - dprintk("%s: getting locations for %s/%s\n", __FUNCTION__, parent->d_name.name, dentry->d_name.name); + dprintk("%s: getting locations for %s/%s\n", + __FUNCTION__, parent->d_name.name, dentry->d_name.name); + err = nfs4_proc_fs_locations(parent->d_inode, dentry, fs_locations, page); dput(parent); - if (err != 0 || fs_locations->nlocations <= 0 || + if (err != 0 || + fs_locations->nlocations <= 0 || fs_locations->fs_path.ncomponents <= 0) goto out_free; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1573eeb07ce1..a825547e8214 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1393,70 +1393,19 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, return err; } +/* + * get the file handle for the "/" directory on the server + */ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) + struct nfs_fsinfo *info) { - struct nfs_fattr * fattr = info->fattr; - unsigned char * p; - struct qstr q; - struct nfs4_lookup_arg args = { - .dir_fh = fhandle, - .name = &q, - .bitmask = nfs4_fattr_bitmap, - }; - struct nfs4_lookup_res res = { - .server = server, - .fattr = fattr, - .fh = fhandle, - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP], - .rpc_argp = &args, - .rpc_resp = &res, - }; int status; - /* - * Now we do a separate LOOKUP for each component of the mount path. - * The LOOKUPs are done separately so that we can conveniently - * catch an ERR_WRONGSEC if it occurs along the way... - */ status = nfs4_lookup_root(server, fhandle, info); - if (status) - goto out; - - p = server->mnt_path; - for (;;) { - struct nfs4_exception exception = { }; - - while (*p == '/') - p++; - if (!*p) - break; - q.name = p; - while (*p && (*p != '/')) - p++; - q.len = p - q.name; - - do { - nfs_fattr_init(fattr); - status = nfs4_handle_exception(server, - rpc_call_sync(server->client, &msg, 0), - &exception); - } while (exception.retry); - if (status == 0) - continue; - if (status == -ENOENT) { - printk(KERN_NOTICE "NFS: mount path %s does not exist!\n", server->mnt_path); - printk(KERN_NOTICE "NFS: suggestion: try mounting '/' instead.\n"); - } - break; - } if (status == 0) status = nfs4_server_capabilities(server, fhandle); if (status == 0) status = nfs4_do_fsinfo(server, fhandle, info); -out: return nfs4_map_errors(status); } diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index ff947ecb8b81..f2c893690ac4 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -127,26 +127,13 @@ nfs4_schedule_state_renewal(struct nfs_client *clp) void nfs4_renewd_prepare_shutdown(struct nfs_server *server) { - struct nfs_client *clp = server->nfs_client; - - if (!clp) - return; flush_scheduled_work(); - down_write(&clp->cl_sem); - if (!list_empty(&server->nfs4_siblings)) - list_del_init(&server->nfs4_siblings); - up_write(&clp->cl_sem); } -/* Must be called with clp->cl_sem locked for writes */ void nfs4_kill_renewd(struct nfs_client *clp) { down_read(&clp->cl_sem); - if (!list_empty(&clp->cl_superblocks)) { - up_read(&clp->cl_sem); - return; - } cancel_delayed_work(&clp->cl_renewd); up_read(&clp->cl_sem); flush_scheduled_work(); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 058811e39555..5fffbdfa971f 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -58,24 +58,6 @@ const nfs4_stateid zero_stateid; static LIST_HEAD(nfs4_clientid_list); -void -init_nfsv4_state(struct nfs_server *server) -{ - server->nfs_client = NULL; - INIT_LIST_HEAD(&server->nfs4_siblings); -} - -void -destroy_nfsv4_state(struct nfs_server *server) -{ - kfree(server->mnt_path); - server->mnt_path = NULL; - if (server->nfs_client) { - nfs_put_client(server->nfs_client); - server->nfs_client = NULL; - } -} - static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred) { int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index f0aff824a291..dae33c1e8a77 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -171,7 +171,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, rdata->args.offset = page_offset(page) + rdata->args.pgbase; dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n", - NFS_SERVER(inode)->hostname, + NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id, (long long)NFS_FILEID(inode), (unsigned long long)rdata->args.pgbase, diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 5842d510d732..867b5dcd3a40 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -13,6 +13,11 @@ * * Split from inode.c by David Howells * + * - superblocks are indexed on server only - all inodes, dentries, etc. associated with a + * particular server are held in the same superblock + * - NFS superblocks can have several effective roots to the dentry tree + * - directory type roots are spliced into the tree when a path from one root reaches the root + * of another (see nfs_lookup()) */ #include @@ -52,20 +57,12 @@ #define NFSDBG_FACILITY NFSDBG_VFS -/* Maximum number of readahead requests - * FIXME: this should really be a sysctl so that users may tune it to suit - * their needs. People that do NFS over a slow network, might for - * instance want to reduce it to something closer to 1 for improved - * interactive response. - */ -#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) - static void nfs_umount_begin(struct vfsmount *, int); static int nfs_statfs(struct dentry *, struct kstatfs *); static int nfs_show_options(struct seq_file *, struct vfsmount *); static int nfs_show_stats(struct seq_file *, struct vfsmount *); static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *); -static int nfs_clone_nfs_sb(struct file_system_type *fs_type, +static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); static void nfs_kill_super(struct super_block *); @@ -77,10 +74,10 @@ static struct file_system_type nfs_fs_type = { .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; -struct file_system_type clone_nfs_fs_type = { +struct file_system_type nfs_xdev_fs_type = { .owner = THIS_MODULE, .name = "nfs", - .get_sb = nfs_clone_nfs_sb, + .get_sb = nfs_xdev_get_sb, .kill_sb = nfs_kill_super, .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; @@ -99,10 +96,10 @@ static struct super_operations nfs_sops = { #ifdef CONFIG_NFS_V4 static int nfs4_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); -static int nfs_clone_nfs4_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); -static int nfs_referral_nfs4_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static int nfs4_xdev_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static int nfs4_referral_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); static void nfs4_kill_super(struct super_block *sb); static struct file_system_type nfs4_fs_type = { @@ -113,18 +110,18 @@ static struct file_system_type nfs4_fs_type = { .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; -struct file_system_type clone_nfs4_fs_type = { +struct file_system_type nfs4_xdev_fs_type = { .owner = THIS_MODULE, .name = "nfs4", - .get_sb = nfs_clone_nfs4_sb, + .get_sb = nfs4_xdev_get_sb, .kill_sb = nfs4_kill_super, .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; -struct file_system_type nfs_referral_nfs4_fs_type = { +struct file_system_type nfs4_referral_fs_type = { .owner = THIS_MODULE, .name = "nfs4", - .get_sb = nfs_referral_nfs4_sb, + .get_sb = nfs4_referral_get_sb, .kill_sb = nfs4_kill_super, .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; @@ -345,7 +342,7 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) nfs_show_mount_options(m, nfss, 0); seq_puts(m, ",addr="); - seq_escape(m, nfss->hostname, " \t\n\\"); + seq_escape(m, nfss->nfs_client->cl_hostname, " \t\n\\"); return 0; } @@ -429,449 +426,30 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) /* * Begin unmount by attempting to remove all automounted mountpoints we added - * in response to traversals + * in response to xdev traversals and referrals */ static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags) { - struct nfs_server *server; - struct rpc_clnt *rpc; - shrink_submounts(vfsmnt, &nfs_automount_list); - if (!(flags & MNT_FORCE)) - return; - /* -EIO all pending I/O */ - server = NFS_SB(vfsmnt->mnt_sb); - rpc = server->client; - if (!IS_ERR(rpc)) - rpc_killall_tasks(rpc); - rpc = server->client_acl; - if (!IS_ERR(rpc)) - rpc_killall_tasks(rpc); } /* - * Obtain the root inode of the file system. + * Validate the NFS2/NFS3 mount data + * - fills in the mount root filehandle */ -static struct inode * -nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo) +static int nfs_validate_mount_data(struct nfs_mount_data *data, + struct nfs_fh *mntfh) { - struct nfs_server *server = NFS_SB(sb); - int error; - - error = server->nfs_client->rpc_ops->getroot(server, rootfh, fsinfo); - if (error < 0) { - dprintk("nfs_get_root: getattr error = %d\n", -error); - return ERR_PTR(error); - } - - server->fsid = fsinfo->fattr->fsid; - return nfs_fhget(sb, rootfh, fsinfo->fattr); -} - -/* - * Do NFS version-independent mount processing, and sanity checking - */ -static int -nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) -{ - struct nfs_server *server; - struct inode *root_inode; - struct nfs_fattr fattr; - struct nfs_fsinfo fsinfo = { - .fattr = &fattr, - }; - struct nfs_pathconf pathinfo = { - .fattr = &fattr, - }; - int no_root_error = 0; - unsigned long max_rpc_payload; - - /* We probably want something more informative here */ - snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev)); - - server = NFS_SB(sb); - - sb->s_magic = NFS_SUPER_MAGIC; - - server->io_stats = nfs_alloc_iostats(); - if (server->io_stats == NULL) - return -ENOMEM; - - root_inode = nfs_get_root(sb, &server->fh, &fsinfo); - /* Did getting the root inode fail? */ - if (IS_ERR(root_inode)) { - no_root_error = PTR_ERR(root_inode); - goto out_no_root; - } - sb->s_root = d_alloc_root(root_inode); - if (!sb->s_root) { - no_root_error = -ENOMEM; - goto out_no_root; - } - sb->s_root->d_op = server->nfs_client->rpc_ops->dentry_ops; - - /* mount time stamp, in seconds */ - server->mount_time = jiffies; - - /* Get some general file system info */ - if (server->namelen == 0 && - server->nfs_client->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) - server->namelen = pathinfo.max_namelen; - /* Work out a lot of parameters */ - if (server->rsize == 0) - server->rsize = nfs_block_size(fsinfo.rtpref, NULL); - if (server->wsize == 0) - server->wsize = nfs_block_size(fsinfo.wtpref, NULL); - - if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax) - server->rsize = nfs_block_size(fsinfo.rtmax, NULL); - if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax) - server->wsize = nfs_block_size(fsinfo.wtmax, NULL); - - max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); - if (server->rsize > max_rpc_payload) - server->rsize = max_rpc_payload; - if (server->rsize > NFS_MAX_FILE_IO_SIZE) - server->rsize = NFS_MAX_FILE_IO_SIZE; - server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - - if (server->wsize > max_rpc_payload) - server->wsize = max_rpc_payload; - if (server->wsize > NFS_MAX_FILE_IO_SIZE) - server->wsize = NFS_MAX_FILE_IO_SIZE; - server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - - if (sb->s_blocksize == 0) - sb->s_blocksize = nfs_block_bits(server->wsize, - &sb->s_blocksize_bits); - server->wtmult = nfs_block_bits(fsinfo.wtmult, NULL); - - server->dtsize = nfs_block_size(fsinfo.dtpref, NULL); - if (server->dtsize > PAGE_CACHE_SIZE) - server->dtsize = PAGE_CACHE_SIZE; - if (server->dtsize > server->rsize) - server->dtsize = server->rsize; - - if (server->flags & NFS_MOUNT_NOAC) { - server->acregmin = server->acregmax = 0; - server->acdirmin = server->acdirmax = 0; - sb->s_flags |= MS_SYNCHRONOUS; - } - server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; - - nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); - - server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0; - server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0; - - /* We're airborne Set socket buffersize */ - rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); - return 0; - /* Yargs. It didn't work out. */ -out_no_root: - dprintk("nfs_sb_init: get root inode failed: errno %d\n", -no_root_error); - if (!IS_ERR(root_inode)) - iput(root_inode); - return no_root_error; -} - -/* - * Create an RPC client handle. - */ -static struct rpc_clnt * -nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) -{ - struct nfs_client *clp; - struct rpc_clnt *clnt; - int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP; - int nfsversion = 2; - int err; - -#ifdef CONFIG_NFS_V3 - if (server->flags & NFS_MOUNT_VER3) - nfsversion = 3; -#endif - - clp = nfs_get_client(server->hostname, &server->addr, nfsversion); - if (!clp) { - dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); - return ERR_PTR(PTR_ERR(clp)); - } - - if (clp->cl_cons_state == NFS_CS_INITING) { - /* Check NFS protocol revision and initialize RPC op - * vector and file handle pool. */ -#ifdef CONFIG_NFS_V3 - if (nfsversion == 3) { - clp->rpc_ops = &nfs_v3_clientops; - server->caps |= NFS_CAP_READDIRPLUS; - } else { - clp->rpc_ops = &nfs_v2_clientops; - } -#else - clp->rpc_ops = &nfs_v2_clientops; -#endif - - /* create transport and client */ - err = nfs_create_rpc_client(clp, proto, data->timeo, - data->retrans, RPC_AUTH_UNIX); - if (err < 0) - goto client_init_error; - - nfs_mark_client_ready(clp, 0); - } - - /* create an nfs_server-specific client */ - clnt = rpc_clone_client(clp->cl_rpcclient); - if (IS_ERR(clnt)) { - dprintk("%s: couldn't create rpc_client!\n", __FUNCTION__); - nfs_put_client(clp); - return ERR_PTR(PTR_ERR(clnt)); - } - - if (data->pseudoflavor != clp->cl_rpcclient->cl_auth->au_flavor) { - struct rpc_auth *auth; - - auth = rpcauth_create(data->pseudoflavor, server->client); - if (IS_ERR(auth)) { - dprintk("%s: couldn't create credcache!\n", __FUNCTION__); - return ERR_PTR(PTR_ERR(auth)); - } - } - - server->nfs_client = clp; - return clnt; - -client_init_error: - nfs_mark_client_ready(clp, err); - nfs_put_client(clp); - return ERR_PTR(err); -} - -/* - * Clone a server record - */ -static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_clone_mount *data) -{ - struct nfs_server *server = NFS_SB(sb); - struct nfs_server *parent = NFS_SB(data->sb); - struct inode *root_inode; - struct nfs_fsinfo fsinfo; - void *err = ERR_PTR(-ENOMEM); - - sb->s_op = data->sb->s_op; - sb->s_blocksize = data->sb->s_blocksize; - sb->s_blocksize_bits = data->sb->s_blocksize_bits; - sb->s_maxbytes = data->sb->s_maxbytes; - - server->client_acl = ERR_PTR(-EINVAL); - server->io_stats = nfs_alloc_iostats(); - if (server->io_stats == NULL) - goto out; - - server->client = rpc_clone_client(parent->client); - if (IS_ERR((err = server->client))) - goto out; - - if (!IS_ERR(parent->client_acl)) { - server->client_acl = rpc_clone_client(parent->client_acl); - if (IS_ERR((err = server->client_acl))) - goto out; - } - root_inode = nfs_fhget(sb, data->fh, data->fattr); - if (!root_inode) - goto out; - sb->s_root = d_alloc_root(root_inode); - if (!sb->s_root) - goto out_put_root; - fsinfo.fattr = data->fattr; - if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0) - nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); - sb->s_root->d_op = server->nfs_client->rpc_ops->dentry_ops; - sb->s_flags |= MS_ACTIVE; - return server; -out_put_root: - iput(root_inode); -out: - return err; -} - -/* - * Copy an existing superblock and attach revised data - */ -static int nfs_clone_generic_sb(struct nfs_clone_mount *data, - struct super_block *(*fill_sb)(struct nfs_server *, struct nfs_clone_mount *), - struct nfs_server *(*fill_server)(struct super_block *, struct nfs_clone_mount *), - struct vfsmount *mnt) -{ - struct nfs_server *server; - struct nfs_server *parent = NFS_SB(data->sb); - struct super_block *sb = ERR_PTR(-EINVAL); - char *hostname; - int error = -ENOMEM; - int len; - - server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); - if (server == NULL) - goto out_err; - memcpy(server, parent, sizeof(*server)); - atomic_inc(&server->nfs_client->cl_count); - hostname = (data->hostname != NULL) ? data->hostname : parent->hostname; - len = strlen(hostname) + 1; - server->hostname = kmalloc(len, GFP_KERNEL); - if (server->hostname == NULL) - goto free_server; - memcpy(server->hostname, hostname, len); - - sb = fill_sb(server, data); - if (IS_ERR(sb)) { - error = PTR_ERR(sb); - goto free_hostname; - } - - if (sb->s_root) - goto out_share; - - server = fill_server(sb, data); - if (IS_ERR(server)) { - error = PTR_ERR(server); - goto out_deactivate; - } - return simple_set_mnt(mnt, sb); -out_deactivate: - up_write(&sb->s_umount); - deactivate_super(sb); - return error; -out_share: - kfree(server->hostname); - nfs_put_client(server->nfs_client); - kfree(server); - return simple_set_mnt(mnt, sb); -free_hostname: - kfree(server->hostname); -free_server: - nfs_put_client(server->nfs_client); - kfree(server); -out_err: - return error; -} - -/* - * Set up an NFS2/3 superblock - * - * The way this works is that the mount process passes a structure - * in the data argument which contains the server's IP address - * and the root file handle obtained from the server's mount - * daemon. We stash these away in the private superblock fields. - */ -static int -nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) -{ - struct nfs_server *server; - rpc_authflavor_t authflavor; - - server = NFS_SB(sb); - sb->s_blocksize_bits = 0; - sb->s_blocksize = 0; - if (data->bsize) - sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); - if (data->rsize) - server->rsize = nfs_block_size(data->rsize, NULL); - if (data->wsize) - server->wsize = nfs_block_size(data->wsize, NULL); - server->flags = data->flags & NFS_MOUNT_FLAGMASK; - - server->acregmin = data->acregmin*HZ; - server->acregmax = data->acregmax*HZ; - server->acdirmin = data->acdirmin*HZ; - server->acdirmax = data->acdirmax*HZ; - - /* Start lockd here, before we might error out */ - if (!(server->flags & NFS_MOUNT_NONLM)) - lockd_up(); - - server->namelen = data->namlen; - server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL); - if (!server->hostname) - return -ENOMEM; - strcpy(server->hostname, data->hostname); - - /* Fill in pseudoflavor for mount version < 5 */ - if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) - data->pseudoflavor = RPC_AUTH_UNIX; - authflavor = data->pseudoflavor; /* save for sb_init() */ - /* XXX maybe we want to add a server->pseudoflavor field */ - - /* Create RPC client handles */ - server->client = nfs_create_client(server, data); - if (IS_ERR(server->client)) - return PTR_ERR(server->client); - - /* RFC 2623, sec 2.3.2 */ - if (server->flags & NFS_MOUNT_VER3) { -#ifdef CONFIG_NFS_V3_ACL - if (!(server->flags & NFS_MOUNT_NOACL)) { - server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); - /* No errors! Assume that Sun nfsacls are supported */ - if (!IS_ERR(server->client_acl)) - server->caps |= NFS_CAP_ACLS; - } -#else - server->flags &= ~NFS_MOUNT_NOACL; -#endif /* CONFIG_NFS_V3_ACL */ - /* - * The VFS shouldn't apply the umask to mode bits. We will - * do so ourselves when necessary. - */ - sb->s_flags |= MS_POSIXACL; - if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) - server->namelen = NFS3_MAXNAMLEN; - sb->s_time_gran = 1; - } else { - if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) - server->namelen = NFS2_MAXNAMLEN; - } - - sb->s_op = &nfs_sops; - return nfs_sb_init(sb, authflavor); -} - -static int nfs_set_super(struct super_block *s, void *data) -{ - s->s_fs_info = data; - return set_anon_super(s, data); -} - -static int nfs_compare_super(struct super_block *sb, void *data) -{ - struct nfs_server *server = data; - struct nfs_server *old = NFS_SB(sb); - - if (old->addr.sin_addr.s_addr != server->addr.sin_addr.s_addr) - return 0; - if (old->addr.sin_port != server->addr.sin_port) - return 0; - return !nfs_compare_fh(&old->fh, &server->fh); -} - -static int nfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) -{ - int error; - struct nfs_server *server = NULL; - struct super_block *s; - struct nfs_fh *root; - struct nfs_mount_data *data = raw_data; - - error = -EINVAL; if (data == NULL) { dprintk("%s: missing data argument\n", __FUNCTION__); - goto out_err_noserver; + return -EINVAL; } + if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) { dprintk("%s: bad mount version\n", __FUNCTION__); - goto out_err_noserver; + return -EINVAL; } + switch (data->version) { case 1: data->namlen = 0; @@ -882,7 +460,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, dprintk("%s: mount structure version %d does not support NFSv3\n", __FUNCTION__, data->version); - goto out_err_noserver; + return -EINVAL; } data->root.size = NFS2_FHSIZE; memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); @@ -891,252 +469,308 @@ static int nfs_get_sb(struct file_system_type *fs_type, dprintk("%s: mount structure version %d does not support strong security\n", __FUNCTION__, data->version); - goto out_err_noserver; + return -EINVAL; } + /* Fill in pseudoflavor for mount version < 5 */ + data->pseudoflavor = RPC_AUTH_UNIX; case 5: memset(data->context, 0, sizeof(data->context)); } + #ifndef CONFIG_NFS_V3 /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ - error = -EPROTONOSUPPORT; if (data->flags & NFS_MOUNT_VER3) { dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__); - goto out_err_noserver; + return -EPROTONOSUPPORT; } #endif /* CONFIG_NFS_V3 */ - error = -ENOMEM; - server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); - if (!server) - goto out_err_noserver; - /* Zero out the NFS state stuff */ - init_nfsv4_state(server); - server->client = server->client_acl = ERR_PTR(-EINVAL); - - root = &server->fh; - if (data->flags & NFS_MOUNT_VER3) - root->size = data->root.size; - else - root->size = NFS2_FHSIZE; - error = -EINVAL; - if (root->size > sizeof(root->data)) { - dprintk("%s: invalid root filehandle\n", __FUNCTION__); - goto out_err; - } - memcpy(root->data, data->root.data, root->size); - /* We now require that the mount process passes the remote address */ - memcpy(&server->addr, &data->addr, sizeof(server->addr)); - if (server->addr.sin_addr.s_addr == INADDR_ANY) { + if (data->addr.sin_addr.s_addr == INADDR_ANY) { dprintk("%s: mount program didn't pass remote address!\n", - __FUNCTION__); - goto out_err; + __FUNCTION__); + return -EINVAL; } + /* Prepare the root filehandle */ + if (data->flags & NFS_MOUNT_VER3) + mntfh->size = data->root.size; + else + mntfh->size = NFS2_FHSIZE; + + if (mntfh->size > sizeof(mntfh->data)) { + dprintk("%s: invalid root filehandle\n", __FUNCTION__); + return -EINVAL; + } + + memcpy(mntfh->data, data->root.data, mntfh->size); + if (mntfh->size < sizeof(mntfh->data)) + memset(mntfh->data + mntfh->size, 0, + sizeof(mntfh->data) - mntfh->size); + + return 0; +} + +/* + * Initialise the common bits of the superblock + */ +static inline void nfs_initialise_sb(struct super_block *sb) +{ + struct nfs_server *server = NFS_SB(sb); + + sb->s_magic = NFS_SUPER_MAGIC; + + /* We probably want something more informative here */ + snprintf(sb->s_id, sizeof(sb->s_id), + "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev)); + + if (sb->s_blocksize == 0) + sb->s_blocksize = nfs_block_bits(server->wsize, + &sb->s_blocksize_bits); + + if (server->flags & NFS_MOUNT_NOAC) + sb->s_flags |= MS_SYNCHRONOUS; + + nfs_super_set_maxbytes(sb, server->maxfilesize); +} + +/* + * Finish setting up an NFS2/3 superblock + */ +static void nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data) +{ + struct nfs_server *server = NFS_SB(sb); + + sb->s_blocksize_bits = 0; + sb->s_blocksize = 0; + if (data->bsize) + sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); + + if (server->flags & NFS_MOUNT_VER3) { + /* The VFS shouldn't apply the umask to mode bits. We will do + * so ourselves when necessary. + */ + sb->s_flags |= MS_POSIXACL; + sb->s_time_gran = 1; + } + + sb->s_op = &nfs_sops; + nfs_initialise_sb(sb); +} + +/* + * Finish setting up a cloned NFS2/3 superblock + */ +static void nfs_clone_super(struct super_block *sb, + const struct super_block *old_sb) +{ + struct nfs_server *server = NFS_SB(sb); + + sb->s_blocksize_bits = old_sb->s_blocksize_bits; + sb->s_blocksize = old_sb->s_blocksize; + sb->s_maxbytes = old_sb->s_maxbytes; + + if (server->flags & NFS_MOUNT_VER3) { + /* The VFS shouldn't apply the umask to mode bits. We will do + * so ourselves when necessary. + */ + sb->s_flags |= MS_POSIXACL; + sb->s_time_gran = 1; + } + + sb->s_op = old_sb->s_op; + nfs_initialise_sb(sb); +} + +static int nfs_set_super(struct super_block *s, void *_server) +{ + struct nfs_server *server = _server; + int ret; + + s->s_fs_info = server; + ret = set_anon_super(s, server); + if (ret == 0) + server->s_dev = s->s_dev; + return ret; +} + +static int nfs_compare_super(struct super_block *sb, void *data) +{ + struct nfs_server *server = data, *old = NFS_SB(sb); + + if (old->nfs_client != server->nfs_client) + return 0; + if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0) + return 0; + return 1; +} + +static int nfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) +{ + struct nfs_server *server = NULL; + struct super_block *s; + struct nfs_fh mntfh; + struct nfs_mount_data *data = raw_data; + struct dentry *mntroot; + int error; + + /* Validate the mount data */ + error = nfs_validate_mount_data(data, &mntfh); + if (error < 0) + return error; + + /* Get a volume representation */ + server = nfs_create_server(data, &mntfh); + if (IS_ERR(server)) { + error = PTR_ERR(server); + goto out_err_noserver; + } + + /* Get a superblock - note that we may end up sharing one that already exists */ s = sget(fs_type, nfs_compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); - goto out_err; + goto out_err_nosb; } - if (s->s_root) - goto out_share; - - s->s_flags = flags; - - error = nfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0); - if (error) { - up_write(&s->s_umount); - deactivate_super(s); - return error; + if (s->s_fs_info != server) { + nfs_free_server(server); + server = NULL; } + + if (!s->s_root) { + /* initial superblock/root creation */ + s->s_flags = flags; + nfs_fill_super(s, data); + } + + mntroot = nfs_get_root(s, &mntfh); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + goto error_splat_super; + } + s->s_flags |= MS_ACTIVE; - return simple_set_mnt(mnt, s); + mnt->mnt_sb = s; + mnt->mnt_root = mntroot; + return 0; -out_share: - kfree(server); - return simple_set_mnt(mnt, s); - -out_err: - kfree(server); +out_err_nosb: + nfs_free_server(server); out_err_noserver: return error; + +error_splat_super: + up_write(&s->s_umount); + deactivate_super(s); + return error; } +/* + * Destroy an NFS2/3 superblock + */ static void nfs_kill_super(struct super_block *s) { struct nfs_server *server = NFS_SB(s); kill_anon_super(s); - - if (!IS_ERR(server->client)) - rpc_shutdown_client(server->client); - if (!IS_ERR(server->client_acl)) - rpc_shutdown_client(server->client_acl); - - if (!(server->flags & NFS_MOUNT_NONLM)) - lockd_down(); /* release rpc.lockd */ - - nfs_free_iostats(server->io_stats); - kfree(server->hostname); - nfs_put_client(server->nfs_client); - kfree(server); - nfs_release_automount_timer(); + nfs_free_server(server); } -static struct super_block *nfs_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data) -{ - struct super_block *sb; - - server->fsid = data->fattr->fsid; - nfs_copy_fh(&server->fh, data->fh); - sb = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); - if (!IS_ERR(sb) && sb->s_root == NULL && !(server->flags & NFS_MOUNT_NONLM)) - lockd_up(); - return sb; -} - -static int nfs_clone_nfs_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) +/* + * Clone an NFS2/3 server record on xdev traversal (FSID-change) + */ +static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data, + struct vfsmount *mnt) { struct nfs_clone_mount *data = raw_data; - return nfs_clone_generic_sb(data, nfs_clone_sb, nfs_clone_server, mnt); + struct super_block *s; + struct nfs_server *server; + struct dentry *mntroot; + int error; + + dprintk("--> nfs_xdev_get_sb()\n"); + + /* create a new volume representation */ + server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr); + if (IS_ERR(server)) { + error = PTR_ERR(server); + goto out_err_noserver; + } + + /* Get a superblock - note that we may end up sharing one that already exists */ + s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_err_nosb; + } + + if (s->s_fs_info != server) { + nfs_free_server(server); + server = NULL; + } + + if (!s->s_root) { + /* initial superblock/root creation */ + s->s_flags = flags; + nfs_clone_super(s, data->sb); + } + + mntroot = nfs_get_root(s, data->fh); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + goto error_splat_super; + } + + s->s_flags |= MS_ACTIVE; + mnt->mnt_sb = s; + mnt->mnt_root = mntroot; + + dprintk("<-- nfs_xdev_get_sb() = 0\n"); + return 0; + +out_err_nosb: + nfs_free_server(server); +out_err_noserver: + dprintk("<-- nfs_xdev_get_sb() = %d [error]\n", error); + return error; + +error_splat_super: + up_write(&s->s_umount); + deactivate_super(s); + dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error); + return error; } #ifdef CONFIG_NFS_V4 -static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, - int timeo, int retrans, int proto, rpc_authflavor_t flavor) + +/* + * Finish setting up a cloned NFS4 superblock + */ +static void nfs4_clone_super(struct super_block *sb, + const struct super_block *old_sb) { - struct nfs_client *clp; - struct rpc_clnt *clnt = NULL; - int err = -EIO; - - clp = nfs_get_client(server->hostname, &server->addr, 4); - if (!clp) { - dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); - return ERR_PTR(err); - } - - /* Now create transport and client */ - if (clp->cl_cons_state == NFS_CS_INITING) { - clp->rpc_ops = &nfs_v4_clientops; - - err = nfs_create_rpc_client(clp, proto, timeo, retrans, flavor); - if (err < 0) - goto client_init_error; - - memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); - err = nfs_idmap_new(clp); - if (err < 0) { - dprintk("%s: failed to create idmapper.\n", - __FUNCTION__); - goto client_init_error; - } - __set_bit(NFS_CS_IDMAP, &clp->cl_res_state); - nfs_mark_client_ready(clp, 0); - } - - clnt = rpc_clone_client(clp->cl_rpcclient); - - if (IS_ERR(clnt)) { - dprintk("%s: cannot create RPC client. Error = %d\n", - __FUNCTION__, err); - return clnt; - } - - if (clnt->cl_auth->au_flavor != flavor) { - struct rpc_auth *auth; - - auth = rpcauth_create(flavor, clnt); - if (IS_ERR(auth)) { - dprintk("%s: couldn't create credcache!\n", __FUNCTION__); - return (struct rpc_clnt *)auth; - } - } - - server->nfs_client = clp; - down_write(&clp->cl_sem); - list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); - up_write(&clp->cl_sem); - return clnt; - -client_init_error: - nfs_mark_client_ready(clp, err); - nfs_put_client(clp); - return ERR_PTR(err); + sb->s_blocksize_bits = old_sb->s_blocksize_bits; + sb->s_blocksize = old_sb->s_blocksize; + sb->s_maxbytes = old_sb->s_maxbytes; + sb->s_time_gran = 1; + sb->s_op = old_sb->s_op; + nfs_initialise_sb(sb); } /* * Set up an NFS4 superblock */ -static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent) +static void nfs4_fill_super(struct super_block *sb) { - struct nfs_server *server; - rpc_authflavor_t authflavour; - int err = -EIO; - - sb->s_blocksize_bits = 0; - sb->s_blocksize = 0; - server = NFS_SB(sb); - if (data->rsize != 0) - server->rsize = nfs_block_size(data->rsize, NULL); - if (data->wsize != 0) - server->wsize = nfs_block_size(data->wsize, NULL); - server->flags = data->flags & NFS_MOUNT_FLAGMASK; - server->caps = NFS_CAP_ATOMIC_OPEN; - - server->acregmin = data->acregmin*HZ; - server->acregmax = data->acregmax*HZ; - server->acdirmin = data->acdirmin*HZ; - server->acdirmax = data->acdirmax*HZ; - - /* Now create transport and client */ - authflavour = RPC_AUTH_UNIX; - if (data->auth_flavourlen != 0) { - if (data->auth_flavourlen != 1) { - dprintk("%s: Invalid number of RPC auth flavours %d.\n", - __FUNCTION__, data->auth_flavourlen); - err = -EINVAL; - goto out_fail; - } - if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) { - err = -EFAULT; - goto out_fail; - } - } - - server->client = nfs4_create_client(server, data->timeo, data->retrans, - data->proto, authflavour); - if (IS_ERR(server->client)) { - err = PTR_ERR(server->client); - dprintk("%s: cannot create RPC client. Error = %d\n", - __FUNCTION__, err); - goto out_fail; - } - sb->s_time_gran = 1; - sb->s_op = &nfs4_sops; - err = nfs_sb_init(sb, authflavour); - - out_fail: - return err; + nfs_initialise_sb(sb); } -static int nfs4_compare_super(struct super_block *sb, void *data) -{ - struct nfs_server *server = data; - struct nfs_server *old = NFS_SB(sb); - - if (strcmp(server->hostname, old->hostname) != 0) - return 0; - if (strcmp(server->mnt_path, old->mnt_path) != 0) - return 0; - return 1; -} - -static void * -nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen) +static void *nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen) { void *p = NULL; @@ -1157,14 +791,22 @@ nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen) return dst; } +/* + * Get the superblock for an NFS4 mountpoint + */ static int nfs4_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { - int error; - struct nfs_server *server; - struct super_block *s; struct nfs4_mount_data *data = raw_data; + struct super_block *s; + struct nfs_server *server; + struct sockaddr_in addr; + rpc_authflavor_t authflavour; + struct nfs_fh mntfh; + struct dentry *mntroot; + char *mntpath = NULL, *hostname = NULL, ip_addr[16]; void *p; + int error; if (data == NULL) { dprintk("%s: missing data argument\n", __FUNCTION__); @@ -1175,75 +817,107 @@ static int nfs4_get_sb(struct file_system_type *fs_type, return -EINVAL; } - server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); - if (!server) - return -ENOMEM; - /* Zero out the NFS state stuff */ - init_nfsv4_state(server); - server->client = server->client_acl = ERR_PTR(-EINVAL); + /* We now require that the mount process passes the remote address */ + if (data->host_addrlen != sizeof(addr)) + return -EINVAL; + + if (copy_from_user(&addr, data->host_addr, sizeof(addr))) + return -EFAULT; + + if (addr.sin_family != AF_INET || + addr.sin_addr.s_addr == INADDR_ANY + ) { + dprintk("%s: mount program didn't pass remote IP address!\n", + __FUNCTION__); + return -EINVAL; + } + + /* Grab the authentication type */ + authflavour = RPC_AUTH_UNIX; + if (data->auth_flavourlen != 0) { + if (data->auth_flavourlen != 1) { + dprintk("%s: Invalid number of RPC auth flavours %d.\n", + __FUNCTION__, data->auth_flavourlen); + error = -EINVAL; + goto out_err_noserver; + } + + if (copy_from_user(&authflavour, data->auth_flavours, + sizeof(authflavour))) { + error = -EFAULT; + goto out_err_noserver; + } + } p = nfs_copy_user_string(NULL, &data->hostname, 256); if (IS_ERR(p)) goto out_err; - server->hostname = p; + hostname = p; p = nfs_copy_user_string(NULL, &data->mnt_path, 1024); if (IS_ERR(p)) goto out_err; - server->mnt_path = p; + mntpath = p; - p = nfs_copy_user_string(server->ip_addr, &data->client_addr, - sizeof(server->ip_addr) - 1); + dprintk("MNTPATH: %s\n", mntpath); + + p = nfs_copy_user_string(ip_addr, &data->client_addr, + sizeof(ip_addr) - 1); if (IS_ERR(p)) goto out_err; - /* We now require that the mount process passes the remote address */ - if (data->host_addrlen != sizeof(server->addr)) { - error = -EINVAL; - goto out_free; - } - if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) { - error = -EFAULT; - goto out_free; - } - if (server->addr.sin_family != AF_INET || - server->addr.sin_addr.s_addr == INADDR_ANY) { - dprintk("%s: mount program didn't pass remote IP address!\n", - __FUNCTION__); - error = -EINVAL; - goto out_free; + /* Get a volume representation */ + server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr, + authflavour, &mntfh); + if (IS_ERR(server)) { + error = PTR_ERR(server); + goto out_err_noserver; } - s = sget(fs_type, nfs4_compare_super, nfs_set_super, server); + /* Get a superblock - note that we may end up sharing one that already exists */ + s = sget(fs_type, nfs_compare_super, nfs_set_super, server); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_free; } - if (s->s_root) { - kfree(server->mnt_path); - kfree(server->hostname); - kfree(server); - return simple_set_mnt(mnt, s); + if (!s->s_root) { + /* initial superblock/root creation */ + s->s_flags = flags; + + nfs4_fill_super(s); + } else { + nfs_free_server(server); } - s->s_flags = flags; - - error = nfs4_fill_super(s, data, flags & MS_SILENT ? 1 : 0); - if (error) { - up_write(&s->s_umount); - deactivate_super(s); - return error; + mntroot = nfs4_get_root(s, &mntfh); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + goto error_splat_super; } + s->s_flags |= MS_ACTIVE; - return simple_set_mnt(mnt, s); + mnt->mnt_sb = s; + mnt->mnt_root = mntroot; + kfree(mntpath); + kfree(hostname); + return 0; + out_err: error = PTR_ERR(p); + goto out_err_noserver; + out_free: - kfree(server->mnt_path); - kfree(server->hostname); - kfree(server); + nfs_free_server(server); +out_err_noserver: + kfree(mntpath); + kfree(hostname); return error; + +error_splat_super: + up_write(&s->s_umount); + deactivate_super(s); + goto out_err_noserver; } static void nfs4_kill_super(struct super_block *sb) @@ -1254,133 +928,140 @@ static void nfs4_kill_super(struct super_block *sb) kill_anon_super(sb); nfs4_renewd_prepare_shutdown(server); - - if (server->client != NULL && !IS_ERR(server->client)) - rpc_shutdown_client(server->client); - - destroy_nfsv4_state(server); - - nfs_free_iostats(server->io_stats); - kfree(server->hostname); - kfree(server); - nfs_release_automount_timer(); + nfs_free_server(server); } /* - * Constructs the SERVER-side path + * Clone an NFS4 server record on xdev traversal (FSID-change) */ -static inline char *nfs4_dup_path(const struct dentry *dentry) -{ - char *page = (char *) __get_free_page(GFP_USER); - char *path; - - path = nfs4_path(dentry, page, PAGE_SIZE); - if (!IS_ERR(path)) { - int len = PAGE_SIZE + page - path; - char *tmp = path; - - path = kmalloc(len, GFP_KERNEL); - if (path) - memcpy(path, tmp, len); - else - path = ERR_PTR(-ENOMEM); - } - free_page((unsigned long)page); - return path; -} - -static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data) -{ - const struct dentry *dentry = data->dentry; - struct nfs_client *clp = server->nfs_client; - struct super_block *sb; - - server->fsid = data->fattr->fsid; - nfs_copy_fh(&server->fh, data->fh); - server->mnt_path = nfs4_dup_path(dentry); - if (IS_ERR(server->mnt_path)) { - sb = (struct super_block *)server->mnt_path; - goto err; - } - sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server); - if (IS_ERR(sb) || sb->s_root) - goto free_path; - nfs4_server_capabilities(server, &server->fh); - - down_write(&clp->cl_sem); - list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); - up_write(&clp->cl_sem); - return sb; -free_path: - kfree(server->mnt_path); -err: - server->mnt_path = NULL; - return sb; -} - -static int nfs_clone_nfs4_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) +static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data, + struct vfsmount *mnt) { struct nfs_clone_mount *data = raw_data; - return nfs_clone_generic_sb(data, nfs4_clone_sb, nfs_clone_server, mnt); + struct super_block *s; + struct nfs_server *server; + struct dentry *mntroot; + int error; + + dprintk("--> nfs4_xdev_get_sb()\n"); + + /* create a new volume representation */ + server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr); + if (IS_ERR(server)) { + error = PTR_ERR(server); + goto out_err_noserver; + } + + /* Get a superblock - note that we may end up sharing one that already exists */ + s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_err_nosb; + } + + if (s->s_fs_info != server) { + nfs_free_server(server); + server = NULL; + } + + if (!s->s_root) { + /* initial superblock/root creation */ + s->s_flags = flags; + nfs4_clone_super(s, data->sb); + } + + mntroot = nfs4_get_root(s, data->fh); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + goto error_splat_super; + } + + s->s_flags |= MS_ACTIVE; + mnt->mnt_sb = s; + mnt->mnt_root = mntroot; + + dprintk("<-- nfs4_xdev_get_sb() = 0\n"); + return 0; + +out_err_nosb: + nfs_free_server(server); +out_err_noserver: + dprintk("<-- nfs4_xdev_get_sb() = %d [error]\n", error); + return error; + +error_splat_super: + up_write(&s->s_umount); + deactivate_super(s); + dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error); + return error; } -static struct super_block *nfs4_referral_sb(struct nfs_server *server, struct nfs_clone_mount *data) -{ - struct super_block *sb = ERR_PTR(-ENOMEM); - int len; - - len = strlen(data->mnt_path) + 1; - server->mnt_path = kmalloc(len, GFP_KERNEL); - if (server->mnt_path == NULL) - goto err; - memcpy(server->mnt_path, data->mnt_path, len); - memcpy(&server->addr, data->addr, sizeof(struct sockaddr_in)); - - sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server); - if (IS_ERR(sb) || sb->s_root) - goto free_path; - return sb; -free_path: - kfree(server->mnt_path); -err: - server->mnt_path = NULL; - return sb; -} - -static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nfs_clone_mount *data) -{ - struct nfs_server *server = NFS_SB(sb); - int proto, timeo, retrans; - void *err; - - proto = IPPROTO_TCP; - /* Since we are following a referral and there may be alternatives, - set the timeouts and retries to low values */ - timeo = 2; - retrans = 1; - - nfs_put_client(server->nfs_client); - server->nfs_client = NULL; - server->client = nfs4_create_client(server, timeo, retrans, proto, - data->authflavor); - if (IS_ERR((err = server->client))) - goto out_err; - - sb->s_time_gran = 1; - sb->s_op = &nfs4_sops; - err = ERR_PTR(nfs_sb_init(sb, data->authflavor)); - if (!IS_ERR(err)) - return server; -out_err: - return (struct nfs_server *)err; -} - -static int nfs_referral_nfs4_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) +/* + * Create an NFS4 server record on referral traversal + */ +static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data, + struct vfsmount *mnt) { struct nfs_clone_mount *data = raw_data; - return nfs_clone_generic_sb(data, nfs4_referral_sb, nfs4_referral_server, mnt); + struct super_block *s; + struct nfs_server *server; + struct dentry *mntroot; + struct nfs_fh mntfh; + int error; + + dprintk("--> nfs4_referral_get_sb()\n"); + + /* create a new volume representation */ + server = nfs4_create_referral_server(data, &mntfh); + if (IS_ERR(server)) { + error = PTR_ERR(server); + goto out_err_noserver; + } + + /* Get a superblock - note that we may end up sharing one that already exists */ + s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_err_nosb; + } + + if (s->s_fs_info != server) { + nfs_free_server(server); + server = NULL; + } + + if (!s->s_root) { + /* initial superblock/root creation */ + s->s_flags = flags; + nfs4_fill_super(s); + } + + mntroot = nfs4_get_root(s, data->fh); + if (IS_ERR(mntroot)) { + error = PTR_ERR(mntroot); + goto error_splat_super; + } + + s->s_flags |= MS_ACTIVE; + mnt->mnt_sb = s; + mnt->mnt_root = mntroot; + + dprintk("<-- nfs4_referral_get_sb() = 0\n"); + return 0; + +out_err_nosb: + nfs_free_server(server); +out_err_noserver: + dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error); + return error; + +error_splat_super: + up_write(&s->s_umount); + deactivate_super(s); + dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error); + return error; } -#endif +#endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 7084ac9a6455..453d44666ea5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1273,7 +1273,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) if (time_before(complain, jiffies)) { dprintk("NFS: faulty NFS server %s:" " (committed = %d) != (stable = %d)\n", - NFS_SERVER(data->inode)->hostname, + NFS_SERVER(data->inode)->nfs_client->cl_hostname, resp->verf->committed, argp->stable); complain = jiffies + 300 * HZ; } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index d404ceca9168..6d0be0efd1b5 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -51,7 +51,6 @@ struct nfs_client { unsigned long cl_lease_time; unsigned long cl_last_renewal; struct work_struct cl_renewd; - struct work_struct cl_recoverd; struct rpc_wait_queue cl_rpcwaitq; @@ -74,6 +73,10 @@ struct nfs_client { */ struct nfs_server { struct nfs_client * nfs_client; /* shared client and NFS4 state */ + struct list_head client_link; /* List of other nfs_server structs + * that share the same client + */ + struct list_head master_link; /* link in master servers list */ struct rpc_clnt * client; /* RPC client handle */ struct rpc_clnt * client_acl; /* ACL RPC client handle */ struct nfs_iostats * io_stats; /* I/O statistics */ @@ -92,20 +95,13 @@ struct nfs_server { unsigned int acdirmin; unsigned int acdirmax; unsigned int namelen; - char * hostname; /* remote hostname */ - struct nfs_fh fh; - struct sockaddr_in addr; + struct nfs_fsid fsid; + __u64 maxfilesize; /* maximum file size */ unsigned long mount_time; /* when this fs was mounted */ + dev_t s_dev; /* superblock dev numbers */ + #ifdef CONFIG_NFS_V4 - /* Our own IP address, as a null-terminated string. - * This is used to generate the clientid, and the callback address. - */ - char ip_addr[16]; - char * mnt_path; - struct list_head nfs4_siblings; /* List of other nfs_server structs - * that share the same clientid - */ u32 attr_bitmask[2];/* V4 bitmask representing the set of attributes supported on this filesystem */ @@ -113,6 +109,7 @@ struct nfs_server { that are supported on this filesystem */ #endif + void (*destroy)(struct nfs_server *); }; /* Server capabilities */ From 6aaca566503296a73f956908ec98173946134fe2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 22 Aug 2006 20:06:13 -0400 Subject: [PATCH 0827/1063] NFS: Add server and volume lists to /proc Make two new proc files available: /proc/fs/nfsfs/servers /proc/fs/nfsfs/volumes The first lists the servers with which we are currently dealing (struct nfs_client), and the second lists the volumes we have on those servers (struct nfs_server). Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 284 ++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/inode.c | 7 ++ fs/nfs/internal.h | 12 ++ 3 files changed, 303 insertions(+) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index dafba608c0a0..27f64781444a 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1148,3 +1148,287 @@ out_free_server: dprintk("<-- nfs_clone_server() = error %d\n", error); return ERR_PTR(error); } + +#ifdef CONFIG_PROC_FS +static struct proc_dir_entry *proc_fs_nfs; + +static int nfs_server_list_open(struct inode *inode, struct file *file); +static void *nfs_server_list_start(struct seq_file *p, loff_t *pos); +static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos); +static void nfs_server_list_stop(struct seq_file *p, void *v); +static int nfs_server_list_show(struct seq_file *m, void *v); + +static struct seq_operations nfs_server_list_ops = { + .start = nfs_server_list_start, + .next = nfs_server_list_next, + .stop = nfs_server_list_stop, + .show = nfs_server_list_show, +}; + +static struct file_operations nfs_server_list_fops = { + .open = nfs_server_list_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int nfs_volume_list_open(struct inode *inode, struct file *file); +static void *nfs_volume_list_start(struct seq_file *p, loff_t *pos); +static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos); +static void nfs_volume_list_stop(struct seq_file *p, void *v); +static int nfs_volume_list_show(struct seq_file *m, void *v); + +static struct seq_operations nfs_volume_list_ops = { + .start = nfs_volume_list_start, + .next = nfs_volume_list_next, + .stop = nfs_volume_list_stop, + .show = nfs_volume_list_show, +}; + +static struct file_operations nfs_volume_list_fops = { + .open = nfs_volume_list_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +/* + * open "/proc/fs/nfsfs/servers" which provides a summary of servers with which + * we're dealing + */ +static int nfs_server_list_open(struct inode *inode, struct file *file) +{ + struct seq_file *m; + int ret; + + ret = seq_open(file, &nfs_server_list_ops); + if (ret < 0) + return ret; + + m = file->private_data; + m->private = PDE(inode)->data; + + return 0; +} + +/* + * set up the iterator to start reading from the server list and return the first item + */ +static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) +{ + struct list_head *_p; + loff_t pos = *_pos; + + /* lock the list against modification */ + spin_lock(&nfs_client_lock); + + /* allow for the header line */ + if (!pos) + return SEQ_START_TOKEN; + pos--; + + /* find the n'th element in the list */ + list_for_each(_p, &nfs_client_list) + if (!pos--) + break; + + return _p != &nfs_client_list ? _p : NULL; +} + +/* + * move to next server + */ +static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) +{ + struct list_head *_p; + + (*pos)++; + + _p = v; + _p = (v == SEQ_START_TOKEN) ? nfs_client_list.next : _p->next; + + return _p != &nfs_client_list ? _p : NULL; +} + +/* + * clean up after reading from the transports list + */ +static void nfs_server_list_stop(struct seq_file *p, void *v) +{ + spin_unlock(&nfs_client_lock); +} + +/* + * display a header line followed by a load of call lines + */ +static int nfs_server_list_show(struct seq_file *m, void *v) +{ + struct nfs_client *clp; + + /* display header on line 1 */ + if (v == SEQ_START_TOKEN) { + seq_puts(m, "NV SERVER PORT USE HOSTNAME\n"); + return 0; + } + + /* display one transport per line on subsequent lines */ + clp = list_entry(v, struct nfs_client, cl_share_link); + + seq_printf(m, "v%d %02x%02x%02x%02x %4hx %3d %s\n", + clp->cl_nfsversion, + NIPQUAD(clp->cl_addr.sin_addr), + ntohs(clp->cl_addr.sin_port), + atomic_read(&clp->cl_count), + clp->cl_hostname); + + return 0; +} + +/* + * open "/proc/fs/nfsfs/volumes" which provides a summary of extant volumes + */ +static int nfs_volume_list_open(struct inode *inode, struct file *file) +{ + struct seq_file *m; + int ret; + + ret = seq_open(file, &nfs_volume_list_ops); + if (ret < 0) + return ret; + + m = file->private_data; + m->private = PDE(inode)->data; + + return 0; +} + +/* + * set up the iterator to start reading from the volume list and return the first item + */ +static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) +{ + struct list_head *_p; + loff_t pos = *_pos; + + /* lock the list against modification */ + spin_lock(&nfs_client_lock); + + /* allow for the header line */ + if (!pos) + return SEQ_START_TOKEN; + pos--; + + /* find the n'th element in the list */ + list_for_each(_p, &nfs_volume_list) + if (!pos--) + break; + + return _p != &nfs_volume_list ? _p : NULL; +} + +/* + * move to next volume + */ +static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) +{ + struct list_head *_p; + + (*pos)++; + + _p = v; + _p = (v == SEQ_START_TOKEN) ? nfs_volume_list.next : _p->next; + + return _p != &nfs_volume_list ? _p : NULL; +} + +/* + * clean up after reading from the transports list + */ +static void nfs_volume_list_stop(struct seq_file *p, void *v) +{ + spin_unlock(&nfs_client_lock); +} + +/* + * display a header line followed by a load of call lines + */ +static int nfs_volume_list_show(struct seq_file *m, void *v) +{ + struct nfs_server *server; + struct nfs_client *clp; + char dev[8], fsid[17]; + + /* display header on line 1 */ + if (v == SEQ_START_TOKEN) { + seq_puts(m, "NV SERVER PORT DEV FSID\n"); + return 0; + } + /* display one transport per line on subsequent lines */ + server = list_entry(v, struct nfs_server, master_link); + clp = server->nfs_client; + + snprintf(dev, 8, "%u:%u", + MAJOR(server->s_dev), MINOR(server->s_dev)); + + snprintf(fsid, 17, "%llx:%llx", + server->fsid.major, server->fsid.minor); + + seq_printf(m, "v%d %02x%02x%02x%02x %4hx %-7s %-17s\n", + clp->cl_nfsversion, + NIPQUAD(clp->cl_addr.sin_addr), + ntohs(clp->cl_addr.sin_port), + dev, + fsid); + + return 0; +} + +/* + * initialise the /proc/fs/nfsfs/ directory + */ +int __init nfs_fs_proc_init(void) +{ + struct proc_dir_entry *p; + + proc_fs_nfs = proc_mkdir("nfsfs", proc_root_fs); + if (!proc_fs_nfs) + goto error_0; + + proc_fs_nfs->owner = THIS_MODULE; + + /* a file of servers with which we're dealing */ + p = create_proc_entry("servers", S_IFREG|S_IRUGO, proc_fs_nfs); + if (!p) + goto error_1; + + p->proc_fops = &nfs_server_list_fops; + p->owner = THIS_MODULE; + + /* a file of volumes that we have mounted */ + p = create_proc_entry("volumes", S_IFREG|S_IRUGO, proc_fs_nfs); + if (!p) + goto error_2; + + p->proc_fops = &nfs_volume_list_fops; + p->owner = THIS_MODULE; + return 0; + +error_2: + remove_proc_entry("servers", proc_fs_nfs); +error_1: + remove_proc_entry("nfsfs", proc_root_fs); +error_0: + return -ENOMEM; +} + +/* + * clean up the /proc/fs/nfsfs/ directory + */ +void nfs_fs_proc_exit(void) +{ + remove_proc_entry("volumes", proc_fs_nfs); + remove_proc_entry("servers", proc_fs_nfs); + remove_proc_entry("nfsfs", proc_root_fs); +} + +#endif /* CONFIG_PROC_FS */ diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a547c58a83e6..cb5c65f0bc12 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1141,6 +1141,10 @@ static int __init init_nfs_fs(void) { int err; + err = nfs_fs_proc_init(); + if (err) + goto out5; + err = nfs_init_nfspagecache(); if (err) goto out4; @@ -1181,6 +1185,8 @@ out2: out3: nfs_destroy_nfspagecache(); out4: + nfs_fs_proc_exit(); +out5: return err; } @@ -1195,6 +1201,7 @@ static void __exit exit_nfs_fs(void) rpc_proc_unregister("nfs"); #endif unregister_nfs_fs(); + nfs_fs_proc_exit(); } /* Not quite true; I just maintain it */ diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index e73ba4f1052a..bea0b016bd70 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -47,6 +47,18 @@ extern void nfs_free_server(struct nfs_server *server); extern struct nfs_server *nfs_clone_server(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *); +#ifdef CONFIG_PROC_FS +extern int __init nfs_fs_proc_init(void); +extern void nfs_fs_proc_exit(void); +#else +static inline int nfs_fs_proc_init(void) +{ + return 0; +} +static inline void nfs_fs_proc_exit(void) +{ +} +#endif /* nfs4namespace.c */ #ifdef CONFIG_NFS_V4 From 27ba851244f627a302d0fc6469d1ad413fc34fcb Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 30 Jul 2006 14:40:56 -0400 Subject: [PATCH 0828/1063] NFS: Fix error handling Fix an error handling problem: nfs_put_client() can be given a NULL pointer if nfs_free_server() is asked to destroy a partially initialised record. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 27f64781444a..700bd5801223 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -208,6 +208,9 @@ static void nfs_free_client(struct nfs_client *clp) */ void nfs_put_client(struct nfs_client *clp) { + if (!clp) + return; + dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count)); if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) { From 738a35195941ecf604d3070e2a053e1df3de350b Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 30 Jul 2006 14:58:27 -0400 Subject: [PATCH 0829/1063] NFS: Secure the roots of the NFS subtrees in a shared superblock Invoke security_d_instantiate() on root dentries after allocating them with dentry_alloc_anon(). Normally dentry_alloc_root() would do that, but we don't call that as we don't want to assign a name to the root dentry at this point (we may discover the real name later). Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/getroot.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 977e59088eeb..76b08ae9ed82 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -109,6 +110,8 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh) return ERR_PTR(-ENOMEM); } + security_d_instantiate(mntroot, inode); + if (!mntroot->d_op) mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops; @@ -296,6 +299,8 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh) return ERR_PTR(-ENOMEM); } + security_d_instantiate(mntroot, inode); + if (!mntroot->d_op) mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops; From 36b15c54cd0d6f707a3ac03e4a2a60bb530a95b9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Aug 2006 20:06:14 -0400 Subject: [PATCH 0830/1063] NFS: Ensure NFSv2/v3 mounts respect the NFS_MOUNT_SECFLAVOUR flag Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 867b5dcd3a40..97cfb143e09f 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -471,12 +471,14 @@ static int nfs_validate_mount_data(struct nfs_mount_data *data, data->version); return -EINVAL; } - /* Fill in pseudoflavor for mount version < 5 */ - data->pseudoflavor = RPC_AUTH_UNIX; case 5: memset(data->context, 0, sizeof(data->context)); } + /* Set the pseudoflavor */ + if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) + data->pseudoflavor = RPC_AUTH_UNIX; + #ifndef CONFIG_NFS_V3 /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ if (data->flags & NFS_MOUNT_VER3) { From 9c5bf38d85a31b946664bcc21078ef5bb10672f7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Aug 2006 20:06:14 -0400 Subject: [PATCH 0831/1063] NFS: Fix nfs_alloc_client() The scheme to indicate which services have been started up appears to be seriously broken. Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 700bd5801223..471d975e63c3 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -113,9 +113,9 @@ static struct nfs_client *nfs_alloc_client(const char *hostname, if (error < 0) { dprintk("%s: couldn't start rpciod! Error = %d\n", __FUNCTION__, error); - __set_bit(NFS_CS_RPCIOD, &clp->cl_res_state); goto error_1; } + __set_bit(NFS_CS_RPCIOD, &clp->cl_res_state); if (nfsversion == 4) { if (nfs_callback_up() < 0) @@ -153,8 +153,8 @@ static struct nfs_client *nfs_alloc_client(const char *hostname, return clp; error_3: - nfs_callback_down(); - __clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state); + if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) + nfs_callback_down(); error_2: rpciod_down(); __clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state); @@ -195,7 +195,7 @@ static void nfs_free_client(struct nfs_client *clp) nfs_callback_down(); if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state)) - rpciod_down(); + rpciod_down(); kfree(clp->cl_hostname); kfree(clp); @@ -881,9 +881,9 @@ static int nfs4_init_client(struct nfs_client *clp, if (error < 0) { dprintk("%s: failed to create idmapper. Error = %d\n", __FUNCTION__, error); - __set_bit(NFS_CS_IDMAP, &clp->cl_res_state); goto error; } + __set_bit(NFS_CS_IDMAP, &clp->cl_res_state); nfs_mark_client_ready(clp, NFS_CS_READY); return 0; From ec739ef03dc926d05051c8c5838971445504470a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:15 -0400 Subject: [PATCH 0832/1063] SUNRPC: Create a helper to tell whether a transport is bound Hide the contents and format of xprt->addr by eliminating direct uses of the xprt->addr.sin_port field. This change is required to support alternate RPC host address formats (eg IPv6). Test-plan: Destructive testing (unplugging the network temporarily). Repeated runs of Connectathon locking suite with UDP and TCP. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 16 ++++++++++++++++ net/sunrpc/clnt.c | 10 +++++----- net/sunrpc/pmap_clnt.c | 5 ++++- net/sunrpc/xprt.c | 2 +- net/sunrpc/xprtsock.c | 14 ++++++++++---- 5 files changed, 36 insertions(+), 11 deletions(-) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 3a0cca255b76..a71106723d71 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -269,6 +269,7 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to); #define XPRT_CONNECTED (1) #define XPRT_CONNECTING (2) #define XPRT_CLOSE_WAIT (3) +#define XPRT_BOUND (4) static inline void xprt_set_connected(struct rpc_xprt *xprt) { @@ -312,6 +313,21 @@ static inline int xprt_test_and_set_connecting(struct rpc_xprt *xprt) return test_and_set_bit(XPRT_CONNECTING, &xprt->state); } +static inline void xprt_set_bound(struct rpc_xprt *xprt) +{ + test_and_set_bit(XPRT_BOUND, &xprt->state); +} + +static inline int xprt_bound(struct rpc_xprt *xprt) +{ + return test_bit(XPRT_BOUND, &xprt->state); +} + +static inline void xprt_clear_bound(struct rpc_xprt *xprt) +{ + clear_bit(XPRT_BOUND, &xprt->state); +} + #endif /* __KERNEL__*/ #endif /* _LINUX_SUNRPC_XPRT_H */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 3e19d321067a..0b8d03d08561 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -148,7 +148,6 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname, clnt->cl_maxproc = version->nrprocs; clnt->cl_protname = program->name; clnt->cl_pmap = &clnt->cl_pmap_default; - clnt->cl_port = xprt->addr.sin_port; clnt->cl_prog = program->number; clnt->cl_vers = version->number; clnt->cl_prot = xprt->prot; @@ -156,7 +155,7 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname, clnt->cl_metrics = rpc_alloc_iostats(clnt); rpc_init_wait_queue(&clnt->cl_pmap_default.pm_bindwait, "bindwait"); - if (!clnt->cl_port) + if (!xprt_bound(clnt->cl_xprt)) clnt->cl_autobind = 1; clnt->cl_rtt = &clnt->cl_rtt_default; @@ -570,7 +569,7 @@ EXPORT_SYMBOL(rpc_max_payload); void rpc_force_rebind(struct rpc_clnt *clnt) { if (clnt->cl_autobind) - clnt->cl_port = 0; + xprt_clear_bound(clnt->cl_xprt); } EXPORT_SYMBOL(rpc_force_rebind); @@ -782,14 +781,15 @@ static void call_bind(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; + struct rpc_xprt *xprt = task->tk_xprt; dprintk("RPC: %4d call_bind (status %d)\n", task->tk_pid, task->tk_status); task->tk_action = call_connect; - if (!clnt->cl_port) { + if (!xprt_bound(xprt)) { task->tk_action = call_bind_status; - task->tk_timeout = task->tk_xprt->bind_timeout; + task->tk_timeout = xprt->bind_timeout; rpc_getport(task, clnt); } } diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index 623180f224c9..209ffdfee10b 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -142,15 +142,17 @@ pmap_getport_done(struct rpc_task *task) dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n", task->tk_pid, task->tk_status, clnt->cl_port); - xprt->ops->set_port(xprt, 0); if (task->tk_status < 0) { /* Make the calling task exit with an error */ + xprt->ops->set_port(xprt, 0); task->tk_action = rpc_exit_task; } else if (clnt->cl_port == 0) { /* Program not registered */ + xprt->ops->set_port(xprt, 0); rpc_exit(task, -EACCES); } else { xprt->ops->set_port(xprt, clnt->cl_port); + xprt_set_bound(xprt); clnt->cl_port = htons(clnt->cl_port); } spin_lock(&pmap_lock); @@ -218,6 +220,7 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileg if (IS_ERR(xprt)) return (struct rpc_clnt *)xprt; xprt->ops->set_port(xprt, RPC_PMAP_PORT); + xprt_set_bound(xprt); if (!privileged) xprt->resvport = 0; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e8c2bc4977f3..e239ef985ef7 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -534,7 +534,7 @@ void xprt_connect(struct rpc_task *task) dprintk("RPC: %4d xprt_connect xprt %p %s connected\n", task->tk_pid, xprt, (xprt_connected(xprt) ? "is" : "is not")); - if (!xprt->addr.sin_port) { + if (!xprt_bound(xprt)) { task->tk_status = -EIO; return; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 441bd53f5eca..123ac1e5ba15 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1016,7 +1016,7 @@ static void xs_udp_connect_worker(void *args) struct socket *sock = xprt->sock; int err, status = -EIO; - if (xprt->shutdown || xprt->addr.sin_port == 0) + if (xprt->shutdown || !xprt_bound(xprt)) goto out; dprintk("RPC: xs_udp_connect_worker for xprt %p\n", xprt); @@ -1099,7 +1099,7 @@ static void xs_tcp_connect_worker(void *args) struct socket *sock = xprt->sock; int err, status = -EIO; - if (xprt->shutdown || xprt->addr.sin_port == 0) + if (xprt->shutdown || !xprt_bound(xprt)) goto out; dprintk("RPC: xs_tcp_connect_worker for xprt %p\n", xprt); @@ -1307,8 +1307,11 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) if (xprt->slot == NULL) return -ENOMEM; - xprt->prot = IPPROTO_UDP; + if (ntohs(xprt->addr.sin_port) != 0) + xprt_set_bound(xprt); xprt->port = xs_get_random_port(); + + xprt->prot = IPPROTO_UDP; xprt->tsh_size = 0; xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; /* XXX: header size can vary due to auth type, IPv6, etc. */ @@ -1348,8 +1351,11 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) if (xprt->slot == NULL) return -ENOMEM; - xprt->prot = IPPROTO_TCP; + if (ntohs(xprt->addr.sin_port) != 0) + xprt_set_bound(xprt); xprt->port = xs_get_random_port(); + + xprt->prot = IPPROTO_TCP; xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; From 4a68179d38874c37be2802442a71b847f5d1a2a9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:15 -0400 Subject: [PATCH 0833/1063] SUNRPC: Make RPC portmapper use per-transport storage Move connection and bind state that was maintained in the rpc_clnt structure to the rpc_xprt structure. This will allow the creation of a clean API for plugging in different types of bind mechanisms. This brings improvements such as the elimination of a single spin lock to control serialization for all in-kernel RPC binding. A set of per-xprt bitops is used to serialize tasks during RPC binding, just like it now works for making RPC transport connections. Test-plan: Destructive testing (unplugging the network temporarily). Connectathon with UDP and TCP. NFSv2/3 and NFSv4 mounting should be carefully checked. Probably need to rig a server where certain services aren't running, or that returns an error for some typical operation. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 23 +----- include/linux/sunrpc/xprt.h | 14 ++++ net/sunrpc/clnt.c | 8 +- net/sunrpc/pmap_clnt.c | 158 ++++++++++++++++++++++++------------ net/sunrpc/xprt.c | 1 + 5 files changed, 123 insertions(+), 81 deletions(-) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 8fe9f35eba31..00e9dbaec9c5 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -18,18 +18,6 @@ #include #include -/* - * This defines an RPC port mapping - */ -struct rpc_portmap { - __u32 pm_prog; - __u32 pm_vers; - __u32 pm_prot; - __u16 pm_port; - unsigned char pm_binding : 1; /* doing a getport() */ - struct rpc_wait_queue pm_bindwait; /* waiting on getport() */ -}; - struct rpc_inode; /* @@ -40,7 +28,9 @@ struct rpc_clnt { atomic_t cl_users; /* number of references */ struct rpc_xprt * cl_xprt; /* transport */ struct rpc_procinfo * cl_procinfo; /* procedure info */ - u32 cl_maxproc; /* max procedure number */ + u32 cl_prog, /* RPC program number */ + cl_vers, /* RPC version number */ + cl_maxproc; /* max procedure number */ char * cl_server; /* server machine name */ char * cl_protname; /* protocol name */ @@ -55,7 +45,6 @@ struct rpc_clnt { cl_dead : 1;/* abandoned */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ - struct rpc_portmap * cl_pmap; /* port mapping */ int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME]; @@ -64,14 +53,8 @@ struct rpc_clnt { struct dentry * cl_dentry; /* inode */ struct rpc_clnt * cl_parent; /* Points to parent of clones */ struct rpc_rtt cl_rtt_default; - struct rpc_portmap cl_pmap_default; char cl_inline_name[32]; }; -#define cl_timeout cl_xprt->timeout -#define cl_prog cl_pmap->pm_prog -#define cl_vers cl_pmap->pm_vers -#define cl_port cl_pmap->pm_port -#define cl_prot cl_pmap->pm_prot /* * General RPC program info diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index a71106723d71..4ce82616873d 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -138,6 +138,7 @@ struct rpc_xprt { unsigned int tsh_size; /* size of transport specific header */ + struct rpc_wait_queue binding; /* requests waiting on rpcbind */ struct rpc_wait_queue sending; /* requests waiting to send */ struct rpc_wait_queue resend; /* requests waiting to resend */ struct rpc_wait_queue pending; /* requests in flight */ @@ -270,6 +271,7 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to); #define XPRT_CONNECTING (2) #define XPRT_CLOSE_WAIT (3) #define XPRT_BOUND (4) +#define XPRT_BINDING (5) static inline void xprt_set_connected(struct rpc_xprt *xprt) { @@ -328,6 +330,18 @@ static inline void xprt_clear_bound(struct rpc_xprt *xprt) clear_bit(XPRT_BOUND, &xprt->state); } +static inline void xprt_clear_binding(struct rpc_xprt *xprt) +{ + smp_mb__before_clear_bit(); + clear_bit(XPRT_BINDING, &xprt->state); + smp_mb__after_clear_bit(); +} + +static inline int xprt_test_and_set_binding(struct rpc_xprt *xprt) +{ + return test_and_set_bit(XPRT_BINDING, &xprt->state); +} + #endif /* __KERNEL__*/ #endif /* _LINUX_SUNRPC_XPRT_H */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 0b8d03d08561..cee504162a3f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -147,13 +147,10 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname, clnt->cl_procinfo = version->procs; clnt->cl_maxproc = version->nrprocs; clnt->cl_protname = program->name; - clnt->cl_pmap = &clnt->cl_pmap_default; clnt->cl_prog = program->number; clnt->cl_vers = version->number; - clnt->cl_prot = xprt->prot; clnt->cl_stats = program->stats; clnt->cl_metrics = rpc_alloc_iostats(clnt); - rpc_init_wait_queue(&clnt->cl_pmap_default.pm_bindwait, "bindwait"); if (!xprt_bound(clnt->cl_xprt)) clnt->cl_autobind = 1; @@ -243,8 +240,6 @@ rpc_clone_client(struct rpc_clnt *clnt) atomic_set(&new->cl_users, 0); new->cl_parent = clnt; atomic_inc(&clnt->cl_count); - /* Duplicate portmapper */ - rpc_init_wait_queue(&new->cl_pmap_default.pm_bindwait, "bindwait"); /* Turn off autobind on clones */ new->cl_autobind = 0; new->cl_oneshot = 0; @@ -254,8 +249,7 @@ rpc_clone_client(struct rpc_clnt *clnt) rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); - new->cl_pmap = &new->cl_pmap_default; - new->cl_metrics = rpc_alloc_iostats(clnt); + new->cl_metrics = rpc_alloc_iostats(clnt); return new; out_no_clnt: printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__); diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index 209ffdfee10b..59d542436ca9 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -24,11 +24,57 @@ #define PMAP_UNSET 2 #define PMAP_GETPORT 3 +struct portmap_args { + u32 pm_prog; + u32 pm_vers; + u32 pm_prot; + unsigned short pm_port; + struct rpc_task * pm_task; +}; + static struct rpc_procinfo pmap_procedures[]; static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int, int); -static void pmap_getport_done(struct rpc_task *); +static void pmap_getport_done(struct rpc_task *, void *); static struct rpc_program pmap_program; -static DEFINE_SPINLOCK(pmap_lock); + +static void pmap_getport_prepare(struct rpc_task *task, void *calldata) +{ + struct portmap_args *map = calldata; + struct rpc_message msg = { + .rpc_proc = &pmap_procedures[PMAP_GETPORT], + .rpc_argp = map, + .rpc_resp = &map->pm_port, + }; + + rpc_call_setup(task, &msg, 0); +} + +static inline struct portmap_args *pmap_map_alloc(void) +{ + return kmalloc(sizeof(struct portmap_args), GFP_NOFS); +} + +static inline void pmap_map_free(struct portmap_args *map) +{ + kfree(map); +} + +static void pmap_map_release(void *data) +{ + pmap_map_free(data); +} + +static const struct rpc_call_ops pmap_getport_ops = { + .rpc_call_prepare = pmap_getport_prepare, + .rpc_call_done = pmap_getport_done, + .rpc_release = pmap_map_release, +}; + +static inline void pmap_wake_portmap_waiters(struct rpc_xprt *xprt) +{ + xprt_clear_binding(xprt); + rpc_wake_up(&xprt->binding); +} /* * Obtain the port for a given RPC service on a given host. This one can @@ -37,67 +83,71 @@ static DEFINE_SPINLOCK(pmap_lock); void rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt) { - struct rpc_portmap *map = clnt->cl_pmap; - struct sockaddr_in *sap = &clnt->cl_xprt->addr; - struct rpc_message msg = { - .rpc_proc = &pmap_procedures[PMAP_GETPORT], - .rpc_argp = map, - .rpc_resp = &clnt->cl_port, - .rpc_cred = NULL - }; + struct rpc_xprt *xprt = task->tk_xprt; + struct sockaddr_in *sap = &xprt->addr; + struct portmap_args *map; struct rpc_clnt *pmap_clnt; - struct rpc_task *child; + struct rpc_task *child; - dprintk("RPC: %4d rpc_getport(%s, %d, %d, %d)\n", + dprintk("RPC: %4d rpc_getport(%s, %u, %u, %d)\n", task->tk_pid, clnt->cl_server, - map->pm_prog, map->pm_vers, map->pm_prot); + clnt->cl_prog, clnt->cl_vers, xprt->prot); /* Autobind on cloned rpc clients is discouraged */ BUG_ON(clnt->cl_parent != clnt); - spin_lock(&pmap_lock); - if (map->pm_binding) { - rpc_sleep_on(&map->pm_bindwait, task, NULL, NULL); - spin_unlock(&pmap_lock); + if (xprt_test_and_set_binding(xprt)) { + task->tk_status = -EACCES; /* tell caller to check again */ + rpc_sleep_on(&xprt->binding, task, NULL, NULL); return; } - map->pm_binding = 1; - spin_unlock(&pmap_lock); + + /* Someone else may have bound if we slept */ + if (xprt_bound(xprt)) { + task->tk_status = 0; + goto bailout_nofree; + } + + map = pmap_map_alloc(); + if (!map) { + task->tk_status = -ENOMEM; + goto bailout_nofree; + } + map->pm_prog = clnt->cl_prog; + map->pm_vers = clnt->cl_vers; + map->pm_prot = xprt->prot; + map->pm_port = 0; + map->pm_task = task; pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot, 0); if (IS_ERR(pmap_clnt)) { task->tk_status = PTR_ERR(pmap_clnt); goto bailout; } - task->tk_status = 0; - /* - * Note: rpc_new_child will release client after a failure. - */ - if (!(child = rpc_new_child(pmap_clnt, task))) + child = rpc_run_task(pmap_clnt, RPC_TASK_ASYNC, &pmap_getport_ops, map); + if (IS_ERR(child)) { + task->tk_status = -EIO; goto bailout; + } + rpc_release_task(child); - /* Setup the call info struct */ - rpc_call_setup(child, &msg, 0); + rpc_sleep_on(&xprt->binding, task, NULL, NULL); - /* ... and run the child task */ task->tk_xprt->stat.bind_count++; - rpc_run_child(task, child, pmap_getport_done); return; bailout: - spin_lock(&pmap_lock); - map->pm_binding = 0; - rpc_wake_up(&map->pm_bindwait); - spin_unlock(&pmap_lock); - rpc_exit(task, -EIO); + pmap_map_free(map); +bailout_nofree: + pmap_wake_portmap_waiters(xprt); } #ifdef CONFIG_ROOT_NFS int rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) { - struct rpc_portmap map = { + struct portmap_args map = { .pm_prog = prog, .pm_vers = vers, .pm_prot = prot, @@ -133,32 +183,32 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) #endif static void -pmap_getport_done(struct rpc_task *task) +pmap_getport_done(struct rpc_task *child, void *data) { - struct rpc_clnt *clnt = task->tk_client; + struct portmap_args *map = data; + struct rpc_task *task = map->pm_task; struct rpc_xprt *xprt = task->tk_xprt; - struct rpc_portmap *map = clnt->cl_pmap; + int status = child->tk_status; - dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n", - task->tk_pid, task->tk_status, clnt->cl_port); - - if (task->tk_status < 0) { - /* Make the calling task exit with an error */ + if (status < 0) { + /* Portmapper not available */ xprt->ops->set_port(xprt, 0); - task->tk_action = rpc_exit_task; - } else if (clnt->cl_port == 0) { - /* Program not registered */ + task->tk_status = status; + } else if (map->pm_port == 0) { + /* Requested RPC service wasn't registered */ xprt->ops->set_port(xprt, 0); - rpc_exit(task, -EACCES); + task->tk_status = -EACCES; } else { - xprt->ops->set_port(xprt, clnt->cl_port); + /* Succeeded */ + xprt->ops->set_port(xprt, map->pm_port); xprt_set_bound(xprt); - clnt->cl_port = htons(clnt->cl_port); + task->tk_status = 0; } - spin_lock(&pmap_lock); - map->pm_binding = 0; - rpc_wake_up(&map->pm_bindwait); - spin_unlock(&pmap_lock); + + dprintk("RPC: %4d pmap_getport_done(status %d, port %u)\n", + child->tk_pid, child->tk_status, map->pm_port); + + pmap_wake_portmap_waiters(xprt); } /* @@ -172,7 +222,7 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) .sin_family = AF_INET, .sin_addr.s_addr = htonl(INADDR_LOOPBACK), }; - struct rpc_portmap map = { + struct portmap_args map = { .pm_prog = prog, .pm_vers = vers, .pm_prot = prot, @@ -239,7 +289,7 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileg * XDR encode/decode functions for PMAP */ static int -xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct rpc_portmap *map) +xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct portmap_args *map) { dprintk("RPC: xdr_encode_mapping(%d, %d, %d, %d)\n", map->pm_prog, map->pm_vers, map->pm_prot, map->pm_port); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e239ef985ef7..b45abd0743cb 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -928,6 +928,7 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc xprt->last_used = jiffies; xprt->cwnd = RPC_INITCWND; + rpc_init_wait_queue(&xprt->binding, "xprt_binding"); rpc_init_wait_queue(&xprt->pending, "xprt_pending"); rpc_init_wait_queue(&xprt->sending, "xprt_sending"); rpc_init_wait_queue(&xprt->resend, "xprt_resend"); From c4a5692fb83f23008c720fe84454d5603e80b103 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:16 -0400 Subject: [PATCH 0834/1063] SUNRPC: Clean-up after recent changes to sunrpc/pmap_clnt.c Add comments for external functions, use modern function definition style, and fix up dprintk formatting. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/pmap_clnt.c | 70 +++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 28 deletions(-) diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index 59d542436ca9..0efcbf1302a2 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -1,7 +1,9 @@ /* - * linux/net/sunrpc/pmap.c + * linux/net/sunrpc/pmap_clnt.c * - * Portmapper client. + * In-kernel RPC portmapper client. + * + * Portmapper supports version 2 of the rpcbind protocol (RFC 1833). * * Copyright (C) 1996, Olaf Kirch */ @@ -76,12 +78,15 @@ static inline void pmap_wake_portmap_waiters(struct rpc_xprt *xprt) rpc_wake_up(&xprt->binding); } -/* - * Obtain the port for a given RPC service on a given host. This one can - * be called for an ongoing RPC request. +/** + * rpc_getport - obtain the port for a given RPC service on a given host + * @task: task that is waiting for portmapper request + * @clnt: controlling rpc_clnt + * + * This one can be called for an ongoing RPC request, and can be used in + * an async (rpciod) context. */ -void -rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt) +void rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt) { struct rpc_xprt *xprt = task->tk_xprt; struct sockaddr_in *sap = &xprt->addr; @@ -144,8 +149,16 @@ bailout_nofree: } #ifdef CONFIG_ROOT_NFS -int -rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) +/** + * rpc_getport_external - obtain the port for a given RPC service on a given host + * @sin: address of remote peer + * @prog: RPC program number to bind + * @vers: RPC version number to bind + * @prot: transport protocol to use to make this request + * + * This one is called from outside the RPC client in a synchronous task context. + */ +int rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) { struct portmap_args map = { .pm_prog = prog, @@ -162,7 +175,7 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) char hostname[32]; int status; - dprintk("RPC: rpc_getport_external(%u.%u.%u.%u, %d, %d, %d)\n", + dprintk("RPC: rpc_getport_external(%u.%u.%u.%u, %u, %u, %d)\n", NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr)); @@ -182,8 +195,10 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) } #endif -static void -pmap_getport_done(struct rpc_task *child, void *data) +/* + * Portmapper child task invokes this callback via tk_exit. + */ +static void pmap_getport_done(struct rpc_task *child, void *data) { struct portmap_args *map = data; struct rpc_task *task = map->pm_task; @@ -211,12 +226,17 @@ pmap_getport_done(struct rpc_task *child, void *data) pmap_wake_portmap_waiters(xprt); } -/* - * Set or unset a port registration with the local portmapper. +/** + * rpc_register - set or unset a port registration with the local portmapper + * @prog: RPC program number to bind + * @vers: RPC version number to bind + * @prot: transport protocol to use to make this request + * @port: port value to register + * @okay: result code + * * port == 0 means unregister, port != 0 means register. */ -int -rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) +int rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) { struct sockaddr_in sin = { .sin_family = AF_INET, @@ -236,7 +256,7 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) struct rpc_clnt *pmap_clnt; int error = 0; - dprintk("RPC: registering (%d, %d, %d, %d) with portmapper.\n", + dprintk("RPC: registering (%u, %u, %d, %u) with portmapper.\n", prog, vers, prot, port); pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1); @@ -259,13 +279,11 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) return error; } -static struct rpc_clnt * -pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged) +static struct rpc_clnt *pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged) { struct rpc_xprt *xprt; struct rpc_clnt *clnt; - /* printk("pmap: create xprt\n"); */ xprt = xprt_create_proto(proto, srvaddr, NULL); if (IS_ERR(xprt)) return (struct rpc_clnt *)xprt; @@ -274,7 +292,6 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileg if (!privileged) xprt->resvport = 0; - /* printk("pmap: create clnt\n"); */ clnt = rpc_new_client(xprt, hostname, &pmap_program, RPC_PMAP_VERSION, RPC_AUTH_UNIX); @@ -288,10 +305,9 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileg /* * XDR encode/decode functions for PMAP */ -static int -xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct portmap_args *map) +static int xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct portmap_args *map) { - dprintk("RPC: xdr_encode_mapping(%d, %d, %d, %d)\n", + dprintk("RPC: xdr_encode_mapping(%u, %u, %u, %u)\n", map->pm_prog, map->pm_vers, map->pm_prot, map->pm_port); *p++ = htonl(map->pm_prog); *p++ = htonl(map->pm_vers); @@ -302,15 +318,13 @@ xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct portmap_args *map) return 0; } -static int -xdr_decode_port(struct rpc_rqst *req, u32 *p, unsigned short *portp) +static int xdr_decode_port(struct rpc_rqst *req, u32 *p, unsigned short *portp) { *portp = (unsigned short) ntohl(*p++); return 0; } -static int -xdr_decode_bool(struct rpc_rqst *req, u32 *p, unsigned int *boolp) +static int xdr_decode_bool(struct rpc_rqst *req, u32 *p, unsigned int *boolp) { *boolp = (unsigned int) ntohl(*p++); return 0; From 5b1eacbcd78930d976eb50a93f1779d311b553d1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:16 -0400 Subject: [PATCH 0835/1063] SUNRPC: Support for RPC child tasks no longer needed The previous patches removed the last user of RPC child tasks, so we can remove support for child tasks from net/sunrpc/sched.c now. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 5 --- net/sunrpc/sched.c | 82 ------------------------------------ 2 files changed, 87 deletions(-) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 82a91bb22362..f399c138f79d 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -127,7 +127,6 @@ struct rpc_call_ops { */ #define RPC_TASK_ASYNC 0x0001 /* is an async task */ #define RPC_TASK_SWAPPER 0x0002 /* is swapping in/out */ -#define RPC_TASK_CHILD 0x0008 /* is child of other task */ #define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */ #define RPC_TASK_ROOTCREDS 0x0040 /* force root creds */ #define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */ @@ -136,7 +135,6 @@ struct rpc_call_ops { #define RPC_TASK_NOINTR 0x0400 /* uninterruptible task */ #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) -#define RPC_IS_CHILD(t) ((t)->tk_flags & RPC_TASK_CHILD) #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) #define RPC_DO_ROOTOVERRIDE(t) ((t)->tk_flags & RPC_TASK_ROOTCREDS) #define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED) @@ -253,7 +251,6 @@ struct rpc_task *rpc_new_task(struct rpc_clnt *, int flags, const struct rpc_call_ops *ops, void *data); struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *ops, void *data); -struct rpc_task *rpc_new_child(struct rpc_clnt *, struct rpc_task *parent); void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *ops, void *data); @@ -261,8 +258,6 @@ void rpc_release_task(struct rpc_task *); void rpc_exit_task(struct rpc_task *); void rpc_killall_tasks(struct rpc_clnt *); int rpc_execute(struct rpc_task *); -void rpc_run_child(struct rpc_task *parent, struct rpc_task *child, - rpc_action action); void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *); void rpc_init_wait_queue(struct rpc_wait_queue *, const char *); void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *, diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 5c3eee768504..015ffe423a2f 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -44,12 +44,6 @@ static void __rpc_default_timer(struct rpc_task *task); static void rpciod_killall(void); static void rpc_async_schedule(void *); -/* - * RPC tasks that create another task (e.g. for contacting the portmapper) - * will wait on this queue for their child's completion - */ -static RPC_WAITQ(childq, "childq"); - /* * RPC tasks sit here while waiting for conditions to improve. */ @@ -323,16 +317,6 @@ static void rpc_make_runnable(struct rpc_task *task) wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED); } -/* - * Place a newly initialized task on the workqueue. - */ -static inline void -rpc_schedule_run(struct rpc_task *task) -{ - rpc_set_active(task); - rpc_make_runnable(task); -} - /* * Prepare for sleeping on a wait queue. * By always appending tasks to the list we ensure FIFO behavior. @@ -933,72 +917,6 @@ struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, } EXPORT_SYMBOL(rpc_run_task); -/** - * rpc_find_parent - find the parent of a child task. - * @child: child task - * @parent: parent task - * - * Checks that the parent task is still sleeping on the - * queue 'childq'. If so returns a pointer to the parent. - * Upon failure returns NULL. - * - * Caller must hold childq.lock - */ -static inline struct rpc_task *rpc_find_parent(struct rpc_task *child, struct rpc_task *parent) -{ - struct rpc_task *task; - struct list_head *le; - - task_for_each(task, le, &childq.tasks[0]) - if (task == parent) - return parent; - - return NULL; -} - -static void rpc_child_exit(struct rpc_task *child, void *calldata) -{ - struct rpc_task *parent; - - spin_lock_bh(&childq.lock); - if ((parent = rpc_find_parent(child, calldata)) != NULL) { - parent->tk_status = child->tk_status; - __rpc_wake_up_task(parent); - } - spin_unlock_bh(&childq.lock); -} - -static const struct rpc_call_ops rpc_child_ops = { - .rpc_call_done = rpc_child_exit, -}; - -/* - * Note: rpc_new_task releases the client after a failure. - */ -struct rpc_task * -rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent) -{ - struct rpc_task *task; - - task = rpc_new_task(clnt, RPC_TASK_ASYNC | RPC_TASK_CHILD, &rpc_child_ops, parent); - if (!task) - goto fail; - return task; - -fail: - parent->tk_status = -ENOMEM; - return NULL; -} - -void rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func) -{ - spin_lock_bh(&childq.lock); - /* N.B. Is it possible for the child to have already finished? */ - __rpc_sleep_on(&childq, task, func, NULL); - rpc_schedule_run(child); - spin_unlock_bh(&childq.lock); -} - /* * Kill all tasks for the given client. * XXX: kill their descendants as well? From bbf7c1dd2ae2b4040b41b1065ee9b1b6905b1605 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:16 -0400 Subject: [PATCH 0836/1063] SUNRPC: Introduce transport switch callout for pluggable rpcbind Introduce a clean transport switch API for plugging in different types of rpcbind mechanisms. For instance, rpcbind can cleanly replace the existing portmapper client, or a transport can choose to implement RPC binding any way it likes. Test plan: Destructive testing (unplugging the network temporarily). Connectathon with UDP and TCP. NFSv2/3 and NFSv4 mounting should be carefully checked. Probably need to rig a server where certain services aren't running, or that returns an error for some typical operation. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 2 +- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/clnt.c | 3 +-- net/sunrpc/pmap_clnt.c | 4 ++-- net/sunrpc/xprtsock.c | 2 ++ 5 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 00e9dbaec9c5..2e68ac0aa022 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -106,7 +106,7 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); int rpc_shutdown_client(struct rpc_clnt *); int rpc_destroy_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); -void rpc_getport(struct rpc_task *, struct rpc_clnt *); +void rpc_getport(struct rpc_task *); int rpc_register(u32, u32, int, unsigned short, int *); void rpc_call_setup(struct rpc_task *, struct rpc_message *, int); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 4ce82616873d..84122559fa17 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -105,6 +105,7 @@ struct rpc_xprt_ops { void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize); int (*reserve_xprt)(struct rpc_task *task); void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); + void (*rpcbind)(struct rpc_task *task); void (*set_port)(struct rpc_xprt *xprt, unsigned short port); void (*connect)(struct rpc_task *task); void * (*buf_alloc)(struct rpc_task *task, size_t size); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index cee504162a3f..d003c2f5688f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -774,7 +774,6 @@ call_encode(struct rpc_task *task) static void call_bind(struct rpc_task *task) { - struct rpc_clnt *clnt = task->tk_client; struct rpc_xprt *xprt = task->tk_xprt; dprintk("RPC: %4d call_bind (status %d)\n", @@ -784,7 +783,7 @@ call_bind(struct rpc_task *task) if (!xprt_bound(xprt)) { task->tk_action = call_bind_status; task->tk_timeout = xprt->bind_timeout; - rpc_getport(task, clnt); + xprt->ops->rpcbind(task); } } diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index 0efcbf1302a2..f7b279a63baa 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -81,13 +81,13 @@ static inline void pmap_wake_portmap_waiters(struct rpc_xprt *xprt) /** * rpc_getport - obtain the port for a given RPC service on a given host * @task: task that is waiting for portmapper request - * @clnt: controlling rpc_clnt * * This one can be called for an ongoing RPC request, and can be used in * an async (rpciod) context. */ -void rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt) +void rpc_getport(struct rpc_task *task) { + struct rpc_clnt *clnt = task->tk_client; struct rpc_xprt *xprt = task->tk_xprt; struct sockaddr_in *sap = &xprt->addr; struct portmap_args *map; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 123ac1e5ba15..4c98b89a5b48 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1262,6 +1262,7 @@ static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, + .rpcbind = rpc_getport, .set_port = xs_set_port, .connect = xs_connect, .buf_alloc = rpc_malloc, @@ -1278,6 +1279,7 @@ static struct rpc_xprt_ops xs_udp_ops = { static struct rpc_xprt_ops xs_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xs_tcp_release_xprt, + .rpcbind = rpc_getport, .set_port = xs_set_port, .connect = xs_connect, .buf_alloc = rpc_malloc, From ed39440a2573abc926f230267000f21fa5a87822 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:17 -0400 Subject: [PATCH 0837/1063] SUNRPC: create API for getting remote peer address Provide an API for retrieving the remote peer address without allowing direct access to the rpc_xprt struct. Test-plan: Compile kernel with CONFIG_NFS enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + net/sunrpc/clnt.c | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 2e68ac0aa022..65196b03f0ab 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -123,6 +123,7 @@ void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); size_t rpc_max_payload(struct rpc_clnt *); void rpc_force_rebind(struct rpc_clnt *); int rpc_ping(struct rpc_clnt *clnt, int flags); +size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); /* * Helper function for NFSroot support diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d003c2f5688f..94768cf5fd5b 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -533,6 +533,27 @@ rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags) task->tk_action = rpc_exit_task; } +/** + * rpc_peeraddr - extract remote peer address from clnt's xprt + * @clnt: RPC client structure + * @buf: target buffer + * @size: length of target buffer + * + * Returns the number of bytes that are actually in the stored address. + */ +size_t rpc_peeraddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t bufsize) +{ + size_t bytes; + struct rpc_xprt *xprt = clnt->cl_xprt; + + bytes = sizeof(xprt->addr); + if (bytes > bufsize) + bytes = bufsize; + memcpy(buf, &clnt->cl_xprt->addr, bytes); + return sizeof(xprt->addr); +} +EXPORT_SYMBOL(rpc_peeraddr); + void rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) { From 44c31be261540acf66ddd730631ead8009cc361d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:17 -0400 Subject: [PATCH 0838/1063] LOCKD: Teach lockd to use the new rpc_peeraddr() API Hide the details of how the RPC client stores remote peer addresses from the Network Lock Manager. Test plan: Destructive testing (unplugging the network temporarily). Connectathon with UDP and TCP. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/lockd/clntproc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 89ba0df14c22..50dbb67ae0c4 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -151,11 +151,13 @@ static void nlmclnt_release_lockargs(struct nlm_rqst *req) int nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) { + struct rpc_clnt *client = NFS_CLIENT(inode); + struct sockaddr_in addr; struct nlm_host *host; struct nlm_rqst *call; sigset_t oldset; unsigned long flags; - int status, proto, vers; + int status, vers; vers = (NFS_PROTO(inode)->version == 3) ? 4 : 1; if (NFS_PROTO(inode)->version > 3) { @@ -163,10 +165,8 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) return -ENOLCK; } - /* Retrieve transport protocol from NFS client */ - proto = NFS_CLIENT(inode)->cl_xprt->prot; - - host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers); + rpc_peeraddr(client, (struct sockaddr *) &addr, sizeof(addr)); + host = nlmclnt_lookup_host(&addr, client->cl_xprt->prot, vers); if (host == NULL) return -ENOLCK; From 081f79a9b09b634f0dc08ed014e0195464d52535 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:17 -0400 Subject: [PATCH 0839/1063] SUNRPC: Teach the RPC portmapper to use the new rpc_peeraddr() API. Hide the details of how the RPC client stores remote peer addresses from the RPC portmapper. Test plan: Destructive testing (unplugging the network temporarily). Connectathon with UDP and TCP. NFSv2/3 and NFSv4 mounting should be carefully checked. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/pmap_clnt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index f7b279a63baa..3eee8e907275 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -89,7 +89,7 @@ void rpc_getport(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; struct rpc_xprt *xprt = task->tk_xprt; - struct sockaddr_in *sap = &xprt->addr; + struct sockaddr_in addr; struct portmap_args *map; struct rpc_clnt *pmap_clnt; struct rpc_task *child; @@ -124,7 +124,8 @@ void rpc_getport(struct rpc_task *task) map->pm_port = 0; map->pm_task = task; - pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot, 0); + rpc_peeraddr(clnt, (struct sockaddr *) &addr, sizeof(addr)); + pmap_clnt = pmap_create(clnt->cl_server, &addr, map->pm_prot, 0); if (IS_ERR(pmap_clnt)) { task->tk_status = PTR_ERR(pmap_clnt); goto bailout; From 39d7bbcb5ba5e9d8d658b70903dd7939400e57db Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:18 -0400 Subject: [PATCH 0840/1063] SUNRPC: remove extraneous header inclusions include/linux/sunrpc/clnt.h already includes include/linux/sunrpc/xprt.h. We can remove xprt.h from source files that already include clnt.h. Likewise include/linux/sunrpc/timer.h. Test plan: Compile kernel with CONFIG_NFS enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/mount_clnt.c | 1 - include/linux/nfs_xdr.h | 1 - net/sunrpc/pmap_clnt.c | 1 - net/sunrpc/sched.c | 1 - net/sunrpc/timer.c | 2 -- 5 files changed, 6 deletions(-) diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 445abb4d4214..41274874b9a5 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 2426b11b6cce..0f33e621892f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1,7 +1,6 @@ #ifndef _LINUX_NFS_XDR_H #define _LINUX_NFS_XDR_H -#include #include /* diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index 3eee8e907275..523f0e825dea 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #ifdef RPC_DEBUG diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 015ffe423a2f..ecf366351bf7 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -21,7 +21,6 @@ #include #include -#include #ifdef RPC_DEBUG #define RPCDBG_FACILITY RPCDBG_SCHED diff --git a/net/sunrpc/timer.c b/net/sunrpc/timer.c index bcbdf6430d5c..8142fdb8a930 100644 --- a/net/sunrpc/timer.c +++ b/net/sunrpc/timer.c @@ -19,8 +19,6 @@ #include #include -#include -#include #define RPC_RTO_MAX (60*HZ) #define RPC_RTO_INIT (HZ/5) From edb267a688fcee5335d596752f117a30c7152e44 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:18 -0400 Subject: [PATCH 0841/1063] SUNRPC: add xprt switch API for printing formatted remote peer addresses Add a new method to the transport switch API to provide a way to convert the opaque contents of xprt->addr to a human-readable string. Test plan: Compile kernel with CONFIG_NFS enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 11 ++++++ net/sunrpc/xprtsock.c | 79 +++++++++++++++++++++++++++++++++---- 2 files changed, 82 insertions(+), 8 deletions(-) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 84122559fa17..8372ab8fc9b5 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -51,6 +51,14 @@ struct rpc_timeout { unsigned char to_exponential; }; +enum rpc_display_format_t { + RPC_DISPLAY_ADDR = 0, + RPC_DISPLAY_PORT, + RPC_DISPLAY_PROTO, + RPC_DISPLAY_ALL, + RPC_DISPLAY_MAX, +}; + struct rpc_task; struct rpc_xprt; struct seq_file; @@ -103,6 +111,7 @@ struct rpc_rqst { struct rpc_xprt_ops { void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize); + char * (*print_addr)(struct rpc_xprt *xprt, enum rpc_display_format_t format); int (*reserve_xprt)(struct rpc_task *task); void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); void (*rpcbind)(struct rpc_task *task); @@ -207,6 +216,8 @@ struct rpc_xprt { void (*old_data_ready)(struct sock *, int); void (*old_state_change)(struct sock *); void (*old_write_space)(struct sock *); + + char * address_strings[RPC_DISPLAY_MAX]; }; #define XPRT_LAST_FRAG (1 << 0) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 4c98b89a5b48..cb8e6c34e12f 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -125,6 +125,47 @@ static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count) } #endif +static void xs_format_peer_addresses(struct rpc_xprt *xprt) +{ + struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; + char *buf; + + buf = kzalloc(20, GFP_KERNEL); + if (buf) { + snprintf(buf, 20, "%u.%u.%u.%u", + NIPQUAD(addr->sin_addr.s_addr)); + } + xprt->address_strings[RPC_DISPLAY_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + snprintf(buf, 8, "%u", + ntohs(addr->sin_port)); + } + xprt->address_strings[RPC_DISPLAY_PORT] = buf; + + if (xprt->prot == IPPROTO_UDP) + xprt->address_strings[RPC_DISPLAY_PROTO] = "udp"; + else + xprt->address_strings[RPC_DISPLAY_PROTO] = "tcp"; + + buf = kzalloc(48, GFP_KERNEL); + if (buf) { + snprintf(buf, 48, "addr=%u.%u.%u.%u port=%u proto=%s", + NIPQUAD(addr->sin_addr.s_addr), + ntohs(addr->sin_port), + xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); + } + xprt->address_strings[RPC_DISPLAY_ALL] = buf; +} + +static void xs_free_peer_addresses(struct rpc_xprt *xprt) +{ + kfree(xprt->address_strings[RPC_DISPLAY_ADDR]); + kfree(xprt->address_strings[RPC_DISPLAY_PORT]); + kfree(xprt->address_strings[RPC_DISPLAY_ALL]); +} + #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) static inline int xs_send_head(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, unsigned int len) @@ -490,6 +531,7 @@ static void xs_destroy(struct rpc_xprt *xprt) xprt_disconnect(xprt); xs_close(xprt); + xs_free_peer_addresses(xprt); kfree(xprt->slot); } @@ -964,6 +1006,19 @@ static unsigned short xs_get_random_port(void) return rand + xprt_min_resvport; } +/** + * xs_print_peer_address - format an IPv4 address for printing + * @xprt: generic transport + * @format: flags field indicating which parts of the address to render + */ +static char *xs_print_peer_address(struct rpc_xprt *xprt, enum rpc_display_format_t format) +{ + if (xprt->address_strings[format] != NULL) + return xprt->address_strings[format]; + else + return "unprintable"; +} + /** * xs_set_port - reset the port number in the remote endpoint address * @xprt: generic transport @@ -1019,8 +1074,6 @@ static void xs_udp_connect_worker(void *args) if (xprt->shutdown || !xprt_bound(xprt)) goto out; - dprintk("RPC: xs_udp_connect_worker for xprt %p\n", xprt); - /* Start by resetting any existing state */ xs_close(xprt); @@ -1034,6 +1087,9 @@ static void xs_udp_connect_worker(void *args) goto out; } + dprintk("RPC: worker connecting xprt %p to address: %s\n", + xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); + if (!xprt->inet) { struct sock *sk = sock->sk; @@ -1102,8 +1158,6 @@ static void xs_tcp_connect_worker(void *args) if (xprt->shutdown || !xprt_bound(xprt)) goto out; - dprintk("RPC: xs_tcp_connect_worker for xprt %p\n", xprt); - if (!xprt->sock) { /* start from scratch */ if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { @@ -1119,6 +1173,9 @@ static void xs_tcp_connect_worker(void *args) /* "close" the socket, preserving the local port */ xs_tcp_reuse_connection(xprt); + dprintk("RPC: worker connecting xprt %p to address: %s\n", + xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); + if (!xprt->inet) { struct sock *sk = sock->sk; @@ -1260,6 +1317,7 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, + .print_addr = xs_print_peer_address, .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, .rpcbind = rpc_getport, @@ -1277,6 +1335,7 @@ static struct rpc_xprt_ops xs_udp_ops = { }; static struct rpc_xprt_ops xs_tcp_ops = { + .print_addr = xs_print_peer_address, .reserve_xprt = xprt_reserve_xprt, .release_xprt = xs_tcp_release_xprt, .rpcbind = rpc_getport, @@ -1301,8 +1360,6 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) { size_t slot_table_size; - dprintk("RPC: setting up udp-ipv4 transport...\n"); - xprt->max_reqs = xprt_udp_slot_table_entries; slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); xprt->slot = kzalloc(slot_table_size, GFP_KERNEL); @@ -1332,6 +1389,10 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) else xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); + xs_format_peer_addresses(xprt); + dprintk("RPC: set up transport to address %s\n", + xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); + return 0; } @@ -1345,8 +1406,6 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) { size_t slot_table_size; - dprintk("RPC: setting up tcp-ipv4 transport...\n"); - xprt->max_reqs = xprt_tcp_slot_table_entries; slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); xprt->slot = kzalloc(slot_table_size, GFP_KERNEL); @@ -1375,5 +1434,9 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) else xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); + xs_format_peer_addresses(xprt); + dprintk("RPC: set up transport to address %s\n", + xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); + return 0; } From f425eba437f0051bde979ea2eef8bc875a77cd00 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:18 -0400 Subject: [PATCH 0842/1063] SUNRPC: Create API for displaying remote peer address Provide an API for formatting the remote peer address for printing without exposing its internal structure. The address could be dynamic, so we support a function call to get the address rather than reading it straight out of a structure. Test-plan: Destructive testing (unplugging the network temporarily). Probably need to rig a server where certain services aren't running, or that returns an error for some typical operation. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + net/sunrpc/clnt.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 65196b03f0ab..b7d47f018353 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -124,6 +124,7 @@ size_t rpc_max_payload(struct rpc_clnt *); void rpc_force_rebind(struct rpc_clnt *); int rpc_ping(struct rpc_clnt *clnt, int flags); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); +char * rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); /* * Helper function for NFSroot support diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 94768cf5fd5b..e5b19e348d88 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -554,6 +554,19 @@ size_t rpc_peeraddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t bufsize) } EXPORT_SYMBOL(rpc_peeraddr); +/** + * rpc_peeraddr2str - return remote peer address in printable format + * @clnt: RPC client structure + * @format: address format + * + */ +char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format) +{ + struct rpc_xprt *xprt = clnt->cl_xprt; + return xprt->ops->print_addr(xprt, format); +} +EXPORT_SYMBOL(rpc_peeraddr2str); + void rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) { From e7f7865743fff3d3938ec7540e5a784d662426da Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:19 -0400 Subject: [PATCH 0843/1063] SUNRPC: Teach rpc_pipe.c to use new rpc_peeraddr() API Hide the details of how the RPC client stores remote peer addresses from the RPC pipefs implementation. Test plan: Connectathon with Kerberos 5 authentication. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/rpc_pipe.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 0b1a1ac8a4bc..c21dc07f2a8c 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -327,10 +327,8 @@ rpc_show_info(struct seq_file *m, void *v) seq_printf(m, "RPC server: %s\n", clnt->cl_server); seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname, clnt->cl_prog, clnt->cl_vers); - seq_printf(m, "address: %u.%u.%u.%u\n", - NIPQUAD(clnt->cl_xprt->addr.sin_addr.s_addr)); - seq_printf(m, "protocol: %s\n", - clnt->cl_xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); + seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); + seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO)); return 0; } From c4efcb1d3e0bc76aeb9ca6301d19a5079893c6c9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:19 -0400 Subject: [PATCH 0844/1063] SUNRPC: Use "sockaddr_storage" for storing RPC client's remote peer address IPv6 addresses are big (128 bytes). Now that no RPC client consumers treat the addr field in rpc_xprt structs as an opaque, and access it only via the API calls, we can safely widen the field in the rpc_xprt struct to accomodate larger addresses. Test plan: Compile kernel with CONFIG_NFS enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 3 ++- net/sunrpc/clnt.c | 2 +- net/sunrpc/xprt.c | 3 ++- net/sunrpc/xprtsock.c | 15 ++++++++++----- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 8372ab8fc9b5..fc05cfbd5805 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -134,7 +134,8 @@ struct rpc_xprt { struct sock * inet; /* INET layer */ struct rpc_timeout timeout; /* timeout parms */ - struct sockaddr_in addr; /* server address */ + struct sockaddr_storage addr; /* server address */ + size_t addrlen; /* size of server address */ int prot; /* IP protocol */ unsigned long cong; /* current congestion */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e5b19e348d88..ff1e90fd81ab 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -550,7 +550,7 @@ size_t rpc_peeraddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t bufsize) if (bytes > bufsize) bytes = bufsize; memcpy(buf, &clnt->cl_xprt->addr, bytes); - return sizeof(xprt->addr); + return xprt->addrlen; } EXPORT_SYMBOL(rpc_peeraddr); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index b45abd0743cb..4987517cc74b 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -896,7 +896,8 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc if ((xprt = kzalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) return ERR_PTR(-ENOMEM); - xprt->addr = *ap; + memcpy(&xprt->addr, ap, sizeof(*ap)); + xprt->addrlen = sizeof(*ap); switch (proto) { case IPPROTO_UDP: diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index cb8e6c34e12f..17179aa4c207 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -341,7 +341,7 @@ static int xs_udp_send_request(struct rpc_task *task) req->rq_xtime = jiffies; status = xs_sendpages(xprt->sock, (struct sockaddr *) &xprt->addr, - sizeof(xprt->addr), xdr, req->rq_bytes_sent); + xprt->addrlen, xdr, req->rq_bytes_sent); dprintk("RPC: xs_udp_send_request(%u) = %d\n", xdr->len - req->rq_bytes_sent, status); @@ -1027,8 +1027,11 @@ static char *xs_print_peer_address(struct rpc_xprt *xprt, enum rpc_display_forma */ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) { + struct sockaddr_in *sap = (struct sockaddr_in *) &xprt->addr; + dprintk("RPC: setting port for xprt %p to %u\n", xprt, port); - xprt->addr.sin_port = htons(port); + + sap->sin_port = htons(port); } static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) @@ -1209,7 +1212,7 @@ static void xs_tcp_connect_worker(void *args) xprt->stat.connect_count++; xprt->stat.connect_start = jiffies; status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, - sizeof(xprt->addr), O_NONBLOCK); + xprt->addrlen, O_NONBLOCK); dprintk("RPC: %p connect status %d connected %d sock state %d\n", xprt, -status, xprt_connected(xprt), sock->sk->sk_state); if (status < 0) { @@ -1359,6 +1362,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) { size_t slot_table_size; + struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; xprt->max_reqs = xprt_udp_slot_table_entries; slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); @@ -1366,7 +1370,7 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) if (xprt->slot == NULL) return -ENOMEM; - if (ntohs(xprt->addr.sin_port) != 0) + if (ntohs(addr->sin_port != 0)) xprt_set_bound(xprt); xprt->port = xs_get_random_port(); @@ -1405,6 +1409,7 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) { size_t slot_table_size; + struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; xprt->max_reqs = xprt_tcp_slot_table_entries; slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); @@ -1412,7 +1417,7 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) if (xprt->slot == NULL) return -ENOMEM; - if (ntohs(xprt->addr.sin_port) != 0) + if (ntohs(addr->sin_port) != 0) xprt_set_bound(xprt); xprt->port = xs_get_random_port(); From 6ca948238724c945bd353f51d54ae7d285f3889f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:19 -0400 Subject: [PATCH 0845/1063] SUNRPC: Clean-up after previous patches. Remove some unused macros related to accessing an RPC peer address Test plan: Compile kernel with CONFIG_NFS option enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/lockd/host.c | 1 - include/linux/nfs_fs.h | 1 - include/linux/sunrpc/clnt.h | 3 --- 3 files changed, 5 deletions(-) diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 38b0e8a1aec0..a516a01561b8 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -26,7 +26,6 @@ #define NLM_HOST_REBIND (60 * HZ) #define NLM_HOST_EXPIRE ((nrhosts > NLM_HOST_MAX)? 300 * HZ : 120 * HZ) #define NLM_HOST_COLLECT ((nrhosts > NLM_HOST_MAX)? 120 * HZ : 60 * HZ) -#define NLM_HOST_ADDR(sv) (&(sv)->s_nlmclnt->cl_xprt->addr) static struct nlm_host * nlm_hosts[NLM_HOST_NRHASH]; static unsigned long next_gc; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 51e9bd90dedc..3b5b04193fee 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -216,7 +216,6 @@ static inline struct nfs_inode *NFS_I(struct inode *inode) #define NFS_SERVER(inode) (NFS_SB(inode->i_sb)) #define NFS_CLIENT(inode) (NFS_SERVER(inode)->client) #define NFS_PROTO(inode) (NFS_SERVER(inode)->nfs_client->rpc_ops) -#define NFS_ADDR(inode) (RPC_PEERADDR(NFS_CLIENT(inode))) #define NFS_COOKIEVERF(inode) (NFS_I(inode)->cookieverf) #define NFS_READTIME(inode) (NFS_I(inode)->read_cache_jiffies) #define NFS_CHANGE_ATTR(inode) (NFS_I(inode)->change_attr) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index b7d47f018353..a26d69583c7a 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -89,9 +89,6 @@ struct rpc_procinfo { char * p_name; /* name of procedure */ }; -#define RPC_CONGESTED(clnt) (RPCXPRT_CONGESTED((clnt)->cl_xprt)) -#define RPC_PEERADDR(clnt) (&(clnt)->cl_xprt->addr) - #ifdef __KERNEL__ struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname, From c2866763b4029411d166040306691773c12d4caf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:20 -0400 Subject: [PATCH 0846/1063] SUNRPC: use sockaddr + size when creating remote transport endpoints Prepare for more generic transport endpoint handling needed by transports that might use different forms of addressing, such as IPv6. Introduce a single function call to replace the two-call xprt_create_proto/rpc_create_client API. Define a new rpc_create_args structure that allows callers to pass in remote endpoint addresses of varying length. Test-plan: Compile kernel with CONFIG_NFS enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 22 +++++++++++ include/linux/sunrpc/xprt.h | 1 + net/sunrpc/clnt.c | 61 ++++++++++++++++++++++++++++++ net/sunrpc/xprt.c | 75 +++++++++++++++++++++++++++++++++++++ 4 files changed, 159 insertions(+) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index a26d69583c7a..7817ba82f1b2 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -97,6 +97,28 @@ struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname, struct rpc_clnt *rpc_new_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *info, u32 version, rpc_authflavor_t authflavor); + +struct rpc_create_args { + int protocol; + struct sockaddr *address; + size_t addrsize; + struct rpc_timeout *timeout; + char *servername; + struct rpc_program *program; + u32 version; + rpc_authflavor_t authflavor; + unsigned long flags; +}; + +/* Values for "flags" field */ +#define RPC_CLNT_CREATE_HARDRTRY (1UL << 0) +#define RPC_CLNT_CREATE_INTR (1UL << 1) +#define RPC_CLNT_CREATE_AUTOBIND (1UL << 2) +#define RPC_CLNT_CREATE_ONESHOT (1UL << 3) +#define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 4) +#define RPC_CLNT_CREATE_NOPING (1UL << 5) + +struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, struct rpc_program *, int); struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index fc05cfbd5805..bc80fcfdd892 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -237,6 +237,7 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long /* * Generic internal transport functions */ +struct rpc_xprt * xprt_create_transport(int proto, struct sockaddr *addr, size_t size, struct rpc_timeout *toparms); void xprt_connect(struct rpc_task *task); void xprt_reserve(struct rpc_task *task); int xprt_reserve_xprt(struct rpc_task *task); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index ff1e90fd81ab..dbb93bdf6cc9 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -192,6 +192,67 @@ out_no_xprt: return ERR_PTR(err); } +/* + * rpc_create - create an RPC client and transport with one call + * @args: rpc_clnt create argument structure + * + * Creates and initializes an RPC transport and an RPC client. + * + * It can ping the server in order to determine if it is up, and to see if + * it supports this program and version. RPC_CLNT_CREATE_NOPING disables + * this behavior so asynchronous tasks can also use rpc_create. + */ +struct rpc_clnt *rpc_create(struct rpc_create_args *args) +{ + struct rpc_xprt *xprt; + struct rpc_clnt *clnt; + + xprt = xprt_create_transport(args->protocol, args->address, + args->addrsize, args->timeout); + if (IS_ERR(xprt)) + return (struct rpc_clnt *)xprt; + + /* + * By default, kernel RPC client connects from a reserved port. + * CAP_NET_BIND_SERVICE will not be set for unprivileged requesters, + * but it is always enabled for rpciod, which handles the connect + * operation. + */ + xprt->resvport = 1; + if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT) + xprt->resvport = 0; + + dprintk("RPC: creating %s client for %s (xprt %p)\n", + args->program->name, args->servername, xprt); + + clnt = rpc_new_client(xprt, args->servername, args->program, + args->version, args->authflavor); + if (IS_ERR(clnt)) + return clnt; + + if (!(args->flags & RPC_CLNT_CREATE_NOPING)) { + int err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR); + if (err != 0) { + rpc_shutdown_client(clnt); + return ERR_PTR(err); + } + } + + clnt->cl_softrtry = 1; + if (args->flags & RPC_CLNT_CREATE_HARDRTRY) + clnt->cl_softrtry = 0; + + if (args->flags & RPC_CLNT_CREATE_INTR) + clnt->cl_intr = 1; + if (args->flags & RPC_CLNT_CREATE_AUTOBIND) + clnt->cl_autobind = 1; + if (args->flags & RPC_CLNT_CREATE_ONESHOT) + clnt->cl_oneshot = 1; + + return clnt; +} +EXPORT_SYMBOL(rpc_create); + /** * Create an RPC client * @xprt - pointer to xprt struct diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 4987517cc74b..17f56cfe2412 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -887,6 +887,81 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i to->to_exponential = 0; } +/** + * xprt_create_transport - create an RPC transport + * @proto: requested transport protocol + * @ap: remote peer address + * @size: length of address + * @to: timeout parameters + * + */ +struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t size, struct rpc_timeout *to) +{ + int result; + struct rpc_xprt *xprt; + struct rpc_rqst *req; + + if ((xprt = kzalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) { + dprintk("RPC: xprt_create_transport: no memory\n"); + return ERR_PTR(-ENOMEM); + } + if (size <= sizeof(xprt->addr)) { + memcpy(&xprt->addr, ap, size); + xprt->addrlen = size; + } else { + kfree(xprt); + dprintk("RPC: xprt_create_transport: address too large\n"); + return ERR_PTR(-EBADF); + } + + switch (proto) { + case IPPROTO_UDP: + result = xs_setup_udp(xprt, to); + break; + case IPPROTO_TCP: + result = xs_setup_tcp(xprt, to); + break; + default: + printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n", + proto); + return ERR_PTR(-EIO); + } + if (result) { + kfree(xprt); + dprintk("RPC: xprt_create_transport: failed, %d\n", result); + return ERR_PTR(result); + } + + spin_lock_init(&xprt->transport_lock); + spin_lock_init(&xprt->reserve_lock); + + INIT_LIST_HEAD(&xprt->free); + INIT_LIST_HEAD(&xprt->recv); + INIT_WORK(&xprt->task_cleanup, xprt_autoclose, xprt); + init_timer(&xprt->timer); + xprt->timer.function = xprt_init_autodisconnect; + xprt->timer.data = (unsigned long) xprt; + xprt->last_used = jiffies; + xprt->cwnd = RPC_INITCWND; + + rpc_init_wait_queue(&xprt->binding, "xprt_binding"); + rpc_init_wait_queue(&xprt->pending, "xprt_pending"); + rpc_init_wait_queue(&xprt->sending, "xprt_sending"); + rpc_init_wait_queue(&xprt->resend, "xprt_resend"); + rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); + + /* initialize free list */ + for (req = &xprt->slot[xprt->max_reqs-1]; req >= &xprt->slot[0]; req--) + list_add(&req->rq_list, &xprt->free); + + xprt_init_xid(xprt); + + dprintk("RPC: created transport %p with %u slots\n", xprt, + xprt->max_reqs); + + return xprt; +} + static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) { int result; From e1ec78928b4d5a31b7a847e65c6009f4229f7c0f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:20 -0400 Subject: [PATCH 0847/1063] LOCKD: Convert to use new rpc_create() API Replace xprt_create_proto/rpc_create_client with new rpc_create() interface in the Network Lock Manager. Note that the semantics of NLM transports is now "hard" instead of "soft" to provide a better guarantee that lock requests will get to the server. Test plan: Repeated runs of Connectathon locking suite. Check network trace to ensure NLM requests are working correctly. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/lockd/host.c | 48 ++++++++++++++++++++++++++---------------------- fs/lockd/mon.c | 39 ++++++++++++++++----------------------- 2 files changed, 42 insertions(+), 45 deletions(-) diff --git a/fs/lockd/host.c b/fs/lockd/host.c index a516a01561b8..703fb038c813 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -166,7 +166,6 @@ struct rpc_clnt * nlm_bind_host(struct nlm_host *host) { struct rpc_clnt *clnt; - struct rpc_xprt *xprt; dprintk("lockd: nlm_bind_host(%08x)\n", (unsigned)ntohl(host->h_addr.sin_addr.s_addr)); @@ -178,7 +177,6 @@ nlm_bind_host(struct nlm_host *host) * RPC rebind is required */ if ((clnt = host->h_rpcclnt) != NULL) { - xprt = clnt->cl_xprt; if (time_after_eq(jiffies, host->h_nextrebind)) { rpc_force_rebind(clnt); host->h_nextrebind = jiffies + NLM_HOST_REBIND; @@ -186,31 +184,37 @@ nlm_bind_host(struct nlm_host *host) host->h_nextrebind - jiffies); } } else { - xprt = xprt_create_proto(host->h_proto, &host->h_addr, NULL); - if (IS_ERR(xprt)) - goto forgetit; + unsigned long increment = nlmsvc_timeout * HZ; + struct rpc_timeout timeparms = { + .to_initval = increment, + .to_increment = increment, + .to_maxval = increment * 6UL, + .to_retries = 5U, + }; + struct rpc_create_args args = { + .protocol = host->h_proto, + .address = (struct sockaddr *)&host->h_addr, + .addrsize = sizeof(host->h_addr), + .timeout = &timeparms, + .servername = host->h_name, + .program = &nlm_program, + .version = host->h_version, + .authflavor = RPC_AUTH_UNIX, + .flags = (RPC_CLNT_CREATE_HARDRTRY | + RPC_CLNT_CREATE_AUTOBIND), + }; - xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout); - xprt->resvport = 1; /* NLM requires a reserved port */ - - /* Existing NLM servers accept AUTH_UNIX only */ - clnt = rpc_new_client(xprt, host->h_name, &nlm_program, - host->h_version, RPC_AUTH_UNIX); - if (IS_ERR(clnt)) - goto forgetit; - clnt->cl_autobind = 1; /* turn on pmap queries */ - clnt->cl_softrtry = 1; /* All queries are soft */ - - host->h_rpcclnt = clnt; + clnt = rpc_create(&args); + if (!IS_ERR(clnt)) + host->h_rpcclnt = clnt; + else { + printk("lockd: couldn't create RPC handle for %s\n", host->h_name); + clnt = NULL; + } } mutex_unlock(&host->h_mutex); return clnt; - -forgetit: - printk("lockd: couldn't create RPC handle for %s\n", host->h_name); - mutex_unlock(&host->h_mutex); - return NULL; } /* diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 3fc683f46b3e..5954dcb497e4 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -109,30 +109,23 @@ nsm_unmonitor(struct nlm_host *host) static struct rpc_clnt * nsm_create(void) { - struct rpc_xprt *xprt; - struct rpc_clnt *clnt; - struct sockaddr_in sin; + struct sockaddr_in sin = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + .sin_port = 0, + }; + struct rpc_create_args args = { + .protocol = IPPROTO_UDP, + .address = (struct sockaddr *)&sin, + .addrsize = sizeof(sin), + .servername = "localhost", + .program = &nsm_program, + .version = SM_VERSION, + .authflavor = RPC_AUTH_NULL, + .flags = (RPC_CLNT_CREATE_ONESHOT), + }; - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); - sin.sin_port = 0; - - xprt = xprt_create_proto(IPPROTO_UDP, &sin, NULL); - if (IS_ERR(xprt)) - return (struct rpc_clnt *)xprt; - xprt->resvport = 1; /* NSM requires a reserved port */ - - clnt = rpc_create_client(xprt, "localhost", - &nsm_program, SM_VERSION, - RPC_AUTH_NULL); - if (IS_ERR(clnt)) - goto out_err; - clnt->cl_softrtry = 1; - clnt->cl_oneshot = 1; - return clnt; - -out_err: - return clnt; + return rpc_create(&args); } /* From 41877d207c46f050b709f452703ade20c3b4a096 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:20 -0400 Subject: [PATCH 0848/1063] NFS: Convert NFS client to use new rpc_create() API Convert NFS client mount logic to use rpc_create() instead of the old xprt_create_proto/rpc_create_client API. Test plan: Mount stress tests. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 471d975e63c3..12941a8a6d75 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -401,8 +401,17 @@ static int nfs_create_rpc_client(struct nfs_client *clp, int proto, rpc_authflavor_t flavor) { struct rpc_timeout timeparms; - struct rpc_xprt *xprt = NULL; struct rpc_clnt *clnt = NULL; + struct rpc_create_args args = { + .protocol = proto, + .address = (struct sockaddr *)&clp->cl_addr, + .addrsize = sizeof(clp->cl_addr), + .timeout = &timeparms, + .servername = clp->cl_hostname, + .program = &nfs_program, + .version = clp->rpc_ops->version, + .authflavor = flavor, + }; if (!IS_ERR(clp->cl_rpcclient)) return 0; @@ -411,27 +420,13 @@ static int nfs_create_rpc_client(struct nfs_client *clp, int proto, clp->retrans_timeo = timeparms.to_initval; clp->retrans_count = timeparms.to_retries; - /* create transport and client */ - xprt = xprt_create_proto(proto, &clp->cl_addr, &timeparms); - if (IS_ERR(xprt)) { - dprintk("%s: cannot create RPC transport. Error = %ld\n", - __FUNCTION__, PTR_ERR(xprt)); - return PTR_ERR(xprt); - } - - /* Bind to a reserved port! */ - xprt->resvport = 1; - /* Create the client RPC handle */ - clnt = rpc_create_client(xprt, clp->cl_hostname, &nfs_program, - clp->rpc_ops->version, RPC_AUTH_UNIX); + clnt = rpc_create(&args); if (IS_ERR(clnt)) { dprintk("%s: cannot create RPC client. Error = %ld\n", __FUNCTION__, PTR_ERR(clnt)); return PTR_ERR(clnt); } - clnt->cl_intr = 1; - clnt->cl_softrtry = 1; clp->cl_rpcclient = clnt; return 0; } From ae5c79476f36512d1100e162606bb5691f2cce5a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:21 -0400 Subject: [PATCH 0849/1063] NFSD: Convert NFS server callback logic to use new rpc_create API Replace xprt_create_proto/rpc_create_client call in NFS server callback functions to use new rpc_create() API. Test plan: NFSv4 delegation functionality tests. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfsd/nfs4callback.c | 64 +++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 38 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 54b37b1d2e3a..8583d99ee740 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -375,16 +375,28 @@ nfsd4_probe_callback(struct nfs4_client *clp) { struct sockaddr_in addr; struct nfs4_callback *cb = &clp->cl_callback; - struct rpc_timeout timeparms; - struct rpc_xprt * xprt; + struct rpc_timeout timeparms = { + .to_initval = (NFSD_LEASE_TIME/4) * HZ, + .to_retries = 5, + .to_maxval = (NFSD_LEASE_TIME/2) * HZ, + .to_exponential = 1, + }; struct rpc_program * program = &cb->cb_program; - struct rpc_stat * stat = &cb->cb_stat; - struct rpc_clnt * clnt; + struct rpc_create_args args = { + .protocol = IPPROTO_TCP, + .address = (struct sockaddr *)&addr, + .addrsize = sizeof(addr), + .timeout = &timeparms, + .servername = clp->cl_name.data, + .program = program, + .version = nfs_cb_version[1]->number, + .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ + .flags = (RPC_CLNT_CREATE_NOPING), + }; struct rpc_message msg = { .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], .rpc_argp = clp, }; - char hostname[32]; int status; if (atomic_read(&cb->cb_set)) @@ -396,51 +408,27 @@ nfsd4_probe_callback(struct nfs4_client *clp) addr.sin_port = htons(cb->cb_port); addr.sin_addr.s_addr = htonl(cb->cb_addr); - /* Initialize timeout */ - timeparms.to_initval = (NFSD_LEASE_TIME/4) * HZ; - timeparms.to_retries = 0; - timeparms.to_maxval = (NFSD_LEASE_TIME/2) * HZ; - timeparms.to_exponential = 1; - - /* Create RPC transport */ - xprt = xprt_create_proto(IPPROTO_TCP, &addr, &timeparms); - if (IS_ERR(xprt)) { - dprintk("NFSD: couldn't create callback transport!\n"); - goto out_err; - } - /* Initialize rpc_program */ program->name = "nfs4_cb"; program->number = cb->cb_prog; program->nrvers = ARRAY_SIZE(nfs_cb_version); program->version = nfs_cb_version; - program->stats = stat; + program->stats = &cb->cb_stat; /* Initialize rpc_stat */ - memset(stat, 0, sizeof(struct rpc_stat)); - stat->program = program; + memset(program->stats, 0, sizeof(cb->cb_stat)); + program->stats->program = program; - /* Create RPC client - * - * XXX AUTH_UNIX only - need AUTH_GSS.... - */ - sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr.sin_addr.s_addr)); - clnt = rpc_new_client(xprt, hostname, program, 1, RPC_AUTH_UNIX); - if (IS_ERR(clnt)) { + /* Create RPC client */ + cb->cb_client = rpc_create(&args); + if (!cb->cb_client) { dprintk("NFSD: couldn't create callback client\n"); goto out_err; } - clnt->cl_intr = 0; - clnt->cl_softrtry = 1; /* Kick rpciod, put the call on the wire. */ - - if (rpciod_up() != 0) { - dprintk("nfsd: couldn't start rpciod for callbacks!\n"); + if (rpciod_up() != 0) goto out_clnt; - } - - cb->cb_client = clnt; /* the task holds a reference to the nfs4_client struct */ atomic_inc(&clp->cl_count); @@ -448,7 +436,7 @@ nfsd4_probe_callback(struct nfs4_client *clp) msg.rpc_cred = nfsd4_lookupcred(clp,0); if (IS_ERR(msg.rpc_cred)) goto out_rpciod; - status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL); + status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL); put_rpccred(msg.rpc_cred); if (status != 0) { @@ -462,7 +450,7 @@ out_rpciod: rpciod_down(); cb->cb_client = NULL; out_clnt: - rpc_shutdown_client(clnt); + rpc_shutdown_client(cb->cb_client); out_err: dprintk("NFSD: warning: no callback path to client %.*s\n", (int)clp->cl_name.len, clp->cl_name.data); From 9e1968c58d72c4b85d8a69bda1e194f9701fb224 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:21 -0400 Subject: [PATCH 0850/1063] SUNRPC: Convert RPC portmapper to use new rpc_create() API Replace xprt_create_proto/rpc_create_client calls in pmap_clnt.c with new rpc_create() API. Test plan: Repeated runs of Connectathon locking suite. Check network trace for proper PMAP calls and replies. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/pmap_clnt.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index 523f0e825dea..f476f4df0f48 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -281,25 +281,22 @@ int rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) static struct rpc_clnt *pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged) { - struct rpc_xprt *xprt; - struct rpc_clnt *clnt; + struct rpc_create_args args = { + .protocol = proto, + .address = (struct sockaddr *)srvaddr, + .addrsize = sizeof(*srvaddr), + .servername = hostname, + .program = &pmap_program, + .version = RPC_PMAP_VERSION, + .authflavor = RPC_AUTH_UNIX, + .flags = (RPC_CLNT_CREATE_ONESHOT | + RPC_CLNT_CREATE_NOPING), + }; - xprt = xprt_create_proto(proto, srvaddr, NULL); - if (IS_ERR(xprt)) - return (struct rpc_clnt *)xprt; - xprt->ops->set_port(xprt, RPC_PMAP_PORT); - xprt_set_bound(xprt); + srvaddr->sin_port = htons(RPC_PMAP_PORT); if (!privileged) - xprt->resvport = 0; - - clnt = rpc_new_client(xprt, hostname, - &pmap_program, RPC_PMAP_VERSION, - RPC_AUTH_UNIX); - if (!IS_ERR(clnt)) { - clnt->cl_softrtry = 1; - clnt->cl_oneshot = 1; - } - return clnt; + args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; + return rpc_create(&args); } /* From ff9aa5e56df60cc8565a93cc868fe25ae3f20e49 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:21 -0400 Subject: [PATCH 0851/1063] SUNRPC: Eliminate xprt_create_proto and rpc_create_client The two function call API for creating a new RPC client is now obsolete. Remove it. Also, remove an unnecessary check to see whether the caller is capable of using privileged network services. The kernel RPC client always uses a privileged ephemeral port by default; callers are responsible for checking the authority of users to make use of any RPC service, or for specifying that a nonprivileged port is acceptable. Test plan: Repeated runs of Connectathon locking suite. Check network trace to ensure correctness of NLM requests and replies. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 7 ---- include/linux/sunrpc/xprt.h | 1 - net/sunrpc/clnt.c | 42 +------------------- net/sunrpc/sunrpc_syms.c | 3 -- net/sunrpc/xprt.c | 79 ------------------------------------- net/sunrpc/xprtsock.c | 2 - 6 files changed, 1 insertion(+), 133 deletions(-) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 7817ba82f1b2..f6d1d646ce05 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -91,13 +91,6 @@ struct rpc_procinfo { #ifdef __KERNEL__ -struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname, - struct rpc_program *info, - u32 version, rpc_authflavor_t authflavor); -struct rpc_clnt *rpc_new_client(struct rpc_xprt *xprt, char *servname, - struct rpc_program *info, - u32 version, rpc_authflavor_t authflavor); - struct rpc_create_args { int protocol; struct sockaddr *address; diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index bc80fcfdd892..de4efea7c856 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -231,7 +231,6 @@ struct rpc_xprt { /* * Transport operations used by ULPs */ -struct rpc_xprt * xprt_create_proto(int proto, struct sockaddr_in *addr, struct rpc_timeout *to); void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr); /* diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index dbb93bdf6cc9..428704dd5b3e 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -97,17 +97,7 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) } } -/* - * Create an RPC client - * FIXME: This should also take a flags argument (as in task->tk_flags). - * It's called (among others) from pmap_create_client, which may in - * turn be called by an async task. In this case, rpciod should not be - * made to sleep too long. - */ -struct rpc_clnt * -rpc_new_client(struct rpc_xprt *xprt, char *servname, - struct rpc_program *program, u32 vers, - rpc_authflavor_t flavor) +static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *program, u32 vers, rpc_authflavor_t flavor) { struct rpc_version *version; struct rpc_clnt *clnt = NULL; @@ -253,36 +243,6 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) } EXPORT_SYMBOL(rpc_create); -/** - * Create an RPC client - * @xprt - pointer to xprt struct - * @servname - name of server - * @info - rpc_program - * @version - rpc_program version - * @authflavor - rpc_auth flavour to use - * - * Creates an RPC client structure, then pings the server in order to - * determine if it is up, and if it supports this program and version. - * - * This function should never be called by asynchronous tasks such as - * the portmapper. - */ -struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname, - struct rpc_program *info, u32 version, rpc_authflavor_t authflavor) -{ - struct rpc_clnt *clnt; - int err; - - clnt = rpc_new_client(xprt, servname, info, version, authflavor); - if (IS_ERR(clnt)) - return clnt; - err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR); - if (err == 0) - return clnt; - rpc_shutdown_client(clnt); - return ERR_PTR(err); -} - /* * This function clones the RPC client structure. It allows us to share the * same transport while varying parameters such as the authentication diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index f38f939ce95f..26c0531d7e25 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -36,8 +36,6 @@ EXPORT_SYMBOL(rpc_wake_up_status); EXPORT_SYMBOL(rpc_release_task); /* RPC client functions */ -EXPORT_SYMBOL(rpc_create_client); -EXPORT_SYMBOL(rpc_new_client); EXPORT_SYMBOL(rpc_clone_client); EXPORT_SYMBOL(rpc_bind_new_program); EXPORT_SYMBOL(rpc_destroy_client); @@ -57,7 +55,6 @@ EXPORT_SYMBOL(rpc_queue_upcall); EXPORT_SYMBOL(rpc_mkpipe); /* Client transport */ -EXPORT_SYMBOL(xprt_create_proto); EXPORT_SYMBOL(xprt_set_timeout); /* Client credential cache */ diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 17f56cfe2412..e4f64fb58ff2 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -962,85 +962,6 @@ struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t si return xprt; } -static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) -{ - int result; - struct rpc_xprt *xprt; - struct rpc_rqst *req; - - if ((xprt = kzalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) - return ERR_PTR(-ENOMEM); - - memcpy(&xprt->addr, ap, sizeof(*ap)); - xprt->addrlen = sizeof(*ap); - - switch (proto) { - case IPPROTO_UDP: - result = xs_setup_udp(xprt, to); - break; - case IPPROTO_TCP: - result = xs_setup_tcp(xprt, to); - break; - default: - printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n", - proto); - result = -EIO; - break; - } - if (result) { - kfree(xprt); - return ERR_PTR(result); - } - - spin_lock_init(&xprt->transport_lock); - spin_lock_init(&xprt->reserve_lock); - - INIT_LIST_HEAD(&xprt->free); - INIT_LIST_HEAD(&xprt->recv); - INIT_WORK(&xprt->task_cleanup, xprt_autoclose, xprt); - init_timer(&xprt->timer); - xprt->timer.function = xprt_init_autodisconnect; - xprt->timer.data = (unsigned long) xprt; - xprt->last_used = jiffies; - xprt->cwnd = RPC_INITCWND; - - rpc_init_wait_queue(&xprt->binding, "xprt_binding"); - rpc_init_wait_queue(&xprt->pending, "xprt_pending"); - rpc_init_wait_queue(&xprt->sending, "xprt_sending"); - rpc_init_wait_queue(&xprt->resend, "xprt_resend"); - rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); - - /* initialize free list */ - for (req = &xprt->slot[xprt->max_reqs-1]; req >= &xprt->slot[0]; req--) - list_add(&req->rq_list, &xprt->free); - - xprt_init_xid(xprt); - - dprintk("RPC: created transport %p with %u slots\n", xprt, - xprt->max_reqs); - - return xprt; -} - -/** - * xprt_create_proto - create an RPC client transport - * @proto: requested transport protocol - * @sap: remote peer's address - * @to: timeout parameters for new transport - * - */ -struct rpc_xprt *xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) -{ - struct rpc_xprt *xprt; - - xprt = xprt_setup(proto, sap, to); - if (IS_ERR(xprt)) - dprintk("RPC: xprt_create_proto failed\n"); - else - dprintk("RPC: xprt_create_proto created xprt %p\n", xprt); - return xprt; -} - /** * xprt_destroy - destroy an RPC transport, killing off all requests. * @xprt: transport to destroy diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 17179aa4c207..0b84fab68d7e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1376,7 +1376,6 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) xprt->prot = IPPROTO_UDP; xprt->tsh_size = 0; - xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; /* XXX: header size can vary due to auth type, IPv6, etc. */ xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); @@ -1423,7 +1422,6 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) xprt->prot = IPPROTO_TCP; xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); - xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); From b86acd501a34227e0ed2b2d54dc8002c1701ce17 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:22 -0400 Subject: [PATCH 0852/1063] SUNRPC: export new RPC client functions with _GPL This patch is optional. It has been suggested that the RPC client internal functions used by upper layer protocols (such as NFS) be exported via EXPORT_SYMBOL_GPL. This patch does that. Test plan: Compile kernel with CONFIG_NFS enabled as a module. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 428704dd5b3e..87efcd207f23 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -241,7 +241,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) return clnt; } -EXPORT_SYMBOL(rpc_create); +EXPORT_SYMBOL_GPL(rpc_create); /* * This function clones the RPC client structure. It allows us to share the @@ -573,7 +573,7 @@ size_t rpc_peeraddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t bufsize) memcpy(buf, &clnt->cl_xprt->addr, bytes); return xprt->addrlen; } -EXPORT_SYMBOL(rpc_peeraddr); +EXPORT_SYMBOL_GPL(rpc_peeraddr); /** * rpc_peeraddr2str - return remote peer address in printable format @@ -586,7 +586,7 @@ char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format) struct rpc_xprt *xprt = clnt->cl_xprt; return xprt->ops->print_addr(xprt, format); } -EXPORT_SYMBOL(rpc_peeraddr2str); +EXPORT_SYMBOL_GPL(rpc_peeraddr2str); void rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) @@ -608,7 +608,7 @@ size_t rpc_max_payload(struct rpc_clnt *clnt) { return clnt->cl_xprt->max_payload; } -EXPORT_SYMBOL(rpc_max_payload); +EXPORT_SYMBOL_GPL(rpc_max_payload); /** * rpc_force_rebind - force transport to check that remote port is unchanged @@ -620,7 +620,7 @@ void rpc_force_rebind(struct rpc_clnt *clnt) if (clnt->cl_autobind) xprt_clear_bound(clnt->cl_xprt); } -EXPORT_SYMBOL(rpc_force_rebind); +EXPORT_SYMBOL_GPL(rpc_force_rebind); /* * Restart an (async) RPC call. Usually called from within the From d3db90e270791b21cd00d3c094884bffa907cc9e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:22 -0400 Subject: [PATCH 0853/1063] NFS: remove a no-longer-needed error check in nfs_symlink() In the early days of NFS, there was no duplicate reply cache on the server. Thus retransmitted non-idempotent requests often found that the request had already completed on the server. To avoid passing an unanticipated return code to unsuspecting applications, NFS clients would often shunt error codes that implied the request had been retried but already completed. Thanks to NFS over TCP, duplicate reply caches on the server, and network performance and reliability improvements, it is safe to remove such checks. Test plan: None. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 9b496ef4abea..084e8cb41c84 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1476,14 +1476,10 @@ dentry->d_parent->d_name.name, dentry->d_name.name); error = NFS_PROTO(dir)->symlink(dir, &dentry->d_name, &qsymname, &attr, &sym_fh, &sym_attr); nfs_end_data_update(dir); - if (!error) { + if (!error) error = nfs_instantiate(dentry, &sym_fh, &sym_attr); - } else { - if (error == -EEXIST) - printk("nfs_proc_symlink: %s/%s already exists??\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + else d_drop(dentry); - } unlock_kernel(); return error; } From 4f390c152bc87165da4b1f5b7d870b46fb106d4e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:22 -0400 Subject: [PATCH 0854/1063] NFS: Fix double d_drop in nfs_instantiate() error path If the LOOKUP or GETATTR in nfs_instantiate fail, nfs_instantiate will do a d_drop before returning. But some callers already do a d_drop in the case of an error return. Make certain we do only one d_drop in all error paths. This issue was introduced because over time, the symlink proc API diverged slightly from the create/mkdir/mknod proc API. To prevent other coding mistakes of this type, change the symlink proc API to be more like create/mkdir/mknod and move the nfs_instantiate call into the symlink proc routines so it is used in exactly the same way for create, mkdir, mknod, and symlink. Test plan: Connectathon, all versions of NFS. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 16 ++++------------ fs/nfs/nfs3proc.c | 26 ++++++++++++++++---------- fs/nfs/nfs4proc.c | 31 ++++++++++++++++--------------- fs/nfs/proc.c | 29 +++++++++++++++++++++-------- include/linux/nfs_xdr.h | 5 ++--- 5 files changed, 59 insertions(+), 48 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 084e8cb41c84..affd3ae52e55 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1147,23 +1147,20 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, struct inode *dir = dentry->d_parent->d_inode; error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); if (error) - goto out_err; + return error; } if (!(fattr->valid & NFS_ATTR_FATTR)) { struct nfs_server *server = NFS_SB(dentry->d_sb); error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr); if (error < 0) - goto out_err; + return error; } inode = nfs_fhget(dentry->d_sb, fhandle, fattr); error = PTR_ERR(inode); if (IS_ERR(inode)) - goto out_err; + return error; d_instantiate(dentry, inode); return 0; -out_err: - d_drop(dentry); - return error; } /* @@ -1448,8 +1445,6 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { struct iattr attr; - struct nfs_fattr sym_attr; - struct nfs_fh sym_fh; struct qstr qsymname; int error; @@ -1473,12 +1468,9 @@ dentry->d_parent->d_name.name, dentry->d_name.name); lock_kernel(); nfs_begin_data_update(dir); - error = NFS_PROTO(dir)->symlink(dir, &dentry->d_name, &qsymname, - &attr, &sym_fh, &sym_attr); + error = NFS_PROTO(dir)->symlink(dir, dentry, &qsymname, &attr); nfs_end_data_update(dir); if (!error) - error = nfs_instantiate(dentry, &sym_fh, &sym_attr); - else d_drop(dentry); unlock_kernel(); return error; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 9e8258ece6fd..d85ac427c326 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -544,23 +544,23 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) } static int -nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, - struct iattr *sattr, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) +nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path, + struct iattr *sattr) { - struct nfs_fattr dir_attr; + struct nfs_fh fhandle; + struct nfs_fattr fattr, dir_attr; struct nfs3_symlinkargs arg = { .fromfh = NFS_FH(dir), - .fromname = name->name, - .fromlen = name->len, + .fromname = dentry->d_name.name, + .fromlen = dentry->d_name.len, .topath = path->name, .tolen = path->len, .sattr = sattr }; struct nfs3_diropres res = { .dir_attr = &dir_attr, - .fh = fhandle, - .fattr = fattr + .fh = &fhandle, + .fattr = &fattr }; struct rpc_message msg = { .rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK], @@ -571,11 +571,17 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, if (path->len > NFS3_MAXPATHLEN) return -ENAMETOOLONG; - dprintk("NFS call symlink %s -> %s\n", name->name, path->name); + + dprintk("NFS call symlink %s -> %s\n", dentry->d_name.name, + path->name); nfs_fattr_init(&dir_attr); - nfs_fattr_init(fattr); + nfs_fattr_init(&fattr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_post_op_update_inode(dir, &dir_attr); + if (status != 0) + goto out; + status = nfs_instantiate(dentry, &fhandle, &fattr); +out: dprintk("NFS reply symlink: %d\n", status); return status; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a825547e8214..2d18eac6bee5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2084,24 +2084,24 @@ static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *n return err; } -static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name, - struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) +static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, + struct qstr *path, struct iattr *sattr) { struct nfs_server *server = NFS_SERVER(dir); - struct nfs_fattr dir_fattr; + struct nfs_fh fhandle; + struct nfs_fattr fattr, dir_fattr; struct nfs4_create_arg arg = { .dir_fh = NFS_FH(dir), .server = server, - .name = name, + .name = &dentry->d_name, .attrs = sattr, .ftype = NF4LNK, .bitmask = server->attr_bitmask, }; struct nfs4_create_res res = { .server = server, - .fh = fhandle, - .fattr = fattr, + .fh = &fhandle, + .fattr = &fattr, .dir_fattr = &dir_fattr, }; struct rpc_message msg = { @@ -2113,27 +2113,28 @@ static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name, if (path->len > NFS4_MAXPATHLEN) return -ENAMETOOLONG; + arg.u.symlink = path; - nfs_fattr_init(fattr); + nfs_fattr_init(&fattr); nfs_fattr_init(&dir_fattr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); - if (!status) + if (!status) { update_changeattr(dir, &res.dir_cinfo); - nfs_post_op_update_inode(dir, res.dir_fattr); + nfs_post_op_update_inode(dir, res.dir_fattr); + status = nfs_instantiate(dentry, &fhandle, &fattr); + } return status; } -static int nfs4_proc_symlink(struct inode *dir, struct qstr *name, - struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) +static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, + struct qstr *path, struct iattr *sattr) { struct nfs4_exception exception = { }; int err; do { err = nfs4_handle_exception(NFS_SERVER(dir), - _nfs4_proc_symlink(dir, name, path, sattr, - fhandle, fattr), + _nfs4_proc_symlink(dir, dentry, path, sattr), &exception); } while (exception.retry); return err; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 5a8b9407ee9a..0b507bf0f330 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -425,14 +425,15 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) } static int -nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, - struct iattr *sattr, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) +nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path, + struct iattr *sattr) { + struct nfs_fh fhandle; + struct nfs_fattr fattr; struct nfs_symlinkargs arg = { .fromfh = NFS_FH(dir), - .fromname = name->name, - .fromlen = name->len, + .fromname = dentry->d_name.name, + .fromlen = dentry->d_name.len, .topath = path->name, .tolen = path->len, .sattr = sattr @@ -445,11 +446,23 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, if (path->len > NFS2_MAXPATHLEN) return -ENAMETOOLONG; - dprintk("NFS call symlink %s -> %s\n", name->name, path->name); - nfs_fattr_init(fattr); - fhandle->size = 0; + + dprintk("NFS call symlink %s -> %s\n", dentry->d_name.name, + path->name); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); + + /* + * V2 SYMLINK requests don't return any attributes. Setting the + * filehandle size to zero indicates to nfs_instantiate that it + * should fill in the data with a LOOKUP call on the wire. + */ + if (status == 0) { + nfs_fattr_init(&fattr); + fhandle.size = 0; + status = nfs_instantiate(dentry, &fhandle, &fattr); + } + dprintk("NFS reply symlink: %d\n", status); return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0f33e621892f..ddf5d75e97a2 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -793,9 +793,8 @@ struct nfs_rpc_ops { int (*rename) (struct inode *, struct qstr *, struct inode *, struct qstr *); int (*link) (struct inode *, struct inode *, struct qstr *); - int (*symlink) (struct inode *, struct qstr *, struct qstr *, - struct iattr *, struct nfs_fh *, - struct nfs_fattr *); + int (*symlink) (struct inode *, struct dentry *, struct qstr *, + struct iattr *); int (*mkdir) (struct inode *, struct dentry *, struct iattr *); int (*rmdir) (struct inode *, struct qstr *); int (*readdir) (struct dentry *, struct rpc_cred *, From 873101b33776780d32610fc4c90c7358a5e98f51 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:23 -0400 Subject: [PATCH 0855/1063] NFS: copy symlinks into page cache before sending NFS SYMLINK request Currently the NFS client does not cache symlinks it creates. They get cached only when the NFS client reads them back from the server. Copy the symlink into the page cache before sending it. Test plan: Connectathon, all NFS versions. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 86 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 68 insertions(+), 18 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index affd3ae52e55..b483e5d206cb 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -1441,39 +1442,88 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) return error; } -static int -nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) +/* + * To create a symbolic link, most file systems instantiate a new inode, + * add a page to it containing the path, then write it out to the disk + * using prepare_write/commit_write. + * + * Unfortunately the NFS client can't create the in-core inode first + * because it needs a file handle to create an in-core inode (see + * fs/nfs/inode.c:nfs_fhget). We only have a file handle *after* the + * symlink request has completed on the server. + * + * So instead we allocate a raw page, copy the symname into it, then do + * the SYMLINK request with the page as the buffer. If it succeeds, we + * now have a new file handle and can instantiate an in-core NFS inode + * and move the raw page into its mapping. + */ +static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { + struct pagevec lru_pvec; + struct page *page; + char *kaddr; struct iattr attr; - struct qstr qsymname; + unsigned int pathlen = strlen(symname); + struct qstr qsymname = { + .name = symname, + .len = pathlen, + }; int error; dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name, symname); -#ifdef NFS_PARANOIA -if (dentry->d_inode) -printk("nfs_proc_symlink: %s/%s not negative!\n", -dentry->d_parent->d_name.name, dentry->d_name.name); -#endif - /* - * Fill in the sattr for the call. - * Note: SunOS 4.1.2 crashes if the mode isn't initialized! - */ - attr.ia_valid = ATTR_MODE; - attr.ia_mode = S_IFLNK | S_IRWXUGO; + if (pathlen > PAGE_SIZE) + return -ENAMETOOLONG; - qsymname.name = symname; - qsymname.len = strlen(symname); + attr.ia_mode = S_IFLNK | S_IRWXUGO; + attr.ia_valid = ATTR_MODE; lock_kernel(); + + page = alloc_page(GFP_KERNEL); + if (!page) { + unlock_kernel(); + return -ENOMEM; + } + + kaddr = kmap_atomic(page, KM_USER0); + memcpy(kaddr, symname, pathlen); + if (pathlen < PAGE_SIZE) + memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen); + kunmap_atomic(kaddr, KM_USER0); + + /* XXX: eventually this will pass in {page, pathlen}, + * instead of qsymname; need XDR changes for that */ nfs_begin_data_update(dir); error = NFS_PROTO(dir)->symlink(dir, dentry, &qsymname, &attr); nfs_end_data_update(dir); - if (!error) + if (error != 0) { + dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n", + dir->i_sb->s_id, dir->i_ino, + dentry->d_name.name, symname, error); d_drop(dentry); + __free_page(page); + unlock_kernel(); + return error; + } + + /* + * No big deal if we can't add this page to the page cache here. + * READLINK will get the missing page from the server if needed. + */ + pagevec_init(&lru_pvec, 0); + if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0, + GFP_KERNEL)) { + if (!pagevec_add(&lru_pvec, page)) + __pagevec_lru_add(&lru_pvec); + SetPageUptodate(page); + unlock_page(page); + } else + __free_page(page); + unlock_kernel(); - return error; + return 0; } static int From 94a6d75320b3681e6e728b70e18bd186cb55e682 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Aug 2006 20:06:23 -0400 Subject: [PATCH 0856/1063] NFS: Use cached page as buffer for NFS symlink requests Now that we have a copy of the symlink path in the page cache, we can pass a struct page down to the XDR routines instead of a string buffer. Test plan: Connectathon, all NFS versions. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 8 +------- fs/nfs/nfs2xdr.c | 21 ++++++++++++++++++--- fs/nfs/nfs3proc.c | 14 +++++++------- fs/nfs/nfs3xdr.c | 7 +++++-- fs/nfs/nfs4proc.c | 12 +++++++----- fs/nfs/nfs4xdr.c | 8 ++++---- fs/nfs/proc.c | 14 +++++++------- include/linux/nfs_xdr.h | 17 ++++++++++------- 8 files changed, 59 insertions(+), 42 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b483e5d206cb..51328ae640dd 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1464,10 +1464,6 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym char *kaddr; struct iattr attr; unsigned int pathlen = strlen(symname); - struct qstr qsymname = { - .name = symname, - .len = pathlen, - }; int error; dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id, @@ -1493,10 +1489,8 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen); kunmap_atomic(kaddr, KM_USER0); - /* XXX: eventually this will pass in {page, pathlen}, - * instead of qsymname; need XDR changes for that */ nfs_begin_data_update(dir); - error = NFS_PROTO(dir)->symlink(dir, dentry, &qsymname, &attr); + error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr); nfs_end_data_update(dir); if (error != 0) { dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n", diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 67391eef6b93..b49501fc0a79 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -51,7 +51,7 @@ #define NFS_createargs_sz (NFS_diropargs_sz+NFS_sattr_sz) #define NFS_renameargs_sz (NFS_diropargs_sz+NFS_diropargs_sz) #define NFS_linkargs_sz (NFS_fhandle_sz+NFS_diropargs_sz) -#define NFS_symlinkargs_sz (NFS_diropargs_sz+NFS_path_sz+NFS_sattr_sz) +#define NFS_symlinkargs_sz (NFS_diropargs_sz+1+NFS_sattr_sz) #define NFS_readdirargs_sz (NFS_fhandle_sz+2) #define NFS_attrstat_sz (1+NFS_fattr_sz) @@ -351,11 +351,26 @@ nfs_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs_linkargs *args) static int nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args) { + struct xdr_buf *sndbuf = &req->rq_snd_buf; + size_t pad; + p = xdr_encode_fhandle(p, args->fromfh); p = xdr_encode_array(p, args->fromname, args->fromlen); - p = xdr_encode_array(p, args->topath, args->tolen); + *p++ = htonl(args->pathlen); + sndbuf->len = xdr_adjust_iovec(sndbuf->head, p); + + xdr_encode_pages(sndbuf, args->pages, 0, args->pathlen); + + /* + * xdr_encode_pages may have added a few bytes to ensure the + * pathname ends on a 4-byte boundary. Start encoding the + * attributes after the pad bytes. + */ + pad = sndbuf->tail->iov_len; + if (pad > 0) + p++; p = xdr_encode_sattr(p, args->sattr); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + sndbuf->len += xdr_adjust_iovec(sndbuf->tail, p) - pad; return 0; } diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index d85ac427c326..f8688eaa0001 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -544,8 +544,8 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) } static int -nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path, - struct iattr *sattr) +nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, + unsigned int len, struct iattr *sattr) { struct nfs_fh fhandle; struct nfs_fattr fattr, dir_attr; @@ -553,8 +553,8 @@ nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path, .fromfh = NFS_FH(dir), .fromname = dentry->d_name.name, .fromlen = dentry->d_name.len, - .topath = path->name, - .tolen = path->len, + .pages = &page, + .pathlen = len, .sattr = sattr }; struct nfs3_diropres res = { @@ -569,11 +569,11 @@ nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path, }; int status; - if (path->len > NFS3_MAXPATHLEN) + if (len > NFS3_MAXPATHLEN) return -ENAMETOOLONG; - dprintk("NFS call symlink %s -> %s\n", dentry->d_name.name, - path->name); + dprintk("NFS call symlink %s\n", dentry->d_name.name); + nfs_fattr_init(&dir_attr); nfs_fattr_init(&fattr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 0250269e9753..16556fa4effb 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -56,7 +56,7 @@ #define NFS3_writeargs_sz (NFS3_fh_sz+5) #define NFS3_createargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz) #define NFS3_mkdirargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz) -#define NFS3_symlinkargs_sz (NFS3_diropargs_sz+NFS3_path_sz+NFS3_sattr_sz) +#define NFS3_symlinkargs_sz (NFS3_diropargs_sz+1+NFS3_sattr_sz) #define NFS3_mknodargs_sz (NFS3_diropargs_sz+2+NFS3_sattr_sz) #define NFS3_renameargs_sz (NFS3_diropargs_sz+NFS3_diropargs_sz) #define NFS3_linkargs_sz (NFS3_fh_sz+NFS3_diropargs_sz) @@ -398,8 +398,11 @@ nfs3_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_symlinkargs *args p = xdr_encode_fhandle(p, args->fromfh); p = xdr_encode_array(p, args->fromname, args->fromlen); p = xdr_encode_sattr(p, args->sattr); - p = xdr_encode_array(p, args->topath, args->tolen); + *p++ = htonl(args->pathlen); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + /* Copy the page */ + xdr_encode_pages(&req->rq_snd_buf, args->pages, 0, args->pathlen); return 0; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2d18eac6bee5..7f60beb40df3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2085,7 +2085,7 @@ static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *n } static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, - struct qstr *path, struct iattr *sattr) + struct page *page, unsigned int len, struct iattr *sattr) { struct nfs_server *server = NFS_SERVER(dir); struct nfs_fh fhandle; @@ -2111,10 +2111,11 @@ static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, }; int status; - if (path->len > NFS4_MAXPATHLEN) + if (len > NFS4_MAXPATHLEN) return -ENAMETOOLONG; - arg.u.symlink = path; + arg.u.symlink.pages = &page; + arg.u.symlink.len = len; nfs_fattr_init(&fattr); nfs_fattr_init(&dir_fattr); @@ -2128,13 +2129,14 @@ static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, } static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, - struct qstr *path, struct iattr *sattr) + struct page *page, unsigned int len, struct iattr *sattr) { struct nfs4_exception exception = { }; int err; do { err = nfs4_handle_exception(NFS_SERVER(dir), - _nfs4_proc_symlink(dir, dentry, path, sattr), + _nfs4_proc_symlink(dir, dentry, page, + len, sattr), &exception); } while (exception.retry); return err; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 99926067eca4..3dd413f52da1 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -128,7 +128,7 @@ static int nfs4_stat_to_errno(int); #define decode_link_maxsz (op_decode_hdr_maxsz + 5) #define encode_symlink_maxsz (op_encode_hdr_maxsz + \ 1 + nfs4_name_maxsz + \ - nfs4_path_maxsz + \ + 1 + \ nfs4_fattr_maxsz) #define decode_symlink_maxsz (op_decode_hdr_maxsz + 8) #define encode_create_maxsz (op_encode_hdr_maxsz + \ @@ -673,9 +673,9 @@ static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *c switch (create->ftype) { case NF4LNK: - RESERVE_SPACE(4 + create->u.symlink->len); - WRITE32(create->u.symlink->len); - WRITEMEM(create->u.symlink->name, create->u.symlink->len); + RESERVE_SPACE(4); + WRITE32(create->u.symlink.len); + xdr_write_pages(xdr, create->u.symlink.pages, 0, create->u.symlink.len); break; case NF4BLK: case NF4CHR: diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 0b507bf0f330..630e50647bbb 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -425,8 +425,8 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) } static int -nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path, - struct iattr *sattr) +nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, + unsigned int len, struct iattr *sattr) { struct nfs_fh fhandle; struct nfs_fattr fattr; @@ -434,8 +434,8 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path, .fromfh = NFS_FH(dir), .fromname = dentry->d_name.name, .fromlen = dentry->d_name.len, - .topath = path->name, - .tolen = path->len, + .pages = &page, + .pathlen = len, .sattr = sattr }; struct rpc_message msg = { @@ -444,11 +444,11 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path, }; int status; - if (path->len > NFS2_MAXPATHLEN) + if (len > NFS2_MAXPATHLEN) return -ENAMETOOLONG; - dprintk("NFS call symlink %s -> %s\n", dentry->d_name.name, - path->name); + dprintk("NFS call symlink %s\n", dentry->d_name.name); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index ddf5d75e97a2..dc5397d9d23c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -358,8 +358,8 @@ struct nfs_symlinkargs { struct nfs_fh * fromfh; const char * fromname; unsigned int fromlen; - const char * topath; - unsigned int tolen; + struct page ** pages; + unsigned int pathlen; struct iattr * sattr; }; @@ -434,8 +434,8 @@ struct nfs3_symlinkargs { struct nfs_fh * fromfh; const char * fromname; unsigned int fromlen; - const char * topath; - unsigned int tolen; + struct page ** pages; + unsigned int pathlen; struct iattr * sattr; }; @@ -533,7 +533,10 @@ struct nfs4_accessres { struct nfs4_create_arg { u32 ftype; union { - struct qstr * symlink; /* NF4LNK */ + struct { + struct page ** pages; + unsigned int len; + } symlink; /* NF4LNK */ struct { u32 specdata1; u32 specdata2; @@ -793,8 +796,8 @@ struct nfs_rpc_ops { int (*rename) (struct inode *, struct qstr *, struct inode *, struct qstr *); int (*link) (struct inode *, struct inode *, struct qstr *); - int (*symlink) (struct inode *, struct dentry *, struct qstr *, - struct iattr *); + int (*symlink) (struct inode *, struct dentry *, struct page *, + unsigned int, struct iattr *); int (*mkdir) (struct inode *, struct dentry *, struct iattr *); int (*rmdir) (struct inode *, struct qstr *); int (*readdir) (struct dentry *, struct rpc_cred *, From 275a082fe9308e710324e26ccb5363c53d8fd45f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Aug 2006 20:06:24 -0400 Subject: [PATCH 0857/1063] Add a real API for dealing with blk_congestion_wait() Signed-off-by: Trond Myklebust --- block/ll_rw_blk.c | 12 ++++++++++++ fs/nfs/write.c | 1 + include/linux/blkdev.h | 1 + include/linux/writeback.h | 1 + mm/page-writeback.c | 9 +++++++++ 5 files changed, 24 insertions(+) diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index ddd9253f9d55..dcbd6ff1fa33 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -2734,6 +2734,18 @@ long blk_congestion_wait(int rw, long timeout) EXPORT_SYMBOL(blk_congestion_wait); +/** + * blk_congestion_end - wake up sleepers on a congestion queue + * @rw: READ or WRITE + */ +void blk_congestion_end(int rw) +{ + wait_queue_head_t *wqh = &congestion_wqh[rw]; + + if (waitqueue_active(wqh)) + wake_up(wqh); +} + /* * Has to be called with the request spinlock acquired */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 453d44666ea5..38ba5c09af08 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -396,6 +396,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) out: clear_bit(BDI_write_congested, &bdi->state); wake_up_all(&nfs_write_congestion); + writeback_congestion_end(); return err; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index aafe82788b4e..96c9040c00a8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -746,6 +746,7 @@ extern void blk_queue_free_tags(request_queue_t *); extern int blk_queue_resize_tags(request_queue_t *, int); extern void blk_queue_invalidate_tags(request_queue_t *); extern long blk_congestion_wait(int rw, long timeout); +extern void blk_congestion_end(int rw); extern void blk_rq_bio_prep(request_queue_t *, struct request *, struct bio *); extern int blkdev_issue_flush(struct block_device *, sector_t *); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 9e38b566d0e7..0422036af4eb 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -85,6 +85,7 @@ int wakeup_pdflush(long nr_pages); void laptop_io_completion(void); void laptop_sync_completion(void); void throttle_vm_writeout(void); +void writeback_congestion_end(void); /* These are exported to sysctl. */ extern int dirty_background_ratio; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index e630188ccc40..77a0bc4e261a 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -802,6 +802,15 @@ int test_set_page_writeback(struct page *page) } EXPORT_SYMBOL(test_set_page_writeback); +/* + * Wakes up tasks that are being throttled due to writeback congestion + */ +void writeback_congestion_end(void) +{ + blk_congestion_end(WRITE); +} +EXPORT_SYMBOL(writeback_congestion_end); + /* * Return true if any of the pages in the mapping are marged with the * passed tag. From 5dd3177ae5012c1e2ad7a9ffdbd0e0d0de2f60e4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 24 Aug 2006 01:03:05 -0400 Subject: [PATCH 0858/1063] NFSv4: Fix a use-after-free issue with the nfs server. Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 36 +++++++++++++++++++++--------------- fs/nfs/nfs4renewd.c | 1 + fs/nfs/super.c | 8 +++++--- include/linux/nfs_fs_sb.h | 1 + 4 files changed, 28 insertions(+), 18 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 12941a8a6d75..f1ff2aec2ca5 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -164,6 +164,26 @@ error_0: return NULL; } +static void nfs4_shutdown_client(struct nfs_client *clp) +{ +#ifdef CONFIG_NFS_V4 + if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) + nfs4_kill_renewd(clp); + while (!list_empty(&clp->cl_unused)) { + struct nfs4_state_owner *sp; + + sp = list_entry(clp->cl_unused.next, + struct nfs4_state_owner, + so_list); + list_del(&sp->so_list); + kfree(sp); + } + BUG_ON(!list_empty(&clp->cl_state_owners)); + if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) + nfs_idmap_delete(clp); +#endif +} + /* * Destroy a shared client record */ @@ -171,21 +191,7 @@ static void nfs_free_client(struct nfs_client *clp) { dprintk("--> nfs_free_client(%d)\n", clp->cl_nfsversion); -#ifdef CONFIG_NFS_V4 - if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) { - while (!list_empty(&clp->cl_unused)) { - struct nfs4_state_owner *sp; - - sp = list_entry(clp->cl_unused.next, - struct nfs4_state_owner, - so_list); - list_del(&sp->so_list); - kfree(sp); - } - BUG_ON(!list_empty(&clp->cl_state_owners)); - nfs_idmap_delete(clp); - } -#endif + nfs4_shutdown_client(clp); /* -EIO all pending I/O */ if (!IS_ERR(clp->cl_rpcclient)) diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index f2c893690ac4..7b6df1852e75 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -121,6 +121,7 @@ nfs4_schedule_state_renewal(struct nfs_client *clp) __FUNCTION__, (timeout + HZ - 1) / HZ); cancel_delayed_work(&clp->cl_renewd); schedule_delayed_work(&clp->cl_renewd, timeout); + set_bit(NFS_CS_RENEWD, &clp->cl_res_state); spin_unlock(&clp->cl_lock); } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 97cfb143e09f..665949d27798 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -883,13 +883,15 @@ static int nfs4_get_sb(struct file_system_type *fs_type, goto out_free; } + if (s->s_fs_info != server) { + nfs_free_server(server); + server = NULL; + } + if (!s->s_root) { /* initial superblock/root creation */ s->s_flags = flags; - nfs4_fill_super(s); - } else { - nfs_free_server(server); } mntroot = nfs4_get_root(s, &mntfh); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 6d0be0efd1b5..7ccfc7ef0a83 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -19,6 +19,7 @@ struct nfs_client { #define NFS_CS_RPCIOD 0 /* - rpciod started */ #define NFS_CS_CALLBACK 1 /* - callback started */ #define NFS_CS_IDMAP 2 /* - idmap started */ +#define NFS_CS_RENEWD 3 /* - renewd started */ struct sockaddr_in cl_addr; /* server identifier */ char * cl_hostname; /* hostname of server */ struct list_head cl_share_link; /* link in global client list */ From 158998b6fe36f6acef087f574c96d44713499cc9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 24 Aug 2006 01:03:17 -0400 Subject: [PATCH 0859/1063] SUNRPC: Make rpc_mkpipe() take the parent dentry as an argument Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 6 +---- include/linux/sunrpc/rpc_pipe_fs.h | 2 +- net/sunrpc/auth_gss/auth_gss.c | 7 ++---- net/sunrpc/rpc_pipe.c | 38 ++++++++++++++++++------------ 4 files changed, 27 insertions(+), 26 deletions(-) diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index f96dfac7dc9a..82ad7110a1c0 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -84,7 +84,6 @@ struct idmap_hashtable { }; struct idmap { - char idmap_path[48]; struct dentry *idmap_dentry; wait_queue_head_t idmap_wq; struct idmap_msg idmap_im; @@ -119,10 +118,7 @@ nfs_idmap_new(struct nfs_client *clp) if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL) return -ENOMEM; - snprintf(idmap->idmap_path, sizeof(idmap->idmap_path), - "%s/idmap", clp->cl_rpcclient->cl_pathname); - - idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path, + idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, "idmap", idmap, &idmap_upcall_ops, 0); if (IS_ERR(idmap->idmap_dentry)) { error = PTR_ERR(idmap->idmap_dentry); diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index a481472c9484..a2eb9b4a9de3 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -43,7 +43,7 @@ extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *); extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *); extern int rpc_rmdir(struct dentry *); -extern struct dentry *rpc_mkpipe(char *, void *, struct rpc_pipe_ops *, int flags); +extern struct dentry *rpc_mkpipe(struct dentry *, const char *, void *, struct rpc_pipe_ops *, int flags); extern int rpc_unlink(struct dentry *); extern struct vfsmount *rpc_get_mount(void); extern void rpc_put_mount(void); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index ef1cf5b476c8..6eed3e166ba3 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -88,7 +88,6 @@ struct gss_auth { struct list_head upcalls; struct rpc_clnt *client; struct dentry *dentry; - char path[48]; spinlock_t lock; }; @@ -690,10 +689,8 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) if (err) goto err_put_mech; - snprintf(gss_auth->path, sizeof(gss_auth->path), "%s/%s", - clnt->cl_pathname, - gss_auth->mech->gm_name); - gss_auth->dentry = rpc_mkpipe(gss_auth->path, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); + gss_auth->dentry = rpc_mkpipe(clnt->cl_dentry, gss_auth->mech->gm_name, + clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); if (IS_ERR(gss_auth->dentry)) { err = PTR_ERR(gss_auth->dentry); goto err_put_mech; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index c21dc07f2a8c..11ec12a09d70 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -621,17 +621,13 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry) } static struct dentry * -rpc_lookup_negative(char *path, struct nameidata *nd) +rpc_lookup_create(struct dentry *parent, const char *name, int len) { + struct inode *dir = parent->d_inode; struct dentry *dentry; - struct inode *dir; - int error; - if ((error = rpc_lookup_parent(path, nd)) != 0) - return ERR_PTR(error); - dir = nd->dentry->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - dentry = lookup_one_len(nd->last.name, nd->dentry, nd->last.len); + dentry = lookup_one_len(name, parent, len); if (IS_ERR(dentry)) goto out_err; if (dentry->d_inode) { @@ -642,7 +638,20 @@ rpc_lookup_negative(char *path, struct nameidata *nd) return dentry; out_err: mutex_unlock(&dir->i_mutex); - rpc_release_path(nd); + return dentry; +} + +static struct dentry * +rpc_lookup_negative(char *path, struct nameidata *nd) +{ + struct dentry *dentry; + int error; + + if ((error = rpc_lookup_parent(path, nd)) != 0) + return ERR_PTR(error); + dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len); + if (IS_ERR(dentry)) + rpc_release_path(nd); return dentry; } @@ -701,17 +710,16 @@ rpc_rmdir(struct dentry *dentry) } struct dentry * -rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags) +rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pipe_ops *ops, int flags) { - struct nameidata nd; struct dentry *dentry; struct inode *dir, *inode; struct rpc_inode *rpci; - dentry = rpc_lookup_negative(path, &nd); + dentry = rpc_lookup_create(parent, name, strlen(name)); if (IS_ERR(dentry)) return dentry; - dir = nd.dentry->d_inode; + dir = parent->d_inode; inode = rpc_get_inode(dir->i_sb, S_IFSOCK | S_IRUSR | S_IWUSR); if (!inode) goto err_dput; @@ -726,13 +734,13 @@ rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags) dget(dentry); out: mutex_unlock(&dir->i_mutex); - rpc_release_path(&nd); return dentry; err_dput: dput(dentry); dentry = ERR_PTR(-ENOMEM); - printk(KERN_WARNING "%s: %s() failed to create pipe %s (errno = %d)\n", - __FILE__, __FUNCTION__, path, -ENOMEM); + printk(KERN_WARNING "%s: %s() failed to create pipe %s/%s (errno = %d)\n", + __FILE__, __FUNCTION__, parent->d_name.name, name, + -ENOMEM); goto out; } From 6daabf1b04c89f1fbd8eab5450261360943c8e20 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 24 Aug 2006 15:44:16 -0400 Subject: [PATCH 0860/1063] NFS: Fix up compiler warnings on 64-bit platforms in client.c Fix up warnings from compiling on ppc64. Signed-Off-By: David Howells Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index f1ff2aec2ca5..a4aa47913a5c 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -836,7 +836,9 @@ struct nfs_server *nfs_create_server(const struct nfs_mount_data *data, } memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid)); - dprintk("Server FSID: %llx:%llx\n", server->fsid.major, server->fsid.minor); + dprintk("Server FSID: %llx:%llx\n", + (unsigned long long) server->fsid.major, + (unsigned long long) server->fsid.minor); BUG_ON(!server->nfs_client); BUG_ON(!server->nfs_client->rpc_ops); @@ -1002,7 +1004,9 @@ struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data, if (error < 0) goto error; - dprintk("Server FSID: %llx:%llx\n", server->fsid.major, server->fsid.minor); + dprintk("Server FSID: %llx:%llx\n", + (unsigned long long) server->fsid.major, + (unsigned long long) server->fsid.minor); dprintk("Mount FH: %d\n", mntfh->size); error = nfs_probe_fsinfo(server, mntfh, &fattr); @@ -1074,7 +1078,8 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, goto error; dprintk("Referral FSID: %llx:%llx\n", - server->fsid.major, server->fsid.minor); + (unsigned long long) server->fsid.major, + (unsigned long long) server->fsid.minor); spin_lock(&nfs_client_lock); list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); @@ -1106,7 +1111,8 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, int error; dprintk("--> nfs_clone_server(,%llx:%llx,)\n", - fattr->fsid.major, fattr->fsid.minor); + (unsigned long long) fattr->fsid.major, + (unsigned long long) fattr->fsid.minor); server = nfs_alloc_server(); if (!server) @@ -1131,7 +1137,8 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, goto out_free_server; dprintk("Cloned FSID: %llx:%llx\n", - server->fsid.major, server->fsid.minor); + (unsigned long long) server->fsid.major, + (unsigned long long) server->fsid.minor); error = nfs_start_lockd(server); if (error < 0) @@ -1375,7 +1382,8 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) MAJOR(server->s_dev), MINOR(server->s_dev)); snprintf(fsid, 17, "%llx:%llx", - server->fsid.major, server->fsid.minor); + (unsigned long long) server->fsid.major, + (unsigned long long) server->fsid.minor); seq_printf(m, "v%d %02x%02x%02x%02x %4hx %-7s %-17s\n", clp->cl_nfsversion, From 058ad9cbf14b3c7480d01b20280cb4d5858f7a50 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 27 Aug 2006 17:23:53 -0400 Subject: [PATCH 0861/1063] NFS: NFS_ROOT should use the new rpc_create API Teach NFS_ROOT to use the new rpc_create API instead of the old two-call API for creating an RPC transport. Test plan: Compile the kernel with the NFS client build-in, and set CONFIG_NFS_ROOT. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/mount_clnt.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 41274874b9a5..d507b021207f 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -76,22 +76,19 @@ static struct rpc_clnt * mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version, int protocol) { - struct rpc_xprt *xprt; - struct rpc_clnt *clnt; + struct rpc_create_args args = { + .protocol = protocol, + .address = (struct sockaddr *)srvaddr, + .addrsize = sizeof(*srvaddr), + .servername = hostname, + .program = &mnt_program, + .version = version, + .authflavor = RPC_AUTH_UNIX, + .flags = (RPC_CLNT_CREATE_ONESHOT | + RPC_CLNT_CREATE_INTR), + }; - xprt = xprt_create_proto(protocol, srvaddr, NULL); - if (IS_ERR(xprt)) - return (struct rpc_clnt *)xprt; - - clnt = rpc_create_client(xprt, hostname, - &mnt_program, version, - RPC_AUTH_UNIX); - if (!IS_ERR(clnt)) { - clnt->cl_softrtry = 1; - clnt->cl_oneshot = 1; - clnt->cl_intr = 1; - } - return clnt; + return rpc_create(&args); } /* From 297de4f65698ee1e1c75e27d57933b5fa8227e72 Mon Sep 17 00:00:00 2001 From: "andros@citi.umich.edu" Date: Tue, 29 Aug 2006 12:19:41 -0400 Subject: [PATCH 0862/1063] Fix a referral error Oops Fix an oops when the referral server is not responding. Check the error return from nfs4_set_client() in nfs4_create_referral_server. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index a4aa47913a5c..110f80e7bd4c 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1059,6 +1059,8 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, parent_server->client->cl_xprt->prot, parent_client->retrans_timeo, parent_client->retrans_count); + if (error < 0) + goto error; /* Initialise the client representation from the parent server */ nfs_server_copy_userdata(server, parent_server); From 8014793b1b2869445adfe678d64cdacd10e99d53 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 31 Aug 2006 18:24:08 -0400 Subject: [PATCH 0863/1063] SUNRPC: rpc_delay() should not clobber the rpc_task->tk_status Doing so prevents stuff like call_encode() from working correctly. Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index ecf366351bf7..6390461a9756 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -542,24 +542,20 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) spin_unlock_bh(&queue->lock); } +static void __rpc_atrun(struct rpc_task *task) +{ + rpc_wake_up_task(task); +} + /* * Run a task at a later time */ -static void __rpc_atrun(struct rpc_task *); -void -rpc_delay(struct rpc_task *task, unsigned long delay) +void rpc_delay(struct rpc_task *task, unsigned long delay) { task->tk_timeout = delay; rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun); } -static void -__rpc_atrun(struct rpc_task *task) -{ - task->tk_status = 0; - rpc_wake_up_task(task); -} - /* * Helper to call task->tk_ops->rpc_call_prepare */ From 76303992b4701124f4cd0791ae2049ab4332f02c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Aug 2006 14:32:49 -0400 Subject: [PATCH 0864/1063] SUNRPC: Handle ENETUNREACH, EHOSTUNREACH and EHOSTDOWN socket errors In case of any of the above errors occuring, delay for 3 seconds, then handle as if it were a timeout error. Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 87efcd207f23..355e7863c0aa 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1030,6 +1030,14 @@ call_status(struct rpc_task *task) task->tk_status = 0; switch(status) { + case -EHOSTDOWN: + case -EHOSTUNREACH: + case -ENETUNREACH: + /* + * Delay any retries for 3 seconds, then handle as if it + * were a timeout. + */ + rpc_delay(task, 3*HZ); case -ETIMEDOUT: task->tk_action = call_timeout; break; From da45828e2835057045150b318c4fbe9bb91f18dd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 31 Aug 2006 15:44:52 -0400 Subject: [PATCH 0865/1063] SUNRPC: Clean up soft task error handling - Ensure that the task aborts the RPC call only when it has actually timed out. - Ensure that req->rq_majortimeo is initialised correctly. Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 34 ++++++++++++++-------------------- net/sunrpc/xprt.c | 8 +------- 2 files changed, 15 insertions(+), 27 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 355e7863c0aa..ceadb728f0da 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -863,15 +863,11 @@ call_bind_status(struct rpc_task *task) dprintk("RPC: %4d remote rpcbind: RPC program/version unavailable\n", task->tk_pid); rpc_delay(task, 3*HZ); - goto retry_bind; + goto retry_timeout; case -ETIMEDOUT: dprintk("RPC: %4d rpcbind request timed out\n", task->tk_pid); - if (RPC_IS_SOFT(task)) { - status = -EIO; - break; - } - goto retry_bind; + goto retry_timeout; case -EPFNOSUPPORT: dprintk("RPC: %4d remote rpcbind service unavailable\n", task->tk_pid); @@ -884,16 +880,13 @@ call_bind_status(struct rpc_task *task) dprintk("RPC: %4d unrecognized rpcbind error (%d)\n", task->tk_pid, -task->tk_status); status = -EIO; - break; } rpc_exit(task, status); return; -retry_bind: - task->tk_status = 0; - task->tk_action = call_bind; - return; +retry_timeout: + task->tk_action = call_timeout; } /* @@ -941,14 +934,16 @@ call_connect_status(struct rpc_task *task) switch (status) { case -ENOTCONN: - case -ETIMEDOUT: case -EAGAIN: task->tk_action = call_bind; - break; - default: - rpc_exit(task, -EIO); - break; + if (!RPC_IS_SOFT(task)) + return; + /* if soft mounted, test if we've timed out */ + case -ETIMEDOUT: + task->tk_action = call_timeout; + return; } + rpc_exit(task, -EIO); } /* @@ -1057,7 +1052,6 @@ call_status(struct rpc_task *task) printk("%s: RPC call returned error %d\n", clnt->cl_protname, -status); rpc_exit(task, status); - break; } } @@ -1125,10 +1119,10 @@ call_decode(struct rpc_task *task) clnt->cl_stats->rpcretrans++; goto out_retry; } - printk(KERN_WARNING "%s: too small RPC reply size (%d bytes)\n", + dprintk("%s: too small RPC reply size (%d bytes)\n", clnt->cl_protname, task->tk_status); - rpc_exit(task, -EIO); - return; + task->tk_action = call_timeout; + goto out_retry; } /* diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e4f64fb58ff2..a85f82baefc1 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -585,13 +585,6 @@ static void xprt_connect_status(struct rpc_task *task) task->tk_pid, -task->tk_status, task->tk_client->cl_server); xprt_release_write(xprt, task); task->tk_status = -EIO; - return; - } - - /* if soft mounted, just cause this RPC to fail */ - if (RPC_IS_SOFT(task)) { - xprt_release_write(xprt, task); - task->tk_status = -EIO; } } @@ -829,6 +822,7 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) req->rq_bufsize = 0; req->rq_xid = xprt_alloc_xid(xprt); req->rq_release_snd_buf = NULL; + xprt_reset_majortimeo(req); dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, req, ntohl(req->rq_xid)); } From 6b6ca86b77b62b798cf9ca2599036420abce7796 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Sep 2006 12:55:57 -0400 Subject: [PATCH 0866/1063] SUNRPC: Add refcounting to the struct rpc_xprt In a subsequent patch, this will allow the portmapper to take a reference to the rpc_xprt for which it is updating the port number, fixing an Oops. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 5 ++++- net/sunrpc/clnt.c | 8 +++----- net/sunrpc/xprt.c | 30 ++++++++++++++++++++++++++---- 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index de4efea7c856..bdeba8538c71 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -129,6 +130,7 @@ struct rpc_xprt_ops { }; struct rpc_xprt { + struct kref kref; /* Reference count */ struct rpc_xprt_ops * ops; /* transport methods */ struct socket * sock; /* BSD socket layer */ struct sock * inet; /* INET layer */ @@ -248,7 +250,8 @@ int xprt_adjust_timeout(struct rpc_rqst *req); void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_release(struct rpc_task *task); -int xprt_destroy(struct rpc_xprt *xprt); +struct rpc_xprt * xprt_get(struct rpc_xprt *xprt); +void xprt_put(struct rpc_xprt *xprt); static inline u32 *xprt_skip_transport_header(struct rpc_xprt *xprt, u32 *p) { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index ceadb728f0da..084a0ad5c64e 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -177,7 +177,7 @@ out_no_path: kfree(clnt->cl_server); kfree(clnt); out_err: - xprt_destroy(xprt); + xprt_put(xprt); out_no_xprt: return ERR_PTR(err); } @@ -261,6 +261,7 @@ rpc_clone_client(struct rpc_clnt *clnt) atomic_set(&new->cl_users, 0); new->cl_parent = clnt; atomic_inc(&clnt->cl_count); + new->cl_xprt = xprt_get(clnt->cl_xprt); /* Turn off autobind on clones */ new->cl_autobind = 0; new->cl_oneshot = 0; @@ -337,15 +338,12 @@ rpc_destroy_client(struct rpc_clnt *clnt) rpc_rmdir(clnt->cl_dentry); rpc_put_mount(); } - if (clnt->cl_xprt) { - xprt_destroy(clnt->cl_xprt); - clnt->cl_xprt = NULL; - } if (clnt->cl_server != clnt->cl_inline_name) kfree(clnt->cl_server); out_free: rpc_free_iostats(clnt->cl_metrics); clnt->cl_metrics = NULL; + xprt_put(clnt->cl_xprt); kfree(clnt); return 0; } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index a85f82baefc1..1f786f68729d 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -926,6 +926,7 @@ struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t si return ERR_PTR(result); } + kref_init(&xprt->kref); spin_lock_init(&xprt->transport_lock); spin_lock_init(&xprt->reserve_lock); @@ -958,16 +959,37 @@ struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t si /** * xprt_destroy - destroy an RPC transport, killing off all requests. - * @xprt: transport to destroy + * @kref: kref for the transport to destroy * */ -int xprt_destroy(struct rpc_xprt *xprt) +static void xprt_destroy(struct kref *kref) { + struct rpc_xprt *xprt = container_of(kref, struct rpc_xprt, kref); + dprintk("RPC: destroying transport %p\n", xprt); xprt->shutdown = 1; del_timer_sync(&xprt->timer); xprt->ops->destroy(xprt); kfree(xprt); - - return 0; +} + +/** + * xprt_put - release a reference to an RPC transport. + * @xprt: pointer to the transport + * + */ +void xprt_put(struct rpc_xprt *xprt) +{ + kref_put(&xprt->kref, xprt_destroy); +} + +/** + * xprt_get - return a reference to an RPC transport. + * @xprt: pointer to the transport + * + */ +struct rpc_xprt *xprt_get(struct rpc_xprt *xprt) +{ + kref_get(&xprt->kref); + return xprt; } From 762d4527c2fc19d821a13d9a3455ccc2d4073731 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 3 Sep 2006 00:51:55 -0400 Subject: [PATCH 0867/1063] SUNRPC: Fix Oops in pmap_getport_done There is no guarantee that the parent task still exists when we exit from the portmapper. Save the xprt instead. Signed-off-by: Trond Myklebust --- net/sunrpc/pmap_clnt.c | 46 ++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index f476f4df0f48..c04609d3476a 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -30,7 +30,7 @@ struct portmap_args { u32 pm_vers; u32 pm_prot; unsigned short pm_port; - struct rpc_task * pm_task; + struct rpc_xprt * pm_xprt; }; static struct rpc_procinfo pmap_procedures[]; @@ -71,10 +71,10 @@ static const struct rpc_call_ops pmap_getport_ops = { .rpc_release = pmap_map_release, }; -static inline void pmap_wake_portmap_waiters(struct rpc_xprt *xprt) +static inline void pmap_wake_portmap_waiters(struct rpc_xprt *xprt, int status) { xprt_clear_binding(xprt); - rpc_wake_up(&xprt->binding); + rpc_wake_up_status(&xprt->binding, status); } /** @@ -92,6 +92,7 @@ void rpc_getport(struct rpc_task *task) struct portmap_args *map; struct rpc_clnt *pmap_clnt; struct rpc_task *child; + int status; dprintk("RPC: %4d rpc_getport(%s, %u, %u, %d)\n", task->tk_pid, clnt->cl_server, @@ -107,34 +108,30 @@ void rpc_getport(struct rpc_task *task) } /* Someone else may have bound if we slept */ - if (xprt_bound(xprt)) { - task->tk_status = 0; + status = 0; + if (xprt_bound(xprt)) goto bailout_nofree; - } + status = -ENOMEM; map = pmap_map_alloc(); - if (!map) { - task->tk_status = -ENOMEM; + if (!map) goto bailout_nofree; - } map->pm_prog = clnt->cl_prog; map->pm_vers = clnt->cl_vers; map->pm_prot = xprt->prot; map->pm_port = 0; - map->pm_task = task; + map->pm_xprt = xprt_get(xprt); rpc_peeraddr(clnt, (struct sockaddr *) &addr, sizeof(addr)); pmap_clnt = pmap_create(clnt->cl_server, &addr, map->pm_prot, 0); - if (IS_ERR(pmap_clnt)) { - task->tk_status = PTR_ERR(pmap_clnt); + status = PTR_ERR(pmap_clnt); + if (IS_ERR(pmap_clnt)) goto bailout; - } + status = -EIO; child = rpc_run_task(pmap_clnt, RPC_TASK_ASYNC, &pmap_getport_ops, map); - if (IS_ERR(child)) { - task->tk_status = -EIO; + if (IS_ERR(child)) goto bailout; - } rpc_release_task(child); rpc_sleep_on(&xprt->binding, task, NULL, NULL); @@ -144,8 +141,10 @@ void rpc_getport(struct rpc_task *task) bailout: pmap_map_free(map); + xprt_put(xprt); bailout_nofree: - pmap_wake_portmap_waiters(xprt); + task->tk_status = status; + pmap_wake_portmap_waiters(xprt, status); } #ifdef CONFIG_ROOT_NFS @@ -201,29 +200,28 @@ int rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int pr static void pmap_getport_done(struct rpc_task *child, void *data) { struct portmap_args *map = data; - struct rpc_task *task = map->pm_task; - struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_xprt *xprt = map->pm_xprt; int status = child->tk_status; if (status < 0) { /* Portmapper not available */ xprt->ops->set_port(xprt, 0); - task->tk_status = status; } else if (map->pm_port == 0) { /* Requested RPC service wasn't registered */ xprt->ops->set_port(xprt, 0); - task->tk_status = -EACCES; + status = -EACCES; } else { /* Succeeded */ xprt->ops->set_port(xprt, map->pm_port); xprt_set_bound(xprt); - task->tk_status = 0; + status = 0; } dprintk("RPC: %4d pmap_getport_done(status %d, port %u)\n", - child->tk_pid, child->tk_status, map->pm_port); + child->tk_pid, status, map->pm_port); - pmap_wake_portmap_waiters(xprt); + pmap_wake_portmap_waiters(xprt, status); + xprt_put(xprt); } /** From fd6840714d9cf6e93f1d42b904860a94df316b85 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Sep 2006 12:27:44 -0400 Subject: [PATCH 0868/1063] NFS: nfs_lookup - don't hash dentry when optimising away the lookup If the open intents tell us that a given lookup is going to result in a, exclusive create, we currently optimize away the lookup call itself. The reason is that the lookup would not be atomic with the create RPC call, so why do it in the first place? A problem occurs, however, if the VFS aborts the exclusive create operation after the lookup, but before the call to create the file/directory: in this case we will end up with a hashed negative dentry in the dcache that has never been looked up. Fix this by only actually hashing the dentry once the create operation has been successfully completed. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 51328ae640dd..3419c2da9ba9 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -904,9 +904,15 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru lock_kernel(); - /* If we're doing an exclusive create, optimize away the lookup */ - if (nfs_is_exclusive_create(dir, nd)) - goto no_entry; + /* + * If we're doing an exclusive create, optimize away the lookup + * but don't hash the dentry. + */ + if (nfs_is_exclusive_create(dir, nd)) { + d_instantiate(dentry, NULL); + res = NULL; + goto out_unlock; + } error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); if (error == -ENOENT) @@ -1161,6 +1167,8 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, if (IS_ERR(inode)) return error; d_instantiate(dentry, inode); + if (d_unhashed(dentry)) + d_rehash(dentry); return 0; } From 2dec51466a08ac1c67da41bfd0518d43d983a2eb Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 12 Sep 2006 11:53:23 -0400 Subject: [PATCH 0869/1063] NFSv4: It's perfectly legal for clp to be NULL here.... Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 110f80e7bd4c..ec1938d4b814 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -269,7 +269,7 @@ struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversio clp = __nfs_find_client(addr, nfsversion); spin_unlock(&nfs_client_lock); - BUG_ON(clp->cl_cons_state == 0); + BUG_ON(clp && clp->cl_cons_state == 0); return clp; } From 5f004cf2aa8494708fd8d78e78142b7b2748e765 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 14 Sep 2006 14:03:14 -0400 Subject: [PATCH 0870/1063] NFS: Make read() return an ESTALE if the file has been deleted Currently, a read() request will return EIO even if the file has been deleted on the server, simply because that is what the VM will return if the call to readpage() fails to update the page. Ensure that readpage() marks the inode as stale if it receives an ESTALE. Then return that error to userland. Signed-off-by: Trond Myklebust --- fs/nfs/read.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index dae33c1e8a77..69f1549da2b9 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -568,8 +568,13 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count); - /* Is this a short read? */ - if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) { + if (task->tk_status < 0) { + if (task->tk_status == -ESTALE) { + set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode)); + nfs_mark_for_revalidate(data->inode); + } + } else if (resp->count < argp->count && !resp->eof) { + /* This is a short read! */ nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); /* Has the server at least made some progress? */ if (resp->count != 0) { @@ -616,6 +621,10 @@ int nfs_readpage(struct file *file, struct page *page) if (error) goto out_error; + error = -ESTALE; + if (NFS_STALE(inode)) + goto out_error; + if (file == NULL) { ctx = nfs_find_open_context(inode, NULL, FMODE_READ); if (ctx == NULL) @@ -678,7 +687,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, }; struct inode *inode = mapping->host; struct nfs_server *server = NFS_SERVER(inode); - int ret; + int ret = -ESTALE; dprintk("NFS: nfs_readpages (%s/%Ld %d)\n", inode->i_sb->s_id, @@ -686,6 +695,9 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, nr_pages); nfs_inc_stats(inode, NFSIOS_VFSREADPAGES); + if (NFS_STALE(inode)) + goto out; + if (filp == NULL) { desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ); if (desc.ctx == NULL) @@ -701,6 +713,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, ret = err; } put_nfs_open_context(desc.ctx); +out: return ret; } From 97db8f41792839a6912fd21be8b61dd6c50db58f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 14 Sep 2006 14:03:14 -0400 Subject: [PATCH 0871/1063] NFS: Don't invalidate the symlink we just stuffed into the cache And slight optimisation of nfs_end_data_update(): directories never have delegations anyway. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index cb5c65f0bc12..a56add0bc5b7 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -717,13 +717,11 @@ void nfs_end_data_update(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); - if (!nfs_have_delegation(inode, FMODE_READ)) { - /* Directories and symlinks: invalidate page cache */ - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) { - spin_lock(&inode->i_lock); - nfsi->cache_validity |= NFS_INO_INVALID_DATA; - spin_unlock(&inode->i_lock); - } + /* Directories: invalidate page cache */ + if (S_ISDIR(inode->i_mode)) { + spin_lock(&inode->i_lock); + nfsi->cache_validity |= NFS_INO_INVALID_DATA; + spin_unlock(&inode->i_lock); } nfsi->cache_change_attribute = jiffies; atomic_dec(&nfsi->data_updates); From 6b30954ebb569fa1b2abdb21f2f4290eec76bf80 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 14 Sep 2006 14:03:14 -0400 Subject: [PATCH 0872/1063] NFSv4: Retry lease recovery if it failed during a synchronous operation. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7f60beb40df3..c218cc450b92 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -793,10 +793,17 @@ out: int nfs4_recover_expired_lease(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; + int ret; - if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) + for (;;) { + ret = nfs4_wait_clnt_recover(server->client, clp); + if (ret != 0) + return ret; + if (!test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) + break; nfs4_schedule_state_recovery(clp); - return nfs4_wait_clnt_recover(server->client, clp); + } + return 0; } /* From c514983d8d2260020543a81589a2b8c7d4bdab4e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 15 Sep 2006 08:25:04 -0400 Subject: [PATCH 0873/1063] NFSv4: Handle the condition NFS4ERR_FILE_OPEN Retry a few times before we give up: the error is usually due to ordering issues with asynchronous RPC calls. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c218cc450b92..c49ac3ea1dc3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2891,6 +2891,7 @@ int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct if (ret == 0) exception->retry = 1; break; + case -NFS4ERR_FILE_OPEN: case -NFS4ERR_GRACE: case -NFS4ERR_DELAY: ret = nfs4_delay(server->client, &exception->timeout); From 2066fe89b459c3c787c811b3369df191cddd93d8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 15 Sep 2006 08:30:46 -0400 Subject: [PATCH 0874/1063] NFSv4: Poll more aggressively when handling NFS4ERR_DELAY Change the initial retry delay from 1s to 0.1s (and then back off exponentially). Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c49ac3ea1dc3..47c7e6e3910d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -55,7 +55,7 @@ #define NFSDBG_FACILITY NFSDBG_PROC -#define NFS4_POLL_RETRY_MIN (1*HZ) +#define NFS4_POLL_RETRY_MIN (HZ/10) #define NFS4_POLL_RETRY_MAX (15*HZ) struct nfs4_opendata; From 51b6ded4d9a94a61035deba1d8f51a54e3a3dd86 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 15 Sep 2006 16:31:56 -0400 Subject: [PATCH 0875/1063] NFSv4: When mounting with a port=0 argument, substitute port=2049 RFC3530 states that the registered port 2049 for the NFS protocol should be the default configuration in order to allow clients not to use the RPC binding protocols. If the mount program sends us a port=0, we therefore substitute port=2049. Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 665949d27798..b99113b0f65f 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -833,6 +833,9 @@ static int nfs4_get_sb(struct file_system_type *fs_type, __FUNCTION__); return -EINVAL; } + /* RFC3530: The default port for NFS is 2049 */ + if (addr.sin_port == 0) + addr.sin_port = NFS_PORT; /* Grab the authentication type */ authflavour = RPC_AUTH_UNIX; From aec5e175288c711cbe44750276f61efa3fa3d370 Mon Sep 17 00:00:00 2001 From: Josef 'Jeff' Sipek Date: Sat, 16 Sep 2006 21:09:32 -0400 Subject: [PATCH 0876/1063] NFS: Use SEEK_END instead of hardcoded value Signed-off-by: Josef 'Jeff' Sipek Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index a146ed338534..be997d649127 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -157,7 +157,7 @@ force_reval: static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) { /* origin == SEEK_END => we must revalidate the cached file length */ - if (origin == 2) { + if (origin == SEEK_END) { struct inode *inode = filp->f_mapping->host; int retval = nfs_revalidate_file_size(inode, filp); if (retval < 0) From a53a3c58fd83e572a7c768d88b4c4e9840a57e82 Mon Sep 17 00:00:00 2001 From: Steve Dickson Date: Wed, 6 Sep 2006 11:51:21 -0400 Subject: [PATCH 0877/1063] NFSv4: rpc_mkpipe creating socket inodes w/out sk buffers This patch stop rpc_mkpipe from create S_IFSOCK nodes what don't have associated sk buffers attached (which causes SELinux to oops during NFSv4 mounts). Instead the S_IFIFO mode bit is set which probably make more sense and seems to work just fine during my connectathon and fsx testing... Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust --- net/sunrpc/rpc_pipe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 11ec12a09d70..dfa504fe383f 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -720,7 +720,7 @@ rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pi if (IS_ERR(dentry)) return dentry; dir = parent->d_inode; - inode = rpc_get_inode(dir->i_sb, S_IFSOCK | S_IRUSR | S_IWUSR); + inode = rpc_get_inode(dir->i_sb, S_IFIFO | S_IRUSR | S_IWUSR); if (!inode) goto err_dput; inode->i_ino = iunique(dir->i_sb, 100); From f551e44ff11d3e2ec8f37907bb88ec2433cc8b74 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 Sep 2006 14:33:04 -0400 Subject: [PATCH 0878/1063] NFS: add comments clarifying the use of nfs_post_op_update() Comments-only change to clarify a detail of the NFS protocol and how it is implemented in Linux. Test plan: None. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 6 ++++++ fs/nfs/write.c | 8 +++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a56add0bc5b7..e8c143d182c4 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -840,6 +840,12 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) * * After an operation that has changed the inode metadata, mark the * attribute cache as being invalid, then try to update it. + * + * NB: if the server didn't return any post op attributes, this + * function will force the retrieval of attributes before the next + * NFS request. Thus it should be used only for operations that + * are expected to change one or more attributes, to avoid + * unnecessary NFS requests and trips through nfs_update_inode(). */ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) { diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 38ba5c09af08..c12effb46fe5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1253,7 +1253,13 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) dprintk("NFS: %4d nfs_writeback_done (status %d)\n", task->tk_pid, task->tk_status); - /* Call the NFS version-specific code */ + /* + * ->write_done will attempt to use post-op attributes to detect + * conflicting writes by other clients. A strict interpretation + * of close-to-open would allow us to continue caching even if + * another writer had changed the file, but some applications + * depend on tighter cache coherency when writing. + */ status = NFS_PROTO(data->inode)->write_done(task, data); if (status != 0) return status; From 026ed5c9185dcc4b2df92e98c3d61a01cea19cbf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 Sep 2006 14:33:07 -0400 Subject: [PATCH 0879/1063] NFS: unmark NFS direct I/O as experimental Remove the EXPERIMENTAL flag from the NFS_DIRECTIO option. Test plan: Unset the EXPERIMENTAL kernel build option and check to see that the NFS direct I/O option is still available. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/Kconfig b/fs/Kconfig index 530581628311..a27002668bd3 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1471,8 +1471,8 @@ config NFS_V4 If unsure, say N. config NFS_DIRECTIO - bool "Allow direct I/O on NFS files (EXPERIMENTAL)" - depends on NFS_FS && EXPERIMENTAL + bool "Allow direct I/O on NFS files" + depends on NFS_FS help This option enables applications to perform uncached I/O on files in NFS file systems using the O_DIRECT open() flag. When O_DIRECT From 42750b04c5baa7c5ffdf0a8be2b9b320efdf069f Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Thu, 1 Jun 2006 18:34:01 +0200 Subject: [PATCH 0880/1063] [ALSA] Control API - TLV implementation for additional information like dB scale This patch implements a TLV mechanism to transfer an additional information like dB scale to the user space. The types might be extended in future. Acked-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- include/sound/asound.h | 9 ++++++- include/sound/control.h | 2 ++ include/sound/tlv.h | 43 +++++++++++++++++++++++++++++++++ sound/core/control.c | 41 +++++++++++++++++++++++++++++-- sound/pci/ca0106/ca0106_mixer.c | 4 +++ 5 files changed, 96 insertions(+), 3 deletions(-) create mode 100644 include/sound/tlv.h diff --git a/include/sound/asound.h b/include/sound/asound.h index 41885f48ad91..76a20406bd18 100644 --- a/include/sound/asound.h +++ b/include/sound/asound.h @@ -688,7 +688,7 @@ struct snd_timer_tread { * * ****************************************************************************/ -#define SNDRV_CTL_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 3) +#define SNDRV_CTL_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 4) struct snd_ctl_card_info { int card; /* card number */ @@ -818,6 +818,12 @@ struct snd_ctl_elem_value { unsigned char reserved[128-sizeof(struct timespec)]; }; +struct snd_ctl_tlv { + unsigned int numid; /* control element numeric identification */ + unsigned int length; /* in bytes aligned to 4 */ + unsigned int tlv[0]; /* first TLV */ +}; + enum { SNDRV_CTL_IOCTL_PVERSION = _IOR('U', 0x00, int), SNDRV_CTL_IOCTL_CARD_INFO = _IOR('U', 0x01, struct snd_ctl_card_info), @@ -831,6 +837,7 @@ enum { SNDRV_CTL_IOCTL_ELEM_ADD = _IOWR('U', 0x17, struct snd_ctl_elem_info), SNDRV_CTL_IOCTL_ELEM_REPLACE = _IOWR('U', 0x18, struct snd_ctl_elem_info), SNDRV_CTL_IOCTL_ELEM_REMOVE = _IOWR('U', 0x19, struct snd_ctl_elem_id), + SNDRV_CTL_IOCTL_TLV_READ = _IOWR('U', 0x1a, struct snd_ctl_tlv), SNDRV_CTL_IOCTL_HWDEP_NEXT_DEVICE = _IOWR('U', 0x20, int), SNDRV_CTL_IOCTL_HWDEP_INFO = _IOR('U', 0x21, struct snd_hwdep_info), SNDRV_CTL_IOCTL_PCM_NEXT_DEVICE = _IOR('U', 0x30, int), diff --git a/include/sound/control.h b/include/sound/control.h index 2489b1eb0110..a93a58d0e688 100644 --- a/include/sound/control.h +++ b/include/sound/control.h @@ -42,6 +42,7 @@ struct snd_kcontrol_new { snd_kcontrol_info_t *info; snd_kcontrol_get_t *get; snd_kcontrol_put_t *put; + unsigned int *tlv; unsigned long private_value; }; @@ -58,6 +59,7 @@ struct snd_kcontrol { snd_kcontrol_info_t *info; snd_kcontrol_get_t *get; snd_kcontrol_put_t *put; + unsigned int *tlv; unsigned long private_value; void *private_data; void (*private_free)(struct snd_kcontrol *kcontrol); diff --git a/include/sound/tlv.h b/include/sound/tlv.h new file mode 100644 index 000000000000..b826e1df1da6 --- /dev/null +++ b/include/sound/tlv.h @@ -0,0 +1,43 @@ +#ifndef __SOUND_TLV_H +#define __SOUND_TLV_H + +/* + * Advanced Linux Sound Architecture - ALSA - Driver + * Copyright (c) 2006 by Jaroslav Kysela + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* + * TLV structure is right behind the struct snd_ctl_tlv: + * unsigned int type - see SNDRV_CTL_TLVT_* + * unsigned int length + * .... data aligned to sizeof(unsigned int), use + * block_length = (length + (sizeof(unsigned int) - 1)) & + * ~(sizeof(unsigned int) - 1)) .... + */ + +#define SNDRV_CTL_TLVT_CONTAINER 0 /* one level down - group of TLVs */ +#define SNDRV_CTL_TLVT_DB_SCALE 1 /* dB scale */ + +#define DECLARE_TLV_DB_SCALE(name, min, step, mute) \ +unsigned int name[] = { \ + SNDRV_CTL_TLVT_DB_SCALE, 2 * sizeof(unsigned int), \ + (min), ((step) & 0xffff) | ((mute) ? 0x10000 : 0) \ +} + +#endif /* __SOUND_TLV_H */ diff --git a/sound/core/control.c b/sound/core/control.c index bb397eaa7187..e9c8854d2f7b 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -241,6 +241,7 @@ struct snd_kcontrol *snd_ctl_new1(const struct snd_kcontrol_new *ncontrol, kctl.info = ncontrol->info; kctl.get = ncontrol->get; kctl.put = ncontrol->put; + kctl.tlv = ncontrol->tlv; kctl.private_value = ncontrol->private_value; kctl.private_data = private_data; return snd_ctl_new(&kctl, access); @@ -1067,6 +1068,40 @@ static int snd_ctl_subscribe_events(struct snd_ctl_file *file, int __user *ptr) return 0; } +static int snd_ctl_tlv_read(struct snd_card *card, + struct snd_ctl_tlv __user *_tlv) +{ + struct snd_ctl_tlv tlv; + struct snd_kcontrol *kctl; + unsigned int len; + int err = 0; + + if (copy_from_user(&tlv, _tlv, sizeof(tlv))) + return -EFAULT; + if (tlv.length < sizeof(unsigned int) * 3) + return -EINVAL; + down_read(&card->controls_rwsem); + kctl = snd_ctl_find_numid(card, tlv.numid); + if (kctl == NULL) { + err = -ENOENT; + goto __kctl_end; + } + if (kctl->tlv == NULL) { + err = -ENXIO; + goto __kctl_end; + } + len = kctl->tlv[1] + 2 * sizeof(unsigned int); + if (tlv.length < len) { + err = -ENOMEM; + goto __kctl_end; + } + if (copy_to_user(_tlv->tlv, kctl->tlv, len)) + err = -EFAULT; + __kctl_end: + up_read(&card->controls_rwsem); + return err; +} + static long snd_ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct snd_ctl_file *ctl; @@ -1086,11 +1121,11 @@ static long snd_ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg case SNDRV_CTL_IOCTL_CARD_INFO: return snd_ctl_card_info(card, ctl, cmd, argp); case SNDRV_CTL_IOCTL_ELEM_LIST: - return snd_ctl_elem_list(ctl->card, argp); + return snd_ctl_elem_list(card, argp); case SNDRV_CTL_IOCTL_ELEM_INFO: return snd_ctl_elem_info_user(ctl, argp); case SNDRV_CTL_IOCTL_ELEM_READ: - return snd_ctl_elem_read_user(ctl->card, argp); + return snd_ctl_elem_read_user(card, argp); case SNDRV_CTL_IOCTL_ELEM_WRITE: return snd_ctl_elem_write_user(ctl, argp); case SNDRV_CTL_IOCTL_ELEM_LOCK: @@ -1105,6 +1140,8 @@ static long snd_ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg return snd_ctl_elem_remove(ctl, argp); case SNDRV_CTL_IOCTL_SUBSCRIBE_EVENTS: return snd_ctl_subscribe_events(ctl, ip); + case SNDRV_CTL_IOCTL_TLV_READ: + return snd_ctl_tlv_read(card, argp); case SNDRV_CTL_IOCTL_POWER: return -ENOPROTOOPT; case SNDRV_CTL_IOCTL_POWER_STATE: diff --git a/sound/pci/ca0106/ca0106_mixer.c b/sound/pci/ca0106/ca0106_mixer.c index 146eed70dce6..35309b3ed8c0 100644 --- a/sound/pci/ca0106/ca0106_mixer.c +++ b/sound/pci/ca0106/ca0106_mixer.c @@ -70,9 +70,12 @@ #include #include #include +#include #include "ca0106.h" +static DECLARE_TLV_DB_SCALE(snd_ca0106_db_scale, -5150, 75, 1); + static int snd_ca0106_shared_spdif_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { @@ -472,6 +475,7 @@ static int snd_ca0106_i2c_volume_put(struct snd_kcontrol *kcontrol, .info = snd_ca0106_volume_info, \ .get = snd_ca0106_volume_get, \ .put = snd_ca0106_volume_put, \ + .tlv = snd_ca0106_db_scale, \ .private_value = ((chid) << 8) | (reg) \ } From 746d4a02e68499fc6c1f8d0c43d2271853ade181 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 23 Jun 2006 14:37:59 +0200 Subject: [PATCH 0881/1063] [ALSA] Fix disconnection of proc interface - Add the linked list to each proc entry to enable a single-shot disconnection (unregister) - Deprecate snd_info_unregister(), use snd_info_free_entry() - Removed NULL checks of snd_info_free_entry() Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- include/sound/info.h | 7 +- sound/core/hwdep.c | 2 +- sound/core/info.c | 102 ++++++++++++++-------------- sound/core/info_oss.c | 6 +- sound/core/init.c | 7 +- sound/core/oss/mixer_oss.c | 6 +- sound/core/oss/pcm_oss.c | 8 +-- sound/core/pcm.c | 53 +++++---------- sound/core/pcm_memory.c | 2 +- sound/core/rawmidi.c | 2 +- sound/core/seq/oss/seq_oss.c | 3 +- sound/core/seq/seq_device.c | 2 +- sound/core/seq/seq_info.c | 6 +- sound/core/sound.c | 3 +- sound/core/sound_oss.c | 3 +- sound/core/timer.c | 2 +- sound/drivers/opl4/opl4_proc.c | 3 +- sound/pci/ac97/ac97_proc.c | 18 ++--- sound/pci/cs46xx/dsp_spos.c | 52 ++++++-------- sound/pci/cs46xx/dsp_spos_scb_lib.c | 2 +- sound/synth/emux/emux_proc.c | 6 +- 21 files changed, 123 insertions(+), 172 deletions(-) diff --git a/include/sound/info.h b/include/sound/info.h index 74f6996769c7..97ffc4fb9969 100644 --- a/include/sound/info.h +++ b/include/sound/info.h @@ -71,7 +71,6 @@ struct snd_info_entry { mode_t mode; long size; unsigned short content; - unsigned short disconnected: 1; union { struct snd_info_entry_text text; struct snd_info_entry_ops *ops; @@ -83,6 +82,8 @@ struct snd_info_entry { void (*private_free)(struct snd_info_entry *entry); struct proc_dir_entry *p; struct mutex access; + struct list_head children; + struct list_head list; }; #if defined(CONFIG_SND_OSSEMUL) && defined(CONFIG_PROC_FS) @@ -122,8 +123,8 @@ int snd_info_restore_text(struct snd_info_entry * entry); int snd_info_card_create(struct snd_card * card); int snd_info_card_register(struct snd_card * card); int snd_info_card_free(struct snd_card * card); +void snd_info_card_disconnect(struct snd_card * card); int snd_info_register(struct snd_info_entry * entry); -int snd_info_unregister(struct snd_info_entry * entry); /* for card drivers */ int snd_card_proc_new(struct snd_card *card, const char *name, struct snd_info_entry **entryp); @@ -156,8 +157,8 @@ static inline void snd_info_free_entry(struct snd_info_entry * entry) { ; } static inline int snd_info_card_create(struct snd_card * card) { return 0; } static inline int snd_info_card_register(struct snd_card * card) { return 0; } static inline int snd_info_card_free(struct snd_card * card) { return 0; } +static inline void snd_info_card_disconnect(struct snd_card * card) { } static inline int snd_info_register(struct snd_info_entry * entry) { return 0; } -static inline int snd_info_unregister(struct snd_info_entry * entry) { return 0; } static inline int snd_card_proc_new(struct snd_card *card, const char *name, struct snd_info_entry **entryp) { return -EINVAL; } diff --git a/sound/core/hwdep.c b/sound/core/hwdep.c index 8bd0dcc93eba..cbd8a63282b6 100644 --- a/sound/core/hwdep.c +++ b/sound/core/hwdep.c @@ -497,7 +497,7 @@ static void __init snd_hwdep_proc_init(void) static void __exit snd_hwdep_proc_done(void) { - snd_info_unregister(snd_hwdep_proc_entry); + snd_info_free_entry(snd_hwdep_proc_entry); } #else /* !CONFIG_PROC_FS */ #define snd_hwdep_proc_init() diff --git a/sound/core/info.c b/sound/core/info.c index 340332c6d973..9663b6be9c3a 100644 --- a/sound/core/info.c +++ b/sound/core/info.c @@ -78,6 +78,7 @@ struct snd_info_private_data { static int snd_info_version_init(void); static int snd_info_version_done(void); +static void snd_info_disconnect(struct snd_info_entry *entry); /* resize the proc r/w buffer */ @@ -304,7 +305,7 @@ static int snd_info_entry_open(struct inode *inode, struct file *file) mutex_lock(&info_mutex); p = PDE(inode); entry = p == NULL ? NULL : (struct snd_info_entry *)p->data; - if (entry == NULL || entry->disconnected) { + if (entry == NULL || ! entry->p) { mutex_unlock(&info_mutex); return -ENODEV; } @@ -586,10 +587,10 @@ int __exit snd_info_done(void) snd_info_version_done(); if (snd_proc_root) { #if defined(CONFIG_SND_SEQUENCER) || defined(CONFIG_SND_SEQUENCER_MODULE) - snd_info_unregister(snd_seq_root); + snd_info_free_entry(snd_seq_root); #endif #ifdef CONFIG_SND_OSSEMUL - snd_info_unregister(snd_oss_root); + snd_info_free_entry(snd_oss_root); #endif snd_remove_proc_entry(&proc_root, snd_proc_root); } @@ -648,17 +649,28 @@ int snd_info_card_register(struct snd_card *card) * de-register the card proc file * called from init.c */ -int snd_info_card_free(struct snd_card *card) +void snd_info_card_disconnect(struct snd_card *card) { - snd_assert(card != NULL, return -ENXIO); + snd_assert(card != NULL, return); + mutex_lock(&info_mutex); if (card->proc_root_link) { snd_remove_proc_entry(snd_proc_root, card->proc_root_link); card->proc_root_link = NULL; } - if (card->proc_root) { - snd_info_unregister(card->proc_root); - card->proc_root = NULL; - } + if (card->proc_root) + snd_info_disconnect(card->proc_root); + mutex_unlock(&info_mutex); +} + +/* + * release the card proc file resources + * called from init.c + */ +int snd_info_card_free(struct snd_card *card) +{ + snd_assert(card != NULL, return -ENXIO); + snd_info_free_entry(card->proc_root); + card->proc_root = NULL; return 0; } @@ -767,6 +779,8 @@ static struct snd_info_entry *snd_info_create_entry(const char *name) entry->mode = S_IFREG | S_IRUGO; entry->content = SNDRV_INFO_CONTENT_TEXT; mutex_init(&entry->access); + INIT_LIST_HEAD(&entry->children); + INIT_LIST_HEAD(&entry->list); return entry; } @@ -819,6 +833,24 @@ struct snd_info_entry *snd_info_create_card_entry(struct snd_card *card, EXPORT_SYMBOL(snd_info_create_card_entry); +static void snd_info_disconnect(struct snd_info_entry *entry) +{ + struct list_head *p, *n; + struct proc_dir_entry *root; + + list_for_each_safe(p, n, &entry->children) { + snd_info_disconnect(list_entry(p, struct snd_info_entry, list)); + } + + if (! entry->p) + return; + list_del_init(&entry->list); + root = entry->parent == NULL ? snd_proc_root : entry->parent->p; + snd_assert(root, return); + snd_remove_proc_entry(root, entry->p); + entry->p = NULL; +} + static int snd_info_dev_free_entry(struct snd_device *device) { struct snd_info_entry *entry = device->device_data; @@ -832,19 +864,6 @@ static int snd_info_dev_register_entry(struct snd_device *device) return snd_info_register(entry); } -static int snd_info_dev_disconnect_entry(struct snd_device *device) -{ - struct snd_info_entry *entry = device->device_data; - entry->disconnected = 1; - return 0; -} - -static int snd_info_dev_unregister_entry(struct snd_device *device) -{ - struct snd_info_entry *entry = device->device_data; - return snd_info_unregister(entry); -} - /** * snd_card_proc_new - create an info entry for the given card * @card: the card instance @@ -871,8 +890,7 @@ int snd_card_proc_new(struct snd_card *card, const char *name, static struct snd_device_ops ops = { .dev_free = snd_info_dev_free_entry, .dev_register = snd_info_dev_register_entry, - .dev_disconnect = snd_info_dev_disconnect_entry, - .dev_unregister = snd_info_dev_unregister_entry + /* disconnect is done via snd_info_card_disconnect() */ }; struct snd_info_entry *entry; int err; @@ -901,6 +919,11 @@ void snd_info_free_entry(struct snd_info_entry * entry) { if (entry == NULL) return; + if (entry->p) { + mutex_lock(&info_mutex); + snd_info_disconnect(entry); + mutex_unlock(&info_mutex); + } kfree(entry->name); if (entry->private_free) entry->private_free(entry); @@ -935,38 +958,14 @@ int snd_info_register(struct snd_info_entry * entry) p->size = entry->size; p->data = entry; entry->p = p; + if (entry->parent) + list_add_tail(&entry->list, &entry->parent->children); mutex_unlock(&info_mutex); return 0; } EXPORT_SYMBOL(snd_info_register); -/** - * snd_info_unregister - de-register the info entry - * @entry: the info entry - * - * De-registers the info entry and releases the instance. - * - * Returns zero if successful, or a negative error code on failure. - */ -int snd_info_unregister(struct snd_info_entry * entry) -{ - struct proc_dir_entry *root; - - if (! entry) - return 0; - snd_assert(entry->p != NULL, return -ENXIO); - root = entry->parent == NULL ? snd_proc_root : entry->parent->p; - snd_assert(root, return -ENXIO); - mutex_lock(&info_mutex); - snd_remove_proc_entry(root, entry->p); - mutex_unlock(&info_mutex); - snd_info_free_entry(entry); - return 0; -} - -EXPORT_SYMBOL(snd_info_unregister); - /* */ @@ -999,8 +998,7 @@ static int __init snd_info_version_init(void) static int __exit snd_info_version_done(void) { - if (snd_info_version_entry) - snd_info_unregister(snd_info_version_entry); + snd_info_free_entry(snd_info_version_entry); return 0; } diff --git a/sound/core/info_oss.c b/sound/core/info_oss.c index bb2c40d0ab66..3ebc34919c76 100644 --- a/sound/core/info_oss.c +++ b/sound/core/info_oss.c @@ -131,10 +131,8 @@ int snd_info_minor_register(void) int snd_info_minor_unregister(void) { - if (snd_sndstat_proc_entry) { - snd_info_unregister(snd_sndstat_proc_entry); - snd_sndstat_proc_entry = NULL; - } + snd_info_free_entry(snd_sndstat_proc_entry); + snd_sndstat_proc_entry = NULL; return 0; } diff --git a/sound/core/init.c b/sound/core/init.c index 4d9258884e44..1ecb029ff4c9 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -310,6 +310,7 @@ int snd_card_disconnect(struct snd_card *card) if (err < 0) snd_printk(KERN_ERR "not all devices for card %i can be disconnected\n", card->number); + snd_info_card_disconnect(card); return 0; } @@ -360,7 +361,7 @@ int snd_card_free(struct snd_card *card) } if (card->private_free) card->private_free(card); - snd_info_unregister(card->proc_id); + snd_info_free_entry(card->proc_id); if (snd_info_card_free(card) < 0) { snd_printk(KERN_WARNING "unable to free card info\n"); /* Not fatal error */ @@ -625,9 +626,9 @@ int __init snd_card_info_init(void) int __exit snd_card_info_done(void) { - snd_info_unregister(snd_card_info_entry); + snd_info_free_entry(snd_card_info_entry); #ifdef MODULE - snd_info_unregister(snd_card_module_info_entry); + snd_info_free_entry(snd_card_module_info_entry); #endif return 0; } diff --git a/sound/core/oss/mixer_oss.c b/sound/core/oss/mixer_oss.c index 75a9505c7445..00c95def95aa 100644 --- a/sound/core/oss/mixer_oss.c +++ b/sound/core/oss/mixer_oss.c @@ -1193,10 +1193,8 @@ static void snd_mixer_oss_proc_init(struct snd_mixer_oss *mixer) static void snd_mixer_oss_proc_done(struct snd_mixer_oss *mixer) { - if (mixer->proc_entry) { - snd_info_unregister(mixer->proc_entry); - mixer->proc_entry = NULL; - } + snd_info_free_entry(mixer->proc_entry); + mixer->proc_entry = NULL; } #else /* !CONFIG_PROC_FS */ #define snd_mixer_oss_proc_init(mix) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 472fce0ee0e8..a92b93e5ebd5 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -2846,11 +2846,9 @@ static void snd_pcm_oss_proc_done(struct snd_pcm *pcm) int stream; for (stream = 0; stream < 2; ++stream) { struct snd_pcm_str *pstr = &pcm->streams[stream]; - if (pstr->oss.proc_entry) { - snd_info_unregister(pstr->oss.proc_entry); - pstr->oss.proc_entry = NULL; - snd_pcm_oss_proc_free_setup_list(pstr); - } + snd_info_free_entry(pstr->oss.proc_entry); + pstr->oss.proc_entry = NULL; + snd_pcm_oss_proc_free_setup_list(pstr); } } #else /* !CONFIG_SND_VERBOSE_PROCFS */ diff --git a/sound/core/pcm.c b/sound/core/pcm.c index 7581edd7b9ff..b8602471f7e5 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -494,19 +494,13 @@ static int snd_pcm_stream_proc_init(struct snd_pcm_str *pstr) static int snd_pcm_stream_proc_done(struct snd_pcm_str *pstr) { #ifdef CONFIG_SND_PCM_XRUN_DEBUG - if (pstr->proc_xrun_debug_entry) { - snd_info_unregister(pstr->proc_xrun_debug_entry); - pstr->proc_xrun_debug_entry = NULL; - } + snd_info_free_entry(pstr->proc_xrun_debug_entry); + pstr->proc_xrun_debug_entry = NULL; #endif - if (pstr->proc_info_entry) { - snd_info_unregister(pstr->proc_info_entry); - pstr->proc_info_entry = NULL; - } - if (pstr->proc_root) { - snd_info_unregister(pstr->proc_root); - pstr->proc_root = NULL; - } + snd_info_free_entry(pstr->proc_info_entry); + pstr->proc_info_entry = NULL; + snd_info_free_entry(pstr->proc_root); + pstr->proc_root = NULL; return 0; } @@ -570,29 +564,19 @@ static int snd_pcm_substream_proc_init(struct snd_pcm_substream *substream) return 0; } - + static int snd_pcm_substream_proc_done(struct snd_pcm_substream *substream) { - if (substream->proc_info_entry) { - snd_info_unregister(substream->proc_info_entry); - substream->proc_info_entry = NULL; - } - if (substream->proc_hw_params_entry) { - snd_info_unregister(substream->proc_hw_params_entry); - substream->proc_hw_params_entry = NULL; - } - if (substream->proc_sw_params_entry) { - snd_info_unregister(substream->proc_sw_params_entry); - substream->proc_sw_params_entry = NULL; - } - if (substream->proc_status_entry) { - snd_info_unregister(substream->proc_status_entry); - substream->proc_status_entry = NULL; - } - if (substream->proc_root) { - snd_info_unregister(substream->proc_root); - substream->proc_root = NULL; - } + snd_info_free_entry(substream->proc_info_entry); + substream->proc_info_entry = NULL; + snd_info_free_entry(substream->proc_hw_params_entry); + substream->proc_hw_params_entry = NULL; + snd_info_free_entry(substream->proc_sw_params_entry); + substream->proc_sw_params_entry = NULL; + snd_info_free_entry(substream->proc_status_entry); + substream->proc_status_entry = NULL; + snd_info_free_entry(substream->proc_root); + substream->proc_root = NULL; return 0; } #else /* !CONFIG_SND_VERBOSE_PROCFS */ @@ -1090,8 +1074,7 @@ static void snd_pcm_proc_init(void) static void snd_pcm_proc_done(void) { - if (snd_pcm_proc_entry) - snd_info_unregister(snd_pcm_proc_entry); + snd_info_free_entry(snd_pcm_proc_entry); } #else /* !CONFIG_PROC_FS */ diff --git a/sound/core/pcm_memory.c b/sound/core/pcm_memory.c index 067d2056db9a..be030cb4d373 100644 --- a/sound/core/pcm_memory.c +++ b/sound/core/pcm_memory.c @@ -101,7 +101,7 @@ int snd_pcm_lib_preallocate_free(struct snd_pcm_substream *substream) { snd_pcm_lib_preallocate_dma_free(substream); #ifdef CONFIG_SND_VERBOSE_PROCFS - snd_info_unregister(substream->proc_prealloc_entry); + snd_info_free_entry(substream->proc_prealloc_entry); substream->proc_prealloc_entry = NULL; #endif return 0; diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 8c15c66eb4aa..51577c22f8ce 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -1599,7 +1599,7 @@ static int snd_rawmidi_dev_unregister(struct snd_device *device) mutex_lock(®ister_mutex); list_del(&rmidi->list); if (rmidi->proc_entry) { - snd_info_unregister(rmidi->proc_entry); + snd_info_free_entry(rmidi->proc_entry); rmidi->proc_entry = NULL; } #ifdef CONFIG_SND_OSSEMUL diff --git a/sound/core/seq/oss/seq_oss.c b/sound/core/seq/oss/seq_oss.c index e7234135641c..92858cf8b6eb 100644 --- a/sound/core/seq/oss/seq_oss.c +++ b/sound/core/seq/oss/seq_oss.c @@ -303,8 +303,7 @@ register_proc(void) static void unregister_proc(void) { - if (info_entry) - snd_info_unregister(info_entry); + snd_info_free_entry(info_entry); info_entry = NULL; } #endif /* CONFIG_PROC_FS */ diff --git a/sound/core/seq/seq_device.c b/sound/core/seq/seq_device.c index 102ff548ce69..b85954e956d4 100644 --- a/sound/core/seq/seq_device.c +++ b/sound/core/seq/seq_device.c @@ -573,7 +573,7 @@ static void __exit alsa_seq_device_exit(void) { remove_drivers(); #ifdef CONFIG_PROC_FS - snd_info_unregister(info_entry); + snd_info_free_entry(info_entry); #endif if (num_ops) snd_printk(KERN_ERR "drivers not released (%d)\n", num_ops); diff --git a/sound/core/seq/seq_info.c b/sound/core/seq/seq_info.c index 142e9e6882c9..8a7fe5cca1c9 100644 --- a/sound/core/seq/seq_info.c +++ b/sound/core/seq/seq_info.c @@ -64,9 +64,9 @@ int __init snd_seq_info_init(void) int __exit snd_seq_info_done(void) { - snd_info_unregister(queues_entry); - snd_info_unregister(clients_entry); - snd_info_unregister(timer_entry); + snd_info_free_entry(queues_entry); + snd_info_free_entry(clients_entry); + snd_info_free_entry(timer_entry); return 0; } #endif diff --git a/sound/core/sound.c b/sound/core/sound.c index 7edd1fc58b17..b4430db3fa4c 100644 --- a/sound/core/sound.c +++ b/sound/core/sound.c @@ -387,8 +387,7 @@ int __init snd_minor_info_init(void) int __exit snd_minor_info_done(void) { - if (snd_minor_info_entry) - snd_info_unregister(snd_minor_info_entry); + snd_info_free_entry(snd_minor_info_entry); return 0; } #endif /* CONFIG_PROC_FS */ diff --git a/sound/core/sound_oss.c b/sound/core/sound_oss.c index 74f0fe5a1ba0..b2fc40aa520b 100644 --- a/sound/core/sound_oss.c +++ b/sound/core/sound_oss.c @@ -270,8 +270,7 @@ int __init snd_minor_info_oss_init(void) int __exit snd_minor_info_oss_done(void) { - if (snd_minor_info_oss_entry) - snd_info_unregister(snd_minor_info_oss_entry); + snd_info_free_entry(snd_minor_info_oss_entry); return 0; } #endif /* CONFIG_PROC_FS */ diff --git a/sound/core/timer.c b/sound/core/timer.c index 0a984e881c10..52ecbe1e9abb 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1126,7 +1126,7 @@ static void __init snd_timer_proc_init(void) static void __exit snd_timer_proc_done(void) { - snd_info_unregister(snd_timer_proc_entry); + snd_info_free_entry(snd_timer_proc_entry); } #else /* !CONFIG_PROC_FS */ #define snd_timer_proc_init() diff --git a/sound/drivers/opl4/opl4_proc.c b/sound/drivers/opl4/opl4_proc.c index e552ec34166f..11dd811771a4 100644 --- a/sound/drivers/opl4/opl4_proc.c +++ b/sound/drivers/opl4/opl4_proc.c @@ -159,8 +159,7 @@ int snd_opl4_create_proc(struct snd_opl4 *opl4) void snd_opl4_free_proc(struct snd_opl4 *opl4) { - if (opl4->proc_entry) - snd_info_unregister(opl4->proc_entry); + snd_info_free_entry(opl4->proc_entry); } #endif /* CONFIG_PROC_FS */ diff --git a/sound/pci/ac97/ac97_proc.c b/sound/pci/ac97/ac97_proc.c index 2118df50b9d6..a3fdd7da911c 100644 --- a/sound/pci/ac97/ac97_proc.c +++ b/sound/pci/ac97/ac97_proc.c @@ -457,14 +457,10 @@ void snd_ac97_proc_init(struct snd_ac97 * ac97) void snd_ac97_proc_done(struct snd_ac97 * ac97) { - if (ac97->proc_regs) { - snd_info_unregister(ac97->proc_regs); - ac97->proc_regs = NULL; - } - if (ac97->proc) { - snd_info_unregister(ac97->proc); - ac97->proc = NULL; - } + snd_info_free_entry(ac97->proc_regs); + ac97->proc_regs = NULL; + snd_info_free_entry(ac97->proc); + ac97->proc = NULL; } void snd_ac97_bus_proc_init(struct snd_ac97_bus * bus) @@ -485,8 +481,6 @@ void snd_ac97_bus_proc_init(struct snd_ac97_bus * bus) void snd_ac97_bus_proc_done(struct snd_ac97_bus * bus) { - if (bus->proc) { - snd_info_unregister(bus->proc); - bus->proc = NULL; - } + snd_info_free_entry(bus->proc); + bus->proc = NULL; } diff --git a/sound/pci/cs46xx/dsp_spos.c b/sound/pci/cs46xx/dsp_spos.c index 5c9711c0265c..89c402770a1d 100644 --- a/sound/pci/cs46xx/dsp_spos.c +++ b/sound/pci/cs46xx/dsp_spos.c @@ -868,35 +868,23 @@ int cs46xx_dsp_proc_done (struct snd_cs46xx *chip) struct dsp_spos_instance * ins = chip->dsp_spos_instance; int i; - if (ins->proc_sym_info_entry) { - snd_info_unregister(ins->proc_sym_info_entry); - ins->proc_sym_info_entry = NULL; - } - - if (ins->proc_modules_info_entry) { - snd_info_unregister(ins->proc_modules_info_entry); - ins->proc_modules_info_entry = NULL; - } - - if (ins->proc_parameter_dump_info_entry) { - snd_info_unregister(ins->proc_parameter_dump_info_entry); - ins->proc_parameter_dump_info_entry = NULL; - } - - if (ins->proc_sample_dump_info_entry) { - snd_info_unregister(ins->proc_sample_dump_info_entry); - ins->proc_sample_dump_info_entry = NULL; - } - - if (ins->proc_scb_info_entry) { - snd_info_unregister(ins->proc_scb_info_entry); - ins->proc_scb_info_entry = NULL; - } - - if (ins->proc_task_info_entry) { - snd_info_unregister(ins->proc_task_info_entry); - ins->proc_task_info_entry = NULL; - } + snd_info_free_entry(ins->proc_sym_info_entry); + ins->proc_sym_info_entry = NULL; + + snd_info_free_entry(ins->proc_modules_info_entry); + ins->proc_modules_info_entry = NULL; + + snd_info_free_entry(ins->proc_parameter_dump_info_entry); + ins->proc_parameter_dump_info_entry = NULL; + + snd_info_free_entry(ins->proc_sample_dump_info_entry); + ins->proc_sample_dump_info_entry = NULL; + + snd_info_free_entry(ins->proc_scb_info_entry); + ins->proc_scb_info_entry = NULL; + + snd_info_free_entry(ins->proc_task_info_entry); + ins->proc_task_info_entry = NULL; mutex_lock(&chip->spos_mutex); for (i = 0; i < ins->nscb; ++i) { @@ -905,10 +893,8 @@ int cs46xx_dsp_proc_done (struct snd_cs46xx *chip) } mutex_unlock(&chip->spos_mutex); - if (ins->proc_dsp_dir) { - snd_info_unregister (ins->proc_dsp_dir); - ins->proc_dsp_dir = NULL; - } + snd_info_free_entry(ins->proc_dsp_dir); + ins->proc_dsp_dir = NULL; return 0; } diff --git a/sound/pci/cs46xx/dsp_spos_scb_lib.c b/sound/pci/cs46xx/dsp_spos_scb_lib.c index 232b337852ff..343f51d5311b 100644 --- a/sound/pci/cs46xx/dsp_spos_scb_lib.c +++ b/sound/pci/cs46xx/dsp_spos_scb_lib.c @@ -233,7 +233,7 @@ void cs46xx_dsp_proc_free_scb_desc (struct dsp_scb_descriptor * scb) snd_printdd("cs46xx_dsp_proc_free_scb_desc: freeing %s\n",scb->scb_name); - snd_info_unregister(scb->proc_info); + snd_info_free_entry(scb->proc_info); scb->proc_info = NULL; snd_assert (scb_info != NULL, return); diff --git a/sound/synth/emux/emux_proc.c b/sound/synth/emux/emux_proc.c index 58b9601f3ad0..59144ec026e4 100644 --- a/sound/synth/emux/emux_proc.c +++ b/sound/synth/emux/emux_proc.c @@ -128,10 +128,8 @@ void snd_emux_proc_init(struct snd_emux *emu, struct snd_card *card, int device) void snd_emux_proc_free(struct snd_emux *emu) { - if (emu->proc) { - snd_info_unregister(emu->proc); - emu->proc = NULL; - } + snd_info_free_entry(emu->proc); + emu->proc = NULL; } #endif /* CONFIG_PROC_FS */ From c461482c8072bb073e6146db320d3da85cdc89ad Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 23 Jun 2006 14:38:23 +0200 Subject: [PATCH 0882/1063] [ALSA] Unregister device files at disconnection Orignally proposed by Sam Revitch . Unregister device files at disconnection to avoid the futher accesses. Also, the dev_unregister callback is removed and replaced with the combination of disconnect + free. A new function snd_card_free_when_closed() is introduced, which is used in USB disconnect callback. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- include/sound/core.h | 3 +- include/sound/timer.h | 1 - sound/core/control.c | 27 +++++---------- sound/core/device.c | 20 +++++------ sound/core/hwdep.c | 10 +++--- sound/core/init.c | 69 +++++++++++++++++++++++++++++-------- sound/core/oss/mixer_oss.c | 16 ++++----- sound/core/oss/pcm_oss.c | 16 ++++----- sound/core/pcm.c | 48 ++++++++++---------------- sound/core/rawmidi.c | 35 +++++-------------- sound/core/rtctimer.c | 2 +- sound/core/seq/seq_device.c | 11 ------ sound/core/timer.c | 54 +++++++++++++---------------- sound/pci/ac97/ac97_codec.c | 8 ++--- sound/usb/usbaudio.c | 2 +- 15 files changed, 149 insertions(+), 173 deletions(-) diff --git a/include/sound/core.h b/include/sound/core.h index bab3ff457e40..cf4001cf6248 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -71,7 +71,6 @@ struct snd_device_ops { int (*dev_free)(struct snd_device *dev); int (*dev_register)(struct snd_device *dev); int (*dev_disconnect)(struct snd_device *dev); - int (*dev_unregister)(struct snd_device *dev); }; struct snd_device { @@ -131,6 +130,7 @@ struct snd_card { state */ spinlock_t files_lock; /* lock the files for this card */ int shutdown; /* this card is going down */ + int free_on_last_close; /* free in context of file_release */ wait_queue_head_t shutdown_sleep; struct work_struct free_workq; /* for free in workqueue */ struct device *dev; @@ -244,6 +244,7 @@ struct snd_card *snd_card_new(int idx, const char *id, struct module *module, int extra_size); int snd_card_disconnect(struct snd_card *card); int snd_card_free(struct snd_card *card); +int snd_card_free_when_closed(struct snd_card *card); int snd_card_free_in_thread(struct snd_card *card); int snd_card_register(struct snd_card *card); int snd_card_info_init(void); diff --git a/include/sound/timer.h b/include/sound/timer.h index 5ece2bf541dc..d42c083db1da 100644 --- a/include/sound/timer.h +++ b/include/sound/timer.h @@ -129,7 +129,6 @@ void snd_timer_notify(struct snd_timer *timer, int event, struct timespec *tstam int snd_timer_global_new(char *id, int device, struct snd_timer **rtimer); int snd_timer_global_free(struct snd_timer *timer); int snd_timer_global_register(struct snd_timer *timer); -int snd_timer_global_unregister(struct snd_timer *timer); int snd_timer_open(struct snd_timer_instance **ti, char *owner, struct snd_timer_id *tid, unsigned int slave_id); int snd_timer_close(struct snd_timer_instance *timeri); diff --git a/sound/core/control.c b/sound/core/control.c index e9c8854d2f7b..f0c7272a2d48 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -1375,6 +1375,11 @@ static int snd_ctl_dev_disconnect(struct snd_device *device) struct snd_card *card = device->device_data; struct list_head *flist; struct snd_ctl_file *ctl; + int err, cardnum; + + snd_assert(card != NULL, return -ENXIO); + cardnum = card->number; + snd_assert(cardnum >= 0 && cardnum < SNDRV_CARDS, return -ENXIO); down_read(&card->controls_rwsem); list_for_each(flist, &card->ctl_files) { @@ -1383,6 +1388,10 @@ static int snd_ctl_dev_disconnect(struct snd_device *device) kill_fasync(&ctl->fasync, SIGIO, POLL_ERR); } up_read(&card->controls_rwsem); + + if ((err = snd_unregister_device(SNDRV_DEVICE_TYPE_CONTROL, + card, -1)) < 0) + return err; return 0; } @@ -1403,23 +1412,6 @@ static int snd_ctl_dev_free(struct snd_device *device) return 0; } -/* - * de-registration of the control device - */ -static int snd_ctl_dev_unregister(struct snd_device *device) -{ - struct snd_card *card = device->device_data; - int err, cardnum; - - snd_assert(card != NULL, return -ENXIO); - cardnum = card->number; - snd_assert(cardnum >= 0 && cardnum < SNDRV_CARDS, return -ENXIO); - if ((err = snd_unregister_device(SNDRV_DEVICE_TYPE_CONTROL, - card, -1)) < 0) - return err; - return snd_ctl_dev_free(device); -} - /* * create control core: * called from init.c @@ -1430,7 +1422,6 @@ int snd_ctl_create(struct snd_card *card) .dev_free = snd_ctl_dev_free, .dev_register = snd_ctl_dev_register, .dev_disconnect = snd_ctl_dev_disconnect, - .dev_unregister = snd_ctl_dev_unregister }; snd_assert(card != NULL, return -ENXIO); diff --git a/sound/core/device.c b/sound/core/device.c index 6ce4da4a1081..ccb25816ac9e 100644 --- a/sound/core/device.c +++ b/sound/core/device.c @@ -71,7 +71,7 @@ EXPORT_SYMBOL(snd_device_new); * @device_data: the data pointer to release * * Removes the device from the list on the card and invokes the - * callback, dev_unregister or dev_free, corresponding to the state. + * callbacks, dev_disconnect and dev_free, corresponding to the state. * Then release the device. * * Returns zero if successful, or a negative error code on failure or if the @@ -90,16 +90,14 @@ int snd_device_free(struct snd_card *card, void *device_data) continue; /* unlink */ list_del(&dev->list); - if ((dev->state == SNDRV_DEV_REGISTERED || - dev->state == SNDRV_DEV_DISCONNECTED) && - dev->ops->dev_unregister) { - if (dev->ops->dev_unregister(dev)) - snd_printk(KERN_ERR "device unregister failure\n"); - } else { - if (dev->ops->dev_free) { - if (dev->ops->dev_free(dev)) - snd_printk(KERN_ERR "device free failure\n"); - } + if (dev->state == SNDRV_DEV_REGISTERED && + dev->ops->dev_disconnect) + if (dev->ops->dev_disconnect(dev)) + snd_printk(KERN_ERR + "device disconnect failure\n"); + if (dev->ops->dev_free) { + if (dev->ops->dev_free(dev)) + snd_printk(KERN_ERR "device free failure\n"); } kfree(dev); return 0; diff --git a/sound/core/hwdep.c b/sound/core/hwdep.c index cbd8a63282b6..9aa9d94891f0 100644 --- a/sound/core/hwdep.c +++ b/sound/core/hwdep.c @@ -42,7 +42,7 @@ static DEFINE_MUTEX(register_mutex); static int snd_hwdep_free(struct snd_hwdep *hwdep); static int snd_hwdep_dev_free(struct snd_device *device); static int snd_hwdep_dev_register(struct snd_device *device); -static int snd_hwdep_dev_unregister(struct snd_device *device); +static int snd_hwdep_dev_disconnect(struct snd_device *device); static struct snd_hwdep *snd_hwdep_search(struct snd_card *card, int device) @@ -353,7 +353,7 @@ int snd_hwdep_new(struct snd_card *card, char *id, int device, static struct snd_device_ops ops = { .dev_free = snd_hwdep_dev_free, .dev_register = snd_hwdep_dev_register, - .dev_unregister = snd_hwdep_dev_unregister + .dev_disconnect = snd_hwdep_dev_disconnect, }; snd_assert(rhwdep != NULL, return -EINVAL); @@ -439,7 +439,7 @@ static int snd_hwdep_dev_register(struct snd_device *device) return 0; } -static int snd_hwdep_dev_unregister(struct snd_device *device) +static int snd_hwdep_dev_disconnect(struct snd_device *device) { struct snd_hwdep *hwdep = device->device_data; @@ -454,9 +454,9 @@ static int snd_hwdep_dev_unregister(struct snd_device *device) snd_unregister_oss_device(hwdep->oss_type, hwdep->card, hwdep->device); #endif snd_unregister_device(SNDRV_DEVICE_TYPE_HWDEP, hwdep->card, hwdep->device); - list_del(&hwdep->list); + list_del_init(&hwdep->list); mutex_unlock(®ister_mutex); - return snd_hwdep_free(hwdep); + return 0; } #ifdef CONFIG_PROC_FS diff --git a/sound/core/init.c b/sound/core/init.c index 1ecb029ff4c9..5850d99d21e3 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -327,22 +327,10 @@ EXPORT_SYMBOL(snd_card_disconnect); * Returns zero. Frees all associated devices and frees the control * interface associated to given soundcard. */ -int snd_card_free(struct snd_card *card) +static int snd_card_do_free(struct snd_card *card) { struct snd_shutdown_f_ops *s_f_ops; - if (card == NULL) - return -EINVAL; - mutex_lock(&snd_card_mutex); - snd_cards[card->number] = NULL; - mutex_unlock(&snd_card_mutex); - -#ifdef CONFIG_PM - wake_up(&card->power_sleep); -#endif - /* wait, until all devices are ready for the free operation */ - wait_event(card->shutdown_sleep, card->files == NULL); - #if defined(CONFIG_SND_MIXER_OSS) || defined(CONFIG_SND_MIXER_OSS_MODULE) if (snd_mixer_oss_notify_callback) snd_mixer_oss_notify_callback(card, SND_MIXER_OSS_NOTIFY_FREE); @@ -371,10 +359,55 @@ int snd_card_free(struct snd_card *card) card->s_f_ops = s_f_ops->next; kfree(s_f_ops); } + kfree(card); + return 0; +} + +static int snd_card_free_prepare(struct snd_card *card) +{ + if (card == NULL) + return -EINVAL; + (void) snd_card_disconnect(card); mutex_lock(&snd_card_mutex); + snd_cards[card->number] = NULL; snd_cards_lock &= ~(1 << card->number); mutex_unlock(&snd_card_mutex); - kfree(card); +#ifdef CONFIG_PM + wake_up(&card->power_sleep); +#endif + return 0; +} + +int snd_card_free_when_closed(struct snd_card *card) +{ + int free_now = 0; + int ret = snd_card_free_prepare(card); + if (ret) + return ret; + + spin_lock(&card->files_lock); + if (card->files == NULL) + free_now = 1; + else + card->free_on_last_close = 1; + spin_unlock(&card->files_lock); + + if (free_now) + snd_card_do_free(card); + return 0; +} + +EXPORT_SYMBOL(snd_card_free_when_closed); + +int snd_card_free(struct snd_card *card) +{ + int ret = snd_card_free_prepare(card); + if (ret) + return ret; + + /* wait, until all devices are ready for the free operation */ + wait_event(card->shutdown_sleep, card->files == NULL); + snd_card_do_free(card); return 0; } @@ -718,6 +751,7 @@ EXPORT_SYMBOL(snd_card_file_add); int snd_card_file_remove(struct snd_card *card, struct file *file) { struct snd_monitor_file *mfile, *pfile = NULL; + int last_close = 0; spin_lock(&card->files_lock); mfile = card->files; @@ -732,9 +766,14 @@ int snd_card_file_remove(struct snd_card *card, struct file *file) pfile = mfile; mfile = mfile->next; } - spin_unlock(&card->files_lock); if (card->files == NULL) + last_close = 1; + spin_unlock(&card->files_lock); + if (last_close) { wake_up(&card->shutdown_sleep); + if (card->free_on_last_close) + snd_card_do_free(card); + } if (!mfile) { snd_printk(KERN_ERR "ALSA card file remove problem (%p)\n", file); return -ENOENT; diff --git a/sound/core/oss/mixer_oss.c b/sound/core/oss/mixer_oss.c index 00c95def95aa..f4c67042e3ac 100644 --- a/sound/core/oss/mixer_oss.c +++ b/sound/core/oss/mixer_oss.c @@ -1310,21 +1310,19 @@ static int snd_mixer_oss_notify_handler(struct snd_card *card, int cmd) card->mixer_oss = mixer; snd_mixer_oss_build(mixer); snd_mixer_oss_proc_init(mixer); - } else if (cmd == SND_MIXER_OSS_NOTIFY_DISCONNECT) { - mixer = card->mixer_oss; - if (mixer == NULL || !mixer->oss_dev_alloc) - return 0; - snd_unregister_oss_device(SNDRV_OSS_DEVICE_TYPE_MIXER, mixer->card, 0); - mixer->oss_dev_alloc = 0; - } else { /* free */ + } else { mixer = card->mixer_oss; if (mixer == NULL) return 0; + if (mixer->oss_dev_alloc) { #ifdef SNDRV_OSS_INFO_DEV_MIXERS - snd_oss_info_unregister(SNDRV_OSS_INFO_DEV_MIXERS, mixer->card->number); + snd_oss_info_unregister(SNDRV_OSS_INFO_DEV_MIXERS, mixer->card->number); #endif - if (mixer->oss_dev_alloc) snd_unregister_oss_device(SNDRV_OSS_DEVICE_TYPE_MIXER, mixer->card, 0); + mixer->oss_dev_alloc = 0; + } + if (cmd == SND_MIXER_OSS_NOTIFY_DISCONNECT) + return 0; snd_mixer_oss_proc_done(mixer); return snd_mixer_oss_free1(mixer); } diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index a92b93e5ebd5..505b23ec4058 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -2929,6 +2929,12 @@ static int snd_pcm_oss_disconnect_minor(struct snd_pcm *pcm) snd_unregister_oss_device(SNDRV_OSS_DEVICE_TYPE_PCM, pcm->card, 1); } + if (dsp_map[pcm->card->number] == (int)pcm->device) { +#ifdef SNDRV_OSS_INFO_DEV_AUDIO + snd_oss_info_unregister(SNDRV_OSS_INFO_DEV_AUDIO, pcm->card->number); +#endif + } + pcm->oss.reg = 0; } return 0; } @@ -2936,15 +2942,7 @@ static int snd_pcm_oss_disconnect_minor(struct snd_pcm *pcm) static int snd_pcm_oss_unregister_minor(struct snd_pcm *pcm) { snd_pcm_oss_disconnect_minor(pcm); - if (pcm->oss.reg) { - if (dsp_map[pcm->card->number] == (int)pcm->device) { -#ifdef SNDRV_OSS_INFO_DEV_AUDIO - snd_oss_info_unregister(SNDRV_OSS_INFO_DEV_AUDIO, pcm->card->number); -#endif - } - pcm->oss.reg = 0; - snd_pcm_oss_proc_done(pcm); - } + snd_pcm_oss_proc_done(pcm); return 0; } diff --git a/sound/core/pcm.c b/sound/core/pcm.c index b8602471f7e5..f52178abf120 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -42,7 +42,6 @@ static int snd_pcm_free(struct snd_pcm *pcm); static int snd_pcm_dev_free(struct snd_device *device); static int snd_pcm_dev_register(struct snd_device *device); static int snd_pcm_dev_disconnect(struct snd_device *device); -static int snd_pcm_dev_unregister(struct snd_device *device); static struct snd_pcm *snd_pcm_search(struct snd_card *card, int device) { @@ -680,7 +679,6 @@ int snd_pcm_new(struct snd_card *card, char *id, int device, .dev_free = snd_pcm_dev_free, .dev_register = snd_pcm_dev_register, .dev_disconnect = snd_pcm_dev_disconnect, - .dev_unregister = snd_pcm_dev_unregister }; snd_assert(rpcm != NULL, return -EINVAL); @@ -724,6 +722,7 @@ static void snd_pcm_free_stream(struct snd_pcm_str * pstr) substream = pstr->substream; while (substream) { substream_next = substream->next; + snd_pcm_timer_done(substream); snd_pcm_substream_proc_done(substream); kfree(substream); substream = substream_next; @@ -740,7 +739,12 @@ static void snd_pcm_free_stream(struct snd_pcm_str * pstr) static int snd_pcm_free(struct snd_pcm *pcm) { + struct snd_pcm_notify *notify; + snd_assert(pcm != NULL, return -ENXIO); + list_for_each_entry(notify, &snd_pcm_notify_list, list) { + notify->n_unregister(pcm); + } if (pcm->private_free) pcm->private_free(pcm); snd_pcm_lib_preallocate_free_for_all(pcm); @@ -955,35 +959,22 @@ static int snd_pcm_dev_register(struct snd_device *device) static int snd_pcm_dev_disconnect(struct snd_device *device) { struct snd_pcm *pcm = device->device_data; - struct list_head *list; + struct snd_pcm_notify *notify; struct snd_pcm_substream *substream; - int cidx; + int cidx, devtype; mutex_lock(®ister_mutex); + if (list_empty(&pcm->list)) + goto unlock; + list_del_init(&pcm->list); for (cidx = 0; cidx < 2; cidx++) for (substream = pcm->streams[cidx].substream; substream; substream = substream->next) if (substream->runtime) substream->runtime->status->state = SNDRV_PCM_STATE_DISCONNECTED; - list_for_each(list, &snd_pcm_notify_list) { - struct snd_pcm_notify *notify; - notify = list_entry(list, struct snd_pcm_notify, list); + list_for_each_entry(notify, &snd_pcm_notify_list, list) { notify->n_disconnect(pcm); } - mutex_unlock(®ister_mutex); - return 0; -} - -static int snd_pcm_dev_unregister(struct snd_device *device) -{ - int cidx, devtype; - struct snd_pcm_substream *substream; - struct list_head *list; - struct snd_pcm *pcm = device->device_data; - - snd_assert(pcm != NULL, return -ENXIO); - mutex_lock(®ister_mutex); - list_del(&pcm->list); for (cidx = 0; cidx < 2; cidx++) { devtype = -1; switch (cidx) { @@ -995,23 +986,20 @@ static int snd_pcm_dev_unregister(struct snd_device *device) break; } snd_unregister_device(devtype, pcm->card, pcm->device); - for (substream = pcm->streams[cidx].substream; substream; substream = substream->next) - snd_pcm_timer_done(substream); - } - list_for_each(list, &snd_pcm_notify_list) { - struct snd_pcm_notify *notify; - notify = list_entry(list, struct snd_pcm_notify, list); - notify->n_unregister(pcm); } + unlock: mutex_unlock(®ister_mutex); - return snd_pcm_free(pcm); + return 0; } int snd_pcm_notify(struct snd_pcm_notify *notify, int nfree) { struct list_head *p; - snd_assert(notify != NULL && notify->n_register != NULL && notify->n_unregister != NULL, return -EINVAL); + snd_assert(notify != NULL && + notify->n_register != NULL && + notify->n_unregister != NULL && + notify->n_disconnect, return -EINVAL); mutex_lock(®ister_mutex); if (nfree) { list_del(¬ify->list); diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 51577c22f8ce..8a2bdfae63e3 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -55,7 +55,6 @@ static int snd_rawmidi_free(struct snd_rawmidi *rawmidi); static int snd_rawmidi_dev_free(struct snd_device *device); static int snd_rawmidi_dev_register(struct snd_device *device); static int snd_rawmidi_dev_disconnect(struct snd_device *device); -static int snd_rawmidi_dev_unregister(struct snd_device *device); static LIST_HEAD(snd_rawmidi_devices); static DEFINE_MUTEX(register_mutex); @@ -1426,7 +1425,6 @@ int snd_rawmidi_new(struct snd_card *card, char *id, int device, .dev_free = snd_rawmidi_dev_free, .dev_register = snd_rawmidi_dev_register, .dev_disconnect = snd_rawmidi_dev_disconnect, - .dev_unregister = snd_rawmidi_dev_unregister }; snd_assert(rrawmidi != NULL, return -EINVAL); @@ -1479,6 +1477,14 @@ static void snd_rawmidi_free_substreams(struct snd_rawmidi_str *stream) static int snd_rawmidi_free(struct snd_rawmidi *rmidi) { snd_assert(rmidi != NULL, return -ENXIO); + + snd_info_free_entry(rmidi->proc_entry); + rmidi->proc_entry = NULL; + mutex_lock(®ister_mutex); + if (rmidi->ops && rmidi->ops->dev_unregister) + rmidi->ops->dev_unregister(rmidi); + mutex_unlock(®ister_mutex); + snd_rawmidi_free_substreams(&rmidi->streams[SNDRV_RAWMIDI_STREAM_INPUT]); snd_rawmidi_free_substreams(&rmidi->streams[SNDRV_RAWMIDI_STREAM_OUTPUT]); if (rmidi->private_free) @@ -1587,21 +1593,6 @@ static int snd_rawmidi_dev_disconnect(struct snd_device *device) mutex_lock(®ister_mutex); list_del_init(&rmidi->list); - mutex_unlock(®ister_mutex); - return 0; -} - -static int snd_rawmidi_dev_unregister(struct snd_device *device) -{ - struct snd_rawmidi *rmidi = device->device_data; - - snd_assert(rmidi != NULL, return -ENXIO); - mutex_lock(®ister_mutex); - list_del(&rmidi->list); - if (rmidi->proc_entry) { - snd_info_free_entry(rmidi->proc_entry); - rmidi->proc_entry = NULL; - } #ifdef CONFIG_SND_OSSEMUL if (rmidi->ossreg) { if ((int)rmidi->device == midi_map[rmidi->card->number]) { @@ -1615,17 +1606,9 @@ static int snd_rawmidi_dev_unregister(struct snd_device *device) rmidi->ossreg = 0; } #endif /* CONFIG_SND_OSSEMUL */ - if (rmidi->ops && rmidi->ops->dev_unregister) - rmidi->ops->dev_unregister(rmidi); snd_unregister_device(SNDRV_DEVICE_TYPE_RAWMIDI, rmidi->card, rmidi->device); mutex_unlock(®ister_mutex); -#if defined(CONFIG_SND_SEQUENCER) || (defined(MODULE) && defined(CONFIG_SND_SEQUENCER_MODULE)) - if (rmidi->seq_dev) { - snd_device_free(rmidi->card, rmidi->seq_dev); - rmidi->seq_dev = NULL; - } -#endif - return snd_rawmidi_free(rmidi); + return 0; } /** diff --git a/sound/core/rtctimer.c b/sound/core/rtctimer.c index 84704ccb1829..412dd62b654e 100644 --- a/sound/core/rtctimer.c +++ b/sound/core/rtctimer.c @@ -156,7 +156,7 @@ static int __init rtctimer_init(void) static void __exit rtctimer_exit(void) { if (rtctimer) { - snd_timer_global_unregister(rtctimer); + snd_timer_global_free(rtctimer); rtctimer = NULL; } } diff --git a/sound/core/seq/seq_device.c b/sound/core/seq/seq_device.c index b85954e956d4..b79d011813c0 100644 --- a/sound/core/seq/seq_device.c +++ b/sound/core/seq/seq_device.c @@ -90,7 +90,6 @@ static int snd_seq_device_free(struct snd_seq_device *dev); static int snd_seq_device_dev_free(struct snd_device *device); static int snd_seq_device_dev_register(struct snd_device *device); static int snd_seq_device_dev_disconnect(struct snd_device *device); -static int snd_seq_device_dev_unregister(struct snd_device *device); static int init_device(struct snd_seq_device *dev, struct ops_list *ops); static int free_device(struct snd_seq_device *dev, struct ops_list *ops); @@ -189,7 +188,6 @@ int snd_seq_device_new(struct snd_card *card, int device, char *id, int argsize, .dev_free = snd_seq_device_dev_free, .dev_register = snd_seq_device_dev_register, .dev_disconnect = snd_seq_device_dev_disconnect, - .dev_unregister = snd_seq_device_dev_unregister }; if (result) @@ -308,15 +306,6 @@ static int snd_seq_device_dev_disconnect(struct snd_device *device) return 0; } -/* - * unregister the existing device - */ -static int snd_seq_device_dev_unregister(struct snd_device *device) -{ - struct snd_seq_device *dev = device->device_data; - return snd_seq_device_free(dev); -} - /* * register device driver * id = driver id diff --git a/sound/core/timer.c b/sound/core/timer.c index 52ecbe1e9abb..7e5e562fe356 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -88,7 +88,7 @@ static DEFINE_MUTEX(register_mutex); static int snd_timer_free(struct snd_timer *timer); static int snd_timer_dev_free(struct snd_device *device); static int snd_timer_dev_register(struct snd_device *device); -static int snd_timer_dev_unregister(struct snd_device *device); +static int snd_timer_dev_disconnect(struct snd_device *device); static void snd_timer_reschedule(struct snd_timer * timer, unsigned long ticks_left); @@ -773,7 +773,7 @@ int snd_timer_new(struct snd_card *card, char *id, struct snd_timer_id *tid, static struct snd_device_ops ops = { .dev_free = snd_timer_dev_free, .dev_register = snd_timer_dev_register, - .dev_unregister = snd_timer_dev_unregister + .dev_disconnect = snd_timer_dev_disconnect, }; snd_assert(tid != NULL, return -EINVAL); @@ -813,6 +813,21 @@ int snd_timer_new(struct snd_card *card, char *id, struct snd_timer_id *tid, static int snd_timer_free(struct snd_timer *timer) { snd_assert(timer != NULL, return -ENXIO); + + mutex_lock(®ister_mutex); + if (! list_empty(&timer->open_list_head)) { + struct list_head *p, *n; + struct snd_timer_instance *ti; + snd_printk(KERN_WARNING "timer %p is busy?\n", timer); + list_for_each_safe(p, n, &timer->open_list_head) { + list_del_init(p); + ti = list_entry(p, struct snd_timer_instance, open_list); + ti->timer = NULL; + } + } + list_del(&timer->device_list); + mutex_unlock(®ister_mutex); + if (timer->private_free) timer->private_free(timer); kfree(timer); @@ -867,30 +882,13 @@ static int snd_timer_dev_register(struct snd_device *dev) return 0; } -static int snd_timer_unregister(struct snd_timer *timer) -{ - struct list_head *p, *n; - struct snd_timer_instance *ti; - - snd_assert(timer != NULL, return -ENXIO); - mutex_lock(®ister_mutex); - if (! list_empty(&timer->open_list_head)) { - snd_printk(KERN_WARNING "timer 0x%lx is busy?\n", (long)timer); - list_for_each_safe(p, n, &timer->open_list_head) { - list_del_init(p); - ti = list_entry(p, struct snd_timer_instance, open_list); - ti->timer = NULL; - } - } - list_del(&timer->device_list); - mutex_unlock(®ister_mutex); - return snd_timer_free(timer); -} - -static int snd_timer_dev_unregister(struct snd_device *device) +static int snd_timer_dev_disconnect(struct snd_device *device) { struct snd_timer *timer = device->device_data; - return snd_timer_unregister(timer); + mutex_lock(®ister_mutex); + list_del_init(&timer->device_list); + mutex_unlock(®ister_mutex); + return 0; } void snd_timer_notify(struct snd_timer *timer, int event, struct timespec *tstamp) @@ -955,11 +953,6 @@ int snd_timer_global_register(struct snd_timer *timer) return snd_timer_dev_register(&dev); } -int snd_timer_global_unregister(struct snd_timer *timer) -{ - return snd_timer_unregister(timer); -} - /* * System timer */ @@ -1982,7 +1975,7 @@ static void __exit alsa_timer_exit(void) /* unregister the system timer */ list_for_each_safe(p, n, &snd_timer_list) { struct snd_timer *timer = list_entry(p, struct snd_timer, device_list); - snd_timer_unregister(timer); + snd_timer_free(timer); } snd_timer_proc_done(); #ifdef SNDRV_OSS_INFO_DEV_TIMERS @@ -2005,5 +1998,4 @@ EXPORT_SYMBOL(snd_timer_notify); EXPORT_SYMBOL(snd_timer_global_new); EXPORT_SYMBOL(snd_timer_global_free); EXPORT_SYMBOL(snd_timer_global_register); -EXPORT_SYMBOL(snd_timer_global_unregister); EXPORT_SYMBOL(snd_timer_interrupt); diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c index 51e83d7a839a..b35280ca2465 100644 --- a/sound/pci/ac97/ac97_codec.c +++ b/sound/pci/ac97/ac97_codec.c @@ -1817,13 +1817,13 @@ static int snd_ac97_dev_register(struct snd_device *device) return 0; } -/* unregister ac97 codec */ -static int snd_ac97_dev_unregister(struct snd_device *device) +/* disconnect ac97 codec */ +static int snd_ac97_dev_disconnect(struct snd_device *device) { struct snd_ac97 *ac97 = device->device_data; if (ac97->dev.bus) device_unregister(&ac97->dev); - return snd_ac97_free(ac97); + return 0; } /* build_ops to do nothing */ @@ -1860,7 +1860,7 @@ int snd_ac97_mixer(struct snd_ac97_bus *bus, struct snd_ac97_template *template, static struct snd_device_ops ops = { .dev_free = snd_ac97_dev_free, .dev_register = snd_ac97_dev_register, - .dev_unregister = snd_ac97_dev_unregister, + .dev_disconnect = snd_ac97_dev_disconnect, }; snd_assert(rac97 != NULL, return -EINVAL); diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c index 1b7f499c549d..314431385913 100644 --- a/sound/usb/usbaudio.c +++ b/sound/usb/usbaudio.c @@ -3499,7 +3499,7 @@ static void snd_usb_audio_disconnect(struct usb_device *dev, void *ptr) } usb_chip[chip->index] = NULL; mutex_unlock(®ister_mutex); - snd_card_free(card); + snd_card_free_when_closed(card); } else { mutex_unlock(®ister_mutex); } From 2b29b13c5794f648cd5e839796496704d787f5a6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 23 Jun 2006 14:38:26 +0200 Subject: [PATCH 0883/1063] [ALSA] Deprecate snd_card_free_in_thread() Deprecated snd_card_free_in_thread(), replaced with snd_card_free_when_closed(). Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- .../alsa/DocBook/writing-an-alsa-driver.tmpl | 5 +- include/sound/core.h | 3 - sound/core/init.c | 56 +------------------ sound/drivers/mpu401/mpu401.c | 2 +- sound/pcmcia/pdaudiocf/pdaudiocf.c | 2 +- sound/pcmcia/vx/vxpocket.c | 4 +- 6 files changed, 9 insertions(+), 63 deletions(-) diff --git a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl index b8dc51ca776c..4807ef79a94d 100644 --- a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl +++ b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl @@ -1054,9 +1054,8 @@ For a device which allows hotplugging, you can use - snd_card_free_in_thread. This one will - postpone the destruction and wait in a kernel-thread until all - devices are closed. + snd_card_free_when_closed. This one will + postpone the destruction until all devices are closed. diff --git a/include/sound/core.h b/include/sound/core.h index cf4001cf6248..1359c532b68e 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -25,7 +25,6 @@ #include /* wake_up() */ #include /* struct mutex */ #include /* struct rw_semaphore */ -#include /* struct workqueue_struct */ #include /* pm_message_t */ /* forward declarations */ @@ -132,7 +131,6 @@ struct snd_card { int shutdown; /* this card is going down */ int free_on_last_close; /* free in context of file_release */ wait_queue_head_t shutdown_sleep; - struct work_struct free_workq; /* for free in workqueue */ struct device *dev; #ifdef CONFIG_PM @@ -245,7 +243,6 @@ struct snd_card *snd_card_new(int idx, const char *id, int snd_card_disconnect(struct snd_card *card); int snd_card_free(struct snd_card *card); int snd_card_free_when_closed(struct snd_card *card); -int snd_card_free_in_thread(struct snd_card *card); int snd_card_register(struct snd_card *card); int snd_card_info_init(void); int snd_card_info_done(void); diff --git a/sound/core/init.c b/sound/core/init.c index 5850d99d21e3..d7607a25acdf 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -81,8 +81,6 @@ static inline int init_info_for_card(struct snd_card *card) #define init_info_for_card(card) #endif -static void snd_card_free_thread(void * __card); - /** * snd_card_new - create and initialize a soundcard structure * @idx: card index (address) [0 ... (SNDRV_CARDS-1)] @@ -145,7 +143,6 @@ struct snd_card *snd_card_new(int idx, const char *xid, INIT_LIST_HEAD(&card->ctl_files); spin_lock_init(&card->files_lock); init_waitqueue_head(&card->shutdown_sleep); - INIT_WORK(&card->free_workq, snd_card_free_thread, card); #ifdef CONFIG_PM mutex_init(&card->power_lock); init_waitqueue_head(&card->power_sleep); @@ -413,53 +410,6 @@ int snd_card_free(struct snd_card *card) EXPORT_SYMBOL(snd_card_free); -static void snd_card_free_thread(void * __card) -{ - struct snd_card *card = __card; - struct module * module = card->module; - - if (!try_module_get(module)) { - snd_printk(KERN_ERR "unable to lock toplevel module for card %i in free thread\n", card->number); - module = NULL; - } - - snd_card_free(card); - - module_put(module); -} - -/** - * snd_card_free_in_thread - call snd_card_free() in thread - * @card: soundcard structure - * - * This function schedules the call of snd_card_free() function in a - * work queue. When all devices are released (non-busy), the work - * is woken up and calls snd_card_free(). - * - * When a card can be disconnected at any time by hotplug service, - * this function should be used in disconnect (or detach) callback - * instead of calling snd_card_free() directly. - * - * Returns - zero otherwise a negative error code if the start of thread failed. - */ -int snd_card_free_in_thread(struct snd_card *card) -{ - if (card->files == NULL) { - snd_card_free(card); - return 0; - } - - if (schedule_work(&card->free_workq)) - return 0; - - snd_printk(KERN_ERR "schedule_work() failed in snd_card_free_in_thread for card %i\n", card->number); - /* try to free the structure immediately */ - snd_card_free(card); - return -EFAULT; -} - -EXPORT_SYMBOL(snd_card_free_in_thread); - static void choose_default_id(struct snd_card *card) { int i, len, idx_flag = 0, loops = SNDRV_CARDS; @@ -742,9 +692,9 @@ EXPORT_SYMBOL(snd_card_file_add); * * This function removes the file formerly added to the card via * snd_card_file_add() function. - * If all files are removed and the release of the card is - * scheduled, it will wake up the the thread to call snd_card_free() - * (see snd_card_free_in_thread() function). + * If all files are removed and snd_card_free_when_closed() was + * called beforehand, it processes the pending release of + * resources. * * Returns zero or a negative error code. */ diff --git a/sound/drivers/mpu401/mpu401.c b/sound/drivers/mpu401/mpu401.c index 17cc105b26fc..2de181ad0b05 100644 --- a/sound/drivers/mpu401/mpu401.c +++ b/sound/drivers/mpu401/mpu401.c @@ -211,7 +211,7 @@ static void __devexit snd_mpu401_pnp_remove(struct pnp_dev *dev) struct snd_card *card = (struct snd_card *) pnp_get_drvdata(dev); snd_card_disconnect(card); - snd_card_free_in_thread(card); + snd_card_free_when_closed(card); } static struct pnp_driver snd_mpu401_pnp_driver = { diff --git a/sound/pcmcia/pdaudiocf/pdaudiocf.c b/sound/pcmcia/pdaudiocf/pdaudiocf.c index 1c09e5f49da8..fd3590fcaedb 100644 --- a/sound/pcmcia/pdaudiocf/pdaudiocf.c +++ b/sound/pcmcia/pdaudiocf/pdaudiocf.c @@ -206,7 +206,7 @@ static void snd_pdacf_detach(struct pcmcia_device *link) snd_pdacf_powerdown(chip); chip->chip_status |= PDAUDIOCF_STAT_IS_STALE; /* to be sure */ snd_card_disconnect(chip->card); - snd_card_free_in_thread(chip->card); + snd_card_free_when_closed(chip->card); } /* diff --git a/sound/pcmcia/vx/vxpocket.c b/sound/pcmcia/vx/vxpocket.c index cafe6640cc1a..76c85cffb40e 100644 --- a/sound/pcmcia/vx/vxpocket.c +++ b/sound/pcmcia/vx/vxpocket.c @@ -65,7 +65,7 @@ static void vxpocket_release(struct pcmcia_device *link) } /* - * destructor, called from snd_card_free_in_thread() + * destructor, called from snd_card_free_when_closed() */ static int snd_vxpocket_dev_free(struct snd_device *device) { @@ -363,7 +363,7 @@ static void vxpocket_detach(struct pcmcia_device *link) chip->chip_status |= VX_STAT_IS_STALE; /* to be sure */ snd_card_disconnect(chip->card); vxpocket_release(link); - snd_card_free_in_thread(chip->card); + snd_card_free_when_closed(chip->card); } /* From 6dbe662874ba08585eaf732d126762c25ac8e3f7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 27 Jun 2006 18:28:53 +0200 Subject: [PATCH 0884/1063] [ALSA] Add experimental support of aggressive AC97 power-saving mode Added CONFIG_SND_AC97_POWER_SAVE kernel config to enable the support of aggressive AC97 power-saving mode. In this mode, the AC97 powerdown register bits are dynamically controlled at each open/close of PCM streams. The mode is activated via power_save option for snd-ac97-codec driver. As default it's off. It can be turned on/off on the fly via sysfs, too. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- include/sound/ac97_codec.h | 32 +++++ sound/drivers/Kconfig | 13 ++ sound/pci/ac97/ac97_codec.c | 264 +++++++++++++++++++++++++++++++----- sound/pci/ac97/ac97_pcm.c | 18 +++ sound/pci/intel8x0.c | 14 +- sound/pci/via82xx.c | 13 +- 6 files changed, 315 insertions(+), 39 deletions(-) diff --git a/include/sound/ac97_codec.h b/include/sound/ac97_codec.h index 758f8bf133c7..4c43521cc493 100644 --- a/include/sound/ac97_codec.h +++ b/include/sound/ac97_codec.h @@ -27,6 +27,7 @@ #include #include +#include #include "pcm.h" #include "control.h" #include "info.h" @@ -140,6 +141,20 @@ #define AC97_GP_DRSS_1011 0x0000 /* LR(C) 10+11(+12) */ #define AC97_GP_DRSS_78 0x0400 /* LR 7+8 */ +/* powerdown bits */ +#define AC97_PD_ADC_STATUS 0x0001 /* ADC status (RO) */ +#define AC97_PD_DAC_STATUS 0x0002 /* DAC status (RO) */ +#define AC97_PD_MIXER_STATUS 0x0004 /* Analog mixer status (RO) */ +#define AC97_PD_VREF_STATUS 0x0008 /* Vref status (RO) */ +#define AC97_PD_PR0 0x0100 /* Power down PCM ADCs and input MUX */ +#define AC97_PD_PR1 0x0200 /* Power down PCM front DAC */ +#define AC97_PD_PR2 0x0400 /* Power down Mixer (Vref still on) */ +#define AC97_PD_PR3 0x0800 /* Power down Mixer (Vref off) */ +#define AC97_PD_PR4 0x1000 /* Power down AC-Link */ +#define AC97_PD_PR5 0x2000 /* Disable internal clock usage */ +#define AC97_PD_PR6 0x4000 /* Headphone amplifier */ +#define AC97_PD_EAPD 0x8000 /* External Amplifer Power Down (EAPD) */ + /* extended audio ID bit defines */ #define AC97_EI_VRA 0x0001 /* Variable bit rate supported */ #define AC97_EI_DRA 0x0002 /* Double rate supported */ @@ -359,6 +374,7 @@ #define AC97_SCAP_INV_EAPD (1<<7) /* inverted EAPD */ #define AC97_SCAP_DETECT_BY_VENDOR (1<<8) /* use vendor registers for read tests */ #define AC97_SCAP_NO_SPDIF (1<<9) /* don't build SPDIF controls */ +#define AC97_SCAP_EAPD_LED (1<<10) /* EAPD as mute LED */ /* ac97->flags */ #define AC97_HAS_PC_BEEP (1<<0) /* force PC Speaker usage */ @@ -491,6 +507,12 @@ struct snd_ac97 { /* jack-sharing info */ unsigned char indep_surround; unsigned char channel_mode; + +#ifdef CONFIG_SND_AC97_POWER_SAVE + unsigned int power_up; /* power states */ + struct workqueue_struct *power_workq; + struct work_struct power_work; +#endif struct device dev; }; @@ -532,6 +554,15 @@ unsigned short snd_ac97_read(struct snd_ac97 *ac97, unsigned short reg); void snd_ac97_write_cache(struct snd_ac97 *ac97, unsigned short reg, unsigned short value); int snd_ac97_update(struct snd_ac97 *ac97, unsigned short reg, unsigned short value); int snd_ac97_update_bits(struct snd_ac97 *ac97, unsigned short reg, unsigned short mask, unsigned short value); +#ifdef CONFIG_SND_AC97_POWER_SAVE +int snd_ac97_update_power(struct snd_ac97 *ac97, int reg, int powerup); +#else +static inline int snd_ac97_update_power(struct snd_ac97 *ac97, int reg, + int powerup) +{ + return 0; +} +#endif #ifdef CONFIG_PM void snd_ac97_suspend(struct snd_ac97 *ac97); void snd_ac97_resume(struct snd_ac97 *ac97); @@ -583,6 +614,7 @@ struct ac97_pcm { copy_flag: 1, /* lowlevel driver must fill all entries */ spdif: 1; /* spdif pcm */ unsigned short aslots; /* active slots */ + unsigned short cur_dbl; /* current double-rate state */ unsigned int rates; /* available rates */ struct { unsigned short slots; /* driver input: requested AC97 slot numbers */ diff --git a/sound/drivers/Kconfig b/sound/drivers/Kconfig index 395c4ef52ac9..897dc2dfd7dd 100644 --- a/sound/drivers/Kconfig +++ b/sound/drivers/Kconfig @@ -100,4 +100,17 @@ config SND_MPU401 To compile this driver as a module, choose M here: the module will be called snd-mpu401. +config SND_AC97_POWER_SAVE + bool "AC97 Power-Saving Mode" + depends on SND_AC97_CODEC && EXPERIMENTAL + default n + help + Say Y here to enable the aggressive power-saving support of + AC97 codecs. In this mode, the power-mode is dynamically + controlled at each open/close. + + The mode is activated by passing power_save=1 option to + snd-ac97-codec driver. You can toggle it dynamically over + sysfs, too. + endmenu diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c index b35280ca2465..f82c636e99a9 100644 --- a/sound/pci/ac97/ac97_codec.c +++ b/sound/pci/ac97/ac97_codec.c @@ -47,6 +47,11 @@ static int enable_loopback; module_param(enable_loopback, bool, 0444); MODULE_PARM_DESC(enable_loopback, "Enable AC97 ADC/DAC Loopback Control"); +#ifdef CONFIG_SND_AC97_POWER_SAVE +static int power_save; +module_param(power_save, bool, 0644); +MODULE_PARM_DESC(power_save, "Enable AC97 power-saving control"); +#endif /* */ @@ -187,6 +192,8 @@ static const struct ac97_codec_id snd_ac97_codec_ids[] = { }; +static void update_power_regs(struct snd_ac97 *ac97); + /* * I/O routines */ @@ -554,6 +561,18 @@ int snd_ac97_put_volsw(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value } err = snd_ac97_update_bits(ac97, reg, val_mask, val); snd_ac97_page_restore(ac97, page_save); +#ifdef CONFIG_SND_AC97_POWER_SAVE + /* check analog mixer power-down */ + if ((val_mask & 0x8000) && + (kcontrol->private_value & (1<<30))) { + if (val & 0x8000) + ac97->power_up &= ~(1 << (reg>>1)); + else + ac97->power_up |= 1 << (reg>>1); + if (power_save) + update_power_regs(ac97); + } +#endif return err; } @@ -962,6 +981,10 @@ static int snd_ac97_bus_dev_free(struct snd_device *device) static int snd_ac97_free(struct snd_ac97 *ac97) { if (ac97) { +#ifdef CONFIG_SND_AC97_POWER_SAVE + if (ac97->power_workq) + destroy_workqueue(ac97->power_workq); +#endif snd_ac97_proc_done(ac97); if (ac97->bus) ac97->bus->codec[ac97->num] = NULL; @@ -1117,7 +1140,9 @@ struct snd_kcontrol *snd_ac97_cnew(const struct snd_kcontrol_new *_template, str /* * create mute switch(es) for normal stereo controls */ -static int snd_ac97_cmute_new_stereo(struct snd_card *card, char *name, int reg, int check_stereo, struct snd_ac97 *ac97) +static int snd_ac97_cmute_new_stereo(struct snd_card *card, char *name, int reg, + int check_stereo, int check_amix, + struct snd_ac97 *ac97) { struct snd_kcontrol *kctl; int err; @@ -1137,10 +1162,14 @@ static int snd_ac97_cmute_new_stereo(struct snd_card *card, char *name, int reg, } if (mute_mask == 0x8080) { struct snd_kcontrol_new tmp = AC97_DOUBLE(name, reg, 15, 7, 1, 1); + if (check_amix) + tmp.private_value |= (1 << 30); tmp.index = ac97->num; kctl = snd_ctl_new1(&tmp, ac97); } else { struct snd_kcontrol_new tmp = AC97_SINGLE(name, reg, 15, 1, 1); + if (check_amix) + tmp.private_value |= (1 << 30); tmp.index = ac97->num; kctl = snd_ctl_new1(&tmp, ac97); } @@ -1186,7 +1215,9 @@ static int snd_ac97_cvol_new(struct snd_card *card, char *name, int reg, unsigne /* * create a mute-switch and a volume for normal stereo/mono controls */ -static int snd_ac97_cmix_new_stereo(struct snd_card *card, const char *pfx, int reg, int check_stereo, struct snd_ac97 *ac97) +static int snd_ac97_cmix_new_stereo(struct snd_card *card, const char *pfx, + int reg, int check_stereo, int check_amix, + struct snd_ac97 *ac97) { int err; char name[44]; @@ -1197,7 +1228,9 @@ static int snd_ac97_cmix_new_stereo(struct snd_card *card, const char *pfx, int if (snd_ac97_try_bit(ac97, reg, 15)) { sprintf(name, "%s Switch", pfx); - if ((err = snd_ac97_cmute_new_stereo(card, name, reg, check_stereo, ac97)) < 0) + if ((err = snd_ac97_cmute_new_stereo(card, name, reg, + check_stereo, check_amix, + ac97)) < 0) return err; } check_volume_resolution(ac97, reg, &lo_max, &hi_max); @@ -1209,8 +1242,10 @@ static int snd_ac97_cmix_new_stereo(struct snd_card *card, const char *pfx, int return 0; } -#define snd_ac97_cmix_new(card, pfx, reg, ac97) snd_ac97_cmix_new_stereo(card, pfx, reg, 0, ac97) -#define snd_ac97_cmute_new(card, name, reg, ac97) snd_ac97_cmute_new_stereo(card, name, reg, 0, ac97) +#define snd_ac97_cmix_new(card, pfx, reg, acheck, ac97) \ + snd_ac97_cmix_new_stereo(card, pfx, reg, 0, acheck, ac97) +#define snd_ac97_cmute_new(card, name, reg, acheck, ac97) \ + snd_ac97_cmute_new_stereo(card, name, reg, 0, acheck, ac97) static unsigned int snd_ac97_determine_spdif_rates(struct snd_ac97 *ac97); @@ -1226,9 +1261,11 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97) /* AD claims to remove this control from AD1887, although spec v2.2 does not allow this */ if (snd_ac97_try_volume_mix(ac97, AC97_MASTER)) { if (ac97->flags & AC97_HAS_NO_MASTER_VOL) - err = snd_ac97_cmute_new(card, "Master Playback Switch", AC97_MASTER, ac97); + err = snd_ac97_cmute_new(card, "Master Playback Switch", + AC97_MASTER, 0, ac97); else - err = snd_ac97_cmix_new(card, "Master Playback", AC97_MASTER, ac97); + err = snd_ac97_cmix_new(card, "Master Playback", + AC97_MASTER, 0, ac97); if (err < 0) return err; } @@ -1265,19 +1302,23 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97) if ((snd_ac97_try_volume_mix(ac97, AC97_SURROUND_MASTER)) && !(ac97->flags & AC97_AD_MULTI)) { /* Surround Master (0x38) is with stereo mutes */ - if ((err = snd_ac97_cmix_new_stereo(card, "Surround Playback", AC97_SURROUND_MASTER, 1, ac97)) < 0) + if ((err = snd_ac97_cmix_new_stereo(card, "Surround Playback", + AC97_SURROUND_MASTER, 1, 0, + ac97)) < 0) return err; } /* build headphone controls */ if (snd_ac97_try_volume_mix(ac97, AC97_HEADPHONE)) { - if ((err = snd_ac97_cmix_new(card, "Headphone Playback", AC97_HEADPHONE, ac97)) < 0) + if ((err = snd_ac97_cmix_new(card, "Headphone Playback", + AC97_HEADPHONE, 0, ac97)) < 0) return err; } /* build master mono controls */ if (snd_ac97_try_volume_mix(ac97, AC97_MASTER_MONO)) { - if ((err = snd_ac97_cmix_new(card, "Master Mono Playback", AC97_MASTER_MONO, ac97)) < 0) + if ((err = snd_ac97_cmix_new(card, "Master Mono Playback", + AC97_MASTER_MONO, 0, ac97)) < 0) return err; } @@ -1310,7 +1351,8 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97) /* build Phone controls */ if (!(ac97->flags & AC97_HAS_NO_PHONE)) { if (snd_ac97_try_volume_mix(ac97, AC97_PHONE)) { - if ((err = snd_ac97_cmix_new(card, "Phone Playback", AC97_PHONE, ac97)) < 0) + if ((err = snd_ac97_cmix_new(card, "Phone Playback", + AC97_PHONE, 1, ac97)) < 0) return err; } } @@ -1318,7 +1360,8 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97) /* build MIC controls */ if (!(ac97->flags & AC97_HAS_NO_MIC)) { if (snd_ac97_try_volume_mix(ac97, AC97_MIC)) { - if ((err = snd_ac97_cmix_new(card, "Mic Playback", AC97_MIC, ac97)) < 0) + if ((err = snd_ac97_cmix_new(card, "Mic Playback", + AC97_MIC, 1, ac97)) < 0) return err; if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_controls_mic_boost, ac97))) < 0) return err; @@ -1327,14 +1370,16 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97) /* build Line controls */ if (snd_ac97_try_volume_mix(ac97, AC97_LINE)) { - if ((err = snd_ac97_cmix_new(card, "Line Playback", AC97_LINE, ac97)) < 0) + if ((err = snd_ac97_cmix_new(card, "Line Playback", + AC97_LINE, 1, ac97)) < 0) return err; } /* build CD controls */ if (!(ac97->flags & AC97_HAS_NO_CD)) { if (snd_ac97_try_volume_mix(ac97, AC97_CD)) { - if ((err = snd_ac97_cmix_new(card, "CD Playback", AC97_CD, ac97)) < 0) + if ((err = snd_ac97_cmix_new(card, "CD Playback", + AC97_CD, 1, ac97)) < 0) return err; } } @@ -1342,7 +1387,8 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97) /* build Video controls */ if (!(ac97->flags & AC97_HAS_NO_VIDEO)) { if (snd_ac97_try_volume_mix(ac97, AC97_VIDEO)) { - if ((err = snd_ac97_cmix_new(card, "Video Playback", AC97_VIDEO, ac97)) < 0) + if ((err = snd_ac97_cmix_new(card, "Video Playback", + AC97_VIDEO, 1, ac97)) < 0) return err; } } @@ -1350,7 +1396,8 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97) /* build Aux controls */ if (!(ac97->flags & AC97_HAS_NO_AUX)) { if (snd_ac97_try_volume_mix(ac97, AC97_AUX)) { - if ((err = snd_ac97_cmix_new(card, "Aux Playback", AC97_AUX, ac97)) < 0) + if ((err = snd_ac97_cmix_new(card, "Aux Playback", + AC97_AUX, 1, ac97)) < 0) return err; } } @@ -1385,9 +1432,12 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97) } else { if (!(ac97->flags & AC97_HAS_NO_STD_PCM)) { if (ac97->flags & AC97_HAS_NO_PCM_VOL) - err = snd_ac97_cmute_new(card, "PCM Playback Switch", AC97_PCM, ac97); + err = snd_ac97_cmute_new(card, + "PCM Playback Switch", + AC97_PCM, 0, ac97); else - err = snd_ac97_cmix_new(card, "PCM Playback", AC97_PCM, ac97); + err = snd_ac97_cmix_new(card, "PCM Playback", + AC97_PCM, 0, ac97); if (err < 0) return err; } @@ -1398,7 +1448,9 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97) if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_control_capture_src, ac97))) < 0) return err; if (snd_ac97_try_bit(ac97, AC97_REC_GAIN, 15)) { - if ((err = snd_ac97_cmute_new(card, "Capture Switch", AC97_REC_GAIN, ac97)) < 0) + err = snd_ac97_cmute_new(card, "Capture Switch", + AC97_REC_GAIN, 0, ac97); + if (err < 0) return err; } if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_control_capture_vol, ac97))) < 0) @@ -1829,6 +1881,13 @@ static int snd_ac97_dev_disconnect(struct snd_device *device) /* build_ops to do nothing */ static struct snd_ac97_build_ops null_build_ops; +#ifdef CONFIG_SND_AC97_POWER_SAVE +static void do_update_power(void *data) +{ + update_power_regs(data); +} +#endif + /** * snd_ac97_mixer - create an Codec97 component * @bus: the AC97 bus which codec is attached to @@ -1883,6 +1942,10 @@ int snd_ac97_mixer(struct snd_ac97_bus *bus, struct snd_ac97_template *template, bus->codec[ac97->num] = ac97; mutex_init(&ac97->reg_mutex); mutex_init(&ac97->page_mutex); +#ifdef CONFIG_SND_AC97_POWER_SAVE + ac97->power_workq = create_workqueue("ac97"); + INIT_WORK(&ac97->power_work, do_update_power, ac97); +#endif #ifdef CONFIG_PCI if (ac97->pci) { @@ -2117,15 +2180,8 @@ int snd_ac97_mixer(struct snd_ac97_bus *bus, struct snd_ac97_template *template, return -ENOMEM; } } - /* make sure the proper powerdown bits are cleared */ - if (ac97->scaps && ac97_is_audio(ac97)) { - reg = snd_ac97_read(ac97, AC97_EXTENDED_STATUS); - if (ac97->scaps & AC97_SCAP_SURROUND_DAC) - reg &= ~AC97_EA_PRJ; - if (ac97->scaps & AC97_SCAP_CENTER_LFE_DAC) - reg &= ~(AC97_EA_PRI | AC97_EA_PRK); - snd_ac97_write_cache(ac97, AC97_EXTENDED_STATUS, reg); - } + if (ac97_is_audio(ac97)) + update_power_regs(ac97); snd_ac97_proc_init(ac97); if ((err = snd_device_new(card, SNDRV_DEV_CODEC, ac97, &ops)) < 0) { snd_ac97_free(ac97); @@ -2153,22 +2209,155 @@ static void snd_ac97_powerdown(struct snd_ac97 *ac97) snd_ac97_write(ac97, AC97_HEADPHONE, 0x9f9f); } - power = ac97->regs[AC97_POWERDOWN] | 0x8000; /* EAPD */ - power |= 0x4000; /* Headphone amplifier powerdown */ - power |= 0x0300; /* ADC & DAC powerdown */ + /* surround, CLFE, mic powerdown */ + power = ac97->regs[AC97_EXTENDED_STATUS]; + if (ac97->scaps & AC97_SCAP_SURROUND_DAC) + power |= AC97_EA_PRJ; + if (ac97->scaps & AC97_SCAP_CENTER_LFE_DAC) + power |= AC97_EA_PRI | AC97_EA_PRK; + power |= AC97_EA_PRL; + snd_ac97_write(ac97, AC97_EXTENDED_STATUS, power); + + /* powerdown external amplifier */ + if (ac97->scaps & AC97_SCAP_INV_EAPD) + power = ac97->regs[AC97_POWERDOWN] & ~AC97_PD_EAPD; + else if (! (ac97->scaps & AC97_SCAP_EAPD_LED)) + power = ac97->regs[AC97_POWERDOWN] | AC97_PD_EAPD; + power |= AC97_PD_PR6; /* Headphone amplifier powerdown */ + power |= AC97_PD_PR0 | AC97_PD_PR1; /* ADC & DAC powerdown */ snd_ac97_write(ac97, AC97_POWERDOWN, power); udelay(100); - power |= 0x0400; /* Analog Mixer powerdown (Vref on) */ - snd_ac97_write(ac97, AC97_POWERDOWN, power); - udelay(100); -#if 0 - /* FIXME: this causes click noises on some boards at resume */ - power |= 0x3800; /* AC-link powerdown, internal Clk disable */ + power |= AC97_PD_PR2 | AC97_PD_PR3; /* Analog Mixer powerdown */ snd_ac97_write(ac97, AC97_POWERDOWN, power); +#ifdef CONFIG_SND_AC97_POWER_SAVE + if (power_save) { + udelay(100); + /* AC-link powerdown, internal Clk disable */ + /* FIXME: this may cause click noises on some boards */ + power |= AC97_PD_PR4 | AC97_PD_PR5; + snd_ac97_write(ac97, AC97_POWERDOWN, power); + } #endif } +struct ac97_power_reg { + unsigned short reg; + unsigned short power_reg; + unsigned short mask; +}; + +enum { PWIDX_ADC, PWIDX_FRONT, PWIDX_CLFE, PWIDX_SURR, PWIDX_MIC, PWIDX_SIZE }; + +static struct ac97_power_reg power_regs[PWIDX_SIZE] = { + [PWIDX_ADC] = { AC97_PCM_LR_ADC_RATE, AC97_POWERDOWN, AC97_PD_PR0}, + [PWIDX_FRONT] = { AC97_PCM_FRONT_DAC_RATE, AC97_POWERDOWN, AC97_PD_PR1}, + [PWIDX_CLFE] = { AC97_PCM_LFE_DAC_RATE, AC97_EXTENDED_STATUS, + AC97_EA_PRI | AC97_EA_PRK}, + [PWIDX_SURR] = { AC97_PCM_SURR_DAC_RATE, AC97_EXTENDED_STATUS, + AC97_EA_PRJ}, + [PWIDX_MIC] = { AC97_PCM_MIC_ADC_RATE, AC97_EXTENDED_STATUS, + AC97_EA_PRL}, +}; + +#ifdef CONFIG_SND_AC97_POWER_SAVE +/** + * snd_ac97_update_power - update the powerdown register + * @ac97: the codec instance + * @reg: the rate register, e.g. AC97_PCM_FRONT_DAC_RATE + * @powerup: non-zero when power up the part + * + * Update the AC97 powerdown register bits of the given part. + */ +int snd_ac97_update_power(struct snd_ac97 *ac97, int reg, int powerup) +{ + int i; + + if (! ac97) + return 0; + + if (reg) { + /* SPDIF requires DAC power, too */ + if (reg == AC97_SPDIF) + reg = AC97_PCM_FRONT_DAC_RATE; + for (i = 0; i < PWIDX_SIZE; i++) { + if (power_regs[i].reg == reg) { + if (powerup) + ac97->power_up |= (1 << i); + else + ac97->power_up &= ~(1 << i); + break; + } + } + } + + if (! power_save) + return 0; + + if (! powerup && ac97->power_workq) + /* adjust power-down bits after two seconds delay + * (for avoiding loud click noises for many (OSS) apps + * that open/close frequently) + */ + queue_delayed_work(ac97->power_workq, &ac97->power_work, HZ*2); + else + update_power_regs(ac97); + + return 0; +} + +EXPORT_SYMBOL(snd_ac97_update_power); +#endif /* CONFIG_SND_AC97_POWER_SAVE */ + +static void update_power_regs(struct snd_ac97 *ac97) +{ + unsigned int power_up, bits; + int i; + +#ifdef CONFIG_SND_AC97_POWER_SAVE + if (power_save) + power_up = ac97->power_up; + else { +#endif + power_up = (1 << PWIDX_FRONT) | (1 << PWIDX_ADC); + power_up |= (1 << PWIDX_MIC); + if (ac97->scaps & AC97_SCAP_SURROUND_DAC) + power_up |= (1 << PWIDX_SURR); + if (ac97->scaps & AC97_SCAP_CENTER_LFE_DAC) + power_up |= (1 << PWIDX_CLFE); +#ifdef CONFIG_SND_AC97_POWER_SAVE + } +#endif + if (power_up) { + if (ac97->regs[AC97_POWERDOWN] & AC97_PD_PR2) { + /* needs power-up analog mix and vref */ + snd_ac97_update_bits(ac97, AC97_POWERDOWN, + AC97_PD_PR3, 0); + msleep(1); + snd_ac97_update_bits(ac97, AC97_POWERDOWN, + AC97_PD_PR2, 0); + } + } + for (i = 0; i < PWIDX_SIZE; i++) { + if (power_up & (1 << i)) + bits = 0; + else + bits = power_regs[i].mask; + snd_ac97_update_bits(ac97, power_regs[i].power_reg, + power_regs[i].mask, bits); + } + if (! power_up) { + if (! (ac97->regs[AC97_POWERDOWN] & AC97_PD_PR2)) { + /* power down analog mix and vref */ + snd_ac97_update_bits(ac97, AC97_POWERDOWN, + AC97_PD_PR2, AC97_PD_PR2); + snd_ac97_update_bits(ac97, AC97_POWERDOWN, + AC97_PD_PR3, AC97_PD_PR3); + } + } +} + + #ifdef CONFIG_PM /** * snd_ac97_suspend - General suspend function for AC97 codec @@ -2484,6 +2673,7 @@ static int tune_mute_led(struct snd_ac97 *ac97) msw->put = master_mute_sw_put; snd_ac97_remove_ctl(ac97, "External Amplifier", NULL); snd_ac97_update_bits(ac97, AC97_POWERDOWN, 0x8000, 0x8000); /* mute LED on */ + ac97->scaps |= AC97_SCAP_EAPD_LED; return 0; } diff --git a/sound/pci/ac97/ac97_pcm.c b/sound/pci/ac97/ac97_pcm.c index f684aa2c0067..3758d07182f8 100644 --- a/sound/pci/ac97/ac97_pcm.c +++ b/sound/pci/ac97/ac97_pcm.c @@ -269,6 +269,7 @@ int snd_ac97_set_rate(struct snd_ac97 *ac97, int reg, unsigned int rate) return -EINVAL; } + snd_ac97_update_power(ac97, reg, 1); switch (reg) { case AC97_PCM_MIC_ADC_RATE: if ((ac97->regs[AC97_EXTENDED_STATUS] & AC97_EA_VRM) == 0) /* MIC VRA */ @@ -606,6 +607,7 @@ int snd_ac97_pcm_open(struct ac97_pcm *pcm, unsigned int rate, goto error; } } + pcm->cur_dbl = r; spin_unlock_irq(&pcm->bus->bus_lock); for (i = 3; i < 12; i++) { if (!(slots & (1 << i))) @@ -651,6 +653,21 @@ int snd_ac97_pcm_close(struct ac97_pcm *pcm) unsigned short slots = pcm->aslots; int i, cidx; +#ifdef CONFIG_SND_AC97_POWER_SAVE + int r = pcm->cur_dbl; + for (i = 3; i < 12; i++) { + if (!(slots & (1 << i))) + continue; + for (cidx = 0; cidx < 4; cidx++) { + if (pcm->r[r].rslots[cidx] & (1 << i)) { + int reg = get_slot_reg(pcm, cidx, i, r); + snd_ac97_update_power(pcm->r[r].codec[cidx], + reg, 0); + } + } + } +#endif + bus = pcm->bus; spin_lock_irq(&pcm->bus->bus_lock); for (i = 3; i < 12; i++) { @@ -660,6 +677,7 @@ int snd_ac97_pcm_close(struct ac97_pcm *pcm) bus->used_slots[pcm->stream][cidx] &= ~(1 << i); } pcm->aslots = 0; + pcm->cur_dbl = 0; spin_unlock_irq(&pcm->bus->bus_lock); return 0; } diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c index 6874263f1681..72dbaedcbdf5 100644 --- a/sound/pci/intel8x0.c +++ b/sound/pci/intel8x0.c @@ -2251,6 +2251,16 @@ static int snd_intel8x0_ich_chip_init(struct intel8x0 *chip, int probing) /* ACLink on, 2 channels */ cnt = igetdword(chip, ICHREG(GLOB_CNT)); cnt &= ~(ICH_ACLINK | ICH_PCM_246_MASK); +#ifdef CONFIG_SND_AC97_POWER_SAVE + /* do cold reset - the full ac97 powerdown may leave the controller + * in a warm state but actually it cannot communicate with the codec. + */ + iputdword(chip, ICHREG(GLOB_CNT), cnt & ~ICH_AC97COLD); + cnt = igetdword(chip, ICHREG(GLOB_CNT)); + udelay(10); + iputdword(chip, ICHREG(GLOB_CNT), cnt | ICH_AC97COLD); + msleep(1); +#else /* finish cold or do warm reset */ cnt |= (cnt & ICH_AC97COLD) == 0 ? ICH_AC97COLD : ICH_AC97WARM; iputdword(chip, ICHREG(GLOB_CNT), cnt); @@ -2265,6 +2275,7 @@ static int snd_intel8x0_ich_chip_init(struct intel8x0 *chip, int probing) return -EIO; __ok: +#endif if (probing) { /* wait for any codec ready status. * Once it becomes ready it should remain ready @@ -2485,7 +2496,7 @@ static int intel8x0_resume(struct pci_dev *pci) card->shortname, chip); chip->irq = pci->irq; synchronize_irq(chip->irq); - snd_intel8x0_chip_init(chip, 1); + snd_intel8x0_chip_init(chip, 0); /* re-initialize mixer stuff */ if (chip->device_type == DEVICE_INTEL_ICH4) { @@ -2615,6 +2626,7 @@ static void __devinit intel8x0_measure_ac97_clock(struct intel8x0 *chip) /* not 48000Hz, tuning the clock.. */ chip->ac97_bus->clock = (chip->ac97_bus->clock * 48000) / pos; printk(KERN_INFO "intel8x0: clocking to %d\n", chip->ac97_bus->clock); + snd_ac97_update_power(chip->ac97[0], AC97_PCM_FRONT_DAC_RATE, 0); } #ifdef CONFIG_PROC_FS diff --git a/sound/pci/via82xx.c b/sound/pci/via82xx.c index 08da9234efb3..2c23a665c3e3 100644 --- a/sound/pci/via82xx.c +++ b/sound/pci/via82xx.c @@ -1277,7 +1277,18 @@ static int snd_via82xx_pcm_close(struct snd_pcm_substream *substream) if (! ratep->used) ratep->rate = 0; spin_unlock_irq(&ratep->lock); - + if (! ratep->rate) { + if (! viadev->direction) { + snd_ac97_update_power(chip->ac97, + AC97_PCM_FRONT_DAC_RATE, 0); + snd_ac97_update_power(chip->ac97, + AC97_PCM_SURR_DAC_RATE, 0); + snd_ac97_update_power(chip->ac97, + AC97_PCM_LFE_DAC_RATE, 0); + } else + snd_ac97_update_power(chip->ac97, + AC97_PCM_LR_ADC_RATE, 0); + } viadev->substream = NULL; return 0; } From 82466ad76d60c35bf1c48ba1b9c98c35d82fc385 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 29 Jun 2006 17:15:33 +0200 Subject: [PATCH 0885/1063] [ALSA] add codec-specific controls for UCB1400 This patch adds some codec-specific controls for Philips UCB1400 codec. Signed-off-by: Mike Rapoport Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/ac97/ac97_codec.c | 2 +- sound/pci/ac97/ac97_patch.c | 38 +++++++++++++++++++++++++++++++++++++ sound/pci/ac97/ac97_patch.h | 1 + 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c index f82c636e99a9..e5d062d640df 100644 --- a/sound/pci/ac97/ac97_codec.c +++ b/sound/pci/ac97/ac97_codec.c @@ -156,7 +156,7 @@ static const struct ac97_codec_id snd_ac97_codec_ids[] = { { 0x4e534300, 0xffffffff, "LM4540,43,45,46,48", NULL, NULL }, // only guess --jk { 0x4e534331, 0xffffffff, "LM4549", NULL, NULL }, { 0x4e534350, 0xffffffff, "LM4550", patch_lm4550, NULL }, // volume wrap fix -{ 0x50534304, 0xffffffff, "UCB1400", NULL, NULL }, +{ 0x50534304, 0xffffffff, "UCB1400", patch_ucb1400, NULL }, { 0x53494c20, 0xffffffe0, "Si3036,8", mpatch_si3036, mpatch_si3036, AC97_MODEM_PATCH }, { 0x54524102, 0xffffffff, "TR28022", NULL, NULL }, { 0x54524106, 0xffffffff, "TR28026", NULL, NULL }, diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c index 094cfc1f3a19..5267b006c5c8 100644 --- a/sound/pci/ac97/ac97_patch.c +++ b/sound/pci/ac97/ac97_patch.c @@ -2872,3 +2872,41 @@ int patch_lm4550(struct snd_ac97 *ac97) ac97->res_table = lm4550_restbl; return 0; } + +/* + * UCB1400 codec (http://www.semiconductors.philips.com/acrobat_download/datasheets/UCB1400-02.pdf) + */ +static const struct snd_kcontrol_new snd_ac97_controls_ucb1400[] = { +/* enable/disable headphone driver which allows direct connection to + stereo headphone without the use of external DC blocking + capacitors */ +AC97_SINGLE("Headphone Driver", 0x6a, 6, 1, 0), +/* Filter used to compensate the DC offset is added in the ADC to remove idle + tones from the audio band. */ +AC97_SINGLE("DC Filter", 0x6a, 4, 1, 0), +/* Control smart-low-power mode feature. Allows automatic power down + of unused blocks in the ADC analog front end and the PLL. */ +AC97_SINGLE("Smart Low Power Mode", 0x6c, 4, 3, 0), +}; + +static int patch_ucb1400_specific(struct snd_ac97 * ac97) +{ + int idx, err; + for (idx = 0; idx < ARRAY_SIZE(snd_ac97_controls_ucb1400); idx++) + if ((err = snd_ctl_add(ac97->bus->card, snd_ctl_new1(&snd_ac97_controls_ucb1400[idx], ac97))) < 0) + return err; + return 0; +} + +static struct snd_ac97_build_ops patch_ucb1400_ops = { + .build_specific = patch_ucb1400_specific, +}; + +int patch_ucb1400(struct snd_ac97 * ac97) +{ + ac97->build_ops = &patch_ucb1400_ops; + /* enable headphone driver and smart low power mode by default */ + snd_ac97_write(ac97, 0x6a, 0x0050); + snd_ac97_write(ac97, 0x6c, 0x0030); + return 0; +} diff --git a/sound/pci/ac97/ac97_patch.h b/sound/pci/ac97/ac97_patch.h index adcaa04586cb..741979217207 100644 --- a/sound/pci/ac97/ac97_patch.h +++ b/sound/pci/ac97/ac97_patch.h @@ -58,5 +58,6 @@ int patch_cm9780(struct snd_ac97 * ac97); int patch_vt1616(struct snd_ac97 * ac97); int patch_vt1617a(struct snd_ac97 * ac97); int patch_it2646(struct snd_ac97 * ac97); +int patch_ucb1400(struct snd_ac97 * ac97); int mpatch_si3036(struct snd_ac97 * ac97); int patch_lm4550(struct snd_ac97 * ac97); From e0a5d82a966172c5f1dff6229d4a07be2222e8b3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 4 Jul 2006 12:05:14 +0200 Subject: [PATCH 0886/1063] [ALSA] fm801: Support FM only card Signed-off-by: Andy Shevchenko Signed-off-by: Jaroslav Kysela --- sound/pci/fm801.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/sound/pci/fm801.c b/sound/pci/fm801.c index 13868c985126..a30257fa6dbc 100644 --- a/sound/pci/fm801.c +++ b/sound/pci/fm801.c @@ -2,6 +2,7 @@ * The driver for the ForteMedia FM801 based soundcards * Copyright (c) by Jaroslav Kysela * + * Support FM only card by Andy Shevchenko * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -54,6 +55,7 @@ static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; /* Enable this card * * 1 = MediaForte 256-PCS * 2 = MediaForte 256-PCPR * 3 = MediaForte 64-PCR + * 16 = setup tuner only (this is additional bit), i.e. SF-64-PCR FM card * High 16-bits are video (radio) device number + 1 */ static int tea575x_tuner[SNDRV_CARDS]; @@ -1253,6 +1255,9 @@ static int snd_fm801_chip_init(struct fm801 *chip, int resume) int id; unsigned short cmdw; + if (tea575x_tuner & 0x0010) + goto __ac97_ok; + /* codec cold reset + AC'97 warm reset */ outw((1<<5) | (1<<6), FM801_REG(chip, CODEC_CTRL)); inw(FM801_REG(chip, CODEC_CTRL)); /* flush posting data */ @@ -1394,13 +1399,16 @@ static int __devinit snd_fm801_create(struct snd_card *card, snd_card_set_dev(card, &pci->dev); #ifdef TEA575X_RADIO - if (tea575x_tuner > 0 && (tea575x_tuner & 0xffff) < 4) { + if (tea575x_tuner > 0 && (tea575x_tuner & 0x000f) < 4) { chip->tea.dev_nr = tea575x_tuner >> 16; chip->tea.card = card; chip->tea.freq_fixup = 10700; chip->tea.private_data = chip; - chip->tea.ops = &snd_fm801_tea_ops[(tea575x_tuner & 0xffff) - 1]; + chip->tea.ops = &snd_fm801_tea_ops[(tea575x_tuner & 0x000f) - 1]; snd_tea575x_init(&chip->tea); + + /* Mute FM tuner */ + outw(0xf800, FM801_REG(chip, GPIO_CTRL)); } #endif @@ -1439,6 +1447,9 @@ static int __devinit snd_card_fm801_probe(struct pci_dev *pci, sprintf(card->longname, "%s at 0x%lx, irq %i", card->shortname, chip->port, chip->irq); + if (tea575x_tuner[dev] & 0x0010) + goto __fm801_tuner_only; + if ((err = snd_fm801_pcm(chip, 0, NULL)) < 0) { snd_card_free(card); return err; @@ -1465,6 +1476,7 @@ static int __devinit snd_card_fm801_probe(struct pci_dev *pci, return err; } + __fm801_tuner_only: if ((err = snd_card_register(card)) < 0) { snd_card_free(card); return err; From 6bbe13ecbbce4415a5a7959b3bc35b18313025e0 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Tue, 4 Jul 2006 13:39:55 +0200 Subject: [PATCH 0887/1063] [ALSA] fm801: fixed broken previous patch for the FM tuner only code - do not allocate and enable interrupt - do not do the FM tuner mute (it should be handled more cleanly) Signed-off-by: Jaroslav Kysela --- sound/pci/fm801.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/sound/pci/fm801.c b/sound/pci/fm801.c index a30257fa6dbc..88a3e9f3224a 100644 --- a/sound/pci/fm801.c +++ b/sound/pci/fm801.c @@ -160,6 +160,7 @@ struct fm801 { unsigned int multichannel: 1, /* multichannel support */ secondary: 1; /* secondary codec */ unsigned char secondary_addr; /* address of the secondary codec */ + unsigned int tea575x_tuner; /* tuner flags */ unsigned short ply_ctrl; /* playback control */ unsigned short cap_ctrl; /* capture control */ @@ -1255,7 +1256,7 @@ static int snd_fm801_chip_init(struct fm801 *chip, int resume) int id; unsigned short cmdw; - if (tea575x_tuner & 0x0010) + if (chip->tea575x_tuner & 0x0010) goto __ac97_ok; /* codec cold reset + AC'97 warm reset */ @@ -1295,6 +1296,8 @@ static int snd_fm801_chip_init(struct fm801 *chip, int resume) wait_for_codec(chip, 0, AC97_VENDOR_ID1, msecs_to_jiffies(750)); } + __ac97_ok: + /* init volume */ outw(0x0808, FM801_REG(chip, PCM_VOL)); outw(0x9f1f, FM801_REG(chip, FM_VOL)); @@ -1303,9 +1306,12 @@ static int snd_fm801_chip_init(struct fm801 *chip, int resume) /* I2S control - I2S mode */ outw(0x0003, FM801_REG(chip, I2S_MODE)); - /* interrupt setup - unmask MPU, PLAYBACK & CAPTURE */ + /* interrupt setup */ cmdw = inw(FM801_REG(chip, IRQ_MASK)); - cmdw &= ~0x0083; + if (chip->irq < 0) + cmdw |= 0x00c3; /* mask everything, no PCM nor MPU */ + else + cmdw &= ~0x0083; /* unmask MPU, PLAYBACK & CAPTURE */ outw(cmdw, FM801_REG(chip, IRQ_MASK)); /* interrupt clear */ @@ -1370,20 +1376,23 @@ static int __devinit snd_fm801_create(struct snd_card *card, chip->card = card; chip->pci = pci; chip->irq = -1; + chip->tea575x_tuner = tea575x_tuner; if ((err = pci_request_regions(pci, "FM801")) < 0) { kfree(chip); pci_disable_device(pci); return err; } chip->port = pci_resource_start(pci, 0); - if (request_irq(pci->irq, snd_fm801_interrupt, IRQF_DISABLED|IRQF_SHARED, - "FM801", chip)) { - snd_printk(KERN_ERR "unable to grab IRQ %d\n", chip->irq); - snd_fm801_free(chip); - return -EBUSY; + if ((tea575x_tuner & 0x0010) == 0) { + if (request_irq(pci->irq, snd_fm801_interrupt, IRQF_DISABLED|IRQF_SHARED, + "FM801", chip)) { + snd_printk(KERN_ERR "unable to grab IRQ %d\n", chip->irq); + snd_fm801_free(chip); + return -EBUSY; + } + chip->irq = pci->irq; + pci_set_master(pci); } - chip->irq = pci->irq; - pci_set_master(pci); pci_read_config_byte(pci, PCI_REVISION_ID, &rev); if (rev >= 0xb1) /* FM801-AU */ @@ -1406,9 +1415,6 @@ static int __devinit snd_fm801_create(struct snd_card *card, chip->tea.private_data = chip; chip->tea.ops = &snd_fm801_tea_ops[(tea575x_tuner & 0x000f) - 1]; snd_tea575x_init(&chip->tea); - - /* Mute FM tuner */ - outw(0xf800, FM801_REG(chip, GPIO_CTRL)); } #endif From 8aa9b586e42099817163aba01d925c2660c4dbbe Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Wed, 5 Jul 2006 17:34:51 +0200 Subject: [PATCH 0888/1063] [ALSA] Control API - more robust TLV implementation - added callback option - added READ/WRITE/COMMAND flags to access member - added WRITE/COMMAND ioctls - added SNDRV_CTL_EVENT_MASK_TLV for TLV change notifications - added TLV support to ELEM_ADD ioctl Signed-off-by: Jaroslav Kysela --- include/sound/asound.h | 10 ++- include/sound/control.h | 16 ++++- sound/core/control.c | 139 +++++++++++++++++++++++++++++++--------- 3 files changed, 132 insertions(+), 33 deletions(-) diff --git a/include/sound/asound.h b/include/sound/asound.h index 76a20406bd18..c1621c650a9a 100644 --- a/include/sound/asound.h +++ b/include/sound/asound.h @@ -727,10 +727,15 @@ typedef int __bitwise snd_ctl_elem_iface_t; #define SNDRV_CTL_ELEM_ACCESS_WRITE (1<<1) #define SNDRV_CTL_ELEM_ACCESS_READWRITE (SNDRV_CTL_ELEM_ACCESS_READ|SNDRV_CTL_ELEM_ACCESS_WRITE) #define SNDRV_CTL_ELEM_ACCESS_VOLATILE (1<<2) /* control value may be changed without a notification */ -#define SNDRV_CTL_ELEM_ACCESS_TIMESTAMP (1<<2) /* when was control changed */ +#define SNDRV_CTL_ELEM_ACCESS_TIMESTAMP (1<<3) /* when was control changed */ +#define SNDRV_CTL_ELEM_ACCESS_TLV_READ (1<<4) /* TLV read is possible */ +#define SNDRV_CTL_ELEM_ACCESS_TLV_WRITE (1<<5) /* TLV write is possible */ +#define SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE (SNDRV_CTL_ELEM_ACCESS_TLV_READ|SNDRV_CTL_ELEM_ACCESS_TLV_WRITE) +#define SNDRV_CTL_ELEM_ACCESS_TLV_COMMAND (1<<6) /* TLV command is possible */ #define SNDRV_CTL_ELEM_ACCESS_INACTIVE (1<<8) /* control does actually nothing, but may be updated */ #define SNDRV_CTL_ELEM_ACCESS_LOCK (1<<9) /* write lock */ #define SNDRV_CTL_ELEM_ACCESS_OWNER (1<<10) /* write lock owner */ +#define SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK (1<<28) /* kernel use a TLV callback */ #define SNDRV_CTL_ELEM_ACCESS_USER (1<<29) /* user space element */ #define SNDRV_CTL_ELEM_ACCESS_DINDIRECT (1<<30) /* indirect access for matrix dimensions in the info structure */ #define SNDRV_CTL_ELEM_ACCESS_INDIRECT (1<<31) /* indirect access for element value in the value structure */ @@ -838,6 +843,8 @@ enum { SNDRV_CTL_IOCTL_ELEM_REPLACE = _IOWR('U', 0x18, struct snd_ctl_elem_info), SNDRV_CTL_IOCTL_ELEM_REMOVE = _IOWR('U', 0x19, struct snd_ctl_elem_id), SNDRV_CTL_IOCTL_TLV_READ = _IOWR('U', 0x1a, struct snd_ctl_tlv), + SNDRV_CTL_IOCTL_TLV_WRITE = _IOWR('U', 0x1b, struct snd_ctl_tlv), + SNDRV_CTL_IOCTL_TLV_COMMAND = _IOWR('U', 0x1c, struct snd_ctl_tlv), SNDRV_CTL_IOCTL_HWDEP_NEXT_DEVICE = _IOWR('U', 0x20, int), SNDRV_CTL_IOCTL_HWDEP_INFO = _IOR('U', 0x21, struct snd_hwdep_info), SNDRV_CTL_IOCTL_PCM_NEXT_DEVICE = _IOR('U', 0x30, int), @@ -862,6 +869,7 @@ enum sndrv_ctl_event_type { #define SNDRV_CTL_EVENT_MASK_VALUE (1<<0) /* element value was changed */ #define SNDRV_CTL_EVENT_MASK_INFO (1<<1) /* element info was changed */ #define SNDRV_CTL_EVENT_MASK_ADD (1<<2) /* element was added */ +#define SNDRV_CTL_EVENT_MASK_TLV (1<<3) /* element TLV tree was changed */ #define SNDRV_CTL_EVENT_MASK_REMOVE (~0U) /* element was removed */ struct snd_ctl_event { diff --git a/include/sound/control.h b/include/sound/control.h index a93a58d0e688..e3905c5a0950 100644 --- a/include/sound/control.h +++ b/include/sound/control.h @@ -30,6 +30,11 @@ struct snd_kcontrol; typedef int (snd_kcontrol_info_t) (struct snd_kcontrol * kcontrol, struct snd_ctl_elem_info * uinfo); typedef int (snd_kcontrol_get_t) (struct snd_kcontrol * kcontrol, struct snd_ctl_elem_value * ucontrol); typedef int (snd_kcontrol_put_t) (struct snd_kcontrol * kcontrol, struct snd_ctl_elem_value * ucontrol); +typedef int (snd_kcontrol_tlv_rw_t)(struct snd_kcontrol *kcontrol, + int op_flag, /* 0=read,1=write,-1=command */ + unsigned int size, + unsigned int __user *tlv); + struct snd_kcontrol_new { snd_ctl_elem_iface_t iface; /* interface identifier */ @@ -42,7 +47,10 @@ struct snd_kcontrol_new { snd_kcontrol_info_t *info; snd_kcontrol_get_t *get; snd_kcontrol_put_t *put; - unsigned int *tlv; + union { + snd_kcontrol_tlv_rw_t *c; + unsigned int *p; + } tlv; unsigned long private_value; }; @@ -59,7 +67,11 @@ struct snd_kcontrol { snd_kcontrol_info_t *info; snd_kcontrol_get_t *get; snd_kcontrol_put_t *put; - unsigned int *tlv; + snd_kcontrol_tlv_rw_t *tlv_rw; + union { + snd_kcontrol_tlv_rw_t *c; + unsigned int *p; + } tlv; unsigned long private_value; void *private_data; void (*private_free)(struct snd_kcontrol *kcontrol); diff --git a/sound/core/control.c b/sound/core/control.c index f0c7272a2d48..31ad58154c06 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -236,12 +236,16 @@ struct snd_kcontrol *snd_ctl_new1(const struct snd_kcontrol_new *ncontrol, kctl.id.index = ncontrol->index; kctl.count = ncontrol->count ? ncontrol->count : 1; access = ncontrol->access == 0 ? SNDRV_CTL_ELEM_ACCESS_READWRITE : - (ncontrol->access & (SNDRV_CTL_ELEM_ACCESS_READWRITE|SNDRV_CTL_ELEM_ACCESS_INACTIVE| - SNDRV_CTL_ELEM_ACCESS_DINDIRECT|SNDRV_CTL_ELEM_ACCESS_INDIRECT)); + (ncontrol->access & (SNDRV_CTL_ELEM_ACCESS_READWRITE| + SNDRV_CTL_ELEM_ACCESS_INACTIVE| + SNDRV_CTL_ELEM_ACCESS_DINDIRECT| + SNDRV_CTL_ELEM_ACCESS_INDIRECT| + SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE| + SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK)); kctl.info = ncontrol->info; kctl.get = ncontrol->get; kctl.put = ncontrol->put; - kctl.tlv = ncontrol->tlv; + kctl.tlv.p = ncontrol->tlv.p; kctl.private_value = ncontrol->private_value; kctl.private_data = private_data; return snd_ctl_new(&kctl, access); @@ -883,6 +887,8 @@ struct user_element { struct snd_ctl_elem_info info; void *elem_data; /* element data */ unsigned long elem_data_size; /* size of element data in bytes */ + void *tlv_data; /* TLV data */ + unsigned long tlv_data_size; /* TLV data size */ void *priv_data; /* private data (like strings for enumerated type) */ unsigned long priv_data_size; /* size of private data in bytes */ }; @@ -917,9 +923,46 @@ static int snd_ctl_elem_user_put(struct snd_kcontrol *kcontrol, return change; } +static int snd_ctl_elem_user_tlv(struct snd_kcontrol *kcontrol, + int op_flag, + unsigned int size, + unsigned int __user *tlv) +{ + struct user_element *ue = kcontrol->private_data; + int change = 0; + void *new_data; + + if (op_flag > 0) { + if (size > 1024 * 128) /* sane value */ + return -EINVAL; + new_data = kmalloc(size, GFP_KERNEL); + if (new_data == NULL) + return -ENOMEM; + if (copy_from_user(new_data, tlv, size)) { + kfree(new_data); + return -EFAULT; + } + change = ue->tlv_data_size != size; + if (!change) + change = memcmp(ue->tlv_data, new_data, size); + kfree(ue->tlv_data); + ue->tlv_data = new_data; + ue->tlv_data_size = size; + } else { + if (size < ue->tlv_data_size) + return -ENOSPC; + if (copy_to_user(tlv, ue->tlv_data, ue->tlv_data_size)) + return -EFAULT; + } + return change; +} + static void snd_ctl_elem_user_free(struct snd_kcontrol *kcontrol) { - kfree(kcontrol->private_data); + struct user_element *ue = kcontrol->private_data; + if (ue->tlv_data) + kfree(ue->tlv_data); + kfree(ue); } static int snd_ctl_elem_add(struct snd_ctl_file *file, @@ -938,7 +981,8 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file, return -EINVAL; access = info->access == 0 ? SNDRV_CTL_ELEM_ACCESS_READWRITE : (info->access & (SNDRV_CTL_ELEM_ACCESS_READWRITE| - SNDRV_CTL_ELEM_ACCESS_INACTIVE)); + SNDRV_CTL_ELEM_ACCESS_INACTIVE| + SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE)); info->id.numid = 0; memset(&kctl, 0, sizeof(kctl)); down_write(&card->controls_rwsem); @@ -964,6 +1008,10 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file, kctl.get = snd_ctl_elem_user_get; if (access & SNDRV_CTL_ELEM_ACCESS_WRITE) kctl.put = snd_ctl_elem_user_put; + if (access & SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE) { + kctl.tlv.c = snd_ctl_elem_user_tlv; + access |= SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK; + } switch (info->type) { case SNDRV_CTL_ELEM_TYPE_BOOLEAN: private_size = sizeof(char); @@ -1068,38 +1116,65 @@ static int snd_ctl_subscribe_events(struct snd_ctl_file *file, int __user *ptr) return 0; } -static int snd_ctl_tlv_read(struct snd_card *card, - struct snd_ctl_tlv __user *_tlv) +static int snd_ctl_tlv_ioctl(struct snd_ctl_file *file, + struct snd_ctl_tlv __user *_tlv, + int op_flag) { + struct snd_card *card = file->card; struct snd_ctl_tlv tlv; struct snd_kcontrol *kctl; + struct snd_kcontrol_volatile *vd; unsigned int len; int err = 0; if (copy_from_user(&tlv, _tlv, sizeof(tlv))) return -EFAULT; - if (tlv.length < sizeof(unsigned int) * 3) - return -EINVAL; - down_read(&card->controls_rwsem); - kctl = snd_ctl_find_numid(card, tlv.numid); - if (kctl == NULL) { - err = -ENOENT; - goto __kctl_end; - } - if (kctl->tlv == NULL) { - err = -ENXIO; - goto __kctl_end; - } - len = kctl->tlv[1] + 2 * sizeof(unsigned int); - if (tlv.length < len) { - err = -ENOMEM; - goto __kctl_end; - } - if (copy_to_user(_tlv->tlv, kctl->tlv, len)) - err = -EFAULT; + if (tlv.length < sizeof(unsigned int) * 3) + return -EINVAL; + down_read(&card->controls_rwsem); + kctl = snd_ctl_find_numid(card, tlv.numid); + if (kctl == NULL) { + err = -ENOENT; + goto __kctl_end; + } + if (kctl->tlv.p == NULL) { + err = -ENXIO; + goto __kctl_end; + } + vd = &kctl->vd[tlv.numid - kctl->id.numid]; + if ((op_flag == 0 && (vd->access & SNDRV_CTL_ELEM_ACCESS_TLV_READ) == 0) || + (op_flag > 0 && (vd->access & SNDRV_CTL_ELEM_ACCESS_TLV_WRITE) == 0) || + (op_flag < 0 && (vd->access & SNDRV_CTL_ELEM_ACCESS_TLV_COMMAND) == 0)) { + err = -ENXIO; + goto __kctl_end; + } + if (vd->access & SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK) { + if (file && vd->owner != NULL && vd->owner != file) { + err = -EPERM; + goto __kctl_end; + } + err = kctl->tlv.c(kctl, op_flag, tlv.length, _tlv->tlv); + if (err > 0) { + up_read(&card->controls_rwsem); + snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_TLV, &kctl->id); + return 0; + } + } else { + if (op_flag) { + err = -ENXIO; + goto __kctl_end; + } + len = kctl->tlv.p[1] + 2 * sizeof(unsigned int); + if (tlv.length < len) { + err = -ENOMEM; + goto __kctl_end; + } + if (copy_to_user(_tlv->tlv, kctl->tlv.p, len)) + err = -EFAULT; + } __kctl_end: - up_read(&card->controls_rwsem); - return err; + up_read(&card->controls_rwsem); + return err; } static long snd_ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -1140,8 +1215,12 @@ static long snd_ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg return snd_ctl_elem_remove(ctl, argp); case SNDRV_CTL_IOCTL_SUBSCRIBE_EVENTS: return snd_ctl_subscribe_events(ctl, ip); - case SNDRV_CTL_IOCTL_TLV_READ: - return snd_ctl_tlv_read(card, argp); + case SNDRV_CTL_IOCTL_TLV_READ: + return snd_ctl_tlv_ioctl(ctl, argp, 0); + case SNDRV_CTL_IOCTL_TLV_WRITE: + return snd_ctl_tlv_ioctl(ctl, argp, 1); + case SNDRV_CTL_IOCTL_TLV_COMMAND: + return snd_ctl_tlv_ioctl(ctl, argp, -1); case SNDRV_CTL_IOCTL_POWER: return -ENOPROTOOPT; case SNDRV_CTL_IOCTL_POWER_STATE: From 7f0e2f8bb851f5e0a2e0fef465b7b6f36c7aa7be Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Wed, 5 Jul 2006 17:39:14 +0200 Subject: [PATCH 0889/1063] [ALSA] HDA codec - little code & comment cleanup Signed-off-by: Jaroslav Kysela --- sound/pci/hda/hda_codec.h | 2 +- sound/pci/hda/hda_proc.c | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/sound/pci/hda/hda_codec.h b/sound/pci/hda/hda_codec.h index 40520e9d5a4b..c12bc4e8840f 100644 --- a/sound/pci/hda/hda_codec.h +++ b/sound/pci/hda/hda_codec.h @@ -479,7 +479,7 @@ struct hda_codec_ops { struct hda_amp_info { u32 key; /* hash key */ u32 amp_caps; /* amp capabilities */ - u16 vol[2]; /* current volume & mute*/ + u16 vol[2]; /* current volume & mute */ u16 status; /* update flag */ u16 next; /* next link */ }; diff --git a/sound/pci/hda/hda_proc.c b/sound/pci/hda/hda_proc.c index c2f0fe85bf35..d737f17695a3 100644 --- a/sound/pci/hda/hda_proc.c +++ b/sound/pci/hda/hda_proc.c @@ -52,10 +52,9 @@ static void print_amp_caps(struct snd_info_buffer *buffer, struct hda_codec *codec, hda_nid_t nid, int dir) { unsigned int caps; - if (dir == HDA_OUTPUT) - caps = snd_hda_param_read(codec, nid, AC_PAR_AMP_OUT_CAP); - else - caps = snd_hda_param_read(codec, nid, AC_PAR_AMP_IN_CAP); + caps = snd_hda_param_read(codec, nid, + dir == HDA_OUTPUT ? + AC_PAR_AMP_OUT_CAP : AC_PAR_AMP_IN_CAP); if (caps == -1 || caps == 0) { snd_iprintf(buffer, "N/A\n"); return; @@ -74,10 +73,7 @@ static void print_amp_vals(struct snd_info_buffer *buffer, unsigned int val; int i; - if (dir == HDA_OUTPUT) - dir = AC_AMP_GET_OUTPUT; - else - dir = AC_AMP_GET_INPUT; + dir = dir == HDA_OUTPUT ? AC_AMP_GET_OUTPUT : AC_AMP_GET_INPUT; for (i = 0; i < indices; i++) { snd_iprintf(buffer, " ["); if (stereo) { From 302e9c5af4fb3ea258917ee6a32e9e45f578b231 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Wed, 5 Jul 2006 17:39:49 +0200 Subject: [PATCH 0890/1063] [ALSA] HDA codec & CA0106 - add/fix TLV support Signed-off-by: Jaroslav Kysela --- sound/pci/ca0106/ca0106_mixer.c | 4 +++- sound/pci/hda/hda_codec.c | 33 +++++++++++++++++++++++++++++++++ sound/pci/hda/hda_local.h | 5 +++++ sound/pci/hda/patch_analog.c | 17 +++++++++++++++++ 4 files changed, 58 insertions(+), 1 deletion(-) diff --git a/sound/pci/ca0106/ca0106_mixer.c b/sound/pci/ca0106/ca0106_mixer.c index 35309b3ed8c0..df75270939ac 100644 --- a/sound/pci/ca0106/ca0106_mixer.c +++ b/sound/pci/ca0106/ca0106_mixer.c @@ -472,10 +472,12 @@ static int snd_ca0106_i2c_volume_put(struct snd_kcontrol *kcontrol, #define CA_VOLUME(xname,chid,reg) \ { \ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \ + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | \ + SNDRV_CTL_ELEM_ACCESS_TLV_READ, \ .info = snd_ca0106_volume_info, \ .get = snd_ca0106_volume_get, \ .put = snd_ca0106_volume_put, \ - .tlv = snd_ca0106_db_scale, \ + .tlv.p = snd_ca0106_db_scale, \ .private_value = ((chid) << 8) | (reg) \ } diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 23201f3eeb12..78ff4575699d 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -29,6 +29,7 @@ #include #include "hda_codec.h" #include +#include #include #include "hda_local.h" @@ -841,6 +842,38 @@ int snd_hda_mixer_amp_volume_put(struct snd_kcontrol *kcontrol, struct snd_ctl_e return change; } +int snd_hda_mixer_amp_tlv(struct snd_kcontrol *kcontrol, int op_flag, + unsigned int size, unsigned int __user *_tlv) +{ + struct hda_codec *codec = snd_kcontrol_chip(kcontrol); + hda_nid_t nid = get_amp_nid(kcontrol); + int dir = get_amp_direction(kcontrol); + u32 caps, val1, val2; + + if (size < 4 * sizeof(unsigned int)) + return -ENOMEM; + caps = query_amp_caps(codec, nid, dir); + val2 = (((caps & AC_AMPCAP_STEP_SIZE) >> AC_AMPCAP_STEP_SIZE_SHIFT) + 1) * 25; + val1 = -((caps & AC_AMPCAP_OFFSET) >> AC_AMPCAP_OFFSET_SHIFT); + val1 = ((int)val1) * ((int)val2); + if (caps & AC_AMPCAP_MUTE) + val2 |= 0x10000; + if ((val2 & 0x10000) == 0 && dir == HDA_OUTPUT) { + caps = query_amp_caps(codec, nid, HDA_INPUT); + if (caps & AC_AMPCAP_MUTE) + val2 |= 0x10000; + } + if (put_user(SNDRV_CTL_TLVT_DB_SCALE, _tlv)) + return -EFAULT; + if (put_user(2 * sizeof(unsigned int), _tlv + 1)) + return -EFAULT; + if (put_user(val1, _tlv + 2)) + return -EFAULT; + if (put_user(val2, _tlv + 3)) + return -EFAULT; + return 0; +} + /* switch */ int snd_hda_mixer_amp_switch_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h index 14e8aa2806ed..0f0ae685a9c1 100644 --- a/sound/pci/hda/hda_local.h +++ b/sound/pci/hda/hda_local.h @@ -30,9 +30,13 @@ /* mono volume with index (index=0,1,...) (channel=1,2) */ #define HDA_CODEC_VOLUME_MONO_IDX(xname, xcidx, nid, channel, xindex, direction) \ { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xcidx, \ + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | \ + SNDRV_CTL_ELEM_ACCESS_TLV_READ | \ + SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK, \ .info = snd_hda_mixer_amp_volume_info, \ .get = snd_hda_mixer_amp_volume_get, \ .put = snd_hda_mixer_amp_volume_put, \ + .tlv.c = snd_hda_mixer_amp_tlv, \ .private_value = HDA_COMPOSE_AMP_VAL(nid, channel, xindex, direction) } /* stereo volume with index */ #define HDA_CODEC_VOLUME_IDX(xname, xcidx, nid, xindex, direction) \ @@ -63,6 +67,7 @@ int snd_hda_mixer_amp_volume_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo); int snd_hda_mixer_amp_volume_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); int snd_hda_mixer_amp_volume_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); +int snd_hda_mixer_amp_tlv(struct snd_kcontrol *kcontrol, int op_flag, unsigned int size, unsigned int __user *tlv); int snd_hda_mixer_amp_switch_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo); int snd_hda_mixer_amp_switch_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); int snd_hda_mixer_amp_switch_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 6823f2bc10b3..54506d4e57d5 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -452,6 +452,19 @@ static int ad1986a_pcm_amp_vol_put(struct snd_kcontrol *kcontrol, struct snd_ctl return change; } +static int ad1986a_pcm_amp_tlv(struct snd_kcontrol *kcontrol, int op_flag, + unsigned int size, unsigned int __user *_tlv) +{ + struct hda_codec *codec = snd_kcontrol_chip(kcontrol); + struct ad198x_spec *ad = codec->spec; + + mutex_lock(&ad->amp_mutex); + snd_hda_mixer_amp_tlv(kcontrol, op_flag, size, _tlv); + mutex_unlock(&ad->amp_mutex); + return 0; +} + + #define ad1986a_pcm_amp_sw_info snd_hda_mixer_amp_switch_info static int ad1986a_pcm_amp_sw_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) @@ -488,9 +501,13 @@ static struct snd_kcontrol_new ad1986a_mixers[] = { { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "PCM Playback Volume", + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ | + SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK, .info = ad1986a_pcm_amp_vol_info, .get = ad1986a_pcm_amp_vol_get, .put = ad1986a_pcm_amp_vol_put, + .tlv.c = ad1986a_pcm_amp_tlv, .private_value = HDA_COMPOSE_AMP_VAL(AD1986A_FRONT_DAC, 3, 0, HDA_OUTPUT) }, { From 22a27c7f8d0752b38b315d6a192c338d45ea28d5 Mon Sep 17 00:00:00 2001 From: Matt Porter Date: Thu, 6 Jul 2006 18:49:10 +0200 Subject: [PATCH 0891/1063] [ALSA] hda: fix sigmatel 9227/8/9 codec support SigmaTel 9227/8/9 IDs must use the 927x patch. Signed-off-by: Matt Porter Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/patch_sigmatel.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index ea99083a1024..ac96336f3484 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -1525,12 +1525,12 @@ struct hda_codec_preset snd_hda_preset_sigmatel[] = { { .id = 0x83847681, .name = "STAC9220D/9223D A2", .patch = patch_stac922x }, { .id = 0x83847682, .name = "STAC9221 A2", .patch = patch_stac922x }, { .id = 0x83847683, .name = "STAC9221D A2", .patch = patch_stac922x }, - { .id = 0x83847618, .name = "STAC9227", .patch = patch_stac922x }, - { .id = 0x83847619, .name = "STAC9227", .patch = patch_stac922x }, - { .id = 0x83847616, .name = "STAC9228", .patch = patch_stac922x }, - { .id = 0x83847617, .name = "STAC9228", .patch = patch_stac922x }, - { .id = 0x83847614, .name = "STAC9229", .patch = patch_stac922x }, - { .id = 0x83847615, .name = "STAC9229", .patch = patch_stac922x }, + { .id = 0x83847618, .name = "STAC9227", .patch = patch_stac927x }, + { .id = 0x83847619, .name = "STAC9227", .patch = patch_stac927x }, + { .id = 0x83847616, .name = "STAC9228", .patch = patch_stac927x }, + { .id = 0x83847617, .name = "STAC9228", .patch = patch_stac927x }, + { .id = 0x83847614, .name = "STAC9229", .patch = patch_stac927x }, + { .id = 0x83847615, .name = "STAC9229", .patch = patch_stac927x }, { .id = 0x83847620, .name = "STAC9274", .patch = patch_stac927x }, { .id = 0x83847621, .name = "STAC9274D", .patch = patch_stac927x }, { .id = 0x83847622, .name = "STAC9273X", .patch = patch_stac927x }, From 11b3a7555aa1b1629614e919889a4479dfe6f37b Mon Sep 17 00:00:00 2001 From: James Courtier-Dutton Date: Sat, 8 Jul 2006 16:39:30 +0100 Subject: [PATCH 0892/1063] [ALSA] snd-emu10k1: Implement 24bit capture via Philips 1361T ADC for SB0240 card. Signed-off-by: James Courtier-Dutton Signed-off-by: Jaroslav Kysela --- sound/pci/emu10k1/emu10k1_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c index 79f24cdf5fbf..be65d4db8e27 100644 --- a/sound/pci/emu10k1/emu10k1_main.c +++ b/sound/pci/emu10k1/emu10k1_main.c @@ -927,6 +927,7 @@ static struct snd_emu_chip_details emu_chip_details[] = { .ca0151_chip = 1, .spk71 = 1, .spdif_bug = 1, + .adc_1361t = 1, /* 24 bit capture instead of 16bit */ .ac97_chip = 1} , {.vendor = 0x1102, .device = 0x0004, .subsystem = 0x10051102, .driver = "Audigy2", .name = "Audigy 2 EX [1005]", From 6a65d793b0a82c7e190d9fd92a479401b6a127ca Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 14 Jul 2006 14:39:34 +0200 Subject: [PATCH 0893/1063] [ALSA] Remove unused tlv_rw field from struct snd_kcontrol Remove unused tlv_rw field from struct snd_kcontrol. The callback is set in tlv.c field, instead. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- include/sound/control.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/sound/control.h b/include/sound/control.h index e3905c5a0950..1de148b0fd94 100644 --- a/include/sound/control.h +++ b/include/sound/control.h @@ -67,7 +67,6 @@ struct snd_kcontrol { snd_kcontrol_info_t *info; snd_kcontrol_get_t *get; snd_kcontrol_put_t *put; - snd_kcontrol_tlv_rw_t *tlv_rw; union { snd_kcontrol_tlv_rw_t *c; unsigned int *p; From 7bc5ba7e02f63a5732fdf99e7471f54738f6f918 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 14 Jul 2006 15:18:19 +0200 Subject: [PATCH 0894/1063] [ALSA] Add TLV support to snd-usb-audio driver Added TLV-read support to snd-usb-audio driver for passing the volume dB scale information to user-space. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/usb/usbmixer.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/sound/usb/usbmixer.c b/sound/usb/usbmixer.c index 491e975a0c87..e516d6adbb22 100644 --- a/sound/usb/usbmixer.c +++ b/sound/usb/usbmixer.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "usbaudio.h" @@ -416,6 +417,26 @@ static inline int set_cur_mix_value(struct usb_mixer_elem_info *cval, int channe return set_ctl_value(cval, SET_CUR, (cval->control << 8) | channel, value); } +/* + * TLV callback for mixer volume controls + */ +static int mixer_vol_tlv(struct snd_kcontrol *kcontrol, int op_flag, + unsigned int size, unsigned int __user *_tlv) +{ + struct usb_mixer_elem_info *cval = kcontrol->private_data; + DECLARE_TLV_DB_SCALE(scale, 0, 0, 0); + + if (size < sizeof(scale)) + return -ENOMEM; + /* USB descriptions contain the dB scale in 1/256 dB unit + * while ALSA TLV contains in 1/100 dB unit + */ + scale[2] = (convert_signed_value(cval, cval->min) * 100) / 256; + scale[3] = (convert_signed_value(cval, cval->res) * 100) / 256; + if (copy_to_user(_tlv, scale, sizeof(scale))) + return -EFAULT; + return 0; +} /* * parser routines begin here... @@ -933,6 +954,12 @@ static void build_feature_ctl(struct mixer_build *state, unsigned char *desc, } strlcat(kctl->id.name + len, control == USB_FEATURE_MUTE ? " Switch" : " Volume", sizeof(kctl->id.name)); + if (control == USB_FEATURE_VOLUME) { + kctl->tlv.c = mixer_vol_tlv; + kctl->vd[0].access |= + SNDRV_CTL_ELEM_ACCESS_TLV_READ | + SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK; + } break; default: From 17f48ec3f15ddb8080b151304ee887c68f7e4650 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Mon, 17 Jul 2006 16:50:56 +0200 Subject: [PATCH 0895/1063] [ALSA] system timer: fix lost ticks correction adjustment Fix the adjustment of the lost ticks correction variable in the case when the correction has been fully taken into account in the next timer expiration value. Subtracting the scheduled ticks value would result in an underflow. Signed-off-by: Clemens Ladisch Signed-off-by: Jaroslav Kysela --- sound/core/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index 7e5e562fe356..86357007259d 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -987,7 +987,7 @@ static int snd_timer_s_start(struct snd_timer * timer) njiff++; } else { njiff += timer->sticks - priv->correction; - priv->correction -= timer->sticks; + priv->correction = 0; } priv->last_expires = priv->tlist.expires = njiff; add_timer(&priv->tlist); From 6ed5eff025b72cb84a884d4be05f854f13b1542f Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Mon, 17 Jul 2006 16:51:37 +0200 Subject: [PATCH 0896/1063] [ALSA] system timer: accumulate correction for multiple lost ticks When multiple timer interrupts arrive too late, correct for all delays instead of ignoring the earlier ones. Signed-off-by: Clemens Ladisch Signed-off-by: Jaroslav Kysela --- sound/core/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index 86357007259d..0f6e6727ff7c 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -971,7 +971,7 @@ static void snd_timer_s_function(unsigned long data) struct snd_timer_system_private *priv = timer->private_data; unsigned long jiff = jiffies; if (time_after(jiff, priv->last_expires)) - priv->correction = (long)jiff - (long)priv->last_expires; + priv->correction += (long)jiff - (long)priv->last_expires; snd_timer_interrupt(timer, (long)jiff - (long)priv->last_jiffies); } From de2696d8bc9c81874b3743e0c27708760cb7fb52 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Mon, 17 Jul 2006 16:52:09 +0200 Subject: [PATCH 0897/1063] [ALSA] system timer: clear correction value when timer stops Do not retain the old correction value when the timer was stopped. Signed-off-by: Clemens Ladisch Signed-off-by: Jaroslav Kysela --- sound/core/timer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/core/timer.c b/sound/core/timer.c index 0f6e6727ff7c..4fcc8549e4a6 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1006,6 +1006,7 @@ static int snd_timer_s_stop(struct snd_timer * timer) timer->sticks = priv->last_expires - jiff; else timer->sticks = 1; + priv->correction = 0; return 0; } From cd93fe4770ca607c7f39260c02941deccbd77b8b Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Mon, 17 Jul 2006 16:53:57 +0200 Subject: [PATCH 0898/1063] [ALSA] timer: fix timer rescheduling When checking whether a hardware timer needs to be rescheduled, we have to compare against the previously scheduled interval and not against the actual interval between the last two interrupts. Signed-off-by: Clemens Ladisch Signed-off-by: Jaroslav Kysela --- sound/core/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index 4fcc8549e4a6..3a2f8e2ca401 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -718,7 +718,7 @@ void snd_timer_interrupt(struct snd_timer * timer, unsigned long ticks_left) } } if (timer->flags & SNDRV_TIMER_FLG_RESCHED) - snd_timer_reschedule(timer, ticks_left); + snd_timer_reschedule(timer, timer->sticks); if (timer->running) { if (timer->hw.flags & SNDRV_TIMER_HW_STOP) { timer->hw.stop(timer); From 6e9059b05fa733045d7845ac70c5ba0a05e3c2d1 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Fri, 21 Jul 2006 10:45:19 +0200 Subject: [PATCH 0899/1063] [ALSA] system timer: remove unused snd_timer_system_private.timer field Remove the snd_timer_system_private structure's timer field that was never used. Signed-off-by: Clemens Ladisch Signed-off-by: Jaroslav Kysela --- sound/core/timer.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index 3a2f8e2ca401..10a79aed33f8 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -959,7 +959,6 @@ int snd_timer_global_register(struct snd_timer *timer) struct snd_timer_system_private { struct timer_list tlist; - struct timer * timer; unsigned long last_expires; unsigned long last_jiffies; unsigned long correction; From f1265391ea002a28933dc1a8a55948c0ed64c9d0 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Fri, 21 Jul 2006 10:46:18 +0200 Subject: [PATCH 0900/1063] [ALSA] usb-audio: add more Yamaha devices Add some quirks for some unknown Yamaha USB MIDI devices. Signed-off-by: Clemens Ladisch Signed-off-by: Jaroslav Kysela --- sound/usb/usbquirks.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/usb/usbquirks.h b/sound/usb/usbquirks.h index 9351846d7a9d..a7e9563a01df 100644 --- a/sound/usb/usbquirks.h +++ b/sound/usb/usbquirks.h @@ -123,6 +123,10 @@ YAMAHA_DEVICE(0x103e, NULL), YAMAHA_DEVICE(0x103f, NULL), YAMAHA_DEVICE(0x1040, NULL), YAMAHA_DEVICE(0x1041, NULL), +YAMAHA_DEVICE(0x1042, NULL), +YAMAHA_DEVICE(0x1043, NULL), +YAMAHA_DEVICE(0x1044, NULL), +YAMAHA_DEVICE(0x1045, NULL), YAMAHA_DEVICE(0x2000, "DGP-7"), YAMAHA_DEVICE(0x2001, "DGP-5"), YAMAHA_DEVICE(0x2002, NULL), @@ -141,6 +145,7 @@ YAMAHA_DEVICE(0x500b, "DME64N"), YAMAHA_DEVICE(0x500c, "DME24N"), YAMAHA_DEVICE(0x500d, NULL), YAMAHA_DEVICE(0x500e, NULL), +YAMAHA_DEVICE(0x500f, NULL), YAMAHA_DEVICE(0x7000, "DTX"), YAMAHA_DEVICE(0x7010, "UB99"), #undef YAMAHA_DEVICE From fff36e472b4315df77513f4339c5c199c6aad28b Mon Sep 17 00:00:00 2001 From: James Courtier-Dutton Date: Sat, 22 Jul 2006 15:02:48 +0100 Subject: [PATCH 0901/1063] [ALSA] snd-ca0106: Fix dB gain TLVs. Signed-off-by: James Courtier-Dutton Signed-off-by: Jaroslav Kysela --- sound/pci/ca0106/ca0106_mixer.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/pci/ca0106/ca0106_mixer.c b/sound/pci/ca0106/ca0106_mixer.c index df75270939ac..6d64438cecc9 100644 --- a/sound/pci/ca0106/ca0106_mixer.c +++ b/sound/pci/ca0106/ca0106_mixer.c @@ -74,7 +74,8 @@ #include "ca0106.h" -static DECLARE_TLV_DB_SCALE(snd_ca0106_db_scale, -5150, 75, 1); +static DECLARE_TLV_DB_SCALE(snd_ca0106_db_scale1, -5175, 25, 1); +static DECLARE_TLV_DB_SCALE(snd_ca0106_db_scale2, -10350, 50, 1); static int snd_ca0106_shared_spdif_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) @@ -477,16 +478,19 @@ static int snd_ca0106_i2c_volume_put(struct snd_kcontrol *kcontrol, .info = snd_ca0106_volume_info, \ .get = snd_ca0106_volume_get, \ .put = snd_ca0106_volume_put, \ - .tlv.p = snd_ca0106_db_scale, \ + .tlv.p = snd_ca0106_db_scale1, \ .private_value = ((chid) << 8) | (reg) \ } #define I2C_VOLUME(xname,chid) \ { \ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \ + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | \ + SNDRV_CTL_ELEM_ACCESS_TLV_READ, \ .info = snd_ca0106_i2c_volume_info, \ .get = snd_ca0106_i2c_volume_get, \ .put = snd_ca0106_i2c_volume_put, \ + .tlv.p = snd_ca0106_db_scale2, \ .private_value = chid \ } From 31508f83f591dc8764427b6321c89f8f9e84bad2 Mon Sep 17 00:00:00 2001 From: James Courtier-Dutton Date: Sat, 22 Jul 2006 17:02:10 +0100 Subject: [PATCH 0902/1063] [ALSA] snd-emu10k1: Implement dB gain infomation. Signed-off-by: James Courtier-Dutton Signed-off-by: Jaroslav Kysela --- include/sound/emu10k1.h | 4 ++++ sound/pci/emu10k1/emufx.c | 11 +++++++++++ sound/pci/emu10k1/p16v.c | 5 +++++ 3 files changed, 20 insertions(+) diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h index 884bbf54cd36..892e310c504d 100644 --- a/include/sound/emu10k1.h +++ b/include/sound/emu10k1.h @@ -1524,6 +1524,10 @@ struct snd_emu10k1_fx8010_control_gpr { unsigned int value[32]; /* initial values */ unsigned int min; /* minimum range */ unsigned int max; /* maximum range */ + union { + snd_kcontrol_tlv_rw_t *c; + unsigned int *p; + } tlv; unsigned int translation; /* translation type (EMU10K1_GPR_TRANSLATION*) */ }; diff --git a/sound/pci/emu10k1/emufx.c b/sound/pci/emu10k1/emufx.c index dfba00230d4d..00fc904c251d 100644 --- a/sound/pci/emu10k1/emufx.c +++ b/sound/pci/emu10k1/emufx.c @@ -35,6 +35,7 @@ #include #include +#include #include #if 0 /* for testing purposes - digital out -> capture */ @@ -290,6 +291,9 @@ static const u32 db_table[101] = { 0x7fffffff, }; +/* EMU10k1/EMU10k2 DSP control db gain */ +static DECLARE_TLV_DB_SCALE(snd_emu10k1_db_scale1, -4000, 40, 1); + static const u32 onoff_table[2] = { 0x00000000, 0x00000001 }; @@ -755,6 +759,11 @@ static int snd_emu10k1_add_controls(struct snd_emu10k1 *emu, knew.device = gctl->id.device; knew.subdevice = gctl->id.subdevice; knew.info = snd_emu10k1_gpr_ctl_info; + if (gctl->tlv.p) { + knew.tlv.p = gctl->tlv.p; + knew.access = SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ; + } knew.get = snd_emu10k1_gpr_ctl_get; knew.put = snd_emu10k1_gpr_ctl_put; memset(nctl, 0, sizeof(*nctl)); @@ -1013,6 +1022,7 @@ snd_emu10k1_init_mono_control(struct snd_emu10k1_fx8010_control_gpr *ctl, ctl->gpr[0] = gpr + 0; ctl->value[0] = defval; ctl->min = 0; ctl->max = 100; + ctl->tlv.p = snd_emu10k1_db_scale1; ctl->translation = EMU10K1_GPR_TRANSLATION_TABLE100; } @@ -1027,6 +1037,7 @@ snd_emu10k1_init_stereo_control(struct snd_emu10k1_fx8010_control_gpr *ctl, ctl->gpr[1] = gpr + 1; ctl->value[1] = defval; ctl->min = 0; ctl->max = 100; + ctl->tlv.p = snd_emu10k1_db_scale1; ctl->translation = EMU10K1_GPR_TRANSLATION_TABLE100; } diff --git a/sound/pci/emu10k1/p16v.c b/sound/pci/emu10k1/p16v.c index 9905651935fb..1e44714b8623 100644 --- a/sound/pci/emu10k1/p16v.c +++ b/sound/pci/emu10k1/p16v.c @@ -100,6 +100,7 @@ #include #include #include +#include #include #include "p16v.h" @@ -784,12 +785,16 @@ static int snd_p16v_capture_channel_put(struct snd_kcontrol *kcontrol, } return change; } +static DECLARE_TLV_DB_SCALE(snd_p16v_db_scale1, -5175, 25, 1); #define P16V_VOL(xname,xreg,xhl) { \ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \ + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | \ + SNDRV_CTL_ELEM_ACCESS_TLV_READ, \ .info = snd_p16v_volume_info, \ .get = snd_p16v_volume_get, \ .put = snd_p16v_volume_put, \ + .tlv.p = snd_p16v_db_scale1, \ .private_value = ((xreg) | ((xhl) << 8)) \ } From 0a197f005a27766f5c9e0d960e7650748ec1ee4f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 25 Jul 2006 14:51:14 +0200 Subject: [PATCH 0903/1063] [ALSA] Add model entry for Samsung X10 laptop Added the proper model entry (laptop-eapd) for Samsung X10-T2300 Culesa laptop with AD1986A codec. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/patch_analog.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 54506d4e57d5..e547442e6fed 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -820,6 +820,8 @@ static struct hda_board_config ad1986a_cfg_tbl[] = { .config = AD1986A_LAPTOP_EAPD }, /* Samsung X60 Chane */ { .pci_subvendor = 0x144d, .pci_subdevice = 0xc024, .config = AD1986A_LAPTOP_EAPD }, /* Samsung R65-T2300 Charis */ + { .pci_subvendor = 0x144d, .pci_subdevice = 0xc026, + .config = AD1986A_LAPTOP_EAPD }, /* Samsung X10-T2300 Culesa */ { .pci_subvendor = 0x1043, .pci_subdevice = 0x1153, .config = AD1986A_LAPTOP_EAPD }, /* ASUS M9 */ { .pci_subvendor = 0x1043, .pci_subdevice = 0x1213, From 5a053d012d0576e9306009939ca81a86547ef35a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 25 Jul 2006 14:51:15 +0200 Subject: [PATCH 0904/1063] [ALSA] Add model entry for Clevo m665n laptop Added the proper model entry for Clevo m665n laptop with ALC880 codec. Also, added a model string 'clevo' to enable the clevo-type model option. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- Documentation/sound/alsa/ALSA-Configuration.txt | 1 + sound/pci/hda/patch_realtek.c | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index f61af23dd85d..74ea66d33cf9 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt @@ -783,6 +783,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. F1734 2-jack lg LG laptop (m1 express dual) lg-lw LG LW20 laptop + clevo Clevo laptops (m520G, m665n) test for testing/debugging purpose, almost all controls can be adjusted. Appearing only when compiled with $CONFIG_SND_DEBUG=y diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 18d105263fea..f4c96aa43be7 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2156,8 +2156,13 @@ static struct hda_board_config alc880_cfg_tbl[] = { { .modelname = "3stack-digout", .config = ALC880_3ST_DIG }, { .pci_subvendor = 0x8086, .pci_subdevice = 0xe308, .config = ALC880_3ST_DIG }, { .pci_subvendor = 0x1025, .pci_subdevice = 0x0070, .config = ALC880_3ST_DIG }, - /* Clevo m520G NB */ - { .pci_subvendor = 0x1558, .pci_subdevice = 0x0520, .config = ALC880_CLEVO }, + + /* Clevo laptops */ + { .modelname = "clevo", .config = ALC880_CLEVO }, + { .pci_subvendor = 0x1558, .pci_subdevice = 0x0520, + .config = ALC880_CLEVO }, /* Clevo m520G NB */ + { .pci_subvendor = 0x1558, .pci_subdevice = 0x0660, + .config = ALC880_CLEVO }, /* Clevo m665n */ /* Back 3 jack plus 1 SPDIF out jack, front 2 jack (Internal add Aux-In)*/ { .pci_subvendor = 0x8086, .pci_subdevice = 0xe305, .config = ALC880_3ST_DIG }, From 6d177ba7839dd7ed391c2f36b121eb09d1eaee4c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 25 Jul 2006 14:51:15 +0200 Subject: [PATCH 0905/1063] [ALSA] Add hp-bpc model type for HP laptops Added 'hp-bpc' model type for HP xw4400-compatible laptops. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- Documentation/sound/alsa/ALSA-Configuration.txt | 1 + sound/pci/hda/patch_realtek.c | 1 + 2 files changed, 2 insertions(+) diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index 74ea66d33cf9..c595acb3bf80 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt @@ -798,6 +798,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. ALC262 fujitsu Fujitsu Laptop + hp-bpc HP xw4400/6400/8400/9400 laptops basic fixed pin assignment w/o SPDIF auto auto-config reading BIOS (default) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f4c96aa43be7..51f76eef9353 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5774,6 +5774,7 @@ static struct hda_board_config alc262_cfg_tbl[] = { { .modelname = "fujitsu", .config = ALC262_FUJITSU }, { .pci_subvendor = 0x10cf, .pci_subdevice = 0x1397, .config = ALC262_FUJITSU }, + { .modelname = "hp-bpc", .config = ALC262_HP_BPC }, { .pci_subvendor = 0x103c, .pci_subdevice = 0x208c, .config = ALC262_HP_BPC }, /* xw4400 */ { .pci_subvendor = 0x103c, .pci_subdevice = 0x3014, From 304dcaac91f0d26543b31fd7e63726f096c826ee Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 25 Jul 2006 14:51:16 +0200 Subject: [PATCH 0906/1063] [ALSA] Add support of Benq laptop with ALC262 Added the support of Benq laptop with ALC262 codec. A model string 'benq' is added, too. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- .../sound/alsa/ALSA-Configuration.txt | 1 + sound/pci/hda/patch_realtek.c | 21 +++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index c595acb3bf80..885d2ed88fd0 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt @@ -799,6 +799,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. ALC262 fujitsu Fujitsu Laptop hp-bpc HP xw4400/6400/8400/9400 laptops + benq Benq ED8 basic fixed pin assignment w/o SPDIF auto auto-config reading BIOS (default) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 51f76eef9353..42c4f90a92b8 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -79,6 +79,7 @@ enum { ALC262_BASIC, ALC262_FUJITSU, ALC262_HP_BPC, + ALC262_BENQ_ED8, ALC262_AUTO, ALC262_MODEL_LAST /* last tag */ }; @@ -5504,6 +5505,13 @@ static struct snd_kcontrol_new alc262_fujitsu_mixer[] = { { } /* end */ }; +/* additional init verbs for Benq laptops */ +static struct hda_verb alc262_EAPD_verbs[] = { + {0x20, AC_VERB_SET_COEF_INDEX, 0x07}, + {0x20, AC_VERB_SET_PROC_COEF, 0x3070}, + {} +}; + /* add playback controls from the parsed DAC table */ static int alc262_auto_create_multi_out_ctls(struct alc_spec *spec, const struct auto_pin_cfg *cfg) { @@ -5783,6 +5791,9 @@ static struct hda_board_config alc262_cfg_tbl[] = { .config = ALC262_HP_BPC }, /* xw8400 */ { .pci_subvendor = 0x103c, .pci_subdevice = 0x12fe, .config = ALC262_HP_BPC }, /* xw9400 */ + { .modelname = "benq", .config = ALC262_BENQ_ED8 }, + { .pci_subvendor = 0x17ff, .pci_subdevice = 0x0560, + .config = ALC262_BENQ_ED8 }, { .modelname = "auto", .config = ALC262_AUTO }, {} }; @@ -5820,6 +5831,16 @@ static struct alc_config_preset alc262_presets[] = { .channel_mode = alc262_modes, .input_mux = &alc262_HP_capture_source, }, + [ALC262_BENQ_ED8] = { + .mixers = { alc262_base_mixer }, + .init_verbs = { alc262_init_verbs, alc262_EAPD_verbs }, + .num_dacs = ARRAY_SIZE(alc262_dac_nids), + .dac_nids = alc262_dac_nids, + .hp_nid = 0x03, + .num_channel_mode = ARRAY_SIZE(alc262_modes), + .channel_mode = alc262_modes, + .input_mux = &alc262_capture_source, + }, }; static int patch_alc262(struct hda_codec *codec) From 827a56ea3d9c3d5f80c5520ba9d487f9b7069238 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 25 Jul 2006 14:51:16 +0200 Subject: [PATCH 0907/1063] [ALSA] Added model for ASUS M2NPV-VM mobo Added the proper model (3stack) for ASUS M2NPV-VM mobo with AD1986A codec. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/patch_analog.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index e547442e6fed..8955397cca6f 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -808,6 +808,8 @@ static struct hda_board_config ad1986a_cfg_tbl[] = { .config = AD1986A_3STACK }, /* ASUS A8N-VM CSM */ { .pci_subvendor = 0x1043, .pci_subdevice = 0x81b3, .config = AD1986A_3STACK }, /* ASUS P5RD2-VM / P5GPL-X SE */ + { .pci_subvendor = 0x1043, .pci_subdevice = 0x81cb, + .config = AD1986A_3STACK }, /* ASUS M2NPV-VM */ { .modelname = "laptop", .config = AD1986A_LAPTOP }, { .pci_subvendor = 0x144d, .pci_subdevice = 0xc01e, .config = AD1986A_LAPTOP }, /* FSC V2060 */ From b7c6b03405896bc181e1e2c9c06628c3b1681af5 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 25 Jul 2006 15:29:37 +0200 Subject: [PATCH 0908/1063] [ALSA] via82xx - Add dxs_support entry for a FSC machine Added dxs_support=5 entry for a FSC machine. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/via82xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/via82xx.c b/sound/pci/via82xx.c index 2c23a665c3e3..e0e3bfd7a2db 100644 --- a/sound/pci/via82xx.c +++ b/sound/pci/via82xx.c @@ -2404,6 +2404,7 @@ static int __devinit check_dxs_list(struct pci_dev *pci, int revision) { .subvendor = 0x16f3, .subdevice = 0x6405, .action = VIA_DXS_SRC }, /* Jetway K8M8MS */ { .subvendor = 0x1734, .subdevice = 0x1078, .action = VIA_DXS_SRC }, /* FSC Amilo L7300 */ { .subvendor = 0x1734, .subdevice = 0x1093, .action = VIA_DXS_SRC }, /* FSC */ + { .subvendor = 0x1734, .subdevice = 0x10ab, .action = VIA_DXS_SRC }, /* FSC */ { .subvendor = 0x1849, .subdevice = 0x3059, .action = VIA_DXS_NO_VRA }, /* ASRock K7VM2 */ { .subvendor = 0x1849, .subdevice = 0x9739, .action = VIA_DXS_SRC }, /* ASRock mobo(?) */ { .subvendor = 0x1849, .subdevice = 0x9761, .action = VIA_DXS_SRC }, /* ASRock mobo(?) */ From bc6c531eb53de8a0ba355f76ce2bd28f58e46707 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Thu, 27 Jul 2006 10:44:30 +0200 Subject: [PATCH 0909/1063] [ALSA] HDA driver - do not set mute flag for dB scale (follow HDA specification) Signed-off-by: Jaroslav Kysela --- sound/pci/hda/hda_codec.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 78ff4575699d..399860c36be9 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -856,13 +856,6 @@ int snd_hda_mixer_amp_tlv(struct snd_kcontrol *kcontrol, int op_flag, val2 = (((caps & AC_AMPCAP_STEP_SIZE) >> AC_AMPCAP_STEP_SIZE_SHIFT) + 1) * 25; val1 = -((caps & AC_AMPCAP_OFFSET) >> AC_AMPCAP_OFFSET_SHIFT); val1 = ((int)val1) * ((int)val2); - if (caps & AC_AMPCAP_MUTE) - val2 |= 0x10000; - if ((val2 & 0x10000) == 0 && dir == HDA_OUTPUT) { - caps = query_amp_caps(codec, nid, HDA_INPUT); - if (caps & AC_AMPCAP_MUTE) - val2 |= 0x10000; - } if (put_user(SNDRV_CTL_TLVT_DB_SCALE, _tlv)) return -EFAULT; if (put_user(2 * sizeof(unsigned int), _tlv + 1)) From 9265d199616630c2eb993ffe40c9daef3d6873b3 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 27 Jul 2006 15:50:14 +0200 Subject: [PATCH 0910/1063] [ALSA] Fix Makefile of cs5535audio Use ifeq instead of ifdef in Makefile to make the maintenance of out-of-kernel tree easier. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/cs5535audio/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/cs5535audio/Makefile b/sound/pci/cs5535audio/Makefile index 2911a8adc1f2..ad947b4c04cc 100644 --- a/sound/pci/cs5535audio/Makefile +++ b/sound/pci/cs5535audio/Makefile @@ -4,7 +4,7 @@ snd-cs5535audio-objs := cs5535audio.o cs5535audio_pcm.o -ifdef CONFIG_PM +ifeq ($(CONFIG_PM),y) snd-cs5535audio-objs += cs5535audio_pm.o endif From 68ab801e32bbe2caac8b8c6e6e94f41fe7d687ad Mon Sep 17 00:00:00 2001 From: Matthias Koenig Date: Thu, 27 Jul 2006 16:59:23 +0200 Subject: [PATCH 0911/1063] [ALSA] Add snd-mts64 driver for ESI Miditerminal 4140 Added snd-mts64 driver for Ego Systems (ESI) Miditerminal 4140 by Matthias Koenig . The driver requires parport (CONFIG_PARPORT). Signed-off-by: Matthias Koenig Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- .../sound/alsa/ALSA-Configuration.txt | 8 + sound/drivers/Kconfig | 13 + sound/drivers/Makefile | 2 + sound/drivers/mts64.c | 1091 +++++++++++++++++ 4 files changed, 1114 insertions(+) create mode 100644 sound/drivers/mts64.c diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index 885d2ed88fd0..7344815b855e 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt @@ -1216,6 +1216,14 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. Module supports only 1 card. This module has no enable option. + Module snd-mts64 + ---------------- + + Module for Ego Systems (ESI) Miditerminal 4140 + + This module supports multiple devices. + Requires parport (CONFIG_PARPORT). + Module snd-nm256 ---------------- diff --git a/sound/drivers/Kconfig b/sound/drivers/Kconfig index 897dc2dfd7dd..952c7f170101 100644 --- a/sound/drivers/Kconfig +++ b/sound/drivers/Kconfig @@ -73,6 +73,19 @@ config SND_MTPAV To compile this driver as a module, choose M here: the module will be called snd-mtpav. +config SND_MTS64 + tristate "ESI Miditerminal 4140 driver" + depends on SND && PARPORT + select SND_RAWMIDI + help + The ESI Miditerminal 4140 is a 4 In 4 Out MIDI Interface with + additional SMPTE Timecode capabilities for the parallel port. + + Say 'Y' to include support for this device. + + To compile this driver as a module, chose 'M' here: the module + will be called snd-mts64. + config SND_SERIAL_U16550 tristate "UART16550 serial MIDI driver" depends on SND diff --git a/sound/drivers/Makefile b/sound/drivers/Makefile index cb98c3d662be..c9bad6d67e73 100644 --- a/sound/drivers/Makefile +++ b/sound/drivers/Makefile @@ -5,6 +5,7 @@ snd-dummy-objs := dummy.o snd-mtpav-objs := mtpav.o +snd-mts64-objs := mts64.o snd-serial-u16550-objs := serial-u16550.o snd-virmidi-objs := virmidi.o @@ -13,5 +14,6 @@ obj-$(CONFIG_SND_DUMMY) += snd-dummy.o obj-$(CONFIG_SND_VIRMIDI) += snd-virmidi.o obj-$(CONFIG_SND_SERIAL_U16550) += snd-serial-u16550.o obj-$(CONFIG_SND_MTPAV) += snd-mtpav.o +obj-$(CONFIG_SND_MTS64) += snd-mts64.o obj-$(CONFIG_SND) += opl3/ opl4/ mpu401/ vx/ diff --git a/sound/drivers/mts64.c b/sound/drivers/mts64.c new file mode 100644 index 000000000000..169987302ae4 --- /dev/null +++ b/sound/drivers/mts64.c @@ -0,0 +1,1091 @@ +/* + * ALSA Driver for Ego Systems Inc. (ESI) Miditerminal 4140 + * Copyright (c) 2006 by Matthias König + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CARD_NAME "Miditerminal 4140" +#define DRIVER_NAME "MTS64" +#define PLATFORM_DRIVER "snd_mts64" + +static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; +static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; +static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; + +static struct platform_device *platform_devices[SNDRV_CARDS]; +static int device_count; + +module_param_array(index, int, NULL, S_IRUGO); +MODULE_PARM_DESC(index, "Index value for " CARD_NAME " soundcard."); +module_param_array(id, charp, NULL, S_IRUGO); +MODULE_PARM_DESC(id, "ID string for " CARD_NAME " soundcard."); +module_param_array(enable, bool, NULL, S_IRUGO); +MODULE_PARM_DESC(enable, "Enable " CARD_NAME " soundcard."); + +MODULE_AUTHOR("Matthias Koenig "); +MODULE_DESCRIPTION("ESI Miditerminal 4140"); +MODULE_LICENSE("GPL"); +MODULE_SUPPORTED_DEVICE("{{ESI,Miditerminal 4140}}"); + +/********************************************************************* + * Chip specific + *********************************************************************/ +#define MTS64_NUM_INPUT_PORTS 5 +#define MTS64_NUM_OUTPUT_PORTS 4 +#define MTS64_SMPTE_SUBSTREAM 4 + +struct mts64 { + spinlock_t lock; + struct snd_card *card; + struct snd_rawmidi *rmidi; + struct pardevice *pardev; + int pardev_claimed; + + int open_count; + int current_midi_output_port; + int current_midi_input_port; + u8 mode[MTS64_NUM_INPUT_PORTS]; + struct snd_rawmidi_substream *midi_input_substream[MTS64_NUM_INPUT_PORTS]; + int smpte_switch; + u8 time[4]; /* [0]=hh, [1]=mm, [2]=ss, [3]=ff */ + u8 fps; +}; + +static int snd_mts64_free(struct mts64 *mts) +{ + kfree(mts); + return 0; +} + +static int __devinit snd_mts64_create(struct snd_card *card, + struct pardevice *pardev, + struct mts64 **rchip) +{ + struct mts64 *mts; + + *rchip = NULL; + + mts = kzalloc(sizeof(struct mts64), GFP_KERNEL); + if (mts == NULL) + return -ENOMEM; + + /* Init chip specific data */ + spin_lock_init(&mts->lock); + mts->card = card; + mts->pardev = pardev; + mts->current_midi_output_port = -1; + mts->current_midi_input_port = -1; + + *rchip = mts; + + return 0; +} + +/********************************************************************* + * HW register related constants + *********************************************************************/ + +/* Status Bits */ +#define MTS64_STAT_BSY 0x80 +#define MTS64_STAT_BIT_SET 0x20 /* readout process, bit is set */ +#define MTS64_STAT_PORT 0x10 /* read byte is a port number */ + +/* Control Bits */ +#define MTS64_CTL_READOUT 0x08 /* enable readout */ +#define MTS64_CTL_WRITE_CMD 0x06 +#define MTS64_CTL_WRITE_DATA 0x02 +#define MTS64_CTL_STROBE 0x01 + +/* Command */ +#define MTS64_CMD_RESET 0xfe +#define MTS64_CMD_PROBE 0x8f /* Used in probing procedure */ +#define MTS64_CMD_SMPTE_SET_TIME 0xe8 +#define MTS64_CMD_SMPTE_SET_FPS 0xee +#define MTS64_CMD_SMPTE_STOP 0xef +#define MTS64_CMD_SMPTE_FPS_24 0xe3 +#define MTS64_CMD_SMPTE_FPS_25 0xe2 +#define MTS64_CMD_SMPTE_FPS_2997 0xe4 +#define MTS64_CMD_SMPTE_FPS_30D 0xe1 +#define MTS64_CMD_SMPTE_FPS_30 0xe0 +#define MTS64_CMD_COM_OPEN 0xf8 /* setting the communication mode */ +#define MTS64_CMD_COM_CLOSE1 0xff /* clearing communication mode */ +#define MTS64_CMD_COM_CLOSE2 0xf5 + +/********************************************************************* + * Hardware specific functions + *********************************************************************/ +static void mts64_enable_readout(struct parport *p); +static void mts64_disable_readout(struct parport *p); +static int mts64_device_ready(struct parport *p); +static int mts64_device_init(struct parport *p); +static int mts64_device_open(struct mts64 *mts); +static int mts64_device_close(struct mts64 *mts); +static u8 mts64_map_midi_input(u8 c); +static int mts64_probe(struct parport *p); +static u16 mts64_read(struct parport *p); +static u8 mts64_read_char(struct parport *p); +static void mts64_smpte_start(struct parport *p, + u8 hours, u8 minutes, + u8 seconds, u8 frames, + u8 idx); +static void mts64_smpte_stop(struct parport *p); +static void mts64_write_command(struct parport *p, u8 c); +static void mts64_write_data(struct parport *p, u8 c); +static void mts64_write_midi(struct mts64 *mts, u8 c, int midiport); + + +/* Enables the readout procedure + * + * Before we can read a midi byte from the device, we have to set + * bit 3 of control port. + */ +static void mts64_enable_readout(struct parport *p) +{ + u8 c; + + c = parport_read_control(p); + c |= MTS64_CTL_READOUT; + parport_write_control(p, c); +} + +/* Disables readout + * + * Readout is disabled by clearing bit 3 of control + */ +static void mts64_disable_readout(struct parport *p) +{ + u8 c; + + c = parport_read_control(p); + c &= ~MTS64_CTL_READOUT; + parport_write_control(p, c); +} + +/* waits for device ready + * + * Checks if BUSY (Bit 7 of status) is clear + * 1 device ready + * 0 failure + */ +static int mts64_device_ready(struct parport *p) +{ + int i; + u8 c; + + for (i = 0; i < 0xffff; ++i) { + c = parport_read_status(p); + c &= MTS64_STAT_BSY; + if (c != 0) + return 1; + } + + return 0; +} + +/* Init device (LED blinking startup magic) + * + * Returns: + * 0 init ok + * -EIO failure + */ +static int __devinit mts64_device_init(struct parport *p) +{ + int i; + + mts64_write_command(p, MTS64_CMD_RESET); + + for (i = 0; i < 64; ++i) { + msleep(100); + + if (mts64_probe(p) == 0) { + /* success */ + mts64_disable_readout(p); + return 0; + } + } + mts64_disable_readout(p); + + return -EIO; +} + +/* + * Opens the device (set communication mode) + */ +static int mts64_device_open(struct mts64 *mts) +{ + int i; + struct parport *p = mts->pardev->port; + + for (i = 0; i < 5; ++i) + mts64_write_command(p, MTS64_CMD_COM_OPEN); + + return 0; +} + +/* + * Close device (clear communication mode) + */ +static int mts64_device_close(struct mts64 *mts) +{ + int i; + struct parport *p = mts->pardev->port; + + for (i = 0; i < 5; ++i) { + mts64_write_command(p, MTS64_CMD_COM_CLOSE1); + mts64_write_command(p, MTS64_CMD_COM_CLOSE2); + } + + return 0; +} + +/* map hardware port to substream number + * + * When reading a byte from the device, the device tells us + * on what port the byte is. This HW port has to be mapped to + * the midiport (substream number). + * substream 0-3 are Midiports 1-4 + * substream 4 is SMPTE Timecode + * The mapping is done by the table: + * HW | 0 | 1 | 2 | 3 | 4 + * SW | 0 | 1 | 4 | 2 | 3 + */ +static u8 mts64_map_midi_input(u8 c) +{ + static u8 map[] = { 0, 1, 4, 2, 3 }; + + return map[c]; +} + + +/* Probe parport for device + * + * Do we have a Miditerminal 4140 on parport? + * Returns: + * 0 device found + * -ENODEV no device + */ +static int __devinit mts64_probe(struct parport *p) +{ + u8 c; + + mts64_smpte_stop(p); + mts64_write_command(p, MTS64_CMD_PROBE); + + msleep(50); + + c = mts64_read(p); + + c &= 0x00ff; + if (c != MTS64_CMD_PROBE) + return -ENODEV; + else + return 0; + +} + +/* Read byte incl. status from device + * + * Returns: + * data in lower 8 bits and status in upper 8 bits + */ +static u16 mts64_read(struct parport *p) +{ + u8 data, status; + + mts64_device_ready(p); + mts64_enable_readout(p); + status = parport_read_status(p); + data = mts64_read_char(p); + mts64_disable_readout(p); + + return (status << 8) | data; +} + +/* Read a byte from device + * + * Note, that readout mode has to be enabled. + * readout procedure is as follows: + * - Write number of the Bit to read to DATA + * - Read STATUS + * - Bit 5 of STATUS indicates if Bit is set + * + * Returns: + * Byte read from device + */ +static u8 mts64_read_char(struct parport *p) +{ + u8 c = 0; + u8 status; + u8 i; + + for (i = 0; i < 8; ++i) { + parport_write_data(p, i); + c >>= 1; + status = parport_read_status(p); + if (status & MTS64_STAT_BIT_SET) + c |= 0x80; + } + + return c; +} + +/* Starts SMPTE Timecode generation + * + * The device creates SMPTE Timecode by hardware. + * 0 24 fps + * 1 25 fps + * 2 29.97 fps + * 3 30 fps (Drop-frame) + * 4 30 fps + */ +static void mts64_smpte_start(struct parport *p, + u8 hours, u8 minutes, + u8 seconds, u8 frames, + u8 idx) +{ + static u8 fps[5] = { MTS64_CMD_SMPTE_FPS_24, + MTS64_CMD_SMPTE_FPS_25, + MTS64_CMD_SMPTE_FPS_2997, + MTS64_CMD_SMPTE_FPS_30D, + MTS64_CMD_SMPTE_FPS_30 }; + + mts64_write_command(p, MTS64_CMD_SMPTE_SET_TIME); + mts64_write_command(p, frames); + mts64_write_command(p, seconds); + mts64_write_command(p, minutes); + mts64_write_command(p, hours); + + mts64_write_command(p, MTS64_CMD_SMPTE_SET_FPS); + mts64_write_command(p, fps[idx]); +} + +/* Stops SMPTE Timecode generation + */ +static void mts64_smpte_stop(struct parport *p) +{ + mts64_write_command(p, MTS64_CMD_SMPTE_STOP); +} + +/* Write a command byte to device + */ +static void mts64_write_command(struct parport *p, u8 c) +{ + mts64_device_ready(p); + + parport_write_data(p, c); + + parport_write_control(p, MTS64_CTL_WRITE_CMD); + parport_write_control(p, MTS64_CTL_WRITE_CMD | MTS64_CTL_STROBE); + parport_write_control(p, MTS64_CTL_WRITE_CMD); +} + +/* Write a data byte to device + */ +static void mts64_write_data(struct parport *p, u8 c) +{ + mts64_device_ready(p); + + parport_write_data(p, c); + + parport_write_control(p, MTS64_CTL_WRITE_DATA); + parport_write_control(p, MTS64_CTL_WRITE_DATA | MTS64_CTL_STROBE); + parport_write_control(p, MTS64_CTL_WRITE_DATA); +} + +/* Write a MIDI byte to midiport + * + * midiport ranges from 0-3 and maps to Ports 1-4 + * assumptions: communication mode is on + */ +static void mts64_write_midi(struct mts64 *mts, u8 c, + int midiport) +{ + struct parport *p = mts->pardev->port; + + /* check current midiport */ + if (mts->current_midi_output_port != midiport) + mts64_write_command(p, midiport); + + /* write midi byte */ + mts64_write_data(p, c); +} + +/********************************************************************* + * Control elements + *********************************************************************/ + +/* SMPTE Switch */ +static int snd_mts64_ctl_smpte_switch_info(struct snd_kcontrol *kctl, + struct snd_ctl_elem_info *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN; + uinfo->count = 1; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = 1; + return 0; +} + +static int snd_mts64_ctl_smpte_switch_get(struct snd_kcontrol* kctl, + struct snd_ctl_elem_value *uctl) +{ + struct mts64 *mts = snd_kcontrol_chip(kctl); + + spin_lock_irq(&mts->lock); + uctl->value.integer.value[0] = mts->smpte_switch; + spin_unlock_irq(&mts->lock); + + return 0; +} + +/* smpte_switch is not accessed from IRQ handler, so we just need + to protect the HW access */ +static int snd_mts64_ctl_smpte_switch_put(struct snd_kcontrol* kctl, + struct snd_ctl_elem_value *uctl) +{ + struct mts64 *mts = snd_kcontrol_chip(kctl); + int changed = 0; + + spin_lock_irq(&mts->lock); + if (mts->smpte_switch == uctl->value.integer.value[0]) + goto __out; + + changed = 1; + mts->smpte_switch = uctl->value.integer.value[0]; + if (mts->smpte_switch) { + mts64_smpte_start(mts->pardev->port, + mts->time[0], mts->time[1], + mts->time[2], mts->time[3], + mts->fps); + } else { + mts64_smpte_stop(mts->pardev->port); + } +__out: + spin_unlock_irq(&mts->lock); + return changed; +} + +static struct snd_kcontrol_new mts64_ctl_smpte_switch __devinitdata = { + .iface = SNDRV_CTL_ELEM_IFACE_RAWMIDI, + .name = "SMPTE Playback Switch", + .index = 0, + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE, + .private_value = 0, + .info = snd_mts64_ctl_smpte_switch_info, + .get = snd_mts64_ctl_smpte_switch_get, + .put = snd_mts64_ctl_smpte_switch_put +}; + +/* Time */ +static int snd_mts64_ctl_smpte_time_h_info(struct snd_kcontrol *kctl, + struct snd_ctl_elem_info *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; + uinfo->count = 1; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = 23; + return 0; +} + +static int snd_mts64_ctl_smpte_time_f_info(struct snd_kcontrol *kctl, + struct snd_ctl_elem_info *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; + uinfo->count = 1; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = 99; + return 0; +} + +static int snd_mts64_ctl_smpte_time_info(struct snd_kcontrol *kctl, + struct snd_ctl_elem_info *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; + uinfo->count = 1; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = 59; + return 0; +} + +static int snd_mts64_ctl_smpte_time_get(struct snd_kcontrol *kctl, + struct snd_ctl_elem_value *uctl) +{ + struct mts64 *mts = snd_kcontrol_chip(kctl); + int idx = kctl->private_value; + + spin_lock_irq(&mts->lock); + uctl->value.integer.value[0] = mts->time[idx]; + spin_unlock_irq(&mts->lock); + + return 0; +} + +static int snd_mts64_ctl_smpte_time_put(struct snd_kcontrol *kctl, + struct snd_ctl_elem_value *uctl) +{ + struct mts64 *mts = snd_kcontrol_chip(kctl); + int idx = kctl->private_value; + int changed = 0; + + spin_lock_irq(&mts->lock); + if (mts->time[idx] != uctl->value.integer.value[0]) { + changed = 1; + mts->time[idx] = uctl->value.integer.value[0]; + } + spin_unlock_irq(&mts->lock); + + return changed; +} + +static struct snd_kcontrol_new mts64_ctl_smpte_time_hours __devinitdata = { + .iface = SNDRV_CTL_ELEM_IFACE_RAWMIDI, + .name = "SMPTE Time Hours", + .index = 0, + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE, + .private_value = 0, + .info = snd_mts64_ctl_smpte_time_h_info, + .get = snd_mts64_ctl_smpte_time_get, + .put = snd_mts64_ctl_smpte_time_put +}; + +static struct snd_kcontrol_new mts64_ctl_smpte_time_minutes __devinitdata = { + .iface = SNDRV_CTL_ELEM_IFACE_RAWMIDI, + .name = "SMPTE Time Minutes", + .index = 0, + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE, + .private_value = 1, + .info = snd_mts64_ctl_smpte_time_info, + .get = snd_mts64_ctl_smpte_time_get, + .put = snd_mts64_ctl_smpte_time_put +}; + +static struct snd_kcontrol_new mts64_ctl_smpte_time_seconds __devinitdata = { + .iface = SNDRV_CTL_ELEM_IFACE_RAWMIDI, + .name = "SMPTE Time Seconds", + .index = 0, + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE, + .private_value = 2, + .info = snd_mts64_ctl_smpte_time_info, + .get = snd_mts64_ctl_smpte_time_get, + .put = snd_mts64_ctl_smpte_time_put +}; + +static struct snd_kcontrol_new mts64_ctl_smpte_time_frames __devinitdata = { + .iface = SNDRV_CTL_ELEM_IFACE_RAWMIDI, + .name = "SMPTE Time Frames", + .index = 0, + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE, + .private_value = 3, + .info = snd_mts64_ctl_smpte_time_f_info, + .get = snd_mts64_ctl_smpte_time_get, + .put = snd_mts64_ctl_smpte_time_put +}; + +/* FPS */ +static int snd_mts64_ctl_smpte_fps_info(struct snd_kcontrol *kctl, + struct snd_ctl_elem_info *uinfo) +{ + static char *texts[5] = { "24", + "25", + "29.97", + "30D", + "30" }; + + uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; + uinfo->count = 1; + uinfo->value.enumerated.items = 5; + if (uinfo->value.enumerated.item > 4) + uinfo->value.enumerated.item = 4; + strcpy(uinfo->value.enumerated.name, + texts[uinfo->value.enumerated.item]); + + return 0; +} + +static int snd_mts64_ctl_smpte_fps_get(struct snd_kcontrol *kctl, + struct snd_ctl_elem_value *uctl) +{ + struct mts64 *mts = snd_kcontrol_chip(kctl); + + spin_lock_irq(&mts->lock); + uctl->value.enumerated.item[0] = mts->fps; + spin_unlock_irq(&mts->lock); + + return 0; +} + +static int snd_mts64_ctl_smpte_fps_put(struct snd_kcontrol *kctl, + struct snd_ctl_elem_value *uctl) +{ + struct mts64 *mts = snd_kcontrol_chip(kctl); + int changed = 0; + + spin_lock_irq(&mts->lock); + if (mts->fps != uctl->value.enumerated.item[0]) { + changed = 1; + mts->fps = uctl->value.enumerated.item[0]; + } + spin_unlock_irq(&mts->lock); + + return changed; +} + +static struct snd_kcontrol_new mts64_ctl_smpte_fps __devinitdata = { + .iface = SNDRV_CTL_ELEM_IFACE_RAWMIDI, + .name = "SMPTE Fps", + .index = 0, + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE, + .private_value = 0, + .info = snd_mts64_ctl_smpte_fps_info, + .get = snd_mts64_ctl_smpte_fps_get, + .put = snd_mts64_ctl_smpte_fps_put +}; + + +static int __devinit snd_mts64_ctl_create(struct snd_card *card, + struct mts64 *mts) +{ + int err, i; + static struct snd_kcontrol_new *control[] = { + &mts64_ctl_smpte_switch, + &mts64_ctl_smpte_time_hours, + &mts64_ctl_smpte_time_minutes, + &mts64_ctl_smpte_time_seconds, + &mts64_ctl_smpte_time_frames, + &mts64_ctl_smpte_fps, + 0 }; + + for (i = 0; control[i]; ++i) { + err = snd_ctl_add(card, snd_ctl_new1(control[i], mts)); + if (err < 0) { + snd_printd("Cannot create control: %s\n", + control[i]->name); + return err; + } + } + + return 0; +} + +/********************************************************************* + * Rawmidi + *********************************************************************/ +#define MTS64_MODE_INPUT_TRIGGERED 0x01 + +static int snd_mts64_rawmidi_open(struct snd_rawmidi_substream *substream) +{ + struct mts64 *mts = substream->rmidi->private_data; + + if (mts->open_count == 0) { + /* We don't need a spinlock here, because this is just called + if the device has not been opened before. + So there aren't any IRQs from the device */ + mts64_device_open(mts); + + msleep(50); + } + ++(mts->open_count); + + return 0; +} + +static int snd_mts64_rawmidi_close(struct snd_rawmidi_substream *substream) +{ + struct mts64 *mts = substream->rmidi->private_data; + unsigned long flags; + + --(mts->open_count); + if (mts->open_count == 0) { + /* We need the spinlock_irqsave here because we can still + have IRQs at this point */ + spin_lock_irqsave(&mts->lock, flags); + mts64_device_close(mts); + spin_unlock_irqrestore(&mts->lock, flags); + + msleep(500); + + } else if (mts->open_count < 0) + mts->open_count = 0; + + return 0; +} + +static void snd_mts64_rawmidi_output_trigger(struct snd_rawmidi_substream *substream, + int up) +{ + struct mts64 *mts = substream->rmidi->private_data; + u8 data; + unsigned long flags; + + spin_lock_irqsave(&mts->lock, flags); + while (snd_rawmidi_transmit_peek(substream, &data, 1) == 1) { + mts64_write_midi(mts, data, substream->number+1); + snd_rawmidi_transmit_ack(substream, 1); + } + spin_unlock_irqrestore(&mts->lock, flags); +} + +static void snd_mts64_rawmidi_input_trigger(struct snd_rawmidi_substream *substream, + int up) +{ + struct mts64 *mts = substream->rmidi->private_data; + unsigned long flags; + + spin_lock_irqsave(&mts->lock, flags); + if (up) + mts->mode[substream->number] |= MTS64_MODE_INPUT_TRIGGERED; + else + mts->mode[substream->number] &= ~MTS64_MODE_INPUT_TRIGGERED; + + spin_unlock_irqrestore(&mts->lock, flags); +} + +static struct snd_rawmidi_ops snd_mts64_rawmidi_output_ops = { + .open = snd_mts64_rawmidi_open, + .close = snd_mts64_rawmidi_close, + .trigger = snd_mts64_rawmidi_output_trigger +}; + +static struct snd_rawmidi_ops snd_mts64_rawmidi_input_ops = { + .open = snd_mts64_rawmidi_open, + .close = snd_mts64_rawmidi_close, + .trigger = snd_mts64_rawmidi_input_trigger +}; + +/* Create and initialize the rawmidi component */ +static int __devinit snd_mts64_rawmidi_create(struct snd_card *card) +{ + struct mts64 *mts = card->private_data; + struct snd_rawmidi *rmidi; + struct snd_rawmidi_substream *substream; + struct list_head *list; + int err; + + err = snd_rawmidi_new(card, CARD_NAME, 0, + MTS64_NUM_OUTPUT_PORTS, + MTS64_NUM_INPUT_PORTS, + &rmidi); + if (err < 0) + return err; + + rmidi->private_data = mts; + strcpy(rmidi->name, CARD_NAME); + rmidi->info_flags = SNDRV_RAWMIDI_INFO_OUTPUT | + SNDRV_RAWMIDI_INFO_INPUT | + SNDRV_RAWMIDI_INFO_DUPLEX; + + mts->rmidi = rmidi; + + /* register rawmidi ops */ + snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT, + &snd_mts64_rawmidi_output_ops); + snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_INPUT, + &snd_mts64_rawmidi_input_ops); + + /* name substreams */ + /* output */ + list_for_each(list, + &rmidi->streams[SNDRV_RAWMIDI_STREAM_OUTPUT].substreams) { + substream = list_entry(list, struct snd_rawmidi_substream, list); + sprintf(substream->name, + "Miditerminal %d", substream->number+1); + } + /* input */ + list_for_each(list, + &rmidi->streams[SNDRV_RAWMIDI_STREAM_INPUT].substreams) { + substream = list_entry(list, struct snd_rawmidi_substream, list); + mts->midi_input_substream[substream->number] = substream; + switch(substream->number) { + case MTS64_SMPTE_SUBSTREAM: + strcpy(substream->name, "Miditerminal SMPTE"); + break; + default: + sprintf(substream->name, + "Miditerminal %d", substream->number+1); + } + } + + /* controls */ + err = snd_mts64_ctl_create(card, mts); + + return err; +} + +/********************************************************************* + * parport stuff + *********************************************************************/ +static void snd_mts64_interrupt(int irq, void *private, struct pt_regs *r) +{ + struct mts64 *mts = ((struct snd_card*)private)->private_data; + u16 ret; + u8 status, data; + struct snd_rawmidi_substream *substream; + + spin_lock(&mts->lock); + ret = mts64_read(mts->pardev->port); + data = ret & 0x00ff; + status = ret >> 8; + + if (status & MTS64_STAT_PORT) { + mts->current_midi_input_port = mts64_map_midi_input(data); + } else { + if (mts->current_midi_input_port == -1) + goto __out; + substream = mts->midi_input_substream[mts->current_midi_input_port]; + if (mts->mode[substream->number] & MTS64_MODE_INPUT_TRIGGERED) + snd_rawmidi_receive(substream, &data, 1); + } +__out: + spin_unlock(&mts->lock); +} + +static int __devinit snd_mts64_probe_port(struct parport *p) +{ + struct pardevice *pardev; + int res; + + pardev = parport_register_device(p, DRIVER_NAME, + NULL, NULL, NULL, + 0, NULL); + if (!pardev) + return -EIO; + + if (parport_claim(pardev)) { + parport_unregister_device(pardev); + return -EIO; + } + + res = mts64_probe(p); + + parport_release(pardev); + parport_unregister_device(pardev); + + return res; +} + +static void __devinit snd_mts64_attach(struct parport *p) +{ + struct platform_device *device; + + device = platform_device_alloc(PLATFORM_DRIVER, device_count); + if (!device) + return; + + /* Temporary assignment to forward the parport */ + platform_set_drvdata(device, p); + + if (platform_device_register(device) < 0) { + platform_device_put(device); + return; + } + + /* Since we dont get the return value of probe + * We need to check if device probing succeeded or not */ + if (!platform_get_drvdata(device)) { + platform_device_unregister(device); + return; + } + + /* register device in global table */ + platform_devices[device_count] = device; + device_count++; +} + +static void snd_mts64_detach(struct parport *p) +{ + /* nothing to do here */ +} + +static struct parport_driver mts64_parport_driver = { + .name = "mts64", + .attach = snd_mts64_attach, + .detach = snd_mts64_detach +}; + +/********************************************************************* + * platform stuff + *********************************************************************/ +static void snd_mts64_card_private_free(struct snd_card *card) +{ + struct mts64 *mts = card->private_data; + struct pardevice *pardev = mts->pardev; + + if (pardev) { + if (mts->pardev_claimed) + parport_release(pardev); + parport_unregister_device(pardev); + } + + snd_mts64_free(mts); +} + +static int __devinit snd_mts64_probe(struct platform_device *pdev) +{ + struct pardevice *pardev; + struct parport *p; + int dev = pdev->id; + struct snd_card *card = NULL; + struct mts64 *mts = NULL; + int err; + + p = platform_get_drvdata(pdev); + platform_set_drvdata(pdev, NULL); + + if (dev >= SNDRV_CARDS) + return -ENODEV; + if (!enable[dev]) + return -ENOENT; + if ((err = snd_mts64_probe_port(p)) < 0) + return err; + + card = snd_card_new(index[dev], id[dev], THIS_MODULE, 0); + if (card == NULL) { + snd_printd("Cannot create card\n"); + return -ENOMEM; + } + strcpy(card->driver, DRIVER_NAME); + strcpy(card->shortname, "ESI " CARD_NAME); + sprintf(card->longname, "%s at 0x%lx, irq %i", + card->shortname, p->base, p->irq); + + pardev = parport_register_device(p, /* port */ + DRIVER_NAME, /* name */ + NULL, /* preempt */ + NULL, /* wakeup */ + snd_mts64_interrupt, /* ISR */ + PARPORT_DEV_EXCL, /* flags */ + (void *)card); /* private */ + if (pardev == NULL) { + snd_printd("Cannot register pardevice\n"); + err = -EIO; + goto __err; + } + + if ((err = snd_mts64_create(card, pardev, &mts)) < 0) { + snd_printd("Cannot create main component\n"); + parport_unregister_device(pardev); + goto __err; + } + card->private_data = mts; + card->private_free = snd_mts64_card_private_free; + + if ((err = snd_mts64_rawmidi_create(card)) < 0) { + snd_printd("Creating Rawmidi component failed\n"); + goto __err; + } + + /* claim parport */ + if (parport_claim(pardev)) { + snd_printd("Cannot claim parport 0x%lx\n", pardev->port->base); + err = -EIO; + goto __err; + } + mts->pardev_claimed = 1; + + /* init device */ + if ((err = mts64_device_init(p)) < 0) + goto __err; + + platform_set_drvdata(pdev, card); + + /* At this point card will be usable */ + if ((err = snd_card_register(card)) < 0) { + snd_printd("Cannot register card\n"); + goto __err; + } + + snd_printk("ESI Miditerminal 4140 on 0x%lx\n", p->base); + return 0; + +__err: + snd_card_free(card); + return err; +} + +static int snd_mts64_remove(struct platform_device *pdev) +{ + struct snd_card *card = platform_get_drvdata(pdev); + + if (card) + snd_card_free(card); + + return 0; +} + + +static struct platform_driver snd_mts64_driver = { + .probe = snd_mts64_probe, + .remove = snd_mts64_remove, + .driver = { + .name = PLATFORM_DRIVER + } +}; + +/********************************************************************* + * module init stuff + *********************************************************************/ +static void snd_mts64_unregister_all(void) +{ + int i; + + for (i = 0; i < SNDRV_CARDS; ++i) { + if (platform_devices[i]) { + platform_device_unregister(platform_devices[i]); + platform_devices[i] = NULL; + } + } + platform_driver_unregister(&snd_mts64_driver); + parport_unregister_driver(&mts64_parport_driver); +} + +static int __init snd_mts64_module_init(void) +{ + int err; + + if ((err = platform_driver_register(&snd_mts64_driver)) < 0) + return err; + + if (parport_register_driver(&mts64_parport_driver) != 0) { + platform_driver_unregister(&snd_mts64_driver); + return -EIO; + } + + if (device_count == 0) { + snd_mts64_unregister_all(); + return -ENODEV; + } + + return 0; +} + +static void __exit snd_mts64_module_exit(void) +{ + snd_mts64_unregister_all(); +} + +module_init(snd_mts64_module_init); +module_exit(snd_mts64_module_exit); From 4b146cb087b4a668511f6c991da1dc40e2e04b0d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 28 Jul 2006 14:42:36 +0200 Subject: [PATCH 0912/1063] [ALSA] Misc fixes for Realtek HD-audio codecs - Added model=arima for Arima W820Di1 with ALC882 codec chip - Added EAPD-control verbs to TCL S700 init verbs - Added missing model strings for Realtek codecs (to be specified via module option explicitly for testing/debugging) Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- .../sound/alsa/ALSA-Configuration.txt | 13 ++++- sound/pci/hda/patch_realtek.c | 48 ++++++++++++++++--- 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index 7344815b855e..d0dbc3fb20c2 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt @@ -778,11 +778,15 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. 6stack-digout 6-jack with a SPDIF out w810 3-jack z71v 3-jack (HP shared SPDIF) - asus 3-jack + asus 3-jack (ASUS Mobo) + asus-w1v ASUS W1V + asus-dig ASUS with SPDIF out + asus-dig2 ASUS with SPDIF out (using GPIO2) uniwill 3-jack F1734 2-jack lg LG laptop (m1 express dual) lg-lw LG LW20 laptop + tcl TCL S700 clevo Clevo laptops (m520G, m665n) test for testing/debugging purpose, almost all controls can be adjusted. Appearing only when compiled with @@ -791,6 +795,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. ALC260 hp HP machines + hp-3013 HP machines (3013-variant) fujitsu Fujitsu S7020 acer Acer TravelMate basic fixed pin assignment (old default model) @@ -806,18 +811,22 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. ALC882/885 3stack-dig 3-jack with SPDIF I/O 6stck-dig 6-jack digital with SPDIF I/O + arima Arima W820Di1 auto auto-config reading BIOS (default) ALC883/888 3stack-dig 3-jack with SPDIF I/O 6stack-dig 6-jack digital with SPDIF I/O - 6stack-dig-demo 6-stack digital for Intel demo board + 3stack-6ch 3-jack 6-channel + 3stack-6ch-dig 3-jack 6-channel with SPDIF I/O + 6stack-dig-demo 6-jack digital for Intel demo board auto auto-config reading BIOS (default) ALC861/660 3stack 3-jack 3stack-dig 3-jack with SPDIF I/O 6stack-dig 6-jack with SPDIF I/O + 3stack-660 3-jack (for ALC660) auto auto-config reading BIOS (default) CMI9880 diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 42c4f90a92b8..378e5f111e34 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -98,6 +98,7 @@ enum { enum { ALC882_3ST_DIG, ALC882_6ST_DIG, + ALC882_ARIMA, ALC882_AUTO, ALC882_MODEL_LAST, }; @@ -1349,6 +1350,10 @@ static struct hda_verb alc880_pin_clevo_init_verbs[] = { }; static struct hda_verb alc880_pin_tcl_S700_init_verbs[] = { + /* change to EAPD mode */ + {0x20, AC_VERB_SET_COEF_INDEX, 0x07}, + {0x20, AC_VERB_SET_PROC_COEF, 0x3060}, + /* Headphone output */ {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP}, /* Front output*/ @@ -2146,6 +2151,7 @@ static struct hda_board_config alc880_cfg_tbl[] = { { .pci_subvendor = 0x107b, .pci_subdevice = 0x4040, .config = ALC880_3ST }, { .pci_subvendor = 0x107b, .pci_subdevice = 0x4041, .config = ALC880_3ST }, /* TCL S700 */ + { .modelname = "tcl", .config = ALC880_TCL_S700 }, { .pci_subvendor = 0x19db, .pci_subdevice = 0x4188, .config = ALC880_TCL_S700 }, /* Back 3 jack, front 2 jack (Internal add Aux-In) */ @@ -2232,8 +2238,11 @@ static struct hda_board_config alc880_cfg_tbl[] = { { .pci_subvendor = 0x1043, .pci_subdevice = 0x1133, .config = ALC880_ASUS }, { .pci_subvendor = 0x1043, .pci_subdevice = 0x1123, .config = ALC880_ASUS_DIG }, { .pci_subvendor = 0x1043, .pci_subdevice = 0x1143, .config = ALC880_ASUS }, + { .modelname = "asus-w1v", .config = ALC880_ASUS_W1V }, { .pci_subvendor = 0x1043, .pci_subdevice = 0x10b3, .config = ALC880_ASUS_W1V }, + { .modelname = "asus-dig", .config = ALC880_ASUS_DIG }, { .pci_subvendor = 0x1043, .pci_subdevice = 0x8181, .config = ALC880_ASUS_DIG }, /* ASUS P4GPL-X */ + { .modelname = "asus-dig2", .config = ALC880_ASUS_DIG2 }, { .pci_subvendor = 0x1558, .pci_subdevice = 0x5401, .config = ALC880_ASUS_DIG2 }, { .modelname = "uniwill", .config = ALC880_UNIWILL_DIG }, @@ -3906,6 +3915,7 @@ static struct hda_board_config alc260_cfg_tbl[] = { { .pci_subvendor = 0x152d, .pci_subdevice = 0x0729, .config = ALC260_BASIC }, /* CTL Travel Master U553W */ { .modelname = "hp", .config = ALC260_HP }, + { .modelname = "hp-3013", .config = ALC260_HP_3013 }, { .pci_subvendor = 0x103c, .pci_subdevice = 0x3010, .config = ALC260_HP }, { .pci_subvendor = 0x103c, .pci_subdevice = 0x3011, .config = ALC260_HP }, { .pci_subvendor = 0x103c, .pci_subdevice = 0x3012, .config = ALC260_HP_3013 }, @@ -4272,6 +4282,13 @@ static struct hda_verb alc882_init_verbs[] = { { } }; +static struct hda_verb alc882_eapd_verbs[] = { + /* change to EAPD mode */ + {0x20, AC_VERB_SET_COEF_INDEX, 0x07}, + {0x20, AC_VERB_SET_PROC_COEF, 0x3060}, + { } +}; + /* * generic initialization of ADC, input mixers and output mixers */ @@ -4403,6 +4420,9 @@ static struct hda_board_config alc882_cfg_tbl[] = { .config = ALC882_6ST_DIG }, /* Foxconn */ { .pci_subvendor = 0x1019, .pci_subdevice = 0x6668, .config = ALC882_6ST_DIG }, /* ECS to Intel*/ + { .modelname = "arima", .config = ALC882_ARIMA }, + { .pci_subvendor = 0x161f, .pci_subdevice = 0x2054, + .config = ALC882_ARIMA }, /* Arima W820Di1 */ { .modelname = "auto", .config = ALC882_AUTO }, {} }; @@ -4430,6 +4450,15 @@ static struct alc_config_preset alc882_presets[] = { .channel_mode = alc882_sixstack_modes, .input_mux = &alc882_capture_source, }, + [ALC882_ARIMA] = { + .mixers = { alc882_base_mixer, alc882_chmode_mixer }, + .init_verbs = { alc882_init_verbs, alc882_eapd_verbs }, + .num_dacs = ARRAY_SIZE(alc882_dac_nids), + .dac_nids = alc882_dac_nids, + .num_channel_mode = ARRAY_SIZE(alc882_sixstack_modes), + .channel_mode = alc882_sixstack_modes, + .input_mux = &alc882_capture_source, + }, }; @@ -5005,16 +5034,18 @@ static struct snd_kcontrol_new alc883_capture_mixer[] = { */ static struct hda_board_config alc883_cfg_tbl[] = { { .modelname = "3stack-dig", .config = ALC883_3ST_2ch_DIG }, + { .modelname = "3stack-6ch-dig", .config = ALC883_3ST_6ch_DIG }, + { .pci_subvendor = 0x1019, .pci_subdevice = 0x6668, + .config = ALC883_3ST_6ch_DIG }, /* ECS to Intel*/ + { .modelname = "3stack-6ch", .config = ALC883_3ST_6ch }, + { .pci_subvendor = 0x108e, .pci_subdevice = 0x534d, + .config = ALC883_3ST_6ch }, { .modelname = "6stack-dig", .config = ALC883_6ST_DIG }, - { .modelname = "6stack-dig-demo", .config = ALC888_DEMO_BOARD }, { .pci_subvendor = 0x1462, .pci_subdevice = 0x6668, .config = ALC883_6ST_DIG }, /* MSI */ { .pci_subvendor = 0x105b, .pci_subdevice = 0x6668, .config = ALC883_6ST_DIG }, /* Foxconn */ - { .pci_subvendor = 0x1019, .pci_subdevice = 0x6668, - .config = ALC883_3ST_6ch_DIG }, /* ECS to Intel*/ - { .pci_subvendor = 0x108e, .pci_subdevice = 0x534d, - .config = ALC883_3ST_6ch }, + { .modelname = "6stack-dig-demo", .config = ALC888_DEMO_BOARD }, { .modelname = "auto", .config = ALC883_AUTO }, {} }; @@ -5223,8 +5254,10 @@ static int patch_alc883(struct hda_codec *codec) spec->stream_digital_playback = &alc883_pcm_digital_playback; spec->stream_digital_capture = &alc883_pcm_digital_capture; - spec->adc_nids = alc883_adc_nids; - spec->num_adc_nids = ARRAY_SIZE(alc883_adc_nids); + if (! spec->adc_nids && spec->input_mux) { + spec->adc_nids = alc883_adc_nids; + spec->num_adc_nids = ARRAY_SIZE(alc883_adc_nids); + } codec->patch_ops = alc_patch_ops; if (board_config == ALC883_AUTO) @@ -6504,6 +6537,7 @@ static struct hda_board_config alc861_cfg_tbl[] = { { .modelname = "3stack", .config = ALC861_3ST }, { .pci_subvendor = 0x8086, .pci_subdevice = 0xd600, .config = ALC861_3ST }, + { .modelname = "3stack-660", .config = ALC660_3ST }, { .pci_subvendor = 0x1043, .pci_subdevice = 0x81e7, .config = ALC660_3ST }, { .modelname = "3stack-dig", .config = ALC861_3ST_DIG }, From 35aec4e2affb99d52b4b744ddb09767eb6e05580 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 28 Jul 2006 14:44:31 +0200 Subject: [PATCH 0913/1063] [ALSA] Don't set up the same PID twice in snd_hda_multi_out_analog_prepare Check the hp_nid whether it's identical with front pin to avoid the setup of the same widget node twice. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/hda_codec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 399860c36be9..ff29d0f16903 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -1942,7 +1942,7 @@ int snd_hda_multi_out_analog_prepare(struct hda_codec *codec, struct hda_multi_o /* front */ snd_hda_codec_setup_stream(codec, nids[HDA_FRONT], stream_tag, 0, format); - if (mout->hp_nid) + if (mout->hp_nid && mout->hp_nid != nids[HDA_FRONT]) /* headphone out will just decode front left/right (stereo) */ snd_hda_codec_setup_stream(codec, mout->hp_nid, stream_tag, 0, format); /* extra outputs copied from front */ From 4e195a7b78618c89b06547f3140e67a69ec23272 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 28 Jul 2006 14:47:34 +0200 Subject: [PATCH 0914/1063] [ALSA] Fix noisy output with shared channel mode with hd-audio - Fix the wrong initialization of num_dacs when changing the channel mode between 2 and multi-channel modes. It must be evaluated after calling snd_hda_ch_mode_put() - Added the similar check of num_dacs fix in Realtek code. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/patch_analog.c | 8 +++++--- sound/pci/hda/patch_realtek.c | 27 ++++++++++++++++++++++++--- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 8955397cca6f..077f1ce01ee1 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -1647,10 +1647,12 @@ static int ad198x_ch_mode_put(struct snd_kcontrol *kcontrol, { struct hda_codec *codec = snd_kcontrol_chip(kcontrol); struct ad198x_spec *spec = codec->spec; - if (spec->need_dac_fix) + int err = snd_hda_ch_mode_put(codec, ucontrol, spec->channel_mode, + spec->num_channel_mode, + &spec->multiout.max_channels); + if (! err && spec->need_dac_fix) spec->multiout.num_dacs = spec->multiout.max_channels / 2; - return snd_hda_ch_mode_put(codec, ucontrol, spec->channel_mode, - spec->num_channel_mode, &spec->multiout.max_channels); + return err; } /* 6-stack mode */ diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 378e5f111e34..991f1079116b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -155,6 +155,7 @@ struct alc_spec { /* channel model */ const struct hda_channel_mode *channel_mode; int num_channel_mode; + int need_dac_fix; /* PCM information */ struct hda_pcm pcm_rec[3]; /* used in alc_build_pcms() */ @@ -192,6 +193,7 @@ struct alc_config_preset { hda_nid_t dig_in_nid; unsigned int num_channel_mode; const struct hda_channel_mode *channel_mode; + int need_dac_fix; unsigned int num_mux_defs; const struct hda_input_mux *input_mux; void (*unsol_event)(struct hda_codec *, unsigned int); @@ -264,9 +266,12 @@ static int alc_ch_mode_put(struct snd_kcontrol *kcontrol, { struct hda_codec *codec = snd_kcontrol_chip(kcontrol); struct alc_spec *spec = codec->spec; - return snd_hda_ch_mode_put(codec, ucontrol, spec->channel_mode, - spec->num_channel_mode, - &spec->multiout.max_channels); + int err = snd_hda_ch_mode_put(codec, ucontrol, spec->channel_mode, + spec->num_channel_mode, + &spec->multiout.max_channels); + if (! err && spec->need_dac_fix) + spec->multiout.num_dacs = spec->multiout.max_channels / 2; + return err; } /* @@ -546,6 +551,7 @@ static void setup_preset(struct alc_spec *spec, spec->channel_mode = preset->channel_mode; spec->num_channel_mode = preset->num_channel_mode; + spec->need_dac_fix = preset->need_dac_fix; spec->multiout.max_channels = spec->channel_mode[0].channels; @@ -2278,6 +2284,7 @@ static struct alc_config_preset alc880_presets[] = { .dac_nids = alc880_dac_nids, .num_channel_mode = ARRAY_SIZE(alc880_threestack_modes), .channel_mode = alc880_threestack_modes, + .need_dac_fix = 1, .input_mux = &alc880_capture_source, }, [ALC880_3ST_DIG] = { @@ -2288,6 +2295,7 @@ static struct alc_config_preset alc880_presets[] = { .dig_out_nid = ALC880_DIGOUT_NID, .num_channel_mode = ARRAY_SIZE(alc880_threestack_modes), .channel_mode = alc880_threestack_modes, + .need_dac_fix = 1, .input_mux = &alc880_capture_source, }, [ALC880_TCL_S700] = { @@ -2380,6 +2388,7 @@ static struct alc_config_preset alc880_presets[] = { .dac_nids = alc880_asus_dac_nids, .num_channel_mode = ARRAY_SIZE(alc880_asus_modes), .channel_mode = alc880_asus_modes, + .need_dac_fix = 1, .input_mux = &alc880_capture_source, }, [ALC880_ASUS_DIG] = { @@ -2391,6 +2400,7 @@ static struct alc_config_preset alc880_presets[] = { .dig_out_nid = ALC880_DIGOUT_NID, .num_channel_mode = ARRAY_SIZE(alc880_asus_modes), .channel_mode = alc880_asus_modes, + .need_dac_fix = 1, .input_mux = &alc880_capture_source, }, [ALC880_ASUS_DIG2] = { @@ -2402,6 +2412,7 @@ static struct alc_config_preset alc880_presets[] = { .dig_out_nid = ALC880_DIGOUT_NID, .num_channel_mode = ARRAY_SIZE(alc880_asus_modes), .channel_mode = alc880_asus_modes, + .need_dac_fix = 1, .input_mux = &alc880_capture_source, }, [ALC880_ASUS_W1V] = { @@ -2413,6 +2424,7 @@ static struct alc_config_preset alc880_presets[] = { .dig_out_nid = ALC880_DIGOUT_NID, .num_channel_mode = ARRAY_SIZE(alc880_asus_modes), .channel_mode = alc880_asus_modes, + .need_dac_fix = 1, .input_mux = &alc880_capture_source, }, [ALC880_UNIWILL_DIG] = { @@ -2423,6 +2435,7 @@ static struct alc_config_preset alc880_presets[] = { .dig_out_nid = ALC880_DIGOUT_NID, .num_channel_mode = ARRAY_SIZE(alc880_asus_modes), .channel_mode = alc880_asus_modes, + .need_dac_fix = 1, .input_mux = &alc880_capture_source, }, [ALC880_CLEVO] = { @@ -2434,6 +2447,7 @@ static struct alc_config_preset alc880_presets[] = { .hp_nid = 0x03, .num_channel_mode = ARRAY_SIZE(alc880_threestack_modes), .channel_mode = alc880_threestack_modes, + .need_dac_fix = 1, .input_mux = &alc880_capture_source, }, [ALC880_LG] = { @@ -2445,6 +2459,7 @@ static struct alc_config_preset alc880_presets[] = { .dig_out_nid = ALC880_DIGOUT_NID, .num_channel_mode = ARRAY_SIZE(alc880_lg_ch_modes), .channel_mode = alc880_lg_ch_modes, + .need_dac_fix = 1, .input_mux = &alc880_lg_capture_source, .unsol_event = alc880_lg_unsol_event, .init_hook = alc880_lg_automute, @@ -4437,6 +4452,7 @@ static struct alc_config_preset alc882_presets[] = { .dig_in_nid = ALC882_DIGIN_NID, .num_channel_mode = ARRAY_SIZE(alc882_ch_modes), .channel_mode = alc882_ch_modes, + .need_dac_fix = 1, .input_mux = &alc882_capture_source, }, [ALC882_6ST_DIG] = { @@ -5075,6 +5091,7 @@ static struct alc_config_preset alc883_presets[] = { .dig_in_nid = ALC883_DIGIN_NID, .num_channel_mode = ARRAY_SIZE(alc883_3ST_6ch_modes), .channel_mode = alc883_3ST_6ch_modes, + .need_dac_fix = 1, .input_mux = &alc883_capture_source, }, [ALC883_3ST_6ch] = { @@ -5086,6 +5103,7 @@ static struct alc_config_preset alc883_presets[] = { .adc_nids = alc883_adc_nids, .num_channel_mode = ARRAY_SIZE(alc883_3ST_6ch_modes), .channel_mode = alc883_3ST_6ch_modes, + .need_dac_fix = 1, .input_mux = &alc883_capture_source, }, [ALC883_6ST_DIG] = { @@ -6554,6 +6572,7 @@ static struct alc_config_preset alc861_presets[] = { .dac_nids = alc861_dac_nids, .num_channel_mode = ARRAY_SIZE(alc861_threestack_modes), .channel_mode = alc861_threestack_modes, + .need_dac_fix = 1, .num_adc_nids = ARRAY_SIZE(alc861_adc_nids), .adc_nids = alc861_adc_nids, .input_mux = &alc861_capture_source, @@ -6566,6 +6585,7 @@ static struct alc_config_preset alc861_presets[] = { .dig_out_nid = ALC861_DIGOUT_NID, .num_channel_mode = ARRAY_SIZE(alc861_threestack_modes), .channel_mode = alc861_threestack_modes, + .need_dac_fix = 1, .num_adc_nids = ARRAY_SIZE(alc861_adc_nids), .adc_nids = alc861_adc_nids, .input_mux = &alc861_capture_source, @@ -6589,6 +6609,7 @@ static struct alc_config_preset alc861_presets[] = { .dac_nids = alc660_dac_nids, .num_channel_mode = ARRAY_SIZE(alc861_threestack_modes), .channel_mode = alc861_threestack_modes, + .need_dac_fix = 1, .num_adc_nids = ARRAY_SIZE(alc861_adc_nids), .adc_nids = alc861_adc_nids, .input_mux = &alc861_capture_source, From 7012b2dac71988f61b520b33c70c63be372b5994 Mon Sep 17 00:00:00 2001 From: James Courtier-Dutton Date: Fri, 28 Jul 2006 22:27:56 +0100 Subject: [PATCH 0915/1063] [ALSA] snd-emu10k1: Add a comment explaining the conversion function for dB gain. Signed-off-by: James Courtier-Dutton Signed-off-by: Jaroslav Kysela --- sound/pci/emu10k1/emufx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/emu10k1/emufx.c b/sound/pci/emu10k1/emufx.c index 00fc904c251d..13cd6ce89811 100644 --- a/sound/pci/emu10k1/emufx.c +++ b/sound/pci/emu10k1/emufx.c @@ -267,6 +267,7 @@ static const u32 treble_table[41][5] = { { 0x37c4448b, 0xa45ef51d, 0x262f3267, 0x081e36dc, 0xfd8f5d14 } }; +/* dB gain = (float) 20 * log10( float(db_table_value) / 0x8000000 ) */ static const u32 db_table[101] = { 0x00000000, 0x01571f82, 0x01674b41, 0x01783a1b, 0x0189f540, 0x019c8651, 0x01aff763, 0x01c45306, 0x01d9a446, 0x01eff6b8, From f3302a59cf6961712658db63b66ea5902c17d5e1 Mon Sep 17 00:00:00 2001 From: Matt Porter Date: Mon, 31 Jul 2006 12:49:34 +0200 Subject: [PATCH 0916/1063] [ALSA] hda: sigmatel 9205 family support Adds support for the '9205 family' which includes some other part numbers but 9205 is the first one. These are 4 channel codecs, some have digital mic capability. Support for the digital mic feature will come later. Signed-off-by: Matt Porter Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/patch_sigmatel.c | 102 +++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index ac96336f3484..d572f030c3e9 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -136,6 +136,14 @@ static hda_nid_t stac927x_mux_nids[3] = { 0x15, 0x16, 0x17 }; +static hda_nid_t stac9205_adc_nids[2] = { + 0x12, 0x13 +}; + +static hda_nid_t stac9205_mux_nids[2] = { + 0x19, 0x1a +}; + static hda_nid_t stac9200_pin_nids[8] = { 0x08, 0x09, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, }; @@ -151,6 +159,13 @@ static hda_nid_t stac927x_pin_nids[14] = { 0x14, 0x21, 0x22, 0x23, }; +static hda_nid_t stac9205_pin_nids[12] = { + 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, + 0x0f, 0x14, 0x16, 0x17, 0x18, + 0x21, 0x22, + +}; + static int stac92xx_mux_enum_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { struct hda_codec *codec = snd_kcontrol_chip(kcontrol); @@ -214,6 +229,12 @@ static struct hda_verb stac927x_core_init[] = { {} }; +static struct hda_verb stac9205_core_init[] = { + /* set master volume and direct control */ + { 0x24, AC_VERB_SET_VOLUME_KNOB_CONTROL, 0xff}, + {} +}; + static struct snd_kcontrol_new stac9200_mixer[] = { HDA_CODEC_VOLUME("Master Playback Volume", 0xb, 0, HDA_OUTPUT), HDA_CODEC_MUTE("Master Playback Switch", 0xb, 0, HDA_OUTPUT), @@ -277,6 +298,21 @@ static snd_kcontrol_new_t stac927x_mixer[] = { { } /* end */ }; +static snd_kcontrol_new_t stac9205_mixer[] = { + { + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .name = "Input Source", + .count = 1, + .info = stac92xx_mux_enum_info, + .get = stac92xx_mux_enum_get, + .put = stac92xx_mux_enum_put, + }, + HDA_CODEC_VOLUME("InMux Capture Volume", 0x19, 0x0, HDA_OUTPUT), + HDA_CODEC_VOLUME("InVol Capture Volume", 0x1b, 0x0, HDA_INPUT), + HDA_CODEC_MUTE("ADCMux Capture Switch", 0x1d, 0x0, HDA_OUTPUT), + { } /* end */ +}; + static int stac92xx_build_controls(struct hda_codec *codec) { struct sigmatel_spec *spec = codec->spec; @@ -415,6 +451,24 @@ static struct hda_board_config stac927x_cfg_tbl[] = { {} /* terminator */ }; +static unsigned int ref9205_pin_configs[12] = { + 0x40000100, 0x40000100, 0x01016011, 0x01014010, + 0x01813122, 0x01a19021, 0x40000100, 0x40000100, + 0x40000100, 0x40000100, 0x01441030, 0x01c41030 +}; + +static unsigned int *stac9205_brd_tbl[] = { + ref9205_pin_configs, +}; + +static struct hda_board_config stac9205_cfg_tbl[] = { + { .modelname = "ref", + .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2668, /* DFI LanParty */ + .config = STAC_REF }, /* SigmaTel reference board */ + {} /* terminator */ +}; + static void stac92xx_set_config_regs(struct hda_codec *codec) { int i; @@ -1354,6 +1408,46 @@ static int patch_stac927x(struct hda_codec *codec) return 0; } +static int patch_stac9205(struct hda_codec *codec) +{ + struct sigmatel_spec *spec; + int err; + + spec = kzalloc(sizeof(*spec), GFP_KERNEL); + if (spec == NULL) + return -ENOMEM; + + codec->spec = spec; + spec->board_config = snd_hda_check_board_config(codec, stac9205_cfg_tbl); + if (spec->board_config < 0) + snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC9205, using BIOS defaults\n"); + else { + spec->num_pins = 14; + spec->pin_nids = stac9205_pin_nids; + spec->pin_configs = stac9205_brd_tbl[spec->board_config]; + stac92xx_set_config_regs(codec); + } + + spec->adc_nids = stac9205_adc_nids; + spec->mux_nids = stac9205_mux_nids; + spec->num_muxes = 3; + + spec->init = stac9205_core_init; + spec->mixer = stac9205_mixer; + + spec->multiout.dac_nids = spec->dac_nids; + + err = stac92xx_parse_auto_config(codec, 0x1f, 0x20); + if (err < 0) { + stac92xx_free(codec); + return err; + } + + codec->patch_ops = stac92xx_patch_ops; + + return 0; +} + /* * STAC 7661(?) hack */ @@ -1542,5 +1636,13 @@ struct hda_codec_preset snd_hda_preset_sigmatel[] = { { .id = 0x83847628, .name = "STAC9274X5NH", .patch = patch_stac927x }, { .id = 0x83847629, .name = "STAC9274D5NH", .patch = patch_stac927x }, { .id = 0x83847661, .name = "STAC7661", .patch = patch_stac7661 }, + { .id = 0x838476a0, .name = "STAC9205", .patch = patch_stac9205 }, + { .id = 0x838476a1, .name = "STAC9205D", .patch = patch_stac9205 }, + { .id = 0x838476a2, .name = "STAC9204", .patch = patch_stac9205 }, + { .id = 0x838476a3, .name = "STAC9204D", .patch = patch_stac9205 }, + { .id = 0x838476a4, .name = "STAC9255", .patch = patch_stac9205 }, + { .id = 0x838476a5, .name = "STAC9255D", .patch = patch_stac9205 }, + { .id = 0x838476a6, .name = "STAC9254", .patch = patch_stac9205 }, + { .id = 0x838476a7, .name = "STAC9254D", .patch = patch_stac9205 }, {} /* terminator */ }; From 1c3985580445ef9225c1ea7714d6d963f7626eeb Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Mon, 31 Jul 2006 12:51:57 +0200 Subject: [PATCH 0917/1063] [ALSA] es18xx - Add PnP BIOS support This patch adds PnP BIOS support to es18xx driver. It allows ESS ES18xx sound chips integrated in some notebooks (such as DTK FortisPro TOP-5A) that don't appear as ISA cards (they aren't recognized by ISA PnP, only by PnP BIOS) to 'just work' automatically. Signed-off-by: Ondrej Zary Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/isa/es18xx.c | 219 ++++++++++++++++++++++++++++++++------------- 1 file changed, 157 insertions(+), 62 deletions(-) diff --git a/sound/isa/es18xx.c b/sound/isa/es18xx.c index 34998de9968c..85818200333f 100644 --- a/sound/isa/es18xx.c +++ b/sound/isa/es18xx.c @@ -2038,7 +2038,80 @@ MODULE_PARM_DESC(dma2, "DMA 2 # for ES18xx driver."); static struct platform_device *platform_devices[SNDRV_CARDS]; #ifdef CONFIG_PNP -static int pnp_registered; +static int pnp_registered, pnpc_registered; + +static struct pnp_device_id snd_audiodrive_pnpbiosids[] = { + { .id = "ESS1869" }, + { .id = "" } /* end */ +}; + +MODULE_DEVICE_TABLE(pnp, snd_audiodrive_pnpbiosids); + +/* PnP main device initialization */ +static int __devinit snd_audiodrive_pnp_init_main(int dev, struct pnp_dev *pdev, + struct pnp_resource_table *cfg) +{ + int err; + + pnp_init_resource_table(cfg); + if (port[dev] != SNDRV_AUTO_PORT) + pnp_resource_change(&cfg->port_resource[0], port[dev], 16); + if (fm_port[dev] != SNDRV_AUTO_PORT) + pnp_resource_change(&cfg->port_resource[1], fm_port[dev], 4); + if (mpu_port[dev] != SNDRV_AUTO_PORT) + pnp_resource_change(&cfg->port_resource[2], mpu_port[dev], 2); + if (dma1[dev] != SNDRV_AUTO_DMA) + pnp_resource_change(&cfg->dma_resource[0], dma1[dev], 1); + if (dma2[dev] != SNDRV_AUTO_DMA) + pnp_resource_change(&cfg->dma_resource[1], dma2[dev], 1); + if (irq[dev] != SNDRV_AUTO_IRQ) + pnp_resource_change(&cfg->irq_resource[0], irq[dev], 1); + if (pnp_device_is_isapnp(pdev)) { + err = pnp_manual_config_dev(pdev, cfg, 0); + if (err < 0) + snd_printk(KERN_ERR PFX "PnP manual resources are invalid, using auto config\n"); + } + err = pnp_activate_dev(pdev); + if (err < 0) { + snd_printk(KERN_ERR PFX "PnP configure failure (out of resources?)\n"); + return -EBUSY; + } + /* ok. hack using Vendor-Defined Card-Level registers */ + /* skip csn and logdev initialization - already done in isapnp_configure */ + if (pnp_device_is_isapnp(pdev)) { + isapnp_cfg_begin(isapnp_card_number(pdev), isapnp_csn_number(pdev)); + isapnp_write_byte(0x27, pnp_irq(pdev, 0)); /* Hardware Volume IRQ Number */ + if (mpu_port[dev] != SNDRV_AUTO_PORT) + isapnp_write_byte(0x28, pnp_irq(pdev, 0)); /* MPU-401 IRQ Number */ + isapnp_write_byte(0x72, pnp_irq(pdev, 0)); /* second IRQ */ + isapnp_cfg_end(); + } + port[dev] = pnp_port_start(pdev, 0); + fm_port[dev] = pnp_port_start(pdev, 1); + mpu_port[dev] = pnp_port_start(pdev, 2); + dma1[dev] = pnp_dma(pdev, 0); + dma2[dev] = pnp_dma(pdev, 1); + irq[dev] = pnp_irq(pdev, 0); + snd_printdd("PnP ES18xx: port=0x%lx, fm port=0x%lx, mpu port=0x%lx\n", port[dev], fm_port[dev], mpu_port[dev]); + snd_printdd("PnP ES18xx: dma1=%i, dma2=%i, irq=%i\n", dma1[dev], dma2[dev], irq[dev]); + return 0; +} + +static int __devinit snd_audiodrive_pnp(int dev, struct snd_audiodrive *acard, + struct pnp_dev *pdev) +{ + struct pnp_resource_table * cfg = kmalloc(sizeof(struct pnp_resource_table), GFP_KERNEL); + + if (!cfg) + return -ENOMEM; + acard->dev = pdev; + if (snd_audiodrive_pnp_init_main(dev, acard->dev, cfg) < 0) { + kfree(cfg); + return -EBUSY; + } + kfree(cfg); + return 0; +} static struct pnp_card_device_id snd_audiodrive_pnpids[] = { /* ESS 1868 (integrated on Compaq dual P-Pro motherboard and Genius 18PnP 3D) */ @@ -2061,13 +2134,11 @@ static struct pnp_card_device_id snd_audiodrive_pnpids[] = { MODULE_DEVICE_TABLE(pnp_card, snd_audiodrive_pnpids); -static int __devinit snd_audiodrive_pnp(int dev, struct snd_audiodrive *acard, +static int __devinit snd_audiodrive_pnpc(int dev, struct snd_audiodrive *acard, struct pnp_card_link *card, const struct pnp_card_device_id *id) { - struct pnp_dev *pdev; struct pnp_resource_table * cfg = kmalloc(sizeof(struct pnp_resource_table), GFP_KERNEL); - int err; if (!cfg) return -ENOMEM; @@ -2082,58 +2153,16 @@ static int __devinit snd_audiodrive_pnp(int dev, struct snd_audiodrive *acard, return -EBUSY; } /* Control port initialization */ - err = pnp_activate_dev(acard->devc); - if (err < 0) { + if (pnp_activate_dev(acard->devc) < 0) { snd_printk(KERN_ERR PFX "PnP control configure failure (out of resources?)\n"); - kfree(cfg); return -EAGAIN; } snd_printdd("pnp: port=0x%llx\n", (unsigned long long)pnp_port_start(acard->devc, 0)); - /* PnP initialization */ - pdev = acard->dev; - pnp_init_resource_table(cfg); - if (port[dev] != SNDRV_AUTO_PORT) - pnp_resource_change(&cfg->port_resource[0], port[dev], 16); - if (fm_port[dev] != SNDRV_AUTO_PORT) - pnp_resource_change(&cfg->port_resource[1], fm_port[dev], 4); - if (mpu_port[dev] != SNDRV_AUTO_PORT) - pnp_resource_change(&cfg->port_resource[2], mpu_port[dev], 2); - if (dma1[dev] != SNDRV_AUTO_DMA) - pnp_resource_change(&cfg->dma_resource[0], dma1[dev], 1); - if (dma2[dev] != SNDRV_AUTO_DMA) - pnp_resource_change(&cfg->dma_resource[1], dma2[dev], 1); - if (irq[dev] != SNDRV_AUTO_IRQ) - pnp_resource_change(&cfg->irq_resource[0], irq[dev], 1); - err = pnp_manual_config_dev(pdev, cfg, 0); - if (err < 0) - snd_printk(KERN_ERR PFX "PnP manual resources are invalid, using auto config\n"); - err = pnp_activate_dev(pdev); - if (err < 0) { - snd_printk(KERN_ERR PFX "PnP configure failure (out of resources?)\n"); + if (snd_audiodrive_pnp_init_main(dev, acard->dev, cfg) < 0) { kfree(cfg); return -EBUSY; } - /* ok. hack using Vendor-Defined Card-Level registers */ - /* skip csn and logdev initialization - already done in isapnp_configure */ - if (pnp_device_is_isapnp(pdev)) { - isapnp_cfg_begin(isapnp_card_number(pdev), isapnp_csn_number(pdev)); - isapnp_write_byte(0x27, pnp_irq(pdev, 0)); /* Hardware Volume IRQ Number */ - if (mpu_port[dev] != SNDRV_AUTO_PORT) - isapnp_write_byte(0x28, pnp_irq(pdev, 0)); /* MPU-401 IRQ Number */ - isapnp_write_byte(0x72, pnp_irq(pdev, 0)); /* second IRQ */ - isapnp_cfg_end(); - } else { - snd_printk(KERN_ERR PFX "unable to install ISA PnP hack, expect malfunction\n"); - } - port[dev] = pnp_port_start(pdev, 0); - fm_port[dev] = pnp_port_start(pdev, 1); - mpu_port[dev] = pnp_port_start(pdev, 2); - dma1[dev] = pnp_dma(pdev, 0); - dma2[dev] = pnp_dma(pdev, 1); - irq[dev] = pnp_irq(pdev, 0); - snd_printdd("PnP ES18xx: port=0x%lx, fm port=0x%lx, mpu port=0x%lx\n", port[dev], fm_port[dev], mpu_port[dev]); - snd_printdd("PnP ES18xx: dma1=%i, dma2=%i, irq=%i\n", dma1[dev], dma2[dev], irq[dev]); kfree(cfg); return 0; } @@ -2302,7 +2331,69 @@ static struct platform_driver snd_es18xx_nonpnp_driver = { #ifdef CONFIG_PNP static unsigned int __devinitdata es18xx_pnp_devices; -static int __devinit snd_audiodrive_pnp_detect(struct pnp_card_link *pcard, +static int __devinit snd_audiodrive_pnp_detect(struct pnp_dev *pdev, + const struct pnp_device_id *id) +{ + static int dev; + int err; + struct snd_card *card; + + if (pnp_device_is_isapnp(pdev)) + return -ENOENT; /* we have another procedure - card */ + for (; dev < SNDRV_CARDS; dev++) { + if (enable[dev] && isapnp[dev]) + break; + } + if (dev >= SNDRV_CARDS) + return -ENODEV; + + card = snd_es18xx_card_new(dev); + if (! card) + return -ENOMEM; + if ((err = snd_audiodrive_pnp(dev, card->private_data, pdev)) < 0) { + snd_card_free(card); + return err; + } + snd_card_set_dev(card, &pdev->dev); + if ((err = snd_audiodrive_probe(card, dev)) < 0) { + snd_card_free(card); + return err; + } + pnp_set_drvdata(pdev, card); + dev++; + es18xx_pnp_devices++; + return 0; +} + +static void __devexit snd_audiodrive_pnp_remove(struct pnp_dev * pdev) +{ + snd_card_free(pnp_get_drvdata(pdev)); + pnp_set_drvdata(pdev, NULL); +} + +#ifdef CONFIG_PM +static int snd_audiodrive_pnp_suspend(struct pnp_dev *pdev, pm_message_t state) +{ + return snd_es18xx_suspend(pnp_get_drvdata(pdev), state); +} +static int snd_audiodrive_pnp_resume(struct pnp_dev *pdev) +{ + return snd_es18xx_resume(pnp_get_drvdata(pdev)); +} +#endif + +static struct pnp_driver es18xx_pnp_driver = { + .name = "es18xx-pnpbios", + .id_table = snd_audiodrive_pnpbiosids, + .probe = snd_audiodrive_pnp_detect, + .remove = __devexit_p(snd_audiodrive_pnp_remove), +#ifdef CONFIG_PM + .suspend = snd_audiodrive_pnp_suspend, + .resume = snd_audiodrive_pnp_resume, +#endif +}; + +static int __devinit snd_audiodrive_pnpc_detect(struct pnp_card_link *pcard, const struct pnp_card_device_id *pid) { static int dev; @@ -2320,7 +2411,7 @@ static int __devinit snd_audiodrive_pnp_detect(struct pnp_card_link *pcard, if (! card) return -ENOMEM; - if ((res = snd_audiodrive_pnp(dev, card->private_data, pcard, pid)) < 0) { + if ((res = snd_audiodrive_pnpc(dev, card->private_data, pcard, pid)) < 0) { snd_card_free(card); return res; } @@ -2336,19 +2427,19 @@ static int __devinit snd_audiodrive_pnp_detect(struct pnp_card_link *pcard, return 0; } -static void __devexit snd_audiodrive_pnp_remove(struct pnp_card_link * pcard) +static void __devexit snd_audiodrive_pnpc_remove(struct pnp_card_link * pcard) { snd_card_free(pnp_get_card_drvdata(pcard)); pnp_set_card_drvdata(pcard, NULL); } #ifdef CONFIG_PM -static int snd_audiodrive_pnp_suspend(struct pnp_card_link *pcard, pm_message_t state) +static int snd_audiodrive_pnpc_suspend(struct pnp_card_link *pcard, pm_message_t state) { return snd_es18xx_suspend(pnp_get_card_drvdata(pcard), state); } -static int snd_audiodrive_pnp_resume(struct pnp_card_link *pcard) +static int snd_audiodrive_pnpc_resume(struct pnp_card_link *pcard) { return snd_es18xx_resume(pnp_get_card_drvdata(pcard)); } @@ -2359,11 +2450,11 @@ static struct pnp_card_driver es18xx_pnpc_driver = { .flags = PNP_DRIVER_RES_DISABLE, .name = "es18xx", .id_table = snd_audiodrive_pnpids, - .probe = snd_audiodrive_pnp_detect, - .remove = __devexit_p(snd_audiodrive_pnp_remove), + .probe = snd_audiodrive_pnpc_detect, + .remove = __devexit_p(snd_audiodrive_pnpc_remove), #ifdef CONFIG_PM - .suspend = snd_audiodrive_pnp_suspend, - .resume = snd_audiodrive_pnp_resume, + .suspend = snd_audiodrive_pnpc_suspend, + .resume = snd_audiodrive_pnpc_resume, #endif }; #endif /* CONFIG_PNP */ @@ -2373,8 +2464,10 @@ static void __init_or_module snd_es18xx_unregister_all(void) int i; #ifdef CONFIG_PNP - if (pnp_registered) + if (pnpc_registered) pnp_unregister_card_driver(&es18xx_pnpc_driver); + if (pnp_registered) + pnp_unregister_driver(&es18xx_pnp_driver); #endif for (i = 0; i < ARRAY_SIZE(platform_devices); ++i) platform_device_unregister(platform_devices[i]); @@ -2405,11 +2498,13 @@ static int __init alsa_card_es18xx_init(void) } #ifdef CONFIG_PNP - err = pnp_register_card_driver(&es18xx_pnpc_driver); - if (!err) { + err = pnp_register_driver(&es18xx_pnp_driver); + if (!err) pnp_registered = 1; - cards += es18xx_pnp_devices; - } + err = pnp_register_card_driver(&es18xx_pnpc_driver); + if (!err) + pnpc_registered = 1; + cards += es18xx_pnp_devices; #endif if(!cards) { From 548a648b98318e4b843b636dd2c7f42377e19a00 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 31 Jul 2006 16:51:51 +0200 Subject: [PATCH 0918/1063] [ALSA] Fix control/status mmap with shared PCM substream The flag to avoid 32bit-incompatible mmap for control/status records should be outside the pcm substream instance since a substream can be shared among multiple opens. Now it's flagged in pcm_file list that is directly assigned to file->private_data. Also, removed snd_pcm_add_file() and remove_file() functions and substream.files field that are not really used in the code. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- include/sound/pcm.h | 4 +--- sound/core/pcm_compat.c | 2 +- sound/core/pcm_native.c | 49 +++++++++-------------------------------- 3 files changed, 12 insertions(+), 43 deletions(-) diff --git a/include/sound/pcm.h b/include/sound/pcm.h index f84d84993a31..60d40b34efc0 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -190,7 +190,7 @@ struct snd_pcm_ops { struct snd_pcm_file { struct snd_pcm_substream *substream; - struct snd_pcm_file *next; + int no_compat_mmap; }; struct snd_pcm_hw_rule; @@ -384,7 +384,6 @@ struct snd_pcm_substream { struct snd_info_entry *proc_prealloc_entry; #endif /* misc flags */ - unsigned int no_mmap_ctrl: 1; unsigned int hw_opened: 1; }; @@ -402,7 +401,6 @@ struct snd_pcm_str { /* -- OSS things -- */ struct snd_pcm_oss_stream oss; #endif - struct snd_pcm_file *files; #ifdef CONFIG_SND_VERBOSE_PROCFS struct snd_info_entry *proc_root; struct snd_info_entry *proc_info_entry; diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c index 2b8aab6fd6cd..2b539799d23b 100644 --- a/sound/core/pcm_compat.c +++ b/sound/core/pcm_compat.c @@ -478,7 +478,7 @@ static long snd_pcm_ioctl_compat(struct file *file, unsigned int cmd, unsigned l * mmap of PCM status/control records because of the size * incompatibility. */ - substream->no_mmap_ctrl = 1; + pcm_file->no_compat_mmap = 1; switch (cmd) { case SNDRV_PCM_IOCTL_PVERSION: diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 439f047929e1..0224c70414f5 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -1992,35 +1992,9 @@ int snd_pcm_hw_constraints_complete(struct snd_pcm_substream *substream) return 0; } -static void snd_pcm_add_file(struct snd_pcm_str *str, - struct snd_pcm_file *pcm_file) -{ - pcm_file->next = str->files; - str->files = pcm_file; -} - -static void snd_pcm_remove_file(struct snd_pcm_str *str, - struct snd_pcm_file *pcm_file) -{ - struct snd_pcm_file * pcm_file1; - if (str->files == pcm_file) { - str->files = pcm_file->next; - } else { - pcm_file1 = str->files; - while (pcm_file1 && pcm_file1->next != pcm_file) - pcm_file1 = pcm_file1->next; - if (pcm_file1 != NULL) - pcm_file1->next = pcm_file->next; - } -} - static void pcm_release_private(struct snd_pcm_substream *substream) { - struct snd_pcm_file *pcm_file = substream->file; - snd_pcm_unlink(substream); - snd_pcm_remove_file(substream->pstr, pcm_file); - kfree(pcm_file); } void snd_pcm_release_substream(struct snd_pcm_substream *substream) @@ -2060,7 +2034,6 @@ int snd_pcm_open_substream(struct snd_pcm *pcm, int stream, return 0; } - substream->no_mmap_ctrl = 0; err = snd_pcm_hw_constraints_init(substream); if (err < 0) { snd_printd("snd_pcm_hw_constraints_init failed\n"); @@ -2105,19 +2078,16 @@ static int snd_pcm_open_file(struct file *file, if (err < 0) return err; - if (substream->ref_count > 1) - pcm_file = substream->file; - else { - pcm_file = kzalloc(sizeof(*pcm_file), GFP_KERNEL); - if (pcm_file == NULL) { - snd_pcm_release_substream(substream); - return -ENOMEM; - } + pcm_file = kzalloc(sizeof(*pcm_file), GFP_KERNEL); + if (pcm_file == NULL) { + snd_pcm_release_substream(substream); + return -ENOMEM; + } + pcm_file->substream = substream; + if (substream->ref_count == 1) { str = substream->pstr; substream->file = pcm_file; substream->pcm_release = pcm_release_private; - pcm_file->substream = substream; - snd_pcm_add_file(str, pcm_file); } file->private_data = pcm_file; *rpcm_file = pcm_file; @@ -2209,6 +2179,7 @@ static int snd_pcm_release(struct inode *inode, struct file *file) fasync_helper(-1, file, 0, &substream->runtime->fasync); mutex_lock(&pcm->open_mutex); snd_pcm_release_substream(substream); + kfree(pcm_file); mutex_unlock(&pcm->open_mutex); wake_up(&pcm->open_wait); module_put(pcm->card->module); @@ -3270,11 +3241,11 @@ static int snd_pcm_mmap(struct file *file, struct vm_area_struct *area) offset = area->vm_pgoff << PAGE_SHIFT; switch (offset) { case SNDRV_PCM_MMAP_OFFSET_STATUS: - if (substream->no_mmap_ctrl) + if (pcm_file->no_compat_mmap) return -ENXIO; return snd_pcm_mmap_status(substream, file, area); case SNDRV_PCM_MMAP_OFFSET_CONTROL: - if (substream->no_mmap_ctrl) + if (pcm_file->no_compat_mmap) return -ENXIO; return snd_pcm_mmap_control(substream, file, area); default: From f03d68fe343d70bb06ecdb3d70dcf0e678ed99f9 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 3 Aug 2006 15:06:14 +0200 Subject: [PATCH 0919/1063] [ALSA] ppc-beep - handle errors from input_register_device() ppc-beep: handle errors from input_register_device() (Also fixed the wrong memory release in the error path.) Signed-off-by: Dmitry Torokhov Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/ppc/beep.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/sound/ppc/beep.c b/sound/ppc/beep.c index 5fec1e58f310..5f38f670102c 100644 --- a/sound/ppc/beep.c +++ b/sound/ppc/beep.c @@ -215,15 +215,18 @@ int __init snd_pmac_attach_beep(struct snd_pmac *chip) { struct pmac_beep *beep; struct input_dev *input_dev; + struct snd_kcontrol *beep_ctl; void *dmabuf; int err = -ENOMEM; beep = kzalloc(sizeof(*beep), GFP_KERNEL); + if (! beep) + return -ENOMEM; dmabuf = dma_alloc_coherent(&chip->pdev->dev, BEEP_BUFLEN * 4, &beep->addr, GFP_KERNEL); input_dev = input_allocate_device(); - if (!beep || !dmabuf || !input_dev) - goto fail; + if (! dmabuf || ! input_dev) + goto fail1; /* FIXME: set more better values */ input_dev->name = "PowerMac Beep"; @@ -244,17 +247,24 @@ int __init snd_pmac_attach_beep(struct snd_pmac *chip) beep->volume = BEEP_VOLUME; beep->running = 0; - err = snd_ctl_add(chip->card, snd_ctl_new1(&snd_pmac_beep_mixer, chip)); + beep_ctl = snd_ctl_new1(&snd_pmac_beep_mixer, chip); + err = snd_ctl_add(chip->card, beep_ctl); if (err < 0) - goto fail; + goto fail1; + + chip->beep = beep; - chip->beep = beep; - input_register_device(beep->dev); - - return 0; - - fail: input_free_device(input_dev); - kfree(dmabuf); + err = input_register_device(beep->dev); + if (err) + goto fail2; + + return 0; + + fail2: snd_ctl_remove(chip->card, beep_ctl); + fail1: input_free_device(input_dev); + if (dmabuf) + dma_free_coherent(&chip->pdev->dev, BEEP_BUFLEN * 4, + dmabuf, beep->addr); kfree(beep); return err; } From 2529bba7606b23c1b7161d3c2ad486162e8650f9 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 4 Aug 2006 12:57:19 +0200 Subject: [PATCH 0920/1063] [ALSA] Fix substream selection in PCM and rawmidi The PCM and rawmidi substreams can be selected explicitly by opening control handle and set via *_PREFER_SUBDEVICE ioctl. But, when multiple controls are opened, the driver gets confused. The patch fixes the initialization of prefer_*_subdevice and the check of multiple controls. The first set subdevice is picked up as the valid one. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/core/control.c | 2 ++ sound/core/pcm.c | 3 ++- sound/core/rawmidi.c | 3 ++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/core/control.c b/sound/core/control.c index 31ad58154c06..ac1442682eac 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -75,6 +75,8 @@ static int snd_ctl_open(struct inode *inode, struct file *file) init_waitqueue_head(&ctl->change_sleep); spin_lock_init(&ctl->read_lock); ctl->card = card; + ctl->prefer_pcm_subdevice = -1; + ctl->prefer_rawmidi_subdevice = -1; ctl->pid = current->pid; file->private_data = ctl; write_lock_irqsave(&card->ctl_files_rwlock, flags); diff --git a/sound/core/pcm.c b/sound/core/pcm.c index f52178abf120..ed3b09469560 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -792,7 +792,8 @@ int snd_pcm_attach_substream(struct snd_pcm *pcm, int stream, kctl = snd_ctl_file(list); if (kctl->pid == current->pid) { prefer_subdevice = kctl->prefer_pcm_subdevice; - break; + if (prefer_subdevice != -1) + break; } } up_read(&card->controls_rwsem); diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 8a2bdfae63e3..269c467ca9bb 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -430,7 +430,8 @@ static int snd_rawmidi_open(struct inode *inode, struct file *file) kctl = snd_ctl_file(list); if (kctl->pid == current->pid) { subdevice = kctl->prefer_rawmidi_subdevice; - break; + if (subdevice != -1) + break; } } up_read(&card->controls_rwsem); From 727f317a10da74b4e5c6d968bbba07767bfea794 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 4 Aug 2006 19:08:03 +0200 Subject: [PATCH 0921/1063] [ALSA] usb-audio - Fix a typo of CONFIG_PROC_FS Fixed a typo of CONFIG_PROC_FS in usbaudio.c. The stream proc file appears again. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/usb/usbaudio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c index 314431385913..087f9b64d8a0 100644 --- a/sound/usb/usbaudio.c +++ b/sound/usb/usbaudio.c @@ -2049,7 +2049,7 @@ static struct usb_driver usb_audio_driver = { }; -#if defined(CONFIG_PROCFS) && defined(CONFIG_SND_VERBOSE_PROCFS) +#if defined(CONFIG_PROC_FS) && defined(CONFIG_SND_VERBOSE_PROCFS) /* * proc interface for list the supported pcm formats From 25b6c43b3d6258f3e87244eeb2b9347dc5e83c40 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 8 Aug 2006 13:01:14 +0200 Subject: [PATCH 0922/1063] [ALSA] Fix the preselected model for HP machine Fixed the preselected model for a HP machine with SSID 103c:3010 to use hp-3013 (ALSA bug#2157). Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/patch_realtek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 991f1079116b..ac561a5d8667 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3931,7 +3931,7 @@ static struct hda_board_config alc260_cfg_tbl[] = { .config = ALC260_BASIC }, /* CTL Travel Master U553W */ { .modelname = "hp", .config = ALC260_HP }, { .modelname = "hp-3013", .config = ALC260_HP_3013 }, - { .pci_subvendor = 0x103c, .pci_subdevice = 0x3010, .config = ALC260_HP }, + { .pci_subvendor = 0x103c, .pci_subdevice = 0x3010, .config = ALC260_HP_3013 }, { .pci_subvendor = 0x103c, .pci_subdevice = 0x3011, .config = ALC260_HP }, { .pci_subvendor = 0x103c, .pci_subdevice = 0x3012, .config = ALC260_HP_3013 }, { .pci_subvendor = 0x103c, .pci_subdevice = 0x3013, .config = ALC260_HP_3013 }, From f5a5ffad072ec3c1fd636174c30f0ba52fe0259f Mon Sep 17 00:00:00 2001 From: Danny Tholen Date: Tue, 8 Aug 2006 18:59:07 +0200 Subject: [PATCH 0923/1063] [ALSA] [snd-hda-intel] fix sound on some Asus W6A chips This patch adds support in ALSA snd-hda-intel driver for Asus W6A motherboard as reported in MDV Bugzilla #19962 (see http://qa.mandriva.com/show_bug.cgi?id=19962) Signed-off-by: Danny Tholen Signed-off-by: Thomas Backlund Signed-off-by: Thierry Vignaud Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ac561a5d8667..a91757316765 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2240,6 +2240,7 @@ static struct hda_board_config alc880_cfg_tbl[] = { { .pci_subvendor = 0x1043, .pci_subdevice = 0x1113, .config = ALC880_ASUS_DIG }, { .pci_subvendor = 0x1043, .pci_subdevice = 0x1173, .config = ALC880_ASUS_DIG }, { .pci_subvendor = 0x1043, .pci_subdevice = 0x1993, .config = ALC880_ASUS }, + { .pci_subvendor = 0x1043, .pci_subdevice = 0x10c2, .config = ALC880_ASUS_DIG }, /* Asus W6A */ { .pci_subvendor = 0x1043, .pci_subdevice = 0x10c3, .config = ALC880_ASUS_DIG }, { .pci_subvendor = 0x1043, .pci_subdevice = 0x1133, .config = ALC880_ASUS }, { .pci_subvendor = 0x1043, .pci_subdevice = 0x1123, .config = ALC880_ASUS_DIG }, From 683fe1537e660c322c8af953773921e814791193 Mon Sep 17 00:00:00 2001 From: Jochen Voss Date: Tue, 8 Aug 2006 21:12:44 +0200 Subject: [PATCH 0924/1063] [ALSA] Revolution 5.1 - add AK5365 ADC support Add support for the AK5365 ADC. Signed-off-by: Jochen Voss Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- include/sound/ak4xxx-adda.h | 3 ++- sound/i2c/other/ak4xxx-adda.c | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/include/sound/ak4xxx-adda.h b/include/sound/ak4xxx-adda.h index 3d9888492026..65ddfa3cac1f 100644 --- a/include/sound/ak4xxx-adda.h +++ b/include/sound/ak4xxx-adda.h @@ -53,7 +53,8 @@ struct snd_akm4xxx { unsigned int idx_offset; /* control index offset */ enum { SND_AK4524, SND_AK4528, SND_AK4529, - SND_AK4355, SND_AK4358, SND_AK4381 + SND_AK4355, SND_AK4358, SND_AK4381, + SND_AK5365 } type; unsigned int *num_stereo; /* array of combined counts * for the mixer diff --git a/sound/i2c/other/ak4xxx-adda.c b/sound/i2c/other/ak4xxx-adda.c index dc7cc2001b74..7d562f084207 100644 --- a/sound/i2c/other/ak4xxx-adda.c +++ b/sound/i2c/other/ak4xxx-adda.c @@ -598,6 +598,31 @@ int snd_akm4xxx_build_controls(struct snd_akm4xxx *ak) if (err < 0) goto __error; } + + if (ak->type == SND_AK5365) { + memset(ctl, 0, sizeof(*ctl)); + if (ak->channel_names == NULL) + strcpy(ctl->id.name, "Capture Volume"); + else + strcpy(ctl->id.name, ak->channel_names[0]); + ctl->id.index = ak->idx_offset * 2; + ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; + ctl->count = 1; + ctl->info = snd_akm4xxx_stereo_volume_info; + ctl->get = snd_akm4xxx_stereo_volume_get; + ctl->put = snd_akm4xxx_stereo_volume_put; + /* Registers 4 & 5 (see AK5365 data sheet, pages 34 and 35): + * valid values are from 0x00 (mute) to 0x98 (+12dB). */ + ctl->private_value = + AK_COMPOSE(0, 4, 0, 0x98); + ctl->private_data = ak; + err = snd_ctl_add(ak->card, + snd_ctl_new(ctl, SNDRV_CTL_ELEM_ACCESS_READ| + SNDRV_CTL_ELEM_ACCESS_WRITE)); + if (err < 0) + goto __error; + } + if (ak->type == SND_AK4355 || ak->type == SND_AK4358) num_emphs = 1; else From 96d9e9347c9c5ca980bef22b4add7d437d79034f Mon Sep 17 00:00:00 2001 From: Jochen Voss Date: Tue, 8 Aug 2006 21:13:42 +0200 Subject: [PATCH 0925/1063] [ALSA] Revolution 5.1 - register the AK5365 ADC with ALSA Enable capture support for the M-Audio Revolution 5.1 card, by registering the ADC with ALSA. Signed-off-by: Jochen Voss Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/ice1712/revo.c | 30 ++++++++++++++++++++++++++++-- sound/pci/ice1712/revo.h | 2 +- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/sound/pci/ice1712/revo.c b/sound/pci/ice1712/revo.c index fec9440cb310..ef64be49a898 100644 --- a/sound/pci/ice1712/revo.c +++ b/sound/pci/ice1712/revo.c @@ -98,6 +98,9 @@ static unsigned int revo51_num_stereo[] = {2, 1, 1, 2}; static char *revo51_channel_names[] = {"PCM Playback Volume", "PCM Center Playback Volume", "PCM LFE Playback Volume", "PCM Rear Playback Volume"}; +static unsigned int revo51_adc_num_stereo[] = {2}; +static char *revo51_adc_channel_names[] = {"PCM Capture Volume"}; + static struct snd_akm4xxx akm_revo_front __devinitdata = { .type = SND_AK4381, .num_dacs = 2, @@ -159,7 +162,26 @@ static struct snd_ak4xxx_private akm_revo51_priv __devinitdata = { .data_mask = VT1724_REVO_CDOUT, .clk_mask = VT1724_REVO_CCLK, .cs_mask = VT1724_REVO_CS0 | VT1724_REVO_CS1 | VT1724_REVO_CS2, - .cs_addr = 0, + .cs_addr = VT1724_REVO_CS1 | VT1724_REVO_CS2, + .cs_none = VT1724_REVO_CS0 | VT1724_REVO_CS1 | VT1724_REVO_CS2, + .add_flags = VT1724_REVO_CCLK, /* high at init */ + .mask_flags = 0, +}; + +static struct snd_akm4xxx akm_revo51_adc __devinitdata = { + .type = SND_AK5365, + .num_adcs = 2, + .num_stereo = revo51_adc_num_stereo, + .channel_names = revo51_adc_channel_names +}; + +static struct snd_ak4xxx_private akm_revo51_adc_priv __devinitdata = { + .caddr = 2, + .cif = 0, + .data_mask = VT1724_REVO_CDOUT, + .clk_mask = VT1724_REVO_CCLK, + .cs_mask = VT1724_REVO_CS0 | VT1724_REVO_CS1 | VT1724_REVO_CS2, + .cs_addr = VT1724_REVO_CS0 | VT1724_REVO_CS2, .cs_none = VT1724_REVO_CS0 | VT1724_REVO_CS1 | VT1724_REVO_CS2, .add_flags = VT1724_REVO_CCLK, /* high at init */ .mask_flags = 0, @@ -202,9 +224,13 @@ static int __devinit revo_init(struct snd_ice1712 *ice) snd_ice1712_gpio_write_bits(ice, VT1724_REVO_MUTE, VT1724_REVO_MUTE); break; case VT1724_SUBDEVICE_REVOLUTION51: - ice->akm_codecs = 1; + ice->akm_codecs = 2; if ((err = snd_ice1712_akm4xxx_init(ak, &akm_revo51, &akm_revo51_priv, ice)) < 0) return err; + err = snd_ice1712_akm4xxx_init(ak + 1, &akm_revo51_adc, + &akm_revo51_adc_priv, ice); + if (err < 0) + return err; /* unmute all codecs - needed! */ snd_ice1712_gpio_write_bits(ice, VT1724_REVO_MUTE, VT1724_REVO_MUTE); break; diff --git a/sound/pci/ice1712/revo.h b/sound/pci/ice1712/revo.h index dea52ea219df..efbb86ec3289 100644 --- a/sound/pci/ice1712/revo.h +++ b/sound/pci/ice1712/revo.h @@ -42,7 +42,7 @@ extern struct snd_ice1712_card_info snd_vt1724_revo_cards[]; #define VT1724_REVO_CCLK 0x02 #define VT1724_REVO_CDIN 0x04 /* not used */ #define VT1724_REVO_CDOUT 0x08 -#define VT1724_REVO_CS0 0x10 /* not used */ +#define VT1724_REVO_CS0 0x10 /* AK5365 chipselect for Rev. 5.1 */ #define VT1724_REVO_CS1 0x20 /* front AKM4381 chipselect */ #define VT1724_REVO_CS2 0x40 /* surround AKM4355 chipselect */ #define VT1724_REVO_MUTE (1<<22) /* 0 = all mute, 1 = normal operation */ From 30ba6e207a915a6c70f22ccb3f9169d1cce88466 Mon Sep 17 00:00:00 2001 From: Jochen Voss Date: Wed, 9 Aug 2006 14:26:26 +0200 Subject: [PATCH 0926/1063] [ALSA] Revolution 5.1 - complete the AK5365 support Complete the AK5365 support. This adds a boolean control to toggle the soft mute feature of the AK5365 chip. Signed-off-by: Jochen Voss Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/i2c/other/ak4xxx-adda.c | 72 +++++++++++++++++++++++++++++++++++ sound/pci/ice1712/revo.c | 2 +- 2 files changed, 73 insertions(+), 1 deletion(-) diff --git a/sound/i2c/other/ak4xxx-adda.c b/sound/i2c/other/ak4xxx-adda.c index 7d562f084207..d76d8b078a81 100644 --- a/sound/i2c/other/ak4xxx-adda.c +++ b/sound/i2c/other/ak4xxx-adda.c @@ -472,6 +472,57 @@ static int snd_akm4xxx_deemphasis_put(struct snd_kcontrol *kcontrol, return change; } +static int ak4xxx_switch_info(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN; + uinfo->count = 1; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = 1; + return 0; +} + +static int ak4xxx_switch_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol); + int chip = AK_GET_CHIP(kcontrol->private_value); + int addr = AK_GET_ADDR(kcontrol->private_value); + int shift = AK_GET_SHIFT(kcontrol->private_value); + int invert = AK_GET_INVERT(kcontrol->private_value); + unsigned char val = snd_akm4xxx_get(ak, chip, addr); + + if (invert) + val = ! val; + ucontrol->value.integer.value[0] = (val & (1<private_value); + int addr = AK_GET_ADDR(kcontrol->private_value); + int shift = AK_GET_SHIFT(kcontrol->private_value); + int invert = AK_GET_INVERT(kcontrol->private_value); + long flag = ucontrol->value.integer.value[0]; + unsigned char val, oval; + int change; + + if (invert) + flag = ! flag; + oval = snd_akm4xxx_get(ak, chip, addr); + if (flag) + val = oval | (1<channel_names == NULL) + strcpy(ctl->id.name, "Capture Switch"); + else + strcpy(ctl->id.name, ak->channel_names[1]); + ctl->id.index = ak->idx_offset * 2; + ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; + ctl->count = 1; + ctl->info = ak4xxx_switch_info; + ctl->get = ak4xxx_switch_get; + ctl->put = ak4xxx_switch_put; + /* register 2, bit 0 (SMUTE): 0 = normal operation, 1 = mute */ + ctl->private_value = + AK_COMPOSE(0, 2, 0, 0) | AK_INVERT; + ctl->private_data = ak; + err = snd_ctl_add(ak->card, + snd_ctl_new(ctl, SNDRV_CTL_ELEM_ACCESS_READ| + SNDRV_CTL_ELEM_ACCESS_WRITE)); + if (err < 0) + goto __error; } if (ak->type == SND_AK4355 || ak->type == SND_AK4358) diff --git a/sound/pci/ice1712/revo.c b/sound/pci/ice1712/revo.c index ef64be49a898..1134a57f9e65 100644 --- a/sound/pci/ice1712/revo.c +++ b/sound/pci/ice1712/revo.c @@ -99,7 +99,7 @@ static char *revo51_channel_names[] = {"PCM Playback Volume", "PCM Center Playba "PCM LFE Playback Volume", "PCM Rear Playback Volume"}; static unsigned int revo51_adc_num_stereo[] = {2}; -static char *revo51_adc_channel_names[] = {"PCM Capture Volume"}; +static char *revo51_adc_channel_names[] = {"PCM Capture Volume","PCM Capture Switch"}; static struct snd_akm4xxx akm_revo_front __devinitdata = { .type = SND_AK4381, From cf93907b98c82c2157e5bbe766bee8f1c5bb87b2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 9 Aug 2006 14:33:27 +0200 Subject: [PATCH 0927/1063] [ALSA] Fix compile warnings in ak4xxx-adda.c Fixed compile warnings in ak4xxx-adda.c reagarding missing enum cases in switch. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/i2c/other/ak4xxx-adda.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/i2c/other/ak4xxx-adda.c b/sound/i2c/other/ak4xxx-adda.c index d76d8b078a81..0aea536a3371 100644 --- a/sound/i2c/other/ak4xxx-adda.c +++ b/sound/i2c/other/ak4xxx-adda.c @@ -137,6 +137,8 @@ void snd_akm4xxx_reset(struct snd_akm4xxx *ak, int state) case SND_AK4381: ak4381_reset(ak, state); break; + default: + break; } } @@ -727,6 +729,9 @@ int snd_akm4xxx_build_controls(struct snd_akm4xxx *ak) case SND_AK4381: ctl->private_value = AK_COMPOSE(idx, 1, 1, 0); break; + default: + err = -EINVAL; + goto __error; } ctl->private_data = ak; err = snd_ctl_add(ak->card, From f24e9f586b377749dff37554696cf3a105540c94 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 9 Aug 2006 14:51:14 +0200 Subject: [PATCH 0928/1063] [ALSA] Select I2C and I2C_POWERMAC in aoa/codecs/Kconfig Added the missing selection of I2C and I2C_POWERMAC for Onyx and TAS codecs in aoa/codecs/Kconfig. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/aoa/codecs/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/aoa/codecs/Kconfig b/sound/aoa/codecs/Kconfig index 90cf58f68630..d5fbd6016e93 100644 --- a/sound/aoa/codecs/Kconfig +++ b/sound/aoa/codecs/Kconfig @@ -1,6 +1,8 @@ config SND_AOA_ONYX tristate "support Onyx chip" depends on SND_AOA + select I2C + select I2C_POWERMAC ---help--- This option enables support for the Onyx (pcm3052) codec chip found in the latest Apple machines @@ -18,6 +20,8 @@ config SND_AOA_ONYX config SND_AOA_TAS tristate "support TAS chips" depends on SND_AOA + select I2C + select I2C_POWERMAC ---help--- This option enables support for the tas chips found in a lot of Apple Machines, especially From 22309c3e0c8911865cad0aa94f53a9afadaad7ee Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 9 Aug 2006 16:57:28 +0200 Subject: [PATCH 0929/1063] [ALSA] Added model for Uniwill laptop with ALC861 Added a new model 'uniwill-m31' for Uniwill laptops with ALC861 codec chip. The patch is taken from ALSA bug#2035, and modifeid. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- .../sound/alsa/ALSA-Configuration.txt | 1 + sound/pci/hda/patch_realtek.c | 137 ++++++++++++++++++ 2 files changed, 138 insertions(+) diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index d0dbc3fb20c2..74be228596ad 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt @@ -827,6 +827,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. 3stack-dig 3-jack with SPDIF I/O 6stack-dig 6-jack with SPDIF I/O 3stack-660 3-jack (for ALC660) + uniwill-m31 Uniwill M31 laptop auto auto-config reading BIOS (default) CMI9880 diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a91757316765..f857e963ff45 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -90,6 +90,7 @@ enum { ALC660_3ST, ALC861_3ST_DIG, ALC861_6ST_DIG, + ALC861_UNIWILL_M31, ALC861_AUTO, ALC861_MODEL_LAST, }; @@ -6021,6 +6022,23 @@ static struct hda_channel_mode alc861_threestack_modes[2] = { { 2, alc861_threestack_ch2_init }, { 6, alc861_threestack_ch6_init }, }; +/* Set mic1 as input and unmute the mixer */ +static struct hda_verb alc861_uniwill_m31_ch2_init[] = { + { 0x0d, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 }, + { 0x15, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x01 << 8)) }, /*mic*/ + { } /* end */ +}; +/* Set mic1 as output and mute mixer */ +static struct hda_verb alc861_uniwill_m31_ch4_init[] = { + { 0x0d, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 }, + { 0x15, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x01 << 8)) }, /*mic*/ + { } /* end */ +}; + +static struct hda_channel_mode alc861_uniwill_m31_modes[2] = { + { 2, alc861_uniwill_m31_ch2_init }, + { 4, alc861_uniwill_m31_ch4_init }, +}; /* patch-ALC861 */ @@ -6099,6 +6117,47 @@ static struct snd_kcontrol_new alc861_3ST_mixer[] = { }, { } /* end */ }; +static struct snd_kcontrol_new alc861_uniwill_m31_mixer[] = { + /* output mixer control */ + HDA_CODEC_MUTE("Front Playback Switch", 0x03, 0x0, HDA_OUTPUT), + HDA_CODEC_MUTE("Surround Playback Switch", 0x06, 0x0, HDA_OUTPUT), + HDA_CODEC_MUTE_MONO("Center Playback Switch", 0x05, 1, 0x0, HDA_OUTPUT), + HDA_CODEC_MUTE_MONO("LFE Playback Switch", 0x05, 2, 0x0, HDA_OUTPUT), + /*HDA_CODEC_MUTE("Side Playback Switch", 0x04, 0x0, HDA_OUTPUT), */ + + /* Input mixer control */ + /* HDA_CODEC_VOLUME("Input Playback Volume", 0x15, 0x0, HDA_OUTPUT), + HDA_CODEC_MUTE("Input Playback Switch", 0x15, 0x0, HDA_OUTPUT), */ + HDA_CODEC_VOLUME("CD Playback Volume", 0x15, 0x0, HDA_INPUT), + HDA_CODEC_MUTE("CD Playback Switch", 0x15, 0x0, HDA_INPUT), + HDA_CODEC_VOLUME("Line Playback Volume", 0x15, 0x02, HDA_INPUT), + HDA_CODEC_MUTE("Line Playback Switch", 0x15, 0x02, HDA_INPUT), + HDA_CODEC_VOLUME("Mic Playback Volume", 0x15, 0x01, HDA_INPUT), + HDA_CODEC_MUTE("Mic Playback Switch", 0x15, 0x01, HDA_INPUT), + HDA_CODEC_MUTE("Front Mic Playback Switch", 0x10, 0x01, HDA_OUTPUT), + HDA_CODEC_MUTE("Headphone Playback Switch", 0x1a, 0x03, HDA_INPUT), + + /* Capture mixer control */ + HDA_CODEC_VOLUME("Capture Volume", 0x08, 0x0, HDA_INPUT), + HDA_CODEC_MUTE("Capture Switch", 0x08, 0x0, HDA_INPUT), + { + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .name = "Capture Source", + .count = 1, + .info = alc_mux_enum_info, + .get = alc_mux_enum_get, + .put = alc_mux_enum_put, + }, + { + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .name = "Channel Mode", + .info = alc_ch_mode_info, + .get = alc_ch_mode_get, + .put = alc_ch_mode_put, + .private_value = ARRAY_SIZE(alc861_uniwill_m31_modes), + }, + { } /* end */ +}; /* * generic initialization of ADC, input mixers and output mixers @@ -6227,6 +6286,67 @@ static struct hda_verb alc861_threestack_init_verbs[] = { {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(2)}, { } }; + +static struct hda_verb alc861_uniwill_m31_init_verbs[] = { + /* + * Unmute ADC0 and set the default input to mic-in + */ + /* port-A for surround (rear panel) */ + { 0x0e, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x00 }, + /* port-B for mic-in (rear panel) with vref */ + { 0x0d, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 }, + /* port-C for line-in (rear panel) */ + { 0x0c, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20 }, + /* port-D for Front */ + { 0x0b, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 }, + { 0x0b, AC_VERB_SET_CONNECT_SEL, 0x00 }, + /* port-E for HP out (front panel) */ + { 0x0f, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 }, // this has to be set to VREF80 + /* route front PCM to HP */ + { 0x0f, AC_VERB_SET_CONNECT_SEL, 0x01 }, + /* port-F for mic-in (front panel) with vref */ + { 0x10, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 }, + /* port-G for CLFE (rear panel) */ + { 0x1f, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x00 }, + /* port-H for side (rear panel) */ + { 0x20, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x00 }, + /* CD-in */ + { 0x11, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20 }, + /* route front mic to ADC1*/ + {0x08, AC_VERB_SET_CONNECT_SEL, 0x00}, + {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)}, + /* Unmute DAC0~3 & spdif out*/ + {0x03, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, + {0x04, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, + {0x05, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, + {0x06, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, + {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, + + /* Unmute Mixer 14 (mic) 1c (Line in)*/ + {0x014, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)}, + {0x014, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)}, + {0x01c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)}, + {0x01c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)}, + + /* Unmute Stereo Mixer 15 */ + {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)}, + {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)}, + {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(2)}, + {0x15, AC_VERB_SET_AMP_GAIN_MUTE, 0xb00c }, //Output 0~12 step + + {0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)}, + {0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)}, + {0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)}, + {0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)}, + {0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)}, + {0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)}, + {0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)}, + {0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)}, + {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(3)}, // hp used DAC 3 (Front) + {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(2)}, + { } +}; + /* * generic initialization of ADC, input mixers and output mixers */ @@ -6561,6 +6681,9 @@ static struct hda_board_config alc861_cfg_tbl[] = { .config = ALC660_3ST }, { .modelname = "3stack-dig", .config = ALC861_3ST_DIG }, { .modelname = "6stack-dig", .config = ALC861_6ST_DIG }, + { .modelname = "uniwill-m31", .config = ALC861_UNIWILL_M31}, + { .pci_subvendor = 0x1584, .pci_subdevice = 0x9072, + .config = ALC861_UNIWILL_M31 }, { .modelname = "auto", .config = ALC861_AUTO }, {} }; @@ -6615,6 +6738,20 @@ static struct alc_config_preset alc861_presets[] = { .adc_nids = alc861_adc_nids, .input_mux = &alc861_capture_source, }, + [ALC861_UNIWILL_M31] = { + .mixers = { alc861_uniwill_m31_mixer }, + .init_verbs = { alc861_uniwill_m31_init_verbs }, + .num_dacs = ARRAY_SIZE(alc861_dac_nids), + .dac_nids = alc861_dac_nids, + .dig_out_nid = ALC861_DIGOUT_NID, + .num_channel_mode = ARRAY_SIZE(alc861_uniwill_m31_modes), + .channel_mode = alc861_uniwill_m31_modes, + .need_dac_fix = 1, + .num_adc_nids = ARRAY_SIZE(alc861_adc_nids), + .adc_nids = alc861_adc_nids, + .input_mux = &alc861_capture_source, + }, + }; From fe25befde9723ba7d921c100bf00d7643323e5a7 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Tue, 15 Aug 2006 14:39:07 +0200 Subject: [PATCH 0930/1063] [ALSA] ice1712 - fix 1600->16000Hz value typo Signed-off-by: Jaroslav Kysela --- sound/pci/ice1712/ice1712.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/pci/ice1712/ice1712.c b/sound/pci/ice1712/ice1712.c index bf20858d9f19..9b8325d65d8d 100644 --- a/sound/pci/ice1712/ice1712.c +++ b/sound/pci/ice1712/ice1712.c @@ -1857,7 +1857,7 @@ static int snd_ice1712_pro_internal_clock_put(struct snd_kcontrol *kcontrol, { struct snd_ice1712 *ice = snd_kcontrol_chip(kcontrol); static unsigned int xrate[13] = { - 8000, 9600, 11025, 12000, 1600, 22050, 24000, + 8000, 9600, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000, 64000, 88200, 96000 }; unsigned char oval; @@ -1924,7 +1924,7 @@ static int snd_ice1712_pro_internal_clock_default_get(struct snd_kcontrol *kcont { int val; static unsigned int xrate[13] = { - 8000, 9600, 11025, 12000, 1600, 22050, 24000, + 8000, 9600, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000, 64000, 88200, 96000 }; @@ -1941,7 +1941,7 @@ static int snd_ice1712_pro_internal_clock_default_put(struct snd_kcontrol *kcont struct snd_ctl_elem_value *ucontrol) { static unsigned int xrate[13] = { - 8000, 9600, 11025, 12000, 1600, 22050, 24000, + 8000, 9600, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000, 64000, 88200, 96000 }; unsigned char oval; From 42fe7647911d0bcaf81aac46db73a3b24387df6d Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Wed, 16 Aug 2006 12:53:34 +0200 Subject: [PATCH 0931/1063] [ALSA] dbri driver cleanup This is a small clean up of the dbri driver for sparc machines. It contains also a fix to DBRI interrupt queue initialization. Signed-off-by: Krzysztof Helt Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/sparc/dbri.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c index f3ae6e23610e..652f433a3f0d 100644 --- a/sound/sparc/dbri.c +++ b/sound/sparc/dbri.c @@ -104,17 +104,15 @@ static char *cmds[] = { #define dprintk(a, x...) if(dbri_debug & a) printk(KERN_DEBUG x) -#define DBRI_CMD(cmd, intr, value) ((cmd << 28) | \ - (1 << 27) | \ - value) #else #define dprintk(a, x...) -#define DBRI_CMD(cmd, intr, value) ((cmd << 28) | \ - (intr << 27) | \ - value) #endif /* DBRI_DEBUG */ +#define DBRI_CMD(cmd, intr, value) ((cmd << 28) | \ + (intr << 27) | \ + value) + /*************************************************************************** CS4215 specific definitions and structures ****************************************************************************/ @@ -690,7 +688,6 @@ static volatile s32 *dbri_cmdlock(struct snd_dbri * dbri, enum dbri_lock get) static void dbri_cmdsend(struct snd_dbri * dbri, volatile s32 * cmd) { volatile s32 *ptr; - u32 reg; for (ptr = &dbri->dma->cmd[0]; ptr < cmd; ptr++) { dprintk(D_CMD, "cmd: %lx:%08x\n", (unsigned long)ptr, *ptr); @@ -709,9 +706,6 @@ static void dbri_cmdsend(struct snd_dbri * dbri, volatile s32 * cmd) /* Set command pointer and signal it is valid. */ sbus_writel(dbri->dma_dvma, dbri->regs + REG8); - reg = sbus_readl(dbri->regs + REG0); - reg |= D_P; - sbus_writel(reg, dbri->regs + REG0); /*spin_unlock(&dbri->lock); */ } @@ -752,7 +746,7 @@ static void dbri_initialize(struct snd_dbri * dbri) */ for (n = 0; n < DBRI_NO_INTS - 1; n++) { dma_addr = dbri->dma_dvma; - dma_addr += dbri_dma_off(intr, ((n + 1) & DBRI_INT_BLK)); + dma_addr += dbri_dma_off(intr, ((n + 1) * DBRI_INT_BLK)); dbri->dma->intr[n * DBRI_INT_BLK] = dma_addr; } dma_addr = dbri->dma_dvma + dbri_dma_off(intr, 0); From 6fb982803522bc86ca61774c6edf317f77165453 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Wed, 16 Aug 2006 12:54:29 +0200 Subject: [PATCH 0932/1063] [ALSA] sparc dbri removal of DBRI_NO_INTS This patch removes define DBR_NO_INTS and all code related to handling more than one dbri irq statuses block. Signed-off-by: Krzysztof Helt Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/sparc/dbri.c | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c index 652f433a3f0d..4651ff513513 100644 --- a/sound/sparc/dbri.c +++ b/sound/sparc/dbri.c @@ -238,12 +238,6 @@ static struct { #define REG9 0x24UL /* Interrupt Queue Pointer */ #define DBRI_NO_CMDS 64 -#define DBRI_NO_INTS 1 /* Note: the value of this define was - * originally 2. The ringbuffer to store - * interrupts in dma is currently broken. - * This is a temporary fix until the ringbuffer - * is fixed. - */ #define DBRI_INT_BLK 64 #define DBRI_NO_DESCS 64 #define DBRI_NO_PIPES 32 @@ -268,7 +262,7 @@ struct dbri_mem { */ struct dbri_dma { volatile s32 cmd[DBRI_NO_CMDS]; /* Place for commands */ - volatile s32 intr[DBRI_NO_INTS * DBRI_INT_BLK]; /* Interrupt field */ + volatile s32 intr[DBRI_INT_BLK]; /* Interrupt field */ struct dbri_mem desc[DBRI_NO_DESCS]; /* Xmit/receive descriptors */ }; @@ -741,18 +735,6 @@ static void dbri_initialize(struct snd_dbri * dbri) dprintk(D_GEN, "init: cmd: %p, int: %p\n", &dbri->dma->cmd[0], &dbri->dma->intr[0]); - /* - * Initialize the interrupt ringbuffer. - */ - for (n = 0; n < DBRI_NO_INTS - 1; n++) { - dma_addr = dbri->dma_dvma; - dma_addr += dbri_dma_off(intr, ((n + 1) * DBRI_INT_BLK)); - dbri->dma->intr[n * DBRI_INT_BLK] = dma_addr; - } - dma_addr = dbri->dma_dvma + dbri_dma_off(intr, 0); - dbri->dma->intr[n * DBRI_INT_BLK] = dma_addr; - dbri->dbri_irqp = 1; - /* Initialize pipes */ for (n = 0; n < DBRI_NO_PIPES; n++) dbri->pipes[n].desc = dbri->pipes[n].first_desc = -1; @@ -765,9 +747,14 @@ static void dbri_initialize(struct snd_dbri * dbri) sbus_writel(tmp, dbri->regs + REG0); /* - * Set up the interrupt queue + * Initialize the interrupt ringbuffer. */ dma_addr = dbri->dma_dvma + dbri_dma_off(intr, 0); + dbri->dma->intr[0] = dma_addr; + dbri->dbri_irqp = 1; + /* + * Set up the interrupt queue + */ *(cmd++) = DBRI_CMD(D_IIQ, 0, 0); *(cmd++) = dma_addr; @@ -1951,10 +1938,8 @@ static void dbri_process_interrupt_buffer(struct snd_dbri * dbri) while ((x = dbri->dma->intr[dbri->dbri_irqp]) != 0) { dbri->dma->intr[dbri->dbri_irqp] = 0; dbri->dbri_irqp++; - if (dbri->dbri_irqp == (DBRI_NO_INTS * DBRI_INT_BLK)) + if (dbri->dbri_irqp == DBRI_INT_BLK) dbri->dbri_irqp = 1; - else if ((dbri->dbri_irqp & (DBRI_INT_BLK - 1)) == 0) - dbri->dbri_irqp++; dbri_process_one_interrupt(dbri, x); } From 5e4968e24ced93b7b130e7e1fc947a79f82776bf Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Wed, 16 Aug 2006 12:56:16 +0200 Subject: [PATCH 0933/1063] [ALSA] sound/pci/fm801: Use ARRAY_SIZE macro Use ARRAY_SIZE macro instead of sizeof(x)/sizeof(x[0]) Signed-off-by: Tobias Klauser Signed-off-by: Andrew Morton Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/fm801.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sound/pci/fm801.c b/sound/pci/fm801.c index 88a3e9f3224a..f3f2b2c99723 100644 --- a/sound/pci/fm801.c +++ b/sound/pci/fm801.c @@ -321,10 +321,8 @@ static unsigned int channels[] = { 2, 4, 6 }; -#define CHANNELS sizeof(channels) / sizeof(channels[0]) - static struct snd_pcm_hw_constraint_list hw_constraints_channels = { - .count = CHANNELS, + .count = ARRAY_SIZE(channels), .list = channels, .mask = 0, }; From 80b556f26b3830ad5bd6ff9f701675ac8afcb263 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 16 Aug 2006 12:56:53 +0200 Subject: [PATCH 0934/1063] [ALSA] emu10k1x: simplify around pci_register_driver() Report errors to modprobe as side effect. Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/emu10k1/emu10k1x.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/sound/pci/emu10k1/emu10k1x.c b/sound/pci/emu10k1/emu10k1x.c index bda8bdf59935..da1610a571b8 100644 --- a/sound/pci/emu10k1/emu10k1x.c +++ b/sound/pci/emu10k1/emu10k1x.c @@ -1626,12 +1626,7 @@ static struct pci_driver driver = { // initialization of the module static int __init alsa_card_emu10k1x_init(void) { - int err; - - if ((err = pci_register_driver(&driver)) > 0) - return err; - - return 0; + return pci_register_driver(&driver); } // clean up the module From d244bf897b2e7933112067ec8d8dc1d47b86145f Mon Sep 17 00:00:00 2001 From: Magnus Sandin Date: Wed, 16 Aug 2006 15:25:23 +0200 Subject: [PATCH 0935/1063] [ALSA] Fix for LG K1 Express Laptop Attached is the patch for the LG K1 Express (K1-2333V) laptop that enables sound output. Signed-off-by: Magnus Sandin Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/ac97/ac97_patch.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c index 5267b006c5c8..37c6be481c4a 100644 --- a/sound/pci/ac97/ac97_patch.c +++ b/sound/pci/ac97/ac97_patch.c @@ -2208,7 +2208,8 @@ int patch_alc655(struct snd_ac97 * ac97) val &= ~(1 << 1); /* Pin 47 is spdif input pin */ else { /* ALC655 */ if (ac97->subsystem_vendor == 0x1462 && - ac97->subsystem_device == 0x0131) /* MSI S270 laptop */ + (ac97->subsystem_device == 0x0131 || /* MSI S270 laptop */ + ac97->subsystem_device == 0x0161)) /* LG K1 Express */ val &= ~(1 << 1); /* Pin 47 is EAPD (for internal speaker) */ else val |= (1 << 1); /* Pin 47 is spdif input pin */ From 99ccc560b73ff7381153dc1391d18391373931d3 Mon Sep 17 00:00:00 2001 From: Guillaume Munch Date: Wed, 16 Aug 2006 19:35:12 +0200 Subject: [PATCH 0936/1063] [ALSA] Add support for Sony Vaio AR 11B This patch adds automatic detection for Sigmatel ID 7664, the sound chip in Sony Vaio AR 11B (european name). - patch_stac7661 becomes patch_stac766x - .id = 0x83847664 is added Signed-off-by: Guillaume Munch Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- .../sound/alsa/ALSA-Configuration.txt | 6 ++-- sound/pci/hda/patch_sigmatel.c | 29 ++++++++++--------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index 74be228596ad..d7e95f144569 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt @@ -856,10 +856,10 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. 3stack-dig ditto with SPDIF laptop 3-jack with hp-jack automute laptop-dig ditto with SPDIF - auto auto-confgi reading BIOS (default) + auto auto-config reading BIOS (default) - STAC7661(?) - vaio Setup for VAIO FE550G/SZ110 + STAC7664/7661(?) + vaio Setup for VAIO FE550G/SZ110/AR11B If the default configuration doesn't work and one of the above matches with your device, report it together with the PCI diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index d572f030c3e9..7eaf755b014b 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -1449,10 +1449,10 @@ static int patch_stac9205(struct hda_codec *codec) } /* - * STAC 7661(?) hack + * STAC 7661(?) and 7664 hack */ -/* static config for Sony VAIO FE550G */ +/* static config for Sony VAIO FE550G and Sony VAIO AR */ static hda_nid_t vaio_dacs[] = { 0x2 }; #define VAIO_HP_DAC 0x5 static hda_nid_t vaio_adcs[] = { 0x8 /*,0x6*/ }; @@ -1552,7 +1552,7 @@ static struct snd_kcontrol_new vaio_mixer[] = { {} }; -static struct hda_codec_ops stac7661_patch_ops = { +static struct hda_codec_ops stac766x_patch_ops = { .build_controls = stac92xx_build_controls, .build_pcms = stac92xx_build_pcms, .init = stac92xx_init, @@ -1562,23 +1562,25 @@ static struct hda_codec_ops stac7661_patch_ops = { #endif }; -enum { STAC7661_VAIO }; +enum { STAC766x_VAIO }; -static struct hda_board_config stac7661_cfg_tbl[] = { - { .modelname = "vaio", .config = STAC7661_VAIO }, +static struct hda_board_config stac766x_cfg_tbl[] = { + { .modelname = "vaio", .config = STAC766x_VAIO }, { .pci_subvendor = 0x104d, .pci_subdevice = 0x81e6, - .config = STAC7661_VAIO }, + .config = STAC766x_VAIO }, { .pci_subvendor = 0x104d, .pci_subdevice = 0x81ef, - .config = STAC7661_VAIO }, + .config = STAC766x_VAIO }, + { .pci_subvendor = 0x104d, .pci_subdevice = 0x81fd, + .config = STAC766x_VAIO }, {} }; -static int patch_stac7661(struct hda_codec *codec) +static int patch_stac766x(struct hda_codec *codec) { struct sigmatel_spec *spec; int board_config; - board_config = snd_hda_check_board_config(codec, stac7661_cfg_tbl); + board_config = snd_hda_check_board_config(codec, stac766x_cfg_tbl); if (board_config < 0) /* unknown config, let generic-parser do its job... */ return snd_hda_parse_generic_codec(codec); @@ -1589,7 +1591,7 @@ static int patch_stac7661(struct hda_codec *codec) codec->spec = spec; switch (board_config) { - case STAC7661_VAIO: + case STAC766x_VAIO: spec->mixer = vaio_mixer; spec->init = vaio_init; spec->multiout.max_channels = 2; @@ -1603,7 +1605,7 @@ static int patch_stac7661(struct hda_codec *codec) break; } - codec->patch_ops = stac7661_patch_ops; + codec->patch_ops = stac766x_patch_ops; return 0; } @@ -1635,7 +1637,7 @@ struct hda_codec_preset snd_hda_preset_sigmatel[] = { { .id = 0x83847627, .name = "STAC9271D", .patch = patch_stac927x }, { .id = 0x83847628, .name = "STAC9274X5NH", .patch = patch_stac927x }, { .id = 0x83847629, .name = "STAC9274D5NH", .patch = patch_stac927x }, - { .id = 0x83847661, .name = "STAC7661", .patch = patch_stac7661 }, + { .id = 0x83847661, .name = "STAC7661", .patch = patch_stac766x }, { .id = 0x838476a0, .name = "STAC9205", .patch = patch_stac9205 }, { .id = 0x838476a1, .name = "STAC9205D", .patch = patch_stac9205 }, { .id = 0x838476a2, .name = "STAC9204", .patch = patch_stac9205 }, @@ -1644,5 +1646,6 @@ struct hda_codec_preset snd_hda_preset_sigmatel[] = { { .id = 0x838476a5, .name = "STAC9255D", .patch = patch_stac9205 }, { .id = 0x838476a6, .name = "STAC9254", .patch = patch_stac9205 }, { .id = 0x838476a7, .name = "STAC9254D", .patch = patch_stac9205 }, + { .id = 0x83847664, .name = "STAC7664", .patch = patch_stac766x }, {} /* terminator */ }; From 7cf0a95310f21f3c986288a483801b1d5694dee1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 17 Aug 2006 16:23:07 +0200 Subject: [PATCH 0937/1063] [ALSA] Fix compile errors with older gcc Fixed compile errors with older gcc for initialization of a union. sound/pci/ca0106/ca0106_mixer.c: At top level: sound/pci/ca0106/ca0106_mixer.c:499: unknown field 'p' specified in initializer sound/pci/ca0106/ca0106_mixer.c:499: warning: missing braces around initializer sound/pci/ca0106/ca0106_mixer.c:499: warning: (near initialization for 'snd_ca0106_volume_ctls[0].tlv') Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/ca0106/ca0106_mixer.c | 4 ++-- sound/pci/emu10k1/p16v.c | 2 +- sound/pci/hda/hda_local.h | 2 +- sound/pci/hda/patch_analog.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/pci/ca0106/ca0106_mixer.c b/sound/pci/ca0106/ca0106_mixer.c index 6d64438cecc9..9855f528ea78 100644 --- a/sound/pci/ca0106/ca0106_mixer.c +++ b/sound/pci/ca0106/ca0106_mixer.c @@ -478,7 +478,7 @@ static int snd_ca0106_i2c_volume_put(struct snd_kcontrol *kcontrol, .info = snd_ca0106_volume_info, \ .get = snd_ca0106_volume_get, \ .put = snd_ca0106_volume_put, \ - .tlv.p = snd_ca0106_db_scale1, \ + .tlv = { .p = snd_ca0106_db_scale1 }, \ .private_value = ((chid) << 8) | (reg) \ } @@ -490,7 +490,7 @@ static int snd_ca0106_i2c_volume_put(struct snd_kcontrol *kcontrol, .info = snd_ca0106_i2c_volume_info, \ .get = snd_ca0106_i2c_volume_get, \ .put = snd_ca0106_i2c_volume_put, \ - .tlv.p = snd_ca0106_db_scale2, \ + .tlv = { .p = snd_ca0106_db_scale2 }, \ .private_value = chid \ } diff --git a/sound/pci/emu10k1/p16v.c b/sound/pci/emu10k1/p16v.c index 1e44714b8623..4e0f95438f47 100644 --- a/sound/pci/emu10k1/p16v.c +++ b/sound/pci/emu10k1/p16v.c @@ -794,7 +794,7 @@ static DECLARE_TLV_DB_SCALE(snd_p16v_db_scale1, -5175, 25, 1); .info = snd_p16v_volume_info, \ .get = snd_p16v_volume_get, \ .put = snd_p16v_volume_put, \ - .tlv.p = snd_p16v_db_scale1, \ + .tlv = { .p = snd_p16v_db_scale1 }, \ .private_value = ((xreg) | ((xhl) << 8)) \ } diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h index 0f0ae685a9c1..ff24266fe353 100644 --- a/sound/pci/hda/hda_local.h +++ b/sound/pci/hda/hda_local.h @@ -36,7 +36,7 @@ .info = snd_hda_mixer_amp_volume_info, \ .get = snd_hda_mixer_amp_volume_get, \ .put = snd_hda_mixer_amp_volume_put, \ - .tlv.c = snd_hda_mixer_amp_tlv, \ + .tlv = { .c = snd_hda_mixer_amp_tlv }, \ .private_value = HDA_COMPOSE_AMP_VAL(nid, channel, xindex, direction) } /* stereo volume with index */ #define HDA_CODEC_VOLUME_IDX(xname, xcidx, nid, xindex, direction) \ diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 077f1ce01ee1..043256c67d1f 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -507,7 +507,7 @@ static struct snd_kcontrol_new ad1986a_mixers[] = { .info = ad1986a_pcm_amp_vol_info, .get = ad1986a_pcm_amp_vol_get, .put = ad1986a_pcm_amp_vol_put, - .tlv.c = ad1986a_pcm_amp_tlv, + .tlv = { .c = ad1986a_pcm_amp_tlv }, .private_value = HDA_COMPOSE_AMP_VAL(AD1986A_FRONT_DAC, 3, 0, HDA_OUTPUT) }, { From 5fc3a2b250716b34ca7c0128475bbedf795f1ac2 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Thu, 17 Aug 2006 16:58:45 +0200 Subject: [PATCH 0938/1063] [ALSA] sparc dbri: removal of unused struct members It removes unused or rarely used members of defined structures. Signed-off-by: Krzysztof Helt Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/sparc/dbri.c | 35 ++++++----------------------------- 1 file changed, 6 insertions(+), 29 deletions(-) diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c index 4651ff513513..66b4d45cf8bf 100644 --- a/sound/sparc/dbri.c +++ b/sound/sparc/dbri.c @@ -34,7 +34,7 @@ * (the second one is a monitor/tee pipe, valid only for serial input). * * The mmcodec is connected via the CHI bus and needs the data & some - * parameters (volume, balance, output selection) timemultiplexed in 8 byte + * parameters (volume, output selection) timemultiplexed in 8 byte * chunks. It also has a control mode, which serves for audio format setting. * * Looking at the CS4215 data sheet it is easy to set up 2 or 4 codecs on @@ -274,9 +274,7 @@ enum in_or_out { PIPEinput, PIPEoutput }; struct dbri_pipe { u32 sdp; /* SDP command word */ - enum in_or_out direction; int nextpipe; /* Next pipe in linked list */ - int prevpipe; int cycle; /* Offset of timeslot (bits) */ int length; /* Length of timeslot (bits) */ int first_desc; /* Index of first descriptor */ @@ -300,13 +298,11 @@ struct dbri_streaminfo { int pipe; /* Data pipe used */ int left_gain; /* mixer elements */ int right_gain; - int balance; }; /* This structure holds the information for both chips (DBRI & CS4215) */ struct snd_dbri { struct snd_card *card; /* ALSA card */ - struct snd_pcm *pcm; int regs_size, irq; /* Needed for unload */ struct sbus_dev *sdev; /* SBUS device info */ @@ -316,7 +312,6 @@ struct snd_dbri { u32 dma_dvma; /* DBRI visible DMA address */ void __iomem *regs; /* dbri HW regs */ - int dbri_version; /* 'e' and up is OK */ int dbri_irqp; /* intr queue pointer */ int wait_send; /* sequence of command buffers send */ int wait_ackd; /* sequence of command buffers acknowledged */ @@ -337,8 +332,6 @@ struct snd_dbri { #define DBRI_MAX_VOLUME 63 /* Output volume */ #define DBRI_MAX_GAIN 15 /* Input gain */ -#define DBRI_RIGHT_BALANCE 255 -#define DBRI_MID_BALANCE (DBRI_RIGHT_BALANCE >> 1) /* DBRI Reg0 - Status Control Register - defines. (Page 17) */ #define D_P (1<<15) /* Program command & queue pointer valid */ @@ -841,10 +834,6 @@ static void setup_pipe(struct snd_dbri * dbri, int pipe, int sdp) dbri->pipes[pipe].sdp = sdp; dbri->pipes[pipe].desc = -1; dbri->pipes[pipe].first_desc = -1; - if (sdp & D_SDP_TO_SER) - dbri->pipes[pipe].direction = PIPEoutput; - else - dbri->pipes[pipe].direction = PIPEinput; reset_pipe(dbri, pipe); } @@ -1363,14 +1352,6 @@ static void cs4215_setdata(struct snd_dbri * dbri, int muted) int left_gain = info->left_gain % 64; int right_gain = info->right_gain % 64; - if (info->balance < DBRI_MID_BALANCE) { - right_gain *= info->balance; - right_gain /= DBRI_MID_BALANCE; - } else { - left_gain *= DBRI_RIGHT_BALANCE - info->balance; - left_gain /= DBRI_MID_BALANCE; - } - dbri->mm.data[0] &= ~0x3f; /* Reset the volume bits */ dbri->mm.data[1] &= ~0x3f; dbri->mm.data[0] |= (DBRI_MAX_VOLUME - left_gain); @@ -2233,7 +2214,6 @@ static int __devinit snd_dbri_pcm(struct snd_dbri * dbri) pcm->private_data = dbri; pcm->info_flags = 0; strcpy(pcm->name, dbri->card->shortname); - dbri->pcm = pcm; if ((err = snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_CONTINUOUS, @@ -2452,7 +2432,6 @@ static int __init snd_dbri_mixer(struct snd_dbri * dbri) for (idx = DBRI_REC; idx < DBRI_NO_STREAMS; idx++) { dbri->stream_info[idx].left_gain = 0; dbri->stream_info[idx].right_gain = 0; - dbri->stream_info[idx].balance = DBRI_MID_BALANCE; } return 0; @@ -2484,12 +2463,11 @@ static void dbri_debug_read(struct snd_info_entry * entry, struct dbri_pipe *pptr = &dbri->pipes[pipe]; snd_iprintf(buffer, "Pipe %d: %s SDP=0x%x desc=%d, " - "len=%d @ %d prev: %d next %d\n", + "len=%d @ %d next %d\n", pipe, - (pptr->direction == - PIPEinput ? "input" : "output"), pptr->sdp, - pptr->desc, pptr->length, pptr->cycle, - pptr->prevpipe, pptr->nextpipe); + ((pptr->sdp & D_SDP_TO_SER) ? "output" : "input"), + pptr->sdp, pptr->desc, + pptr->length, pptr->cycle, pptr->nextpipe); } } } @@ -2528,7 +2506,6 @@ static int __init snd_dbri_create(struct snd_card *card, dbri->card = card; dbri->sdev = sdev; dbri->irq = irq->pri; - dbri->dbri_version = sdev->prom_name[9]; dbri->dma = sbus_alloc_consistent(sdev, sizeof(struct dbri_dma), &dbri->dma_dvma); @@ -2648,7 +2625,7 @@ static int __init dbri_attach(int prom_node, struct sbus_dev *sdev) printk(KERN_INFO "audio%d at %p (irq %d) is DBRI(%c)+CS4215(%d)\n", dev, dbri->regs, - dbri->irq, dbri->dbri_version, dbri->mm.version); + dbri->irq, sdev->prom_name[9], dbri->mm.version); dev++; return 0; From 16727d94adf9a1376775fd34d982778c7f3506df Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Thu, 17 Aug 2006 16:59:28 +0200 Subject: [PATCH 0939/1063] [ALSA] sparc dbri: removal of redudant volatile keywords It removes redudant volatile keywords. Signed-off-by: Krzysztof Helt Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/sparc/dbri.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c index 66b4d45cf8bf..405c603717b6 100644 --- a/sound/sparc/dbri.c +++ b/sound/sparc/dbri.c @@ -252,8 +252,8 @@ static struct { /* One transmit/receive descriptor */ struct dbri_mem { volatile __u32 word1; - volatile __u32 ba; /* Transmit/Receive Buffer Address */ - volatile __u32 nda; /* Next Descriptor Address */ + __u32 ba; /* Transmit/Receive Buffer Address */ + __u32 nda; /* Next Descriptor Address */ volatile __u32 word4; }; @@ -308,7 +308,7 @@ struct snd_dbri { struct sbus_dev *sdev; /* SBUS device info */ spinlock_t lock; - volatile struct dbri_dma *dma; /* Pointer to our DMA block */ + struct dbri_dma *dma; /* Pointer to our DMA block */ u32 dma_dvma; /* DBRI visible DMA address */ void __iomem *regs; /* dbri HW regs */ From e05d696424f21b59eccff35d04938f0d6588cd94 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 17 Aug 2006 17:12:19 +0200 Subject: [PATCH 0940/1063] [ALSA] Fix some typos in snd-dummy driver Fixed some typos in snd-dummy driver. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/drivers/dummy.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c index ffeafaf2ecca..73b16134a434 100644 --- a/sound/drivers/dummy.c +++ b/sound/drivers/dummy.c @@ -285,7 +285,7 @@ static struct snd_pcm_hardware snd_card_dummy_playback = .channels_max = USE_CHANNELS_MAX, .buffer_bytes_max = MAX_BUFFER_SIZE, .period_bytes_min = 64, - .period_bytes_max = MAX_BUFFER_SIZE, + .period_bytes_max = MAX_PERIOD_SIZE, .periods_min = USE_PERIODS_MIN, .periods_max = USE_PERIODS_MAX, .fifo_size = 0, @@ -547,13 +547,13 @@ static struct snd_kcontrol_new snd_dummy_controls[] = { DUMMY_VOLUME("Master Volume", 0, MIXER_ADDR_MASTER), DUMMY_CAPSRC("Master Capture Switch", 0, MIXER_ADDR_MASTER), DUMMY_VOLUME("Synth Volume", 0, MIXER_ADDR_SYNTH), -DUMMY_CAPSRC("Synth Capture Switch", 0, MIXER_ADDR_MASTER), +DUMMY_CAPSRC("Synth Capture Switch", 0, MIXER_ADDR_SYNTH), DUMMY_VOLUME("Line Volume", 0, MIXER_ADDR_LINE), -DUMMY_CAPSRC("Line Capture Switch", 0, MIXER_ADDR_MASTER), +DUMMY_CAPSRC("Line Capture Switch", 0, MIXER_ADDR_LINE), DUMMY_VOLUME("Mic Volume", 0, MIXER_ADDR_MIC), -DUMMY_CAPSRC("Mic Capture Switch", 0, MIXER_ADDR_MASTER), +DUMMY_CAPSRC("Mic Capture Switch", 0, MIXER_ADDR_MIC), DUMMY_VOLUME("CD Volume", 0, MIXER_ADDR_CD), -DUMMY_CAPSRC("CD Capture Switch", 0, MIXER_ADDR_MASTER) +DUMMY_CAPSRC("CD Capture Switch", 0, MIXER_ADDR_CD) }; static int __init snd_card_dummy_new_mixer(struct snd_dummy *dummy) From c256652466127872f1b2e510431dc25524ba40ba Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 17 Aug 2006 18:21:36 +0200 Subject: [PATCH 0941/1063] [ALSA] Add missing TLV callbacks for HD-audio codecs Added missing TLV callbacks for HD-audio codec supports. Also cleaned up the tlv callback for ad1986a (no mutex is needed there). Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/patch_analog.c | 16 ++-------------- sound/pci/hda/patch_realtek.c | 1 + sound/pci/hda/patch_sigmatel.c | 1 + 3 files changed, 4 insertions(+), 14 deletions(-) diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 043256c67d1f..71abc2aa61a6 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -452,19 +452,6 @@ static int ad1986a_pcm_amp_vol_put(struct snd_kcontrol *kcontrol, struct snd_ctl return change; } -static int ad1986a_pcm_amp_tlv(struct snd_kcontrol *kcontrol, int op_flag, - unsigned int size, unsigned int __user *_tlv) -{ - struct hda_codec *codec = snd_kcontrol_chip(kcontrol); - struct ad198x_spec *ad = codec->spec; - - mutex_lock(&ad->amp_mutex); - snd_hda_mixer_amp_tlv(kcontrol, op_flag, size, _tlv); - mutex_unlock(&ad->amp_mutex); - return 0; -} - - #define ad1986a_pcm_amp_sw_info snd_hda_mixer_amp_switch_info static int ad1986a_pcm_amp_sw_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) @@ -507,7 +494,7 @@ static struct snd_kcontrol_new ad1986a_mixers[] = { .info = ad1986a_pcm_amp_vol_info, .get = ad1986a_pcm_amp_vol_get, .put = ad1986a_pcm_amp_vol_put, - .tlv = { .c = ad1986a_pcm_amp_tlv }, + .tlv = { .c = snd_hda_mixer_amp_tlv }, .private_value = HDA_COMPOSE_AMP_VAL(AD1986A_FRONT_DAC, 3, 0, HDA_OUTPUT) }, { @@ -654,6 +641,7 @@ static struct snd_kcontrol_new ad1986a_laptop_eapd_mixers[] = { .info = snd_hda_mixer_amp_volume_info, .get = snd_hda_mixer_amp_volume_get, .put = ad1986a_laptop_master_vol_put, + .tlv = { .c = snd_hda_mixer_amp_tlv }, .private_value = HDA_COMPOSE_AMP_VAL(0x1a, 3, 0, HDA_OUTPUT), }, { diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f857e963ff45..79d361260b27 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5540,6 +5540,7 @@ static struct snd_kcontrol_new alc262_fujitsu_mixer[] = { .info = snd_hda_mixer_amp_volume_info, .get = snd_hda_mixer_amp_volume_get, .put = alc262_fujitsu_master_vol_put, + .tlv = { .c = snd_hda_mixer_amp_tlv }, .private_value = HDA_COMPOSE_AMP_VAL(0x0c, 3, 0, HDA_OUTPUT), }, { diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 7eaf755b014b..887b52e96ec4 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -1528,6 +1528,7 @@ static struct snd_kcontrol_new vaio_mixer[] = { .info = snd_hda_mixer_amp_volume_info, .get = snd_hda_mixer_amp_volume_get, .put = vaio_master_vol_put, + .tlv = { .c = snd_hda_mixer_amp_tlv }, .private_value = HDA_COMPOSE_AMP_VAL(0x02, 3, 0, HDA_OUTPUT), }, { From adf75dcab1deb9625538f74906508c1f6136fd98 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Fri, 18 Aug 2006 09:03:45 +0200 Subject: [PATCH 0942/1063] [ALSA] riptide: fix compile errors with older gcc Change the syntax of a union initialization that is not understood by gcc 2.x. Signed-off-by: Clemens Ladisch Signed-off-by: Jaroslav Kysela --- sound/pci/riptide/riptide.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sound/pci/riptide/riptide.c b/sound/pci/riptide/riptide.c index f435fcd6dca9..fe210c853442 100644 --- a/sound/pci/riptide/riptide.c +++ b/sound/pci/riptide/riptide.c @@ -673,9 +673,13 @@ static struct lbuspath lbus_rec_path = { #define FIRMWARE_VERSIONS 1 static union firmware_version firmware_versions[] = { { - .firmware.ASIC = 3,.firmware.CODEC = 2, - .firmware.AUXDSP = 3,.firmware.PROG = 773, - }, + .firmware = { + .ASIC = 3, + .CODEC = 2, + .AUXDSP = 3, + .PROG = 773, + }, + }, }; static u32 atoh(unsigned char *in, unsigned int len) From 2aaeee8bd1cf51b6ed7c751a8472cb77f3ddc642 Mon Sep 17 00:00:00 2001 From: Tobin Davis Date: Mon, 21 Aug 2006 19:01:12 +0200 Subject: [PATCH 0943/1063] [ALSA] hda-codec - add missing device ids This patch adds missing device ids for Intel 915 and D102GGC motherboards. Signed-off-by: Tobin Davis Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/patch_realtek.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 79d361260b27..53aa57f5a1a1 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2143,7 +2143,10 @@ static struct hda_board_config alc880_cfg_tbl[] = { { .pci_subvendor = 0x8086, .pci_subdevice = 0xe20f, .config = ALC880_3ST }, { .pci_subvendor = 0x8086, .pci_subdevice = 0xe210, .config = ALC880_3ST }, { .pci_subvendor = 0x8086, .pci_subdevice = 0xe211, .config = ALC880_3ST }, + { .pci_subvendor = 0x8086, .pci_subdevice = 0xe212, .config = ALC880_3ST }, + { .pci_subvendor = 0x8086, .pci_subdevice = 0xe213, .config = ALC880_3ST }, { .pci_subvendor = 0x8086, .pci_subdevice = 0xe214, .config = ALC880_3ST }, + { .pci_subvendor = 0x8086, .pci_subdevice = 0xe234, .config = ALC880_3ST }, { .pci_subvendor = 0x8086, .pci_subdevice = 0xe302, .config = ALC880_3ST }, { .pci_subvendor = 0x8086, .pci_subdevice = 0xe303, .config = ALC880_3ST }, { .pci_subvendor = 0x8086, .pci_subdevice = 0xe304, .config = ALC880_3ST }, @@ -5058,6 +5061,8 @@ static struct hda_board_config alc883_cfg_tbl[] = { { .modelname = "3stack-6ch", .config = ALC883_3ST_6ch }, { .pci_subvendor = 0x108e, .pci_subdevice = 0x534d, .config = ALC883_3ST_6ch }, + { .pci_subvendor = 0x8086, .pci_subdevice = 0xd601, + .config = ALC883_3ST_6ch }, /* D102GGC */ { .modelname = "6stack-dig", .config = ALC883_6ST_DIG }, { .pci_subvendor = 0x1462, .pci_subdevice = 0x6668, .config = ALC883_6ST_DIG }, /* MSI */ From 68a6abd97f8b9aa072e36b1901531e7bb69b6efc Mon Sep 17 00:00:00 2001 From: Tobin Davis Date: Mon, 21 Aug 2006 19:02:10 +0200 Subject: [PATCH 0944/1063] [ALSA] hda-codec - Fix headphone output for some Intel 945 systems This patch enables headphone output at initialization for Intel 945 based systems that don't have proper detection circuitry. Signed-off-by: Tobin Davis Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/patch_sigmatel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 887b52e96ec4..d709389c4f61 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -1143,6 +1143,8 @@ static int stac92xx_init(struct hda_codec *codec) STAC_UNSOL_ENABLE); /* fake event to set up pins */ codec->patch_ops.unsol_event(codec, STAC_HP_EVENT << 26); + /* enable the headphones by default. If/when unsol_event detection works, this will be ignored */ + stac92xx_auto_init_hp_out(codec); } else { stac92xx_auto_init_multi_out(codec); stac92xx_auto_init_hp_out(codec); From 79cf0d376fbf1cdf8e9c7c70c3a7c7434a716879 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 22 Aug 2006 16:35:19 +0200 Subject: [PATCH 0945/1063] [ALSA] Fix missing selection of CONFIG_VIDEO_DEV from SND_FM801_TEA575X Fixed the missing selection of CONFIG_VIDEO_DEV from SND_FM801_TEA575X. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig index e49c0fe21b0d..dffb6be76800 100644 --- a/sound/pci/Kconfig +++ b/sound/pci/Kconfig @@ -475,6 +475,7 @@ config SND_FM801_TEA575X depends on SND_FM801_TEA575X_BOOL default SND_FM801 select VIDEO_V4L1 + select VIDEO_DEV config SND_HDA_INTEL tristate "Intel HD Audio" From 948a4db217235ba51c41d8e7c2ffcf9432e57274 Mon Sep 17 00:00:00 2001 From: Tobin Davis Date: Tue, 22 Aug 2006 19:43:46 +0200 Subject: [PATCH 0946/1063] [ALSA] hda-codec - add missing device ids for Intel 945 boards This patch adds missing device ids for Intel 945 motherboards. Signed-off-by: Tobin Davis Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/patch_sigmatel.c | 66 ++++++++++++++++++++++++++++++---- 1 file changed, 60 insertions(+), 6 deletions(-) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index d709389c4f61..7b29288690cb 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -396,19 +396,53 @@ static struct hda_board_config stac922x_cfg_tbl[] = { .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x2668, /* DFI LanParty */ .config = STAC_REF }, /* SigmaTel reference board */ + /* Intel 945G based systems */ { .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x0101, .config = STAC_D945GTP3 }, /* Intel D945GTP - 3 Stack */ { .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x0202, - .config = STAC_D945GTP3 }, /* Intel D945GNT - 3 Stack, 9221 A1 */ + .config = STAC_D945GTP3 }, /* Intel D945GNT - 3 Stack */ { .pci_subvendor = PCI_VENDOR_ID_INTEL, - .pci_subdevice = 0x0b0b, - .config = STAC_D945GTP3 }, /* Intel D945PSN - 3 Stack, 9221 A1 */ + .pci_subdevice = 0x0606, + .config = STAC_D945GTP3 }, /* Intel D945GTP - 3 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x0601, + .config = STAC_D945GTP3 }, /* Intel D945GTP - 3 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x0111, + .config = STAC_D945GTP3 }, /* Intel D945GZP - 3 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x1115, + .config = STAC_D945GTP3 }, /* Intel D945GPM - 3 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x1116, + .config = STAC_D945GTP3 }, /* Intel D945GBO - 3 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x1117, + .config = STAC_D945GTP3 }, /* Intel D945GPM - 3 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x1118, + .config = STAC_D945GTP3 }, /* Intel D945GPM - 3 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x1119, + .config = STAC_D945GTP3 }, /* Intel D945GPM - 3 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x8826, + .config = STAC_D945GTP3 }, /* Intel D945GPM - 3 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x5049, + .config = STAC_D945GTP3 }, /* Intel D945GCZ - 3 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x5055, + .config = STAC_D945GTP3 }, /* Intel D945GCZ - 3 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x5048, + .config = STAC_D945GTP3 }, /* Intel D945GPB - 3 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x0110, + .config = STAC_D945GTP3 }, /* Intel D945GLR - 3 Stack */ { .pci_subvendor = PCI_VENDOR_ID_INTEL, - .pci_subdevice = 0x0707, - .config = STAC_D945GTP5 }, /* Intel D945PSV - 5 Stack */ - { .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x0404, .config = STAC_D945GTP5 }, /* Intel D945GTP - 5 Stack */ { .pci_subvendor = PCI_VENDOR_ID_INTEL, @@ -420,6 +454,26 @@ static struct hda_board_config stac922x_cfg_tbl[] = { { .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x0417, .config = STAC_D945GTP5 }, /* Intel D975XBK - 5 Stack */ + /* Intel 945P based systems */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x0b0b, + .config = STAC_D945GTP3 }, /* Intel D945PSN - 3 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x0112, + .config = STAC_D945GTP3 }, /* Intel D945PLN - 3 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x0d0d, + .config = STAC_D945GTP3 }, /* Intel D945PLM - 3 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x0909, + .config = STAC_D945GTP3 }, /* Intel D945PAW - 3 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x0505, + .config = STAC_D945GTP3 }, /* Intel D945PLM - 3 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x0707, + .config = STAC_D945GTP5 }, /* Intel D945PSV - 5 Stack */ + /* other systems */ { .pci_subvendor = 0x8384, .pci_subdevice = 0x7680, .config = STAC_MACMINI }, /* Apple Mac Mini (early 2006) */ From 81d3dbde76eedcd3ede8a73eb72790d67fa254a9 Mon Sep 17 00:00:00 2001 From: Tobin Davis Date: Tue, 22 Aug 2006 19:44:45 +0200 Subject: [PATCH 0947/1063] [ALSA] hda-codec - Add support for new Intel boards with Stac9227 codec This patch adds full 5.1 audio support for Intel boards with the SigmaTel 9227 codec chip (946, 963, 965 series). Signed-off-by: Tobin Davis Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/patch_sigmatel.c | 126 ++++++++++++++++++++++++--------- 1 file changed, 92 insertions(+), 34 deletions(-) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 7b29288690cb..73ca566e9eb7 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -377,18 +377,11 @@ static unsigned int d945gtp5_pin_configs[10] = { 0x02a19320, 0x40000100, }; -static unsigned int d965_2112_pin_configs[10] = { - 0x0221401f, 0x40000100, 0x40000100, 0x01014011, - 0x01a19021, 0x01813024, 0x01452130, 0x40000100, - 0x02a19320, 0x40000100, -}; - static unsigned int *stac922x_brd_tbl[STAC_922X_MODELS] = { [STAC_REF] = ref922x_pin_configs, [STAC_D945GTP3] = d945gtp3_pin_configs, [STAC_D945GTP5] = d945gtp5_pin_configs, [STAC_MACMINI] = d945gtp5_pin_configs, - [STAC_D965_2112] = d965_2112_pin_configs, }; static struct hda_board_config stac922x_cfg_tbl[] = { @@ -493,8 +486,16 @@ static unsigned int ref927x_pin_configs[14] = { 0x01c41030, 0x40000100, }; +static unsigned int d965_2112_pin_configs[14] = { + 0x0221401f, 0x02a19120, 0x40000100, 0x01014011, + 0x01a19021, 0x01813024, 0x40000100, 0x40000100, + 0x40000100, 0x40000100, 0x40000100, 0x40000100, + 0x40000100, 0x40000100 +}; + static unsigned int *stac927x_brd_tbl[] = { - ref927x_pin_configs, + [STAC_REF] = ref927x_pin_configs, + [STAC_D965_2112] = d965_2112_pin_configs, }; static struct hda_board_config stac927x_cfg_tbl[] = { @@ -502,6 +503,66 @@ static struct hda_board_config stac927x_cfg_tbl[] = { .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x2668, /* DFI LanParty */ .config = STAC_REF }, /* SigmaTel reference board */ + /* SigmaTel 9227 reference board */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x284b, + .config = STAC_D965_284B }, + /* Intel 946 based systems */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x3d01, + .config = STAC_D965_2112 }, /* D946 configuration */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0xa301, + .config = STAC_D965_2112 }, /* Intel D946GZT - 3 stack */ + /* 965 based systems */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2116, + .config = STAC_D965_2112 }, /* Intel D965 3Stack config */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2115, + .config = STAC_D965_2112 }, /* Intel DQ965WC - 3 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2114, + .config = STAC_D965_2112 }, /* Intel D965 3Stack config */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2113, + .config = STAC_D965_2112 }, /* Intel D965 3Stack config */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2112, + .config = STAC_D965_2112 }, /* Intel DG965MS - 3 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2111, + .config = STAC_D965_2112 }, /* Intel D965 3Stack config */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2110, + .config = STAC_D965_2112 }, /* Intel D965 3Stack config */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2009, + .config = STAC_D965_2112 }, /* Intel D965 3Stack config */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2008, + .config = STAC_D965_2112 }, /* Intel DQ965GF - 3 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2007, + .config = STAC_D965_2112 }, /* Intel D965 3Stack config */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2006, + .config = STAC_D965_2112 }, /* Intel D965 3Stack config */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2005, + .config = STAC_D965_2112 }, /* Intel D965 3Stack config */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2004, + .config = STAC_D965_2112 }, /* Intel D965 3Stack config */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2003, + .config = STAC_D965_2112 }, /* Intel D965 3Stack config */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2002, + .config = STAC_D965_2112 }, /* Intel D965 3Stack config */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2001, + .config = STAC_D965_2112 }, /* Intel DQ965GF - 3 Stackg */ {} /* terminator */ }; @@ -1391,25 +1452,6 @@ static int patch_stac922x(struct hda_codec *codec) spec->multiout.dac_nids = spec->dac_nids; - switch (spec->board_config) { - case STAC_D965_2112: - spec->adc_nids = stac9227_adc_nids; - spec->mux_nids = stac9227_mux_nids; -#if 0 - spec->multiout.dac_nids = d965_2112_dac_nids; - spec->multiout.num_dacs = ARRAY_SIZE(d965_2112_dac_nids); -#endif - spec->init = d965_2112_core_init; - spec->mixer = stac9227_mixer; - break; - case STAC_D965_284B: - spec->adc_nids = stac9227_adc_nids; - spec->mux_nids = stac9227_mux_nids; - spec->init = stac9227_core_init; - spec->mixer = stac9227_mixer; - break; - } - err = stac92xx_parse_auto_config(codec, 0x08, 0x09); if (err < 0) { stac92xx_free(codec); @@ -1437,19 +1479,35 @@ static int patch_stac927x(struct hda_codec *codec) spec->board_config = snd_hda_check_board_config(codec, stac927x_cfg_tbl); if (spec->board_config < 0) snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC927x, using BIOS defaults\n"); - else { + else if (stac927x_brd_tbl[spec->board_config] != NULL) { spec->num_pins = 14; spec->pin_nids = stac927x_pin_nids; spec->pin_configs = stac927x_brd_tbl[spec->board_config]; stac92xx_set_config_regs(codec); } - spec->adc_nids = stac927x_adc_nids; - spec->mux_nids = stac927x_mux_nids; - spec->num_muxes = 3; - - spec->init = stac927x_core_init; - spec->mixer = stac927x_mixer; + switch (spec->board_config) { + case STAC_D965_2112: + spec->adc_nids = stac927x_adc_nids; + spec->mux_nids = stac927x_mux_nids; + spec->num_muxes = 3; + spec->init = d965_2112_core_init; + spec->mixer = stac9227_mixer; + break; + case STAC_D965_284B: + spec->adc_nids = stac9227_adc_nids; + spec->mux_nids = stac9227_mux_nids; + spec->num_muxes = 2; + spec->init = stac9227_core_init; + spec->mixer = stac9227_mixer; + break; + default: + spec->adc_nids = stac927x_adc_nids; + spec->mux_nids = stac927x_mux_nids; + spec->num_muxes = 3; + spec->init = stac927x_core_init; + spec->mixer = stac927x_mixer; + } spec->multiout.dac_nids = spec->dac_nids; From e96224ae974844d3f4e84f927ca4b17f1a2079a3 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 21 Aug 2006 17:57:44 +0200 Subject: [PATCH 0948/1063] [ALSA] hda-intel - Switch to polling mode for CORB/RIRB communication Automatically switch to polling mode for CORB/RIRB communication if the irq-driven mode seems not working well. If the polling mode still doesn't work, switch to single_cmd mode as fallback. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/hda_intel.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 79d63c99f092..ce75e07aaa2a 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -332,6 +332,7 @@ struct azx { int position_fix; unsigned int initialized: 1; unsigned int single_cmd: 1; + unsigned int polling_mode: 1; }; /* driver types */ @@ -518,8 +519,23 @@ static unsigned int azx_rirb_get_response(struct hda_codec *codec) struct azx *chip = codec->bus->private_data; int timeout = 50; - while (chip->rirb.cmds) { + for (;;) { + if (chip->polling_mode) { + spin_lock_irq(&chip->reg_lock); + azx_update_rirb(chip); + spin_unlock_irq(&chip->reg_lock); + } + if (! chip->rirb.cmds) + break; if (! --timeout) { + if (! chip->polling_mode) { + snd_printk(KERN_WARNING "hda_intel: " + "azx_get_response timeout, " + "switching to polling mode...\n"); + chip->polling_mode = 1; + timeout = 50; + continue; + } snd_printk(KERN_ERR "hda_intel: azx_get_response timeout, " "switching to single_cmd mode...\n"); From c27354460b1e0cbcd9dfc9232a76bd56c46dce89 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Mon, 21 Aug 2006 19:27:35 +0200 Subject: [PATCH 0949/1063] [ALSA] sparc dbri: removal of dri_desc struct The structure is in big part redudant. Signed-off-by: Krzysztof Helt Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/sparc/dbri.c | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c index 405c603717b6..0b8545ad3e9a 100644 --- a/sound/sparc/dbri.c +++ b/sound/sparc/dbri.c @@ -250,6 +250,7 @@ static struct { #define DBRI_NO_STREAMS 2 /* One transmit/receive descriptor */ +/* When ba != 0 descriptor is used */ struct dbri_mem { volatile __u32 word1; __u32 ba; /* Transmit/Receive Buffer Address */ @@ -282,12 +283,6 @@ struct dbri_pipe { volatile __u32 *recv_fixed_ptr; /* Ptr to receive fixed data */ }; -struct dbri_desc { - int inuse; /* Boolean flag */ - int next; /* Index of next desc, or -1 */ - unsigned int len; -}; - /* Per stream (playback or record) information */ struct dbri_streaminfo { struct snd_pcm_substream *substream; @@ -317,7 +312,7 @@ struct snd_dbri { int wait_ackd; /* sequence of command buffers acknowledged */ struct dbri_pipe pipes[DBRI_NO_PIPES]; /* DBRI's 32 data pipes */ - struct dbri_desc descs[DBRI_NO_DESCS]; + int next_desc[DBRI_NO_DESCS]; /* Index of next desc, or -1 */ int chi_in_pipe; int chi_out_pipe; @@ -803,8 +798,8 @@ static void reset_pipe(struct snd_dbri * dbri, int pipe) desc = dbri->pipes[pipe].first_desc; while (desc != -1) { - dbri->descs[desc].inuse = 0; - desc = dbri->descs[desc].next; + dbri->dma->desc[desc].nda = dbri->dma->desc[desc].ba = 0; + desc = dbri->next_desc[desc]; } dbri->pipes[pipe].desc = -1; @@ -1093,7 +1088,7 @@ static int setup_descs(struct snd_dbri * dbri, int streamno, unsigned int period int mylen; for (; desc < DBRI_NO_DESCS; desc++) { - if (!dbri->descs[desc].inuse) + if (!dbri->dma->desc[desc].ba) break; } if (desc == DBRI_NO_DESCS) { @@ -1110,19 +1105,16 @@ static int setup_descs(struct snd_dbri * dbri, int streamno, unsigned int period mylen = period; } - dbri->descs[desc].inuse = 1; - dbri->descs[desc].next = -1; + dbri->next_desc[desc] = -1; dbri->dma->desc[desc].ba = dvma_buffer; dbri->dma->desc[desc].nda = 0; if (streamno == DBRI_PLAY) { - dbri->descs[desc].len = mylen; dbri->dma->desc[desc].word1 = DBRI_TD_CNT(mylen); dbri->dma->desc[desc].word4 = 0; if (first_desc != -1) dbri->dma->desc[desc].word1 |= DBRI_TD_M; } else { - dbri->descs[desc].len = 0; dbri->dma->desc[desc].word1 = 0; dbri->dma->desc[desc].word4 = DBRI_RD_B | DBRI_RD_BCNT(mylen); @@ -1131,7 +1123,7 @@ static int setup_descs(struct snd_dbri * dbri, int streamno, unsigned int period if (first_desc == -1) { first_desc = desc; } else { - dbri->descs[last_desc].next = desc; + dbri->next_desc[last_desc] = desc; dbri->dma->desc[last_desc].nda = dbri->dma_dvma + dbri_dma_off(desc, desc); } @@ -1154,7 +1146,7 @@ static int setup_descs(struct snd_dbri * dbri, int streamno, unsigned int period dbri->pipes[info->pipe].first_desc = first_desc; dbri->pipes[info->pipe].desc = first_desc; - for (desc = first_desc; desc != -1; desc = dbri->descs[desc].next) { + for (desc = first_desc; desc != -1; desc = dbri->next_desc[desc]) { dprintk(D_DESC, "DESC %d: %08x %08x %08x %08x\n", desc, dbri->dma->desc[desc].word1, @@ -1747,6 +1739,7 @@ static void transmission_complete_intr(struct snd_dbri * dbri, int pipe) struct dbri_streaminfo *info; int td; int status; + int len; info = &dbri->stream_info[DBRI_PLAY]; @@ -1765,11 +1758,12 @@ static void transmission_complete_intr(struct snd_dbri * dbri, int pipe) dprintk(D_INT, "TD %d, status 0x%02x\n", td, status); dbri->dma->desc[td].word4 = 0; /* Reset it for next time. */ - info->offset += dbri->descs[td].len; - info->left -= dbri->descs[td].len; + len = DBRI_RD_CNT(dbri->dma->desc[td].word1); + info->offset += len; + info->left -= len; /* On the last TD, transmit them all again. */ - if (dbri->descs[td].next == -1) { + if (dbri->next_desc[td] == -1) { if (info->left > 0) { printk(KERN_WARNING "%d bytes left after last transfer.\n", @@ -1779,7 +1773,7 @@ static void transmission_complete_intr(struct snd_dbri * dbri, int pipe) tasklet_schedule(&xmit_descs_task); } - td = dbri->descs[td].next; + td = dbri->next_desc[td]; dbri->pipes[pipe].desc = td; } @@ -1803,8 +1797,8 @@ static void reception_complete_intr(struct snd_dbri * dbri, int pipe) return; } - dbri->descs[rd].inuse = 0; - dbri->pipes[pipe].desc = dbri->descs[rd].next; + dbri->dma->desc[rd].ba = 0; + dbri->pipes[pipe].desc = dbri->next_desc[rd]; status = dbri->dma->desc[rd].word1; dbri->dma->desc[rd].word1 = 0; /* Reset it for next time. */ @@ -1818,7 +1812,7 @@ static void reception_complete_intr(struct snd_dbri * dbri, int pipe) rd, DBRI_RD_STATUS(status), DBRI_RD_CNT(status)); /* On the last TD, transmit them all again. */ - if (dbri->descs[rd].next == -1) { + if (dbri->next_desc[rd] == -1) { if (info->left > info->size) { printk(KERN_WARNING "%d bytes recorded in %d size buffer.\n", From 470f1f1a1c2597fab98339ab0966dbf602d604f0 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Mon, 21 Aug 2006 19:28:16 +0200 Subject: [PATCH 0950/1063] [ALSA] sparc dbri: more driver cleanup A general clean up and redudant code removal. Signed-off-by: Krzysztof Helt Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/sparc/dbri.c | 45 ++++++++++++++++----------------------------- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c index 0b8545ad3e9a..6fc37c9cb4fc 100644 --- a/sound/sparc/dbri.c +++ b/sound/sparc/dbri.c @@ -241,9 +241,7 @@ static struct { #define DBRI_INT_BLK 64 #define DBRI_NO_DESCS 64 #define DBRI_NO_PIPES 32 - -#define DBRI_MM_ONB 1 -#define DBRI_MM_SB 2 +#define DBRI_MAX_PIPE (DBRI_NO_PIPES - 1) #define DBRI_REC 0 #define DBRI_PLAY 1 @@ -650,10 +648,6 @@ static volatile s32 *dbri_cmdlock(struct snd_dbri * dbri, enum dbri_lock get) /* Delay if previous commands are still being processed */ while ((--maxloops) > 0 && (dbri->wait_send != dbri->wait_ackd)) { msleep_interruptible(1); - /* If dbri_cmdlock() got called from inside the - * interrupt handler, this will do the processing. - */ - dbri_process_interrupt_buffer(dbri); } if (maxloops == 0) { printk(KERN_ERR "DBRI: Chip never completed command buffer %d\n", @@ -780,7 +774,7 @@ static void reset_pipe(struct snd_dbri * dbri, int pipe) int desc; volatile int *cmd; - if (pipe < 0 || pipe > 31) { + if (pipe < 0 || pipe > DBRI_MAX_PIPE) { printk(KERN_ERR "DBRI: reset_pipe called with illegal pipe number\n"); return; } @@ -806,10 +800,9 @@ static void reset_pipe(struct snd_dbri * dbri, int pipe) dbri->pipes[pipe].first_desc = -1; } -/* FIXME: direction as an argument? */ static void setup_pipe(struct snd_dbri * dbri, int pipe, int sdp) { - if (pipe < 0 || pipe > 31) { + if (pipe < 0 || pipe > DBRI_MAX_PIPE) { printk(KERN_ERR "DBRI: setup_pipe called with illegal pipe number\n"); return; } @@ -843,7 +836,7 @@ static void link_time_slot(struct snd_dbri * dbri, int pipe, int prevpipe; int nextpipe; - if (pipe < 0 || pipe > 31 || basepipe < 0 || basepipe > 31) { + if (pipe < 0 || pipe > DBRI_MAX_PIPE || basepipe < 0 || basepipe > DBRI_MAX_PIPE) { printk(KERN_ERR "DBRI: link_time_slot called with illegal pipe number\n"); return; @@ -931,7 +924,8 @@ static void unlink_time_slot(struct snd_dbri * dbri, int pipe, volatile s32 *cmd; int val; - if (pipe < 0 || pipe > 31 || prevpipe < 0 || prevpipe > 31) { + if (pipe < 0 || pipe > DBRI_MAX_PIPE + || prevpipe < 0 || prevpipe > DBRI_MAX_PIPE) { printk(KERN_ERR "DBRI: unlink_time_slot called with illegal pipe number\n"); return; @@ -972,7 +966,7 @@ static void xmit_fixed(struct snd_dbri * dbri, int pipe, unsigned int data) { volatile s32 *cmd; - if (pipe < 16 || pipe > 31) { + if (pipe < 16 || pipe > DBRI_MAX_PIPE) { printk(KERN_ERR "DBRI: xmit_fixed: Illegal pipe number\n"); return; } @@ -1007,7 +1001,7 @@ static void xmit_fixed(struct snd_dbri * dbri, int pipe, unsigned int data) static void recv_fixed(struct snd_dbri * dbri, int pipe, volatile __u32 * ptr) { - if (pipe < 16 || pipe > 31) { + if (pipe < 16 || pipe > DBRI_MAX_PIPE) { printk(KERN_ERR "DBRI: recv_fixed called with illegal pipe number\n"); return; } @@ -1182,20 +1176,14 @@ static void reset_chi(struct snd_dbri * dbri, enum master_or_slave master_or_sla /* Set CHI Anchor: Pipe 16 */ - val = D_DTS_VI | D_DTS_INS | D_DTS_PRVIN(16) | D_PIPE(16); + val = D_DTS_VO | D_DTS_VI | D_DTS_INS + | D_DTS_PRVIN(16) | D_PIPE(16) | D_DTS_PRVOUT(16); *(cmd++) = DBRI_CMD(D_DTS, 0, val); *(cmd++) = D_TS_ANCHOR | D_TS_NEXT(16); - *(cmd++) = 0; - - val = D_DTS_VO | D_DTS_INS | D_DTS_PRVOUT(16) | D_PIPE(16); - *(cmd++) = DBRI_CMD(D_DTS, 0, val); - *(cmd++) = 0; *(cmd++) = D_TS_ANCHOR | D_TS_NEXT(16); dbri->pipes[16].sdp = 1; dbri->pipes[16].nextpipe = 16; - dbri->chi_in_pipe = 16; - dbri->chi_out_pipe = 16; #if 0 chi_initialized++; @@ -1214,11 +1202,10 @@ static void reset_chi(struct snd_dbri * dbri, enum master_or_slave master_or_sla 16, dbri->pipes[pipe].nextpipe); } - dbri->chi_in_pipe = 16; - dbri->chi_out_pipe = 16; - cmd = dbri_cmdlock(dbri, GetLock); } + dbri->chi_in_pipe = 16; + dbri->chi_out_pipe = 16; if (master_or_slave == CHIslave) { /* Setup DBRI for CHI Slave - receive clock, frame sync (FS) @@ -1341,8 +1328,8 @@ static void cs4215_setdata(struct snd_dbri * dbri, int muted) } else { /* Start by setting the playback attenuation. */ struct dbri_streaminfo *info = &dbri->stream_info[DBRI_PLAY]; - int left_gain = info->left_gain % 64; - int right_gain = info->right_gain % 64; + int left_gain = info->left_gain & 0x3f; + int right_gain = info->right_gain & 0x3f; dbri->mm.data[0] &= ~0x3f; /* Reset the volume bits */ dbri->mm.data[1] &= ~0x3f; @@ -1351,8 +1338,8 @@ static void cs4215_setdata(struct snd_dbri * dbri, int muted) /* Now set the recording gain. */ info = &dbri->stream_info[DBRI_REC]; - left_gain = info->left_gain % 16; - right_gain = info->right_gain % 16; + left_gain = info->left_gain & 0xf; + right_gain = info->right_gain & 0xf; dbri->mm.data[2] |= CS4215_LG(left_gain); dbri->mm.data[3] |= CS4215_RG(right_gain); } From d1fdf07e22efdb9fa53739c0f0fec1f6b24c2056 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Mon, 21 Aug 2006 19:29:18 +0200 Subject: [PATCH 0951/1063] [ALSA] sparc dbri: fixed setting of burst size after reset A proper way to set DBRI's burst size. The size must be set after each chip reset. Signed-off-by: Krzysztof Helt Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/sparc/dbri.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c index 6fc37c9cb4fc..810f8b99a60e 100644 --- a/sound/sparc/dbri.c +++ b/sound/sparc/dbri.c @@ -690,6 +690,7 @@ static void dbri_cmdsend(struct snd_dbri * dbri, volatile s32 * cmd) static void dbri_reset(struct snd_dbri * dbri) { int i; + u32 tmp; dprintk(D_GEN, "reset 0:%x 2:%x 8:%x 9:%x\n", sbus_readl(dbri->regs + REG0), @@ -699,13 +700,20 @@ static void dbri_reset(struct snd_dbri * dbri) sbus_writel(D_R, dbri->regs + REG0); /* Soft Reset */ for (i = 0; (sbus_readl(dbri->regs + REG0) & D_R) && i < 64; i++) udelay(10); + + /* A brute approach - DBRI falls back to working burst size by itself + * On SS20 D_S does not work, so do not try so high. */ + tmp = sbus_readl(dbri->regs + REG0); + tmp |= D_G | D_E; + tmp &= ~D_S; + sbus_writel(tmp, dbri->regs + REG0); } /* Lock must not be held before calling this */ static void dbri_initialize(struct snd_dbri * dbri) { volatile s32 *cmd; - u32 dma_addr, tmp; + u32 dma_addr; unsigned long flags; int n; @@ -721,13 +729,6 @@ static void dbri_initialize(struct snd_dbri * dbri) for (n = 0; n < DBRI_NO_PIPES; n++) dbri->pipes[n].desc = dbri->pipes[n].first_desc = -1; - /* A brute approach - DBRI falls back to working burst size by itself - * On SS20 D_S does not work, so do not try so high. */ - tmp = sbus_readl(dbri->regs + REG0); - tmp |= D_G | D_E; - tmp &= ~D_S; - sbus_writel(tmp, dbri->regs + REG0); - /* * Initialize the interrupt ringbuffer. */ From 294a30dc8cf13c492913f2ed3a6540bdf6e84e39 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Mon, 21 Aug 2006 19:29:59 +0200 Subject: [PATCH 0952/1063] [ALSA] sparc dbri: simplifed linking time slot function A simplified routines to link and unlink time slots. Signed-off-by: Krzysztof Helt Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/sparc/dbri.c | 129 +++++++++++++++------------------------------ 1 file changed, 43 insertions(+), 86 deletions(-) diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c index 810f8b99a60e..5696f792e3d1 100644 --- a/sound/sparc/dbri.c +++ b/sound/sparc/dbri.c @@ -274,7 +274,6 @@ enum in_or_out { PIPEinput, PIPEoutput }; struct dbri_pipe { u32 sdp; /* SDP command word */ int nextpipe; /* Next pipe in linked list */ - int cycle; /* Offset of timeslot (bits) */ int length; /* Length of timeslot (bits) */ int first_desc; /* Index of first descriptor */ int desc; /* Index of active descriptor */ @@ -312,8 +311,6 @@ struct snd_dbri { struct dbri_pipe pipes[DBRI_NO_PIPES]; /* DBRI's 32 data pipes */ int next_desc[DBRI_NO_DESCS]; /* Index of next desc, or -1 */ - int chi_in_pipe; - int chi_out_pipe; int chi_bpf; struct cs4215 mm; /* mmcodec special info */ @@ -827,92 +824,55 @@ static void setup_pipe(struct snd_dbri * dbri, int pipe, int sdp) reset_pipe(dbri, pipe); } -/* FIXME: direction not needed */ static void link_time_slot(struct snd_dbri * dbri, int pipe, - enum in_or_out direction, int basepipe, + int prevpipe, int nextpipe, int length, int cycle) { volatile s32 *cmd; int val; - int prevpipe; - int nextpipe; - if (pipe < 0 || pipe > DBRI_MAX_PIPE || basepipe < 0 || basepipe > DBRI_MAX_PIPE) { + if (pipe < 0 || pipe > DBRI_MAX_PIPE + || prevpipe < 0 || prevpipe > DBRI_MAX_PIPE + || nextpipe < 0 || nextpipe > DBRI_MAX_PIPE) { printk(KERN_ERR "DBRI: link_time_slot called with illegal pipe number\n"); return; } - if (dbri->pipes[pipe].sdp == 0 || dbri->pipes[basepipe].sdp == 0) { + if (dbri->pipes[pipe].sdp == 0 + || dbri->pipes[prevpipe].sdp == 0 + || dbri->pipes[nextpipe].sdp == 0) { printk(KERN_ERR "DBRI: link_time_slot called on uninitialized pipe\n"); return; } - /* Deal with CHI special case: - * "If transmission on edges 0 or 1 is desired, then cycle n - * (where n = # of bit times per frame...) must be used." - * - DBRI data sheet, page 11 - */ - if (basepipe == 16 && direction == PIPEoutput && cycle == 0) - cycle = dbri->chi_bpf; - - if (basepipe == pipe) { - prevpipe = pipe; - nextpipe = pipe; - } else { - /* We're not initializing a new linked list (basepipe != pipe), - * so run through the linked list and find where this pipe - * should be sloted in, based on its cycle. CHI confuses - * things a bit, since it has a single anchor for both its - * transmit and receive lists. - */ - if (basepipe == 16) { - if (direction == PIPEinput) { - prevpipe = dbri->chi_in_pipe; - } else { - prevpipe = dbri->chi_out_pipe; - } - } else { - prevpipe = basepipe; - } - - nextpipe = dbri->pipes[prevpipe].nextpipe; - - while (dbri->pipes[nextpipe].cycle < cycle - && dbri->pipes[nextpipe].nextpipe != basepipe) { - prevpipe = nextpipe; - nextpipe = dbri->pipes[nextpipe].nextpipe; - } - } - - if (prevpipe == 16) { - if (direction == PIPEinput) { - dbri->chi_in_pipe = pipe; - } else { - dbri->chi_out_pipe = pipe; - } - } else { - dbri->pipes[prevpipe].nextpipe = pipe; - } + dbri->pipes[prevpipe].nextpipe = pipe; dbri->pipes[pipe].nextpipe = nextpipe; - dbri->pipes[pipe].cycle = cycle; dbri->pipes[pipe].length = length; cmd = dbri_cmdlock(dbri, NoGetLock); - if (direction == PIPEinput) { - val = D_DTS_VI | D_DTS_INS | D_DTS_PRVIN(prevpipe) | pipe; - *(cmd++) = DBRI_CMD(D_DTS, 0, val); - *(cmd++) = - D_TS_LEN(length) | D_TS_CYCLE(cycle) | D_TS_NEXT(nextpipe); - *(cmd++) = 0; - } else { + if (dbri->pipes[pipe].sdp & D_SDP_TO_SER) { + /* Deal with CHI special case: + * "If transmission on edges 0 or 1 is desired, then cycle n + * (where n = # of bit times per frame...) must be used." + * - DBRI data sheet, page 11 + */ + if (prevpipe == 16 && cycle == 0) + cycle = dbri->chi_bpf; + val = D_DTS_VO | D_DTS_INS | D_DTS_PRVOUT(prevpipe) | pipe; *(cmd++) = DBRI_CMD(D_DTS, 0, val); *(cmd++) = 0; *(cmd++) = D_TS_LEN(length) | D_TS_CYCLE(cycle) | D_TS_NEXT(nextpipe); + } else { + val = D_DTS_VI | D_DTS_INS | D_DTS_PRVIN(prevpipe) | pipe; + *(cmd++) = DBRI_CMD(D_DTS, 0, val); + *(cmd++) = + D_TS_LEN(length) | D_TS_CYCLE(cycle) | D_TS_NEXT(nextpipe); + *(cmd++) = 0; } dbri_cmdsend(dbri, cmd); @@ -1192,21 +1152,18 @@ static void reset_chi(struct snd_dbri * dbri, enum master_or_slave master_or_sla } else { int pipe; - for (pipe = dbri->chi_in_pipe; - pipe != 16; pipe = dbri->pipes[pipe].nextpipe) { - unlink_time_slot(dbri, pipe, PIPEinput, - 16, dbri->pipes[pipe].nextpipe); - } - for (pipe = dbri->chi_out_pipe; - pipe != 16; pipe = dbri->pipes[pipe].nextpipe) { - unlink_time_slot(dbri, pipe, PIPEoutput, - 16, dbri->pipes[pipe].nextpipe); - } - - cmd = dbri_cmdlock(dbri, GetLock); + for (pipe = 0; pipe < DBRI_NO_PIPES; pipe++ ) + if ( pipe != 16 ) { + if (dbri->pipes[pipe].sdp & D_SDP_TO_SER) + unlink_time_slot(dbri, pipe, PIPEoutput, + 16, dbri->pipes[pipe].nextpipe); + else + unlink_time_slot(dbri, pipe, PIPEinput, + 16, dbri->pipes[pipe].nextpipe); + } + + cmd = dbri_cmdlock(dbri, GetLock); } - dbri->chi_in_pipe = 16; - dbri->chi_out_pipe = 16; if (master_or_slave == CHIslave) { /* Setup DBRI for CHI Slave - receive clock, frame sync (FS) @@ -1397,10 +1354,10 @@ static void cs4215_open(struct snd_dbri * dbri) */ data_width = dbri->mm.channels * dbri->mm.precision; - link_time_slot(dbri, 20, PIPEoutput, 16, 32, dbri->mm.offset + 32); - link_time_slot(dbri, 4, PIPEoutput, 16, data_width, dbri->mm.offset); - link_time_slot(dbri, 6, PIPEinput, 16, data_width, dbri->mm.offset); - link_time_slot(dbri, 21, PIPEinput, 16, 16, dbri->mm.offset + 40); + link_time_slot(dbri, 4, 16, 16, data_width, dbri->mm.offset); + link_time_slot(dbri, 20, 4, 16, 32, dbri->mm.offset + 32); + link_time_slot(dbri, 6, 16, 16, data_width, dbri->mm.offset); + link_time_slot(dbri, 21, 6, 16, 16, dbri->mm.offset + 40); /* FIXME: enable CHI after _setdata? */ tmp = sbus_readl(dbri->regs + REG0); @@ -1466,9 +1423,9 @@ static int cs4215_setctrl(struct snd_dbri * dbri) * Pipe 19: Receive timeslot 7 (version). */ - link_time_slot(dbri, 17, PIPEoutput, 16, 32, dbri->mm.offset); - link_time_slot(dbri, 18, PIPEinput, 16, 8, dbri->mm.offset); - link_time_slot(dbri, 19, PIPEinput, 16, 8, dbri->mm.offset + 48); + link_time_slot(dbri, 17, 16, 16, 32, dbri->mm.offset); + link_time_slot(dbri, 18, 16, 16, 8, dbri->mm.offset); + link_time_slot(dbri, 19, 18, 16, 8, dbri->mm.offset + 48); /* Wait for the chip to echo back CLB (Control Latch Bit) as zero */ dbri->mm.ctrl[0] &= ~CS4215_CLB; @@ -2445,11 +2402,11 @@ static void dbri_debug_read(struct snd_info_entry * entry, struct dbri_pipe *pptr = &dbri->pipes[pipe]; snd_iprintf(buffer, "Pipe %d: %s SDP=0x%x desc=%d, " - "len=%d @ %d next %d\n", + "len=%d next %d\n", pipe, ((pptr->sdp & D_SDP_TO_SER) ? "output" : "input"), pptr->sdp, pptr->desc, - pptr->length, pptr->cycle, pptr->nextpipe); + pptr->length, pptr->nextpipe); } } } From 1be54c824be9b5e163cd83dabdf0ad3ac81c72a8 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Mon, 21 Aug 2006 19:30:57 +0200 Subject: [PATCH 0953/1063] [ALSA] sparc dbri: ring buffered version It is a complete rework of low level layer to work on ring buffers for comands and data descriptors. This removes annoying noise due to delay in data buffer switching. Signed-off-by: Krzysztof Helt Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/sparc/dbri.c | 385 ++++++++++++++++++++++----------------------- 1 file changed, 192 insertions(+), 193 deletions(-) diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c index 5696f792e3d1..3fb2ede80eaf 100644 --- a/sound/sparc/dbri.c +++ b/sound/sparc/dbri.c @@ -2,6 +2,8 @@ * Driver for DBRI sound chip found on Sparcs. * Copyright (C) 2004, 2005 Martin Habets (mhabets@users.sourceforge.net) * + * Converted to ring buffered version by Krzysztof Helt (krzysztof.h1@wp.pl) + * * Based entirely upon drivers/sbus/audio/dbri.c which is: * Copyright (C) 1997 Rudolf Koenig (rfkoenig@immd4.informatik.uni-erlangen.de) * Copyright (C) 1998, 1999 Brent Baccala (baccala@freesoft.org) @@ -260,7 +262,7 @@ struct dbri_mem { * the CPU and the DBRI */ struct dbri_dma { - volatile s32 cmd[DBRI_NO_CMDS]; /* Place for commands */ + s32 cmd[DBRI_NO_CMDS]; /* Place for commands */ volatile s32 intr[DBRI_INT_BLK]; /* Interrupt field */ struct dbri_mem desc[DBRI_NO_DESCS]; /* Xmit/receive descriptors */ }; @@ -284,7 +286,6 @@ struct dbri_pipe { struct dbri_streaminfo { struct snd_pcm_substream *substream; u32 dvma_buffer; /* Device view of Alsa DMA buffer */ - int left; /* # of bytes left in DMA buffer */ int size; /* Size of DMA buffer */ size_t offset; /* offset in user buffer */ int pipe; /* Data pipe used */ @@ -305,11 +306,11 @@ struct snd_dbri { void __iomem *regs; /* dbri HW regs */ int dbri_irqp; /* intr queue pointer */ - int wait_send; /* sequence of command buffers send */ - int wait_ackd; /* sequence of command buffers acknowledged */ struct dbri_pipe pipes[DBRI_NO_PIPES]; /* DBRI's 32 data pipes */ int next_desc[DBRI_NO_DESCS]; /* Index of next desc, or -1 */ + spinlock_t cmdlock; /* Protects cmd queue accesses */ + s32 *cmdptr; /* Pointer to the last queued cmd */ int chi_bpf; @@ -544,7 +545,7 @@ struct snd_dbri { #define DBRI_TD_TBC (1<<0) /* Transmit buffer Complete */ #define DBRI_TD_STATUS(v) ((v)&0xff) /* Transmit status */ /* Maximum buffer size per TD: almost 8Kb */ -#define DBRI_TD_MAXCNT ((1 << 13) - 1) +#define DBRI_TD_MAXCNT ((1 << 13) - 4) /* Receive descriptor defines */ #define DBRI_RD_F (1<<31) /* End of Frame */ @@ -608,79 +609,110 @@ The list is terminated with a WAIT command, which generates a CPU interrupt to signal completion. Since the DBRI can run in parallel with the CPU, several means of -synchronization present themselves. The method implemented here is close -to the original scheme (Rudolf's), and uses 2 counters (wait_send and -wait_ackd) to synchronize the command buffer between the CPU and the DBRI. +synchronization present themselves. The method implemented here is only +to use the dbri_cmdwait() to wait for execution of batch of sent commands. -A more sophisticated scheme might involve a circular command buffer -or an array of command buffers. A routine could fill one with -commands and link it onto a list. When a interrupt signaled -completion of the current command buffer, look on the list for -the next one. +A circular command buffer is used here. A new command is being added +while other can be executed. The scheme works by adding two WAIT commands +after each sent batch of commands. When the next batch is prepared it is +added after the WAIT commands then the WAITs are replaced with single JUMP +command to the new batch. The the DBRI is forced to reread the last WAIT +command (replaced by the JUMP by then). If the DBRI is still executing +previous commands the request to reread the WAIT command is ignored. Every time a routine wants to write commands to the DBRI, it must -first call dbri_cmdlock() and get an initial pointer into dbri->dma->cmd -in return. dbri_cmdlock() will block if the previous commands have not -been completed yet. After this the commands can be written to the buffer, -and dbri_cmdsend() is called with the final pointer value to send them -to the DBRI. +first call dbri_cmdlock() and get pointer to a free space in +dbri->dma->cmd buffer. After this, the commands can be written to +the buffer, and dbri_cmdsend() is called with the final pointer value +to send them to the DBRI. */ static void dbri_process_interrupt_buffer(struct snd_dbri * dbri); -enum dbri_lock { NoGetLock, GetLock }; #define MAXLOOPS 10 - -static volatile s32 *dbri_cmdlock(struct snd_dbri * dbri, enum dbri_lock get) +/* + * Wait for the current command string to execute + */ +static void dbri_cmdwait(struct snd_dbri *dbri) { int maxloops = MAXLOOPS; -#ifndef SMP - if ((get == GetLock) && spin_is_locked(&dbri->lock)) { - printk(KERN_ERR "DBRI: cmdlock called while in spinlock."); - } -#endif - /* Delay if previous commands are still being processed */ - while ((--maxloops) > 0 && (dbri->wait_send != dbri->wait_ackd)) { + while ((--maxloops) > 0 && (sbus_readl(dbri->regs + REG0) & D_P)) msleep_interruptible(1); - } + if (maxloops == 0) { - printk(KERN_ERR "DBRI: Chip never completed command buffer %d\n", - dbri->wait_send); + printk(KERN_ERR "DBRI: Chip never completed command buffer\n"); } else { dprintk(D_CMD, "Chip completed command buffer (%d)\n", MAXLOOPS - maxloops - 1); } +} +/* + * Lock the command queue and returns pointer to a space for len cmd words + * It locks the cmdlock spinlock. + */ +static s32 *dbri_cmdlock(struct snd_dbri * dbri, int len) +{ + /* Space for 2 WAIT cmds (replaced later by 1 JUMP cmd) */ + len += 2; + spin_lock(&dbri->cmdlock); + if (dbri->cmdptr - dbri->dma->cmd + len < DBRI_NO_CMDS - 2) + return dbri->cmdptr + 2; + else if (len < sbus_readl(dbri->regs + REG8) - dbri->dma_dvma) + return dbri->dma->cmd; + else + printk(KERN_ERR "DBRI: no space for commands."); - /*if (get == GetLock) spin_lock(&dbri->lock); */ - return &dbri->dma->cmd[0]; + return 0; } -static void dbri_cmdsend(struct snd_dbri * dbri, volatile s32 * cmd) +/* + * Send prepared cmd string. It works by writting a JMP cmd into + * the last WAIT cmd and force DBRI to reread the cmd. + * The JMP cmd points to the new cmd string. + * It also releases the cmdlock spinlock. + */ +static void dbri_cmdsend(struct snd_dbri * dbri, s32 * cmd,int len) { - volatile s32 *ptr; + s32 *ptr; + s32 tmp, addr; + static int wait_id = 0; - for (ptr = &dbri->dma->cmd[0]; ptr < cmd; ptr++) { + wait_id++; + wait_id &= 0xffff; /* restrict it to a 16 bit counter. */ + *(cmd) = DBRI_CMD(D_WAIT, 1, wait_id); + *(cmd+1) = DBRI_CMD(D_WAIT, 1, wait_id); + + /* Replace the last command with JUMP */ + addr = dbri->dma_dvma + (cmd - len - dbri->dma->cmd) * sizeof(s32); + *(dbri->cmdptr+1) = addr; + *(dbri->cmdptr) = DBRI_CMD(D_JUMP, 0, 0); + +#ifdef DBRI_DEBUG + if (cmd > dbri->cmdptr ) + for (ptr = dbri->cmdptr; ptr < cmd+2; ptr++) { + dprintk(D_CMD, "cmd: %lx:%08x\n", (unsigned long)ptr, *ptr); + } + else { + ptr = dbri->cmdptr; dprintk(D_CMD, "cmd: %lx:%08x\n", (unsigned long)ptr, *ptr); + ptr = dbri->cmdptr+1; + dprintk(D_CMD, "cmd: %lx:%08x\n", (unsigned long)ptr, *ptr); + for (ptr = dbri->dma->cmd; ptr < cmd+2; ptr++) { + dprintk(D_CMD, "cmd: %lx:%08x\n", (unsigned long)ptr, *ptr); + } } +#endif - if ((cmd - &dbri->dma->cmd[0]) >= DBRI_NO_CMDS - 1) { - printk(KERN_ERR "DBRI: Command buffer overflow! (bug in driver)\n"); - /* Ignore the last part. */ - cmd = &dbri->dma->cmd[DBRI_NO_CMDS - 3]; - } + /* Reread the last command */ + tmp = sbus_readl(dbri->regs + REG0); + tmp |= D_P; + sbus_writel(tmp, dbri->regs + REG0); - dbri->wait_send++; - dbri->wait_send &= 0xffff; /* restrict it to a 16 bit counter. */ - *(cmd++) = DBRI_CMD(D_PAUSE, 0, 0); - *(cmd++) = DBRI_CMD(D_WAIT, 1, dbri->wait_send); - - /* Set command pointer and signal it is valid. */ - sbus_writel(dbri->dma_dvma, dbri->regs + REG8); - - /*spin_unlock(&dbri->lock); */ + dbri->cmdptr = cmd; + spin_unlock(&dbri->cmdlock); } /* Lock must be held when calling this */ @@ -709,7 +741,7 @@ static void dbri_reset(struct snd_dbri * dbri) /* Lock must not be held before calling this */ static void dbri_initialize(struct snd_dbri * dbri) { - volatile s32 *cmd; + s32 *cmd; u32 dma_addr; unsigned long flags; int n; @@ -718,14 +750,11 @@ static void dbri_initialize(struct snd_dbri * dbri) dbri_reset(dbri); - cmd = dbri_cmdlock(dbri, NoGetLock); - dprintk(D_GEN, "init: cmd: %p, int: %p\n", - &dbri->dma->cmd[0], &dbri->dma->intr[0]); - /* Initialize pipes */ for (n = 0; n < DBRI_NO_PIPES; n++) dbri->pipes[n].desc = dbri->pipes[n].first_desc = -1; + spin_lock_init(&dbri->cmdlock); /* * Initialize the interrupt ringbuffer. */ @@ -735,10 +764,19 @@ static void dbri_initialize(struct snd_dbri * dbri) /* * Set up the interrupt queue */ + spin_lock(&dbri->cmdlock); + cmd = dbri->cmdptr = dbri->dma->cmd; *(cmd++) = DBRI_CMD(D_IIQ, 0, 0); *(cmd++) = dma_addr; + *(cmd++) = DBRI_CMD(D_PAUSE, 0, 0); + dbri->cmdptr = cmd; + *(cmd++) = DBRI_CMD(D_WAIT, 1, 0); + *(cmd++) = DBRI_CMD(D_WAIT, 1, 0); + dma_addr = dbri->dma_dvma + dbri_dma_off(cmd, 0); + sbus_writel(dma_addr, dbri->regs + REG8); + spin_unlock(&dbri->cmdlock); + dbri_cmdwait(dbri); - dbri_cmdsend(dbri, cmd); spin_unlock_irqrestore(&dbri->lock, flags); } @@ -770,7 +808,7 @@ static void reset_pipe(struct snd_dbri * dbri, int pipe) { int sdp; int desc; - volatile int *cmd; + s32 *cmd; if (pipe < 0 || pipe > DBRI_MAX_PIPE) { printk(KERN_ERR "DBRI: reset_pipe called with illegal pipe number\n"); @@ -783,16 +821,18 @@ static void reset_pipe(struct snd_dbri * dbri, int pipe) return; } - cmd = dbri_cmdlock(dbri, NoGetLock); + cmd = dbri_cmdlock(dbri, 3); *(cmd++) = DBRI_CMD(D_SDP, 0, sdp | D_SDP_C | D_SDP_P); *(cmd++) = 0; - dbri_cmdsend(dbri, cmd); + *(cmd++) = DBRI_CMD(D_PAUSE, 0, 0); + dbri_cmdsend(dbri, cmd, 3); desc = dbri->pipes[pipe].first_desc; - while (desc != -1) { - dbri->dma->desc[desc].nda = dbri->dma->desc[desc].ba = 0; - desc = dbri->next_desc[desc]; - } + if ( desc >= 0) + do { + dbri->dma->desc[desc].nda = dbri->dma->desc[desc].ba = 0; + desc = dbri->next_desc[desc]; + } while (desc != -1 && desc != dbri->pipes[pipe].first_desc); dbri->pipes[pipe].desc = -1; dbri->pipes[pipe].first_desc = -1; @@ -828,7 +868,7 @@ static void link_time_slot(struct snd_dbri * dbri, int pipe, int prevpipe, int nextpipe, int length, int cycle) { - volatile s32 *cmd; + s32 *cmd; int val; if (pipe < 0 || pipe > DBRI_MAX_PIPE @@ -847,11 +887,10 @@ static void link_time_slot(struct snd_dbri * dbri, int pipe, } dbri->pipes[prevpipe].nextpipe = pipe; - dbri->pipes[pipe].nextpipe = nextpipe; dbri->pipes[pipe].length = length; - cmd = dbri_cmdlock(dbri, NoGetLock); + cmd = dbri_cmdlock(dbri, 4); if (dbri->pipes[pipe].sdp & D_SDP_TO_SER) { /* Deal with CHI special case: @@ -874,25 +913,27 @@ static void link_time_slot(struct snd_dbri * dbri, int pipe, D_TS_LEN(length) | D_TS_CYCLE(cycle) | D_TS_NEXT(nextpipe); *(cmd++) = 0; } + *(cmd++) = DBRI_CMD(D_PAUSE, 0, 0); - dbri_cmdsend(dbri, cmd); + dbri_cmdsend(dbri, cmd, 4); } static void unlink_time_slot(struct snd_dbri * dbri, int pipe, enum in_or_out direction, int prevpipe, int nextpipe) { - volatile s32 *cmd; + s32 *cmd; int val; if (pipe < 0 || pipe > DBRI_MAX_PIPE - || prevpipe < 0 || prevpipe > DBRI_MAX_PIPE) { + || prevpipe < 0 || prevpipe > DBRI_MAX_PIPE + || nextpipe < 0 || nextpipe > DBRI_MAX_PIPE) { printk(KERN_ERR "DBRI: unlink_time_slot called with illegal pipe number\n"); return; } - cmd = dbri_cmdlock(dbri, NoGetLock); + cmd = dbri_cmdlock(dbri, 4); if (direction == PIPEinput) { val = D_DTS_VI | D_DTS_DEL | D_DTS_PRVIN(prevpipe) | pipe; @@ -905,8 +946,9 @@ static void unlink_time_slot(struct snd_dbri * dbri, int pipe, *(cmd++) = 0; *(cmd++) = D_TS_NEXT(nextpipe); } + *(cmd++) = DBRI_CMD(D_PAUSE, 0, 0); - dbri_cmdsend(dbri, cmd); + dbri_cmdsend(dbri, cmd, 4); } /* xmit_fixed() / recv_fixed() @@ -925,7 +967,7 @@ static void unlink_time_slot(struct snd_dbri * dbri, int pipe, */ static void xmit_fixed(struct snd_dbri * dbri, int pipe, unsigned int data) { - volatile s32 *cmd; + s32 *cmd; if (pipe < 16 || pipe > DBRI_MAX_PIPE) { printk(KERN_ERR "DBRI: xmit_fixed: Illegal pipe number\n"); @@ -952,12 +994,14 @@ static void xmit_fixed(struct snd_dbri * dbri, int pipe, unsigned int data) if (dbri->pipes[pipe].sdp & D_SDP_MSB) data = reverse_bytes(data, dbri->pipes[pipe].length); - cmd = dbri_cmdlock(dbri, GetLock); + cmd = dbri_cmdlock(dbri, 3); *(cmd++) = DBRI_CMD(D_SSP, 0, pipe); *(cmd++) = data; + *(cmd++) = DBRI_CMD(D_PAUSE, 0, 0); - dbri_cmdsend(dbri, cmd); + dbri_cmdsend(dbri, cmd, 3); + dbri_cmdwait(dbri); } static void recv_fixed(struct snd_dbri * dbri, int pipe, volatile __u32 * ptr) @@ -991,6 +1035,8 @@ static void recv_fixed(struct snd_dbri * dbri, int pipe, volatile __u32 * ptr) * and work by building chains of descriptors which identify the * data buffers. Buffers too large for a single descriptor will * be spread across multiple descriptors. + * + * All descriptors create a ring buffer. */ static int setup_descs(struct snd_dbri * dbri, int streamno, unsigned int period) { @@ -1051,14 +1097,13 @@ static int setup_descs(struct snd_dbri * dbri, int streamno, unsigned int period return -1; } - if (len > DBRI_TD_MAXCNT) { - mylen = DBRI_TD_MAXCNT; /* 8KB - 1 */ - } else { + if (len > DBRI_TD_MAXCNT) + mylen = DBRI_TD_MAXCNT; /* 8KB - 4 */ + else mylen = len; - } - if (mylen > period) { + + if (mylen > period) mylen = period; - } dbri->next_desc[desc] = -1; dbri->dma->desc[desc].ba = dvma_buffer; @@ -1067,17 +1112,17 @@ static int setup_descs(struct snd_dbri * dbri, int streamno, unsigned int period if (streamno == DBRI_PLAY) { dbri->dma->desc[desc].word1 = DBRI_TD_CNT(mylen); dbri->dma->desc[desc].word4 = 0; - if (first_desc != -1) - dbri->dma->desc[desc].word1 |= DBRI_TD_M; + dbri->dma->desc[desc].word1 |= + DBRI_TD_F | DBRI_TD_B; } else { dbri->dma->desc[desc].word1 = 0; dbri->dma->desc[desc].word4 = DBRI_RD_B | DBRI_RD_BCNT(mylen); } - if (first_desc == -1) { + if (first_desc == -1) first_desc = desc; - } else { + else { dbri->next_desc[last_desc] = desc; dbri->dma->desc[last_desc].nda = dbri->dma_dvma + dbri_dma_off(desc, desc); @@ -1093,21 +1138,28 @@ static int setup_descs(struct snd_dbri * dbri, int streamno, unsigned int period return -1; } - dbri->dma->desc[last_desc].word1 &= ~DBRI_TD_M; if (streamno == DBRI_PLAY) { dbri->dma->desc[last_desc].word1 |= - DBRI_TD_I | DBRI_TD_F | DBRI_TD_B; + DBRI_TD_F | DBRI_TD_B; + dbri->dma->desc[last_desc].nda = + dbri->dma_dvma + dbri_dma_off(desc, first_desc); + dbri->next_desc[last_desc] = first_desc; } dbri->pipes[info->pipe].first_desc = first_desc; dbri->pipes[info->pipe].desc = first_desc; - for (desc = first_desc; desc != -1; desc = dbri->next_desc[desc]) { +#ifdef DBRI_DEBUG + for (desc = first_desc; desc != -1; ) { dprintk(D_DESC, "DESC %d: %08x %08x %08x %08x\n", desc, dbri->dma->desc[desc].word1, dbri->dma->desc[desc].ba, dbri->dma->desc[desc].nda, dbri->dma->desc[desc].word4); + desc = dbri->next_desc[desc]; + if ( desc == first_desc ) + break; } +#endif return 0; } @@ -1127,43 +1179,24 @@ enum master_or_slave { CHImaster, CHIslave }; static void reset_chi(struct snd_dbri * dbri, enum master_or_slave master_or_slave, int bits_per_frame) { - volatile s32 *cmd; + s32 *cmd; int val; - static int chi_initialized = 0; /* FIXME: mutex? */ - if (!chi_initialized) { + /* Set CHI Anchor: Pipe 16 */ - cmd = dbri_cmdlock(dbri, GetLock); + cmd = dbri_cmdlock(dbri, 4); + val = D_DTS_VO | D_DTS_VI | D_DTS_INS + | D_DTS_PRVIN(16) | D_PIPE(16) | D_DTS_PRVOUT(16); + *(cmd++) = DBRI_CMD(D_DTS, 0, val); + *(cmd++) = D_TS_ANCHOR | D_TS_NEXT(16); + *(cmd++) = D_TS_ANCHOR | D_TS_NEXT(16); + *(cmd++) = DBRI_CMD(D_PAUSE, 0, 0); + dbri_cmdsend(dbri, cmd, 4); - /* Set CHI Anchor: Pipe 16 */ + dbri->pipes[16].sdp = 1; + dbri->pipes[16].nextpipe = 16; - val = D_DTS_VO | D_DTS_VI | D_DTS_INS - | D_DTS_PRVIN(16) | D_PIPE(16) | D_DTS_PRVOUT(16); - *(cmd++) = DBRI_CMD(D_DTS, 0, val); - *(cmd++) = D_TS_ANCHOR | D_TS_NEXT(16); - *(cmd++) = D_TS_ANCHOR | D_TS_NEXT(16); - - dbri->pipes[16].sdp = 1; - dbri->pipes[16].nextpipe = 16; - -#if 0 - chi_initialized++; -#endif - } else { - int pipe; - - for (pipe = 0; pipe < DBRI_NO_PIPES; pipe++ ) - if ( pipe != 16 ) { - if (dbri->pipes[pipe].sdp & D_SDP_TO_SER) - unlink_time_slot(dbri, pipe, PIPEoutput, - 16, dbri->pipes[pipe].nextpipe); - else - unlink_time_slot(dbri, pipe, PIPEinput, - 16, dbri->pipes[pipe].nextpipe); - } - - cmd = dbri_cmdlock(dbri, GetLock); - } + cmd = dbri_cmdlock(dbri, 4); if (master_or_slave == CHIslave) { /* Setup DBRI for CHI Slave - receive clock, frame sync (FS) @@ -1202,8 +1235,9 @@ static void reset_chi(struct snd_dbri * dbri, enum master_or_slave master_or_sla *(cmd++) = DBRI_CMD(D_PAUSE, 0, 0); *(cmd++) = DBRI_CMD(D_CDM, 0, D_CDM_XCE | D_CDM_XEN | D_CDM_REN); + *(cmd++) = DBRI_CMD(D_PAUSE, 0, 0); - dbri_cmdsend(dbri, cmd); + dbri_cmdsend(dbri, cmd, 4); } /* @@ -1240,6 +1274,8 @@ static void cs4215_setup_pipes(struct snd_dbri * dbri) setup_pipe(dbri, 17, D_SDP_FIXED | D_SDP_TO_SER | D_SDP_MSB); setup_pipe(dbri, 18, D_SDP_FIXED | D_SDP_FROM_SER | D_SDP_MSB); setup_pipe(dbri, 19, D_SDP_FIXED | D_SDP_FROM_SER | D_SDP_MSB); + + dbri_cmdwait(dbri); } static int cs4215_init_data(struct cs4215 *mm) @@ -1271,7 +1307,7 @@ static int cs4215_init_data(struct cs4215 *mm) mm->status = 0; mm->version = 0xff; mm->precision = 8; /* For ULAW */ - mm->channels = 2; + mm->channels = 1; return 0; } @@ -1554,7 +1590,6 @@ static int cs4215_init(struct snd_dbri * dbri) } cs4215_setup_pipes(dbri); - cs4215_init_data(&dbri->mm); /* Enable capture of the status & version timeslots. */ @@ -1583,9 +1618,7 @@ buffer and calls dbri_process_one_interrupt() for each interrupt word. Complicated interrupts are handled by dedicated functions (which appear first in this file). Any pending interrupts can be serviced by calling dbri_process_interrupt_buffer(), which works even if the CPU's -interrupts are disabled. This function is used by dbri_cmdlock() -to make sure we're synced up with the chip before each command sequence, -even if we're running cli'ed. +interrupts are disabled. */ @@ -1594,11 +1627,10 @@ even if we're running cli'ed. * Transmit the current TD's for recording/playing, if needed. * For playback, ALSA has filled the DMA memory with new data (we hope). */ -static void xmit_descs(unsigned long data) +static void xmit_descs(struct snd_dbri *dbri) { - struct snd_dbri *dbri = (struct snd_dbri *) data; struct dbri_streaminfo *info; - volatile s32 *cmd; + s32 *cmd; unsigned long flags; int first_td; @@ -1609,7 +1641,7 @@ static void xmit_descs(unsigned long data) info = &dbri->stream_info[DBRI_REC]; spin_lock_irqsave(&dbri->lock, flags); - if ((info->left >= info->size) && (info->pipe >= 0)) { + if (info->pipe >= 0) { first_td = dbri->pipes[info->pipe].first_desc; dprintk(D_DESC, "xmit_descs rec @ TD %d\n", first_td); @@ -1619,16 +1651,15 @@ static void xmit_descs(unsigned long data) goto play; } - cmd = dbri_cmdlock(dbri, NoGetLock); + cmd = dbri_cmdlock(dbri, 2); *(cmd++) = DBRI_CMD(D_SDP, 0, dbri->pipes[info->pipe].sdp | D_SDP_P | D_SDP_EVERY | D_SDP_C); *(cmd++) = dbri->dma_dvma + dbri_dma_off(desc, first_td); - dbri_cmdsend(dbri, cmd); + dbri_cmdsend(dbri, cmd, 2); /* Reset our admin of the pipe & bytes read. */ dbri->pipes[info->pipe].desc = first_td; - info->left = 0; } play: @@ -1638,33 +1669,27 @@ play: info = &dbri->stream_info[DBRI_PLAY]; spin_lock_irqsave(&dbri->lock, flags); - if ((info->left <= 0) && (info->pipe >= 0)) { + if (info->pipe >= 0) { first_td = dbri->pipes[info->pipe].first_desc; dprintk(D_DESC, "xmit_descs play @ TD %d\n", first_td); /* Stream could be closed by the time we run. */ - if (first_td < 0) { - spin_unlock_irqrestore(&dbri->lock, flags); - return; + if (first_td >= 0) { + cmd = dbri_cmdlock(dbri, 2); + *(cmd++) = DBRI_CMD(D_SDP, 0, + dbri->pipes[info->pipe].sdp + | D_SDP_P | D_SDP_EVERY | D_SDP_C); + *(cmd++) = dbri->dma_dvma + dbri_dma_off(desc, first_td); + dbri_cmdsend(dbri, cmd, 2); + + /* Reset our admin of the pipe & bytes written. */ + dbri->pipes[info->pipe].desc = first_td; } - - cmd = dbri_cmdlock(dbri, NoGetLock); - *(cmd++) = DBRI_CMD(D_SDP, 0, - dbri->pipes[info->pipe].sdp - | D_SDP_P | D_SDP_EVERY | D_SDP_C); - *(cmd++) = dbri->dma_dvma + dbri_dma_off(desc, first_td); - dbri_cmdsend(dbri, cmd); - - /* Reset our admin of the pipe & bytes written. */ - dbri->pipes[info->pipe].desc = first_td; - info->left = info->size; } spin_unlock_irqrestore(&dbri->lock, flags); } -static DECLARE_TASKLET(xmit_descs_task, xmit_descs, 0); - /* transmission_complete_intr() * * Called by main interrupt handler when DBRI signals transmission complete @@ -1684,7 +1709,6 @@ static void transmission_complete_intr(struct snd_dbri * dbri, int pipe) struct dbri_streaminfo *info; int td; int status; - int len; info = &dbri->stream_info[DBRI_PLAY]; @@ -1703,20 +1727,7 @@ static void transmission_complete_intr(struct snd_dbri * dbri, int pipe) dprintk(D_INT, "TD %d, status 0x%02x\n", td, status); dbri->dma->desc[td].word4 = 0; /* Reset it for next time. */ - len = DBRI_RD_CNT(dbri->dma->desc[td].word1); - info->offset += len; - info->left -= len; - - /* On the last TD, transmit them all again. */ - if (dbri->next_desc[td] == -1) { - if (info->left > 0) { - printk(KERN_WARNING - "%d bytes left after last transfer.\n", - info->left); - info->left = 0; - } - tasklet_schedule(&xmit_descs_task); - } + info->offset += DBRI_RD_CNT(dbri->dma->desc[td].word1); td = dbri->next_desc[td]; dbri->pipes[pipe].desc = td; @@ -1749,7 +1760,6 @@ static void reception_complete_intr(struct snd_dbri * dbri, int pipe) info = &dbri->stream_info[DBRI_REC]; info->offset += DBRI_RD_CNT(status); - info->left += DBRI_RD_CNT(status); /* FIXME: Check status */ @@ -1757,6 +1767,7 @@ static void reception_complete_intr(struct snd_dbri * dbri, int pipe) rd, DBRI_RD_STATUS(status), DBRI_RD_CNT(status)); /* On the last TD, transmit them all again. */ +#if 0 if (dbri->next_desc[rd] == -1) { if (info->left > info->size) { printk(KERN_WARNING @@ -1765,6 +1776,7 @@ static void reception_complete_intr(struct snd_dbri * dbri, int pipe) } tasklet_schedule(&xmit_descs_task); } +#endif /* Notify ALSA */ if (spin_is_locked(&dbri->lock)) { @@ -1793,16 +1805,11 @@ static void dbri_process_one_interrupt(struct snd_dbri * dbri, int x) channel, code, rval); } - if (channel == D_INTR_CMD && command == D_WAIT) { - dbri->wait_ackd = val; - if (dbri->wait_send != val) { - printk(KERN_ERR "Processing wait command %d when %d was send.\n", - val, dbri->wait_send); - } - return; - } - switch (code) { + case D_INTR_CMDI: + if (command != D_WAIT) + printk(KERN_ERR "DBRI: Command read interrupt\n"); + break; case D_INTR_BRDY: reception_complete_intr(dbri, channel); break; @@ -1815,8 +1822,10 @@ static void dbri_process_one_interrupt(struct snd_dbri * dbri, int x) * resend SDP command with clear pipe bit (C) set */ { - volatile s32 *cmd; - + /* FIXME: do something useful in case of underrun */ + printk(KERN_ERR "DBRI: Underrun error\n"); +#if 0 + s32 *cmd; int pipe = channel; int td = dbri->pipes[pipe].desc; @@ -1827,6 +1836,7 @@ static void dbri_process_one_interrupt(struct snd_dbri * dbri, int x) | D_SDP_P | D_SDP_C | D_SDP_2SAME); *(cmd++) = dbri->dma_dvma + dbri_dma_off(desc, td); dbri_cmdsend(dbri, cmd); +#endif } break; case D_INTR_FXDT: @@ -1847,9 +1857,7 @@ static void dbri_process_one_interrupt(struct snd_dbri * dbri, int x) /* dbri_process_interrupt_buffer advances through the DBRI's interrupt * buffer until it finds a zero word (indicating nothing more to do * right now). Non-zero words require processing and are handed off - * to dbri_process_one_interrupt AFTER advancing the pointer. This - * order is important since we might recurse back into this function - * and need to make sure the pointer has been advanced first. + * to dbri_process_one_interrupt AFTER advancing the pointer. */ static void dbri_process_interrupt_buffer(struct snd_dbri * dbri) { @@ -1919,8 +1927,6 @@ static irqreturn_t snd_dbri_interrupt(int irq, void *dev_id, dbri_process_interrupt_buffer(dbri); - /* FIXME: Write 0 into regs to ACK interrupt */ - spin_unlock(&dbri->lock); return IRQ_HANDLED; @@ -1962,7 +1968,6 @@ static int snd_dbri_open(struct snd_pcm_substream *substream) spin_lock_irqsave(&dbri->lock, flags); info->substream = substream; - info->left = 0; info->offset = 0; info->dvma_buffer = 0; info->pipe = -1; @@ -1980,7 +1985,6 @@ static int snd_dbri_close(struct snd_pcm_substream *substream) dprintk(D_USR, "close audio output.\n"); info->substream = NULL; - info->left = 0; info->offset = 0; return 0; @@ -2062,10 +2066,8 @@ static int snd_dbri_prepare(struct snd_pcm_substream *substream) info->size = snd_pcm_lib_buffer_bytes(substream); if (DBRI_STREAMNO(substream) == DBRI_PLAY) info->pipe = 4; /* Send pipe */ - else { + else info->pipe = 6; /* Receive pipe */ - info->left = info->size; /* To trigger submittal */ - } spin_lock_irq(&dbri->lock); @@ -2093,14 +2095,11 @@ static int snd_dbri_trigger(struct snd_pcm_substream *substream, int cmd) case SNDRV_PCM_TRIGGER_START: dprintk(D_USR, "start audio, period is %d bytes\n", (int)snd_pcm_lib_period_bytes(substream)); - /* Enable & schedule the tasklet that re-submits the TDs. */ - xmit_descs_task.data = (unsigned long)dbri; - tasklet_schedule(&xmit_descs_task); + /* Re-submit the TDs. */ + xmit_descs(dbri); break; case SNDRV_PCM_TRIGGER_STOP: dprintk(D_USR, "stop audio.\n"); - /* Make the tasklet bail out immediately. */ - xmit_descs_task.data = 0; reset_pipe(dbri, info->pipe); break; default: @@ -2118,8 +2117,8 @@ static snd_pcm_uframes_t snd_dbri_pointer(struct snd_pcm_substream *substream) ret = bytes_to_frames(substream->runtime, info->offset) % substream->runtime->buffer_size; - dprintk(D_USR, "I/O pointer: %ld frames, %d bytes left.\n", - ret, info->left); + dprintk(D_USR, "I/O pointer: %ld frames of %ld.\n", + ret, substream->runtime->buffer_size); return ret; } From ab93c7ae54a81bcecb77608ca89eea140f1d45ad Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Wed, 23 Aug 2006 11:37:36 +0200 Subject: [PATCH 0954/1063] [ALSA] sparc dbri: hardware constrains added This patch adds ALSA hardware constrains so stereo is possible only with 16-bit format. It contains small cleanups to ring buffered code as well. Signed-off-by: Krzysztof Helt Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/sparc/dbri.c | 81 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 62 insertions(+), 19 deletions(-) diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c index 3fb2ede80eaf..3e6ad507849d 100644 --- a/sound/sparc/dbri.c +++ b/sound/sparc/dbri.c @@ -85,7 +85,7 @@ MODULE_PARM_DESC(id, "ID string for Sun DBRI soundcard."); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable Sun DBRI soundcard."); -#define DBRI_DEBUG +#undef DBRI_DEBUG #define D_INT (1<<0) #define D_GEN (1<<1) @@ -160,7 +160,7 @@ static struct { /* { NA, (1 << 4), (5 << 3) }, */ { 48000, (1 << 4), (6 << 3) }, { 9600, (1 << 4), (7 << 3) }, - { 5513, (2 << 4), (0 << 3) }, /* Actually 5512.5 */ + { 5512, (2 << 4), (0 << 3) }, /* Actually 5512.5 */ { 11025, (2 << 4), (1 << 3) }, { 18900, (2 << 4), (2 << 3) }, { 22050, (2 << 4), (3 << 3) }, @@ -628,8 +628,6 @@ to send them to the DBRI. */ -static void dbri_process_interrupt_buffer(struct snd_dbri * dbri); - #define MAXLOOPS 10 /* * Wait for the current command string to execute @@ -669,15 +667,15 @@ static s32 *dbri_cmdlock(struct snd_dbri * dbri, int len) } /* - * Send prepared cmd string. It works by writting a JMP cmd into + * Send prepared cmd string. It works by writting a JUMP cmd into * the last WAIT cmd and force DBRI to reread the cmd. - * The JMP cmd points to the new cmd string. + * The JUMP cmd points to the new cmd string. * It also releases the cmdlock spinlock. */ static void dbri_cmdsend(struct snd_dbri * dbri, s32 * cmd,int len) { - s32 *ptr; s32 tmp, addr; + unsigned long flags; static int wait_id = 0; wait_id++; @@ -691,14 +689,17 @@ static void dbri_cmdsend(struct snd_dbri * dbri, s32 * cmd,int len) *(dbri->cmdptr) = DBRI_CMD(D_JUMP, 0, 0); #ifdef DBRI_DEBUG - if (cmd > dbri->cmdptr ) + if (cmd > dbri->cmdptr) { + s32 *ptr; + for (ptr = dbri->cmdptr; ptr < cmd+2; ptr++) { dprintk(D_CMD, "cmd: %lx:%08x\n", (unsigned long)ptr, *ptr); } - else { - ptr = dbri->cmdptr; + } else { + s32 *ptr = dbri->cmdptr; + dprintk(D_CMD, "cmd: %lx:%08x\n", (unsigned long)ptr, *ptr); - ptr = dbri->cmdptr+1; + ptr++; dprintk(D_CMD, "cmd: %lx:%08x\n", (unsigned long)ptr, *ptr); for (ptr = dbri->dma->cmd; ptr < cmd+2; ptr++) { dprintk(D_CMD, "cmd: %lx:%08x\n", (unsigned long)ptr, *ptr); @@ -706,10 +707,12 @@ static void dbri_cmdsend(struct snd_dbri * dbri, s32 * cmd,int len) } #endif + spin_lock_irqsave(&dbri->lock, flags); /* Reread the last command */ tmp = sbus_readl(dbri->regs + REG0); tmp |= D_P; sbus_writel(tmp, dbri->regs + REG0); + spin_unlock_irqrestore(&dbri->lock, flags); dbri->cmdptr = cmd; spin_unlock(&dbri->cmdlock); @@ -1549,8 +1552,7 @@ static int cs4215_prepare(struct snd_dbri * dbri, unsigned int rate, CS4215_BSEL_128 | CS4215_FREQ[freq_idx].xtal; dbri->mm.channels = channels; - /* Stereo bit: 8 bit stereo not working yet. */ - if ((channels > 1) && (dbri->mm.precision == 16)) + if (channels == 2) dbri->mm.ctrl[1] |= CS4215_DFR_STEREO; ret = cs4215_setctrl(dbri); @@ -1624,7 +1626,7 @@ interrupts are disabled. /* xmit_descs() * - * Transmit the current TD's for recording/playing, if needed. + * Starts transmiting the current TD's for recording/playing. * For playback, ALSA has filled the DMA memory with new data (we hope). */ static void xmit_descs(struct snd_dbri *dbri) @@ -1699,9 +1701,9 @@ play: * them as available. Stops when the first descriptor is found without * TBC (Transmit Buffer Complete) set, or we've run through them all. * - * The DMA buffers are not released, but re-used. Since the transmit buffer - * descriptors are not clobbered, they can be re-submitted as is. This is - * done by the xmit_descs() tasklet above since that could take longer. + * The DMA buffers are not released. They form a ring buffer and + * they are filled by ALSA while others are transmitted by DMA. + * */ static void transmission_complete_intr(struct snd_dbri * dbri, int pipe) @@ -1944,8 +1946,8 @@ static struct snd_pcm_hardware snd_dbri_pcm_hw = { SNDRV_PCM_FMTBIT_A_LAW | SNDRV_PCM_FMTBIT_U8 | SNDRV_PCM_FMTBIT_S16_BE, - .rates = SNDRV_PCM_RATE_8000_48000, - .rate_min = 8000, + .rates = SNDRV_PCM_RATE_8000_48000 | SNDRV_PCM_RATE_5512, + .rate_min = 5512, .rate_max = 48000, .channels_min = 1, .channels_max = 2, @@ -1956,6 +1958,39 @@ static struct snd_pcm_hardware snd_dbri_pcm_hw = { .periods_max = 1024, }; +static int snd_hw_rule_format(struct snd_pcm_hw_params *params, + struct snd_pcm_hw_rule *rule) +{ + struct snd_interval *c = hw_param_interval(params, + SNDRV_PCM_HW_PARAM_CHANNELS); + struct snd_mask *f = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT); + struct snd_mask fmt; + + snd_mask_any(&fmt); + if (c->min > 1) { + fmt.bits[0] &= SNDRV_PCM_FMTBIT_S16_BE; + return snd_mask_refine(f, &fmt); + } + return 0; +} + +static int snd_hw_rule_channels(struct snd_pcm_hw_params *params, + struct snd_pcm_hw_rule *rule) +{ + struct snd_interval *c = hw_param_interval(params, + SNDRV_PCM_HW_PARAM_CHANNELS); + struct snd_mask *f = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT); + struct snd_interval ch; + + snd_interval_any(&ch); + if (!(f->bits[0] & SNDRV_PCM_FMTBIT_S16_BE)) { + ch.min = ch.max = 1; + ch.integer = 1; + return snd_interval_refine(c, &ch); + } + return 0; +} + static int snd_dbri_open(struct snd_pcm_substream *substream) { struct snd_dbri *dbri = snd_pcm_substream_chip(substream); @@ -1973,6 +2008,14 @@ static int snd_dbri_open(struct snd_pcm_substream *substream) info->pipe = -1; spin_unlock_irqrestore(&dbri->lock, flags); + snd_pcm_hw_rule_add(runtime,0,SNDRV_PCM_HW_PARAM_CHANNELS, + snd_hw_rule_format, 0, SNDRV_PCM_HW_PARAM_FORMAT, + -1); + snd_pcm_hw_rule_add(runtime,0,SNDRV_PCM_HW_PARAM_FORMAT, + snd_hw_rule_channels, 0, + SNDRV_PCM_HW_PARAM_CHANNELS, + -1); + cs4215_open(dbri); return 0; From 93f09c4cc111506db2ffa6220b7a3d7f73e41aa3 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 21 Aug 2006 19:22:45 +0200 Subject: [PATCH 0955/1063] [ALSA] make sound/pci/emu10k1/emu10k1.c:snd_emu10k1_resume() static This patch makes the needlessly global snd_emu10k1_resume() static. Signed-off-by: Adrian Bunk Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/emu10k1/emu10k1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/emu10k1/emu10k1.c b/sound/pci/emu10k1/emu10k1.c index 289bcd99c19c..493ec0816bb3 100644 --- a/sound/pci/emu10k1/emu10k1.c +++ b/sound/pci/emu10k1/emu10k1.c @@ -232,7 +232,7 @@ static int snd_emu10k1_suspend(struct pci_dev *pci, pm_message_t state) return 0; } -int snd_emu10k1_resume(struct pci_dev *pci) +static int snd_emu10k1_resume(struct pci_dev *pci) { struct snd_card *card = pci_get_drvdata(pci); struct snd_emu10k1 *emu = card->private_data; From 7376d013fc6d3a45d748e0ce758ca9412b01b9dd Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 21 Aug 2006 19:17:46 +0200 Subject: [PATCH 0956/1063] [ALSA] intel_hda: MSI support Simple patch to enable Message Signalled Interrupts for the HDA Intel audio controller. Tested with: Intel Corporation 82801FB/FBM/FR/FW/FRW (ICH6 Family) High Definition Audio Controller (rev 03) MSI is better because it means audio doesn't end up sharing IRQ with USB. Signed-off-by: Stephen Hemminger Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/hda_intel.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index ce75e07aaa2a..c9ae9f778928 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -55,6 +55,7 @@ static char *model; static int position_fix; static int probe_mask = -1; static int single_cmd; +static int disable_msi; module_param(index, int, 0444); MODULE_PARM_DESC(index, "Index value for Intel HD audio interface."); @@ -68,6 +69,8 @@ module_param(probe_mask, int, 0444); MODULE_PARM_DESC(probe_mask, "Bitmask to probe codecs (default = -1)."); module_param(single_cmd, bool, 0444); MODULE_PARM_DESC(single_cmd, "Use single command to communicate with codecs (for debugging only)."); +module_param(disable_msi, int, 0); +MODULE_PARM_DESC(disable_msi, "Disable Message Signaled Interrupt (MSI)"); /* just for backward compatibility */ @@ -1418,8 +1421,10 @@ static int azx_free(struct azx *chip) msleep(1); } - if (chip->irq >= 0) + if (chip->irq >= 0) { + pci_disable_msi(chip->pci); free_irq(chip->irq, (void*)chip); + } if (chip->remap_addr) iounmap(chip->remap_addr); @@ -1502,6 +1507,9 @@ static int __devinit azx_create(struct snd_card *card, struct pci_dev *pci, goto errout; } + if (!disable_msi) + pci_enable_msi(pci); + if (request_irq(pci->irq, azx_interrupt, IRQF_DISABLED|IRQF_SHARED, "HDA Intel", (void*)chip)) { snd_printk(KERN_ERR SFX "unable to grab IRQ %d\n", pci->irq); From 2f3482fbbd5dac7d0e86fe5b7ac5c1e51d52b084 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 21 Aug 2006 18:44:31 +0200 Subject: [PATCH 0957/1063] [ALSA] Add TLV support to AC97 codec driver Added the TLV support to AC97 codec driver for addition of dB range information. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/ac97/ac97_codec.c | 54 ++++++++++++++++++++++++++++++++----- sound/pci/ac97/ac97_patch.c | 39 +++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 7 deletions(-) diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c index e5d062d640df..c47f43dbd664 100644 --- a/sound/pci/ac97/ac97_codec.c +++ b/sound/pci/ac97/ac97_codec.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -1181,6 +1182,32 @@ static int snd_ac97_cmute_new_stereo(struct snd_card *card, char *name, int reg, return 0; } +/* + * set dB information + */ +static DECLARE_TLV_DB_SCALE(db_scale_4bit, -4500, 300, 0); +static DECLARE_TLV_DB_SCALE(db_scale_5bit, -4650, 150, 0); +static DECLARE_TLV_DB_SCALE(db_scale_6bit, -9450, 150, 0); +static DECLARE_TLV_DB_SCALE(db_scale_5bit_12db_max, -3450, 150, 0); +static DECLARE_TLV_DB_SCALE(db_scale_rec_gain, 0, 150, 0); + +static unsigned int *find_db_scale(unsigned int maxval) +{ + switch (maxval) { + case 0x0f: return db_scale_4bit; + case 0x1f: return db_scale_5bit; + case 0x3f: return db_scale_6bit; + } + return NULL; +} + +static void set_tlv_db_scale(struct snd_kcontrol *kctl, unsigned int *tlv) +{ + kctl->tlv.p = tlv; + if (tlv) + kctl->vd[0].access |= SNDRV_CTL_ELEM_ACCESS_TLV_READ; +} + /* * create a volume for normal stereo/mono controls */ @@ -1203,6 +1230,10 @@ static int snd_ac97_cvol_new(struct snd_card *card, char *name, int reg, unsigne tmp.index = ac97->num; kctl = snd_ctl_new1(&tmp, ac97); } + if (reg >= AC97_PHONE && reg <= AC97_PCM) + set_tlv_db_scale(kctl, db_scale_5bit_12db_max); + else + set_tlv_db_scale(kctl, find_db_scale(lo_max)); err = snd_ctl_add(card, kctl); if (err < 0) return err; @@ -1282,6 +1313,7 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97) snd_ac97_change_volume_params2(ac97, AC97_CENTER_LFE_MASTER, 0, &max); kctl->private_value &= ~(0xff << 16); kctl->private_value |= (int)max << 16; + set_tlv_db_scale(kctl, find_db_scale(max)); snd_ac97_write_cache(ac97, AC97_CENTER_LFE_MASTER, ac97->regs[AC97_CENTER_LFE_MASTER] | max); } @@ -1295,6 +1327,7 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97) snd_ac97_change_volume_params2(ac97, AC97_CENTER_LFE_MASTER, 8, &max); kctl->private_value &= ~(0xff << 16); kctl->private_value |= (int)max << 16; + set_tlv_db_scale(kctl, find_db_scale(max)); snd_ac97_write_cache(ac97, AC97_CENTER_LFE_MASTER, ac97->regs[AC97_CENTER_LFE_MASTER] | max << 8); } @@ -1342,8 +1375,9 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97) ((ac97->flags & AC97_HAS_PC_BEEP) || snd_ac97_try_volume_mix(ac97, AC97_PC_BEEP))) { for (idx = 0; idx < 2; idx++) - if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_controls_pc_beep[idx], ac97))) < 0) + if ((err = snd_ctl_add(card, kctl = snd_ac97_cnew(&snd_ac97_controls_pc_beep[idx], ac97))) < 0) return err; + set_tlv_db_scale(kctl, db_scale_4bit); snd_ac97_write_cache(ac97, AC97_PC_BEEP, snd_ac97_read(ac97, AC97_PC_BEEP) | 0x801e); } @@ -1410,22 +1444,26 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97) else init_val = 0x9f1f; for (idx = 0; idx < 2; idx++) - if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_controls_ad18xx_pcm[idx], ac97))) < 0) + if ((err = snd_ctl_add(card, kctl = snd_ac97_cnew(&snd_ac97_controls_ad18xx_pcm[idx], ac97))) < 0) return err; + set_tlv_db_scale(kctl, db_scale_5bit); ac97->spec.ad18xx.pcmreg[0] = init_val; if (ac97->scaps & AC97_SCAP_SURROUND_DAC) { for (idx = 0; idx < 2; idx++) - if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_controls_ad18xx_surround[idx], ac97))) < 0) + if ((err = snd_ctl_add(card, kctl = snd_ac97_cnew(&snd_ac97_controls_ad18xx_surround[idx], ac97))) < 0) return err; + set_tlv_db_scale(kctl, db_scale_5bit); ac97->spec.ad18xx.pcmreg[1] = init_val; } if (ac97->scaps & AC97_SCAP_CENTER_LFE_DAC) { for (idx = 0; idx < 2; idx++) - if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_controls_ad18xx_center[idx], ac97))) < 0) + if ((err = snd_ctl_add(card, kctl = snd_ac97_cnew(&snd_ac97_controls_ad18xx_center[idx], ac97))) < 0) return err; + set_tlv_db_scale(kctl, db_scale_5bit); for (idx = 0; idx < 2; idx++) - if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_controls_ad18xx_lfe[idx], ac97))) < 0) + if ((err = snd_ctl_add(card, kctl = snd_ac97_cnew(&snd_ac97_controls_ad18xx_lfe[idx], ac97))) < 0) return err; + set_tlv_db_scale(kctl, db_scale_5bit); ac97->spec.ad18xx.pcmreg[2] = init_val; } snd_ac97_write_cache(ac97, AC97_PCM, init_val); @@ -1453,16 +1491,18 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97) if (err < 0) return err; } - if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_control_capture_vol, ac97))) < 0) + if ((err = snd_ctl_add(card, kctl = snd_ac97_cnew(&snd_ac97_control_capture_vol, ac97))) < 0) return err; + set_tlv_db_scale(kctl, db_scale_rec_gain); snd_ac97_write_cache(ac97, AC97_REC_SEL, 0x0000); snd_ac97_write_cache(ac97, AC97_REC_GAIN, 0x0000); } /* build MIC Capture controls */ if (snd_ac97_try_volume_mix(ac97, AC97_REC_GAIN_MIC)) { for (idx = 0; idx < 2; idx++) - if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_controls_mic_capture[idx], ac97))) < 0) + if ((err = snd_ctl_add(card, kctl = snd_ac97_cnew(&snd_ac97_controls_mic_capture[idx], ac97))) < 0) return err; + set_tlv_db_scale(kctl, db_scale_rec_gain); snd_ac97_write_cache(ac97, AC97_REC_GAIN_MIC, 0x0000); } diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c index 37c6be481c4a..392f6ccace5d 100644 --- a/sound/pci/ac97/ac97_patch.c +++ b/sound/pci/ac97/ac97_patch.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include "ac97_patch.h" #include "ac97_id.h" @@ -51,6 +52,20 @@ static int patch_build_controls(struct snd_ac97 * ac97, const struct snd_kcontro return 0; } +/* replace with a new TLV */ +static void reset_tlv(struct snd_ac97 *ac97, const char *name, + unsigned int *tlv) +{ + struct snd_ctl_elem_id sid; + struct snd_kcontrol *kctl; + memset(&sid, 0, sizeof(sid)); + strcpy(sid.name, name); + sid.iface = SNDRV_CTL_ELEM_IFACE_MIXER; + kctl = snd_ctl_find_id(ac97->bus->card, &sid); + if (kctl && kctl->tlv.p) + kctl->tlv.p = tlv; +} + /* set to the page, update bits and restore the page */ static int ac97_update_bits_page(struct snd_ac97 *ac97, unsigned short reg, unsigned short mask, unsigned short value, unsigned short page) { @@ -1522,12 +1537,16 @@ static const struct snd_kcontrol_new snd_ac97_controls_ad1885[] = { AC97_SINGLE("Line Jack Sense", AC97_AD_JACK_SPDIF, 8, 1, 1), /* inverted */ }; +static DECLARE_TLV_DB_SCALE(db_scale_6bit_6db_max, -8850, 150, 0); + static int patch_ad1885_specific(struct snd_ac97 * ac97) { int err; if ((err = patch_build_controls(ac97, snd_ac97_controls_ad1885, ARRAY_SIZE(snd_ac97_controls_ad1885))) < 0) return err; + reset_tlv(ac97, "Headphone Playback Volume", + db_scale_6bit_6db_max); return 0; } @@ -1551,12 +1570,27 @@ int patch_ad1885(struct snd_ac97 * ac97) return 0; } +static int patch_ad1886_specific(struct snd_ac97 * ac97) +{ + reset_tlv(ac97, "Headphone Playback Volume", + db_scale_6bit_6db_max); + return 0; +} + +static struct snd_ac97_build_ops patch_ad1886_build_ops = { + .build_specific = &patch_ad1886_specific, +#ifdef CONFIG_PM + .resume = ad18xx_resume +#endif +}; + int patch_ad1886(struct snd_ac97 * ac97) { patch_ad1881(ac97); /* Presario700 workaround */ /* for Jack Sense/SPDIF Register misetting causing */ snd_ac97_write_cache(ac97, AC97_AD_JACK_SPDIF, 0x0010); + ac97->build_ops = &patch_ad1886_build_ops; return 0; } @@ -2015,6 +2049,8 @@ static const struct snd_kcontrol_new snd_ac97_spdif_controls_alc650[] = { /* AC97_SINGLE("IEC958 Input Monitor", AC97_ALC650_MULTICH, 13, 1, 0), */ }; +static DECLARE_TLV_DB_SCALE(db_scale_5bit_3db_max, -4350, 150, 0); + static int patch_alc650_specific(struct snd_ac97 * ac97) { int err; @@ -2025,6 +2061,9 @@ static int patch_alc650_specific(struct snd_ac97 * ac97) if ((err = patch_build_controls(ac97, snd_ac97_spdif_controls_alc650, ARRAY_SIZE(snd_ac97_spdif_controls_alc650))) < 0) return err; } + if (ac97->id != AC97_ID_ALC650F) + reset_tlv(ac97, "Master Playback Volume", + db_scale_5bit_3db_max); return 0; } From 7058c042001e111c601e1b031d9bcb8b5d392b74 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 21 Aug 2006 18:44:54 +0200 Subject: [PATCH 0958/1063] [ALSA] Added TLV support to VIA82xx driver Added the TLV support to VIA82xx driver for addition of dB range information. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/via82xx.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/pci/via82xx.c b/sound/pci/via82xx.c index e0e3bfd7a2db..6db3d4cc4d8d 100644 --- a/sound/pci/via82xx.c +++ b/sound/pci/via82xx.c @@ -59,6 +59,7 @@ #include #include #include +#include #include #include #include @@ -1698,21 +1699,29 @@ static int snd_via8233_pcmdxs_volume_put(struct snd_kcontrol *kcontrol, return change; } +static DECLARE_TLV_DB_SCALE(db_scale_dxs, -9450, 150, 1); + static struct snd_kcontrol_new snd_via8233_pcmdxs_volume_control __devinitdata = { .name = "PCM Playback Volume", .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .info = snd_via8233_dxs_volume_info, .get = snd_via8233_pcmdxs_volume_get, .put = snd_via8233_pcmdxs_volume_put, + .tlv = { .p = db_scale_dxs } }; static struct snd_kcontrol_new snd_via8233_dxs_volume_control __devinitdata = { .name = "VIA DXS Playback Volume", .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .count = 4, .info = snd_via8233_dxs_volume_info, .get = snd_via8233_dxs_volume_get, .put = snd_via8233_dxs_volume_put, + .tlv = { .p = db_scale_dxs } }; /* From 9107226d2ca8a15534da96313a1d370fb1eb8f9e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 23 Aug 2006 12:04:34 +0200 Subject: [PATCH 0959/1063] [ALSA] Add dB scale information to ak4531 codec Added the dB scale information to ak4531 codec driver. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/ac97/ak4531_codec.c | 49 ++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/sound/pci/ac97/ak4531_codec.c b/sound/pci/ac97/ak4531_codec.c index 94c26ec05882..c153cb79c518 100644 --- a/sound/pci/ac97/ak4531_codec.c +++ b/sound/pci/ac97/ak4531_codec.c @@ -27,6 +27,7 @@ #include #include +#include MODULE_AUTHOR("Jaroslav Kysela "); MODULE_DESCRIPTION("Universal routines for AK4531 codec"); @@ -63,6 +64,14 @@ static void snd_ak4531_dump(struct snd_ak4531 *ak4531) .info = snd_ak4531_info_single, \ .get = snd_ak4531_get_single, .put = snd_ak4531_put_single, \ .private_value = reg | (shift << 16) | (mask << 24) | (invert << 22) } +#define AK4531_SINGLE_TLV(xname, xindex, reg, shift, mask, invert, xtlv) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \ + .name = xname, .index = xindex, \ + .info = snd_ak4531_info_single, \ + .get = snd_ak4531_get_single, .put = snd_ak4531_put_single, \ + .private_value = reg | (shift << 16) | (mask << 24) | (invert << 22), \ + .tlv = { .p = (xtlv) } } static int snd_ak4531_info_single(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { @@ -122,6 +131,14 @@ static int snd_ak4531_put_single(struct snd_kcontrol *kcontrol, struct snd_ctl_e .info = snd_ak4531_info_double, \ .get = snd_ak4531_get_double, .put = snd_ak4531_put_double, \ .private_value = left_reg | (right_reg << 8) | (left_shift << 16) | (right_shift << 19) | (mask << 24) | (invert << 22) } +#define AK4531_DOUBLE_TLV(xname, xindex, left_reg, right_reg, left_shift, right_shift, mask, invert, xtlv) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \ + .name = xname, .index = xindex, \ + .info = snd_ak4531_info_double, \ + .get = snd_ak4531_get_double, .put = snd_ak4531_put_double, \ + .private_value = left_reg | (right_reg << 8) | (left_shift << 16) | (right_shift << 19) | (mask << 24) | (invert << 22), \ + .tlv = { .p = (xtlv) } } static int snd_ak4531_info_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { @@ -250,50 +267,62 @@ static int snd_ak4531_put_input_sw(struct snd_kcontrol *kcontrol, struct snd_ctl return change; } +static DECLARE_TLV_DB_SCALE(db_scale_master, -6200, 200, 0); +static DECLARE_TLV_DB_SCALE(db_scale_mono, -2800, 400, 0); +static DECLARE_TLV_DB_SCALE(db_scale_input, -5000, 200, 0); + static struct snd_kcontrol_new snd_ak4531_controls[] = { -AK4531_DOUBLE("Master Playback Switch", 0, AK4531_LMASTER, AK4531_RMASTER, 7, 7, 1, 1), +AK4531_DOUBLE_TLV("Master Playback Switch", 0, + AK4531_LMASTER, AK4531_RMASTER, 7, 7, 1, 1, + db_scale_master), AK4531_DOUBLE("Master Playback Volume", 0, AK4531_LMASTER, AK4531_RMASTER, 0, 0, 0x1f, 1), -AK4531_SINGLE("Master Mono Playback Switch", 0, AK4531_MONO_OUT, 7, 1, 1), +AK4531_SINGLE_TLV("Master Mono Playback Switch", 0, AK4531_MONO_OUT, 7, 1, 1, + db_scale_mono), AK4531_SINGLE("Master Mono Playback Volume", 0, AK4531_MONO_OUT, 0, 0x07, 1), AK4531_DOUBLE("PCM Switch", 0, AK4531_LVOICE, AK4531_RVOICE, 7, 7, 1, 1), -AK4531_DOUBLE("PCM Volume", 0, AK4531_LVOICE, AK4531_RVOICE, 0, 0, 0x1f, 1), +AK4531_DOUBLE_TLV("PCM Volume", 0, AK4531_LVOICE, AK4531_RVOICE, 0, 0, 0x1f, 1, + db_scale_input), AK4531_DOUBLE("PCM Playback Switch", 0, AK4531_OUT_SW2, AK4531_OUT_SW2, 3, 2, 1, 0), AK4531_DOUBLE("PCM Capture Switch", 0, AK4531_LIN_SW2, AK4531_RIN_SW2, 2, 2, 1, 0), AK4531_DOUBLE("PCM Switch", 1, AK4531_LFM, AK4531_RFM, 7, 7, 1, 1), -AK4531_DOUBLE("PCM Volume", 1, AK4531_LFM, AK4531_RFM, 0, 0, 0x1f, 1), +AK4531_DOUBLE_TLV("PCM Volume", 1, AK4531_LFM, AK4531_RFM, 0, 0, 0x1f, 1, + db_scale_input), AK4531_DOUBLE("PCM Playback Switch", 1, AK4531_OUT_SW1, AK4531_OUT_SW1, 6, 5, 1, 0), AK4531_INPUT_SW("PCM Capture Route", 1, AK4531_LIN_SW1, AK4531_RIN_SW1, 6, 5), AK4531_DOUBLE("CD Switch", 0, AK4531_LCD, AK4531_RCD, 7, 7, 1, 1), -AK4531_DOUBLE("CD Volume", 0, AK4531_LCD, AK4531_RCD, 0, 0, 0x1f, 1), +AK4531_DOUBLE_TLV("CD Volume", 0, AK4531_LCD, AK4531_RCD, 0, 0, 0x1f, 1, + db_scale_input), AK4531_DOUBLE("CD Playback Switch", 0, AK4531_OUT_SW1, AK4531_OUT_SW1, 2, 1, 1, 0), AK4531_INPUT_SW("CD Capture Route", 0, AK4531_LIN_SW1, AK4531_RIN_SW1, 2, 1), AK4531_DOUBLE("Line Switch", 0, AK4531_LLINE, AK4531_RLINE, 7, 7, 1, 1), -AK4531_DOUBLE("Line Volume", 0, AK4531_LLINE, AK4531_RLINE, 0, 0, 0x1f, 1), +AK4531_DOUBLE_TLV("Line Volume", 0, AK4531_LLINE, AK4531_RLINE, 0, 0, 0x1f, 1, + db_scale_input), AK4531_DOUBLE("Line Playback Switch", 0, AK4531_OUT_SW1, AK4531_OUT_SW1, 4, 3, 1, 0), AK4531_INPUT_SW("Line Capture Route", 0, AK4531_LIN_SW1, AK4531_RIN_SW1, 4, 3), AK4531_DOUBLE("Aux Switch", 0, AK4531_LAUXA, AK4531_RAUXA, 7, 7, 1, 1), -AK4531_DOUBLE("Aux Volume", 0, AK4531_LAUXA, AK4531_RAUXA, 0, 0, 0x1f, 1), +AK4531_DOUBLE_TLV("Aux Volume", 0, AK4531_LAUXA, AK4531_RAUXA, 0, 0, 0x1f, 1, + db_scale_input), AK4531_DOUBLE("Aux Playback Switch", 0, AK4531_OUT_SW2, AK4531_OUT_SW2, 5, 4, 1, 0), AK4531_INPUT_SW("Aux Capture Route", 0, AK4531_LIN_SW2, AK4531_RIN_SW2, 4, 3), AK4531_SINGLE("Mono Switch", 0, AK4531_MONO1, 7, 1, 1), -AK4531_SINGLE("Mono Volume", 0, AK4531_MONO1, 0, 0x1f, 1), +AK4531_SINGLE_TLV("Mono Volume", 0, AK4531_MONO1, 0, 0x1f, 1, db_scale_input), AK4531_SINGLE("Mono Playback Switch", 0, AK4531_OUT_SW2, 0, 1, 0), AK4531_DOUBLE("Mono Capture Switch", 0, AK4531_LIN_SW2, AK4531_RIN_SW2, 0, 0, 1, 0), AK4531_SINGLE("Mono Switch", 1, AK4531_MONO2, 7, 1, 1), -AK4531_SINGLE("Mono Volume", 1, AK4531_MONO2, 0, 0x1f, 1), +AK4531_SINGLE_TLV("Mono Volume", 1, AK4531_MONO2, 0, 0x1f, 1, db_scale_input), AK4531_SINGLE("Mono Playback Switch", 1, AK4531_OUT_SW2, 1, 1, 0), AK4531_DOUBLE("Mono Capture Switch", 1, AK4531_LIN_SW2, AK4531_RIN_SW2, 1, 1, 1, 0), -AK4531_SINGLE("Mic Volume", 0, AK4531_MIC, 0, 0x1f, 1), +AK4531_SINGLE_TLV("Mic Volume", 0, AK4531_MIC, 0, 0x1f, 1, db_scale_input), AK4531_SINGLE("Mic Switch", 0, AK4531_MIC, 7, 1, 1), AK4531_SINGLE("Mic Playback Switch", 0, AK4531_OUT_SW1, 0, 1, 0), AK4531_DOUBLE("Mic Capture Switch", 0, AK4531_LIN_SW1, AK4531_RIN_SW1, 0, 0, 1, 0), From 9f6ab25063f04597e02968ae8393e8f4703c1563 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 23 Aug 2006 12:14:25 +0200 Subject: [PATCH 0960/1063] [ALSA] Add dB scale information to cs4281 driver Added the dB scale information to cs4281 driver. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/cs4281.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/pci/cs4281.c b/sound/pci/cs4281.c index 9631456ec3de..1990430a21c1 100644 --- a/sound/pci/cs4281.c +++ b/sound/pci/cs4281.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -1054,6 +1055,8 @@ static int snd_cs4281_put_volume(struct snd_kcontrol *kcontrol, return change; } +static DECLARE_TLV_DB_SCALE(db_scale_dsp, -4650, 150, 0); + static struct snd_kcontrol_new snd_cs4281_fm_vol = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -1062,6 +1065,7 @@ static struct snd_kcontrol_new snd_cs4281_fm_vol = .get = snd_cs4281_get_volume, .put = snd_cs4281_put_volume, .private_value = ((BA0_FMLVC << 16) | BA0_FMRVC), + .tlv = { .p = db_scale_dsp }, }; static struct snd_kcontrol_new snd_cs4281_pcm_vol = @@ -1072,6 +1076,7 @@ static struct snd_kcontrol_new snd_cs4281_pcm_vol = .get = snd_cs4281_get_volume, .put = snd_cs4281_put_volume, .private_value = ((BA0_PPLVC << 16) | BA0_PPRVC), + .tlv = { .p = db_scale_dsp }, }; static void snd_cs4281_mixer_free_ac97_bus(struct snd_ac97_bus *bus) From 666c70ffd1c4be795de988f26a8ab13524d4ed47 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 23 Aug 2006 12:32:06 +0200 Subject: [PATCH 0961/1063] [ALSA] Add dB scale information to fm801 driver Added the dB scale information to fm801 driver. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/fm801.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/sound/pci/fm801.c b/sound/pci/fm801.c index f3f2b2c99723..bdfda1997d5b 100644 --- a/sound/pci/fm801.c +++ b/sound/pci/fm801.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -1053,6 +1054,13 @@ static int snd_fm801_put_single(struct snd_kcontrol *kcontrol, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .info = snd_fm801_info_double, \ .get = snd_fm801_get_double, .put = snd_fm801_put_double, \ .private_value = reg | (shift_left << 8) | (shift_right << 12) | (mask << 16) | (invert << 24) } +#define FM801_DOUBLE_TLV(xname, reg, shift_left, shift_right, mask, invert, xtlv) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \ + .name = xname, .info = snd_fm801_info_double, \ + .get = snd_fm801_get_double, .put = snd_fm801_put_double, \ + .private_value = reg | (shift_left << 8) | (shift_right << 12) | (mask << 16) | (invert << 24), \ + .tlv = { .p = (xtlv) } } static int snd_fm801_info_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) @@ -1149,14 +1157,19 @@ static int snd_fm801_put_mux(struct snd_kcontrol *kcontrol, return snd_fm801_update_bits(chip, FM801_REC_SRC, 7, val); } +static DECLARE_TLV_DB_SCALE(db_scale_dsp, -3450, 150, 0); + #define FM801_CONTROLS ARRAY_SIZE(snd_fm801_controls) static struct snd_kcontrol_new snd_fm801_controls[] __devinitdata = { -FM801_DOUBLE("Wave Playback Volume", FM801_PCM_VOL, 0, 8, 31, 1), +FM801_DOUBLE_TLV("Wave Playback Volume", FM801_PCM_VOL, 0, 8, 31, 1, + db_scale_dsp), FM801_SINGLE("Wave Playback Switch", FM801_PCM_VOL, 15, 1, 1), -FM801_DOUBLE("I2S Playback Volume", FM801_I2S_VOL, 0, 8, 31, 1), +FM801_DOUBLE_TLV("I2S Playback Volume", FM801_I2S_VOL, 0, 8, 31, 1, + db_scale_dsp), FM801_SINGLE("I2S Playback Switch", FM801_I2S_VOL, 15, 1, 1), -FM801_DOUBLE("FM Playback Volume", FM801_FM_VOL, 0, 8, 31, 1), +FM801_DOUBLE_TLV("FM Playback Volume", FM801_FM_VOL, 0, 8, 31, 1, + db_scale_dsp), FM801_SINGLE("FM Playback Switch", FM801_FM_VOL, 15, 1, 1), { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, From a0aef8edfc9d6d682dba557fe42599297cbc329a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 23 Aug 2006 13:01:37 +0200 Subject: [PATCH 0962/1063] [ALSA] Add dB scale information to trident driver Added the dB scale information to trident driver. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/trident/trident_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/pci/trident/trident_main.c b/sound/pci/trident/trident_main.c index 4930cc6b054d..ebbe12d78d8c 100644 --- a/sound/pci/trident/trident_main.c +++ b/sound/pci/trident/trident_main.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -2627,6 +2628,8 @@ static int snd_trident_vol_control_get(struct snd_kcontrol *kcontrol, return 0; } +static DECLARE_TLV_DB_SCALE(db_scale_gvol, -6375, 25, 0); + static int snd_trident_vol_control_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { @@ -2653,6 +2656,7 @@ static struct snd_kcontrol_new snd_trident_vol_music_control __devinitdata = .get = snd_trident_vol_control_get, .put = snd_trident_vol_control_put, .private_value = 16, + .tlv = { .p = db_scale_gvol }, }; static struct snd_kcontrol_new snd_trident_vol_wave_control __devinitdata = @@ -2663,6 +2667,7 @@ static struct snd_kcontrol_new snd_trident_vol_wave_control __devinitdata = .get = snd_trident_vol_control_get, .put = snd_trident_vol_control_put, .private_value = 0, + .tlv = { .p = db_scale_gvol }, }; /*--------------------------------------------------------------------------- @@ -2730,6 +2735,7 @@ static struct snd_kcontrol_new snd_trident_pcm_vol_control __devinitdata = .info = snd_trident_pcm_vol_control_info, .get = snd_trident_pcm_vol_control_get, .put = snd_trident_pcm_vol_control_put, + /* FIXME: no tlv yet */ }; /*--------------------------------------------------------------------------- @@ -2839,6 +2845,8 @@ static int snd_trident_pcm_rvol_control_put(struct snd_kcontrol *kcontrol, return change; } +static DECLARE_TLV_DB_SCALE(db_scale_crvol, -3175, 25, 1); + static struct snd_kcontrol_new snd_trident_pcm_rvol_control __devinitdata = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -2848,6 +2856,7 @@ static struct snd_kcontrol_new snd_trident_pcm_rvol_control __devinitdata = .info = snd_trident_pcm_rvol_control_info, .get = snd_trident_pcm_rvol_control_get, .put = snd_trident_pcm_rvol_control_put, + .tlv = { .p = db_scale_crvol }, }; /*--------------------------------------------------------------------------- @@ -2903,6 +2912,7 @@ static struct snd_kcontrol_new snd_trident_pcm_cvol_control __devinitdata = .info = snd_trident_pcm_cvol_control_info, .get = snd_trident_pcm_cvol_control_get, .put = snd_trident_pcm_cvol_control_put, + .tlv = { .p = db_scale_crvol }, }; static void snd_trident_notify_pcm_change1(struct snd_card *card, From fb567a8e4f077b7b084c0558706339c35a4fb186 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 23 Aug 2006 13:07:19 +0200 Subject: [PATCH 0963/1063] [ALSA] Add dB scale information to dummy driver Added the dB scale information to dummy driver. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/drivers/dummy.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c index 73b16134a434..42001efa9f3e 100644 --- a/sound/drivers/dummy.c +++ b/sound/drivers/dummy.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -443,10 +444,13 @@ static int __init snd_card_dummy_pcm(struct snd_dummy *dummy, int device, int su } #define DUMMY_VOLUME(xname, xindex, addr) \ -{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \ + .name = xname, .index = xindex, \ .info = snd_dummy_volume_info, \ .get = snd_dummy_volume_get, .put = snd_dummy_volume_put, \ - .private_value = addr } + .private_value = addr, \ + .tlv = { .p = db_scale_dummy } } static int snd_dummy_volume_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) @@ -497,6 +501,8 @@ static int snd_dummy_volume_put(struct snd_kcontrol *kcontrol, return change; } +static DECLARE_TLV_DB_SCALE(db_scale_dummy, -4500, 30, 0); + #define DUMMY_CAPSRC(xname, xindex, addr) \ { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \ .info = snd_dummy_capsrc_info, \ From 0e7febf15851fb438b9518654340d1f704d202e5 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 22 Aug 2006 13:16:01 +0200 Subject: [PATCH 0964/1063] [ALSA] Add dB scale information to ad1816a driver Added the dB scale information to ad1816a driver. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/isa/ad1816a/ad1816a_lib.c | 55 ++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 11 deletions(-) diff --git a/sound/isa/ad1816a/ad1816a_lib.c b/sound/isa/ad1816a/ad1816a_lib.c index 8fcf2c151823..fd9b61eda0f3 100644 --- a/sound/isa/ad1816a/ad1816a_lib.c +++ b/sound/isa/ad1816a/ad1816a_lib.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -765,6 +766,13 @@ static int snd_ad1816a_put_mux(struct snd_kcontrol *kcontrol, struct snd_ctl_ele return change; } +#define AD1816A_SINGLE_TLV(xname, reg, shift, mask, invert, xtlv) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \ + .name = xname, .info = snd_ad1816a_info_single, \ + .get = snd_ad1816a_get_single, .put = snd_ad1816a_put_single, \ + .private_value = reg | (shift << 8) | (mask << 16) | (invert << 24), \ + .tlv = { .p = (xtlv) } } #define AD1816A_SINGLE(xname, reg, shift, mask, invert) \ { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .info = snd_ad1816a_info_single, \ .get = snd_ad1816a_get_single, .put = snd_ad1816a_put_single, \ @@ -822,6 +830,14 @@ static int snd_ad1816a_put_single(struct snd_kcontrol *kcontrol, struct snd_ctl_ return change; } +#define AD1816A_DOUBLE_TLV(xname, reg, shift_left, shift_right, mask, invert, xtlv) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \ + .name = xname, .info = snd_ad1816a_info_double, \ + .get = snd_ad1816a_get_double, .put = snd_ad1816a_put_double, \ + .private_value = reg | (shift_left << 8) | (shift_right << 12) | (mask << 16) | (invert << 24), \ + .tlv = { .p = (xtlv) } } + #define AD1816A_DOUBLE(xname, reg, shift_left, shift_right, mask, invert) \ { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .info = snd_ad1816a_info_double, \ .get = snd_ad1816a_get_double, .put = snd_ad1816a_put_double, \ @@ -890,28 +906,44 @@ static int snd_ad1816a_put_double(struct snd_kcontrol *kcontrol, struct snd_ctl_ return change; } +static DECLARE_TLV_DB_SCALE(db_scale_4bit, -4500, 300, 0); +static DECLARE_TLV_DB_SCALE(db_scale_5bit, -4650, 150, 0); +static DECLARE_TLV_DB_SCALE(db_scale_6bit, -9450, 150, 0); +static DECLARE_TLV_DB_SCALE(db_scale_5bit_12db_max, -3450, 150, 0); +static DECLARE_TLV_DB_SCALE(db_scale_rec_gain, 0, 150, 0); + static struct snd_kcontrol_new snd_ad1816a_controls[] __devinitdata = { AD1816A_DOUBLE("Master Playback Switch", AD1816A_MASTER_ATT, 15, 7, 1, 1), -AD1816A_DOUBLE("Master Playback Volume", AD1816A_MASTER_ATT, 8, 0, 31, 1), +AD1816A_DOUBLE_TLV("Master Playback Volume", AD1816A_MASTER_ATT, 8, 0, 31, 1, + db_scale_5bit), AD1816A_DOUBLE("PCM Playback Switch", AD1816A_VOICE_ATT, 15, 7, 1, 1), -AD1816A_DOUBLE("PCM Playback Volume", AD1816A_VOICE_ATT, 8, 0, 63, 1), +AD1816A_DOUBLE_TLV("PCM Playback Volume", AD1816A_VOICE_ATT, 8, 0, 63, 1, + db_scale_6bit), AD1816A_DOUBLE("Line Playback Switch", AD1816A_LINE_GAIN_ATT, 15, 7, 1, 1), -AD1816A_DOUBLE("Line Playback Volume", AD1816A_LINE_GAIN_ATT, 8, 0, 31, 1), +AD1816A_DOUBLE_TLV("Line Playback Volume", AD1816A_LINE_GAIN_ATT, 8, 0, 31, 1, + db_scale_5bit_12db_max), AD1816A_DOUBLE("CD Playback Switch", AD1816A_CD_GAIN_ATT, 15, 7, 1, 1), -AD1816A_DOUBLE("CD Playback Volume", AD1816A_CD_GAIN_ATT, 8, 0, 31, 1), +AD1816A_DOUBLE_TLV("CD Playback Volume", AD1816A_CD_GAIN_ATT, 8, 0, 31, 1, + db_scale_5bit_12db_max), AD1816A_DOUBLE("Synth Playback Switch", AD1816A_SYNTH_GAIN_ATT, 15, 7, 1, 1), -AD1816A_DOUBLE("Synth Playback Volume", AD1816A_SYNTH_GAIN_ATT, 8, 0, 31, 1), +AD1816A_DOUBLE_TLV("Synth Playback Volume", AD1816A_SYNTH_GAIN_ATT, 8, 0, 31, 1, + db_scale_5bit_12db_max), AD1816A_DOUBLE("FM Playback Switch", AD1816A_FM_ATT, 15, 7, 1, 1), -AD1816A_DOUBLE("FM Playback Volume", AD1816A_FM_ATT, 8, 0, 63, 1), +AD1816A_DOUBLE_TLV("FM Playback Volume", AD1816A_FM_ATT, 8, 0, 63, 1, + db_scale_6bit), AD1816A_SINGLE("Mic Playback Switch", AD1816A_MIC_GAIN_ATT, 15, 1, 1), -AD1816A_SINGLE("Mic Playback Volume", AD1816A_MIC_GAIN_ATT, 8, 31, 1), +AD1816A_SINGLE_TLV("Mic Playback Volume", AD1816A_MIC_GAIN_ATT, 8, 31, 1, + db_scale_5bit_12db_max), AD1816A_SINGLE("Mic Boost", AD1816A_MIC_GAIN_ATT, 14, 1, 0), AD1816A_DOUBLE("Video Playback Switch", AD1816A_VID_GAIN_ATT, 15, 7, 1, 1), -AD1816A_DOUBLE("Video Playback Volume", AD1816A_VID_GAIN_ATT, 8, 0, 31, 1), +AD1816A_DOUBLE_TLV("Video Playback Volume", AD1816A_VID_GAIN_ATT, 8, 0, 31, 1, + db_scale_5bit_12db_max), AD1816A_SINGLE("Phone Capture Switch", AD1816A_PHONE_IN_GAIN_ATT, 15, 1, 1), -AD1816A_SINGLE("Phone Capture Volume", AD1816A_PHONE_IN_GAIN_ATT, 0, 15, 1), +AD1816A_SINGLE_TLV("Phone Capture Volume", AD1816A_PHONE_IN_GAIN_ATT, 0, 15, 1, + db_scale_4bit), AD1816A_SINGLE("Phone Playback Switch", AD1816A_PHONE_OUT_ATT, 7, 1, 1), -AD1816A_SINGLE("Phone Playback Volume", AD1816A_PHONE_OUT_ATT, 0, 31, 1), +AD1816A_SINGLE_TLV("Phone Playback Volume", AD1816A_PHONE_OUT_ATT, 0, 31, 1, + db_scale_5bit), { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Capture Source", @@ -920,7 +952,8 @@ AD1816A_SINGLE("Phone Playback Volume", AD1816A_PHONE_OUT_ATT, 0, 31, 1), .put = snd_ad1816a_put_mux, }, AD1816A_DOUBLE("Capture Switch", AD1816A_ADC_PGA, 15, 7, 1, 1), -AD1816A_DOUBLE("Capture Volume", AD1816A_ADC_PGA, 8, 0, 15, 0), +AD1816A_DOUBLE_TLV("Capture Volume", AD1816A_ADC_PGA, 8, 0, 15, 0, + db_scale_rec_gain), AD1816A_SINGLE("3D Control - Switch", AD1816A_3D_PHAT_CTRL, 15, 1, 1), AD1816A_SINGLE("3D Control - Level", AD1816A_3D_PHAT_CTRL, 0, 15, 0), }; From eac06a10d2b814dfacc36a8fff35ef07bf4eec8e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 22 Aug 2006 13:16:25 +0200 Subject: [PATCH 0965/1063] [ALSA] Add dB scale information to ad1848 driver Added the dB scale information to ad1848 driver. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- include/sound/ad1848.h | 22 +++++++++++----- sound/isa/ad1848/ad1848_lib.c | 49 ++++++++++++++++++++++++----------- 2 files changed, 50 insertions(+), 21 deletions(-) diff --git a/include/sound/ad1848.h b/include/sound/ad1848.h index 57af1fe7b309..c8de6f83338f 100644 --- a/include/sound/ad1848.h +++ b/include/sound/ad1848.h @@ -179,14 +179,13 @@ enum { AD1848_MIX_SINGLE, AD1848_MIX_DOUBLE, AD1848_MIX_CAPTURE }; #define AD1848_MIXVAL_DOUBLE(left_reg, right_reg, shift_left, shift_right, mask, invert) \ ((left_reg) | ((right_reg) << 8) | ((shift_left) << 16) | ((shift_right) << 19) | ((mask) << 24) | ((invert) << 22)) -int snd_ad1848_add_ctl(struct snd_ad1848 *chip, const char *name, int index, int type, unsigned long value); - /* for ease of use */ struct ad1848_mix_elem { const char *name; int index; int type; unsigned long private_value; + unsigned int *tlv; }; #define AD1848_SINGLE(xname, xindex, reg, shift, mask, invert) \ @@ -195,15 +194,26 @@ struct ad1848_mix_elem { .type = AD1848_MIX_SINGLE, \ .private_value = AD1848_MIXVAL_SINGLE(reg, shift, mask, invert) } +#define AD1848_SINGLE_TLV(xname, xindex, reg, shift, mask, invert, xtlv) \ +{ .name = xname, \ + .index = xindex, \ + .type = AD1848_MIX_SINGLE, \ + .private_value = AD1848_MIXVAL_SINGLE(reg, shift, mask, invert), \ + .tlv = xtlv } + #define AD1848_DOUBLE(xname, xindex, left_reg, right_reg, shift_left, shift_right, mask, invert) \ { .name = xname, \ .index = xindex, \ .type = AD1848_MIX_DOUBLE, \ .private_value = AD1848_MIXVAL_DOUBLE(left_reg, right_reg, shift_left, shift_right, mask, invert) } -static inline int snd_ad1848_add_ctl_elem(struct snd_ad1848 *chip, const struct ad1848_mix_elem *c) -{ - return snd_ad1848_add_ctl(chip, c->name, c->index, c->type, c->private_value); -} +#define AD1848_DOUBLE_TLV(xname, xindex, left_reg, right_reg, shift_left, shift_right, mask, invert, xtlv) \ +{ .name = xname, \ + .index = xindex, \ + .type = AD1848_MIX_DOUBLE, \ + .private_value = AD1848_MIXVAL_DOUBLE(left_reg, right_reg, shift_left, shift_right, mask, invert), \ + .tlv = xtlv } + +int snd_ad1848_add_ctl_elem(struct snd_ad1848 *chip, const struct ad1848_mix_elem *c); #endif /* __SOUND_AD1848_H */ diff --git a/sound/isa/ad1848/ad1848_lib.c b/sound/isa/ad1848/ad1848_lib.c index e711f87d5fd1..a6fbd5d1d62f 100644 --- a/sound/isa/ad1848/ad1848_lib.c +++ b/sound/isa/ad1848/ad1848_lib.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -118,6 +119,8 @@ void snd_ad1848_out(struct snd_ad1848 *chip, #endif } +EXPORT_SYMBOL(snd_ad1848_out); + static void snd_ad1848_dout(struct snd_ad1848 *chip, unsigned char reg, unsigned char value) { @@ -941,6 +944,8 @@ int snd_ad1848_create(struct snd_card *card, return 0; } +EXPORT_SYMBOL(snd_ad1848_create); + static struct snd_pcm_ops snd_ad1848_playback_ops = { .open = snd_ad1848_playback_open, .close = snd_ad1848_playback_close, @@ -988,12 +993,16 @@ int snd_ad1848_pcm(struct snd_ad1848 *chip, int device, struct snd_pcm **rpcm) return 0; } +EXPORT_SYMBOL(snd_ad1848_pcm); + const struct snd_pcm_ops *snd_ad1848_get_pcm_ops(int direction) { return direction == SNDRV_PCM_STREAM_PLAYBACK ? &snd_ad1848_playback_ops : &snd_ad1848_capture_ops; } +EXPORT_SYMBOL(snd_ad1848_get_pcm_ops); + /* * MIXER part */ @@ -1171,7 +1180,8 @@ static int snd_ad1848_put_double(struct snd_kcontrol *kcontrol, struct snd_ctl_e /* */ -int snd_ad1848_add_ctl(struct snd_ad1848 *chip, const char *name, int index, int type, unsigned long value) +int snd_ad1848_add_ctl_elem(struct snd_ad1848 *chip, + const struct ad1848_mix_elem *c) { static struct snd_kcontrol_new newctls[] = { [AD1848_MIX_SINGLE] = { @@ -1196,32 +1206,46 @@ int snd_ad1848_add_ctl(struct snd_ad1848 *chip, const char *name, int index, int struct snd_kcontrol *ctl; int err; - ctl = snd_ctl_new1(&newctls[type], chip); + ctl = snd_ctl_new1(&newctls[c->type], chip); if (! ctl) return -ENOMEM; - strlcpy(ctl->id.name, name, sizeof(ctl->id.name)); - ctl->id.index = index; - ctl->private_value = value; + strlcpy(ctl->id.name, c->name, sizeof(ctl->id.name)); + ctl->id.index = c->index; + ctl->private_value = c->private_value; + if (c->tlv) { + ctl->vd[0].access |= SNDRV_CTL_ELEM_ACCESS_TLV_READ; + ctl->tlv.p = c->tlv; + } if ((err = snd_ctl_add(chip->card, ctl)) < 0) return err; return 0; } +EXPORT_SYMBOL(snd_ad1848_add_ctl_elem); + +static DECLARE_TLV_DB_SCALE(db_scale_6bit, -9450, 150, 0); +static DECLARE_TLV_DB_SCALE(db_scale_5bit_12db_max, -3450, 150, 0); +static DECLARE_TLV_DB_SCALE(db_scale_rec_gain, 0, 150, 0); static struct ad1848_mix_elem snd_ad1848_controls[] = { AD1848_DOUBLE("PCM Playback Switch", 0, AD1848_LEFT_OUTPUT, AD1848_RIGHT_OUTPUT, 7, 7, 1, 1), -AD1848_DOUBLE("PCM Playback Volume", 0, AD1848_LEFT_OUTPUT, AD1848_RIGHT_OUTPUT, 0, 0, 63, 1), +AD1848_DOUBLE_TLV("PCM Playback Volume", 0, AD1848_LEFT_OUTPUT, AD1848_RIGHT_OUTPUT, 0, 0, 63, 1, + db_scale_6bit), AD1848_DOUBLE("Aux Playback Switch", 0, AD1848_AUX1_LEFT_INPUT, AD1848_AUX1_RIGHT_INPUT, 7, 7, 1, 1), -AD1848_DOUBLE("Aux Playback Volume", 0, AD1848_AUX1_LEFT_INPUT, AD1848_AUX1_RIGHT_INPUT, 0, 0, 31, 1), +AD1848_DOUBLE_TLV("Aux Playback Volume", 0, AD1848_AUX1_LEFT_INPUT, AD1848_AUX1_RIGHT_INPUT, 0, 0, 31, 1, + db_scale_5bit_12db_max), AD1848_DOUBLE("Aux Playback Switch", 1, AD1848_AUX2_LEFT_INPUT, AD1848_AUX2_RIGHT_INPUT, 7, 7, 1, 1), -AD1848_DOUBLE("Aux Playback Volume", 1, AD1848_AUX2_LEFT_INPUT, AD1848_AUX2_RIGHT_INPUT, 0, 0, 31, 1), -AD1848_DOUBLE("Capture Volume", 0, AD1848_LEFT_INPUT, AD1848_RIGHT_INPUT, 0, 0, 15, 0), +AD1848_DOUBLE_TLV("Aux Playback Volume", 1, AD1848_AUX2_LEFT_INPUT, AD1848_AUX2_RIGHT_INPUT, 0, 0, 31, 1, + db_scale_5bit_12db_max), +AD1848_DOUBLE_TLV("Capture Volume", 0, AD1848_LEFT_INPUT, AD1848_RIGHT_INPUT, 0, 0, 15, 0, + db_scale_rec_gain), { .name = "Capture Source", .type = AD1848_MIX_CAPTURE, }, AD1848_SINGLE("Loopback Capture Switch", 0, AD1848_LOOPBACK, 0, 1, 0), -AD1848_SINGLE("Loopback Capture Volume", 0, AD1848_LOOPBACK, 1, 63, 0) +AD1848_SINGLE_TLV("Loopback Capture Volume", 0, AD1848_LOOPBACK, 1, 63, 0, + db_scale_6bit), }; int snd_ad1848_mixer(struct snd_ad1848 *chip) @@ -1245,12 +1269,7 @@ int snd_ad1848_mixer(struct snd_ad1848 *chip) return 0; } -EXPORT_SYMBOL(snd_ad1848_out); -EXPORT_SYMBOL(snd_ad1848_create); -EXPORT_SYMBOL(snd_ad1848_pcm); -EXPORT_SYMBOL(snd_ad1848_get_pcm_ops); EXPORT_SYMBOL(snd_ad1848_mixer); -EXPORT_SYMBOL(snd_ad1848_add_ctl); /* * INIT part From a1c7a7d890634eaec106e5fb3a7d9c92b8f85b0d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 22 Aug 2006 13:16:39 +0200 Subject: [PATCH 0966/1063] [ALSA] Add dB scale information to opl3sa2 driver Added the dB scale information to opl3sa2 driver. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/isa/opl3sa2.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/sound/isa/opl3sa2.c b/sound/isa/opl3sa2.c index 4031b61b797f..da92bf6c392b 100644 --- a/sound/isa/opl3sa2.c +++ b/sound/isa/opl3sa2.c @@ -33,6 +33,7 @@ #include #include #include +#include #include @@ -337,6 +338,14 @@ static irqreturn_t snd_opl3sa2_interrupt(int irq, void *dev_id, struct pt_regs * .info = snd_opl3sa2_info_single, \ .get = snd_opl3sa2_get_single, .put = snd_opl3sa2_put_single, \ .private_value = reg | (shift << 8) | (mask << 16) | (invert << 24) } +#define OPL3SA2_SINGLE_TLV(xname, xindex, reg, shift, mask, invert, xtlv) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \ + .name = xname, .index = xindex, \ + .info = snd_opl3sa2_info_single, \ + .get = snd_opl3sa2_get_single, .put = snd_opl3sa2_put_single, \ + .private_value = reg | (shift << 8) | (mask << 16) | (invert << 24), \ + .tlv = { .p = (xtlv) } } static int snd_opl3sa2_info_single(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { @@ -395,6 +404,14 @@ static int snd_opl3sa2_put_single(struct snd_kcontrol *kcontrol, struct snd_ctl_ .info = snd_opl3sa2_info_double, \ .get = snd_opl3sa2_get_double, .put = snd_opl3sa2_put_double, \ .private_value = left_reg | (right_reg << 8) | (shift_left << 16) | (shift_right << 19) | (mask << 24) | (invert << 22) } +#define OPL3SA2_DOUBLE_TLV(xname, xindex, left_reg, right_reg, shift_left, shift_right, mask, invert, xtlv) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \ + .name = xname, .index = xindex, \ + .info = snd_opl3sa2_info_double, \ + .get = snd_opl3sa2_get_double, .put = snd_opl3sa2_put_double, \ + .private_value = left_reg | (right_reg << 8) | (shift_left << 16) | (shift_right << 19) | (mask << 24) | (invert << 22), \ + .tlv = { .p = (xtlv) } } static int snd_opl3sa2_info_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { @@ -469,11 +486,16 @@ static int snd_opl3sa2_put_double(struct snd_kcontrol *kcontrol, struct snd_ctl_ return change; } +static DECLARE_TLV_DB_SCALE(db_scale_master, -3000, 200, 0); +static DECLARE_TLV_DB_SCALE(db_scale_5bit_12db_max, -3450, 150, 0); + static struct snd_kcontrol_new snd_opl3sa2_controls[] = { OPL3SA2_DOUBLE("Master Playback Switch", 0, 0x07, 0x08, 7, 7, 1, 1), -OPL3SA2_DOUBLE("Master Playback Volume", 0, 0x07, 0x08, 0, 0, 15, 1), +OPL3SA2_DOUBLE_TLV("Master Playback Volume", 0, 0x07, 0x08, 0, 0, 15, 1, + db_scale_master), OPL3SA2_SINGLE("Mic Playback Switch", 0, 0x09, 7, 1, 1), -OPL3SA2_SINGLE("Mic Playback Volume", 0, 0x09, 0, 31, 1) +OPL3SA2_SINGLE_TLV("Mic Playback Volume", 0, 0x09, 0, 31, 1, + db_scale_5bit_12db_max), }; static struct snd_kcontrol_new snd_opl3sa2_tone_controls[] = { From bab282b912baf372d8f705357946ef691b621899 Mon Sep 17 00:00:00 2001 From: Vladimir Avdonin Date: Tue, 22 Aug 2006 13:31:58 +0200 Subject: [PATCH 0967/1063] [ALSA] hda-codec - Fix for Acer laptops with ALC883 codec Patch enables the internal speaker on acer laptops with ALC883. Signed-off-by: Vladimir Avdonin Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- .../sound/alsa/ALSA-Configuration.txt | 1 + sound/pci/hda/patch_realtek.c | 21 +++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index d7e95f144569..504ebbceafbc 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt @@ -820,6 +820,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. 3stack-6ch 3-jack 6-channel 3stack-6ch-dig 3-jack 6-channel with SPDIF I/O 6stack-dig-demo 6-jack digital for Intel demo board + acer Acer laptops (Travelmate 3012WTMi, Aspire 5600, etc) auto auto-config reading BIOS (default) ALC861/660 diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 53aa57f5a1a1..65903812b307 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -111,6 +111,7 @@ enum { ALC883_3ST_6ch, ALC883_6ST_DIG, ALC888_DEMO_BOARD, + ALC883_ACER, ALC883_AUTO, ALC883_MODEL_LAST, }; @@ -5069,6 +5070,9 @@ static struct hda_board_config alc883_cfg_tbl[] = { { .pci_subvendor = 0x105b, .pci_subdevice = 0x6668, .config = ALC883_6ST_DIG }, /* Foxconn */ { .modelname = "6stack-dig-demo", .config = ALC888_DEMO_BOARD }, + { .modelname = "acer", .config = ALC883_ACER }, + { .pci_subvendor = 0x1025, .pci_subdevice = 0/*0x0102*/, + .config = ALC883_ACER }, { .modelname = "auto", .config = ALC883_AUTO }, {} }; @@ -5139,6 +5143,23 @@ static struct alc_config_preset alc883_presets[] = { .channel_mode = alc883_sixstack_modes, .input_mux = &alc883_capture_source, }, + [ALC883_ACER] = { + .mixers = { alc883_base_mixer, + alc883_chmode_mixer }, + /* On TravelMate laptops, GPIO 0 enables the internal speaker + * and the headphone jack. Turn this on and rely on the + * standard mute methods whenever the user wants to turn + * these outputs off. + */ + .init_verbs = { alc883_init_verbs, alc880_gpio1_init_verbs }, + .num_dacs = ARRAY_SIZE(alc883_dac_nids), + .dac_nids = alc883_dac_nids, + .num_adc_nids = ARRAY_SIZE(alc883_adc_nids), + .adc_nids = alc883_adc_nids, + .num_channel_mode = ARRAY_SIZE(alc883_3ST_2ch_modes), + .channel_mode = alc883_3ST_2ch_modes, + .input_mux = &alc883_capture_source, + }, }; From 7b89190cf6ecd5075c272b4ec12f65a4ce45a762 Mon Sep 17 00:00:00 2001 From: Magnus Sandin Date: Tue, 22 Aug 2006 13:33:12 +0200 Subject: [PATCH 0968/1063] [ALSA] ac97 - Enable S/PDIF on ASUS P5P800-VM mobo The attached patch will force building the S/PDIF controls on the PCU SSID for Asus P5P800-VM motherboard, even if the AC97_EI_SPDIF bit is not set. Signed-off-by: Magnus Sandin Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/ac97/ac97_codec.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c index c47f43dbd664..a79e91850ba3 100644 --- a/sound/pci/ac97/ac97_codec.c +++ b/sound/pci/ac97/ac97_codec.c @@ -1573,6 +1573,12 @@ static int snd_ac97_mixer_build(struct snd_ac97 * ac97) } /* build S/PDIF controls */ + + /* Hack for ASUS P5P800-VM, which does not indicate S/PDIF capability */ + if (ac97->subsystem_vendor == 0x1043 && + ac97->subsystem_device == 0x810f) + ac97->ext_id |= AC97_EI_SPDIF; + if ((ac97->ext_id & AC97_EI_SPDIF) && !(ac97->scaps & AC97_SCAP_NO_SPDIF)) { if (ac97->build_ops->build_spdif) { if ((err = ac97->build_ops->build_spdif(ac97)) < 0) From 6d8590650eb81d2c869c7adf4b469071cec11eee Mon Sep 17 00:00:00 2001 From: Guillaume Munch Date: Tue, 22 Aug 2006 17:15:47 +0200 Subject: [PATCH 0969/1063] [ALSA] hda-codec - Support for SigmaTel 9872 - AR11M and AR11S uses the same chip hence we claim to support the AR Series. - Added commentary about STAC9225s which shares the same id as CXD9872RD. - Added entry for 7662 but won't work automatically until pci_subdevice is known. - 'vaio' model now corresponds to CXD9872RD_VAIO for backward compat. - Replaced STAC766x_VAIO with CXD9872RD_VAIO, STAC9872AK_VAIO, STAC9872K_VAIO and CXD9872AKD_VAIO - Added 'vaio-ar' model for potential future modifications. Signed-off-by: Guillaume Munch Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- .../sound/alsa/ALSA-Configuration.txt | 5 +- sound/pci/hda/patch_sigmatel.c | 107 +++++++++++++++--- 2 files changed, 96 insertions(+), 16 deletions(-) diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index 504ebbceafbc..48d3bdf2a7cd 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt @@ -859,8 +859,9 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. laptop-dig ditto with SPDIF auto auto-config reading BIOS (default) - STAC7664/7661(?) - vaio Setup for VAIO FE550G/SZ110/AR11B + STAC9872 + vaio Setup for VAIO FE550G/SZ110 + vaio-ar Setup for VAIO AR If the default configuration doesn't work and one of the above matches with your device, report it together with the PCI diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 73ca566e9eb7..139d73e18a3c 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -1563,7 +1563,7 @@ static int patch_stac9205(struct hda_codec *codec) } /* - * STAC 7661(?) and 7664 hack + * STAC9872 hack */ /* static config for Sony VAIO FE550G and Sony VAIO AR */ @@ -1597,6 +1597,23 @@ static struct hda_verb vaio_init[] = { {} }; +static struct hda_verb vaio_ar_init[] = { + {0x0a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP }, /* HP <- 0x2 */ + {0x0f, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT }, /* Speaker <- 0x5 */ + {0x0d, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80 }, /* Mic? (<- 0x2) */ + {0x0e, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN }, /* CD */ +/* {0x11, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },*/ /* Optical Out */ + {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80 }, /* Mic? */ + {0x15, AC_VERB_SET_CONNECT_SEL, 0x2}, /* mic-sel: 0a,0d,14,02 */ + {0x02, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE}, /* HP */ + {0x05, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE}, /* Speaker */ +/* {0x10, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},*/ /* Optical Out */ + {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)}, /* capture sw/vol -> 0x8 */ + {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)}, /* CD-in -> 0x6 */ + {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, /* Mic-in -> 0x9 */ + {} +}; + /* bind volumes of both NID 0x02 and 0x05 */ static int vaio_master_vol_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) @@ -1667,7 +1684,40 @@ static struct snd_kcontrol_new vaio_mixer[] = { {} }; -static struct hda_codec_ops stac766x_patch_ops = { +static struct snd_kcontrol_new vaio_ar_mixer[] = { + { + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .name = "Master Playback Volume", + .info = snd_hda_mixer_amp_volume_info, + .get = snd_hda_mixer_amp_volume_get, + .put = vaio_master_vol_put, + .private_value = HDA_COMPOSE_AMP_VAL(0x02, 3, 0, HDA_OUTPUT), + }, + { + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .name = "Master Playback Switch", + .info = snd_hda_mixer_amp_switch_info, + .get = snd_hda_mixer_amp_switch_get, + .put = vaio_master_sw_put, + .private_value = HDA_COMPOSE_AMP_VAL(0x02, 3, 0, HDA_OUTPUT), + }, + /* HDA_CODEC_VOLUME("CD Capture Volume", 0x07, 0, HDA_INPUT), */ + HDA_CODEC_VOLUME("Capture Volume", 0x09, 0, HDA_INPUT), + HDA_CODEC_MUTE("Capture Switch", 0x09, 0, HDA_INPUT), + /*HDA_CODEC_MUTE("Optical Out Switch", 0x10, 0, HDA_OUTPUT), + HDA_CODEC_VOLUME("Optical Out Volume", 0x10, 0, HDA_OUTPUT),*/ + { + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .name = "Capture Source", + .count = 1, + .info = stac92xx_mux_enum_info, + .get = stac92xx_mux_enum_get, + .put = stac92xx_mux_enum_put, + }, + {} +}; + +static struct hda_codec_ops stac9872_patch_ops = { .build_controls = stac92xx_build_controls, .build_pcms = stac92xx_build_pcms, .init = stac92xx_init, @@ -1677,25 +1727,34 @@ static struct hda_codec_ops stac766x_patch_ops = { #endif }; -enum { STAC766x_VAIO }; +enum { /* FE and SZ series. id=0x83847661 and subsys=0x104D0700 or 104D1000. */ + CXD9872RD_VAIO, + /* Unknown. id=0x83847662 and subsys=0x104D1200 or 104D1000. */ + STAC9872AK_VAIO, + /* Unknown. id=0x83847661 and subsys=0x104D1200. */ + STAC9872K_VAIO, + /* AR Series. id=0x83847664 and subsys=104D1300 */ + CXD9872AKD_VAIO + }; -static struct hda_board_config stac766x_cfg_tbl[] = { - { .modelname = "vaio", .config = STAC766x_VAIO }, +static struct hda_board_config stac9872_cfg_tbl[] = { + { .modelname = "vaio", .config = CXD9872RD_VAIO }, + { .modelname = "vaio-ar", .config = CXD9872AKD_VAIO }, { .pci_subvendor = 0x104d, .pci_subdevice = 0x81e6, - .config = STAC766x_VAIO }, + .config = CXD9872RD_VAIO }, { .pci_subvendor = 0x104d, .pci_subdevice = 0x81ef, - .config = STAC766x_VAIO }, + .config = CXD9872RD_VAIO }, { .pci_subvendor = 0x104d, .pci_subdevice = 0x81fd, - .config = STAC766x_VAIO }, + .config = CXD9872AKD_VAIO }, {} }; -static int patch_stac766x(struct hda_codec *codec) +static int patch_stac9872(struct hda_codec *codec) { struct sigmatel_spec *spec; int board_config; - board_config = snd_hda_check_board_config(codec, stac766x_cfg_tbl); + board_config = snd_hda_check_board_config(codec, stac9872_cfg_tbl); if (board_config < 0) /* unknown config, let generic-parser do its job... */ return snd_hda_parse_generic_codec(codec); @@ -1706,7 +1765,9 @@ static int patch_stac766x(struct hda_codec *codec) codec->spec = spec; switch (board_config) { - case STAC766x_VAIO: + case CXD9872RD_VAIO: + case STAC9872AK_VAIO: + case STAC9872K_VAIO: spec->mixer = vaio_mixer; spec->init = vaio_init; spec->multiout.max_channels = 2; @@ -1718,9 +1779,22 @@ static int patch_stac766x(struct hda_codec *codec) spec->input_mux = &vaio_mux; spec->mux_nids = vaio_mux_nids; break; + + case CXD9872AKD_VAIO: + spec->mixer = vaio_ar_mixer; + spec->init = vaio_ar_init; + spec->multiout.max_channels = 2; + spec->multiout.num_dacs = ARRAY_SIZE(vaio_dacs); + spec->multiout.dac_nids = vaio_dacs; + spec->multiout.hp_nid = VAIO_HP_DAC; + spec->num_adcs = ARRAY_SIZE(vaio_adcs); + spec->adc_nids = vaio_adcs; + spec->input_mux = &vaio_mux; + spec->mux_nids = vaio_mux_nids; + break; } - codec->patch_ops = stac766x_patch_ops; + codec->patch_ops = stac9872_patch_ops; return 0; } @@ -1752,7 +1826,13 @@ struct hda_codec_preset snd_hda_preset_sigmatel[] = { { .id = 0x83847627, .name = "STAC9271D", .patch = patch_stac927x }, { .id = 0x83847628, .name = "STAC9274X5NH", .patch = patch_stac927x }, { .id = 0x83847629, .name = "STAC9274D5NH", .patch = patch_stac927x }, - { .id = 0x83847661, .name = "STAC7661", .patch = patch_stac766x }, + /* The following does not take into account .id=0x83847661 when subsys = + * 104D0C00 which is STAC9225s. Because of this, some SZ Notebooks are + * currently not fully supported. + */ + { .id = 0x83847661, .name = "CXD9872RD/K", .patch = patch_stac9872 }, + { .id = 0x83847662, .name = "STAC9872AK", .patch = patch_stac9872 }, + { .id = 0x83847664, .name = "CXD9872AKD", .patch = patch_stac9872 }, { .id = 0x838476a0, .name = "STAC9205", .patch = patch_stac9205 }, { .id = 0x838476a1, .name = "STAC9205D", .patch = patch_stac9205 }, { .id = 0x838476a2, .name = "STAC9204", .patch = patch_stac9205 }, @@ -1761,6 +1841,5 @@ struct hda_codec_preset snd_hda_preset_sigmatel[] = { { .id = 0x838476a5, .name = "STAC9255D", .patch = patch_stac9205 }, { .id = 0x838476a6, .name = "STAC9254", .patch = patch_stac9205 }, { .id = 0x838476a7, .name = "STAC9254D", .patch = patch_stac9205 }, - { .id = 0x83847664, .name = "STAC7664", .patch = patch_stac766x }, {} /* terminator */ }; From 11b44bbde52b6c50ed8c9ba579d7ee9ff5b48cd8 Mon Sep 17 00:00:00 2001 From: Richard Fish Date: Wed, 23 Aug 2006 18:31:34 +0200 Subject: [PATCH 0970/1063] [ALSA] hda-codec - restore HDA sigmatel pin configs on resume This patch restores the Intel HDA Sigmatel codec pin configuration on resume. Most of it is dedicated to saving the BIOS pin configuration if necessary, so that even unrecognized chips can be resumed correctly. Signed-off-by: Richard Fish Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/patch_sigmatel.c | 100 ++++++++++++++++++++++++++------- 1 file changed, 79 insertions(+), 21 deletions(-) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 139d73e18a3c..239ae3fad054 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -73,6 +73,7 @@ struct sigmatel_spec { hda_nid_t *pin_nids; unsigned int num_pins; unsigned int *pin_configs; + unsigned int *bios_pin_configs; /* codec specific stuff */ struct hda_verb *init; @@ -584,13 +585,42 @@ static struct hda_board_config stac9205_cfg_tbl[] = { {} /* terminator */ }; +static int stac92xx_save_bios_config_regs(struct hda_codec *codec) +{ + int i; + struct sigmatel_spec *spec = codec->spec; + + if (! spec->bios_pin_configs) { + spec->bios_pin_configs = kcalloc(spec->num_pins, + sizeof(*spec->bios_pin_configs), GFP_KERNEL); + if (! spec->bios_pin_configs) + return -ENOMEM; + } + + for (i = 0; i < spec->num_pins; i++) { + hda_nid_t nid = spec->pin_nids[i]; + unsigned int pin_cfg; + + pin_cfg = snd_hda_codec_read(codec, nid, 0, + AC_VERB_GET_CONFIG_DEFAULT, 0x00); + snd_printdd(KERN_INFO "hda_codec: pin nid %2.2x bios pin config %8.8x\n", + nid, pin_cfg); + spec->bios_pin_configs[i] = pin_cfg; + } + + return 0; +} + static void stac92xx_set_config_regs(struct hda_codec *codec) { int i; struct sigmatel_spec *spec = codec->spec; unsigned int pin_cfg; - for (i=0; i < spec->num_pins; i++) { + if (! spec->pin_nids || ! spec->pin_configs) + return; + + for (i = 0; i < spec->num_pins; i++) { snd_hda_codec_write(codec, spec->pin_nids[i], 0, AC_VERB_SET_CONFIG_DEFAULT_BYTES_0, spec->pin_configs[i] & 0x000000ff); @@ -1302,6 +1332,9 @@ static void stac92xx_free(struct hda_codec *codec) kfree(spec->kctl_alloc); } + if (spec->bios_pin_configs) + kfree(spec->bios_pin_configs); + kfree(spec); } @@ -1359,6 +1392,7 @@ static int stac92xx_resume(struct hda_codec *codec) int i; stac92xx_init(codec); + stac92xx_set_config_regs(codec); for (i = 0; i < spec->num_mixers; i++) snd_hda_resume_ctls(codec, spec->mixers[i]); if (spec->multiout.dig_out_nid) @@ -1391,12 +1425,18 @@ static int patch_stac9200(struct hda_codec *codec) return -ENOMEM; codec->spec = spec; + spec->num_pins = 8; + spec->pin_nids = stac9200_pin_nids; spec->board_config = snd_hda_check_board_config(codec, stac9200_cfg_tbl); - if (spec->board_config < 0) - snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC9200, using BIOS defaults\n"); - else { - spec->num_pins = 8; - spec->pin_nids = stac9200_pin_nids; + if (spec->board_config < 0) { + snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC9200, using BIOS defaults\n"); + err = stac92xx_save_bios_config_regs(codec); + if (err < 0) { + stac92xx_free(codec); + return err; + } + spec->pin_configs = spec->bios_pin_configs; + } else { spec->pin_configs = stac9200_brd_tbl[spec->board_config]; stac92xx_set_config_regs(codec); } @@ -1432,13 +1472,19 @@ static int patch_stac922x(struct hda_codec *codec) return -ENOMEM; codec->spec = spec; + spec->num_pins = 10; + spec->pin_nids = stac922x_pin_nids; spec->board_config = snd_hda_check_board_config(codec, stac922x_cfg_tbl); - if (spec->board_config < 0) - snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC922x, " - "using BIOS defaults\n"); - else if (stac922x_brd_tbl[spec->board_config] != NULL) { - spec->num_pins = 10; - spec->pin_nids = stac922x_pin_nids; + if (spec->board_config < 0) { + snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC922x, " + "using BIOS defaults\n"); + err = stac92xx_save_bios_config_regs(codec); + if (err < 0) { + stac92xx_free(codec); + return err; + } + spec->pin_configs = spec->bios_pin_configs; + } else if (stac922x_brd_tbl[spec->board_config] != NULL) { spec->pin_configs = stac922x_brd_tbl[spec->board_config]; stac92xx_set_config_regs(codec); } @@ -1476,12 +1522,18 @@ static int patch_stac927x(struct hda_codec *codec) return -ENOMEM; codec->spec = spec; + spec->num_pins = 14; + spec->pin_nids = stac927x_pin_nids; spec->board_config = snd_hda_check_board_config(codec, stac927x_cfg_tbl); - if (spec->board_config < 0) + if (spec->board_config < 0) { snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC927x, using BIOS defaults\n"); - else if (stac927x_brd_tbl[spec->board_config] != NULL) { - spec->num_pins = 14; - spec->pin_nids = stac927x_pin_nids; + err = stac92xx_save_bios_config_regs(codec); + if (err < 0) { + stac92xx_free(codec); + return err; + } + spec->pin_configs = spec->bios_pin_configs; + } else if (stac927x_brd_tbl[spec->board_config] != NULL) { spec->pin_configs = stac927x_brd_tbl[spec->board_config]; stac92xx_set_config_regs(codec); } @@ -1532,12 +1584,18 @@ static int patch_stac9205(struct hda_codec *codec) return -ENOMEM; codec->spec = spec; + spec->num_pins = 14; + spec->pin_nids = stac9205_pin_nids; spec->board_config = snd_hda_check_board_config(codec, stac9205_cfg_tbl); - if (spec->board_config < 0) - snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC9205, using BIOS defaults\n"); - else { - spec->num_pins = 14; - spec->pin_nids = stac9205_pin_nids; + if (spec->board_config < 0) { + snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC9205, using BIOS defaults\n"); + err = stac92xx_save_bios_config_regs(codec); + if (err < 0) { + stac92xx_free(codec); + return err; + } + spec->pin_configs = spec->bios_pin_configs; + } else { spec->pin_configs = stac9205_brd_tbl[spec->board_config]; stac92xx_set_config_regs(codec); } From 071c73ad5fce436ee00c9422b7ca0c5d629451fb Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 23 Aug 2006 18:34:06 +0200 Subject: [PATCH 0971/1063] [ALSA] hda-codec - Fix mic capture with generic parser Fixed the mic capture with generic parser of hda-codec driver - Use VREF80 for mic pins if available - Handle multiple inputs correctly on audio-input widget node. Confirmed on a conexant codec chip. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/hda_generic.c | 126 +++++++++++++++++++++++++----------- 1 file changed, 89 insertions(+), 37 deletions(-) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 85ad164ada59..dedfc5b1083a 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -461,14 +461,19 @@ static const char *get_input_type(struct hda_gnode *node, unsigned int *pinctl) return "Front Line"; return "Line"; case AC_JACK_CD: +#if 0 if (pinctl) *pinctl |= AC_PINCTL_VREF_GRD; +#endif return "CD"; case AC_JACK_AUX: if ((location & 0x0f) == AC_JACK_LOC_FRONT) return "Front Aux"; return "Aux"; case AC_JACK_MIC_IN: + if (node->pin_caps & + (AC_PINCAP_VREF_80 << AC_PINCAP_VREF_SHIFT)) + *pinctl |= AC_PINCTL_VREF_80; if ((location & 0x0f) == AC_JACK_LOC_FRONT) return "Front Mic"; return "Mic"; @@ -556,6 +561,29 @@ static int parse_adc_sub_nodes(struct hda_codec *codec, struct hda_gspec *spec, return 1; /* found */ } +/* add a capture source element */ +static void add_cap_src(struct hda_gspec *spec, int idx) +{ + struct hda_input_mux_item *csrc; + char *buf; + int num, ocap; + + num = spec->input_mux.num_items; + csrc = &spec->input_mux.items[num]; + buf = spec->cap_labels[num]; + for (ocap = 0; ocap < num; ocap++) { + if (! strcmp(buf, spec->cap_labels[ocap])) { + /* same label already exists, + * put the index number to be unique + */ + sprintf(buf, "%s %d", spec->cap_labels[ocap], num); + break; + } + } + csrc->index = idx; + spec->input_mux.num_items++; +} + /* * parse input */ @@ -576,28 +604,26 @@ static int parse_input_path(struct hda_codec *codec, struct hda_gnode *adc_node) * if it reaches to a proper input PIN, add the path as the * input path. */ + /* first, check the direct connections to PIN widgets */ for (i = 0; i < adc_node->nconns; i++) { node = hda_get_node(spec, adc_node->conn_list[i]); - if (! node) - continue; - err = parse_adc_sub_nodes(codec, spec, node); - if (err < 0) - return err; - else if (err > 0) { - struct hda_input_mux_item *csrc = &spec->input_mux.items[spec->input_mux.num_items]; - char *buf = spec->cap_labels[spec->input_mux.num_items]; - int ocap; - for (ocap = 0; ocap < spec->input_mux.num_items; ocap++) { - if (! strcmp(buf, spec->cap_labels[ocap])) { - /* same label already exists, - * put the index number to be unique - */ - sprintf(buf, "%s %d", spec->cap_labels[ocap], - spec->input_mux.num_items); - } - } - csrc->index = i; - spec->input_mux.num_items++; + if (node && node->type == AC_WID_PIN) { + err = parse_adc_sub_nodes(codec, spec, node); + if (err < 0) + return err; + else if (err > 0) + add_cap_src(spec, i); + } + } + /* ... then check the rests, more complicated connections */ + for (i = 0; i < adc_node->nconns; i++) { + node = hda_get_node(spec, adc_node->conn_list[i]); + if (node && node->type != AC_WID_PIN) { + err = parse_adc_sub_nodes(codec, spec, node); + if (err < 0) + return err; + else if (err > 0) + add_cap_src(spec, i); } } @@ -647,9 +673,6 @@ static int parse_input(struct hda_codec *codec) /* * create mixer controls if possible */ -#define DIR_OUT 0x1 -#define DIR_IN 0x2 - static int create_mixer(struct hda_codec *codec, struct hda_gnode *node, unsigned int index, const char *type, const char *dir_sfx) { @@ -743,28 +766,57 @@ static int build_input_controls(struct hda_codec *codec) { struct hda_gspec *spec = codec->spec; struct hda_gnode *adc_node = spec->adc_node; - int err; + int i, err; + static struct snd_kcontrol_new cap_sel = { + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .name = "Capture Source", + .info = capture_source_info, + .get = capture_source_get, + .put = capture_source_put, + }; - if (! adc_node) + if (! adc_node || ! spec->input_mux.num_items) return 0; /* not found */ + spec->cur_cap_src = 0; + select_input_connection(codec, adc_node, + spec->input_mux.items[0].index); + /* create capture volume and switch controls if the ADC has an amp */ - err = create_mixer(codec, adc_node, 0, NULL, "Capture"); + /* do we have only a single item? */ + if (spec->input_mux.num_items == 1) { + err = create_mixer(codec, adc_node, + spec->input_mux.items[0].index, + NULL, "Capture"); + if (err < 0) + return err; + return 0; + } /* create input MUX if multiple sources are available */ - if (spec->input_mux.num_items > 1) { - static struct snd_kcontrol_new cap_sel = { - .iface = SNDRV_CTL_ELEM_IFACE_MIXER, - .name = "Capture Source", - .info = capture_source_info, - .get = capture_source_get, - .put = capture_source_put, - }; - if ((err = snd_ctl_add(codec->bus->card, snd_ctl_new1(&cap_sel, codec))) < 0) + if ((err = snd_ctl_add(codec->bus->card, + snd_ctl_new1(&cap_sel, codec))) < 0) + return err; + + /* no volume control? */ + if (! (adc_node->wid_caps & AC_WCAP_IN_AMP) || + ! (adc_node->amp_in_caps & AC_AMPCAP_NUM_STEPS)) + return 0; + + for (i = 0; i < spec->input_mux.num_items; i++) { + struct snd_kcontrol_new knew; + char name[32]; + sprintf(name, "%s Capture Volume", + spec->input_mux.items[i].label); + knew = (struct snd_kcontrol_new) + HDA_CODEC_VOLUME(name, adc_node->nid, + spec->input_mux.items[i].index, + HDA_INPUT); + if ((err = snd_ctl_add(codec->bus->card, + snd_ctl_new1(&knew, codec))) < 0) return err; - spec->cur_cap_src = 0; - select_input_connection(codec, adc_node, spec->input_mux.items[0].index); } + return 0; } From 3479307f8ca3cbf4181b8bf7d8c824156a9e63b7 Mon Sep 17 00:00:00 2001 From: Jochen Voss Date: Wed, 23 Aug 2006 18:35:35 +0200 Subject: [PATCH 0972/1063] [ALSA] Fix volume control for the AK4358 DAC Fix volume control for the AK4358 DAC. The attenuation control registers of the AK4358 use only 7bit for the volume, the msb is used to enable attenuation output. Without this patch there are 256 volume levels the lower 128 of which are mute. Signed-off-by: Jochen Voss Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/i2c/other/ak4xxx-adda.c | 42 ++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/sound/i2c/other/ak4xxx-adda.c b/sound/i2c/other/ak4xxx-adda.c index 0aea536a3371..89fc3cbc2356 100644 --- a/sound/i2c/other/ak4xxx-adda.c +++ b/sound/i2c/other/ak4xxx-adda.c @@ -284,11 +284,13 @@ EXPORT_SYMBOL(snd_akm4xxx_init); #define AK_GET_CHIP(val) (((val) >> 8) & 0xff) #define AK_GET_ADDR(val) ((val) & 0xff) -#define AK_GET_SHIFT(val) (((val) >> 16) & 0x7f) +#define AK_GET_SHIFT(val) (((val) >> 16) & 0x3f) +#define AK_GET_NEEDSMSB(val) (((val) >> 22) & 1) #define AK_GET_INVERT(val) (((val) >> 23) & 1) #define AK_GET_MASK(val) (((val) >> 24) & 0xff) #define AK_COMPOSE(chip,addr,shift,mask) \ (((chip) << 8) | (addr) | ((shift) << 16) | ((mask) << 24)) +#define AK_NEEDSMSB (1<<22) #define AK_INVERT (1<<23) static int snd_akm4xxx_volume_info(struct snd_kcontrol *kcontrol, @@ -309,10 +311,13 @@ static int snd_akm4xxx_volume_get(struct snd_kcontrol *kcontrol, struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol); int chip = AK_GET_CHIP(kcontrol->private_value); int addr = AK_GET_ADDR(kcontrol->private_value); + int needsmsb = AK_GET_NEEDSMSB(kcontrol->private_value); int invert = AK_GET_INVERT(kcontrol->private_value); unsigned int mask = AK_GET_MASK(kcontrol->private_value); unsigned char val = snd_akm4xxx_get(ak, chip, addr); - + + if (needsmsb) + val &= 0x7f; ucontrol->value.integer.value[0] = invert ? mask - val : val; return 0; } @@ -323,6 +328,7 @@ static int snd_akm4xxx_volume_put(struct snd_kcontrol *kcontrol, struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol); int chip = AK_GET_CHIP(kcontrol->private_value); int addr = AK_GET_ADDR(kcontrol->private_value); + int needsmsb = AK_GET_NEEDSMSB(kcontrol->private_value); int invert = AK_GET_INVERT(kcontrol->private_value); unsigned int mask = AK_GET_MASK(kcontrol->private_value); unsigned char nval = ucontrol->value.integer.value[0] % (mask+1); @@ -330,6 +336,8 @@ static int snd_akm4xxx_volume_put(struct snd_kcontrol *kcontrol, if (invert) nval = mask - nval; + if (needsmsb) + nval |= 0x80; change = snd_akm4xxx_get(ak, chip, addr) != nval; if (change) snd_akm4xxx_write(ak, chip, addr, nval); @@ -354,13 +362,19 @@ static int snd_akm4xxx_stereo_volume_get(struct snd_kcontrol *kcontrol, struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol); int chip = AK_GET_CHIP(kcontrol->private_value); int addr = AK_GET_ADDR(kcontrol->private_value); + int needsmsb = AK_GET_NEEDSMSB(kcontrol->private_value); int invert = AK_GET_INVERT(kcontrol->private_value); unsigned int mask = AK_GET_MASK(kcontrol->private_value); - unsigned char val = snd_akm4xxx_get(ak, chip, addr); - + unsigned char val; + + val = snd_akm4xxx_get(ak, chip, addr); + if (needsmsb) + val &= 0x7f; ucontrol->value.integer.value[0] = invert ? mask - val : val; val = snd_akm4xxx_get(ak, chip, addr+1); + if (needsmsb) + val &= 0x7f; ucontrol->value.integer.value[1] = invert ? mask - val : val; return 0; @@ -372,6 +386,7 @@ static int snd_akm4xxx_stereo_volume_put(struct snd_kcontrol *kcontrol, struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol); int chip = AK_GET_CHIP(kcontrol->private_value); int addr = AK_GET_ADDR(kcontrol->private_value); + int needsmsb = AK_GET_NEEDSMSB(kcontrol->private_value); int invert = AK_GET_INVERT(kcontrol->private_value); unsigned int mask = AK_GET_MASK(kcontrol->private_value); unsigned char nval = ucontrol->value.integer.value[0] % (mask+1); @@ -379,6 +394,8 @@ static int snd_akm4xxx_stereo_volume_put(struct snd_kcontrol *kcontrol, if (invert) nval = mask - nval; + if (needsmsb) + nval |= 0x80; change0 = snd_akm4xxx_get(ak, chip, addr) != nval; if (change0) snd_akm4xxx_write(ak, chip, addr, nval); @@ -386,6 +403,8 @@ static int snd_akm4xxx_stereo_volume_put(struct snd_kcontrol *kcontrol, nval = ucontrol->value.integer.value[1] % (mask+1); if (invert) nval = mask - nval; + if (needsmsb) + nval |= 0x80; change1 = snd_akm4xxx_get(ak, chip, addr+1) != nval; if (change1) snd_akm4xxx_write(ak, chip, addr+1, nval); @@ -585,16 +604,13 @@ int snd_akm4xxx_build_controls(struct snd_akm4xxx *ak) /* register 4-9, chip #0 only */ ctl->private_value = AK_COMPOSE(0, idx + 4, 0, 255); break; - case SND_AK4358: - if (idx >= 6) - /* register 4-9, chip #0 only */ - ctl->private_value = - AK_COMPOSE(0, idx + 5, 0, 255); - else - /* register 4-9, chip #0 only */ - ctl->private_value = - AK_COMPOSE(0, idx + 4, 0, 255); + case SND_AK4358: { + /* register 4-9 and 11-12, chip #0 only */ + int addr = idx < 6 ? idx + 4 : idx + 5; + ctl->private_value = + AK_COMPOSE(0, addr, 0, 127) | AK_NEEDSMSB; break; + } case SND_AK4381: /* register 3 & 4 */ ctl->private_value = From c6ff77f71fe692fa48fe02dbfe74a01f3d5e55e2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 23 Aug 2006 19:53:02 +0200 Subject: [PATCH 0973/1063] [ALSA] Add dB scale information to pcxhr driver Added the dB scale information to pcxhr driver. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/pcxhr/pcxhr_mixer.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sound/pci/pcxhr/pcxhr_mixer.c b/sound/pci/pcxhr/pcxhr_mixer.c index 94e63a1e90d9..b133ad9e095e 100644 --- a/sound/pci/pcxhr/pcxhr_mixer.c +++ b/sound/pci/pcxhr/pcxhr_mixer.c @@ -31,6 +31,7 @@ #include "pcxhr_hwdep.h" #include "pcxhr_core.h" #include +#include #include #include "pcxhr_mixer.h" @@ -43,6 +44,9 @@ #define PCXHR_ANALOG_PLAYBACK_LEVEL_MAX 128 /* 0.0 dB */ #define PCXHR_ANALOG_PLAYBACK_ZERO_LEVEL 104 /* -24.0 dB ( 0.0 dB - fix level +24.0 dB ) */ +static DECLARE_TLV_DB_SCALE(db_scale_analog_capture, -9600, 50, 0); +static DECLARE_TLV_DB_SCALE(db_scale_analog_playback, -12800, 100, 0); + static int pcxhr_update_analog_audio_level(struct snd_pcxhr *chip, int is_capture, int channel) { int err, vol; @@ -130,10 +134,13 @@ static int pcxhr_analog_vol_put(struct snd_kcontrol *kcontrol, static struct snd_kcontrol_new pcxhr_control_analog_level = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), /* name will be filled later */ .info = pcxhr_analog_vol_info, .get = pcxhr_analog_vol_get, .put = pcxhr_analog_vol_put, + /* tlv will be filled later */ }; /* shared */ @@ -188,6 +195,7 @@ static struct snd_kcontrol_new pcxhr_control_output_switch = { #define PCXHR_DIGITAL_LEVEL_MAX 0x1ff /* +18 dB */ #define PCXHR_DIGITAL_ZERO_LEVEL 0x1b7 /* 0 dB */ +static DECLARE_TLV_DB_SCALE(db_scale_digital, -10950, 50, 0); #define MORE_THAN_ONE_STREAM_LEVEL 0x000001 #define VALID_STREAM_PAN_LEVEL_MASK 0x800000 @@ -343,11 +351,14 @@ static int pcxhr_pcm_vol_put(struct snd_kcontrol *kcontrol, static struct snd_kcontrol_new snd_pcxhr_pcm_vol = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), /* name will be filled later */ /* count will be filled later */ .info = pcxhr_digital_vol_info, /* shared */ .get = pcxhr_pcm_vol_get, .put = pcxhr_pcm_vol_put, + .tlv = { .p = db_scale_digital }, }; @@ -433,10 +444,13 @@ static int pcxhr_monitor_vol_put(struct snd_kcontrol *kcontrol, static struct snd_kcontrol_new pcxhr_control_monitor_vol = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Monitoring Volume", .info = pcxhr_digital_vol_info, /* shared */ .get = pcxhr_monitor_vol_get, .put = pcxhr_monitor_vol_put, + .tlv = { .p = db_scale_digital }, }; /* @@ -928,6 +942,7 @@ int pcxhr_create_mixer(struct pcxhr_mgr *mgr) temp = pcxhr_control_analog_level; temp.name = "Master Playback Volume"; temp.private_value = 0; /* playback */ + temp.tlv.p = db_scale_analog_playback; if ((err = snd_ctl_add(chip->card, snd_ctl_new1(&temp, chip))) < 0) return err; /* output mute controls */ @@ -963,6 +978,7 @@ int pcxhr_create_mixer(struct pcxhr_mgr *mgr) temp = pcxhr_control_analog_level; temp.name = "Master Capture Volume"; temp.private_value = 1; /* capture */ + temp.tlv.p = db_scale_analog_capture; if ((err = snd_ctl_add(chip->card, snd_ctl_new1(&temp, chip))) < 0) return err; From 1186ed8c7dc9c0185e783beddf241509cc224f1a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 23 Aug 2006 19:53:28 +0200 Subject: [PATCH 0974/1063] [ALSA] Add dB scale information to vxpocket and vx222 drivers Added the dB scale information to vxpocket and vx222 drivers. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- include/sound/vx_core.h | 1 + sound/drivers/vx/vx_mixer.c | 17 +++++++++++++++-- sound/pci/vx222/vx222.c | 7 +++++++ sound/pci/vx222/vx222_ops.c | 9 +++++++++ sound/pcmcia/vx/vxpocket.c | 5 +++++ 5 files changed, 37 insertions(+), 2 deletions(-) diff --git a/include/sound/vx_core.h b/include/sound/vx_core.h index 9821a6194caa..dbca14170615 100644 --- a/include/sound/vx_core.h +++ b/include/sound/vx_core.h @@ -128,6 +128,7 @@ struct snd_vx_hardware { unsigned int num_ins; unsigned int num_outs; unsigned int output_level_max; + unsigned int *output_level_db_scale; }; /* hwdep id string */ diff --git a/sound/drivers/vx/vx_mixer.c b/sound/drivers/vx/vx_mixer.c index c1d7fcdd1973..1613ed844ac6 100644 --- a/sound/drivers/vx/vx_mixer.c +++ b/sound/drivers/vx/vx_mixer.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "vx_cmd.h" @@ -455,10 +456,13 @@ static int vx_output_level_put(struct snd_kcontrol *kcontrol, struct snd_ctl_ele static struct snd_kcontrol_new vx_control_output_level = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Master Playback Volume", .info = vx_output_level_info, .get = vx_output_level_get, .put = vx_output_level_put, + /* tlv will be filled later */ }; /* @@ -712,12 +716,17 @@ static int vx_monitor_sw_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_ return 0; } +static DECLARE_TLV_DB_SCALE(db_scale_audio_gain, -10975, 25, 0); + static struct snd_kcontrol_new vx_control_audio_gain = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), /* name will be filled later */ .info = vx_audio_gain_info, .get = vx_audio_gain_get, - .put = vx_audio_gain_put + .put = vx_audio_gain_put, + .tlv = { .p = db_scale_audio_gain }, }; static struct snd_kcontrol_new vx_control_output_switch = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -729,9 +738,12 @@ static struct snd_kcontrol_new vx_control_output_switch = { static struct snd_kcontrol_new vx_control_monitor_gain = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Monitoring Volume", + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .info = vx_audio_gain_info, /* shared */ .get = vx_audio_monitor_get, - .put = vx_audio_monitor_put + .put = vx_audio_monitor_put, + .tlv = { .p = db_scale_audio_gain }, }; static struct snd_kcontrol_new vx_control_monitor_switch = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -918,6 +930,7 @@ int snd_vx_mixer_new(struct vx_core *chip) for (i = 0; i < chip->hw->num_outs; i++) { temp = vx_control_output_level; temp.index = i; + temp.tlv.p = chip->hw->output_level_db_scale; if ((err = snd_ctl_add(card, snd_ctl_new1(&temp, chip))) < 0) return err; } diff --git a/sound/pci/vx222/vx222.c b/sound/pci/vx222/vx222.c index 9c03c6b4e490..e7cd8acab59a 100644 --- a/sound/pci/vx222/vx222.c +++ b/sound/pci/vx222/vx222.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "vx222.h" #define CARD_NAME "VX222" @@ -72,6 +73,9 @@ MODULE_DEVICE_TABLE(pci, snd_vx222_ids); /* */ +static DECLARE_TLV_DB_SCALE(db_scale_old_vol, -11350, 50, 0); +static DECLARE_TLV_DB_SCALE(db_scale_akm, -7350, 50, 0); + static struct snd_vx_hardware vx222_old_hw = { .name = "VX222/Old", @@ -81,6 +85,7 @@ static struct snd_vx_hardware vx222_old_hw = { .num_ins = 1, .num_outs = 1, .output_level_max = VX_ANALOG_OUT_LEVEL_MAX, + .output_level_db_scale = db_scale_old_vol, }; static struct snd_vx_hardware vx222_v2_hw = { @@ -92,6 +97,7 @@ static struct snd_vx_hardware vx222_v2_hw = { .num_ins = 1, .num_outs = 1, .output_level_max = VX2_AKM_LEVEL_MAX, + .output_level_db_scale = db_scale_akm, }; static struct snd_vx_hardware vx222_mic_hw = { @@ -103,6 +109,7 @@ static struct snd_vx_hardware vx222_mic_hw = { .num_ins = 1, .num_outs = 1, .output_level_max = VX2_AKM_LEVEL_MAX, + .output_level_db_scale = db_scale_akm, }; diff --git a/sound/pci/vx222/vx222_ops.c b/sound/pci/vx222/vx222_ops.c index 9b6d345b83a6..5e51950e05f9 100644 --- a/sound/pci/vx222/vx222_ops.c +++ b/sound/pci/vx222/vx222_ops.c @@ -28,6 +28,7 @@ #include #include +#include #include #include "vx222.h" @@ -845,6 +846,8 @@ static void vx2_set_input_level(struct snd_vx222 *chip) #define MIC_LEVEL_MAX 0xff +static DECLARE_TLV_DB_SCALE(db_scale_mic, -6450, 50, 0); + /* * controls API for input levels */ @@ -922,18 +925,24 @@ static int vx_mic_level_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_v static struct snd_kcontrol_new vx_control_input_level = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Capture Volume", .info = vx_input_level_info, .get = vx_input_level_get, .put = vx_input_level_put, + .tlv = { .p = db_scale_mic }, }; static struct snd_kcontrol_new vx_control_mic_level = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Mic Capture Volume", .info = vx_mic_level_info, .get = vx_mic_level_get, .put = vx_mic_level_put, + .tlv = { .p = db_scale_mic }, }; /* diff --git a/sound/pcmcia/vx/vxpocket.c b/sound/pcmcia/vx/vxpocket.c index 76c85cffb40e..3089fcca800e 100644 --- a/sound/pcmcia/vx/vxpocket.c +++ b/sound/pcmcia/vx/vxpocket.c @@ -27,6 +27,7 @@ #include #include #include +#include /* */ @@ -90,6 +91,8 @@ static int snd_vxpocket_dev_free(struct snd_device *device) * Only output levels can be modified */ +static DECLARE_TLV_DB_SCALE(db_scale_old_vol, -11350, 50, 0); + static struct snd_vx_hardware vxpocket_hw = { .name = "VXPocket", .type = VX_TYPE_VXPOCKET, @@ -99,6 +102,7 @@ static struct snd_vx_hardware vxpocket_hw = { .num_ins = 1, .num_outs = 1, .output_level_max = VX_ANALOG_OUT_LEVEL_MAX, + .output_level_db_scale = db_scale_old_vol, }; /* VX-pocket 440 @@ -120,6 +124,7 @@ static struct snd_vx_hardware vxp440_hw = { .num_ins = 2, .num_outs = 2, .output_level_max = VX_ANALOG_OUT_LEVEL_MAX, + .output_level_db_scale = db_scale_old_vol, }; From 86148e84c218e49b54521e8dae7bb78eb66c4281 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 24 Aug 2006 12:36:36 +0200 Subject: [PATCH 0975/1063] [ALSA] Fix errors with user TLV_WRITE Fixed the errors at checking info.access field during user TLV_WRITE call. It should have been zero-initialized. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/core/control.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/core/control.c b/sound/core/control.c index ac1442682eac..3030aaa6d2c5 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -1048,6 +1048,7 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file, if (ue == NULL) return -ENOMEM; ue->info = *info; + ue->info.access = 0; ue->elem_data = (char *)ue + sizeof(*ue); ue->elem_data_size = private_size; kctl.private_free = snd_ctl_elem_user_free; From 18c1c3f694105ab2a6f43e054e23f9a751b2f869 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 25 Aug 2006 11:39:34 +0200 Subject: [PATCH 0976/1063] [ALSA] Return error if no user TLV is defined Retrun error to user TLV_READ ioctl if no TLV is defined. (Until now, nothing was written and rerunred successfully.) Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/core/control.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/core/control.c b/sound/core/control.c index 3030aaa6d2c5..6973a9686b67 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -951,6 +951,8 @@ static int snd_ctl_elem_user_tlv(struct snd_kcontrol *kcontrol, ue->tlv_data = new_data; ue->tlv_data_size = size; } else { + if (! ue->tlv_data_size || ! ue->tlv_data) + return -ENXIO; if (size < ue->tlv_data_size) return -ENOSPC; if (copy_to_user(tlv, ue->tlv_data, ue->tlv_data_size)) From 2c7782b420ee137057eeec7c24a565ac85fc1988 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 25 Aug 2006 13:11:26 +0200 Subject: [PATCH 0977/1063] [ALSA] hda-codec - Use model=ref for some Dell laptops Force to choose model=ref for some Dell laptops with STAC9200 codec chip for fixing the silent mic recording problem (possibly due to a BIOS bug). Reference: ALSA bug#2038 So far, applied to Inspiron 630m, Latitude D620 and 120L. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/patch_sigmatel.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 239ae3fad054..8d5ad7c0db07 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -582,6 +582,13 @@ static struct hda_board_config stac9205_cfg_tbl[] = { .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x2668, /* DFI LanParty */ .config = STAC_REF }, /* SigmaTel reference board */ + /* Dell laptops have BIOS problem */ + { .pci_subvendor = PCI_VENDOR_ID_DELL, .pci_subdevice = 0x01b5, + .config = STAC_REF }, /* Dell Inspiron 630m */ + { .pci_subvendor = PCI_VENDOR_ID_DELL, .pci_subdevice = 0x01c2, + .config = STAC_REF }, /* Dell Latitude D620 */ + { .pci_subvendor = PCI_VENDOR_ID_DELL, .pci_subdevice = 0x01cb, + .config = STAC_REF }, /* Dell Latitude 120L */ {} /* terminator */ }; From aaad3653a5f073ce9eaef4efd387cf7fc3a53d18 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Mon, 28 Aug 2006 12:59:23 +0200 Subject: [PATCH 0978/1063] [ALSA] sparc dbri: recording is back This patch fixes sound recording after the driver convertion to ring buffered version. It also contains small clean ups to the driver. Signed-off-by: Krzysztof Helt Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/sparc/dbri.c | 65 ++++++++++++++-------------------------------- 1 file changed, 20 insertions(+), 45 deletions(-) diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c index 3e6ad507849d..cdca8e4a96e4 100644 --- a/sound/sparc/dbri.c +++ b/sound/sparc/dbri.c @@ -107,7 +107,7 @@ static char *cmds[] = { #define dprintk(a, x...) if(dbri_debug & a) printk(KERN_DEBUG x) #else -#define dprintk(a, x...) +#define dprintk(a, x...) do { } while (0) #endif /* DBRI_DEBUG */ @@ -610,10 +610,10 @@ CPU interrupt to signal completion. Since the DBRI can run in parallel with the CPU, several means of synchronization present themselves. The method implemented here is only -to use the dbri_cmdwait() to wait for execution of batch of sent commands. +use of the dbri_cmdwait() to wait for execution of batch of sent commands. A circular command buffer is used here. A new command is being added -while other can be executed. The scheme works by adding two WAIT commands +while another can be executed. The scheme works by adding two WAIT commands after each sent batch of commands. When the next batch is prepared it is added after the WAIT commands then the WAITs are replaced with single JUMP command to the new batch. The the DBRI is forced to reread the last WAIT @@ -628,7 +628,7 @@ to send them to the DBRI. */ -#define MAXLOOPS 10 +#define MAXLOOPS 20 /* * Wait for the current command string to execute */ @@ -692,9 +692,8 @@ static void dbri_cmdsend(struct snd_dbri * dbri, s32 * cmd,int len) if (cmd > dbri->cmdptr) { s32 *ptr; - for (ptr = dbri->cmdptr; ptr < cmd+2; ptr++) { + for (ptr = dbri->cmdptr; ptr < cmd+2; ptr++) dprintk(D_CMD, "cmd: %lx:%08x\n", (unsigned long)ptr, *ptr); - } } else { s32 *ptr = dbri->cmdptr; @@ -1141,13 +1140,9 @@ static int setup_descs(struct snd_dbri * dbri, int streamno, unsigned int period return -1; } - if (streamno == DBRI_PLAY) { - dbri->dma->desc[last_desc].word1 |= - DBRI_TD_F | DBRI_TD_B; - dbri->dma->desc[last_desc].nda = - dbri->dma_dvma + dbri_dma_off(desc, first_desc); - dbri->next_desc[last_desc] = first_desc; - } + dbri->dma->desc[last_desc].nda = + dbri->dma_dvma + dbri_dma_off(desc, first_desc); + dbri->next_desc[last_desc] = first_desc; dbri->pipes[info->pipe].first_desc = first_desc; dbri->pipes[info->pipe].desc = first_desc; @@ -1639,7 +1634,6 @@ static void xmit_descs(struct snd_dbri *dbri) if (dbri == NULL) return; /* Disabled */ - /* First check the recording stream for buffer overflow */ info = &dbri->stream_info[DBRI_REC]; spin_lock_irqsave(&dbri->lock, flags); @@ -1649,27 +1643,20 @@ static void xmit_descs(struct snd_dbri *dbri) dprintk(D_DESC, "xmit_descs rec @ TD %d\n", first_td); /* Stream could be closed by the time we run. */ - if (first_td < 0) { - goto play; + if (first_td >= 0) { + cmd = dbri_cmdlock(dbri, 2); + *(cmd++) = DBRI_CMD(D_SDP, 0, + dbri->pipes[info->pipe].sdp + | D_SDP_P | D_SDP_EVERY | D_SDP_C); + *(cmd++) = dbri->dma_dvma + dbri_dma_off(desc, first_td); + dbri_cmdsend(dbri, cmd, 2); + + /* Reset our admin of the pipe. */ + dbri->pipes[info->pipe].desc = first_td; } - - cmd = dbri_cmdlock(dbri, 2); - *(cmd++) = DBRI_CMD(D_SDP, 0, - dbri->pipes[info->pipe].sdp - | D_SDP_P | D_SDP_EVERY | D_SDP_C); - *(cmd++) = dbri->dma_dvma + dbri_dma_off(desc, first_td); - dbri_cmdsend(dbri, cmd, 2); - - /* Reset our admin of the pipe & bytes read. */ - dbri->pipes[info->pipe].desc = first_td; } -play: - spin_unlock_irqrestore(&dbri->lock, flags); - - /* Now check the playback stream for buffer underflow */ info = &dbri->stream_info[DBRI_PLAY]; - spin_lock_irqsave(&dbri->lock, flags); if (info->pipe >= 0) { first_td = dbri->pipes[info->pipe].first_desc; @@ -1685,7 +1672,7 @@ play: *(cmd++) = dbri->dma_dvma + dbri_dma_off(desc, first_td); dbri_cmdsend(dbri, cmd, 2); - /* Reset our admin of the pipe & bytes written. */ + /* Reset our admin of the pipe. */ dbri->pipes[info->pipe].desc = first_td; } } @@ -1755,7 +1742,6 @@ static void reception_complete_intr(struct snd_dbri * dbri, int pipe) return; } - dbri->dma->desc[rd].ba = 0; dbri->pipes[pipe].desc = dbri->next_desc[rd]; status = dbri->dma->desc[rd].word1; dbri->dma->desc[rd].word1 = 0; /* Reset it for next time. */ @@ -1768,18 +1754,6 @@ static void reception_complete_intr(struct snd_dbri * dbri, int pipe) dprintk(D_INT, "Recv RD %d, status 0x%02x, len %d\n", rd, DBRI_RD_STATUS(status), DBRI_RD_CNT(status)); - /* On the last TD, transmit them all again. */ -#if 0 - if (dbri->next_desc[rd] == -1) { - if (info->left > info->size) { - printk(KERN_WARNING - "%d bytes recorded in %d size buffer.\n", - info->left, info->size); - } - tasklet_schedule(&xmit_descs_task); - } -#endif - /* Notify ALSA */ if (spin_is_locked(&dbri->lock)) { spin_unlock(&dbri->lock); @@ -2113,6 +2087,7 @@ static int snd_dbri_prepare(struct snd_pcm_substream *substream) info->pipe = 6; /* Receive pipe */ spin_lock_irq(&dbri->lock); + info->offset = 0; /* Setup the all the transmit/receive desciptors to cover the * whole DMA buffer. From 99dabfe716002c54b4dffa545460dc74bc632c22 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Mon, 28 Aug 2006 13:00:45 +0200 Subject: [PATCH 0979/1063] [ALSA] dbri sparc: fixes TS leak This patch fixes time slot leak in the dbri driver. Signed-off-by: Krzysztof Helt Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/sparc/dbri.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c index cdca8e4a96e4..6b090fb66a8d 100644 --- a/sound/sparc/dbri.c +++ b/sound/sparc/dbri.c @@ -1044,7 +1044,7 @@ static int setup_descs(struct snd_dbri * dbri, int streamno, unsigned int period { struct dbri_streaminfo *info = &dbri->stream_info[streamno]; __u32 dvma_buffer; - int desc = 0; + int desc; int len; int first_desc = -1; int last_desc = -1; @@ -1087,6 +1087,18 @@ static int setup_descs(struct snd_dbri * dbri, int streamno, unsigned int period len &= ~3; } + /* Free descriptors if pipe has any */ + desc = dbri->pipes[info->pipe].first_desc; + if ( desc >= 0) + do { + dbri->dma->desc[desc].nda = dbri->dma->desc[desc].ba = 0; + desc = dbri->next_desc[desc]; + } while (desc != -1 && desc != dbri->pipes[info->pipe].first_desc); + + dbri->pipes[info->pipe].desc = -1; + dbri->pipes[info->pipe].first_desc = -1; + + desc = 0; while (len > 0) { int mylen; @@ -2054,6 +2066,7 @@ static int snd_dbri_hw_free(struct snd_pcm_substream *substream) struct snd_dbri *dbri = snd_pcm_substream_chip(substream); struct dbri_streaminfo *info = DBRI_STREAM(dbri, substream); int direction; + dprintk(D_USR, "hw_free.\n"); /* hw_free can get called multiple times. Only unmap the DMA once. @@ -2068,7 +2081,10 @@ static int snd_dbri_hw_free(struct snd_pcm_substream *substream) substream->runtime->buffer_size, direction); info->dvma_buffer = 0; } - info->pipe = -1; + if (info->pipe != -1) { + reset_pipe(dbri, info->pipe); + info->pipe = -1; + } return snd_pcm_lib_free_pages(substream); } From 1f14d167f0233342eab53bb1a429ddad1e848de4 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Mon, 28 Aug 2006 13:01:31 +0200 Subject: [PATCH 0980/1063] [ALSA] sparc dbri: OSS layer fix This patch removes setting of incorrect stop_threshold value inside the driver. After the change, playback through the OSS layer works correctly. Signed-off-by: Krzysztof Helt Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/sparc/dbri.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c index 6b090fb66a8d..82d5e8072f2b 100644 --- a/sound/sparc/dbri.c +++ b/sound/sparc/dbri.c @@ -2111,8 +2111,6 @@ static int snd_dbri_prepare(struct snd_pcm_substream *substream) ret = setup_descs(dbri, DBRI_STREAMNO(substream), snd_pcm_lib_period_bytes(substream)); - runtime->stop_threshold = DBRI_TD_MAXCNT / runtime->channels; - spin_unlock_irq(&dbri->lock); dprintk(D_USR, "prepare audio output. %d bytes\n", info->size); From 063a40d9111ce7558f2fdfa4f85acfc47eb27353 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 28 Aug 2006 13:20:13 +0200 Subject: [PATCH 0981/1063] [ALSA] Add the definition of linear volume TLV Added the definition of linear volume TLV type. Some DSP chips and codecs (e.g. AK codec) use linear volume control. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- include/sound/tlv.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/sound/tlv.h b/include/sound/tlv.h index b826e1df1da6..7905841643df 100644 --- a/include/sound/tlv.h +++ b/include/sound/tlv.h @@ -33,6 +33,7 @@ #define SNDRV_CTL_TLVT_CONTAINER 0 /* one level down - group of TLVs */ #define SNDRV_CTL_TLVT_DB_SCALE 1 /* dB scale */ +#define SNDRV_CTL_TLVT_DB_LINEAR 2 /* linear volume */ #define DECLARE_TLV_DB_SCALE(name, min, step, mute) \ unsigned int name[] = { \ @@ -40,4 +41,13 @@ unsigned int name[] = { \ (min), ((step) & 0xffff) | ((mute) ? 0x10000 : 0) \ } +/* linear volume between min_dB and max_dB (.01dB unit) */ +#define DECLARE_TLV_DB_LINEAR(name, min_dB, max_dB) \ +unsigned int name[] = { \ + SNDRV_CTL_TLVT_DB_LINEAR, 2 * sizeof(unsigned int), \ + (min_dB), (max_dB) \ +} + +#define TLV_DB_GAIN_MUTE -9999999 + #endif /* __SOUND_TLV_H */ From 33925186d843e7004288cd3d87843c5a1dbf55a4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 28 Aug 2006 13:29:42 +0200 Subject: [PATCH 0982/1063] [ALSA] ymfpci - Add TLV entries for native volume controls Added the linear volume TLV entries for YMFPCI native volume controls. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/ymfpci/ymfpci_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/pci/ymfpci/ymfpci_main.c b/sound/pci/ymfpci/ymfpci_main.c index a55b5fd7da64..24f6fc52f898 100644 --- a/sound/pci/ymfpci/ymfpci_main.c +++ b/sound/pci/ymfpci/ymfpci_main.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -1477,11 +1478,15 @@ static int snd_ymfpci_put_single(struct snd_kcontrol *kcontrol, return change; } +static DECLARE_TLV_DB_LINEAR(db_scale_native, TLV_DB_GAIN_MUTE, 0); + #define YMFPCI_DOUBLE(xname, xindex, reg) \ { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \ + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \ .info = snd_ymfpci_info_double, \ .get = snd_ymfpci_get_double, .put = snd_ymfpci_put_double, \ - .private_value = reg } + .private_value = reg, \ + .tlv = { .p = db_scale_native } } static int snd_ymfpci_info_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { From 9f458e7fb5b92385d348fb6039ba7211a6d6ba6e Mon Sep 17 00:00:00 2001 From: Andrey Liakhovets Date: Mon, 28 Aug 2006 16:52:41 +0200 Subject: [PATCH 0983/1063] [ALSA] ac97 - Fix VIA EPIA sound problem Fix the bad sound quality on VIA EPIA system using VIA VT1617A (ALSA bug#2381). Signed-off-by: Andrey Liakhovets Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/ac97/ac97_patch.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c index 392f6ccace5d..bdd7f89234f6 100644 --- a/sound/pci/ac97/ac97_patch.c +++ b/sound/pci/ac97/ac97_patch.c @@ -2799,6 +2799,10 @@ int patch_vt1616(struct snd_ac97 * ac97) */ int patch_vt1617a(struct snd_ac97 * ac97) { + /* bring analog power consumption to normal, like WinXP driver + * for EPIA SP + */ + snd_ac97_write_cache(ac97, 0x5c, 0x20); ac97->ext_id |= AC97_EI_SPDIF; /* force the detection of spdif */ ac97->rates[AC97_RATES_SPDIF] = SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000; return 0; From a79eee8d3d8a80c37d235e1181d67c3705c7bbfe Mon Sep 17 00:00:00 2001 From: Luke Ross Date: Tue, 29 Aug 2006 10:46:32 +0200 Subject: [PATCH 0984/1063] [ALSA] Support for non-standard rates in USB audio driver There's at least one USB audio chipset out there which supports only one non-standard rate (ID 0e6a:0310 supports 46875Hz). There's a few other patches for this card which are unsatisfactory because they attempt to map this rate to 44.1k leading to sound distortion. The patch below uses SNDRV_PCM_RATE_KNOT to properly support the non-standard rates where they are available. Signed-off-by: Luke Ross Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/usb/usbaudio.c | 46 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c index 087f9b64d8a0..664dd4c21e66 100644 --- a/sound/usb/usbaudio.c +++ b/sound/usb/usbaudio.c @@ -123,6 +123,7 @@ struct audioformat { unsigned int rate_min, rate_max; /* min/max rates */ unsigned int nr_rates; /* number of rate table entries */ unsigned int *rate_table; /* rate table */ + unsigned int needs_knot; /* any unusual rates? */ }; struct snd_usb_substream; @@ -1759,6 +1760,9 @@ static int check_hw_params_convention(struct snd_usb_substream *subs) } channels[f->format] |= (1 << f->channels); rates[f->format] |= f->rates; + /* needs knot? */ + if (f->needs_knot) + goto __out; } /* check whether channels and rates match for all formats */ cmaster = rmaster = 0; @@ -1799,6 +1803,38 @@ static int check_hw_params_convention(struct snd_usb_substream *subs) return err; } +/* + * If the device supports unusual bit rates, does the request meet these? + */ +static int snd_usb_pcm_check_knot(struct snd_pcm_runtime *runtime, + struct snd_usb_substream *subs) +{ + struct list_head *p; + struct snd_pcm_hw_constraint_list constraints_rates; + int err; + + list_for_each(p, &subs->fmt_list) { + struct audioformat *fp; + fp = list_entry(p, struct audioformat, list); + + if (!fp->needs_knot) + continue; + + constraints_rates.count = fp->nr_rates; + constraints_rates.list = fp->rate_table; + constraints_rates.mask = 0; + + err = snd_pcm_hw_constraint_list(runtime, 0, + SNDRV_PCM_HW_PARAM_RATE, + &constraints_rates); + + if (err < 0) + return err; + } + + return 0; +} + /* * set up the runtime hardware information. @@ -1861,6 +1897,8 @@ static int setup_hw_info(struct snd_pcm_runtime *runtime, struct snd_usb_substre SNDRV_PCM_HW_PARAM_CHANNELS, -1)) < 0) return err; + if ((err = snd_usb_pcm_check_knot(runtime, subs)) < 0) + return err; } return 0; } @@ -2406,6 +2444,7 @@ static int parse_audio_format_rates(struct snd_usb_audio *chip, struct audioform unsigned char *fmt, int offset) { int nr_rates = fmt[offset]; + int found; if (fmt[0] < offset + 1 + 3 * (nr_rates ? nr_rates : 2)) { snd_printk(KERN_ERR "%d:%u:%d : invalid FORMAT_TYPE desc\n", chip->dev->devnum, fp->iface, fp->altsetting); @@ -2428,6 +2467,7 @@ static int parse_audio_format_rates(struct snd_usb_audio *chip, struct audioform return -1; } + fp->needs_knot = 0; fp->nr_rates = nr_rates; fp->rate_min = fp->rate_max = combine_triple(&fmt[8]); for (r = 0, idx = offset + 1; r < nr_rates; r++, idx += 3) { @@ -2436,13 +2476,19 @@ static int parse_audio_format_rates(struct snd_usb_audio *chip, struct audioform fp->rate_min = rate; else if (rate > fp->rate_max) fp->rate_max = rate; + found = 0; for (c = 0; c < (int)ARRAY_SIZE(conv_rates); c++) { if (rate == conv_rates[c]) { + found = 1; fp->rates |= (1 << c); break; } } + if (!found) + fp->needs_knot = 1; } + if (fp->needs_knot) + fp->rates |= SNDRV_PCM_RATE_KNOT; } else { /* continuous rates */ fp->rates = SNDRV_PCM_RATE_CONTINUOUS; From d0ae48471570c680333cbe28c143bbab887a4ec2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 29 Aug 2006 18:15:15 +0200 Subject: [PATCH 0985/1063] [ALSA] Add missing dB scale information to vxpocket driver Added the missing dB scale information for Mic volume to vxpocket driver. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pcmcia/vx/vxp_mixer.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pcmcia/vx/vxp_mixer.c b/sound/pcmcia/vx/vxp_mixer.c index e237f6c2018f..bced7b623b12 100644 --- a/sound/pcmcia/vx/vxp_mixer.c +++ b/sound/pcmcia/vx/vxp_mixer.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "vxpocket.h" #define MIC_LEVEL_MIN 0 @@ -63,12 +64,17 @@ static int vx_mic_level_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_v return 0; } +static DECLARE_TLV_DB_SCALE(db_scale_mic, -21, 3, 0); + static struct snd_kcontrol_new vx_control_mic_level = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Mic Capture Volume", .info = vx_mic_level_info, .get = vx_mic_level_get, .put = vx_mic_level_put, + .tlv = { .p = db_scale_mic }, }; /* From 723b2b0d36fa7cea81a962af2d40d88520d5a5f1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 30 Aug 2006 16:49:54 +0200 Subject: [PATCH 0986/1063] [ALSA] Clean up and add TLV support to AK4xxx i2c driver - Clean up the code in AK4xxx-ADDA i2c code. - Fix capture gain controls for AK5365 - Changed the static table for DAC/ADC mixer labels to use structs - Implemented TLV entries for each AK codec The volumes in AK4524, AK4528 and AK5365 are corrected with a table to be suitable for dB conversion. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- include/sound/ak4xxx-adda.h | 40 ++- sound/i2c/other/ak4xxx-adda.c | 559 +++++++++++++++++++--------------- sound/pci/ice1712/revo.c | 47 +-- 3 files changed, 372 insertions(+), 274 deletions(-) diff --git a/include/sound/ak4xxx-adda.h b/include/sound/ak4xxx-adda.h index 65ddfa3cac1f..026e4072a9a1 100644 --- a/include/sound/ak4xxx-adda.h +++ b/include/sound/ak4xxx-adda.h @@ -39,14 +39,26 @@ struct snd_ak4xxx_ops { #define AK4XXX_IMAGE_SIZE (AK4XXX_MAX_CHIPS * 16) /* 64 bytes */ +/* DAC label and channels */ +struct snd_akm4xxx_dac_channel { + char *name; /* mixer volume name */ + unsigned int num_channels; +}; + +/* ADC labels and channels */ +struct snd_akm4xxx_adc_channel { + char *name; /* capture gain volume label */ + char *gain_name; /* IPGA */ + char *switch_name; /* capture switch */ + unsigned int num_channels; +}; + struct snd_akm4xxx { struct snd_card *card; unsigned int num_adcs; /* AK4524 or AK4528 ADCs */ unsigned int num_dacs; /* AK4524 or AK4528 DACs */ unsigned char images[AK4XXX_IMAGE_SIZE]; /* saved register image */ - unsigned char ipga_gain[AK4XXX_MAX_CHIPS][2]; /* saved register image - * for IPGA (AK4528) - */ + unsigned char volumes[AK4XXX_IMAGE_SIZE]; /* saved volume values */ unsigned long private_value[AK4XXX_MAX_CHIPS]; /* helper for driver */ void *private_data[AK4XXX_MAX_CHIPS]; /* helper for driver */ /* template should fill the following fields */ @@ -56,10 +68,11 @@ struct snd_akm4xxx { SND_AK4355, SND_AK4358, SND_AK4381, SND_AK5365 } type; - unsigned int *num_stereo; /* array of combined counts - * for the mixer - */ - char **channel_names; /* array of mixer channel names */ + + /* (array) information of combined codecs */ + struct snd_akm4xxx_dac_channel *dac_info; + struct snd_akm4xxx_adc_channel *adc_info; + struct snd_ak4xxx_ops ops; }; @@ -73,9 +86,18 @@ int snd_akm4xxx_build_controls(struct snd_akm4xxx *ak); (ak)->images[(chip) * 16 + (reg)] #define snd_akm4xxx_set(ak,chip,reg,val) \ ((ak)->images[(chip) * 16 + (reg)] = (val)) +#define snd_akm4xxx_get_vol(ak,chip,reg) \ + (ak)->volumes[(chip) * 16 + (reg)] +#define snd_akm4xxx_set_vol(ak,chip,reg,val) \ + ((ak)->volumes[(chip) * 16 + (reg)] = (val)) + +/* Warning: IPGA is tricky - we assume the addr + 4 is unused + * so far, it's OK for all AK codecs with IPGA: + * AK4524, AK4528 and EK5365 + */ #define snd_akm4xxx_get_ipga(ak,chip,reg) \ - (ak)->ipga_gain[chip][(reg)-4] + snd_akm4xxx_get_vol(ak, chip, (reg) + 4) #define snd_akm4xxx_set_ipga(ak,chip,reg,val) \ - ((ak)->ipga_gain[chip][(reg)-4] = (val)) + snd_akm4xxx_set_vol(ak, chip, (reg) + 4, val) #endif /* __SOUND_AK4XXX_ADDA_H */ diff --git a/sound/i2c/other/ak4xxx-adda.c b/sound/i2c/other/ak4xxx-adda.c index 89fc3cbc2356..c34cb4684607 100644 --- a/sound/i2c/other/ak4xxx-adda.c +++ b/sound/i2c/other/ak4xxx-adda.c @@ -28,12 +28,14 @@ #include #include #include +#include #include MODULE_AUTHOR("Jaroslav Kysela , Takashi Iwai "); MODULE_DESCRIPTION("Routines for control of AK452x / AK43xx AD/DA converters"); MODULE_LICENSE("GPL"); +/* write the given register and save the data to the cache */ void snd_akm4xxx_write(struct snd_akm4xxx *ak, int chip, unsigned char reg, unsigned char val) { @@ -41,15 +43,10 @@ void snd_akm4xxx_write(struct snd_akm4xxx *ak, int chip, unsigned char reg, ak->ops.write(ak, chip, reg, val); /* save the data */ - if (ak->type == SND_AK4524 || ak->type == SND_AK4528) { - if ((reg != 0x04 && reg != 0x05) || (val & 0x80) == 0) - snd_akm4xxx_set(ak, chip, reg, val); - else - snd_akm4xxx_set_ipga(ak, chip, reg, val); - } else { - /* AK4529, or else */ + /* don't overwrite with IPGA data */ + if ((ak->type != SND_AK4524 && ak->type != SND_AK5365) || + (reg != 0x04 && reg != 0x05) || (val & 0x80) == 0) snd_akm4xxx_set(ak, chip, reg, val); - } ak->ops.unlock(ak, chip); } @@ -78,7 +75,7 @@ static void ak4524_reset(struct snd_akm4xxx *ak, int state) /* IPGA */ for (reg = 0x04; reg < 0x06; reg++) snd_akm4xxx_write(ak, chip, reg, - snd_akm4xxx_get_ipga(ak, chip, reg)); + snd_akm4xxx_get_ipga(ak, chip, reg) | 0x80); } } @@ -144,6 +141,42 @@ void snd_akm4xxx_reset(struct snd_akm4xxx *ak, int state) EXPORT_SYMBOL(snd_akm4xxx_reset); + +/* + * Volume conversion table for non-linear volumes + * from -63.5dB (mute) to 0dB step 0.5dB + * + * Used for AK4524 input/ouput attenuation, AK4528, and + * AK5365 input attenuation + */ +static unsigned char vol_cvt_datt[128] = { + 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, + 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, + 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x09, 0x0a, + 0x0a, 0x0b, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x0f, + 0x10, 0x10, 0x11, 0x12, 0x12, 0x13, 0x13, 0x14, + 0x15, 0x16, 0x17, 0x17, 0x18, 0x19, 0x1a, 0x1c, + 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x23, + 0x24, 0x25, 0x26, 0x28, 0x29, 0x2a, 0x2b, 0x2d, + 0x2e, 0x30, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, + 0x37, 0x38, 0x39, 0x3b, 0x3c, 0x3e, 0x3f, 0x40, + 0x41, 0x42, 0x43, 0x44, 0x46, 0x47, 0x48, 0x4a, + 0x4b, 0x4d, 0x4e, 0x50, 0x51, 0x52, 0x53, 0x54, + 0x55, 0x56, 0x58, 0x59, 0x5b, 0x5c, 0x5e, 0x5f, + 0x60, 0x61, 0x62, 0x64, 0x65, 0x66, 0x67, 0x69, + 0x6a, 0x6c, 0x6d, 0x6f, 0x70, 0x71, 0x72, 0x73, + 0x75, 0x76, 0x77, 0x79, 0x7a, 0x7c, 0x7d, 0x7f, +}; + +/* + * dB tables + */ +static DECLARE_TLV_DB_SCALE(db_scale_vol_datt, -6350, 50, 1); +static DECLARE_TLV_DB_SCALE(db_scale_8bit, -12750, 50, 1); +static DECLARE_TLV_DB_SCALE(db_scale_7bit, -6350, 50, 1); +static DECLARE_TLV_DB_LINEAR(db_scale_linear, TLV_DB_GAIN_MUTE, 0); +static DECLARE_TLV_DB_SCALE(db_scale_ipga, 0, 50, 0); + /* * initialize all the ak4xxx chips */ @@ -240,6 +273,9 @@ void snd_akm4xxx_init(struct snd_akm4xxx *ak) int chip, num_chips; unsigned char *ptr, reg, data, *inits; + memset(ak->images, 0, sizeof(ak->images)); + memset(ak->volumes, 0, sizeof(ak->volumes)); + switch (ak->type) { case SND_AK4524: inits = inits_ak4524; @@ -265,6 +301,9 @@ void snd_akm4xxx_init(struct snd_akm4xxx *ak) inits = inits_ak4381; num_chips = ak->num_dacs / 2; break; + case SND_AK5365: + /* FIXME: any init sequence? */ + return; default: snd_BUG(); return; @@ -282,16 +321,21 @@ void snd_akm4xxx_init(struct snd_akm4xxx *ak) EXPORT_SYMBOL(snd_akm4xxx_init); +/* + * Mixer callbacks + */ +#define AK_VOL_CVT (1<<21) /* need dB conversion */ +#define AK_NEEDSMSB (1<<22) /* need MSB update bit */ +#define AK_INVERT (1<<23) /* data is inverted */ #define AK_GET_CHIP(val) (((val) >> 8) & 0xff) #define AK_GET_ADDR(val) ((val) & 0xff) -#define AK_GET_SHIFT(val) (((val) >> 16) & 0x3f) +#define AK_GET_SHIFT(val) (((val) >> 16) & 0x1f) +#define AK_GET_VOL_CVT(val) (((val) >> 21) & 1) #define AK_GET_NEEDSMSB(val) (((val) >> 22) & 1) #define AK_GET_INVERT(val) (((val) >> 23) & 1) #define AK_GET_MASK(val) (((val) >> 24) & 0xff) #define AK_COMPOSE(chip,addr,shift,mask) \ (((chip) << 8) | (addr) | ((shift) << 16) | ((mask) << 24)) -#define AK_NEEDSMSB (1<<22) -#define AK_INVERT (1<<23) static int snd_akm4xxx_volume_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) @@ -311,37 +355,37 @@ static int snd_akm4xxx_volume_get(struct snd_kcontrol *kcontrol, struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol); int chip = AK_GET_CHIP(kcontrol->private_value); int addr = AK_GET_ADDR(kcontrol->private_value); - int needsmsb = AK_GET_NEEDSMSB(kcontrol->private_value); - int invert = AK_GET_INVERT(kcontrol->private_value); - unsigned int mask = AK_GET_MASK(kcontrol->private_value); - unsigned char val = snd_akm4xxx_get(ak, chip, addr); - if (needsmsb) - val &= 0x7f; - ucontrol->value.integer.value[0] = invert ? mask - val : val; + ucontrol->value.integer.value[0] = snd_akm4xxx_get_vol(ak, chip, addr); return 0; } +static int put_ak_reg(struct snd_kcontrol *kcontrol, int addr, + unsigned char nval) +{ + struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol); + unsigned int mask = AK_GET_MASK(kcontrol->private_value); + int chip = AK_GET_CHIP(kcontrol->private_value); + + if (snd_akm4xxx_get_vol(ak, chip, addr) == nval) + return 0; + + snd_akm4xxx_set_vol(ak, chip, addr, nval); + if (AK_GET_VOL_CVT(kcontrol->private_value)) + nval = vol_cvt_datt[nval]; + if (AK_GET_INVERT(kcontrol->private_value)) + nval = mask - nval; + if (AK_GET_NEEDSMSB(kcontrol->private_value)) + nval |= 0x80; + snd_akm4xxx_write(ak, chip, addr, nval); + return 1; +} + static int snd_akm4xxx_volume_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { - struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol); - int chip = AK_GET_CHIP(kcontrol->private_value); - int addr = AK_GET_ADDR(kcontrol->private_value); - int needsmsb = AK_GET_NEEDSMSB(kcontrol->private_value); - int invert = AK_GET_INVERT(kcontrol->private_value); - unsigned int mask = AK_GET_MASK(kcontrol->private_value); - unsigned char nval = ucontrol->value.integer.value[0] % (mask+1); - int change; - - if (invert) - nval = mask - nval; - if (needsmsb) - nval |= 0x80; - change = snd_akm4xxx_get(ak, chip, addr) != nval; - if (change) - snd_akm4xxx_write(ak, chip, addr, nval); - return change; + return put_ak_reg(kcontrol, AK_GET_ADDR(kcontrol->private_value), + ucontrol->value.integer.value[0]); } static int snd_akm4xxx_stereo_volume_info(struct snd_kcontrol *kcontrol, @@ -362,66 +406,25 @@ static int snd_akm4xxx_stereo_volume_get(struct snd_kcontrol *kcontrol, struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol); int chip = AK_GET_CHIP(kcontrol->private_value); int addr = AK_GET_ADDR(kcontrol->private_value); - int needsmsb = AK_GET_NEEDSMSB(kcontrol->private_value); - int invert = AK_GET_INVERT(kcontrol->private_value); - unsigned int mask = AK_GET_MASK(kcontrol->private_value); - unsigned char val; - - val = snd_akm4xxx_get(ak, chip, addr); - if (needsmsb) - val &= 0x7f; - ucontrol->value.integer.value[0] = invert ? mask - val : val; - - val = snd_akm4xxx_get(ak, chip, addr+1); - if (needsmsb) - val &= 0x7f; - ucontrol->value.integer.value[1] = invert ? mask - val : val; + ucontrol->value.integer.value[0] = snd_akm4xxx_get_vol(ak, chip, addr); + ucontrol->value.integer.value[1] = snd_akm4xxx_get_vol(ak, chip, addr+1); return 0; } static int snd_akm4xxx_stereo_volume_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { - struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol); - int chip = AK_GET_CHIP(kcontrol->private_value); int addr = AK_GET_ADDR(kcontrol->private_value); - int needsmsb = AK_GET_NEEDSMSB(kcontrol->private_value); - int invert = AK_GET_INVERT(kcontrol->private_value); - unsigned int mask = AK_GET_MASK(kcontrol->private_value); - unsigned char nval = ucontrol->value.integer.value[0] % (mask+1); - int change0, change1; + int change; - if (invert) - nval = mask - nval; - if (needsmsb) - nval |= 0x80; - change0 = snd_akm4xxx_get(ak, chip, addr) != nval; - if (change0) - snd_akm4xxx_write(ak, chip, addr, nval); - - nval = ucontrol->value.integer.value[1] % (mask+1); - if (invert) - nval = mask - nval; - if (needsmsb) - nval |= 0x80; - change1 = snd_akm4xxx_get(ak, chip, addr+1) != nval; - if (change1) - snd_akm4xxx_write(ak, chip, addr+1, nval); - - - return change0 || change1; + change = put_ak_reg(kcontrol, addr, ucontrol->value.integer.value[0]); + change |= put_ak_reg(kcontrol, addr + 1, + ucontrol->value.integer.value[1]); + return change; } -static int snd_akm4xxx_ipga_gain_info(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_info *uinfo) -{ - uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; - uinfo->count = 1; - uinfo->value.integer.min = 0; - uinfo->value.integer.max = 36; - return 0; -} +#define snd_akm4xxx_ipga_gain_info snd_akm4xxx_volume_info static int snd_akm4xxx_ipga_gain_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) @@ -429,21 +432,57 @@ static int snd_akm4xxx_ipga_gain_get(struct snd_kcontrol *kcontrol, struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol); int chip = AK_GET_CHIP(kcontrol->private_value); int addr = AK_GET_ADDR(kcontrol->private_value); + ucontrol->value.integer.value[0] = - snd_akm4xxx_get_ipga(ak, chip, addr) & 0x7f; + snd_akm4xxx_get_ipga(ak, chip, addr); return 0; } +static int put_ak_ipga(struct snd_kcontrol *kcontrol, int addr, + unsigned char nval) +{ + struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol); + int chip = AK_GET_CHIP(kcontrol->private_value); + + if (snd_akm4xxx_get_ipga(ak, chip, addr) == nval) + return 0; + snd_akm4xxx_set_ipga(ak, chip, addr, nval); + snd_akm4xxx_write(ak, chip, addr, nval | 0x80); /* need MSB */ + return 1; +} + static int snd_akm4xxx_ipga_gain_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) +{ + return put_ak_ipga(kcontrol, AK_GET_ADDR(kcontrol->private_value), + ucontrol->value.integer.value[0]); +} + +#define snd_akm4xxx_stereo_gain_info snd_akm4xxx_stereo_volume_info + +static int snd_akm4xxx_stereo_gain_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) { struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol); int chip = AK_GET_CHIP(kcontrol->private_value); int addr = AK_GET_ADDR(kcontrol->private_value); - unsigned char nval = (ucontrol->value.integer.value[0] % 37) | 0x80; - int change = snd_akm4xxx_get_ipga(ak, chip, addr) != nval; - if (change) - snd_akm4xxx_write(ak, chip, addr, nval); + + ucontrol->value.integer.value[0] = + snd_akm4xxx_get_ipga(ak, chip, addr); + ucontrol->value.integer.value[1] = + snd_akm4xxx_get_ipga(ak, chip, addr + 1); + return 0; +} + +static int snd_akm4xxx_stereo_gain_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + int addr = AK_GET_ADDR(kcontrol->private_value); + int change; + + change = put_ak_ipga(kcontrol, addr, ucontrol->value.integer.value[0]); + change |= put_ak_ipga(kcontrol, addr + 1, + ucontrol->value.integer.value[1]); return change; } @@ -548,221 +587,247 @@ static int ak4xxx_switch_put(struct snd_kcontrol *kcontrol, * build AK4xxx controls */ -int snd_akm4xxx_build_controls(struct snd_akm4xxx *ak) +static int build_dac_controls(struct snd_akm4xxx *ak) { - unsigned int idx, num_emphs; - struct snd_kcontrol *ctl; - int err; - int mixer_ch = 0; - int num_stereo; - - ctl = kmalloc(sizeof(*ctl), GFP_KERNEL); - if (! ctl) - return -ENOMEM; + int idx, err, mixer_ch, num_stereo; + struct snd_kcontrol_new knew; + mixer_ch = 0; for (idx = 0; idx < ak->num_dacs; ) { - memset(ctl, 0, sizeof(*ctl)); - if (ak->channel_names == NULL) { - strcpy(ctl->id.name, "DAC Volume"); + memset(&knew, 0, sizeof(knew)); + if (! ak->dac_info || ! ak->dac_info[mixer_ch].name) { + knew.name = "DAC Volume"; + knew.index = mixer_ch + ak->idx_offset * 2; num_stereo = 1; - ctl->id.index = mixer_ch + ak->idx_offset * 2; } else { - strcpy(ctl->id.name, ak->channel_names[mixer_ch]); - num_stereo = ak->num_stereo[mixer_ch]; - ctl->id.index = 0; + knew.name = ak->dac_info[mixer_ch].name; + num_stereo = ak->dac_info[mixer_ch].num_channels; } - ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; - ctl->count = 1; + knew.iface = SNDRV_CTL_ELEM_IFACE_MIXER; + knew.count = 1; + knew.access = SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ; if (num_stereo == 2) { - ctl->info = snd_akm4xxx_stereo_volume_info; - ctl->get = snd_akm4xxx_stereo_volume_get; - ctl->put = snd_akm4xxx_stereo_volume_put; + knew.info = snd_akm4xxx_stereo_volume_info; + knew.get = snd_akm4xxx_stereo_volume_get; + knew.put = snd_akm4xxx_stereo_volume_put; } else { - ctl->info = snd_akm4xxx_volume_info; - ctl->get = snd_akm4xxx_volume_get; - ctl->put = snd_akm4xxx_volume_put; + knew.info = snd_akm4xxx_volume_info; + knew.get = snd_akm4xxx_volume_get; + knew.put = snd_akm4xxx_volume_put; } switch (ak->type) { case SND_AK4524: /* register 6 & 7 */ - ctl->private_value = - AK_COMPOSE(idx/2, (idx%2) + 6, 0, 127); + knew.private_value = + AK_COMPOSE(idx/2, (idx%2) + 6, 0, 127) | + AK_VOL_CVT; + knew.tlv.p = db_scale_vol_datt; break; case SND_AK4528: /* register 4 & 5 */ - ctl->private_value = - AK_COMPOSE(idx/2, (idx%2) + 4, 0, 127); + knew.private_value = + AK_COMPOSE(idx/2, (idx%2) + 4, 0, 127) | + AK_VOL_CVT; + knew.tlv.p = db_scale_vol_datt; break; case SND_AK4529: { /* registers 2-7 and b,c */ int val = idx < 6 ? idx + 2 : (idx - 6) + 0xb; - ctl->private_value = + knew.private_value = AK_COMPOSE(0, val, 0, 255) | AK_INVERT; + knew.tlv.p = db_scale_8bit; break; } case SND_AK4355: /* register 4-9, chip #0 only */ - ctl->private_value = AK_COMPOSE(0, idx + 4, 0, 255); + knew.private_value = AK_COMPOSE(0, idx + 4, 0, 255); + knew.tlv.p = db_scale_8bit; break; case SND_AK4358: { /* register 4-9 and 11-12, chip #0 only */ int addr = idx < 6 ? idx + 4 : idx + 5; - ctl->private_value = + knew.private_value = AK_COMPOSE(0, addr, 0, 127) | AK_NEEDSMSB; + knew.tlv.p = db_scale_7bit; break; } case SND_AK4381: /* register 3 & 4 */ - ctl->private_value = + knew.private_value = AK_COMPOSE(idx/2, (idx%2) + 3, 0, 255); + knew.tlv.p = db_scale_linear; break; default: - err = -EINVAL; - goto __error; + return -EINVAL; } - ctl->private_data = ak; - err = snd_ctl_add(ak->card, - snd_ctl_new(ctl, SNDRV_CTL_ELEM_ACCESS_READ| - SNDRV_CTL_ELEM_ACCESS_WRITE)); + err = snd_ctl_add(ak->card, snd_ctl_new1(&knew, ak)); if (err < 0) - goto __error; + return err; idx += num_stereo; mixer_ch++; } - for (idx = 0; idx < ak->num_adcs && ak->type == SND_AK4524; ++idx) { - memset(ctl, 0, sizeof(*ctl)); - strcpy(ctl->id.name, "ADC Volume"); - ctl->id.index = idx + ak->idx_offset * 2; - ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; - ctl->count = 1; - ctl->info = snd_akm4xxx_volume_info; - ctl->get = snd_akm4xxx_volume_get; - ctl->put = snd_akm4xxx_volume_put; - /* register 4 & 5 */ - ctl->private_value = - AK_COMPOSE(idx/2, (idx%2) + 4, 0, 127); - ctl->private_data = ak; - err = snd_ctl_add(ak->card, - snd_ctl_new(ctl, SNDRV_CTL_ELEM_ACCESS_READ| - SNDRV_CTL_ELEM_ACCESS_WRITE)); - if (err < 0) - goto __error; + return 0; +} - memset(ctl, 0, sizeof(*ctl)); - strcpy(ctl->id.name, "IPGA Analog Capture Volume"); - ctl->id.index = idx + ak->idx_offset * 2; - ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; - ctl->count = 1; - ctl->info = snd_akm4xxx_ipga_gain_info; - ctl->get = snd_akm4xxx_ipga_gain_get; - ctl->put = snd_akm4xxx_ipga_gain_put; +static int build_adc_controls(struct snd_akm4xxx *ak) +{ + int idx, err, mixer_ch, num_stereo; + struct snd_kcontrol_new knew; + + mixer_ch = 0; + for (idx = 0; idx < ak->num_adcs;) { + memset(&knew, 0, sizeof(knew)); + if (! ak->adc_info || ! ak->adc_info[mixer_ch].name) { + knew.name = "ADC Volume"; + knew.index = mixer_ch + ak->idx_offset * 2; + num_stereo = 1; + } else { + knew.name = ak->adc_info[mixer_ch].name; + num_stereo = ak->adc_info[mixer_ch].num_channels; + } + knew.iface = SNDRV_CTL_ELEM_IFACE_MIXER; + knew.count = 1; + knew.access = SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ; + if (num_stereo == 2) { + knew.info = snd_akm4xxx_stereo_volume_info; + knew.get = snd_akm4xxx_stereo_volume_get; + knew.put = snd_akm4xxx_stereo_volume_put; + } else { + knew.info = snd_akm4xxx_volume_info; + knew.get = snd_akm4xxx_volume_get; + knew.put = snd_akm4xxx_volume_put; + } /* register 4 & 5 */ - ctl->private_value = AK_COMPOSE(idx/2, (idx%2) + 4, 0, 0); - ctl->private_data = ak; - err = snd_ctl_add(ak->card, - snd_ctl_new(ctl, SNDRV_CTL_ELEM_ACCESS_READ| - SNDRV_CTL_ELEM_ACCESS_WRITE)); + knew.private_value = + AK_COMPOSE(idx/2, (idx%2) + 4, 0, 127) | + AK_VOL_CVT; + knew.tlv.p = db_scale_vol_datt; + err = snd_ctl_add(ak->card, snd_ctl_new1(&knew, ak)); if (err < 0) - goto __error; + return err; + + if (! ak->adc_info || ! ak->adc_info[mixer_ch].gain_name) + knew.name = "IPGA Analog Capture Volume"; + else + knew.name = ak->adc_info[mixer_ch].gain_name; + if (num_stereo == 2) { + knew.info = snd_akm4xxx_stereo_gain_info; + knew.get = snd_akm4xxx_stereo_gain_get; + knew.put = snd_akm4xxx_stereo_gain_put; + } else { + knew.info = snd_akm4xxx_ipga_gain_info; + knew.get = snd_akm4xxx_ipga_gain_get; + knew.put = snd_akm4xxx_ipga_gain_put; + } + /* register 4 & 5 */ + if (ak->type == SND_AK4524) + knew.private_value = AK_COMPOSE(idx/2, (idx%2) + 4, 0, + 24); + else /* AK5365 */ + knew.private_value = AK_COMPOSE(idx/2, (idx%2) + 4, 0, + 36); + knew.tlv.p = db_scale_ipga; + err = snd_ctl_add(ak->card, snd_ctl_new1(&knew, ak)); + if (err < 0) + return err; + + if (ak->type == SND_AK5365 && (idx % 2) == 0) { + if (! ak->adc_info || + ! ak->adc_info[mixer_ch].switch_name) + knew.name = "Capture Switch"; + else + knew.name = ak->adc_info[mixer_ch].switch_name; + knew.info = ak4xxx_switch_info; + knew.get = ak4xxx_switch_get; + knew.put = ak4xxx_switch_put; + knew.access = 0; + /* register 2, bit 0 (SMUTE): 0 = normal operation, + 1 = mute */ + knew.private_value = + AK_COMPOSE(idx/2, 2, 0, 0) | AK_INVERT; + err = snd_ctl_add(ak->card, snd_ctl_new1(&knew, ak)); + if (err < 0) + return err; + } + + idx += num_stereo; + mixer_ch++; } + return 0; +} - if (ak->type == SND_AK5365) { - memset(ctl, 0, sizeof(*ctl)); - if (ak->channel_names == NULL) - strcpy(ctl->id.name, "Capture Volume"); - else - strcpy(ctl->id.name, ak->channel_names[0]); - ctl->id.index = ak->idx_offset * 2; - ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; - ctl->count = 1; - ctl->info = snd_akm4xxx_stereo_volume_info; - ctl->get = snd_akm4xxx_stereo_volume_get; - ctl->put = snd_akm4xxx_stereo_volume_put; - /* Registers 4 & 5 (see AK5365 data sheet, pages 34 and 35): - * valid values are from 0x00 (mute) to 0x98 (+12dB). */ - ctl->private_value = - AK_COMPOSE(0, 4, 0, 0x98); - ctl->private_data = ak; - err = snd_ctl_add(ak->card, - snd_ctl_new(ctl, SNDRV_CTL_ELEM_ACCESS_READ| - SNDRV_CTL_ELEM_ACCESS_WRITE)); - if (err < 0) - goto __error; +static int build_deemphasis(struct snd_akm4xxx *ak, int num_emphs) +{ + int idx, err; + struct snd_kcontrol_new knew; - memset(ctl, 0, sizeof(*ctl)); - if (ak->channel_names == NULL) - strcpy(ctl->id.name, "Capture Switch"); - else - strcpy(ctl->id.name, ak->channel_names[1]); - ctl->id.index = ak->idx_offset * 2; - ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; - ctl->count = 1; - ctl->info = ak4xxx_switch_info; - ctl->get = ak4xxx_switch_get; - ctl->put = ak4xxx_switch_put; - /* register 2, bit 0 (SMUTE): 0 = normal operation, 1 = mute */ - ctl->private_value = - AK_COMPOSE(0, 2, 0, 0) | AK_INVERT; - ctl->private_data = ak; - err = snd_ctl_add(ak->card, - snd_ctl_new(ctl, SNDRV_CTL_ELEM_ACCESS_READ| - SNDRV_CTL_ELEM_ACCESS_WRITE)); + for (idx = 0; idx < num_emphs; idx++) { + memset(&knew, 0, sizeof(knew)); + knew.name = "Deemphasis"; + knew.index = idx + ak->idx_offset; + knew.iface = SNDRV_CTL_ELEM_IFACE_MIXER; + knew.count = 1; + knew.info = snd_akm4xxx_deemphasis_info; + knew.get = snd_akm4xxx_deemphasis_get; + knew.put = snd_akm4xxx_deemphasis_put; + switch (ak->type) { + case SND_AK4524: + case SND_AK4528: + /* register 3 */ + knew.private_value = AK_COMPOSE(idx, 3, 0, 0); + break; + case SND_AK4529: { + int shift = idx == 3 ? 6 : (2 - idx) * 2; + /* register 8 with shift */ + knew.private_value = AK_COMPOSE(0, 8, shift, 0); + break; + } + case SND_AK4355: + case SND_AK4358: + knew.private_value = AK_COMPOSE(idx, 3, 0, 0); + break; + case SND_AK4381: + knew.private_value = AK_COMPOSE(idx, 1, 1, 0); + break; + default: + return -EINVAL; + } + err = snd_ctl_add(ak->card, snd_ctl_new1(&knew, ak)); if (err < 0) - goto __error; + return err; + } + return 0; +} + +int snd_akm4xxx_build_controls(struct snd_akm4xxx *ak) +{ + int err, num_emphs; + + err = build_dac_controls(ak); + if (err < 0) + return err; + + if (ak->type == SND_AK4524 || ak->type == SND_AK5365) { + err = build_adc_controls(ak); + if (err < 0) + return err; } if (ak->type == SND_AK4355 || ak->type == SND_AK4358) num_emphs = 1; else num_emphs = ak->num_dacs / 2; - for (idx = 0; idx < num_emphs; idx++) { - memset(ctl, 0, sizeof(*ctl)); - strcpy(ctl->id.name, "Deemphasis"); - ctl->id.index = idx + ak->idx_offset; - ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; - ctl->count = 1; - ctl->info = snd_akm4xxx_deemphasis_info; - ctl->get = snd_akm4xxx_deemphasis_get; - ctl->put = snd_akm4xxx_deemphasis_put; - switch (ak->type) { - case SND_AK4524: - case SND_AK4528: - /* register 3 */ - ctl->private_value = AK_COMPOSE(idx, 3, 0, 0); - break; - case SND_AK4529: { - int shift = idx == 3 ? 6 : (2 - idx) * 2; - /* register 8 with shift */ - ctl->private_value = AK_COMPOSE(0, 8, shift, 0); - break; - } - case SND_AK4355: - case SND_AK4358: - ctl->private_value = AK_COMPOSE(idx, 3, 0, 0); - break; - case SND_AK4381: - ctl->private_value = AK_COMPOSE(idx, 1, 1, 0); - break; - default: - err = -EINVAL; - goto __error; - } - ctl->private_data = ak; - err = snd_ctl_add(ak->card, - snd_ctl_new(ctl, SNDRV_CTL_ELEM_ACCESS_READ| - SNDRV_CTL_ELEM_ACCESS_WRITE)); - if (err < 0) - goto __error; - } - err = 0; + err = build_deemphasis(ak, num_emphs); + if (err < 0) + return err; - __error: - kfree(ctl); - return err; + return 0; } - + EXPORT_SYMBOL(snd_akm4xxx_build_controls); static int __init alsa_akm4xxx_module_init(void) diff --git a/sound/pci/ice1712/revo.c b/sound/pci/ice1712/revo.c index 1134a57f9e65..c9eefa9bbfff 100644 --- a/sound/pci/ice1712/revo.c +++ b/sound/pci/ice1712/revo.c @@ -87,19 +87,34 @@ static void revo_set_rate_val(struct snd_akm4xxx *ak, unsigned int rate) * initialize the chips on M-Audio Revolution cards */ -static unsigned int revo71_num_stereo_front[] = {2}; -static char *revo71_channel_names_front[] = {"PCM Playback Volume"}; +#define AK_DAC(xname,xch) { .name = xname, .num_channels = xch } -static unsigned int revo71_num_stereo_surround[] = {1, 1, 2, 2}; -static char *revo71_channel_names_surround[] = {"PCM Center Playback Volume", "PCM LFE Playback Volume", - "PCM Side Playback Volume", "PCM Rear Playback Volume"}; +static struct snd_akm4xxx_dac_channel revo71_front[] = { + AK_DAC("PCM Playback Volume", 2) +}; -static unsigned int revo51_num_stereo[] = {2, 1, 1, 2}; -static char *revo51_channel_names[] = {"PCM Playback Volume", "PCM Center Playback Volume", - "PCM LFE Playback Volume", "PCM Rear Playback Volume"}; +static struct snd_akm4xxx_dac_channel revo71_surround[] = { + AK_DAC("PCM Center Playback Volume", 1), + AK_DAC("PCM LFE Playback Volume", 1), + AK_DAC("PCM Side Playback Volume", 2), + AK_DAC("PCM Rear Playback Volume", 2), +}; -static unsigned int revo51_adc_num_stereo[] = {2}; -static char *revo51_adc_channel_names[] = {"PCM Capture Volume","PCM Capture Switch"}; +static struct snd_akm4xxx_dac_channel revo51_dac[] = { + AK_DAC("PCM Playback Volume", 2), + AK_DAC("PCM Center Playback Volume", 1), + AK_DAC("PCM LFE Playback Volume", 1), + AK_DAC("PCM Rear Playback Volume", 2), +}; + +static struct snd_akm4xxx_adc_channel revo51_adc[] = { + { + .name = "PCM Capture Volume", + .gain_name = "PCM Capture Gain Volume", + .switch_name = "PCM Capture Switch", + .num_channels = 2 + }, +}; static struct snd_akm4xxx akm_revo_front __devinitdata = { .type = SND_AK4381, @@ -107,8 +122,7 @@ static struct snd_akm4xxx akm_revo_front __devinitdata = { .ops = { .set_rate_val = revo_set_rate_val }, - .num_stereo = revo71_num_stereo_front, - .channel_names = revo71_channel_names_front + .dac_info = revo71_front, }; static struct snd_ak4xxx_private akm_revo_front_priv __devinitdata = { @@ -130,8 +144,7 @@ static struct snd_akm4xxx akm_revo_surround __devinitdata = { .ops = { .set_rate_val = revo_set_rate_val }, - .num_stereo = revo71_num_stereo_surround, - .channel_names = revo71_channel_names_surround + .dac_info = revo71_surround, }; static struct snd_ak4xxx_private akm_revo_surround_priv __devinitdata = { @@ -152,8 +165,7 @@ static struct snd_akm4xxx akm_revo51 __devinitdata = { .ops = { .set_rate_val = revo_set_rate_val }, - .num_stereo = revo51_num_stereo, - .channel_names = revo51_channel_names + .dac_info = revo51_dac, }; static struct snd_ak4xxx_private akm_revo51_priv __devinitdata = { @@ -171,8 +183,7 @@ static struct snd_ak4xxx_private akm_revo51_priv __devinitdata = { static struct snd_akm4xxx akm_revo51_adc __devinitdata = { .type = SND_AK5365, .num_adcs = 2, - .num_stereo = revo51_adc_num_stereo, - .channel_names = revo51_adc_channel_names + .adc_info = revo51_adc, }; static struct snd_ak4xxx_private akm_revo51_adc_priv __devinitdata = { From 680ef792a1afdb3bf38e4a0296cce996a5b95317 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 30 Aug 2006 16:56:30 +0200 Subject: [PATCH 0987/1063] [ALSA] Add dB scale information to ice1712 driver Added the dB scale information for native digital volumes of ice1712 driver. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/ice1712/ice1712.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/pci/ice1712/ice1712.c b/sound/pci/ice1712/ice1712.c index 9b8325d65d8d..dc69392eafa3 100644 --- a/sound/pci/ice1712/ice1712.c +++ b/sound/pci/ice1712/ice1712.c @@ -62,6 +62,7 @@ #include #include #include +#include #include @@ -1377,6 +1378,7 @@ static int snd_ice1712_pro_mixer_volume_put(struct snd_kcontrol *kcontrol, struc return change; } +static DECLARE_TLV_DB_SCALE(db_scale_playback, -14400, 150, 0); static struct snd_kcontrol_new snd_ice1712_multi_playback_ctrls[] __devinitdata = { { @@ -1390,12 +1392,15 @@ static struct snd_kcontrol_new snd_ice1712_multi_playback_ctrls[] __devinitdata }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Multi Playback Volume", .info = snd_ice1712_pro_mixer_volume_info, .get = snd_ice1712_pro_mixer_volume_get, .put = snd_ice1712_pro_mixer_volume_put, .private_value = 0, .count = 10, + .tlv = { .p = db_scale_playback } }, }; @@ -1420,11 +1425,14 @@ static struct snd_kcontrol_new snd_ice1712_multi_capture_spdif_switch __devinitd static struct snd_kcontrol_new snd_ice1712_multi_capture_analog_volume __devinitdata = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "H/W Multi Capture Volume", .info = snd_ice1712_pro_mixer_volume_info, .get = snd_ice1712_pro_mixer_volume_get, .put = snd_ice1712_pro_mixer_volume_put, .private_value = 10, + .tlv = { .p = db_scale_playback } }; static struct snd_kcontrol_new snd_ice1712_multi_capture_spdif_volume __devinitdata = { From f640c3205aca4fe231beccc9e719c946cf3fee7a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 30 Aug 2006 16:57:37 +0200 Subject: [PATCH 0988/1063] [ALSA] Add dB scale information to ice1724 driver Added the dB scale information to each board support code of ice1724 driver. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/ice1712/aureon.c | 104 ++++++++++++++++++++++++++------- sound/pci/ice1712/phase.c | 39 ++++++++++--- sound/pci/ice1712/pontis.c | 9 +++ sound/pci/ice1712/prodigy192.c | 14 +++++ 4 files changed, 139 insertions(+), 27 deletions(-) diff --git a/sound/pci/ice1712/aureon.c b/sound/pci/ice1712/aureon.c index 9492f3d2455b..9e76cebd2d22 100644 --- a/sound/pci/ice1712/aureon.c +++ b/sound/pci/ice1712/aureon.c @@ -60,6 +60,7 @@ #include "ice1712.h" #include "envy24ht.h" #include "aureon.h" +#include /* WM8770 registers */ #define WM_DAC_ATTEN 0x00 /* DAC1-8 analog attenuation */ @@ -660,6 +661,12 @@ static int aureon_ac97_mmute_put(struct snd_kcontrol *kcontrol, struct snd_ctl_e return change; } +static DECLARE_TLV_DB_SCALE(db_scale_wm_dac, -12700, 100, 1); +static DECLARE_TLV_DB_SCALE(db_scale_wm_pcm, -6400, 50, 1); +static DECLARE_TLV_DB_SCALE(db_scale_wm_adc, -1200, 100, 0); +static DECLARE_TLV_DB_SCALE(db_scale_ac97_master, -4650, 150, 0); +static DECLARE_TLV_DB_SCALE(db_scale_ac97_gain, -3450, 150, 0); + /* * Logarithmic volume values for WM8770 * Computed as 20 * Log10(255 / x) @@ -1409,10 +1416,13 @@ static struct snd_kcontrol_new aureon_dac_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Master Playback Volume", .info = wm_master_vol_info, .get = wm_master_vol_get, - .put = wm_master_vol_put + .put = wm_master_vol_put, + .tlv = { .p = db_scale_wm_dac } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -1424,11 +1434,14 @@ static struct snd_kcontrol_new aureon_dac_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Front Playback Volume", .info = wm_vol_info, .get = wm_vol_get, .put = wm_vol_put, - .private_value = (2 << 8) | 0 + .private_value = (2 << 8) | 0, + .tlv = { .p = db_scale_wm_dac } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -1440,11 +1453,14 @@ static struct snd_kcontrol_new aureon_dac_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Rear Playback Volume", .info = wm_vol_info, .get = wm_vol_get, .put = wm_vol_put, - .private_value = (2 << 8) | 2 + .private_value = (2 << 8) | 2, + .tlv = { .p = db_scale_wm_dac } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -1456,11 +1472,14 @@ static struct snd_kcontrol_new aureon_dac_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Center Playback Volume", .info = wm_vol_info, .get = wm_vol_get, .put = wm_vol_put, - .private_value = (1 << 8) | 4 + .private_value = (1 << 8) | 4, + .tlv = { .p = db_scale_wm_dac } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -1472,11 +1491,14 @@ static struct snd_kcontrol_new aureon_dac_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "LFE Playback Volume", .info = wm_vol_info, .get = wm_vol_get, .put = wm_vol_put, - .private_value = (1 << 8) | 5 + .private_value = (1 << 8) | 5, + .tlv = { .p = db_scale_wm_dac } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -1488,11 +1510,14 @@ static struct snd_kcontrol_new aureon_dac_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Side Playback Volume", .info = wm_vol_info, .get = wm_vol_get, .put = wm_vol_put, - .private_value = (2 << 8) | 6 + .private_value = (2 << 8) | 6, + .tlv = { .p = db_scale_wm_dac } } }; @@ -1506,10 +1531,13 @@ static struct snd_kcontrol_new wm_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "PCM Playback Volume", .info = wm_pcm_vol_info, .get = wm_pcm_vol_get, - .put = wm_pcm_vol_put + .put = wm_pcm_vol_put, + .tlv = { .p = db_scale_wm_pcm } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -1520,10 +1548,13 @@ static struct snd_kcontrol_new wm_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Capture Volume", .info = wm_adc_vol_info, .get = wm_adc_vol_get, - .put = wm_adc_vol_put + .put = wm_adc_vol_put, + .tlv = { .p = db_scale_wm_adc } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -1567,11 +1598,14 @@ static struct snd_kcontrol_new ac97_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "AC97 Playback Volume", .info = aureon_ac97_vol_info, .get = aureon_ac97_vol_get, .put = aureon_ac97_vol_put, - .private_value = AC97_MASTER|AUREON_AC97_STEREO + .private_value = AC97_MASTER|AUREON_AC97_STEREO, + .tlv = { .p = db_scale_ac97_master } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -1583,11 +1617,14 @@ static struct snd_kcontrol_new ac97_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "CD Playback Volume", .info = aureon_ac97_vol_info, .get = aureon_ac97_vol_get, .put = aureon_ac97_vol_put, - .private_value = AC97_CD|AUREON_AC97_STEREO + .private_value = AC97_CD|AUREON_AC97_STEREO, + .tlv = { .p = db_scale_ac97_gain } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -1599,11 +1636,14 @@ static struct snd_kcontrol_new ac97_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Aux Playback Volume", .info = aureon_ac97_vol_info, .get = aureon_ac97_vol_get, .put = aureon_ac97_vol_put, - .private_value = AC97_AUX|AUREON_AC97_STEREO + .private_value = AC97_AUX|AUREON_AC97_STEREO, + .tlv = { .p = db_scale_ac97_gain } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -1615,11 +1655,14 @@ static struct snd_kcontrol_new ac97_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Line Playback Volume", .info = aureon_ac97_vol_info, .get = aureon_ac97_vol_get, .put = aureon_ac97_vol_put, - .private_value = AC97_LINE|AUREON_AC97_STEREO + .private_value = AC97_LINE|AUREON_AC97_STEREO, + .tlv = { .p = db_scale_ac97_gain } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -1631,11 +1674,14 @@ static struct snd_kcontrol_new ac97_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Mic Playback Volume", .info = aureon_ac97_vol_info, .get = aureon_ac97_vol_get, .put = aureon_ac97_vol_put, - .private_value = AC97_MIC + .private_value = AC97_MIC, + .tlv = { .p = db_scale_ac97_gain } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -1657,11 +1703,14 @@ static struct snd_kcontrol_new universe_ac97_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "AC97 Playback Volume", .info = aureon_ac97_vol_info, .get = aureon_ac97_vol_get, .put = aureon_ac97_vol_put, - .private_value = AC97_MASTER|AUREON_AC97_STEREO + .private_value = AC97_MASTER|AUREON_AC97_STEREO, + .tlv = { .p = db_scale_ac97_master } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -1673,11 +1722,14 @@ static struct snd_kcontrol_new universe_ac97_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "CD Playback Volume", .info = aureon_ac97_vol_info, .get = aureon_ac97_vol_get, .put = aureon_ac97_vol_put, - .private_value = AC97_AUX|AUREON_AC97_STEREO + .private_value = AC97_AUX|AUREON_AC97_STEREO, + .tlv = { .p = db_scale_ac97_gain } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -1685,15 +1737,18 @@ static struct snd_kcontrol_new universe_ac97_controls[] __devinitdata = { .info = aureon_ac97_mute_info, .get = aureon_ac97_mute_get, .put = aureon_ac97_mute_put, - .private_value = AC97_CD, + .private_value = AC97_CD }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Phono Playback Volume", .info = aureon_ac97_vol_info, .get = aureon_ac97_vol_get, .put = aureon_ac97_vol_put, - .private_value = AC97_CD|AUREON_AC97_STEREO + .private_value = AC97_CD|AUREON_AC97_STEREO, + .tlv = { .p = db_scale_ac97_gain } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -1705,11 +1760,14 @@ static struct snd_kcontrol_new universe_ac97_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Line Playback Volume", .info = aureon_ac97_vol_info, .get = aureon_ac97_vol_get, .put = aureon_ac97_vol_put, - .private_value = AC97_LINE|AUREON_AC97_STEREO + .private_value = AC97_LINE|AUREON_AC97_STEREO, + .tlv = { .p = db_scale_ac97_gain } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -1721,11 +1779,14 @@ static struct snd_kcontrol_new universe_ac97_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Mic Playback Volume", .info = aureon_ac97_vol_info, .get = aureon_ac97_vol_get, .put = aureon_ac97_vol_put, - .private_value = AC97_MIC + .private_value = AC97_MIC, + .tlv = { .p = db_scale_ac97_gain } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -1744,11 +1805,14 @@ static struct snd_kcontrol_new universe_ac97_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Aux Playback Volume", .info = aureon_ac97_vol_info, .get = aureon_ac97_vol_get, .put = aureon_ac97_vol_put, - .private_value = AC97_VIDEO|AUREON_AC97_STEREO + .private_value = AC97_VIDEO|AUREON_AC97_STEREO, + .tlv = { .p = db_scale_ac97_gain } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, diff --git a/sound/pci/ice1712/phase.c b/sound/pci/ice1712/phase.c index 502da1c8b5f7..e08d73f4ff85 100644 --- a/sound/pci/ice1712/phase.c +++ b/sound/pci/ice1712/phase.c @@ -46,6 +46,7 @@ #include "ice1712.h" #include "envy24ht.h" #include "phase.h" +#include /* WM8770 registers */ #define WM_DAC_ATTEN 0x00 /* DAC1-8 analog attenuation */ @@ -696,6 +697,9 @@ static int phase28_oversampling_put(struct snd_kcontrol *kcontrol, struct snd_ct return 0; } +static DECLARE_TLV_DB_SCALE(db_scale_wm_dac, -12700, 100, 1); +static DECLARE_TLV_DB_SCALE(db_scale_wm_pcm, -6400, 50, 1); + static struct snd_kcontrol_new phase28_dac_controls[] __devinitdata = { { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -706,10 +710,13 @@ static struct snd_kcontrol_new phase28_dac_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Master Playback Volume", .info = wm_master_vol_info, .get = wm_master_vol_get, - .put = wm_master_vol_put + .put = wm_master_vol_put, + .tlv = { .p = db_scale_wm_dac } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -721,11 +728,14 @@ static struct snd_kcontrol_new phase28_dac_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Front Playback Volume", .info = wm_vol_info, .get = wm_vol_get, .put = wm_vol_put, - .private_value = (2 << 8) | 0 + .private_value = (2 << 8) | 0, + .tlv = { .p = db_scale_wm_dac } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -737,11 +747,14 @@ static struct snd_kcontrol_new phase28_dac_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Rear Playback Volume", .info = wm_vol_info, .get = wm_vol_get, .put = wm_vol_put, - .private_value = (2 << 8) | 2 + .private_value = (2 << 8) | 2, + .tlv = { .p = db_scale_wm_dac } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -753,11 +766,14 @@ static struct snd_kcontrol_new phase28_dac_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Center Playback Volume", .info = wm_vol_info, .get = wm_vol_get, .put = wm_vol_put, - .private_value = (1 << 8) | 4 + .private_value = (1 << 8) | 4, + .tlv = { .p = db_scale_wm_dac } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -769,11 +785,14 @@ static struct snd_kcontrol_new phase28_dac_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "LFE Playback Volume", .info = wm_vol_info, .get = wm_vol_get, .put = wm_vol_put, - .private_value = (1 << 8) | 5 + .private_value = (1 << 8) | 5, + .tlv = { .p = db_scale_wm_dac } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -785,11 +804,14 @@ static struct snd_kcontrol_new phase28_dac_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Side Playback Volume", .info = wm_vol_info, .get = wm_vol_get, .put = wm_vol_put, - .private_value = (2 << 8) | 6 + .private_value = (2 << 8) | 6, + .tlv = { .p = db_scale_wm_dac } } }; @@ -803,10 +825,13 @@ static struct snd_kcontrol_new wm_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "PCM Playback Volume", .info = wm_pcm_vol_info, .get = wm_pcm_vol_get, - .put = wm_pcm_vol_put + .put = wm_pcm_vol_put, + .tlv = { .p = db_scale_wm_pcm } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, diff --git a/sound/pci/ice1712/pontis.c b/sound/pci/ice1712/pontis.c index 0efcad9260a5..6c74c2d2e7f3 100644 --- a/sound/pci/ice1712/pontis.c +++ b/sound/pci/ice1712/pontis.c @@ -31,6 +31,7 @@ #include #include +#include #include "ice1712.h" #include "envy24ht.h" @@ -564,6 +565,8 @@ static int pontis_gpio_data_put(struct snd_kcontrol *kcontrol, struct snd_ctl_el return changed; } +static DECLARE_TLV_DB_SCALE(db_scale_volume, -6400, 50, 1); + /* * mixers */ @@ -571,17 +574,23 @@ static int pontis_gpio_data_put(struct snd_kcontrol *kcontrol, struct snd_ctl_el static struct snd_kcontrol_new pontis_controls[] __devinitdata = { { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "PCM Playback Volume", .info = wm_dac_vol_info, .get = wm_dac_vol_get, .put = wm_dac_vol_put, + .tlv = { .p = db_scale_volume }, }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Capture Volume", .info = wm_adc_vol_info, .get = wm_adc_vol_get, .put = wm_adc_vol_put, + .tlv = { .p = db_scale_volume }, }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, diff --git a/sound/pci/ice1712/prodigy192.c b/sound/pci/ice1712/prodigy192.c index fdb5cb8fac97..41b2605daa3a 100644 --- a/sound/pci/ice1712/prodigy192.c +++ b/sound/pci/ice1712/prodigy192.c @@ -35,6 +35,7 @@ #include "envy24ht.h" #include "prodigy192.h" #include "stac946x.h" +#include static inline void stac9460_put(struct snd_ice1712 *ice, int reg, unsigned char val) { @@ -356,6 +357,9 @@ static int aureon_oversampling_put(struct snd_kcontrol *kcontrol, struct snd_ctl } #endif +static DECLARE_TLV_DB_SCALE(db_scale_dac, -19125, 75, 0); +static DECLARE_TLV_DB_SCALE(db_scale_adc, 0, 150, 0); + /* * mixers */ @@ -368,14 +372,18 @@ static struct snd_kcontrol_new stac_controls[] __devinitdata = { .get = stac9460_dac_mute_get, .put = stac9460_dac_mute_put, .private_value = 1, + .tlv = { .p = db_scale_dac } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Master Playback Volume", .info = stac9460_dac_vol_info, .get = stac9460_dac_vol_get, .put = stac9460_dac_vol_put, .private_value = 1, + .tlv = { .p = db_scale_dac } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -387,11 +395,14 @@ static struct snd_kcontrol_new stac_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "DAC Volume", .count = 6, .info = stac9460_dac_vol_info, .get = stac9460_dac_vol_get, .put = stac9460_dac_vol_put, + .tlv = { .p = db_scale_dac } }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -404,11 +415,14 @@ static struct snd_kcontrol_new stac_controls[] __devinitdata = { }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "ADC Volume", .count = 1, .info = stac9460_adc_vol_info, .get = stac9460_adc_vol_get, .put = stac9460_adc_vol_put, + .tlv = { .p = db_scale_adc } }, #if 0 { From 929861c669a443cf667ec0d80ac73a567ed4543c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 31 Aug 2006 16:55:40 +0200 Subject: [PATCH 0989/1063] [ALSA] hda-intel - Remove volatile Removed volatile from the position buffer pointer. Also, use synchronize_irq() instead of unreliable msleep(1) in the driver remove callback. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/hda_intel.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index c9ae9f778928..d56ea2125aa8 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -255,7 +255,7 @@ enum { struct azx_dev { u32 *bdl; /* virtual address of the BDL */ dma_addr_t bdl_addr; /* physical address of the BDL */ - volatile u32 *posbuf; /* position buffer pointer */ + u32 *posbuf; /* position buffer pointer */ unsigned int bufsize; /* size of the play buffer in bytes */ unsigned int fragsize; /* size of each period in bytes */ @@ -1197,7 +1197,7 @@ static snd_pcm_uframes_t azx_pcm_pointer(struct snd_pcm_substream *substream) if (chip->position_fix == POS_FIX_POSBUF || chip->position_fix == POS_FIX_AUTO) { /* use the position buffer */ - pos = *azx_dev->posbuf; + pos = le32_to_cpu(*azx_dev->posbuf); if (chip->position_fix == POS_FIX_AUTO && azx_dev->period_intr == 1 && ! pos) { printk(KERN_WARNING @@ -1345,7 +1345,7 @@ static int __devinit azx_init_stream(struct azx *chip) struct azx_dev *azx_dev = &chip->azx_dev[i]; azx_dev->bdl = (u32 *)(chip->bdl.area + off); azx_dev->bdl_addr = chip->bdl.addr + off; - azx_dev->posbuf = (volatile u32 *)(chip->posbuf.area + i * 8); + azx_dev->posbuf = (u32 __iomem *)(chip->posbuf.area + i * 8); /* offset: SDI0=0x80, SDI1=0xa0, ... SDO3=0x160 */ azx_dev->sd_addr = chip->remap_addr + (0x20 * i + 0x80); /* int mask: SDI0=0x01, SDI1=0x02, ... SDO3=0x80 */ @@ -1417,8 +1417,7 @@ static int azx_free(struct azx *chip) azx_writel(chip, DPLBASE, 0); azx_writel(chip, DPUBASE, 0); - /* wait a little for interrupts to finish */ - msleep(1); + synchronize_irq(chip->irq); } if (chip->irq >= 0) { From 927fc866025857c109219d4ed62d8c3cbc02713a Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Thu, 31 Aug 2006 17:03:43 +0200 Subject: [PATCH 0990/1063] [ALSA] sound/pci/hda/intel_hda: small cleanups Cleanup whitespace. Signed-off-by: Pavel Machek Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/hda_intel.c | 52 ++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index d56ea2125aa8..cc50d13ee90c 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -274,8 +274,8 @@ struct azx_dev { /* for sanity check of position buffer */ unsigned int period_intr; - unsigned int opened: 1; - unsigned int running: 1; + unsigned int opened :1; + unsigned int running :1; }; /* CORB/RIRB */ @@ -333,9 +333,9 @@ struct azx { /* flags */ int position_fix; - unsigned int initialized: 1; - unsigned int single_cmd: 1; - unsigned int polling_mode: 1; + unsigned int initialized :1; + unsigned int single_cmd :1; + unsigned int polling_mode :1; }; /* driver types */ @@ -661,14 +661,14 @@ static int azx_reset(struct azx *chip) azx_writeb(chip, GCTL, azx_readb(chip, GCTL) | ICH6_GCTL_RESET); count = 50; - while (! azx_readb(chip, GCTL) && --count) + while (!azx_readb(chip, GCTL) && --count) msleep(1); - /* Brent Chartrand said to wait >= 540us for codecs to intialize */ + /* Brent Chartrand said to wait >= 540us for codecs to initialize */ msleep(1); /* check to see if controller is ready */ - if (! azx_readb(chip, GCTL)) { + if (!azx_readb(chip, GCTL)) { snd_printd("azx_reset: controller not ready!\n"); return -EBUSY; } @@ -677,7 +677,7 @@ static int azx_reset(struct azx *chip) azx_writel(chip, GCTL, azx_readl(chip, GCTL) | ICH6_GCTL_UREN); /* detect codecs */ - if (! chip->codec_mask) { + if (!chip->codec_mask) { chip->codec_mask = azx_readw(chip, STATESTS); snd_printdd("codec_mask = 0x%x\n", chip->codec_mask); } @@ -785,7 +785,7 @@ static void azx_init_chip(struct azx *chip) azx_int_enable(chip); /* initialize the codec command I/O */ - if (! chip->single_cmd) + if (!chip->single_cmd) azx_init_cmd_io(chip); /* program the position buffer */ @@ -813,7 +813,7 @@ static void azx_init_chip(struct azx *chip) /* * interrupt handler */ -static irqreturn_t azx_interrupt(int irq, void* dev_id, struct pt_regs *regs) +static irqreturn_t azx_interrupt(int irq, void *dev_id, struct pt_regs *regs) { struct azx *chip = dev_id; struct azx_dev *azx_dev; @@ -1018,8 +1018,9 @@ static struct snd_pcm_hardware azx_pcm_hw = { .info = (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_MMAP_VALID | - SNDRV_PCM_INFO_PAUSE /*|*/ - /*SNDRV_PCM_INFO_RESUME*/), + /* No full-resume yet implemented */ + /* SNDRV_PCM_INFO_RESUME |*/ + SNDRV_PCM_INFO_PAUSE), .formats = SNDRV_PCM_FMTBIT_S16_LE, .rates = SNDRV_PCM_RATE_48000, .rate_min = 48000, @@ -1454,19 +1455,19 @@ static int __devinit azx_create(struct snd_card *card, struct pci_dev *pci, struct azx **rchip) { struct azx *chip; - int err = 0; + int err; static struct snd_device_ops ops = { .dev_free = azx_dev_free, }; *rchip = NULL; - if ((err = pci_enable_device(pci)) < 0) + err = pci_enable_device(pci); + if (err < 0) return err; chip = kzalloc(sizeof(*chip), GFP_KERNEL); - - if (NULL == chip) { + if (!chip) { snd_printk(KERN_ERR SFX "cannot allocate chip\n"); pci_disable_device(pci); return -ENOMEM; @@ -1492,13 +1493,14 @@ static int __devinit azx_create(struct snd_card *card, struct pci_dev *pci, } #endif - if ((err = pci_request_regions(pci, "ICH HD audio")) < 0) { + err = pci_request_regions(pci, "ICH HD audio"); + if (err < 0) { kfree(chip); pci_disable_device(pci); return err; } - chip->addr = pci_resource_start(pci,0); + chip->addr = pci_resource_start(pci, 0); chip->remap_addr = ioremap_nocache(chip->addr, pci_resource_len(pci,0)); if (chip->remap_addr == NULL) { snd_printk(KERN_ERR SFX "ioremap error\n"); @@ -1542,7 +1544,7 @@ static int __devinit azx_create(struct snd_card *card, struct pci_dev *pci, } chip->num_streams = chip->playback_streams + chip->capture_streams; chip->azx_dev = kcalloc(chip->num_streams, sizeof(*chip->azx_dev), GFP_KERNEL); - if (! chip->azx_dev) { + if (!chip->azx_dev) { snd_printk(KERN_ERR "cannot malloc azx_dev\n"); goto errout; } @@ -1573,7 +1575,7 @@ static int __devinit azx_create(struct snd_card *card, struct pci_dev *pci, chip->initialized = 1; /* codec detection */ - if (! chip->codec_mask) { + if (!chip->codec_mask) { snd_printk(KERN_ERR SFX "no codecs found!\n"); err = -ENODEV; goto errout; @@ -1600,16 +1602,16 @@ static int __devinit azx_probe(struct pci_dev *pci, const struct pci_device_id * { struct snd_card *card; struct azx *chip; - int err = 0; + int err; card = snd_card_new(index, id, THIS_MODULE, 0); - if (NULL == card) { + if (!card) { snd_printk(KERN_ERR SFX "Error creating card!\n"); return -ENOMEM; } - if ((err = azx_create(card, pci, pci_id->driver_data, - &chip)) < 0) { + err = azx_create(card, pci, pci_id->driver_data, &chip); + if (err < 0) { snd_card_free(card); return err; } From 2fd53a7e9b1392f9cc3002a24f3c13b2796e70c3 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Fri, 1 Sep 2006 17:15:36 +0200 Subject: [PATCH 0991/1063] [ALSA] [PPC,SOUND] Fix audio gpio state detection When booting with line out or headphone plugged, you won't hear anything. The problem is that after reset all channels are muted, but the actual value of the gpio port doesn't exactly match the active_val settings as expected by check_audio_gpio. For example, the line_mute port is set to 7, but check_audio_gpio would expect 0xd or 0xf, thus its return value indicates that it is not active, even though it is. AFAICS only looking at the low bit is enough to determine whether the port is active. Signed-off-by: Andreas Schwab Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/ppc/tumbler.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/ppc/tumbler.c b/sound/ppc/tumbler.c index 6ae2d5b9aa4a..cdff53e4a17e 100644 --- a/sound/ppc/tumbler.c +++ b/sound/ppc/tumbler.c @@ -190,7 +190,7 @@ static int check_audio_gpio(struct pmac_gpio *gp) ret = do_gpio_read(gp); - return (ret & 0xd) == (gp->active_val & 0xd); + return (ret & 0x1) == (gp->active_val & 0x1); } static int read_audio_gpio(struct pmac_gpio *gp) @@ -198,7 +198,8 @@ static int read_audio_gpio(struct pmac_gpio *gp) int ret; if (! gp->addr) return 0; - ret = ((do_gpio_read(gp) & 0x02) !=0); + ret = do_gpio_read(gp); + ret = (ret & 0x02) !=0; return ret == gp->active_state; } From 35a49934a7180fd80fb0bb3777d125dd939df50e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 1 Sep 2006 17:09:44 +0200 Subject: [PATCH 0992/1063] [ALSA] Add dB scale information to mixart driver Added the dB scale information to mixart driver. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/mixart/mixart_mixer.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sound/pci/mixart/mixart_mixer.c b/sound/pci/mixart/mixart_mixer.c index ed47b732c103..13de0f71d4b7 100644 --- a/sound/pci/mixart/mixart_mixer.c +++ b/sound/pci/mixart/mixart_mixer.c @@ -31,6 +31,7 @@ #include "mixart_core.h" #include "mixart_hwdep.h" #include +#include #include "mixart_mixer.h" static u32 mixart_analog_level[256] = { @@ -388,12 +389,17 @@ static int mixart_analog_vol_put(struct snd_kcontrol *kcontrol, struct snd_ctl_e return changed; } +static DECLARE_TLV_DB_SCALE(db_scale_analog, -9600, 50, 0); + static struct snd_kcontrol_new mixart_control_analog_level = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), /* name will be filled later */ .info = mixart_analog_vol_info, .get = mixart_analog_vol_get, .put = mixart_analog_vol_put, + .tlv = { .p = db_scale_analog }, }; /* shared */ @@ -866,14 +872,19 @@ static int mixart_pcm_vol_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem return changed; } +static DECLARE_TLV_DB_SCALE(db_scale_digital, -10950, 50, 0); + static struct snd_kcontrol_new snd_mixart_pcm_vol = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), /* name will be filled later */ /* count will be filled later */ .info = mixart_digital_vol_info, /* shared */ .get = mixart_pcm_vol_get, .put = mixart_pcm_vol_put, + .tlv = { .p = db_scale_digital }, }; @@ -984,10 +995,13 @@ static int mixart_monitor_vol_put(struct snd_kcontrol *kcontrol, struct snd_ctl_ static struct snd_kcontrol_new mixart_control_monitor_vol = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Monitoring Volume", .info = mixart_digital_vol_info, /* shared */ .get = mixart_monitor_vol_get, .put = mixart_monitor_vol_put, + .tlv = { .p = db_scale_digital }, }; /* From 93ed150375187ae7917ed1e3b9b830b9d4065bad Mon Sep 17 00:00:00 2001 From: Tobin Davis Date: Fri, 1 Sep 2006 21:03:12 +0200 Subject: [PATCH 0993/1063] [ALSA] hda-codec - Add 5 stack audio support for Intel 965 systems This patch renames the 965_2112 function ids to 965_3ST, and adds functional support for 965_5ST (5 stack 7.1 surround). Signed-off-by: Tobin Davis Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- .../sound/alsa/ALSA-Configuration.txt | 10 ++ sound/pci/hda/patch_sigmatel.c | 152 ++++++++++-------- 2 files changed, 91 insertions(+), 71 deletions(-) diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index 48d3bdf2a7cd..a788dd7bc790 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt @@ -859,6 +859,16 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. laptop-dig ditto with SPDIF auto auto-config reading BIOS (default) + STAC9200/9205/9220/9221/9254 + ref Reference board + 3stack D945 3stack + 5stack D945 5stack + SPDIF + + STAC9227/9228/9229/927x + ref Reference board + 3stack D965 3stack + 5stack D965 5stack + SPDIF + STAC9872 vaio Setup for VAIO FE550G/SZ110 vaio-ar Setup for VAIO AR diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 8d5ad7c0db07..87169032be1f 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -42,9 +42,10 @@ #define STAC_D945GTP3 1 #define STAC_D945GTP5 2 #define STAC_MACMINI 3 -#define STAC_D965_2112 4 -#define STAC_D965_284B 5 -#define STAC_922X_MODELS 6 /* number of 922x models */ +#define STAC_922X_MODELS 4 /* number of 922x models */ +#define STAC_D965_3ST 4 +#define STAC_D965_5ST 5 +#define STAC_927X_MODELS 6 /* number of 922x models */ struct sigmatel_spec { struct snd_kcontrol_new *mixers[4]; @@ -111,24 +112,10 @@ static hda_nid_t stac922x_adc_nids[2] = { 0x06, 0x07, }; -static hda_nid_t stac9227_adc_nids[2] = { - 0x07, 0x08, -}; - -#if 0 -static hda_nid_t d965_2112_dac_nids[3] = { - 0x02, 0x03, 0x05, -}; -#endif - static hda_nid_t stac922x_mux_nids[2] = { 0x12, 0x13, }; -static hda_nid_t stac9227_mux_nids[2] = { - 0x15, 0x16, -}; - static hda_nid_t stac927x_adc_nids[3] = { 0x07, 0x08, 0x09 }; @@ -146,7 +133,8 @@ static hda_nid_t stac9205_mux_nids[2] = { }; static hda_nid_t stac9200_pin_nids[8] = { - 0x08, 0x09, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, + 0x08, 0x09, 0x0d, 0x0e, + 0x0f, 0x10, 0x11, 0x12, }; static hda_nid_t stac922x_pin_nids[10] = { @@ -206,17 +194,9 @@ static struct hda_verb stac922x_core_init[] = { {} }; -static struct hda_verb stac9227_core_init[] = { +static struct hda_verb d965_core_init[] = { /* set master volume and direct control */ - { 0x16, AC_VERB_SET_VOLUME_KNOB_CONTROL, 0xff}, - /* unmute node 0x1b */ - { 0x1b, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000}, - {} -}; - -static struct hda_verb d965_2112_core_init[] = { - /* set master volume and direct control */ - { 0x16, AC_VERB_SET_VOLUME_KNOB_CONTROL, 0xff}, + { 0x24, AC_VERB_SET_VOLUME_KNOB_CONTROL, 0xff}, /* unmute node 0x1b */ { 0x1b, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000}, /* select node 0x03 as DAC */ @@ -386,6 +366,8 @@ static unsigned int *stac922x_brd_tbl[STAC_922X_MODELS] = { }; static struct hda_board_config stac922x_cfg_tbl[] = { + { .modelname = "5stack", .config = STAC_D945GTP5 }, + { .modelname = "3stack", .config = STAC_D945GTP3 }, { .modelname = "ref", .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x2668, /* DFI LanParty */ @@ -471,99 +453,127 @@ static struct hda_board_config stac922x_cfg_tbl[] = { { .pci_subvendor = 0x8384, .pci_subdevice = 0x7680, .config = STAC_MACMINI }, /* Apple Mac Mini (early 2006) */ - { .pci_subvendor = PCI_VENDOR_ID_INTEL, - .pci_subdevice = 0x2112, - .config = STAC_D965_2112 }, - { .pci_subvendor = PCI_VENDOR_ID_INTEL, - .pci_subdevice = 0x284b, - .config = STAC_D965_284B }, {} /* terminator */ }; static unsigned int ref927x_pin_configs[14] = { - 0x01813122, 0x01a19021, 0x01014010, 0x01016011, - 0x01012012, 0x01011014, 0x40000100, 0x40000100, - 0x40000100, 0x40000100, 0x40000100, 0x01441030, - 0x01c41030, 0x40000100, + 0x02214020, 0x02a19080, 0x0181304e, 0x01014010, + 0x01a19040, 0x01011012, 0x01016011, 0x0101201f, + 0x183301f0, 0x18a001f0, 0x18a001f0, 0x01442070, + 0x01c42190, 0x40000100, }; -static unsigned int d965_2112_pin_configs[14] = { +static unsigned int d965_3st_pin_configs[14] = { 0x0221401f, 0x02a19120, 0x40000100, 0x01014011, 0x01a19021, 0x01813024, 0x40000100, 0x40000100, 0x40000100, 0x40000100, 0x40000100, 0x40000100, 0x40000100, 0x40000100 }; -static unsigned int *stac927x_brd_tbl[] = { +static unsigned int d965_5st_pin_configs[14] = { + 0x02214020, 0x02a19080, 0x0181304e, 0x01014010, + 0x01a19040, 0x01011012, 0x01016011, 0x40000100, + 0x40000100, 0x40000100, 0x40000100, 0x01442070, + 0x40000100, 0x40000100 +}; + +static unsigned int *stac927x_brd_tbl[STAC_927X_MODELS] = { [STAC_REF] = ref927x_pin_configs, - [STAC_D965_2112] = d965_2112_pin_configs, + [STAC_D965_3ST] = d965_3st_pin_configs, + [STAC_D965_5ST] = d965_5st_pin_configs, }; static struct hda_board_config stac927x_cfg_tbl[] = { + { .modelname = "5stack", .config = STAC_D965_5ST }, + { .modelname = "3stack", .config = STAC_D965_3ST }, { .modelname = "ref", .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x2668, /* DFI LanParty */ .config = STAC_REF }, /* SigmaTel reference board */ - /* SigmaTel 9227 reference board */ - { .pci_subvendor = PCI_VENDOR_ID_INTEL, - .pci_subdevice = 0x284b, - .config = STAC_D965_284B }, /* Intel 946 based systems */ { .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x3d01, - .config = STAC_D965_2112 }, /* D946 configuration */ + .config = STAC_D965_3ST }, /* D946 configuration */ { .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0xa301, - .config = STAC_D965_2112 }, /* Intel D946GZT - 3 stack */ - /* 965 based systems */ + .config = STAC_D965_3ST }, /* Intel D946GZT - 3 stack */ + /* 965 based 3 stack systems */ { .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x2116, - .config = STAC_D965_2112 }, /* Intel D965 3Stack config */ + .config = STAC_D965_3ST }, /* Intel D965 3Stack config */ { .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x2115, - .config = STAC_D965_2112 }, /* Intel DQ965WC - 3 Stack */ + .config = STAC_D965_3ST }, /* Intel DQ965WC - 3 Stack */ { .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x2114, - .config = STAC_D965_2112 }, /* Intel D965 3Stack config */ + .config = STAC_D965_3ST }, /* Intel D965 3Stack config */ { .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x2113, - .config = STAC_D965_2112 }, /* Intel D965 3Stack config */ + .config = STAC_D965_3ST }, /* Intel D965 3Stack config */ { .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x2112, - .config = STAC_D965_2112 }, /* Intel DG965MS - 3 Stack */ + .config = STAC_D965_3ST }, /* Intel DG965MS - 3 Stack */ { .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x2111, - .config = STAC_D965_2112 }, /* Intel D965 3Stack config */ + .config = STAC_D965_3ST }, /* Intel D965 3Stack config */ { .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x2110, - .config = STAC_D965_2112 }, /* Intel D965 3Stack config */ + .config = STAC_D965_3ST }, /* Intel D965 3Stack config */ { .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x2009, - .config = STAC_D965_2112 }, /* Intel D965 3Stack config */ + .config = STAC_D965_3ST }, /* Intel D965 3Stack config */ { .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x2008, - .config = STAC_D965_2112 }, /* Intel DQ965GF - 3 Stack */ + .config = STAC_D965_3ST }, /* Intel DQ965GF - 3 Stack */ { .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x2007, - .config = STAC_D965_2112 }, /* Intel D965 3Stack config */ + .config = STAC_D965_3ST }, /* Intel D965 3Stack config */ { .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x2006, - .config = STAC_D965_2112 }, /* Intel D965 3Stack config */ + .config = STAC_D965_3ST }, /* Intel D965 3Stack config */ { .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x2005, - .config = STAC_D965_2112 }, /* Intel D965 3Stack config */ + .config = STAC_D965_3ST }, /* Intel D965 3Stack config */ { .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x2004, - .config = STAC_D965_2112 }, /* Intel D965 3Stack config */ + .config = STAC_D965_3ST }, /* Intel D965 3Stack config */ { .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x2003, - .config = STAC_D965_2112 }, /* Intel D965 3Stack config */ + .config = STAC_D965_3ST }, /* Intel D965 3Stack config */ { .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x2002, - .config = STAC_D965_2112 }, /* Intel D965 3Stack config */ + .config = STAC_D965_3ST }, /* Intel D965 3Stack config */ { .pci_subvendor = PCI_VENDOR_ID_INTEL, .pci_subdevice = 0x2001, - .config = STAC_D965_2112 }, /* Intel DQ965GF - 3 Stackg */ + .config = STAC_D965_3ST }, /* Intel DQ965GF - 3 Stack */ + /* 965 based 5 stack systems */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2301, + .config = STAC_D965_5ST }, /* Intel DG965 - 5 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2302, + .config = STAC_D965_5ST }, /* Intel DG965 - 5 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2303, + .config = STAC_D965_5ST }, /* Intel DG965 - 5 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2304, + .config = STAC_D965_5ST }, /* Intel DG965 - 5 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2305, + .config = STAC_D965_5ST }, /* Intel DG965 - 5 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2501, + .config = STAC_D965_5ST }, /* Intel DG965MQ - 5 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2502, + .config = STAC_D965_5ST }, /* Intel DG965 - 5 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2503, + .config = STAC_D965_5ST }, /* Intel DG965 - 5 Stack */ + { .pci_subvendor = PCI_VENDOR_ID_INTEL, + .pci_subdevice = 0x2504, + .config = STAC_D965_5ST }, /* Intel DQ965GF - 5 Stack */ {} /* terminator */ }; @@ -1546,18 +1556,18 @@ static int patch_stac927x(struct hda_codec *codec) } switch (spec->board_config) { - case STAC_D965_2112: + case STAC_D965_3ST: spec->adc_nids = stac927x_adc_nids; spec->mux_nids = stac927x_mux_nids; spec->num_muxes = 3; - spec->init = d965_2112_core_init; + spec->init = d965_core_init; spec->mixer = stac9227_mixer; break; - case STAC_D965_284B: - spec->adc_nids = stac9227_adc_nids; - spec->mux_nids = stac9227_mux_nids; - spec->num_muxes = 2; - spec->init = stac9227_core_init; + case STAC_D965_5ST: + spec->adc_nids = stac927x_adc_nids; + spec->mux_nids = stac927x_mux_nids; + spec->num_muxes = 3; + spec->init = d965_core_init; spec->mixer = stac9227_mixer; break; default: From bd25b7cae1e763b292f359170e16bccd01c7ee5c Mon Sep 17 00:00:00 2001 From: Ville Syrjala Date: Mon, 4 Sep 2006 12:28:24 +0200 Subject: [PATCH 0994/1063] [ALSA] ac97: Fix AD1819 volume range AD1819 volume registers can hold extra bits which do not affect the actual volume. Add a res_table to the codec patch to fix the problem. PCM, line and mic volume were tested. Signed-off-by: Ville Syrjala Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/ac97/ac97_patch.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c index bdd7f89234f6..9be4ceb6838e 100644 --- a/sound/pci/ac97/ac97_patch.c +++ b/sound/pci/ac97/ac97_patch.c @@ -1395,6 +1395,17 @@ static void ad1888_resume(struct snd_ac97 *ac97) #endif +static const struct snd_ac97_res_table ad1819_restbl[] = { + { AC97_PHONE, 0x9f1f }, + { AC97_MIC, 0x9f1f }, + { AC97_LINE, 0x9f1f }, + { AC97_CD, 0x9f1f }, + { AC97_VIDEO, 0x9f1f }, + { AC97_AUX, 0x9f1f }, + { AC97_PCM, 0x9f1f }, + { } /* terminator */ +}; + int patch_ad1819(struct snd_ac97 * ac97) { unsigned short scfg; @@ -1402,6 +1413,7 @@ int patch_ad1819(struct snd_ac97 * ac97) // patch for Analog Devices scfg = snd_ac97_read(ac97, AC97_AD_SERIAL_CFG); snd_ac97_write_cache(ac97, AC97_AD_SERIAL_CFG, scfg | 0x7000); /* select all codecs */ + ac97->res_table = ad1819_restbl; return 0; } From 679e28eef835cbd30de78c2f80bf488cba1b7e40 Mon Sep 17 00:00:00 2001 From: Ville Syrjala Date: Mon, 4 Sep 2006 12:28:51 +0200 Subject: [PATCH 0995/1063] [ALSA] es1968: Fix hw volume Fix maestro2 hardware volume control. Tested on a Dell Inspiron 7000. Signed-off-by: Ville Syrjala Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/es1968.c | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/sound/pci/es1968.c b/sound/pci/es1968.c index 3c5ab7c2e72d..f3c40385c87d 100644 --- a/sound/pci/es1968.c +++ b/sound/pci/es1968.c @@ -1905,7 +1905,7 @@ static void es1968_update_hw_volume(unsigned long private_data) /* Figure out which volume control button was pushed, based on differences from the default register values. */ - x = inb(chip->io_port + 0x1c); + x = inb(chip->io_port + 0x1c) & 0xee; /* Reset the volume control registers. */ outb(0x88, chip->io_port + 0x1c); outb(0x88, chip->io_port + 0x1d); @@ -1921,7 +1921,8 @@ static void es1968_update_hw_volume(unsigned long private_data) /* FIXME: we can't call snd_ac97_* functions since here is in tasklet. */ spin_lock_irqsave(&chip->ac97_lock, flags); val = chip->ac97->regs[AC97_MASTER]; - if (x & 1) { + switch (x) { + case 0x88: /* mute */ val ^= 0x8000; chip->ac97->regs[AC97_MASTER] = val; @@ -1929,26 +1930,31 @@ static void es1968_update_hw_volume(unsigned long private_data) outb(AC97_MASTER, chip->io_port + ESM_AC97_INDEX); snd_ctl_notify(chip->card, SNDRV_CTL_EVENT_MASK_VALUE, &chip->master_switch->id); - } else { - val &= 0x7fff; - if (((x>>1) & 7) > 4) { - /* volume up */ - if ((val & 0xff) > 0) - val--; - if ((val & 0xff00) > 0) - val -= 0x0100; - } else { - /* volume down */ - if ((val & 0xff) < 0x1f) - val++; - if ((val & 0xff00) < 0x1f00) - val += 0x0100; - } + break; + case 0xaa: + /* volume up */ + if ((val & 0x7f) > 0) + val--; + if ((val & 0x7f00) > 0) + val -= 0x0100; chip->ac97->regs[AC97_MASTER] = val; outw(val, chip->io_port + ESM_AC97_DATA); outb(AC97_MASTER, chip->io_port + ESM_AC97_INDEX); snd_ctl_notify(chip->card, SNDRV_CTL_EVENT_MASK_VALUE, &chip->master_volume->id); + break; + case 0x66: + /* volume down */ + if ((val & 0x7f) < 0x1f) + val++; + if ((val & 0x7f00) < 0x1f00) + val += 0x0100; + chip->ac97->regs[AC97_MASTER] = val; + outw(val, chip->io_port + ESM_AC97_DATA); + outb(AC97_MASTER, chip->io_port + ESM_AC97_INDEX); + snd_ctl_notify(chip->card, SNDRV_CTL_EVENT_MASK_VALUE, + &chip->master_volume->id); + break; } spin_unlock_irqrestore(&chip->ac97_lock, flags); } From ea543f1ee61bbfdf6cac4b79d66c7840d5b00037 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Tue, 5 Sep 2006 20:25:05 +0200 Subject: [PATCH 0996/1063] [ALSA] sparc dbri: SMP fixes The dbri driver hangs when used in kernel compiled with SMP support due to inproper locking. The patch fixes it. Signed-off-by: Krzysztof Helt Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/sparc/dbri.c | 65 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 17 deletions(-) diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c index 82d5e8072f2b..e4935fca12df 100644 --- a/sound/sparc/dbri.c +++ b/sound/sparc/dbri.c @@ -635,10 +635,16 @@ to send them to the DBRI. static void dbri_cmdwait(struct snd_dbri *dbri) { int maxloops = MAXLOOPS; + unsigned long flags; /* Delay if previous commands are still being processed */ - while ((--maxloops) > 0 && (sbus_readl(dbri->regs + REG0) & D_P)) + spin_lock_irqsave(&dbri->lock, flags); + while ((--maxloops) > 0 && (sbus_readl(dbri->regs + REG0) & D_P)) { + spin_unlock_irqrestore(&dbri->lock, flags); msleep_interruptible(1); + spin_lock_irqsave(&dbri->lock, flags); + } + spin_unlock_irqrestore(&dbri->lock, flags); if (maxloops == 0) { printk(KERN_ERR "DBRI: Chip never completed command buffer\n"); @@ -671,11 +677,12 @@ static s32 *dbri_cmdlock(struct snd_dbri * dbri, int len) * the last WAIT cmd and force DBRI to reread the cmd. * The JUMP cmd points to the new cmd string. * It also releases the cmdlock spinlock. + * + * Lock must not be held before calling this. */ static void dbri_cmdsend(struct snd_dbri * dbri, s32 * cmd,int len) { s32 tmp, addr; - unsigned long flags; static int wait_id = 0; wait_id++; @@ -706,12 +713,10 @@ static void dbri_cmdsend(struct snd_dbri * dbri, s32 * cmd,int len) } #endif - spin_lock_irqsave(&dbri->lock, flags); /* Reread the last command */ tmp = sbus_readl(dbri->regs + REG0); tmp |= D_P; sbus_writel(tmp, dbri->regs + REG0); - spin_unlock_irqrestore(&dbri->lock, flags); dbri->cmdptr = cmd; spin_unlock(&dbri->cmdlock); @@ -777,9 +782,9 @@ static void dbri_initialize(struct snd_dbri * dbri) dma_addr = dbri->dma_dvma + dbri_dma_off(cmd, 0); sbus_writel(dma_addr, dbri->regs + REG8); spin_unlock(&dbri->cmdlock); - dbri_cmdwait(dbri); spin_unlock_irqrestore(&dbri->lock, flags); + dbri_cmdwait(dbri); } /* @@ -840,6 +845,9 @@ static void reset_pipe(struct snd_dbri * dbri, int pipe) dbri->pipes[pipe].first_desc = -1; } +/* + * Lock must be held before calling this. + */ static void setup_pipe(struct snd_dbri * dbri, int pipe, int sdp) { if (pipe < 0 || pipe > DBRI_MAX_PIPE) { @@ -866,6 +874,9 @@ static void setup_pipe(struct snd_dbri * dbri, int pipe, int sdp) reset_pipe(dbri, pipe); } +/* + * Lock must be held before calling this. + */ static void link_time_slot(struct snd_dbri * dbri, int pipe, int prevpipe, int nextpipe, int length, int cycle) @@ -920,6 +931,10 @@ static void link_time_slot(struct snd_dbri * dbri, int pipe, dbri_cmdsend(dbri, cmd, 4); } +#if 0 +/* + * Lock must be held before calling this. + */ static void unlink_time_slot(struct snd_dbri * dbri, int pipe, enum in_or_out direction, int prevpipe, int nextpipe) @@ -952,6 +967,7 @@ static void unlink_time_slot(struct snd_dbri * dbri, int pipe, dbri_cmdsend(dbri, cmd, 4); } +#endif /* xmit_fixed() / recv_fixed() * @@ -965,11 +981,14 @@ static void unlink_time_slot(struct snd_dbri * dbri, int pipe, * the actual time slot is. The interrupt handler takes care of bit * ordering and alignment. An 8-bit time slot will always end up * in the low-order 8 bits, filled either MSB-first or LSB-first, - * depending on the settings passed to setup_pipe() + * depending on the settings passed to setup_pipe(). + * + * Lock must not be held before calling it. */ static void xmit_fixed(struct snd_dbri * dbri, int pipe, unsigned int data) { s32 *cmd; + unsigned long flags; if (pipe < 16 || pipe > DBRI_MAX_PIPE) { printk(KERN_ERR "DBRI: xmit_fixed: Illegal pipe number\n"); @@ -1002,8 +1021,11 @@ static void xmit_fixed(struct snd_dbri * dbri, int pipe, unsigned int data) *(cmd++) = data; *(cmd++) = DBRI_CMD(D_PAUSE, 0, 0); + spin_lock_irqsave(&dbri->lock, flags); dbri_cmdsend(dbri, cmd, 3); + spin_unlock_irqrestore(&dbri->lock, flags); dbri_cmdwait(dbri); + } static void recv_fixed(struct snd_dbri * dbri, int pipe, volatile __u32 * ptr) @@ -1039,6 +1061,8 @@ static void recv_fixed(struct snd_dbri * dbri, int pipe, volatile __u32 * ptr) * be spread across multiple descriptors. * * All descriptors create a ring buffer. + * + * Lock must be held before calling this. */ static int setup_descs(struct snd_dbri * dbri, int streamno, unsigned int period) { @@ -1186,6 +1210,9 @@ multiplexed serial interface which the DBRI can operate in either master enum master_or_slave { CHImaster, CHIslave }; +/* + * Lock must not be held before calling it. + */ static void reset_chi(struct snd_dbri * dbri, enum master_or_slave master_or_slave, int bits_per_frame) { @@ -1258,9 +1285,14 @@ static void reset_chi(struct snd_dbri * dbri, enum master_or_slave master_or_sla In the standard SPARC audio configuration, the CS4215 codec is attached to the DBRI via the CHI interface and few of the DBRI's PIO pins. + * Lock must not be held before calling it. + */ static void cs4215_setup_pipes(struct snd_dbri * dbri) { + unsigned long flags; + + spin_lock_irqsave(&dbri->lock, flags); /* * Data mode: * Pipe 4: Send timeslots 1-4 (audio data) @@ -1284,6 +1316,7 @@ static void cs4215_setup_pipes(struct snd_dbri * dbri) setup_pipe(dbri, 17, D_SDP_FIXED | D_SDP_TO_SER | D_SDP_MSB); setup_pipe(dbri, 18, D_SDP_FIXED | D_SDP_FROM_SER | D_SDP_MSB); setup_pipe(dbri, 19, D_SDP_FIXED | D_SDP_FROM_SER | D_SDP_MSB); + spin_unlock_irqrestore(&dbri->lock, flags); dbri_cmdwait(dbri); } @@ -1358,6 +1391,7 @@ static void cs4215_open(struct snd_dbri * dbri) { int data_width; u32 tmp; + unsigned long flags; dprintk(D_MM, "cs4215_open: %d channels, %d bits\n", dbri->mm.channels, dbri->mm.precision); @@ -1382,6 +1416,7 @@ static void cs4215_open(struct snd_dbri * dbri) * bits. The CS4215, it seems, observes TSIN (the delayed signal) * even if it's the CHI master. Don't ask me... */ + spin_lock_irqsave(&dbri->lock, flags); tmp = sbus_readl(dbri->regs + REG0); tmp &= ~(D_C); /* Disable CHI */ sbus_writel(tmp, dbri->regs + REG0); @@ -1409,6 +1444,7 @@ static void cs4215_open(struct snd_dbri * dbri) tmp = sbus_readl(dbri->regs + REG0); tmp |= D_C; /* Enable CHI */ sbus_writel(tmp, dbri->regs + REG0); + spin_unlock_irqrestore(&dbri->lock, flags); cs4215_setdata(dbri, 0); } @@ -1420,6 +1456,7 @@ static int cs4215_setctrl(struct snd_dbri * dbri) { int i, val; u32 tmp; + unsigned long flags; /* FIXME - let the CPU do something useful during these delays */ @@ -1456,6 +1493,7 @@ static int cs4215_setctrl(struct snd_dbri * dbri) * done in hardware by a TI 248 that delays the DBRI->4215 * frame sync signal by eight clock cycles. Anybody know why? */ + spin_lock_irqsave(&dbri->lock, flags); tmp = sbus_readl(dbri->regs + REG0); tmp &= ~D_C; /* Disable CHI */ sbus_writel(tmp, dbri->regs + REG0); @@ -1472,14 +1510,17 @@ static int cs4215_setctrl(struct snd_dbri * dbri) link_time_slot(dbri, 17, 16, 16, 32, dbri->mm.offset); link_time_slot(dbri, 18, 16, 16, 8, dbri->mm.offset); link_time_slot(dbri, 19, 18, 16, 8, dbri->mm.offset + 48); + spin_unlock_irqrestore(&dbri->lock, flags); /* Wait for the chip to echo back CLB (Control Latch Bit) as zero */ dbri->mm.ctrl[0] &= ~CS4215_CLB; xmit_fixed(dbri, 17, *(int *)dbri->mm.ctrl); + spin_lock_irqsave(&dbri->lock, flags); tmp = sbus_readl(dbri->regs + REG0); tmp |= D_C; /* Enable CHI */ sbus_writel(tmp, dbri->regs + REG0); + spin_unlock_irqrestore(&dbri->lock, flags); for (i = 10; ((dbri->mm.status & 0xe4) != 0x20); --i) { msleep_interruptible(1); @@ -1688,6 +1729,7 @@ static void xmit_descs(struct snd_dbri *dbri) dbri->pipes[info->pipe].desc = first_td; } } + spin_unlock_irqrestore(&dbri->lock, flags); } @@ -2093,7 +2135,6 @@ static int snd_dbri_prepare(struct snd_pcm_substream *substream) { struct snd_dbri *dbri = snd_pcm_substream_chip(substream); struct dbri_streaminfo *info = DBRI_STREAM(dbri, substream); - struct snd_pcm_runtime *runtime = substream->runtime; int ret; info->size = snd_pcm_lib_buffer_bytes(substream); @@ -2232,7 +2273,6 @@ static int snd_cs4215_put_volume(struct snd_kcontrol *kcontrol, { struct snd_dbri *dbri = snd_kcontrol_chip(kcontrol); struct dbri_streaminfo *info = &dbri->stream_info[kcontrol->private_value]; - unsigned long flags; int changed = 0; if (info->left_gain != ucontrol->value.integer.value[0]) { @@ -2247,13 +2287,9 @@ static int snd_cs4215_put_volume(struct snd_kcontrol *kcontrol, /* First mute outputs, and wait 1/8000 sec (125 us) * to make sure this takes. This avoids clicking noises. */ - spin_lock_irqsave(&dbri->lock, flags); - cs4215_setdata(dbri, 1); udelay(125); cs4215_setdata(dbri, 0); - - spin_unlock_irqrestore(&dbri->lock, flags); } return changed; } @@ -2300,7 +2336,6 @@ static int snd_cs4215_put_single(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_dbri *dbri = snd_kcontrol_chip(kcontrol); - unsigned long flags; int elem = kcontrol->private_value & 0xff; int shift = (kcontrol->private_value >> 8) & 0xff; int mask = (kcontrol->private_value >> 16) & 0xff; @@ -2333,13 +2368,9 @@ static int snd_cs4215_put_single(struct snd_kcontrol *kcontrol, /* First mute outputs, and wait 1/8000 sec (125 us) * to make sure this takes. This avoids clicking noises. */ - spin_lock_irqsave(&dbri->lock, flags); - cs4215_setdata(dbri, 1); udelay(125); cs4215_setdata(dbri, 0); - - spin_unlock_irqrestore(&dbri->lock, flags); } return changed; } From 311e70a4741c736795da082da7290164d9cf3726 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 6 Sep 2006 12:13:37 +0200 Subject: [PATCH 0997/1063] [ALSA] hdsp - Fix auto-updating of firmware Fixed the auto-updating of firmware if the breakout box was switched off/on. The firmware binary itself was already cached but it wasn't loaded properly. Also, request_firmware() is issued if the box was with firmware at module loading time but later it's erased. The auto-update is triggered at each PCM action (open, prepare, etc) and at opening proc files. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/rme9652/hdsp.c | 48 +++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c index e5a52da77b85..d3e07de433b0 100644 --- a/sound/pci/rme9652/hdsp.c +++ b/sound/pci/rme9652/hdsp.c @@ -726,22 +726,36 @@ static int hdsp_get_iobox_version (struct hdsp *hdsp) } -static int hdsp_check_for_firmware (struct hdsp *hdsp, int show_err) +#ifdef HDSP_FW_LOADER +static int __devinit hdsp_request_fw_loader(struct hdsp *hdsp); +#endif + +static int hdsp_check_for_firmware (struct hdsp *hdsp, int load_on_demand) { - if (hdsp->io_type == H9652 || hdsp->io_type == H9632) return 0; + if (hdsp->io_type == H9652 || hdsp->io_type == H9632) + return 0; if ((hdsp_read (hdsp, HDSP_statusRegister) & HDSP_DllError) != 0) { - snd_printk(KERN_ERR "Hammerfall-DSP: firmware not present.\n"); hdsp->state &= ~HDSP_FirmwareLoaded; - if (! show_err) + if (! load_on_demand) return -EIO; + snd_printk(KERN_ERR "Hammerfall-DSP: firmware not present.\n"); /* try to load firmware */ - if (hdsp->state & HDSP_FirmwareCached) { - if (snd_hdsp_load_firmware_from_cache(hdsp) != 0) - snd_printk(KERN_ERR "Hammerfall-DSP: Firmware loading from cache failed, please upload manually.\n"); - } else { - snd_printk(KERN_ERR "Hammerfall-DSP: No firmware loaded nor cached, please upload firmware.\n"); + if (! (hdsp->state & HDSP_FirmwareCached)) { +#ifdef HDSP_FW_LOADER + if (! hdsp_request_fw_loader(hdsp)) + return 0; +#endif + snd_printk(KERN_ERR + "Hammerfall-DSP: No firmware loaded nor " + "cached, please upload firmware.\n"); + return -EIO; + } + if (snd_hdsp_load_firmware_from_cache(hdsp) != 0) { + snd_printk(KERN_ERR + "Hammerfall-DSP: Firmware loading from " + "cache failed, please upload manually.\n"); + return -EIO; } - return -EIO; } return 0; } @@ -3181,8 +3195,16 @@ snd_hdsp_proc_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) return; } } else { - snd_iprintf(buffer, "No firmware loaded nor cached, please upload firmware.\n"); - return; + int err = -EINVAL; +#ifdef HDSP_FW_LOADER + err = hdsp_request_fw_loader(hdsp); +#endif + if (err < 0) { + snd_iprintf(buffer, + "No firmware loaded nor cached, " + "please upload firmware.\n"); + return; + } } } @@ -3851,7 +3873,7 @@ static int snd_hdsp_trigger(struct snd_pcm_substream *substream, int cmd) if (hdsp_check_for_iobox (hdsp)) return -EIO; - if (hdsp_check_for_firmware(hdsp, 1)) + if (hdsp_check_for_firmware(hdsp, 0)) /* no auto-loading in trigger */ return -EIO; spin_lock(&hdsp->lock); From 55a29af5ed5d914f017e6a7c613a4d7cc34f82d9 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 6 Sep 2006 12:15:34 +0200 Subject: [PATCH 0998/1063] [ALSA] Add definition of TLV dB range compound Added the definition of TLV dB range compound. It contains one or more dB-range or linear-volume TLV entries with min/max ranges. Used for volume controls with non-linear curves. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- include/sound/tlv.h | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/include/sound/tlv.h b/include/sound/tlv.h index 7905841643df..d93a96b91875 100644 --- a/include/sound/tlv.h +++ b/include/sound/tlv.h @@ -34,19 +34,26 @@ #define SNDRV_CTL_TLVT_CONTAINER 0 /* one level down - group of TLVs */ #define SNDRV_CTL_TLVT_DB_SCALE 1 /* dB scale */ #define SNDRV_CTL_TLVT_DB_LINEAR 2 /* linear volume */ +#define SNDRV_CTL_TLVT_DB_RANGE 3 /* dB range container */ +#define TLV_DB_SCALE_ITEM(min, step, mute) \ + SNDRV_CTL_TLVT_DB_SCALE, 2 * sizeof(unsigned int), \ + (min), ((step) & 0xffff) | ((mute) ? 0x10000 : 0) #define DECLARE_TLV_DB_SCALE(name, min, step, mute) \ -unsigned int name[] = { \ - SNDRV_CTL_TLVT_DB_SCALE, 2 * sizeof(unsigned int), \ - (min), ((step) & 0xffff) | ((mute) ? 0x10000 : 0) \ -} + unsigned int name[] = { TLV_DB_SCALE_ITEM(min, step, mute) } /* linear volume between min_dB and max_dB (.01dB unit) */ +#define TLV_DB_LINEAR_ITEM(min_dB, max_dB) \ + SNDRV_CTL_TLVT_DB_LINEAR, 2 * sizeof(unsigned int), \ + (min_dB), (max_dB) #define DECLARE_TLV_DB_LINEAR(name, min_dB, max_dB) \ -unsigned int name[] = { \ - SNDRV_CTL_TLVT_DB_LINEAR, 2 * sizeof(unsigned int), \ - (min_dB), (max_dB) \ -} + unsigned int name[] = { TLV_DB_LINEAR_ITEM(min_dB, max_dB) } + +/* dB range container */ +/* Each item is: */ +/* The below assumes that each item TLV is 4 words like DB_SCALE or LINEAR */ +#define TLV_DB_RANGE_HEAD(num) \ + SNDRV_CTL_TLVT_DB_RANGE, 6 * (num) * sizeof(unsigned int) #define TLV_DB_GAIN_MUTE -9999999 From 0b59397268ed418e139db3806f7956ffcb18b33d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 6 Sep 2006 13:35:27 +0200 Subject: [PATCH 0999/1063] [ALSA] Add dB information to es1938 driver Added the dB information to ESS Solo (es1938) driver. The new compound dB range TLVs are used for non-linear native volume controls. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/es1938.c | 103 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 86 insertions(+), 17 deletions(-) diff --git a/sound/pci/es1938.c b/sound/pci/es1938.c index cc0f34f68185..3784088bea84 100644 --- a/sound/pci/es1938.c +++ b/sound/pci/es1938.c @@ -62,6 +62,7 @@ #include #include #include +#include #include @@ -1164,6 +1165,14 @@ static int snd_es1938_reg_read(struct es1938 *chip, unsigned char reg) return snd_es1938_read(chip, reg); } +#define ES1938_SINGLE_TLV(xname, xindex, reg, shift, mask, invert, xtlv) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ,\ + .name = xname, .index = xindex, \ + .info = snd_es1938_info_single, \ + .get = snd_es1938_get_single, .put = snd_es1938_put_single, \ + .private_value = reg | (shift << 8) | (mask << 16) | (invert << 24), \ + .tlv = { .p = xtlv } } #define ES1938_SINGLE(xname, xindex, reg, shift, mask, invert) \ { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \ .info = snd_es1938_info_single, \ @@ -1217,6 +1226,14 @@ static int snd_es1938_put_single(struct snd_kcontrol *kcontrol, return snd_es1938_reg_bits(chip, reg, mask, val) != val; } +#define ES1938_DOUBLE_TLV(xname, xindex, left_reg, right_reg, shift_left, shift_right, mask, invert, xtlv) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ,\ + .name = xname, .index = xindex, \ + .info = snd_es1938_info_double, \ + .get = snd_es1938_get_double, .put = snd_es1938_put_double, \ + .private_value = left_reg | (right_reg << 8) | (shift_left << 16) | (shift_right << 19) | (mask << 24) | (invert << 22), \ + .tlv = { .p = xtlv } } #define ES1938_DOUBLE(xname, xindex, left_reg, right_reg, shift_left, shift_right, mask, invert) \ { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \ .info = snd_es1938_info_double, \ @@ -1297,8 +1314,41 @@ static int snd_es1938_put_double(struct snd_kcontrol *kcontrol, return change; } +static unsigned int db_scale_master[] = { + TLV_DB_RANGE_HEAD(2), + 0, 54, TLV_DB_SCALE_ITEM(-3600, 50, 1), + 54, 63, TLV_DB_SCALE_ITEM(-900, 100, 0), +}; + +static unsigned int db_scale_audio1[] = { + TLV_DB_RANGE_HEAD(2), + 0, 8, TLV_DB_SCALE_ITEM(-3300, 300, 1), + 8, 15, TLV_DB_SCALE_ITEM(-900, 150, 0), +}; + +static unsigned int db_scale_audio2[] = { + TLV_DB_RANGE_HEAD(2), + 0, 8, TLV_DB_SCALE_ITEM(-3450, 300, 1), + 8, 15, TLV_DB_SCALE_ITEM(-1050, 150, 0), +}; + +static unsigned int db_scale_mic[] = { + TLV_DB_RANGE_HEAD(2), + 0, 8, TLV_DB_SCALE_ITEM(-2400, 300, 1), + 8, 15, TLV_DB_SCALE_ITEM(0, 150, 0), +}; + +static unsigned int db_scale_line[] = { + TLV_DB_RANGE_HEAD(2), + 0, 8, TLV_DB_SCALE_ITEM(-3150, 300, 1), + 8, 15, TLV_DB_SCALE_ITEM(-750, 150, 0), +}; + +static DECLARE_TLV_DB_SCALE(db_scale_capture, 0, 150, 0); + static struct snd_kcontrol_new snd_es1938_controls[] = { -ES1938_DOUBLE("Master Playback Volume", 0, 0x60, 0x62, 0, 0, 63, 0), +ES1938_DOUBLE_TLV("Master Playback Volume", 0, 0x60, 0x62, 0, 0, 63, 0, + db_scale_master), ES1938_DOUBLE("Master Playback Switch", 0, 0x60, 0x62, 6, 6, 1, 1), { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -1309,19 +1359,28 @@ ES1938_DOUBLE("Master Playback Switch", 0, 0x60, 0x62, 6, 6, 1, 1), }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Hardware Master Playback Switch", .access = SNDRV_CTL_ELEM_ACCESS_READ, .info = snd_es1938_info_hw_switch, .get = snd_es1938_get_hw_switch, + .tlv = { .p = db_scale_master }, }, ES1938_SINGLE("Hardware Volume Split", 0, 0x64, 7, 1, 0), -ES1938_DOUBLE("Line Playback Volume", 0, 0x3e, 0x3e, 4, 0, 15, 0), +ES1938_DOUBLE_TLV("Line Playback Volume", 0, 0x3e, 0x3e, 4, 0, 15, 0, + db_scale_line), ES1938_DOUBLE("CD Playback Volume", 0, 0x38, 0x38, 4, 0, 15, 0), -ES1938_DOUBLE("FM Playback Volume", 0, 0x36, 0x36, 4, 0, 15, 0), -ES1938_DOUBLE("Mono Playback Volume", 0, 0x6d, 0x6d, 4, 0, 15, 0), -ES1938_DOUBLE("Mic Playback Volume", 0, 0x1a, 0x1a, 4, 0, 15, 0), -ES1938_DOUBLE("Aux Playback Volume", 0, 0x3a, 0x3a, 4, 0, 15, 0), -ES1938_DOUBLE("Capture Volume", 0, 0xb4, 0xb4, 4, 0, 15, 0), +ES1938_DOUBLE_TLV("FM Playback Volume", 0, 0x36, 0x36, 4, 0, 15, 0, + db_scale_mic), +ES1938_DOUBLE_TLV("Mono Playback Volume", 0, 0x6d, 0x6d, 4, 0, 15, 0, + db_scale_line), +ES1938_DOUBLE_TLV("Mic Playback Volume", 0, 0x1a, 0x1a, 4, 0, 15, 0, + db_scale_mic), +ES1938_DOUBLE_TLV("Aux Playback Volume", 0, 0x3a, 0x3a, 4, 0, 15, 0, + db_scale_line), +ES1938_DOUBLE_TLV("Capture Volume", 0, 0xb4, 0xb4, 4, 0, 15, 0, + db_scale_capture), ES1938_SINGLE("PC Speaker Volume", 0, 0x3c, 0, 7, 0), ES1938_SINGLE("Record Monitor", 0, 0xa8, 3, 1, 0), ES1938_SINGLE("Capture Switch", 0, 0x1c, 4, 1, 1), @@ -1332,16 +1391,26 @@ ES1938_SINGLE("Capture Switch", 0, 0x1c, 4, 1, 1), .get = snd_es1938_get_mux, .put = snd_es1938_put_mux, }, -ES1938_DOUBLE("Mono Input Playback Volume", 0, 0x6d, 0x6d, 4, 0, 15, 0), -ES1938_DOUBLE("PCM Capture Volume", 0, 0x69, 0x69, 4, 0, 15, 0), -ES1938_DOUBLE("Mic Capture Volume", 0, 0x68, 0x68, 4, 0, 15, 0), -ES1938_DOUBLE("Line Capture Volume", 0, 0x6e, 0x6e, 4, 0, 15, 0), -ES1938_DOUBLE("FM Capture Volume", 0, 0x6b, 0x6b, 4, 0, 15, 0), -ES1938_DOUBLE("Mono Capture Volume", 0, 0x6f, 0x6f, 4, 0, 15, 0), -ES1938_DOUBLE("CD Capture Volume", 0, 0x6a, 0x6a, 4, 0, 15, 0), -ES1938_DOUBLE("Aux Capture Volume", 0, 0x6c, 0x6c, 4, 0, 15, 0), -ES1938_DOUBLE("PCM Playback Volume", 0, 0x7c, 0x7c, 4, 0, 15, 0), -ES1938_DOUBLE("PCM Playback Volume", 1, 0x14, 0x14, 4, 0, 15, 0), +ES1938_DOUBLE_TLV("Mono Input Playback Volume", 0, 0x6d, 0x6d, 4, 0, 15, 0, + db_scale_line), +ES1938_DOUBLE_TLV("PCM Capture Volume", 0, 0x69, 0x69, 4, 0, 15, 0, + db_scale_audio2), +ES1938_DOUBLE_TLV("Mic Capture Volume", 0, 0x68, 0x68, 4, 0, 15, 0, + db_scale_mic), +ES1938_DOUBLE_TLV("Line Capture Volume", 0, 0x6e, 0x6e, 4, 0, 15, 0, + db_scale_line), +ES1938_DOUBLE_TLV("FM Capture Volume", 0, 0x6b, 0x6b, 4, 0, 15, 0, + db_scale_mic), +ES1938_DOUBLE_TLV("Mono Capture Volume", 0, 0x6f, 0x6f, 4, 0, 15, 0, + db_scale_line), +ES1938_DOUBLE_TLV("CD Capture Volume", 0, 0x6a, 0x6a, 4, 0, 15, 0, + db_scale_line), +ES1938_DOUBLE_TLV("Aux Capture Volume", 0, 0x6c, 0x6c, 4, 0, 15, 0, + db_scale_line), +ES1938_DOUBLE_TLV("PCM Playback Volume", 0, 0x7c, 0x7c, 4, 0, 15, 0, + db_scale_audio2), +ES1938_DOUBLE_TLV("PCM Playback Volume", 1, 0x14, 0x14, 4, 0, 15, 0, + db_scale_audio1), ES1938_SINGLE("3D Control - Level", 0, 0x52, 0, 63, 0), { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, From 160ea0dc6b86e2c0c4d325c06bf402bfdde7c1c7 Mon Sep 17 00:00:00 2001 From: Richard Fish Date: Wed, 6 Sep 2006 13:58:25 +0200 Subject: [PATCH 1000/1063] [ALSA] [snd-intel-hda] enable center/LFE speaker on some laptops This patch adds LFE mixer controls for laptops with a stac9200 and a mono speaker pin with amplifier. Signed-off-by: Richard Fish Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/patch_sigmatel.c | 63 ++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 87169032be1f..bcbbe111ab95 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -1223,6 +1223,66 @@ static int stac9200_auto_create_hp_ctls(struct hda_codec *codec, return 0; } +/* add playback controls for LFE output */ +static int stac9200_auto_create_lfe_ctls(struct hda_codec *codec, + struct auto_pin_cfg *cfg) +{ + struct sigmatel_spec *spec = codec->spec; + int err; + hda_nid_t lfe_pin = 0x0; + int i; + + /* + * search speaker outs and line outs for a mono speaker pin + * with an amp. If one is found, add LFE controls + * for it. + */ + for (i = 0; i < spec->autocfg.speaker_outs && lfe_pin == 0x0; i++) { + hda_nid_t pin = spec->autocfg.speaker_pins[i]; + unsigned long wcaps = get_wcaps(codec, pin); + wcaps &= (AC_WCAP_STEREO | AC_WCAP_OUT_AMP); + if (wcaps == AC_WCAP_OUT_AMP) + /* found a mono speaker with an amp, must be lfe */ + lfe_pin = pin; + } + + /* if speaker_outs is 0, then speakers may be in line_outs */ + if (lfe_pin == 0 && spec->autocfg.speaker_outs == 0) { + for (i = 0; i < spec->autocfg.line_outs && lfe_pin == 0x0; i++) { + hda_nid_t pin = spec->autocfg.line_out_pins[i]; + unsigned long cfg; + cfg = snd_hda_codec_read(codec, pin, 0, + AC_VERB_GET_CONFIG_DEFAULT, + 0x00); + if (get_defcfg_device(cfg) == AC_JACK_SPEAKER) { + unsigned long wcaps = get_wcaps(codec, pin); + wcaps &= (AC_WCAP_STEREO | AC_WCAP_OUT_AMP); + if (wcaps == AC_WCAP_OUT_AMP) + /* found a mono speaker with an amp, + must be lfe */ + lfe_pin = pin; + } + } + } + + if (lfe_pin) { + err = stac92xx_add_control(spec, STAC_CTL_WIDGET_VOL, + "LFE Playback Volume", + HDA_COMPOSE_AMP_VAL(lfe_pin, 1, 0, + HDA_OUTPUT)); + if (err < 0) + return err; + err = stac92xx_add_control(spec, STAC_CTL_WIDGET_MUTE, + "LFE Playback Switch", + HDA_COMPOSE_AMP_VAL(lfe_pin, 1, 0, + HDA_OUTPUT)); + if (err < 0) + return err; + } + + return 0; +} + static int stac9200_parse_auto_config(struct hda_codec *codec) { struct sigmatel_spec *spec = codec->spec; @@ -1237,6 +1297,9 @@ static int stac9200_parse_auto_config(struct hda_codec *codec) if ((err = stac9200_auto_create_hp_ctls(codec, &spec->autocfg)) < 0) return err; + if ((err = stac9200_auto_create_lfe_ctls(codec, &spec->autocfg)) < 0) + return err; + if (spec->autocfg.dig_out_pin) spec->multiout.dig_out_nid = 0x05; if (spec->autocfg.dig_in_pin) From a7da6ce564a80952d9c0b210deca5a8cd3474a31 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 6 Sep 2006 14:03:14 +0200 Subject: [PATCH 1001/1063] [ALSA] hda-codec - Add independent headphone volume control This patch addes the support of the independent 'Headphone' volume control to the generic codec parser. Some codecs (e.g. Conexant) have separate connections to the headphone and the independent amp adjustment is needed. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/hda_generic.c | 75 ++++++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 21 deletions(-) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index dedfc5b1083a..97e9af130b71 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -46,11 +46,18 @@ struct hda_gnode { }; /* patch-specific record */ + +#define MAX_PCM_VOLS 2 +struct pcm_vol { + struct hda_gnode *node; /* Node for PCM volume */ + unsigned int index; /* connection of PCM volume */ +}; + struct hda_gspec { struct hda_gnode *dac_node[2]; /* DAC node */ struct hda_gnode *out_pin_node[2]; /* Output pin (Line-Out) node */ - struct hda_gnode *pcm_vol_node[2]; /* Node for PCM volume */ - unsigned int pcm_vol_index[2]; /* connection of PCM volume */ + struct pcm_vol pcm_vol[MAX_PCM_VOLS]; /* PCM volumes */ + unsigned int pcm_vol_nodes; /* number of PCM volumes */ struct hda_gnode *adc_node; /* ADC node */ struct hda_gnode *cap_vol_node; /* Node for capture volume */ @@ -285,9 +292,11 @@ static int parse_output_path(struct hda_codec *codec, struct hda_gspec *spec, return node == spec->dac_node[dac_idx]; } spec->dac_node[dac_idx] = node; - if (node->wid_caps & AC_WCAP_OUT_AMP) { - spec->pcm_vol_node[dac_idx] = node; - spec->pcm_vol_index[dac_idx] = 0; + if ((node->wid_caps & AC_WCAP_OUT_AMP) && + spec->pcm_vol_nodes < MAX_PCM_VOLS) { + spec->pcm_vol[spec->pcm_vol_nodes].node = node; + spec->pcm_vol[spec->pcm_vol_nodes].index = 0; + spec->pcm_vol_nodes++; } return 1; /* found */ } @@ -307,13 +316,16 @@ static int parse_output_path(struct hda_codec *codec, struct hda_gspec *spec, select_input_connection(codec, node, i); unmute_input(codec, node, i); unmute_output(codec, node); - if (! spec->pcm_vol_node[dac_idx]) { - if (node->wid_caps & AC_WCAP_IN_AMP) { - spec->pcm_vol_node[dac_idx] = node; - spec->pcm_vol_index[dac_idx] = i; - } else if (node->wid_caps & AC_WCAP_OUT_AMP) { - spec->pcm_vol_node[dac_idx] = node; - spec->pcm_vol_index[dac_idx] = 0; + if (spec->dac_node[dac_idx] && + spec->pcm_vol_nodes < MAX_PCM_VOLS && + !(spec->dac_node[dac_idx]->wid_caps & + AC_WCAP_OUT_AMP)) { + if ((node->wid_caps & AC_WCAP_IN_AMP) || + (node->wid_caps & AC_WCAP_OUT_AMP)) { + int n = spec->pcm_vol_nodes; + spec->pcm_vol[n].node = node; + spec->pcm_vol[n].index = i; + spec->pcm_vol_nodes++; } } return 1; @@ -370,7 +382,9 @@ static struct hda_gnode *parse_output_jack(struct hda_codec *codec, /* set PIN-Out enable */ snd_hda_codec_write(codec, node->nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, - AC_PINCTL_OUT_EN | AC_PINCTL_HP_EN); + AC_PINCTL_OUT_EN | + ((node->pin_caps & AC_PINCAP_HP_DRV) ? + AC_PINCTL_HP_EN : 0)); return node; } } @@ -745,18 +759,37 @@ static int check_existing_control(struct hda_codec *codec, const char *type, con /* * build output mixer controls */ +static int create_output_mixers(struct hda_codec *codec, const char **names) +{ + struct hda_gspec *spec = codec->spec; + int i, err; + + for (i = 0; i < spec->pcm_vol_nodes; i++) { + err = create_mixer(codec, spec->pcm_vol[i].node, + spec->pcm_vol[i].index, + names[i], "Playback"); + if (err < 0) + return err; + } + return 0; +} + static int build_output_controls(struct hda_codec *codec) { struct hda_gspec *spec = codec->spec; - static const char *types[2] = { "Master", "Headphone" }; - int i, err; + static const char *types_speaker[] = { "Speaker", "Headphone" }; + static const char *types_line[] = { "Front", "Headphone" }; - for (i = 0; i < 2 && spec->pcm_vol_node[i]; i++) { - err = create_mixer(codec, spec->pcm_vol_node[i], - spec->pcm_vol_index[i], - types[i], "Playback"); - if (err < 0) - return err; + switch (spec->pcm_vol_nodes) { + case 1: + return create_mixer(codec, spec->pcm_vol[0].node, + spec->pcm_vol[0].index, + "Master", "Playback"); + case 2: + if (defcfg_type(spec->out_pin_node[0]) == AC_JACK_SPEAKER) + return create_output_mixers(codec, types_speaker); + else + return create_output_mixers(codec, types_line); } return 0; } From 9d19f48cfe2570562c2c6226780a7ca627b0f1f1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 6 Sep 2006 14:27:46 +0200 Subject: [PATCH 1002/1063] [ALSA] Add pcm_class attribute to PCM sysfs entry This patch adds a new attribute, pcm_class, to each PCM sysfs entry. It's useful to detect what kind of PCM stream is, for example, HAL can check whether it's a modem or not. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- include/sound/core.h | 4 ++++ sound/core/pcm.c | 24 +++++++++++++++++++ sound/core/sound.c | 56 ++++++++++++++++++++++++++++++++++---------- 3 files changed, 72 insertions(+), 12 deletions(-) diff --git a/include/sound/core.h b/include/sound/core.h index 1359c532b68e..b056ea925ecf 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -26,6 +26,7 @@ #include /* struct mutex */ #include /* struct rw_semaphore */ #include /* pm_message_t */ +#include /* forward declarations */ #ifdef CONFIG_PCI @@ -186,6 +187,7 @@ struct snd_minor { int device; /* device number */ const struct file_operations *f_ops; /* file operations */ void *private_data; /* private data for f_ops->open */ + struct class_device *class_dev; /* class device for sysfs */ }; /* sound.c */ @@ -200,6 +202,8 @@ int snd_register_device(int type, struct snd_card *card, int dev, const char *name); int snd_unregister_device(int type, struct snd_card *card, int dev); void *snd_lookup_minor_data(unsigned int minor, int type); +int snd_add_device_sysfs_file(int type, struct snd_card *card, int dev, + const struct class_device_attribute *attr); #ifdef CONFIG_SND_OSSEMUL int snd_register_oss_device(int type, struct snd_card *card, int dev, diff --git a/sound/core/pcm.c b/sound/core/pcm.c index ed3b09469560..bf8f412988b8 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -907,6 +907,28 @@ void snd_pcm_detach_substream(struct snd_pcm_substream *substream) substream->pstr->substream_opened--; } +static ssize_t show_pcm_class(struct class_device *class_device, char *buf) +{ + struct snd_pcm *pcm; + const char *str; + static const char *strs[SNDRV_PCM_CLASS_LAST + 1] = { + [SNDRV_PCM_CLASS_GENERIC] = "generic", + [SNDRV_PCM_CLASS_MULTI] = "multi", + [SNDRV_PCM_CLASS_MODEM] = "modem", + [SNDRV_PCM_CLASS_DIGITIZER] = "digitizer", + }; + + if (! (pcm = class_get_devdata(class_device)) || + pcm->dev_class > SNDRV_PCM_CLASS_LAST) + str = "none"; + else + str = strs[pcm->dev_class]; + return snprintf(buf, PAGE_SIZE, "%s\n", str); +} + +static struct class_device_attribute pcm_attrs = + __ATTR(pcm_class, S_IRUGO, show_pcm_class, NULL); + static int snd_pcm_dev_register(struct snd_device *device) { int cidx, err; @@ -945,6 +967,8 @@ static int snd_pcm_dev_register(struct snd_device *device) mutex_unlock(®ister_mutex); return err; } + snd_add_device_sysfs_file(devtype, pcm->card, pcm->device, + &pcm_attrs); for (substream = pcm->streams[cidx].substream; substream; substream = substream->next) snd_pcm_timer_init(substream); } diff --git a/sound/core/sound.c b/sound/core/sound.c index b4430db3fa4c..efa476c5210a 100644 --- a/sound/core/sound.c +++ b/sound/core/sound.c @@ -268,7 +268,11 @@ int snd_register_device(int type, struct snd_card *card, int dev, snd_minors[minor] = preg; if (card) device = card->dev; - class_device_create(sound_class, NULL, MKDEV(major, minor), device, "%s", name); + preg->class_dev = class_device_create(sound_class, NULL, + MKDEV(major, minor), + device, "%s", name); + if (preg->class_dev) + class_set_devdata(preg->class_dev, private_data); mutex_unlock(&sound_mutex); return 0; @@ -276,6 +280,24 @@ int snd_register_device(int type, struct snd_card *card, int dev, EXPORT_SYMBOL(snd_register_device); +/* find the matching minor record + * return the index of snd_minor, or -1 if not found + */ +static int find_snd_minor(int type, struct snd_card *card, int dev) +{ + int cardnum, minor; + struct snd_minor *mptr; + + cardnum = card ? card->number : -1; + for (minor = 0; minor < ARRAY_SIZE(snd_minors); ++minor) + if ((mptr = snd_minors[minor]) != NULL && + mptr->type == type && + mptr->card == cardnum && + mptr->device == dev) + return minor; + return -1; +} + /** * snd_unregister_device - unregister the device on the given card * @type: the device type, SNDRV_DEVICE_TYPE_XXX @@ -289,32 +311,42 @@ EXPORT_SYMBOL(snd_register_device); */ int snd_unregister_device(int type, struct snd_card *card, int dev) { - int cardnum, minor; - struct snd_minor *mptr; + int minor; - cardnum = card ? card->number : -1; mutex_lock(&sound_mutex); - for (minor = 0; minor < ARRAY_SIZE(snd_minors); ++minor) - if ((mptr = snd_minors[minor]) != NULL && - mptr->type == type && - mptr->card == cardnum && - mptr->device == dev) - break; - if (minor == ARRAY_SIZE(snd_minors)) { + minor = find_snd_minor(type, card, dev); + if (minor < 0) { mutex_unlock(&sound_mutex); return -EINVAL; } class_device_destroy(sound_class, MKDEV(major, minor)); + kfree(snd_minors[minor]); snd_minors[minor] = NULL; mutex_unlock(&sound_mutex); - kfree(mptr); return 0; } EXPORT_SYMBOL(snd_unregister_device); +int snd_add_device_sysfs_file(int type, struct snd_card *card, int dev, + const struct class_device_attribute *attr) +{ + int minor, ret = -EINVAL; + struct class_device *cdev; + + mutex_lock(&sound_mutex); + minor = find_snd_minor(type, card, dev); + if (minor >= 0 && (cdev = snd_minors[minor]->class_dev) != NULL) + ret = class_device_create_file(cdev, attr); + mutex_unlock(&sound_mutex); + return ret; + +} + +EXPORT_SYMBOL(snd_add_device_sysfs_file); + #ifdef CONFIG_PROC_FS /* * INFO PART From cd417d4fe89638a2848980cb389b9781d4913173 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 6 Sep 2006 16:03:11 +0200 Subject: [PATCH 1003/1063] [ALSA] hda-codec - Add support for LG LW25 laptop Added the support for LG LW25 laptop with ALC880 codec. It's the same codec model as LG LW20 (model=lg-lw). Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- Documentation/sound/alsa/ALSA-Configuration.txt | 2 +- sound/pci/hda/patch_realtek.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index a788dd7bc790..1b749947233c 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt @@ -785,7 +785,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. uniwill 3-jack F1734 2-jack lg LG laptop (m1 express dual) - lg-lw LG LW20 laptop + lg-lw LG LW20/LW25 laptop tcl TCL S700 clevo Clevo laptops (m520G, m665n) test for testing/debugging purpose, almost all controls can be diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 65903812b307..d037051b66b4 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2270,6 +2270,7 @@ static struct hda_board_config alc880_cfg_tbl[] = { { .modelname = "lg-lw", .config = ALC880_LG_LW }, { .pci_subvendor = 0x1854, .pci_subdevice = 0x0018, .config = ALC880_LG_LW }, + { .pci_subvendor = 0x1854, .pci_subdevice = 0x0077, .config = ALC880_LG_LW }, #ifdef CONFIG_SND_DEBUG { .modelname = "test", .config = ALC880_TEST }, From dafbbb1fdbf103b24d0f7aa645625b6bd558c896 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 7 Sep 2006 12:40:00 +0200 Subject: [PATCH 1004/1063] [ALSA] hda-intel - Fix pci_disable_msi() call Fix the order to call pci_disable_msi() to be after free_irq(). (Otherwise pci_disable_msi() bugs you.) Also, added a description of disable_msi option to documentation. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- Documentation/sound/alsa/ALSA-Configuration.txt | 1 + sound/pci/hda/hda_intel.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index 1b749947233c..e6b57dd46a4f 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt @@ -758,6 +758,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. position_fix - Fix DMA pointer (0 = auto, 1 = none, 2 = POSBUF, 3 = FIFO size) single_cmd - Use single immediate commands to communicate with codecs (for debugging only) + disable_msi - Disable Message Signaled Interrupt (MSI) This module supports one card and autoprobe. diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index cc50d13ee90c..bfd74a526b85 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1422,8 +1422,9 @@ static int azx_free(struct azx *chip) } if (chip->irq >= 0) { - pci_disable_msi(chip->pci); free_irq(chip->irq, (void*)chip); + if (!disable_msi) + pci_disable_msi(chip->pci); } if (chip->remap_addr) iounmap(chip->remap_addr); From e08a007d1041e0bc3df6b855043d8efde91851aa Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 7 Sep 2006 17:52:14 +0200 Subject: [PATCH 1005/1063] [ALSA] hda-codec - Fix SPDIF device number of ALC codecs Assign the SPDIF always to the secondary device (dev#1) to keep the same configuration. Move the optional capture device to the third device (dev#2). hda_intel now just ignores the NULL entries in the pcm arrays from codecs. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/hda_intel.c | 10 +++++++-- sound/pci/hda/patch_realtek.c | 38 ++++++++++++++++++----------------- 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index bfd74a526b85..6309e0c67e6a 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1242,7 +1242,12 @@ static int __devinit create_codec_pcm(struct azx *chip, struct hda_codec *codec, struct snd_pcm *pcm; struct azx_pcm *apcm; - snd_assert(cpcm->stream[0].substreams || cpcm->stream[1].substreams, return -EINVAL); + /* if no substreams are defined for both playback and capture, + * it's just a placeholder. ignore it. + */ + if (!cpcm->stream[0].substreams && !cpcm->stream[1].substreams) + return 0; + snd_assert(cpcm->name, return -EINVAL); err = snd_pcm_new(chip->card, cpcm->name, pcm_dev, @@ -1268,7 +1273,8 @@ static int __devinit create_codec_pcm(struct azx *chip, struct hda_codec *codec, snd_dma_pci_data(chip->pci), 1024 * 64, 1024 * 128); chip->pcm[pcm_dev] = pcm; - chip->pcm_devs = pcm_dev + 1; + if (chip->pcm_devs < pcm_dev + 1) + chip->pcm_devs = pcm_dev + 1; return 0; } diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d037051b66b4..ba9e050e2012 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1796,25 +1796,9 @@ static int alc_build_pcms(struct hda_codec *codec) } } - /* If the use of more than one ADC is requested for the current - * model, configure a second analog capture-only PCM. - */ - if (spec->num_adc_nids > 1) { - codec->num_pcms++; - info++; - info->name = spec->stream_name_analog; - /* No playback stream for second PCM */ - info->stream[SNDRV_PCM_STREAM_PLAYBACK] = alc_pcm_null_playback; - info->stream[SNDRV_PCM_STREAM_PLAYBACK].nid = 0; - if (spec->stream_analog_capture) { - snd_assert(spec->adc_nids, return -EINVAL); - info->stream[SNDRV_PCM_STREAM_CAPTURE] = *(spec->stream_analog_capture); - info->stream[SNDRV_PCM_STREAM_CAPTURE].nid = spec->adc_nids[1]; - } - } - + /* SPDIF for stream index #1 */ if (spec->multiout.dig_out_nid || spec->dig_in_nid) { - codec->num_pcms++; + codec->num_pcms = 2; info++; info->name = spec->stream_name_digital; if (spec->multiout.dig_out_nid && @@ -1829,6 +1813,24 @@ static int alc_build_pcms(struct hda_codec *codec) } } + /* If the use of more than one ADC is requested for the current + * model, configure a second analog capture-only PCM. + */ + /* Additional Analaog capture for index #2 */ + if (spec->num_adc_nids > 1 && spec->stream_analog_capture && + spec->adc_nids) { + codec->num_pcms = 3; + info++; + info->name = spec->stream_name_analog; + /* No playback stream for second PCM */ + info->stream[SNDRV_PCM_STREAM_PLAYBACK] = alc_pcm_null_playback; + info->stream[SNDRV_PCM_STREAM_PLAYBACK].nid = 0; + if (spec->stream_analog_capture) { + info->stream[SNDRV_PCM_STREAM_CAPTURE] = *(spec->stream_analog_capture); + info->stream[SNDRV_PCM_STREAM_CAPTURE].nid = spec->adc_nids[1]; + } + } + return 0; } From 8f88820ee49359ea33af42845456ce9dbf54d39a Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Thu, 7 Sep 2006 18:07:46 +0200 Subject: [PATCH 1006/1063] [ALSA] Fix WM9705 AC97 patch build error This patch fixes a build error (introduced by me) in ac97_patch.c wrt WM9705 touchscreen. o Removed spurious '3D' from character after |= operation (0x3D is ASCII for '=') Signed-off-by: Liam Girdwood Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/ac97/ac97_patch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c index 9be4ceb6838e..dc28b111a06d 100644 --- a/sound/pci/ac97/ac97_patch.c +++ b/sound/pci/ac97/ac97_patch.c @@ -481,7 +481,7 @@ int patch_wolfson05(struct snd_ac97 * ac97) ac97->build_ops = &patch_wolfson_wm9705_ops; #ifdef CONFIG_TOUCHSCREEN_WM9705 /* WM9705 touchscreen uses AUX and VIDEO for touch */ - ac97->flags |=3D AC97_HAS_NO_VIDEO | AC97_HAS_NO_AUX; + ac97->flags |= AC97_HAS_NO_VIDEO | AC97_HAS_NO_AUX; #endif return 0; } From 854b66e44260320c21ebe4b8a18e189f2e45b5be Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 8 Sep 2006 12:27:38 +0200 Subject: [PATCH 1007/1063] [ALSA] ak4xxx - Remove bogus IPGA controls Remove IPGA volume controls and merge the IPGA range to ADC volume controls. These two volumes are not really independent but connected simply in different ranges 0-0x7f and 0x80-max. It doesn't make sense to provide two controls. Since both 0x7f and 0x80 specify 0dB, a hack is needed for IPGA range to skip 0x80 (increment one) for such controls. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- include/sound/ak4xxx-adda.h | 10 --- sound/i2c/other/ak4xxx-adda.c | 128 +++++----------------------------- sound/pci/ice1712/revo.c | 1 - 3 files changed, 18 insertions(+), 121 deletions(-) diff --git a/include/sound/ak4xxx-adda.h b/include/sound/ak4xxx-adda.h index 026e4072a9a1..d0deca669b92 100644 --- a/include/sound/ak4xxx-adda.h +++ b/include/sound/ak4xxx-adda.h @@ -48,7 +48,6 @@ struct snd_akm4xxx_dac_channel { /* ADC labels and channels */ struct snd_akm4xxx_adc_channel { char *name; /* capture gain volume label */ - char *gain_name; /* IPGA */ char *switch_name; /* capture switch */ unsigned int num_channels; }; @@ -91,13 +90,4 @@ int snd_akm4xxx_build_controls(struct snd_akm4xxx *ak); #define snd_akm4xxx_set_vol(ak,chip,reg,val) \ ((ak)->volumes[(chip) * 16 + (reg)] = (val)) -/* Warning: IPGA is tricky - we assume the addr + 4 is unused - * so far, it's OK for all AK codecs with IPGA: - * AK4524, AK4528 and EK5365 - */ -#define snd_akm4xxx_get_ipga(ak,chip,reg) \ - snd_akm4xxx_get_vol(ak, chip, (reg) + 4) -#define snd_akm4xxx_set_ipga(ak,chip,reg,val) \ - snd_akm4xxx_set_vol(ak, chip, (reg) + 4, val) - #endif /* __SOUND_AK4XXX_ADDA_H */ diff --git a/sound/i2c/other/ak4xxx-adda.c b/sound/i2c/other/ak4xxx-adda.c index c34cb4684607..5da49e2eb350 100644 --- a/sound/i2c/other/ak4xxx-adda.c +++ b/sound/i2c/other/ak4xxx-adda.c @@ -43,10 +43,7 @@ void snd_akm4xxx_write(struct snd_akm4xxx *ak, int chip, unsigned char reg, ak->ops.write(ak, chip, reg, val); /* save the data */ - /* don't overwrite with IPGA data */ - if ((ak->type != SND_AK4524 && ak->type != SND_AK5365) || - (reg != 0x04 && reg != 0x05) || (val & 0x80) == 0) - snd_akm4xxx_set(ak, chip, reg, val); + snd_akm4xxx_set(ak, chip, reg, val); ak->ops.unlock(ak, chip); } @@ -70,12 +67,6 @@ static void ak4524_reset(struct snd_akm4xxx *ak, int state) for (reg = 0x04; reg < maxreg; reg++) snd_akm4xxx_write(ak, chip, reg, snd_akm4xxx_get(ak, chip, reg)); - if (ak->type == SND_AK4528) - continue; - /* IPGA */ - for (reg = 0x04; reg < 0x06; reg++) - snd_akm4xxx_write(ak, chip, reg, - snd_akm4xxx_get_ipga(ak, chip, reg) | 0x80); } } @@ -175,7 +166,6 @@ static DECLARE_TLV_DB_SCALE(db_scale_vol_datt, -6350, 50, 1); static DECLARE_TLV_DB_SCALE(db_scale_8bit, -12750, 50, 1); static DECLARE_TLV_DB_SCALE(db_scale_7bit, -6350, 50, 1); static DECLARE_TLV_DB_LINEAR(db_scale_linear, TLV_DB_GAIN_MUTE, 0); -static DECLARE_TLV_DB_SCALE(db_scale_ipga, 0, 50, 0); /* * initialize all the ak4xxx chips @@ -190,8 +180,6 @@ void snd_akm4xxx_init(struct snd_akm4xxx *ak) 0x01, 0x03, /* 1: ADC/DAC enable */ 0x04, 0x00, /* 4: ADC left muted */ 0x05, 0x00, /* 5: ADC right muted */ - 0x04, 0x80, /* 4: ADC IPGA gain 0dB */ - 0x05, 0x80, /* 5: ADC IPGA gain 0dB */ 0x06, 0x00, /* 6: DAC left muted */ 0x07, 0x00, /* 7: DAC right muted */ 0xff, 0xff @@ -324,13 +312,15 @@ EXPORT_SYMBOL(snd_akm4xxx_init); /* * Mixer callbacks */ +#define AK_IPGA (1<<20) /* including IPGA */ #define AK_VOL_CVT (1<<21) /* need dB conversion */ #define AK_NEEDSMSB (1<<22) /* need MSB update bit */ #define AK_INVERT (1<<23) /* data is inverted */ #define AK_GET_CHIP(val) (((val) >> 8) & 0xff) #define AK_GET_ADDR(val) ((val) & 0xff) -#define AK_GET_SHIFT(val) (((val) >> 16) & 0x1f) +#define AK_GET_SHIFT(val) (((val) >> 16) & 0x0f) #define AK_GET_VOL_CVT(val) (((val) >> 21) & 1) +#define AK_GET_IPGA(val) (((val) >> 20) & 1) #define AK_GET_NEEDSMSB(val) (((val) >> 22) & 1) #define AK_GET_INVERT(val) (((val) >> 23) & 1) #define AK_GET_MASK(val) (((val) >> 24) & 0xff) @@ -371,8 +361,10 @@ static int put_ak_reg(struct snd_kcontrol *kcontrol, int addr, return 0; snd_akm4xxx_set_vol(ak, chip, addr, nval); - if (AK_GET_VOL_CVT(kcontrol->private_value)) + if (AK_GET_VOL_CVT(kcontrol->private_value) && nval < 128) nval = vol_cvt_datt[nval]; + if (AK_GET_IPGA(kcontrol->private_value) && nval >= 128) + nval++; /* need to correct + 1 since both 127 and 128 are 0dB */ if (AK_GET_INVERT(kcontrol->private_value)) nval = mask - nval; if (AK_GET_NEEDSMSB(kcontrol->private_value)) @@ -424,68 +416,6 @@ static int snd_akm4xxx_stereo_volume_put(struct snd_kcontrol *kcontrol, return change; } -#define snd_akm4xxx_ipga_gain_info snd_akm4xxx_volume_info - -static int snd_akm4xxx_ipga_gain_get(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol); - int chip = AK_GET_CHIP(kcontrol->private_value); - int addr = AK_GET_ADDR(kcontrol->private_value); - - ucontrol->value.integer.value[0] = - snd_akm4xxx_get_ipga(ak, chip, addr); - return 0; -} - -static int put_ak_ipga(struct snd_kcontrol *kcontrol, int addr, - unsigned char nval) -{ - struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol); - int chip = AK_GET_CHIP(kcontrol->private_value); - - if (snd_akm4xxx_get_ipga(ak, chip, addr) == nval) - return 0; - snd_akm4xxx_set_ipga(ak, chip, addr, nval); - snd_akm4xxx_write(ak, chip, addr, nval | 0x80); /* need MSB */ - return 1; -} - -static int snd_akm4xxx_ipga_gain_put(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - return put_ak_ipga(kcontrol, AK_GET_ADDR(kcontrol->private_value), - ucontrol->value.integer.value[0]); -} - -#define snd_akm4xxx_stereo_gain_info snd_akm4xxx_stereo_volume_info - -static int snd_akm4xxx_stereo_gain_get(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol); - int chip = AK_GET_CHIP(kcontrol->private_value); - int addr = AK_GET_ADDR(kcontrol->private_value); - - ucontrol->value.integer.value[0] = - snd_akm4xxx_get_ipga(ak, chip, addr); - ucontrol->value.integer.value[1] = - snd_akm4xxx_get_ipga(ak, chip, addr + 1); - return 0; -} - -static int snd_akm4xxx_stereo_gain_put(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - int addr = AK_GET_ADDR(kcontrol->private_value); - int change; - - change = put_ak_ipga(kcontrol, addr, ucontrol->value.integer.value[0]); - change |= put_ak_ipga(kcontrol, addr + 1, - ucontrol->value.integer.value[1]); - return change; -} - static int snd_akm4xxx_deemphasis_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { @@ -702,35 +632,15 @@ static int build_adc_controls(struct snd_akm4xxx *ak) knew.put = snd_akm4xxx_volume_put; } /* register 4 & 5 */ - knew.private_value = - AK_COMPOSE(idx/2, (idx%2) + 4, 0, 127) | - AK_VOL_CVT; - knew.tlv.p = db_scale_vol_datt; - err = snd_ctl_add(ak->card, snd_ctl_new1(&knew, ak)); - if (err < 0) - return err; - - if (! ak->adc_info || ! ak->adc_info[mixer_ch].gain_name) - knew.name = "IPGA Analog Capture Volume"; + if (ak->type == SND_AK5365) + knew.private_value = + AK_COMPOSE(idx/2, (idx%2) + 4, 0, 151) | + AK_VOL_CVT | AK_IPGA; else - knew.name = ak->adc_info[mixer_ch].gain_name; - if (num_stereo == 2) { - knew.info = snd_akm4xxx_stereo_gain_info; - knew.get = snd_akm4xxx_stereo_gain_get; - knew.put = snd_akm4xxx_stereo_gain_put; - } else { - knew.info = snd_akm4xxx_ipga_gain_info; - knew.get = snd_akm4xxx_ipga_gain_get; - knew.put = snd_akm4xxx_ipga_gain_put; - } - /* register 4 & 5 */ - if (ak->type == SND_AK4524) - knew.private_value = AK_COMPOSE(idx/2, (idx%2) + 4, 0, - 24); - else /* AK5365 */ - knew.private_value = AK_COMPOSE(idx/2, (idx%2) + 4, 0, - 36); - knew.tlv.p = db_scale_ipga; + knew.private_value = + AK_COMPOSE(idx/2, (idx%2) + 4, 0, 163) | + AK_VOL_CVT | AK_IPGA; + knew.tlv.p = db_scale_vol_datt; err = snd_ctl_add(ak->card, snd_ctl_new1(&knew, ak)); if (err < 0) return err; @@ -811,11 +721,9 @@ int snd_akm4xxx_build_controls(struct snd_akm4xxx *ak) if (err < 0) return err; - if (ak->type == SND_AK4524 || ak->type == SND_AK5365) { - err = build_adc_controls(ak); - if (err < 0) - return err; - } + err = build_adc_controls(ak); + if (err < 0) + return err; if (ak->type == SND_AK4355 || ak->type == SND_AK4358) num_emphs = 1; diff --git a/sound/pci/ice1712/revo.c b/sound/pci/ice1712/revo.c index c9eefa9bbfff..bf98ea34feb0 100644 --- a/sound/pci/ice1712/revo.c +++ b/sound/pci/ice1712/revo.c @@ -110,7 +110,6 @@ static struct snd_akm4xxx_dac_channel revo51_dac[] = { static struct snd_akm4xxx_adc_channel revo51_adc[] = { { .name = "PCM Capture Volume", - .gain_name = "PCM Capture Gain Volume", .switch_name = "PCM Capture Switch", .num_channels = 2 }, From 43001c9515cf87935c50e84b3e27b1f3b3776b5d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 8 Sep 2006 12:30:03 +0200 Subject: [PATCH 1008/1063] [ALSA] hda-intel - Fix suspend/resume with MSI Fixed suspend/resume with MSI enablement. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/hda_intel.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 6309e0c67e6a..4d2df771112e 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1381,6 +1381,10 @@ static int azx_suspend(struct pci_dev *pci, pm_message_t state) snd_pcm_suspend_all(chip->pcm[i]); snd_hda_suspend(chip->bus, state); azx_free_cmd_io(chip); + if (chip->irq >= 0) + free_irq(chip->irq, chip); + if (!disable_msi) + pci_disable_msi(chip->pci); pci_disable_device(pci); pci_save_state(pci); return 0; @@ -1393,6 +1397,12 @@ static int azx_resume(struct pci_dev *pci) pci_restore_state(pci); pci_enable_device(pci); + if (!disable_msi) + pci_enable_msi(pci); + /* FIXME: need proper error handling */ + request_irq(pci->irq, azx_interrupt, IRQF_DISABLED|IRQF_SHARED, + "HDA Intel", chip); + chip->irq = pci->irq; pci_set_master(pci); azx_init_chip(chip); snd_hda_resume(chip->bus); From 307192065c55dbc70159037c1e3006a9f761192b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 17 Sep 2006 21:59:25 +0200 Subject: [PATCH 1009/1063] [ALSA] aoa: add locking to tas codec Looks like I completely forgot to do this. This patch adds locking to the tas codec so two userspace programs can't hit the controls at the same time. Tested on my powerbook, but I obviously can't find any problems even without it since it doesn't do SMP. Signed-off-by: Johannes Berg Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/aoa/codecs/snd-aoa-codec-tas.c | 96 ++++++++++++++++++++++++---- 1 file changed, 83 insertions(+), 13 deletions(-) diff --git a/sound/aoa/codecs/snd-aoa-codec-tas.c b/sound/aoa/codecs/snd-aoa-codec-tas.c index 16c0b6b0a805..2ef55a17917c 100644 --- a/sound/aoa/codecs/snd-aoa-codec-tas.c +++ b/sound/aoa/codecs/snd-aoa-codec-tas.c @@ -66,6 +66,8 @@ #include #include #include +#include + MODULE_AUTHOR("Johannes Berg "); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("tas codec driver for snd-aoa"); @@ -91,6 +93,10 @@ struct tas { u8 bass, treble; u8 acr; int drc_range; + /* protects hardware access against concurrency from + * userspace when hitting controls and during + * codec init/suspend/resume */ + struct mutex mtx; }; static int tas_reset_init(struct tas *tas); @@ -231,8 +237,10 @@ static int tas_snd_vol_get(struct snd_kcontrol *kcontrol, { struct tas *tas = snd_kcontrol_chip(kcontrol); + mutex_lock(&tas->mtx); ucontrol->value.integer.value[0] = tas->cached_volume_l; ucontrol->value.integer.value[1] = tas->cached_volume_r; + mutex_unlock(&tas->mtx); return 0; } @@ -241,14 +249,18 @@ static int tas_snd_vol_put(struct snd_kcontrol *kcontrol, { struct tas *tas = snd_kcontrol_chip(kcontrol); + mutex_lock(&tas->mtx); if (tas->cached_volume_l == ucontrol->value.integer.value[0] - && tas->cached_volume_r == ucontrol->value.integer.value[1]) + && tas->cached_volume_r == ucontrol->value.integer.value[1]) { + mutex_unlock(&tas->mtx); return 0; + } tas->cached_volume_l = ucontrol->value.integer.value[0]; tas->cached_volume_r = ucontrol->value.integer.value[1]; if (tas->hw_enabled) tas_set_volume(tas); + mutex_unlock(&tas->mtx); return 1; } @@ -276,8 +288,10 @@ static int tas_snd_mute_get(struct snd_kcontrol *kcontrol, { struct tas *tas = snd_kcontrol_chip(kcontrol); + mutex_lock(&tas->mtx); ucontrol->value.integer.value[0] = !tas->mute_l; ucontrol->value.integer.value[1] = !tas->mute_r; + mutex_unlock(&tas->mtx); return 0; } @@ -286,14 +300,18 @@ static int tas_snd_mute_put(struct snd_kcontrol *kcontrol, { struct tas *tas = snd_kcontrol_chip(kcontrol); + mutex_lock(&tas->mtx); if (tas->mute_l == !ucontrol->value.integer.value[0] - && tas->mute_r == !ucontrol->value.integer.value[1]) + && tas->mute_r == !ucontrol->value.integer.value[1]) { + mutex_unlock(&tas->mtx); return 0; + } tas->mute_l = !ucontrol->value.integer.value[0]; tas->mute_r = !ucontrol->value.integer.value[1]; if (tas->hw_enabled) tas_set_volume(tas); + mutex_unlock(&tas->mtx); return 1; } @@ -322,8 +340,10 @@ static int tas_snd_mixer_get(struct snd_kcontrol *kcontrol, struct tas *tas = snd_kcontrol_chip(kcontrol); int idx = kcontrol->private_value; + mutex_lock(&tas->mtx); ucontrol->value.integer.value[0] = tas->mixer_l[idx]; ucontrol->value.integer.value[1] = tas->mixer_r[idx]; + mutex_unlock(&tas->mtx); return 0; } @@ -334,15 +354,19 @@ static int tas_snd_mixer_put(struct snd_kcontrol *kcontrol, struct tas *tas = snd_kcontrol_chip(kcontrol); int idx = kcontrol->private_value; + mutex_lock(&tas->mtx); if (tas->mixer_l[idx] == ucontrol->value.integer.value[0] - && tas->mixer_r[idx] == ucontrol->value.integer.value[1]) + && tas->mixer_r[idx] == ucontrol->value.integer.value[1]) { + mutex_unlock(&tas->mtx); return 0; + } tas->mixer_l[idx] = ucontrol->value.integer.value[0]; tas->mixer_r[idx] = ucontrol->value.integer.value[1]; if (tas->hw_enabled) tas_set_mixer(tas); + mutex_unlock(&tas->mtx); return 1; } @@ -375,7 +399,9 @@ static int tas_snd_drc_range_get(struct snd_kcontrol *kcontrol, { struct tas *tas = snd_kcontrol_chip(kcontrol); + mutex_lock(&tas->mtx); ucontrol->value.integer.value[0] = tas->drc_range; + mutex_unlock(&tas->mtx); return 0; } @@ -384,12 +410,16 @@ static int tas_snd_drc_range_put(struct snd_kcontrol *kcontrol, { struct tas *tas = snd_kcontrol_chip(kcontrol); - if (tas->drc_range == ucontrol->value.integer.value[0]) + mutex_lock(&tas->mtx); + if (tas->drc_range == ucontrol->value.integer.value[0]) { + mutex_unlock(&tas->mtx); return 0; + } tas->drc_range = ucontrol->value.integer.value[0]; if (tas->hw_enabled) tas3004_set_drc(tas); + mutex_unlock(&tas->mtx); return 1; } @@ -417,7 +447,9 @@ static int tas_snd_drc_switch_get(struct snd_kcontrol *kcontrol, { struct tas *tas = snd_kcontrol_chip(kcontrol); + mutex_lock(&tas->mtx); ucontrol->value.integer.value[0] = tas->drc_enabled; + mutex_unlock(&tas->mtx); return 0; } @@ -426,12 +458,16 @@ static int tas_snd_drc_switch_put(struct snd_kcontrol *kcontrol, { struct tas *tas = snd_kcontrol_chip(kcontrol); - if (tas->drc_enabled == ucontrol->value.integer.value[0]) + mutex_lock(&tas->mtx); + if (tas->drc_enabled == ucontrol->value.integer.value[0]) { + mutex_unlock(&tas->mtx); return 0; + } tas->drc_enabled = ucontrol->value.integer.value[0]; if (tas->hw_enabled) tas3004_set_drc(tas); + mutex_unlock(&tas->mtx); return 1; } @@ -463,7 +499,9 @@ static int tas_snd_capture_source_get(struct snd_kcontrol *kcontrol, { struct tas *tas = snd_kcontrol_chip(kcontrol); + mutex_lock(&tas->mtx); ucontrol->value.enumerated.item[0] = !!(tas->acr & TAS_ACR_INPUT_B); + mutex_unlock(&tas->mtx); return 0; } @@ -471,15 +509,21 @@ static int tas_snd_capture_source_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct tas *tas = snd_kcontrol_chip(kcontrol); - int oldacr = tas->acr; + int oldacr; + + mutex_lock(&tas->mtx); + oldacr = tas->acr; tas->acr &= ~TAS_ACR_INPUT_B; if (ucontrol->value.enumerated.item[0]) tas->acr |= TAS_ACR_INPUT_B; - if (oldacr == tas->acr) + if (oldacr == tas->acr) { + mutex_unlock(&tas->mtx); return 0; + } if (tas->hw_enabled) tas_write_reg(tas, TAS_REG_ACR, 1, &tas->acr); + mutex_unlock(&tas->mtx); return 1; } @@ -518,7 +562,9 @@ static int tas_snd_treble_get(struct snd_kcontrol *kcontrol, { struct tas *tas = snd_kcontrol_chip(kcontrol); + mutex_lock(&tas->mtx); ucontrol->value.integer.value[0] = tas->treble; + mutex_unlock(&tas->mtx); return 0; } @@ -527,12 +573,16 @@ static int tas_snd_treble_put(struct snd_kcontrol *kcontrol, { struct tas *tas = snd_kcontrol_chip(kcontrol); - if (tas->treble == ucontrol->value.integer.value[0]) + mutex_lock(&tas->mtx); + if (tas->treble == ucontrol->value.integer.value[0]) { + mutex_unlock(&tas->mtx); return 0; + } tas->treble = ucontrol->value.integer.value[0]; if (tas->hw_enabled) tas_set_treble(tas); + mutex_unlock(&tas->mtx); return 1; } @@ -560,7 +610,9 @@ static int tas_snd_bass_get(struct snd_kcontrol *kcontrol, { struct tas *tas = snd_kcontrol_chip(kcontrol); + mutex_lock(&tas->mtx); ucontrol->value.integer.value[0] = tas->bass; + mutex_unlock(&tas->mtx); return 0; } @@ -569,12 +621,16 @@ static int tas_snd_bass_put(struct snd_kcontrol *kcontrol, { struct tas *tas = snd_kcontrol_chip(kcontrol); - if (tas->bass == ucontrol->value.integer.value[0]) + mutex_lock(&tas->mtx); + if (tas->bass == ucontrol->value.integer.value[0]) { + mutex_unlock(&tas->mtx); return 0; + } tas->bass = ucontrol->value.integer.value[0]; if (tas->hw_enabled) tas_set_bass(tas); + mutex_unlock(&tas->mtx); return 1; } @@ -628,16 +684,16 @@ static int tas_reset_init(struct tas *tas) tmp = TAS_MCS_SCLK64 | TAS_MCS_SPORT_MODE_I2S | TAS_MCS_SPORT_WL_24BIT; if (tas_write_reg(tas, TAS_REG_MCS, 1, &tmp)) - return -ENODEV; + goto outerr; tas->acr |= TAS_ACR_ANALOG_PDOWN | TAS_ACR_B_MONAUREAL | TAS_ACR_B_MON_SEL_RIGHT; if (tas_write_reg(tas, TAS_REG_ACR, 1, &tas->acr)) - return -ENODEV; + goto outerr; tmp = 0; if (tas_write_reg(tas, TAS_REG_MCS2, 1, &tmp)) - return -ENODEV; + goto outerr; tas3004_set_drc(tas); @@ -649,9 +705,11 @@ static int tas_reset_init(struct tas *tas) tas->acr &= ~TAS_ACR_ANALOG_PDOWN; if (tas_write_reg(tas, TAS_REG_ACR, 1, &tas->acr)) - return -ENODEV; + goto outerr; return 0; + outerr: + return -ENODEV; } static int tas_switch_clock(struct codec_info_item *cii, enum clock_switch clock) @@ -666,11 +724,13 @@ static int tas_switch_clock(struct codec_info_item *cii, enum clock_switch clock break; case CLOCK_SWITCH_SLAVE: /* Clocks are back, re-init the codec */ + mutex_lock(&tas->mtx); tas_reset_init(tas); tas_set_volume(tas); tas_set_mixer(tas); tas->hw_enabled = 1; tas->codec.gpio->methods->all_amps_restore(tas->codec.gpio); + mutex_unlock(&tas->mtx); break; default: /* doesn't happen as of now */ @@ -684,19 +744,23 @@ static int tas_switch_clock(struct codec_info_item *cii, enum clock_switch clock * our i2c device is suspended, and then take note of that! */ static int tas_suspend(struct tas *tas) { + mutex_lock(&tas->mtx); tas->hw_enabled = 0; tas->acr |= TAS_ACR_ANALOG_PDOWN; tas_write_reg(tas, TAS_REG_ACR, 1, &tas->acr); + mutex_unlock(&tas->mtx); return 0; } static int tas_resume(struct tas *tas) { /* reset codec */ + mutex_lock(&tas->mtx); tas_reset_init(tas); tas_set_volume(tas); tas_set_mixer(tas); tas->hw_enabled = 1; + mutex_unlock(&tas->mtx); return 0; } @@ -739,11 +803,14 @@ static int tas_init_codec(struct aoa_codec *codec) return -EINVAL; } + mutex_lock(&tas->mtx); if (tas_reset_init(tas)) { printk(KERN_ERR PFX "tas failed to initialise\n"); + mutex_unlock(&tas->mtx); return -ENXIO; } tas->hw_enabled = 1; + mutex_unlock(&tas->mtx); if (tas->codec.soundbus_dev->attach_codec(tas->codec.soundbus_dev, aoa_get_card(), @@ -822,6 +889,7 @@ static int tas_create(struct i2c_adapter *adapter, if (!tas) return -ENOMEM; + mutex_init(&tas->mtx); tas->i2c.driver = &tas_driver; tas->i2c.adapter = adapter; tas->i2c.addr = addr; @@ -850,6 +918,7 @@ static int tas_create(struct i2c_adapter *adapter, detach: i2c_detach_client(&tas->i2c); fail: + mutex_destroy(&tas->mtx); kfree(tas); return -EINVAL; } @@ -908,6 +977,7 @@ static int tas_i2c_detach(struct i2c_client *client) /* power down codec chip */ tas_write_reg(tas, TAS_REG_ACR, 1, &tmp); + mutex_destroy(&tas->mtx); kfree(tas); return 0; } From 783eaf4671a4f5a95102aedb5a45e1f8adab945c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 17 Sep 2006 22:00:51 +0200 Subject: [PATCH 1010/1063] [ALSA] powermac - Fix Oops when conflicting with aoa driver Fixed Oops when conflictin with aoa driver due to lack of i2c initialization. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/ppc/keywest.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/ppc/keywest.c b/sound/ppc/keywest.c index 59482a4cd446..272ae38e9b18 100644 --- a/sound/ppc/keywest.c +++ b/sound/ppc/keywest.c @@ -117,6 +117,9 @@ int __init snd_pmac_tumbler_post_init(void) { int err; + if (!keywest_ctx || !keywest_ctx->client) + return -ENXIO; + if ((err = keywest_ctx->init_client(keywest_ctx)) < 0) { snd_printk(KERN_ERR "tumbler: %i :cannot initialize the MCS\n", err); return err; From 2b1181ed83ee8b0afbf9ba3e4f789f00375b2a17 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 17 Sep 2006 22:02:22 +0200 Subject: [PATCH 1011/1063] [ALSA] Add missing compat ioctls for ALSA control API Added the missing 32bit-compat ioctl entries for ALSA control API (espcially for recent additions of TLV stuff). Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/core/control_compat.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/core/control_compat.c b/sound/core/control_compat.c index 3c0161bb5ba4..ab48962c48ce 100644 --- a/sound/core/control_compat.c +++ b/sound/core/control_compat.c @@ -407,6 +407,10 @@ static inline long snd_ctl_ioctl_compat(struct file *file, unsigned int cmd, uns case SNDRV_CTL_IOCTL_POWER_STATE: case SNDRV_CTL_IOCTL_ELEM_LOCK: case SNDRV_CTL_IOCTL_ELEM_UNLOCK: + case SNDRV_CTL_IOCTL_ELEM_REMOVE: + case SNDRV_CTL_IOCTL_TLV_READ: + case SNDRV_CTL_IOCTL_TLV_WRITE: + case SNDRV_CTL_IOCTL_TLV_COMMAND: return snd_ctl_ioctl(file, cmd, (unsigned long)argp); case SNDRV_CTL_IOCTL_ELEM_LIST32: return snd_ctl_elem_list_compat(ctl->card, argp); From 5720fddd62367bb44335ec83f6371ce91e9ead12 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 17 Sep 2006 22:04:17 +0200 Subject: [PATCH 1012/1063] [ALSA] hda-codec - Add device id for Motorola si3054-compatible codec Added the device id for Motorola si3054-compatible modem codec on a Gateway laptop. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/patch_si3054.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_si3054.c b/sound/pci/hda/patch_si3054.c index 250242cd6c70..76ec3d75fa9e 100644 --- a/sound/pci/hda/patch_si3054.c +++ b/sound/pci/hda/patch_si3054.c @@ -298,6 +298,7 @@ struct hda_codec_preset snd_hda_preset_si3054[] = { { .id = 0x163c3055, .name = "Si3054", .patch = patch_si3054 }, { .id = 0x163c3155, .name = "Si3054", .patch = patch_si3054 }, { .id = 0x11c13026, .name = "Si3054", .patch = patch_si3054 }, + { .id = 0x10573057, .name = "Si3054", .patch = patch_si3054 }, {} }; From a922625126cc9bf593d801879a965b9f0eae6958 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 17 Sep 2006 22:05:54 +0200 Subject: [PATCH 1013/1063] [ALSA] hda-codec - Add vendor ids for Motorola and Conexant Added string entries for Motorola and Conexant vendor ids. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/hda_codec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index ff29d0f16903..e69db04b7eb8 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -51,8 +51,10 @@ struct hda_vendor_id { /* codec vendor labels */ static struct hda_vendor_id hda_vendor_ids[] = { { 0x10ec, "Realtek" }, + { 0x1057, "Motorola" }, { 0x11d4, "Analog Devices" }, { 0x13f6, "C-Media" }, + { 0x14f1, "Conexant" }, { 0x434d, "C-Media" }, { 0x8384, "SigmaTel" }, {} /* terminator */ From 33ef765131bcf82bc5fca3f25d8313fa4df93ce0 Mon Sep 17 00:00:00 2001 From: Nicolas Graziano Date: Tue, 19 Sep 2006 14:23:14 +0200 Subject: [PATCH 1014/1063] [ALSA] hda_intel prefer 24bit instead of 20bit If I understand the hda_intel code, for format > 20bit it only advertise the SNDRV_PCM_FMTBIT_S32_LE format and play it at 32 bit, 20 bit or 24 bit. But if the 20bit and 24bit are available, actually it prefer the 20bit format. This path is to prefer the 24bit format instead of 20bit. Signed-off-by: Nicolas Graziano Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/hda_codec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index e69db04b7eb8..8b2c080c85ae 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -1505,10 +1505,10 @@ int snd_hda_query_supported_pcm(struct hda_codec *codec, hda_nid_t nid, formats |= SNDRV_PCM_FMTBIT_S32_LE; if (val & AC_SUPPCM_BITS_32) bps = 32; - else if (val & AC_SUPPCM_BITS_20) - bps = 20; else if (val & AC_SUPPCM_BITS_24) bps = 24; + else if (val & AC_SUPPCM_BITS_20) + bps = 20; } } else if (streams == AC_SUPFMT_FLOAT32) { /* should be exclusive */ From eb06ed8f4c2440558ebf465e8baeac6367d90201 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 20 Sep 2006 17:10:27 +0200 Subject: [PATCH 1015/1063] [ALSA] hda-codec - Support multiple headphone pins Some machines have multiple headpohne pins (usually on the lpatop and on the docking station) while the current hda-codec driver assumes a single headphone pin. Now it supports multiple hp pins (at least for detection). The sigmatel 92xx code supports this new multiple hp pins. It detects all hp pins for auto-muting, too. Also, the driver checks speaker pins in addition. In some cases, all line-out, speaker and hp-pins coexist. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/hda_codec.c | 23 ++-- sound/pci/hda/hda_local.h | 3 +- sound/pci/hda/patch_analog.c | 4 +- sound/pci/hda/patch_realtek.c | 18 +-- sound/pci/hda/patch_sigmatel.c | 202 ++++++++++++++++++++++----------- 5 files changed, 164 insertions(+), 86 deletions(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 8b2c080c85ae..07360996caaa 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -2012,7 +2012,7 @@ static int is_in_nid_list(hda_nid_t nid, hda_nid_t *list) * in the order of front, rear, CLFE, side, ... * * If more extra outputs (speaker and headphone) are found, the pins are - * assisnged to hp_pin and speaker_pins[], respectively. If no line-out jack + * assisnged to hp_pins[] and speaker_pins[], respectively. If no line-out jack * is detected, one of speaker of HP pins is assigned as the primary * output, i.e. to line_out_pins[0]. So, line_outs is always positive * if any analog output exists. @@ -2074,7 +2074,10 @@ int snd_hda_parse_pin_def_config(struct hda_codec *codec, struct auto_pin_cfg *c cfg->speaker_outs++; break; case AC_JACK_HP_OUT: - cfg->hp_pin = nid; + if (cfg->hp_outs >= ARRAY_SIZE(cfg->hp_pins)) + continue; + cfg->hp_pins[cfg->hp_outs] = nid; + cfg->hp_outs++; break; case AC_JACK_MIC_IN: if (loc == AC_JACK_LOC_FRONT) @@ -2147,8 +2150,10 @@ int snd_hda_parse_pin_def_config(struct hda_codec *codec, struct auto_pin_cfg *c cfg->speaker_outs, cfg->speaker_pins[0], cfg->speaker_pins[1], cfg->speaker_pins[2], cfg->speaker_pins[3], cfg->speaker_pins[4]); - snd_printd(" hp=0x%x, dig_out=0x%x, din_in=0x%x\n", - cfg->hp_pin, cfg->dig_out_pin, cfg->dig_in_pin); + snd_printd(" hp_outs=%d (0x%x/0x%x/0x%x/0x%x/0x%x)\n", + cfg->hp_outs, cfg->hp_pins[0], + cfg->hp_pins[1], cfg->hp_pins[2], + cfg->hp_pins[3], cfg->hp_pins[4]); snd_printd(" inputs: mic=0x%x, fmic=0x%x, line=0x%x, fline=0x%x," " cd=0x%x, aux=0x%x\n", cfg->input_pins[AUTO_PIN_MIC], @@ -2169,10 +2174,12 @@ int snd_hda_parse_pin_def_config(struct hda_codec *codec, struct auto_pin_cfg *c sizeof(cfg->speaker_pins)); cfg->speaker_outs = 0; memset(cfg->speaker_pins, 0, sizeof(cfg->speaker_pins)); - } else if (cfg->hp_pin) { - cfg->line_outs = 1; - cfg->line_out_pins[0] = cfg->hp_pin; - cfg->hp_pin = 0; + } else if (cfg->hp_outs) { + cfg->line_outs = cfg->hp_outs; + memcpy(cfg->line_out_pins, cfg->hp_pins, + sizeof(cfg->hp_pins)); + cfg->hp_outs = 0; + memset(cfg->hp_pins, 0, sizeof(cfg->hp_pins)); } } diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h index ff24266fe353..f9416c36396e 100644 --- a/sound/pci/hda/hda_local.h +++ b/sound/pci/hda/hda_local.h @@ -229,7 +229,8 @@ struct auto_pin_cfg { hda_nid_t line_out_pins[5]; /* sorted in the order of Front/Surr/CLFE/Side */ int speaker_outs; hda_nid_t speaker_pins[5]; - hda_nid_t hp_pin; + int hp_outs; + hda_nid_t hp_pins[5]; hda_nid_t input_pins[AUTO_PIN_LAST]; hda_nid_t dig_out_pin; hda_nid_t dig_in_pin; diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 71abc2aa61a6..511df07fa2a3 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -2471,7 +2471,7 @@ static void ad1988_auto_init_extra_out(struct hda_codec *codec) pin = spec->autocfg.speaker_pins[0]; if (pin) /* connect to front */ ad1988_auto_set_output_and_unmute(codec, pin, PIN_OUT, 0); - pin = spec->autocfg.hp_pin; + pin = spec->autocfg.hp_pins[0]; if (pin) /* connect to front */ ad1988_auto_set_output_and_unmute(codec, pin, PIN_HP, 0); } @@ -2523,7 +2523,7 @@ static int ad1988_parse_auto_config(struct hda_codec *codec) (err = ad1988_auto_create_extra_out(codec, spec->autocfg.speaker_pins[0], "Speaker")) < 0 || - (err = ad1988_auto_create_extra_out(codec, spec->autocfg.hp_pin, + (err = ad1988_auto_create_extra_out(codec, spec->autocfg.hp_pins[0], "Headphone")) < 0 || (err = ad1988_auto_create_analog_input_ctls(spec, &spec->autocfg)) < 0) return err; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ba9e050e2012..d08d2e399c8f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2753,7 +2753,7 @@ static void alc880_auto_init_extra_out(struct hda_codec *codec) pin = spec->autocfg.speaker_pins[0]; if (pin) /* connect to front */ alc880_auto_set_output_and_unmute(codec, pin, PIN_OUT, 0); - pin = spec->autocfg.hp_pin; + pin = spec->autocfg.hp_pins[0]; if (pin) /* connect to front */ alc880_auto_set_output_and_unmute(codec, pin, PIN_HP, 0); } @@ -2794,7 +2794,7 @@ static int alc880_parse_auto_config(struct hda_codec *codec) (err = alc880_auto_create_extra_out(spec, spec->autocfg.speaker_pins[0], "Speaker")) < 0 || - (err = alc880_auto_create_extra_out(spec, spec->autocfg.hp_pin, + (err = alc880_auto_create_extra_out(spec, spec->autocfg.hp_pins[0], "Headphone")) < 0 || (err = alc880_auto_create_analog_input_ctls(spec, &spec->autocfg)) < 0) return err; @@ -3736,7 +3736,7 @@ static int alc260_auto_create_multi_out_ctls(struct alc_spec *spec, return err; } - nid = cfg->hp_pin; + nid = cfg->hp_pins[0]; if (nid) { err = alc260_add_playback_controls(spec, nid, "Headphone"); if (err < 0) @@ -3806,7 +3806,7 @@ static void alc260_auto_init_multi_out(struct hda_codec *codec) if (nid) alc260_auto_set_output_and_unmute(codec, nid, PIN_OUT, 0); - nid = spec->autocfg.hp_pin; + nid = spec->autocfg.hp_pins[0]; if (nid) alc260_auto_set_output_and_unmute(codec, nid, PIN_OUT, 0); } @@ -4526,7 +4526,7 @@ static void alc882_auto_init_hp_out(struct hda_codec *codec) struct alc_spec *spec = codec->spec; hda_nid_t pin; - pin = spec->autocfg.hp_pin; + pin = spec->autocfg.hp_pins[0]; if (pin) /* connect to front */ alc882_auto_set_output_and_unmute(codec, pin, PIN_HP, 0); /* use dac 0 */ } @@ -5207,7 +5207,7 @@ static void alc883_auto_init_hp_out(struct hda_codec *codec) struct alc_spec *spec = codec->spec; hda_nid_t pin; - pin = spec->autocfg.hp_pin; + pin = spec->autocfg.hp_pins[0]; if (pin) /* connect to front */ /* use dac 0 */ alc883_auto_set_output_and_unmute(codec, pin, PIN_HP, 0); @@ -5630,7 +5630,7 @@ static int alc262_auto_create_multi_out_ctls(struct alc_spec *spec, const struct return err; } } - nid = cfg->hp_pin; + nid = cfg->hp_pins[0]; if (nid) { /* spec->multiout.hp_nid = 2; */ if (nid == 0x16) { @@ -6630,7 +6630,7 @@ static void alc861_auto_init_hp_out(struct hda_codec *codec) struct alc_spec *spec = codec->spec; hda_nid_t pin; - pin = spec->autocfg.hp_pin; + pin = spec->autocfg.hp_pins[0]; if (pin) /* connect to front */ alc861_auto_set_output_and_unmute(codec, pin, PIN_HP, spec->multiout.dac_nids[0]); } @@ -6665,7 +6665,7 @@ static int alc861_parse_auto_config(struct hda_codec *codec) if ((err = alc861_auto_fill_dac_nids(spec, &spec->autocfg)) < 0 || (err = alc861_auto_create_multi_out_ctls(spec, &spec->autocfg)) < 0 || - (err = alc861_auto_create_hp_ctls(spec, spec->autocfg.hp_pin)) < 0 || + (err = alc861_auto_create_hp_ctls(spec, spec->autocfg.hp_pins[0])) < 0 || (err = alc861_auto_create_analog_input_ctls(spec, &spec->autocfg)) < 0) return err; diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index bcbbe111ab95..7cc064265204 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -1011,11 +1011,29 @@ static int stac92xx_auto_fill_dac_nids(struct hda_codec *codec, return 0; } +/* create volume control/switch for the given prefx type */ +static int create_controls(struct sigmatel_spec *spec, const char *pfx, hda_nid_t nid, int chs) +{ + char name[32]; + int err; + + sprintf(name, "%s Playback Volume", pfx); + err = stac92xx_add_control(spec, STAC_CTL_WIDGET_VOL, name, + HDA_COMPOSE_AMP_VAL(nid, chs, 0, HDA_OUTPUT)); + if (err < 0) + return err; + sprintf(name, "%s Playback Switch", pfx); + err = stac92xx_add_control(spec, STAC_CTL_WIDGET_MUTE, name, + HDA_COMPOSE_AMP_VAL(nid, chs, 0, HDA_OUTPUT)); + if (err < 0) + return err; + return 0; +} + /* add playback controls from the parsed DAC table */ static int stac92xx_auto_create_multi_out_ctls(struct sigmatel_spec *spec, const struct auto_pin_cfg *cfg) { - char name[32]; static const char *chname[4] = { "Front", "Surround", NULL /*CLFE*/, "Side" }; @@ -1030,26 +1048,15 @@ static int stac92xx_auto_create_multi_out_ctls(struct sigmatel_spec *spec, if (i == 2) { /* Center/LFE */ - if ((err = stac92xx_add_control(spec, STAC_CTL_WIDGET_VOL, "Center Playback Volume", - HDA_COMPOSE_AMP_VAL(nid, 1, 0, HDA_OUTPUT))) < 0) + err = create_controls(spec, "Center", nid, 1); + if (err < 0) return err; - if ((err = stac92xx_add_control(spec, STAC_CTL_WIDGET_VOL, "LFE Playback Volume", - HDA_COMPOSE_AMP_VAL(nid, 2, 0, HDA_OUTPUT))) < 0) - return err; - if ((err = stac92xx_add_control(spec, STAC_CTL_WIDGET_MUTE, "Center Playback Switch", - HDA_COMPOSE_AMP_VAL(nid, 1, 0, HDA_OUTPUT))) < 0) - return err; - if ((err = stac92xx_add_control(spec, STAC_CTL_WIDGET_MUTE, "LFE Playback Switch", - HDA_COMPOSE_AMP_VAL(nid, 2, 0, HDA_OUTPUT))) < 0) + err = create_controls(spec, "LFE", nid, 2); + if (err < 0) return err; } else { - sprintf(name, "%s Playback Volume", chname[i]); - if ((err = stac92xx_add_control(spec, STAC_CTL_WIDGET_VOL, name, - HDA_COMPOSE_AMP_VAL(nid, 3, 0, HDA_OUTPUT))) < 0) - return err; - sprintf(name, "%s Playback Switch", chname[i]); - if ((err = stac92xx_add_control(spec, STAC_CTL_WIDGET_MUTE, name, - HDA_COMPOSE_AMP_VAL(nid, 3, 0, HDA_OUTPUT))) < 0) + err = create_controls(spec, chname[i], nid, 3); + if (err < 0) return err; } } @@ -1065,39 +1072,85 @@ static int stac92xx_auto_create_multi_out_ctls(struct sigmatel_spec *spec, return 0; } -/* add playback controls for HP output */ -static int stac92xx_auto_create_hp_ctls(struct hda_codec *codec, struct auto_pin_cfg *cfg) +static int check_in_dac_nids(struct sigmatel_spec *spec, hda_nid_t nid) +{ + int i; + + for (i = 0; i < spec->multiout.num_dacs; i++) { + if (spec->multiout.dac_nids[i] == nid) + return 1; + } + if (spec->multiout.hp_nid == nid) + return 1; + return 0; +} + +static int add_spec_dacs(struct sigmatel_spec *spec, hda_nid_t nid) +{ + if (!spec->multiout.hp_nid) + spec->multiout.hp_nid = nid; + else if (spec->multiout.num_dacs > 4) { + printk(KERN_WARNING "stac92xx: No space for DAC 0x%x\n", nid); + return 1; + } else { + spec->multiout.dac_nids[spec->multiout.num_dacs] = nid; + spec->multiout.num_dacs++; + } + return 0; +} + +/* add playback controls for Speaker and HP outputs */ +static int stac92xx_auto_create_hp_ctls(struct hda_codec *codec, + struct auto_pin_cfg *cfg) { struct sigmatel_spec *spec = codec->spec; - hda_nid_t pin = cfg->hp_pin; hda_nid_t nid; - int i, err; - unsigned int wid_caps; + int i, old_num_dacs, err; - if (! pin) - return 0; - - wid_caps = get_wcaps(codec, pin); - if (wid_caps & AC_WCAP_UNSOL_CAP) - spec->hp_detect = 1; - - nid = snd_hda_codec_read(codec, pin, 0, AC_VERB_GET_CONNECT_LIST, 0) & 0xff; - for (i = 0; i < cfg->line_outs; i++) { - if (! spec->multiout.dac_nids[i]) + old_num_dacs = spec->multiout.num_dacs; + for (i = 0; i < cfg->hp_outs; i++) { + unsigned int wid_caps = get_wcaps(codec, cfg->hp_pins[i]); + if (wid_caps & AC_WCAP_UNSOL_CAP) + spec->hp_detect = 1; + nid = snd_hda_codec_read(codec, cfg->hp_pins[i], 0, + AC_VERB_GET_CONNECT_LIST, 0) & 0xff; + if (check_in_dac_nids(spec, nid)) + nid = 0; + if (! nid) continue; - if (spec->multiout.dac_nids[i] == nid) - return 0; + add_spec_dacs(spec, nid); + } + for (i = 0; i < cfg->speaker_outs; i++) { + nid = snd_hda_codec_read(codec, cfg->speaker_pins[0], 0, + AC_VERB_GET_CONNECT_LIST, 0) & 0xff; + if (check_in_dac_nids(spec, nid)) + nid = 0; + if (check_in_dac_nids(spec, nid)) + nid = 0; + if (! nid) + continue; + add_spec_dacs(spec, nid); } - spec->multiout.hp_nid = nid; - - /* control HP volume/switch on the output mixer amp */ - if ((err = stac92xx_add_control(spec, STAC_CTL_WIDGET_VOL, "Headphone Playback Volume", - HDA_COMPOSE_AMP_VAL(nid, 3, 0, HDA_OUTPUT))) < 0) - return err; - if ((err = stac92xx_add_control(spec, STAC_CTL_WIDGET_MUTE, "Headphone Playback Switch", - HDA_COMPOSE_AMP_VAL(nid, 3, 0, HDA_OUTPUT))) < 0) - return err; + for (i = old_num_dacs; i < spec->multiout.num_dacs; i++) { + static const char *pfxs[] = { + "Speaker", "External Speaker", "Speaker2", + }; + err = create_controls(spec, pfxs[i - old_num_dacs], + spec->multiout.dac_nids[i], 3); + if (err < 0) + return err; + } + if (spec->multiout.hp_nid) { + const char *pfx; + if (old_num_dacs == spec->multiout.num_dacs) + pfx = "Master"; + else + pfx = "Headphone"; + err = create_controls(spec, pfx, spec->multiout.hp_nid, 3); + if (err < 0) + return err; + } return 0; } @@ -1160,11 +1213,20 @@ static void stac92xx_auto_init_multi_out(struct hda_codec *codec) static void stac92xx_auto_init_hp_out(struct hda_codec *codec) { struct sigmatel_spec *spec = codec->spec; - hda_nid_t pin; + int i; - pin = spec->autocfg.hp_pin; - if (pin) /* connect to front */ - stac92xx_auto_set_pinctl(codec, pin, AC_PINCTL_OUT_EN | AC_PINCTL_HP_EN); + for (i = 0; i < spec->autocfg.hp_outs; i++) { + hda_nid_t pin; + pin = spec->autocfg.hp_pins[i]; + if (pin) /* connect to front */ + stac92xx_auto_set_pinctl(codec, pin, AC_PINCTL_OUT_EN | AC_PINCTL_HP_EN); + } + for (i = 0; i < spec->autocfg.speaker_outs; i++) { + hda_nid_t pin; + pin = spec->autocfg.speaker_pins[i]; + if (pin) /* connect to front */ + stac92xx_auto_set_pinctl(codec, pin, AC_PINCTL_OUT_EN); + } } static int stac92xx_parse_auto_config(struct hda_codec *codec, hda_nid_t dig_out, hda_nid_t dig_in) @@ -1210,7 +1272,7 @@ static int stac9200_auto_create_hp_ctls(struct hda_codec *codec, struct auto_pin_cfg *cfg) { struct sigmatel_spec *spec = codec->spec; - hda_nid_t pin = cfg->hp_pin; + hda_nid_t pin = cfg->hp_pins[0]; unsigned int wid_caps; if (! pin) @@ -1266,16 +1328,7 @@ static int stac9200_auto_create_lfe_ctls(struct hda_codec *codec, } if (lfe_pin) { - err = stac92xx_add_control(spec, STAC_CTL_WIDGET_VOL, - "LFE Playback Volume", - HDA_COMPOSE_AMP_VAL(lfe_pin, 1, 0, - HDA_OUTPUT)); - if (err < 0) - return err; - err = stac92xx_add_control(spec, STAC_CTL_WIDGET_MUTE, - "LFE Playback Switch", - HDA_COMPOSE_AMP_VAL(lfe_pin, 1, 0, - HDA_OUTPUT)); + err = create_controls(spec, "LFE", lfe_pin, 1); if (err < 0) return err; } @@ -1363,9 +1416,11 @@ static int stac92xx_init(struct hda_codec *codec) /* set up pins */ if (spec->hp_detect) { /* Enable unsolicited responses on the HP widget */ - snd_hda_codec_write(codec, cfg->hp_pin, 0, - AC_VERB_SET_UNSOLICITED_ENABLE, - STAC_UNSOL_ENABLE); + for (i = 0; i < cfg->hp_outs; i++) + if (get_wcaps(codec, cfg->hp_pins[i]) & AC_WCAP_UNSOL_CAP) + snd_hda_codec_write(codec, cfg->hp_pins[i], 0, + AC_VERB_SET_UNSOLICITED_ENABLE, + STAC_UNSOL_ENABLE); /* fake event to set up pins */ codec->patch_ops.unsol_event(codec, STAC_HP_EVENT << 26); /* enable the headphones by default. If/when unsol_event detection works, this will be ignored */ @@ -1447,21 +1502,36 @@ static void stac92xx_unsol_event(struct hda_codec *codec, unsigned int res) if ((res >> 26) != STAC_HP_EVENT) return; - presence = snd_hda_codec_read(codec, cfg->hp_pin, 0, - AC_VERB_GET_PIN_SENSE, 0x00) >> 31; + presence = 0; + for (i = 0; i < cfg->hp_outs; i++) { + int p = snd_hda_codec_read(codec, cfg->hp_pins[i], 0, + AC_VERB_GET_PIN_SENSE, 0x00); + if (p & (1 << 31)) + presence++; + } if (presence) { /* disable lineouts, enable hp */ for (i = 0; i < cfg->line_outs; i++) stac92xx_reset_pinctl(codec, cfg->line_out_pins[i], AC_PINCTL_OUT_EN); - stac92xx_set_pinctl(codec, cfg->hp_pin, AC_PINCTL_OUT_EN); + for (i = 0; i < cfg->speaker_outs; i++) + stac92xx_reset_pinctl(codec, cfg->speaker_pins[i], + AC_PINCTL_OUT_EN); + for (i = 0; i < cfg->hp_outs; i++) + stac92xx_set_pinctl(codec, cfg->hp_pins[i], + AC_PINCTL_OUT_EN); } else { /* enable lineouts, disable hp */ for (i = 0; i < cfg->line_outs; i++) stac92xx_set_pinctl(codec, cfg->line_out_pins[i], AC_PINCTL_OUT_EN); - stac92xx_reset_pinctl(codec, cfg->hp_pin, AC_PINCTL_OUT_EN); + for (i = 0; i < cfg->speaker_outs; i++) + stac92xx_set_pinctl(codec, cfg->speaker_pins[i], + AC_PINCTL_OUT_EN); + for (i = 0; i < cfg->hp_outs; i++) + stac92xx_reset_pinctl(codec, cfg->hp_pins[i], + AC_PINCTL_OUT_EN); } } From e6f8f108a19638d7c6535ab393a228ed9d4804a6 Mon Sep 17 00:00:00 2001 From: Josef 'Jeff' Sipek Date: Thu, 21 Sep 2006 11:31:58 +0200 Subject: [PATCH 1016/1063] [ALSA] sound core: Use SEEK_{SET,CUR,END} instead of hardcoded values sound core: Use SEEK_{SET,CUR,END} instead of hardcoded values Signed-off-by: Josef 'Jeff' Sipek Signed-off-by: Andrew Morton Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/core/info.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/core/info.c b/sound/core/info.c index 9663b6be9c3a..e43662b33f16 100644 --- a/sound/core/info.c +++ b/sound/core/info.c @@ -175,15 +175,15 @@ static loff_t snd_info_entry_llseek(struct file *file, loff_t offset, int orig) switch (entry->content) { case SNDRV_INFO_CONTENT_TEXT: switch (orig) { - case 0: /* SEEK_SET */ + case SEEK_SET: file->f_pos = offset; ret = file->f_pos; goto out; - case 1: /* SEEK_CUR */ + case SEEK_CUR: file->f_pos += offset; ret = file->f_pos; goto out; - case 2: /* SEEK_END */ + case SEEK_END: default: ret = -EINVAL; goto out; From dd47a33806bfe93c08b071c4d26a2390cbbc9e65 Mon Sep 17 00:00:00 2001 From: Josef 'Jeff' Sipek Date: Thu, 21 Sep 2006 11:32:43 +0200 Subject: [PATCH 1017/1063] [ALSA] opl4: Use SEEK_{SET,CUR,END} instead of hardcoded values opl4: Use SEEK_{SET,CUR,END} instead of hardcoded values Signed-off-by: Josef 'Jeff' Sipek Signed-off-by: Andrew Morton Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/drivers/opl4/opl4_proc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/drivers/opl4/opl4_proc.c b/sound/drivers/opl4/opl4_proc.c index 11dd811771a4..1679300b7583 100644 --- a/sound/drivers/opl4/opl4_proc.c +++ b/sound/drivers/opl4/opl4_proc.c @@ -105,13 +105,13 @@ static long long snd_opl4_mem_proc_llseek(struct snd_info_entry *entry, void *fi struct file *file, long long offset, int orig) { switch (orig) { - case 0: /* SEEK_SET */ + case SEEK_SET: file->f_pos = offset; break; - case 1: /* SEEK_CUR */ + case SEEK_CUR: file->f_pos += offset; break; - case 2: /* SEEK_END, offset is negative */ + case SEEK_END: /* offset is negative */ file->f_pos = entry->size + offset; break; default: From d158da81ee9a1fa70d980f58b0f143fa873ca9ed Mon Sep 17 00:00:00 2001 From: Josef 'Jeff' Sipek Date: Thu, 21 Sep 2006 11:33:14 +0200 Subject: [PATCH 1018/1063] [ALSA] gus: Use SEEK_{SET,CUR,END} instead of hardcoded values gus: Use SEEK_{SET,CUR,END} instead of hardcoded values Signed-off-by: Josef 'Jeff' Sipek Signed-off-by: Andrew Morton Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/isa/gus/gus_mem_proc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/isa/gus/gus_mem_proc.c b/sound/isa/gus/gus_mem_proc.c index 4080255007d5..80f0a83818b2 100644 --- a/sound/isa/gus/gus_mem_proc.c +++ b/sound/isa/gus/gus_mem_proc.c @@ -61,13 +61,13 @@ static long long snd_gf1_mem_proc_llseek(struct snd_info_entry *entry, struct gus_proc_private *priv = entry->private_data; switch (orig) { - case 0: /* SEEK_SET */ + case SEEK_SET: file->f_pos = offset; break; - case 1: /* SEEK_CUR */ + case SEEK_CUR: file->f_pos += offset; break; - case 2: /* SEEK_END, offset is negative */ + case SEEK_END: /* offset is negative */ file->f_pos = priv->size + offset; break; default: From 7ffffecc7c4df08ad89723ca32d936ff09b5b3ff Mon Sep 17 00:00:00 2001 From: Josef 'Jeff' Sipek Date: Thu, 21 Sep 2006 11:33:42 +0200 Subject: [PATCH 1019/1063] [ALSA] mixart: Use SEEK_{SET,CUR,END} instead of hardcoded values mixart: Use SEEK_{SET,CUR,END} instead of hardcoded values Signed-off-by: Josef 'Jeff' Sipek Signed-off-by: Andrew Morton Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/mixart/mixart.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/pci/mixart/mixart.c b/sound/pci/mixart/mixart.c index cc43ecd67906..216aee5f93e7 100644 --- a/sound/pci/mixart/mixart.c +++ b/sound/pci/mixart/mixart.c @@ -1109,13 +1109,13 @@ static long long snd_mixart_BA0_llseek(struct snd_info_entry *entry, offset = offset & ~3; /* 4 bytes aligned */ switch(orig) { - case 0: /* SEEK_SET */ + case SEEK_SET: file->f_pos = offset; break; - case 1: /* SEEK_CUR */ + case SEEK_CUR: file->f_pos += offset; break; - case 2: /* SEEK_END, offset is negative */ + case SEEK_END: /* offset is negative */ file->f_pos = MIXART_BA0_SIZE + offset; break; default: @@ -1135,13 +1135,13 @@ static long long snd_mixart_BA1_llseek(struct snd_info_entry *entry, offset = offset & ~3; /* 4 bytes aligned */ switch(orig) { - case 0: /* SEEK_SET */ + case SEEK_SET: file->f_pos = offset; break; - case 1: /* SEEK_CUR */ + case SEEK_CUR: file->f_pos += offset; break; - case 2: /* SEEK_END, offset is negative */ + case SEEK_END: /* offset is negative */ file->f_pos = MIXART_BA1_SIZE + offset; break; default: From 314634bc81325dcfeb31ed138647d428b1f26cbf Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 21 Sep 2006 11:56:18 +0200 Subject: [PATCH 1020/1063] [ALSA] hda-codec - Fix mic input with STAC92xx codecs Fixed mic input with STAC92xx codecs. The mic pin was sometimes set to OUTPUT by the headphone jack detection. Also, try to assign a secondary mic as front-mic (or vice versa) in the auto-detection if possible. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/hda_codec.c | 19 ++++++-- sound/pci/hda/patch_sigmatel.c | 88 +++++++++++++++++++++++----------- 2 files changed, 73 insertions(+), 34 deletions(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 07360996caaa..9c3d7ac08068 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -2079,12 +2079,21 @@ int snd_hda_parse_pin_def_config(struct hda_codec *codec, struct auto_pin_cfg *c cfg->hp_pins[cfg->hp_outs] = nid; cfg->hp_outs++; break; - case AC_JACK_MIC_IN: - if (loc == AC_JACK_LOC_FRONT) - cfg->input_pins[AUTO_PIN_FRONT_MIC] = nid; - else - cfg->input_pins[AUTO_PIN_MIC] = nid; + case AC_JACK_MIC_IN: { + int preferred, alt; + if (loc == AC_JACK_LOC_FRONT) { + preferred = AUTO_PIN_FRONT_MIC; + alt = AUTO_PIN_MIC; + } else { + preferred = AUTO_PIN_MIC; + alt = AUTO_PIN_FRONT_MIC; + } + if (!cfg->input_pins[preferred]) + cfg->input_pins[preferred] = nid; + else if (!cfg->input_pins[alt]) + cfg->input_pins[alt] = nid; break; + } case AC_JACK_LINE_IN: if (loc == AC_JACK_LOC_FRONT) cfg->input_pins[AUTO_PIN_FRONT_LINE] = nid; diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 7cc064265204..92f48a725853 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -36,7 +36,6 @@ #define NUM_CONTROL_ALLOC 32 #define STAC_HP_EVENT 0x37 -#define STAC_UNSOL_ENABLE (AC_USRSP_EN | STAC_HP_EVENT) #define STAC_REF 0 #define STAC_D945GTP3 1 @@ -1164,23 +1163,28 @@ static int stac92xx_auto_create_analog_input_ctls(struct hda_codec *codec, const int i, j, k; for (i = 0; i < AUTO_PIN_LAST; i++) { - int index = -1; - if (cfg->input_pins[i]) { - imux->items[imux->num_items].label = auto_pin_cfg_labels[i]; + int index; - for (j=0; jnum_muxes; j++) { - int num_cons = snd_hda_get_connections(codec, spec->mux_nids[j], con_lst, HDA_MAX_NUM_INPUTS); - for (k=0; kinput_pins[i]) { - index = k; - break; - } - if (index >= 0) - break; - } - imux->items[imux->num_items].index = index; - imux->num_items++; + if (!cfg->input_pins[i]) + continue; + index = -1; + for (j = 0; j < spec->num_muxes; j++) { + int num_cons; + num_cons = snd_hda_get_connections(codec, + spec->mux_nids[j], + con_lst, + HDA_MAX_NUM_INPUTS); + for (k = 0; k < num_cons; k++) + if (con_lst[k] == cfg->input_pins[i]) { + index = k; + goto found; + } } + continue; + found: + imux->items[imux->num_items].label = auto_pin_cfg_labels[i]; + imux->items[imux->num_items].index = index; + imux->num_items++; } if (imux->num_items == 1) { @@ -1405,6 +1409,15 @@ static void stac922x_gpio_mute(struct hda_codec *codec, int pin, int muted) AC_VERB_SET_GPIO_DATA, gpiostate); } +static void enable_pin_detect(struct hda_codec *codec, hda_nid_t nid, + unsigned int event) +{ + if (get_wcaps(codec, nid) & AC_WCAP_UNSOL_CAP) + snd_hda_codec_write(codec, nid, 0, + AC_VERB_SET_UNSOLICITED_ENABLE, + (AC_USRSP_EN | event)); +} + static int stac92xx_init(struct hda_codec *codec) { struct sigmatel_spec *spec = codec->spec; @@ -1417,13 +1430,13 @@ static int stac92xx_init(struct hda_codec *codec) if (spec->hp_detect) { /* Enable unsolicited responses on the HP widget */ for (i = 0; i < cfg->hp_outs; i++) - if (get_wcaps(codec, cfg->hp_pins[i]) & AC_WCAP_UNSOL_CAP) - snd_hda_codec_write(codec, cfg->hp_pins[i], 0, - AC_VERB_SET_UNSOLICITED_ENABLE, - STAC_UNSOL_ENABLE); + enable_pin_detect(codec, cfg->hp_pins[i], + STAC_HP_EVENT); /* fake event to set up pins */ codec->patch_ops.unsol_event(codec, STAC_HP_EVENT << 26); - /* enable the headphones by default. If/when unsol_event detection works, this will be ignored */ + /* enable the headphones by default. + * If/when unsol_event detection works, this will be ignored + */ stac92xx_auto_init_hp_out(codec); } else { stac92xx_auto_init_multi_out(codec); @@ -1478,6 +1491,8 @@ static void stac92xx_set_pinctl(struct hda_codec *codec, hda_nid_t nid, { unsigned int pin_ctl = snd_hda_codec_read(codec, nid, 0, AC_VERB_GET_PIN_WIDGET_CONTROL, 0x00); + if (flag == AC_PINCTL_OUT_EN && (pin_ctl & AC_PINCTL_IN_EN)) + return; snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, pin_ctl | flag); @@ -1493,21 +1508,27 @@ static void stac92xx_reset_pinctl(struct hda_codec *codec, hda_nid_t nid, pin_ctl & ~flag); } -static void stac92xx_unsol_event(struct hda_codec *codec, unsigned int res) +static int get_pin_presence(struct hda_codec *codec, hda_nid_t nid) +{ + if (!nid) + return 0; + if (snd_hda_codec_read(codec, nid, 0, AC_VERB_GET_PIN_SENSE, 0x00) + & (1 << 31)) + return 1; + return 0; +} + +static void stac92xx_hp_detect(struct hda_codec *codec, unsigned int res) { struct sigmatel_spec *spec = codec->spec; struct auto_pin_cfg *cfg = &spec->autocfg; int i, presence; - if ((res >> 26) != STAC_HP_EVENT) - return; - presence = 0; for (i = 0; i < cfg->hp_outs; i++) { - int p = snd_hda_codec_read(codec, cfg->hp_pins[i], 0, - AC_VERB_GET_PIN_SENSE, 0x00); - if (p & (1 << 31)) - presence++; + presence = get_pin_presence(codec, cfg->hp_pins[i]); + if (presence) + break; } if (presence) { @@ -1535,6 +1556,15 @@ static void stac92xx_unsol_event(struct hda_codec *codec, unsigned int res) } } +static void stac92xx_unsol_event(struct hda_codec *codec, unsigned int res) +{ + switch (res >> 26) { + case STAC_HP_EVENT: + stac92xx_hp_detect(codec, res); + break; + } +} + #ifdef CONFIG_PM static int stac92xx_resume(struct hda_codec *codec) { From 5c79b1f887f8edcd399baa164b66a1c08566c994 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 21 Sep 2006 13:34:13 +0200 Subject: [PATCH 1021/1063] [ALSA] hda-intel - A slight cleanup of timeout check in azx_get_response() A slight cleanup of timeout check in azx_get_response() to check jiffies for HZ-independent timeout. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/hda_intel.c | 46 +++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 4d2df771112e..e9d4cb4d07e1 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -520,38 +520,36 @@ static void azx_update_rirb(struct azx *chip) static unsigned int azx_rirb_get_response(struct hda_codec *codec) { struct azx *chip = codec->bus->private_data; - int timeout = 50; + unsigned long timeout; - for (;;) { + again: + timeout = jiffies + msecs_to_jiffies(1000); + do { if (chip->polling_mode) { spin_lock_irq(&chip->reg_lock); azx_update_rirb(chip); spin_unlock_irq(&chip->reg_lock); } if (! chip->rirb.cmds) - break; - if (! --timeout) { - if (! chip->polling_mode) { - snd_printk(KERN_WARNING "hda_intel: " - "azx_get_response timeout, " - "switching to polling mode...\n"); - chip->polling_mode = 1; - timeout = 50; - continue; - } - snd_printk(KERN_ERR - "hda_intel: azx_get_response timeout, " - "switching to single_cmd mode...\n"); - chip->rirb.rp = azx_readb(chip, RIRBWP); - chip->rirb.cmds = 0; - /* switch to single_cmd mode */ - chip->single_cmd = 1; - azx_free_cmd_io(chip); - return -1; - } - msleep(1); + return chip->rirb.res; /* the last value */ + schedule_timeout_interruptible(1); + } while (time_after_eq(timeout, jiffies)); + + if (!chip->polling_mode) { + snd_printk(KERN_WARNING "hda_intel: azx_get_response timeout, " + "switching to polling mode...\n"); + chip->polling_mode = 1; + goto again; } - return chip->rirb.res; /* the last value */ + + snd_printk(KERN_ERR "hda_intel: azx_get_response timeout, " + "switching to single_cmd mode...\n"); + chip->rirb.rp = azx_readb(chip, RIRBWP); + chip->rirb.cmds = 0; + /* switch to single_cmd mode */ + chip->single_cmd = 1; + azx_free_cmd_io(chip); + return -1; } /* From eb995a8c82dba4a8e027c99ac5001fbc287a115c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 21 Sep 2006 14:28:21 +0200 Subject: [PATCH 1022/1063] [ALSA] hda-codec - Fix headphone auto-toggle on sigmatel codec Fix/optimize the headphone auto-toggle function on sigmatel codecs. The headphone pins are kept as output. When headhpones are unplugged, you cannot hear anyway ;) Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/hda/patch_sigmatel.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 92f48a725853..731b7b97ee71 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -1432,12 +1432,9 @@ static int stac92xx_init(struct hda_codec *codec) for (i = 0; i < cfg->hp_outs; i++) enable_pin_detect(codec, cfg->hp_pins[i], STAC_HP_EVENT); + stac92xx_auto_init_hp_out(codec); /* fake event to set up pins */ codec->patch_ops.unsol_event(codec, STAC_HP_EVENT << 26); - /* enable the headphones by default. - * If/when unsol_event detection works, this will be ignored - */ - stac92xx_auto_init_hp_out(codec); } else { stac92xx_auto_init_multi_out(codec); stac92xx_auto_init_hp_out(codec); @@ -1539,9 +1536,6 @@ static void stac92xx_hp_detect(struct hda_codec *codec, unsigned int res) for (i = 0; i < cfg->speaker_outs; i++) stac92xx_reset_pinctl(codec, cfg->speaker_pins[i], AC_PINCTL_OUT_EN); - for (i = 0; i < cfg->hp_outs; i++) - stac92xx_set_pinctl(codec, cfg->hp_pins[i], - AC_PINCTL_OUT_EN); } else { /* enable lineouts, disable hp */ for (i = 0; i < cfg->line_outs; i++) @@ -1550,9 +1544,6 @@ static void stac92xx_hp_detect(struct hda_codec *codec, unsigned int res) for (i = 0; i < cfg->speaker_outs; i++) stac92xx_set_pinctl(codec, cfg->speaker_pins[i], AC_PINCTL_OUT_EN); - for (i = 0; i < cfg->hp_outs; i++) - stac92xx_reset_pinctl(codec, cfg->hp_pins[i], - AC_PINCTL_OUT_EN); } } From 92b9ac78f934616d08c72747607bfb0fa51ee52d Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Fri, 22 Sep 2006 10:57:36 +0200 Subject: [PATCH 1023/1063] [ALSA] usb-audio: increase number of packets per URB To decrease the USB interrupts rate, increase both the default and the maximum number of packets per URB. Signed-off-by: Clemens Ladisch Signed-off-by: Jaroslav Kysela --- sound/usb/usbaudio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c index 664dd4c21e66..49248fa7aef4 100644 --- a/sound/usb/usbaudio.c +++ b/sound/usb/usbaudio.c @@ -68,7 +68,7 @@ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; /* Enable this card */ static int vid[SNDRV_CARDS] = { [0 ... (SNDRV_CARDS-1)] = -1 }; /* Vendor ID for this card */ static int pid[SNDRV_CARDS] = { [0 ... (SNDRV_CARDS-1)] = -1 }; /* Product ID for this card */ -static int nrpacks = 4; /* max. number of packets per urb */ +static int nrpacks = 8; /* max. number of packets per urb */ static int async_unlink = 1; static int device_setup[SNDRV_CARDS]; /* device parameter for this card*/ @@ -100,7 +100,7 @@ MODULE_PARM_DESC(device_setup, "Specific device setup (if needed)."); * */ -#define MAX_PACKS 10 +#define MAX_PACKS 20 #define MAX_PACKS_HS (MAX_PACKS * 8) /* in high speed mode */ #define MAX_URBS 8 #define SYNC_URBS 4 /* always four urbs for sync */ From dbf91dd47d90e1d91d5daf37ca30728f4e11c5e3 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Fri, 22 Sep 2006 10:58:40 +0200 Subject: [PATCH 1024/1063] [ALSA] ES1938: remove duplicate field initialization Remove the duplicate and inconsistent initialization of the kcontrol access field. Signed-off-by: Clemens Ladisch Signed-off-by: Jaroslav Kysela --- sound/pci/es1938.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/pci/es1938.c b/sound/pci/es1938.c index 3784088bea84..3ce5a4e7e31f 100644 --- a/sound/pci/es1938.c +++ b/sound/pci/es1938.c @@ -1359,10 +1359,9 @@ ES1938_DOUBLE("Master Playback Switch", 0, 0x60, 0x62, 6, 6, 1, 1), }, { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, - .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + .access = (SNDRV_CTL_ELEM_ACCESS_READ | SNDRV_CTL_ELEM_ACCESS_TLV_READ), .name = "Hardware Master Playback Switch", - .access = SNDRV_CTL_ELEM_ACCESS_READ, .info = snd_es1938_info_hw_switch, .get = snd_es1938_get_hw_switch, .tlv = { .p = db_scale_master }, From fef8a0c03daa1aaf3f83e45da2b14674c073a9f5 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Fri, 22 Sep 2006 11:00:51 +0200 Subject: [PATCH 1025/1063] [ALSA] usb-audio: add mixer control names for the Aureon 5.1 MkII Add a mixer name map for the TerraTec Aureon 5.1 MkII USB. Signed-off-by: Clemens Ladisch Signed-off-by: Jaroslav Kysela --- sound/usb/usbmixer_maps.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/sound/usb/usbmixer_maps.c b/sound/usb/usbmixer_maps.c index 37accb68652d..7c4dcb3f436a 100644 --- a/sound/usb/usbmixer_maps.c +++ b/sound/usb/usbmixer_maps.c @@ -234,6 +234,26 @@ static struct usbmix_name_map justlink_map[] = { { 0 } /* terminator */ }; +/* TerraTec Aureon 5.1 MkII USB */ +static struct usbmix_name_map aureon_51_2_map[] = { + /* 1: IT USB */ + /* 2: IT Mic */ + /* 3: IT Line */ + /* 4: IT SPDIF */ + /* 5: OT SPDIF */ + /* 6: OT Speaker */ + /* 7: OT USB */ + { 8, "Capture Source" }, /* SU */ + { 9, "Master Playback" }, /* FU */ + { 10, "Mic Capture" }, /* FU */ + { 11, "Line Capture" }, /* FU */ + { 12, "IEC958 In Capture" }, /* FU */ + { 13, "Mic Playback" }, /* FU */ + { 14, "Line Playback" }, /* FU */ + /* 15: MU */ + {} /* terminator */ +}; + /* * Control map entries */ @@ -276,6 +296,10 @@ static struct usbmix_ctl_map usbmix_ctl_maps[] = { .id = USB_ID(0x0c45, 0x1158), .map = justlink_map, }, + { + .id = USB_ID(0x0ccd, 0x0028), + .map = aureon_51_2_map, + }, { 0 } /* terminator */ }; From 8b0c4149e82170ebc44b96e9ed96545f8ebd7c81 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 22 Sep 2006 15:27:55 +0200 Subject: [PATCH 1026/1063] [ALSA] Move CONFIG_SND_AC97_POWER_SAVE to pci/Kconfig Moved the entry of CONFIG_SND_AC97_POWER_SAVE from drivers/Kconfig to more appropriate place, pci/Kconfig. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/drivers/Kconfig | 13 ------------- sound/pci/Kconfig | 13 +++++++++++++ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/sound/drivers/Kconfig b/sound/drivers/Kconfig index 952c7f170101..7971285dfd5b 100644 --- a/sound/drivers/Kconfig +++ b/sound/drivers/Kconfig @@ -113,17 +113,4 @@ config SND_MPU401 To compile this driver as a module, choose M here: the module will be called snd-mpu401. -config SND_AC97_POWER_SAVE - bool "AC97 Power-Saving Mode" - depends on SND_AC97_CODEC && EXPERIMENTAL - default n - help - Say Y here to enable the aggressive power-saving support of - AC97 codecs. In this mode, the power-mode is dynamically - controlled at each open/close. - - The mode is activated by passing power_save=1 option to - snd-ac97-codec driver. You can toggle it dynamically over - sysfs, too. - endmenu diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig index dffb6be76800..8a6b1803c763 100644 --- a/sound/pci/Kconfig +++ b/sound/pci/Kconfig @@ -744,4 +744,17 @@ config SND_YMFPCI To compile this driver as a module, choose M here: the module will be called snd-ymfpci. +config SND_AC97_POWER_SAVE + bool "AC97 Power-Saving Mode" + depends on SND_AC97_CODEC && EXPERIMENTAL + default n + help + Say Y here to enable the aggressive power-saving support of + AC97 codecs. In this mode, the power-mode is dynamically + controlled at each open/close. + + The mode is activated by passing power_save=1 option to + snd-ac97-codec driver. You can toggle it dynamically over + sysfs, too. + endmenu From f0063c4489a00ed5395378ef80a7edea4272f20b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 22 Sep 2006 15:30:42 +0200 Subject: [PATCH 1027/1063] [ALSA] intel8x0m - Free irq in suspend Free the irq handler in suspend and reacquire in resume as well as intel8x0 audio driver does. Some devices may change the irq line dynamically during suspend/resume. Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- sound/pci/intel8x0m.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/pci/intel8x0m.c b/sound/pci/intel8x0m.c index 91850281f89b..268e2f7241ea 100644 --- a/sound/pci/intel8x0m.c +++ b/sound/pci/intel8x0m.c @@ -1045,6 +1045,8 @@ static int intel8x0m_suspend(struct pci_dev *pci, pm_message_t state) for (i = 0; i < chip->pcm_devs; i++) snd_pcm_suspend_all(chip->pcm[i]); snd_ac97_suspend(chip->ac97); + if (chip->irq >= 0) + free_irq(chip->irq, chip); pci_disable_device(pci); pci_save_state(pci); return 0; @@ -1058,6 +1060,9 @@ static int intel8x0m_resume(struct pci_dev *pci) pci_restore_state(pci); pci_enable_device(pci); pci_set_master(pci); + request_irq(pci->irq, snd_intel8x0_interrupt, IRQF_DISABLED|IRQF_SHARED, + card->shortname, chip); + chip->irq = pci->irq; snd_intel8x0_chip_init(chip, 0); snd_ac97_resume(chip->ac97); From 892e4fba1cb5cdc70f3acc65e024e541c0b2d559 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 23 Sep 2006 10:24:36 +0100 Subject: [PATCH 1028/1063] [MTD] Fix dependencies with CONFIG_MTD=m CMDLINEPARTS shouldn't be selectable, and neither should SSFDC, which can be a tristate anyway. Signed-off-by: David Woodhouse --- drivers/mtd/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig index 717e90448fc6..a03e862851db 100644 --- a/drivers/mtd/Kconfig +++ b/drivers/mtd/Kconfig @@ -101,7 +101,7 @@ config MTD_REDBOOT_PARTS_READONLY config MTD_CMDLINE_PARTS bool "Command line partition table parsing" - depends on MTD_PARTITIONS = "y" + depends on MTD_PARTITIONS = "y" && MTD = "y" ---help--- Allow generic configuration of the MTD partition tables via the kernel command line. Multiple flash resources are supported for hardware where @@ -264,7 +264,7 @@ config RFD_FTL http://www.gensw.com/pages/prod/bios/rfd.htm config SSFDC - bool "NAND SSFDC (SmartMedia) read only translation layer" + tristate "NAND SSFDC (SmartMedia) read only translation layer" depends on MTD default n help From 9a05eded5d17a425b9d9ed9dd80f518429dde4e8 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 23 Sep 2006 10:56:24 +0100 Subject: [PATCH 1029/1063] [MTD] SSFDC translation layer minor cleanup Don't include . Don't say 'MB' where you mean 'MiB'. Don't allocate 512 bytes on the stack. Signed-off-by: David Woodhouse --- drivers/mtd/ssfdc.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/drivers/mtd/ssfdc.c b/drivers/mtd/ssfdc.c index ddbf015f4119..cf60a5e87f19 100644 --- a/drivers/mtd/ssfdc.c +++ b/drivers/mtd/ssfdc.c @@ -10,7 +10,6 @@ * published by the Free Software Foundation. */ -#include #include #include #include @@ -29,7 +28,7 @@ struct ssfdcr_record { int cis_block; /* block n. containing CIS/IDI */ int erase_size; /* phys_block_size */ unsigned short *logic_block_map; /* all zones (max 8192 phys blocks on - the 128MB) */ + the 128MiB) */ int map_len; /* n. phys_blocks on the card */ }; @@ -43,11 +42,11 @@ struct ssfdcr_record { #define MAX_LOGIC_BLK_PER_ZONE 1000 #define MAX_PHYS_BLK_PER_ZONE 1024 -#define KB(x) ( (x) * 1024L ) -#define MB(x) ( KB(x) * 1024L ) +#define KiB(x) ( (x) * 1024L ) +#define MiB(x) ( KiB(x) * 1024L ) /** CHS Table - 1MB 2MB 4MB 8MB 16MB 32MB 64MB 128MB + 1MiB 2MiB 4MiB 8MiB 16MiB 32MiB 64MiB 128MiB NCylinder 125 125 250 250 500 500 500 500 NHead 4 4 4 4 4 8 8 16 NSector 4 8 8 16 16 16 32 32 @@ -64,14 +63,14 @@ typedef struct { /* Must be ordered by size */ static const chs_entry_t chs_table[] = { - { MB( 1), 125, 4, 4 }, - { MB( 2), 125, 4, 8 }, - { MB( 4), 250, 4, 8 }, - { MB( 8), 250, 4, 16 }, - { MB( 16), 500, 4, 16 }, - { MB( 32), 500, 8, 16 }, - { MB( 64), 500, 8, 32 }, - { MB(128), 500, 16, 32 }, + { MiB( 1), 125, 4, 4 }, + { MiB( 2), 125, 4, 8 }, + { MiB( 4), 250, 4, 8 }, + { MiB( 8), 250, 4, 16 }, + { MiB( 16), 500, 4, 16 }, + { MiB( 32), 500, 8, 16 }, + { MiB( 64), 500, 8, 32 }, + { MiB(128), 500, 16, 32 }, { 0 }, }; @@ -109,14 +108,19 @@ static int get_valid_cis_sector(struct mtd_info *mtd) int ret, k, cis_sector; size_t retlen; loff_t offset; - uint8_t sect_buf[SECTOR_SIZE]; + uint8_t *sect_buf; + + cis_sector = -1; + + sect_buf = kmalloc(SECTOR_SIZE, GFP_KERNEL); + if (!sect_buf) + goto out; /* * Look for CIS/IDI sector on the first GOOD block (give up after 4 bad * blocks). If the first good block doesn't contain CIS number the flash * is not SSFDC formatted */ - cis_sector = -1; for (k = 0, offset = 0; k < 4; k++, offset += mtd->erasesize) { if (!mtd->block_isbad(mtd, offset)) { ret = mtd->read(mtd, offset, SECTOR_SIZE, &retlen, @@ -140,6 +144,8 @@ static int get_valid_cis_sector(struct mtd_info *mtd) } } + kfree(sect_buf); + out: return cis_sector; } From 08d3ad6a518051bfaefd5d6a8005e20c036996c3 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 23 Sep 2006 16:20:48 +0100 Subject: [PATCH 1030/1063] [MTD] Whitespace cleanup in SSFDC driver. Says akpm: ' - search for "( " and " )", fix.' Signed-off-by: David Woodhouse --- drivers/mtd/ssfdc.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/mtd/ssfdc.c b/drivers/mtd/ssfdc.c index cf60a5e87f19..79d3bb659bfe 100644 --- a/drivers/mtd/ssfdc.c +++ b/drivers/mtd/ssfdc.c @@ -127,11 +127,11 @@ static int get_valid_cis_sector(struct mtd_info *mtd) sect_buf); /* CIS pattern match on the sector buffer */ - if ( ret < 0 || retlen != SECTOR_SIZE ) { + if (ret < 0 || retlen != SECTOR_SIZE) { printk(KERN_WARNING "SSFDC_RO:can't read CIS/IDI sector\n"); - } else if ( !memcmp(sect_buf, cis_numbers, - sizeof(cis_numbers)) ) { + } else if (!memcmp(sect_buf, cis_numbers, + sizeof(cis_numbers))) { /* Found */ cis_sector = (int)(offset >> SECTOR_SHIFT); } else { @@ -233,7 +233,7 @@ static int get_logical_address(uint8_t *oob_buf) } } - if ( !ok ) + if (!ok) block_address = -2; DEBUG(MTD_DEBUG_LEVEL3, "SSFDC_RO: get_logical_address() %d\n", @@ -251,8 +251,8 @@ static int build_logical_block_map(struct ssfdcr_record *ssfdc) struct mtd_info *mtd = ssfdc->mbd.mtd; DEBUG(MTD_DEBUG_LEVEL1, "SSFDC_RO: build_block_map() nblks=%d (%luK)\n", - ssfdc->map_len, (unsigned long)ssfdc->map_len * - ssfdc->erase_size / 1024 ); + ssfdc->map_len, + (unsigned long)ssfdc->map_len * ssfdc->erase_size / 1024); /* Scan every physical block, skip CIS block */ for (phys_block = ssfdc->cis_block + 1; phys_block < ssfdc->map_len; @@ -329,21 +329,21 @@ static void ssfdcr_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd) /* Set geometry */ ssfdc->heads = 16; ssfdc->sectors = 32; - get_chs( mtd->size, NULL, &ssfdc->heads, &ssfdc->sectors); + get_chs(mtd->size, NULL, &ssfdc->heads, &ssfdc->sectors); ssfdc->cylinders = (unsigned short)((mtd->size >> SECTOR_SHIFT) / ((long)ssfdc->sectors * (long)ssfdc->heads)); DEBUG(MTD_DEBUG_LEVEL1, "SSFDC_RO: using C:%d H:%d S:%d == %ld sects\n", ssfdc->cylinders, ssfdc->heads , ssfdc->sectors, (long)ssfdc->cylinders * (long)ssfdc->heads * - (long)ssfdc->sectors ); + (long)ssfdc->sectors); ssfdc->mbd.size = (long)ssfdc->heads * (long)ssfdc->cylinders * (long)ssfdc->sectors; /* Allocate logical block map */ - ssfdc->logic_block_map = kmalloc( sizeof(ssfdc->logic_block_map[0]) * - ssfdc->map_len, GFP_KERNEL); + ssfdc->logic_block_map = kmalloc(sizeof(ssfdc->logic_block_map[0]) * + ssfdc->map_len, GFP_KERNEL); if (!ssfdc->logic_block_map) { printk(KERN_WARNING "SSFDC_RO: out of memory for data structures\n"); @@ -414,7 +414,7 @@ static int ssfdcr_readsect(struct mtd_blktrans_dev *dev, "SSFDC_RO: ssfdcr_readsect() phys_sect_no=%lu\n", sect_no); - if (read_physical_sector( ssfdc->mbd.mtd, buf, sect_no ) < 0) + if (read_physical_sector(ssfdc->mbd.mtd, buf, sect_no) < 0) return -EIO; } else { memset(buf, 0xff, SECTOR_SIZE); From f2d719c65ad8f10afa7bec11315faa7badf4ecb9 Mon Sep 17 00:00:00 2001 From: Alexis Bruemmer Date: Thu, 7 Sep 2006 14:32:16 -0700 Subject: [PATCH 1031/1063] [SCSI] aic94xx: Removes Reliance on FLASH Manufacture IDs This patch removes the reliance on FLASH Manufacture IDs for validation. Signed-off-by: Alexis Bruemmer Signed-off-by: James Bottomley --- drivers/scsi/aic94xx/aic94xx_sds.c | 51 ++---------------------------- 1 file changed, 2 insertions(+), 49 deletions(-) diff --git a/drivers/scsi/aic94xx/aic94xx_sds.c b/drivers/scsi/aic94xx/aic94xx_sds.c index eec1e0db0e0f..83574b5b4e69 100644 --- a/drivers/scsi/aic94xx/aic94xx_sds.c +++ b/drivers/scsi/aic94xx/aic94xx_sds.c @@ -376,7 +376,6 @@ out: /* ---------- FLASH stuff ---------- */ #define FLASH_RESET 0xF0 -#define FLASH_MANUF_AMD 1 #define FLASH_SIZE 0x200000 #define FLASH_DIR_COOKIE "*** ADAPTEC FLASH DIRECTORY *** " @@ -627,7 +626,7 @@ static int asd_find_flash_dir(struct asd_ha_struct *asd_ha, static int asd_flash_getid(struct asd_ha_struct *asd_ha) { int err = 0; - u32 reg, inc; + u32 reg; reg = asd_read_reg_dword(asd_ha, EXSICNFGR); @@ -648,53 +647,7 @@ static int asd_flash_getid(struct asd_ha_struct *asd_ha) ASD_DPRINTK("couldn't reset flash(%d)\n", err); return err; } - /* Get flash info. This would most likely be AMD Am29LV family flash. - * First try the sequence for word mode. It is the same as for - * 008B (byte mode only), 160B (word mode) and 800D (word mode). - */ - reg = asd_ha->hw_prof.flash.bar; - inc = asd_ha->hw_prof.flash.wide ? 2 : 1; - asd_write_reg_byte(asd_ha, reg + 0x555, 0xAA); - asd_write_reg_byte(asd_ha, reg + 0x2AA, 0x55); - asd_write_reg_byte(asd_ha, reg + 0x555, 0x90); - asd_ha->hw_prof.flash.manuf = asd_read_reg_byte(asd_ha, reg); - asd_ha->hw_prof.flash.dev_id= asd_read_reg_byte(asd_ha,reg+inc); - asd_ha->hw_prof.flash.sec_prot = asd_read_reg_byte(asd_ha,reg+inc+inc); - /* Get out of autoselect mode. */ - err = asd_reset_flash(asd_ha); - - if (asd_ha->hw_prof.flash.manuf == FLASH_MANUF_AMD) { - ASD_DPRINTK("0Found FLASH(%d) manuf:%d, dev_id:0x%x, " - "sec_prot:%d\n", - asd_ha->hw_prof.flash.wide ? 16 : 8, - asd_ha->hw_prof.flash.manuf, - asd_ha->hw_prof.flash.dev_id, - asd_ha->hw_prof.flash.sec_prot); - return 0; - } - - /* Ok, try the sequence for byte mode of 160B and 800D. - * We may actually never need this. - */ - asd_write_reg_byte(asd_ha, reg + 0xAAA, 0xAA); - asd_write_reg_byte(asd_ha, reg + 0x555, 0x55); - asd_write_reg_byte(asd_ha, reg + 0xAAA, 0x90); - asd_ha->hw_prof.flash.manuf = asd_read_reg_byte(asd_ha, reg); - asd_ha->hw_prof.flash.dev_id = asd_read_reg_byte(asd_ha, reg + 2); - asd_ha->hw_prof.flash.sec_prot = asd_read_reg_byte(asd_ha, reg + 4); - err = asd_reset_flash(asd_ha); - - if (asd_ha->hw_prof.flash.manuf == FLASH_MANUF_AMD) { - ASD_DPRINTK("1Found FLASH(%d) manuf:%d, dev_id:0x%x, " - "sec_prot:%d\n", - asd_ha->hw_prof.flash.wide ? 16 : 8, - asd_ha->hw_prof.flash.manuf, - asd_ha->hw_prof.flash.dev_id, - asd_ha->hw_prof.flash.sec_prot); - return 0; - } - - return -ENOENT; + return 0; } static u16 asd_calc_flash_chksum(u16 *p, int size) From 10d19ae5e1715c27db7009df6d59179774e7b8a1 Mon Sep 17 00:00:00 2001 From: "malahal@us.ibm.com" Date: Thu, 7 Sep 2006 15:12:42 -0700 Subject: [PATCH 1032/1063] [SCSI] aic94xx: Fix for a typo in aic94xx_init() Signed-off-by: Malahal Naineni Signed-off-by: James Bottomley --- drivers/scsi/aic94xx/aic94xx_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c index 302b54fddf3c..ee2ccad70487 100644 --- a/drivers/scsi/aic94xx/aic94xx_init.c +++ b/drivers/scsi/aic94xx/aic94xx_init.c @@ -828,7 +828,7 @@ static int __init aic94xx_init(void) aic94xx_transport_template = sas_domain_attach_transport(&aic94xx_transport_functions); - if (err) + if (!aic94xx_transport_template) goto out_destroy_caches; err = pci_register_driver(&aic94xx_pci_driver); From 5fcda4224529c4e550c917668d5e96c1d3e7039b Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 14 Sep 2006 17:04:58 -0500 Subject: [PATCH 1033/1063] [SCSI] aha152x: remove static host array Fix this driver not to use a static two element host array instead use a list. This should fix panic on multiple eject reinsert of the pcmcia version of this device. Signed-off-by: James Bottomley --- drivers/scsi/aha152x.c | 53 +++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c index f974869ea323..fb6a476eb873 100644 --- a/drivers/scsi/aha152x.c +++ b/drivers/scsi/aha152x.c @@ -253,6 +253,7 @@ #include #include #include +#include #include #include @@ -262,6 +263,8 @@ #include #include "aha152x.h" +static LIST_HEAD(aha152x_host_list); + /* DEFINES */ @@ -423,8 +426,6 @@ MODULE_DEVICE_TABLE(isapnp, id_table); #endif /* !PCMCIA */ -static int registered_count=0; -static struct Scsi_Host *aha152x_host[2]; static struct scsi_host_template aha152x_driver_template; /* @@ -541,6 +542,7 @@ struct aha152x_hostdata { #ifdef __ISAPNP__ struct pnp_dev *pnpdev; #endif + struct list_head host_list; }; @@ -755,20 +757,9 @@ static inline Scsi_Cmnd *remove_SC(Scsi_Cmnd **SC, Scsi_Cmnd *SCp) return ptr; } -static inline struct Scsi_Host *lookup_irq(int irqno) -{ - int i; - - for(i=0; iirq==irqno) - return aha152x_host[i]; - - return NULL; -} - static irqreturn_t swintr(int irqno, void *dev_id, struct pt_regs *regs) { - struct Scsi_Host *shpnt = lookup_irq(irqno); + struct Scsi_Host *shpnt = (struct Scsi_Host *)dev_id; if (!shpnt) { printk(KERN_ERR "aha152x: catched software interrupt %d for unknown controller.\n", irqno); @@ -791,10 +782,11 @@ struct Scsi_Host *aha152x_probe_one(struct aha152x_setup *setup) return NULL; } - /* need to have host registered before triggering any interrupt */ - aha152x_host[registered_count] = shpnt; - memset(HOSTDATA(shpnt), 0, sizeof *HOSTDATA(shpnt)); + INIT_LIST_HEAD(&HOSTDATA(shpnt)->host_list); + + /* need to have host registered before triggering any interrupt */ + list_add_tail(&HOSTDATA(shpnt)->host_list, &aha152x_host_list); shpnt->io_port = setup->io_port; shpnt->n_io_port = IO_RANGE; @@ -907,12 +899,10 @@ struct Scsi_Host *aha152x_probe_one(struct aha152x_setup *setup) scsi_scan_host(shpnt); - registered_count++; - return shpnt; out_host_put: - aha152x_host[registered_count]=NULL; + list_del(&HOSTDATA(shpnt)->host_list); scsi_host_put(shpnt); return NULL; @@ -937,6 +927,7 @@ void aha152x_release(struct Scsi_Host *shpnt) #endif scsi_remove_host(shpnt); + list_del(&HOSTDATA(shpnt)->host_list); scsi_host_put(shpnt); } @@ -1459,9 +1450,12 @@ static struct work_struct aha152x_tq; */ static void run(void) { - int i; - for (i = 0; i0; + return 1; } static void __exit aha152x_exit(void) { - int i; + struct aha152x_hostdata *hd; - for(i=0; i Date: Fri, 15 Sep 2006 14:43:11 +0200 Subject: [PATCH 1034/1063] [SCSI] megaraid: Make megaraid_ioctl() check copy_to_user() return value Check copy_to_user() return value in drivers/scsi/megaraid.c::megadev_ioctl() This gets rid of this little warning: drivers/scsi/megaraid.c:3661: warning: ignoring return value of 'copy_to_user', declared with attribute warn_unused_result Signed-off-by: Jesper Juhl Acked-by: "Ju, Seokmann" Signed-off-by: James Bottomley --- drivers/scsi/megaraid.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index ccb0055ac73a..b87bef69ba0f 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -3656,8 +3656,9 @@ megadev_ioctl(struct inode *inode, struct file *filep, unsigned int cmd, * Send the request sense data also, irrespective of * whether the user has asked for it or not. */ - copy_to_user(upthru->reqsensearea, - pthru->reqsensearea, 14); + if (copy_to_user(upthru->reqsensearea, + pthru->reqsensearea, 14)) + rval = -EFAULT; freemem_and_return: if( pthru->dataxferlen ) { From 4c8bd7eeee4c8f157fb61fb64b57500990b42e0e Mon Sep 17 00:00:00 2001 From: David Miller Date: Fri, 22 Sep 2006 22:31:36 -0700 Subject: [PATCH 1035/1063] [KERNEL] Do not truncate to 'int' in ALIGN() macro. Signed-off-by: David S. Miller Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 851aa1bcfc1a..2b2ae4fdce8b 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -31,7 +31,7 @@ extern const char linux_banner[]; #define STACK_MAGIC 0xdeadbeef #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1)) +#define ALIGN(x,a) (((x)+(a)-1UL)&~((a)-1UL)) #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) From 5f77043f0f7851aa6139fb9a8b297497b540b397 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 24 Sep 2006 00:40:41 +1000 Subject: [PATCH 1036/1063] [CRYPTO] hmac: Fix hmac_init update call The crypto_hash_update call in hmac_init gave the number 1 instead of the length of the sg list in bytes. This is a missed conversion from the digest => hash change. As tcrypt only tests crypto_hash_digest it didn't catch this. Signed-off-by: Herbert Xu Signed-off-by: Linus Torvalds --- crypto/hmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/hmac.c b/crypto/hmac.c index f403b6946047..d52b234835cf 100644 --- a/crypto/hmac.c +++ b/crypto/hmac.c @@ -98,7 +98,7 @@ static int hmac_init(struct hash_desc *pdesc) sg_set_buf(&tmp, ipad, bs); return unlikely(crypto_hash_init(&desc)) ?: - crypto_hash_update(&desc, &tmp, 1); + crypto_hash_update(&desc, &tmp, bs); } static int hmac_update(struct hash_desc *pdesc, From d7b2004528a967f2ba0bf31b1eb0da6a876960e6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Sep 2006 16:44:16 +0100 Subject: [PATCH 1037/1063] [PATCH] missing includes from infiniband merge indirect chains of includes are arch-specific and can't be relied upon... (hell, even attempt to build it for itanic would trigger vmalloc.h ones; err.h triggers on e.g. alpha). Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- drivers/infiniband/core/mad_priv.h | 1 + drivers/infiniband/hw/amso1100/c2_provider.c | 1 + drivers/infiniband/hw/amso1100/c2_rnic.c | 1 + drivers/infiniband/hw/ipath/ipath_diag.c | 1 + 4 files changed, 4 insertions(+) diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 1da9adbccaec..d06b59083f6e 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -38,6 +38,7 @@ #define __IB_MAD_PRIV_H__ #include +#include #include #include #include diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c index 8fddc8cccdf3..dd6af551108b 100644 --- a/drivers/infiniband/hw/amso1100/c2_provider.c +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c index 1c3c9d65ecea..f49a32b7a8f6 100644 --- a/drivers/infiniband/hw/amso1100/c2_rnic.c +++ b/drivers/infiniband/hw/amso1100/c2_rnic.c @@ -50,6 +50,7 @@ #include #include #include +#include #include diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c index 28b6b46c106a..29958b6e0214 100644 --- a/drivers/infiniband/hw/ipath/ipath_diag.c +++ b/drivers/infiniband/hw/ipath/ipath_diag.c @@ -43,6 +43,7 @@ #include #include +#include #include #include "ipath_kernel.h" From 13b5aeccc4350e5069c723e8f9becd7208ee02f2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Sep 2006 16:44:58 +0100 Subject: [PATCH 1038/1063] [PATCH] more fallout from get_property returning pointer to const Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/powermac/feature.c | 4 ++-- arch/powerpc/platforms/powermac/smp.c | 2 +- drivers/char/briq_panel.c | 2 +- drivers/video/riva/fbdev.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index 13fcaf5b1796..e49621be6640 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -1058,8 +1058,8 @@ core99_reset_cpu(struct device_node *node, long param, long value) if (np == NULL) return -ENODEV; for (np = np->child; np != NULL; np = np->sibling) { - u32 *num = get_property(np, "reg", NULL); - u32 *rst = get_property(np, "soft-reset", NULL); + const u32 *num = get_property(np, "reg", NULL); + const u32 *rst = get_property(np, "soft-reset", NULL); if (num == NULL || rst == NULL) continue; if (param == *num) { diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index 653eeb64d1e2..1949b657b092 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -702,7 +702,7 @@ static void __init smp_core99_setup(int ncpus) /* GPIO based HW sync on ppc32 Core99 */ if (pmac_tb_freeze == NULL && !machine_is_compatible("MacRISC4")) { struct device_node *cpu; - u32 *tbprop = NULL; + const u32 *tbprop = NULL; core99_tb_gpio = KL_GPIO_TB_ENABLE; /* default value */ cpu = of_find_node_by_type(NULL, "cpu"); diff --git a/drivers/char/briq_panel.c b/drivers/char/briq_panel.c index a0e5eac5f33a..caae795dd3e7 100644 --- a/drivers/char/briq_panel.c +++ b/drivers/char/briq_panel.c @@ -202,7 +202,7 @@ static struct miscdevice briq_panel_miscdev = { static int __init briq_panel_init(void) { struct device_node *root = find_path_device("/"); - char *machine; + const char *machine; int i; machine = get_property(root, "model", NULL); diff --git a/drivers/video/riva/fbdev.c b/drivers/video/riva/fbdev.c index 67d1e1c8813d..61a4665fb486 100644 --- a/drivers/video/riva/fbdev.c +++ b/drivers/video/riva/fbdev.c @@ -1827,7 +1827,7 @@ static int __devinit riva_get_EDID_OF(struct fb_info *info, struct pci_dev *pd) struct riva_par *par = info->par; struct device_node *dp; unsigned char *pedid = NULL; - unsigned char *disptype = NULL; + const unsigned char *disptype = NULL; static char *propnames[] = { "DFP,EDID", "LCD,EDID", "EDID", "EDID1", "EDID,B", "EDID,A", NULL }; int i; From 2efc80cb8ddc341d81de996920e3b2ad8a12b1f7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Sep 2006 16:45:55 +0100 Subject: [PATCH 1039/1063] [PATCH] #elif that should've been #elif defined #elif CONFIG_44x in ibm4xx.h should've been #elif defined(CONFIG_44x) Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/asm-ppc/ibm4xx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-ppc/ibm4xx.h b/include/asm-ppc/ibm4xx.h index cf62b69cb69a..499c14691c71 100644 --- a/include/asm-ppc/ibm4xx.h +++ b/include/asm-ppc/ibm4xx.h @@ -86,7 +86,7 @@ void ppc4xx_init(unsigned long r3, unsigned long r4, unsigned long r5, #define PCI_DRAM_OFFSET 0 #endif -#elif CONFIG_44x +#elif defined(CONFIG_44x) #if defined(CONFIG_BAMBOO) #include From 4ac493b1d5bfd332f3dee64baaa620961bab6cdc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Sep 2006 18:20:56 +0100 Subject: [PATCH 1040/1063] [PATCH] briq_panel: read() and write() get __user pointers, damnit annotated, fixed a roothole in ->write(). Dereferencing user-supplied pointer is a Bad Idea(tm)... Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- drivers/char/briq_panel.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/char/briq_panel.c b/drivers/char/briq_panel.c index caae795dd3e7..b8c22255f6ad 100644 --- a/drivers/char/briq_panel.c +++ b/drivers/char/briq_panel.c @@ -87,7 +87,7 @@ static int briq_panel_release(struct inode *ino, struct file *filep) return 0; } -static ssize_t briq_panel_read(struct file *file, char *buf, size_t count, +static ssize_t briq_panel_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { unsigned short c; @@ -135,7 +135,7 @@ static void scroll_vfd( void ) vfd_cursor = 20; } -static ssize_t briq_panel_write(struct file *file, const char *buf, size_t len, +static ssize_t briq_panel_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos) { size_t indx = len; @@ -150,19 +150,22 @@ static ssize_t briq_panel_write(struct file *file, const char *buf, size_t len, return -EBUSY; for (;;) { + char c; if (!indx) break; + if (get_user(c, buf)) + return -EFAULT; if (esc) { - set_led(*buf); + set_led(c); esc = 0; - } else if (*buf == 27) { + } else if (c == 27) { esc = 1; - } else if (*buf == 12) { + } else if (c == 12) { /* do a form feed */ for (i=0; i<40; i++) vfd[i] = ' '; vfd_cursor = 0; - } else if (*buf == 10) { + } else if (c == 10) { if (vfd_cursor < 20) vfd_cursor = 20; else if (vfd_cursor < 40) @@ -175,7 +178,7 @@ static ssize_t briq_panel_write(struct file *file, const char *buf, size_t len, /* just a character */ if (vfd_cursor > 39) scroll_vfd(); - vfd[vfd_cursor++] = *buf; + vfd[vfd_cursor++] = c; } indx--; buf++; From 79da342c31ea839277060c1d2086aaf3b5cd85a4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Sep 2006 18:21:35 +0100 Subject: [PATCH 1041/1063] [PATCH] more get_property() fallout Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- drivers/video/riva/fbdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/riva/fbdev.c b/drivers/video/riva/fbdev.c index 61a4665fb486..4acde4f7dbf8 100644 --- a/drivers/video/riva/fbdev.c +++ b/drivers/video/riva/fbdev.c @@ -1826,7 +1826,7 @@ static int __devinit riva_get_EDID_OF(struct fb_info *info, struct pci_dev *pd) { struct riva_par *par = info->par; struct device_node *dp; - unsigned char *pedid = NULL; + const unsigned char *pedid = NULL; const unsigned char *disptype = NULL; static char *propnames[] = { "DFP,EDID", "LCD,EDID", "EDID", "EDID1", "EDID,B", "EDID,A", NULL }; From 231839102b54512ced7d3ee7fc9b8bcf5e3b583b Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Sat, 16 Sep 2006 20:30:47 -0400 Subject: [PATCH 1042/1063] [SCSI] scsi_debug version 1.80 See http://www.torque.net/sg/sdebug26.html for more information on the scsi_debug driver. ChangeLog: - add 'vpd_use_hostno' parameter to allow simulated hosts to see the same set of targets (and luns). For testing multipath software. - add 'fake_rw' parameter to ignore the data in READ and WRITE commands - add support for log subpages (new in SPC-4) - yield appropriate block descriptor for MODE SENSE commands (only for pdt=0 (i.e. disks)) - REQUEST SENSE response no longer shows the stopped power condition (SAT changed to agree with SPC-3) Signed-off-by: Douglas Gilbert Signed-off-by: James Bottomley --- drivers/scsi/scsi_debug.c | 230 +++++++++++++++++++++++++++++++------- 1 file changed, 189 insertions(+), 41 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index a80303c6b3fd..9c0f35820e3e 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -1,5 +1,4 @@ /* - * linux/kernel/scsi_debug.c * vvvvvvvvvvvvvvvvvvvvvvv Original vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv * Copyright (C) 1992 Eric Youngdale * Simulate a host adapter with 2 disks attached. Do a lot of checking @@ -8,7 +7,9 @@ * ^^^^^^^^^^^^^^^^^^^^^^^ Original ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ * * This version is more generic, simulating a variable number of disk - * (or disk like devices) sharing a common amount of RAM + * (or disk like devices) sharing a common amount of RAM. To be more + * realistic, the simulated devices have the transport attributes of + * SAS disks. * * * For documentation see http://www.torque.net/sg/sdebug26.html @@ -50,8 +51,8 @@ #include "scsi_logging.h" #include "scsi_debug.h" -#define SCSI_DEBUG_VERSION "1.79" -static const char * scsi_debug_version_date = "20060604"; +#define SCSI_DEBUG_VERSION "1.80" +static const char * scsi_debug_version_date = "20060914"; /* Additional Sense Code (ASC) used */ #define NO_ADDITIONAL_SENSE 0x0 @@ -86,6 +87,8 @@ static const char * scsi_debug_version_date = "20060604"; #define DEF_D_SENSE 0 #define DEF_NO_LUN_0 0 #define DEF_VIRTUAL_GB 0 +#define DEF_FAKE_RW 0 +#define DEF_VPD_USE_HOSTNO 1 /* bit mask values for scsi_debug_opts */ #define SCSI_DEBUG_OPT_NOISE 1 @@ -127,6 +130,8 @@ static int scsi_debug_ptype = DEF_PTYPE; /* SCSI peripheral type (0==disk) */ static int scsi_debug_dsense = DEF_D_SENSE; static int scsi_debug_no_lun_0 = DEF_NO_LUN_0; static int scsi_debug_virtual_gb = DEF_VIRTUAL_GB; +static int scsi_debug_fake_rw = DEF_FAKE_RW; +static int scsi_debug_vpd_use_hostno = DEF_VPD_USE_HOSTNO; static int scsi_debug_cmnd_count = 0; @@ -423,6 +428,8 @@ int scsi_debug_queuecommand(struct scsi_cmnd * SCpnt, done_funct_t done) case READ_6: if ((errsts = check_readiness(SCpnt, 0, devip))) break; + if (scsi_debug_fake_rw) + break; if ((*cmd) == READ_16) { for (lba = 0, j = 0; j < 8; ++j) { if (j > 0) @@ -465,6 +472,8 @@ int scsi_debug_queuecommand(struct scsi_cmnd * SCpnt, done_funct_t done) case WRITE_6: if ((errsts = check_readiness(SCpnt, 0, devip))) break; + if (scsi_debug_fake_rw) + break; if ((*cmd) == WRITE_16) { for (lba = 0, j = 0; j < 8; ++j) { if (j > 0) @@ -941,6 +950,8 @@ static int resp_inquiry(struct scsi_cmnd * scp, int target, char lu_id_str[6]; int host_no = devip->sdbg_host->shost->host_no; + if (0 == scsi_debug_vpd_use_hostno) + host_no = 0; lu_id_num = devip->wlun ? -1 : (((host_no + 1) * 2000) + (devip->target * 1000) + devip->lun); target_dev_id = ((host_no + 1) * 2000) + @@ -1059,19 +1070,6 @@ static int resp_requests(struct scsi_cmnd * scp, arr[12] = THRESHOLD_EXCEEDED; arr[13] = 0xff; /* TEST set and MRIE==6 */ } - } else if (devip->stopped) { - if (want_dsense) { - arr[0] = 0x72; - arr[1] = 0x0; /* NO_SENSE in sense_key */ - arr[2] = LOW_POWER_COND_ON; - arr[3] = 0x0; /* TEST set and MRIE==6 */ - } else { - arr[0] = 0x70; - arr[2] = 0x0; /* NO_SENSE in sense_key */ - arr[7] = 0xa; /* 18 byte sense buffer */ - arr[12] = LOW_POWER_COND_ON; - arr[13] = 0x0; /* TEST set and MRIE==6 */ - } } else { memcpy(arr, sbuff, SDEBUG_SENSE_LEN); if ((cmd[1] & 1) && (! scsi_debug_dsense)) { @@ -1325,21 +1323,26 @@ static int resp_sas_sha_m_spg(unsigned char * p, int pcontrol) static int resp_mode_sense(struct scsi_cmnd * scp, int target, struct sdebug_dev_info * devip) { - unsigned char dbd; - int pcontrol, pcode, subpcode; + unsigned char dbd, llbaa; + int pcontrol, pcode, subpcode, bd_len; unsigned char dev_spec; - int alloc_len, msense_6, offset, len, errsts, target_dev_id; + int k, alloc_len, msense_6, offset, len, errsts, target_dev_id; unsigned char * ap; unsigned char arr[SDEBUG_MAX_MSENSE_SZ]; unsigned char *cmd = (unsigned char *)scp->cmnd; if ((errsts = check_readiness(scp, 1, devip))) return errsts; - dbd = cmd[1] & 0x8; + dbd = !!(cmd[1] & 0x8); pcontrol = (cmd[2] & 0xc0) >> 6; pcode = cmd[2] & 0x3f; subpcode = cmd[3]; msense_6 = (MODE_SENSE == cmd[0]); + llbaa = msense_6 ? 0 : !!(cmd[1] & 0x10); + if ((0 == scsi_debug_ptype) && (0 == dbd)) + bd_len = llbaa ? 16 : 8; + else + bd_len = 0; alloc_len = msense_6 ? cmd[4] : ((cmd[7] << 8) | cmd[8]); memset(arr, 0, SDEBUG_MAX_MSENSE_SZ); if (0x3 == pcontrol) { /* Saving values not supported */ @@ -1349,15 +1352,58 @@ static int resp_mode_sense(struct scsi_cmnd * scp, int target, } target_dev_id = ((devip->sdbg_host->shost->host_no + 1) * 2000) + (devip->target * 1000) - 3; - dev_spec = DEV_READONLY(target) ? 0x80 : 0x0; + /* set DPOFUA bit for disks */ + if (0 == scsi_debug_ptype) + dev_spec = (DEV_READONLY(target) ? 0x80 : 0x0) | 0x10; + else + dev_spec = 0x0; if (msense_6) { arr[2] = dev_spec; + arr[3] = bd_len; offset = 4; } else { arr[3] = dev_spec; + if (16 == bd_len) + arr[4] = 0x1; /* set LONGLBA bit */ + arr[7] = bd_len; /* assume 255 or less */ offset = 8; } ap = arr + offset; + if ((bd_len > 0) && (0 == sdebug_capacity)) { + if (scsi_debug_virtual_gb > 0) { + sdebug_capacity = 2048 * 1024; + sdebug_capacity *= scsi_debug_virtual_gb; + } else + sdebug_capacity = sdebug_store_sectors; + } + if (8 == bd_len) { + if (sdebug_capacity > 0xfffffffe) { + ap[0] = 0xff; + ap[1] = 0xff; + ap[2] = 0xff; + ap[3] = 0xff; + } else { + ap[0] = (sdebug_capacity >> 24) & 0xff; + ap[1] = (sdebug_capacity >> 16) & 0xff; + ap[2] = (sdebug_capacity >> 8) & 0xff; + ap[3] = sdebug_capacity & 0xff; + } + ap[6] = (SECT_SIZE_PER(target) >> 8) & 0xff; + ap[7] = SECT_SIZE_PER(target) & 0xff; + offset += bd_len; + ap = arr + offset; + } else if (16 == bd_len) { + unsigned long long capac = sdebug_capacity; + + for (k = 0; k < 8; ++k, capac >>= 8) + ap[7 - k] = capac & 0xff; + ap[12] = (SECT_SIZE_PER(target) >> 24) & 0xff; + ap[13] = (SECT_SIZE_PER(target) >> 16) & 0xff; + ap[14] = (SECT_SIZE_PER(target) >> 8) & 0xff; + ap[15] = SECT_SIZE_PER(target) & 0xff; + offset += bd_len; + ap = arr + offset; + } if ((subpcode > 0x0) && (subpcode < 0xff) && (0x19 != pcode)) { /* TODO: Control Extension page */ @@ -1471,7 +1517,7 @@ static int resp_mode_select(struct scsi_cmnd * scp, int mselect6, " IO sent=%d bytes\n", param_len, res); md_len = mselect6 ? (arr[0] + 1) : ((arr[0] << 8) + arr[1] + 2); bd_len = mselect6 ? arr[3] : ((arr[6] << 8) + arr[7]); - if ((md_len > 2) || (0 != bd_len)) { + if (md_len > 2) { mk_sense_buffer(devip, ILLEGAL_REQUEST, INVALID_FIELD_IN_PARAM_LIST, 0); return check_condition_result; @@ -1544,7 +1590,7 @@ static int resp_ie_l_pg(unsigned char * arr) static int resp_log_sense(struct scsi_cmnd * scp, struct sdebug_dev_info * devip) { - int ppc, sp, pcontrol, pcode, alloc_len, errsts, len, n; + int ppc, sp, pcontrol, pcode, subpcode, alloc_len, errsts, len, n; unsigned char arr[SDEBUG_MAX_LSENSE_SZ]; unsigned char *cmd = (unsigned char *)scp->cmnd; @@ -1560,23 +1606,63 @@ static int resp_log_sense(struct scsi_cmnd * scp, } pcontrol = (cmd[2] & 0xc0) >> 6; pcode = cmd[2] & 0x3f; + subpcode = cmd[3] & 0xff; alloc_len = (cmd[7] << 8) + cmd[8]; arr[0] = pcode; - switch (pcode) { - case 0x0: /* Supported log pages log page */ - n = 4; - arr[n++] = 0x0; /* this page */ - arr[n++] = 0xd; /* Temperature */ - arr[n++] = 0x2f; /* Informational exceptions */ - arr[3] = n - 4; - break; - case 0xd: /* Temperature log page */ - arr[3] = resp_temp_l_pg(arr + 4); - break; - case 0x2f: /* Informational exceptions log page */ - arr[3] = resp_ie_l_pg(arr + 4); - break; - default: + if (0 == subpcode) { + switch (pcode) { + case 0x0: /* Supported log pages log page */ + n = 4; + arr[n++] = 0x0; /* this page */ + arr[n++] = 0xd; /* Temperature */ + arr[n++] = 0x2f; /* Informational exceptions */ + arr[3] = n - 4; + break; + case 0xd: /* Temperature log page */ + arr[3] = resp_temp_l_pg(arr + 4); + break; + case 0x2f: /* Informational exceptions log page */ + arr[3] = resp_ie_l_pg(arr + 4); + break; + default: + mk_sense_buffer(devip, ILLEGAL_REQUEST, + INVALID_FIELD_IN_CDB, 0); + return check_condition_result; + } + } else if (0xff == subpcode) { + arr[0] |= 0x40; + arr[1] = subpcode; + switch (pcode) { + case 0x0: /* Supported log pages and subpages log page */ + n = 4; + arr[n++] = 0x0; + arr[n++] = 0x0; /* 0,0 page */ + arr[n++] = 0x0; + arr[n++] = 0xff; /* this page */ + arr[n++] = 0xd; + arr[n++] = 0x0; /* Temperature */ + arr[n++] = 0x2f; + arr[n++] = 0x0; /* Informational exceptions */ + arr[3] = n - 4; + break; + case 0xd: /* Temperature subpages */ + n = 4; + arr[n++] = 0xd; + arr[n++] = 0x0; /* Temperature */ + arr[3] = n - 4; + break; + case 0x2f: /* Informational exceptions subpages */ + n = 4; + arr[n++] = 0x2f; + arr[n++] = 0x0; /* Informational exceptions */ + arr[3] = n - 4; + break; + default: + mk_sense_buffer(devip, ILLEGAL_REQUEST, + INVALID_FIELD_IN_CDB, 0); + return check_condition_result; + } + } else { mk_sense_buffer(devip, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0); return check_condition_result; @@ -2151,11 +2237,18 @@ static int schedule_resp(struct scsi_cmnd * cmnd, } } +/* Note: The following macros create attribute files in the + /sys/module/scsi_debug/parameters directory. Unfortunately this + driver is unaware of a change and cannot trigger auxiliary actions + as it can when the corresponding attribute in the + /sys/bus/pseudo/drivers/scsi_debug directory is changed. + */ module_param_named(add_host, scsi_debug_add_host, int, S_IRUGO | S_IWUSR); module_param_named(delay, scsi_debug_delay, int, S_IRUGO | S_IWUSR); module_param_named(dev_size_mb, scsi_debug_dev_size_mb, int, S_IRUGO); module_param_named(dsense, scsi_debug_dsense, int, S_IRUGO | S_IWUSR); module_param_named(every_nth, scsi_debug_every_nth, int, S_IRUGO | S_IWUSR); +module_param_named(fake_rw, scsi_debug_fake_rw, int, S_IRUGO | S_IWUSR); module_param_named(max_luns, scsi_debug_max_luns, int, S_IRUGO | S_IWUSR); module_param_named(no_lun_0, scsi_debug_no_lun_0, int, S_IRUGO | S_IWUSR); module_param_named(num_parts, scsi_debug_num_parts, int, S_IRUGO); @@ -2164,6 +2257,8 @@ module_param_named(opts, scsi_debug_opts, int, S_IRUGO | S_IWUSR); module_param_named(ptype, scsi_debug_ptype, int, S_IRUGO | S_IWUSR); module_param_named(scsi_level, scsi_debug_scsi_level, int, S_IRUGO); module_param_named(virtual_gb, scsi_debug_virtual_gb, int, S_IRUGO | S_IWUSR); +module_param_named(vpd_use_hostno, scsi_debug_vpd_use_hostno, int, + S_IRUGO | S_IWUSR); MODULE_AUTHOR("Eric Youngdale + Douglas Gilbert"); MODULE_DESCRIPTION("SCSI debug adapter driver"); @@ -2175,6 +2270,7 @@ MODULE_PARM_DESC(delay, "# of jiffies to delay response(def=1)"); MODULE_PARM_DESC(dev_size_mb, "size in MB of ram shared by devs(def=8)"); MODULE_PARM_DESC(dsense, "use descriptor sense format(def=0 -> fixed)"); MODULE_PARM_DESC(every_nth, "timeout every nth command(def=100)"); +MODULE_PARM_DESC(fake_rw, "fake reads/writes instead of copying (def=0)"); MODULE_PARM_DESC(max_luns, "number of LUNs per target to simulate(def=1)"); MODULE_PARM_DESC(no_lun_0, "no LU number 0 (def=0 -> have lun 0)"); MODULE_PARM_DESC(num_parts, "number of partitions(def=0)"); @@ -2183,6 +2279,7 @@ MODULE_PARM_DESC(opts, "1->noise, 2->medium_error, 4->... (def=0)"); MODULE_PARM_DESC(ptype, "SCSI peripheral type(def=0[disk])"); MODULE_PARM_DESC(scsi_level, "SCSI level to simulate(def=5[SPC-3])"); MODULE_PARM_DESC(virtual_gb, "virtual gigabyte size (def=0 -> use dev_size_mb)"); +MODULE_PARM_DESC(vpd_use_hostno, "0 -> dev ids ignore hostno (def=1 -> unique dev ids)"); static char sdebug_info[256]; @@ -2334,6 +2431,24 @@ static ssize_t sdebug_dsense_store(struct device_driver * ddp, DRIVER_ATTR(dsense, S_IRUGO | S_IWUSR, sdebug_dsense_show, sdebug_dsense_store); +static ssize_t sdebug_fake_rw_show(struct device_driver * ddp, char * buf) +{ + return scnprintf(buf, PAGE_SIZE, "%d\n", scsi_debug_fake_rw); +} +static ssize_t sdebug_fake_rw_store(struct device_driver * ddp, + const char * buf, size_t count) +{ + int n; + + if ((count > 0) && (1 == sscanf(buf, "%d", &n)) && (n >= 0)) { + scsi_debug_fake_rw = n; + return count; + } + return -EINVAL; +} +DRIVER_ATTR(fake_rw, S_IRUGO | S_IWUSR, sdebug_fake_rw_show, + sdebug_fake_rw_store); + static ssize_t sdebug_no_lun_0_show(struct device_driver * ddp, char * buf) { return scnprintf(buf, PAGE_SIZE, "%d\n", scsi_debug_no_lun_0); @@ -2487,6 +2602,31 @@ static ssize_t sdebug_add_host_store(struct device_driver * ddp, DRIVER_ATTR(add_host, S_IRUGO | S_IWUSR, sdebug_add_host_show, sdebug_add_host_store); +static ssize_t sdebug_vpd_use_hostno_show(struct device_driver * ddp, + char * buf) +{ + return scnprintf(buf, PAGE_SIZE, "%d\n", scsi_debug_vpd_use_hostno); +} +static ssize_t sdebug_vpd_use_hostno_store(struct device_driver * ddp, + const char * buf, size_t count) +{ + int n; + + if ((count > 0) && (1 == sscanf(buf, "%d", &n)) && (n >= 0)) { + scsi_debug_vpd_use_hostno = n; + return count; + } + return -EINVAL; +} +DRIVER_ATTR(vpd_use_hostno, S_IRUGO | S_IWUSR, sdebug_vpd_use_hostno_show, + sdebug_vpd_use_hostno_store); + +/* Note: The following function creates attribute files in the + /sys/bus/pseudo/drivers/scsi_debug directory. The advantage of these + files (over those found in the /sys/module/scsi_debug/parameters + directory) is that auxiliary actions can be triggered when an attribute + is changed. For example see: sdebug_add_host_store() above. + */ static int do_create_driverfs_files(void) { int ret; @@ -2496,23 +2636,31 @@ static int do_create_driverfs_files(void) ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_dev_size_mb); ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_dsense); ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_every_nth); + ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_fake_rw); ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_max_luns); - ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_num_tgts); + ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_no_lun_0); ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_num_parts); + ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_num_tgts); ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_ptype); ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_opts); ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_scsi_level); + ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_virtual_gb); + ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_vpd_use_hostno); return ret; } static void do_remove_driverfs_files(void) { + driver_remove_file(&sdebug_driverfs_driver, &driver_attr_vpd_use_hostno); + driver_remove_file(&sdebug_driverfs_driver, &driver_attr_virtual_gb); driver_remove_file(&sdebug_driverfs_driver, &driver_attr_scsi_level); driver_remove_file(&sdebug_driverfs_driver, &driver_attr_opts); driver_remove_file(&sdebug_driverfs_driver, &driver_attr_ptype); - driver_remove_file(&sdebug_driverfs_driver, &driver_attr_num_parts); driver_remove_file(&sdebug_driverfs_driver, &driver_attr_num_tgts); + driver_remove_file(&sdebug_driverfs_driver, &driver_attr_num_parts); + driver_remove_file(&sdebug_driverfs_driver, &driver_attr_no_lun_0); driver_remove_file(&sdebug_driverfs_driver, &driver_attr_max_luns); + driver_remove_file(&sdebug_driverfs_driver, &driver_attr_fake_rw); driver_remove_file(&sdebug_driverfs_driver, &driver_attr_every_nth); driver_remove_file(&sdebug_driverfs_driver, &driver_attr_dsense); driver_remove_file(&sdebug_driverfs_driver, &driver_attr_dev_size_mb); From 3e3c60e3a8c7013d55768aa7256bb5a7f66b0bb4 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 6 Sep 2006 09:04:40 -0500 Subject: [PATCH 1043/1063] [SCSI] aic7xxx: avoid checking SBLKCTL register for certain cards For cards that don't support LVD, checking the SBLKCTL register to determine the bus singalling doesn't work. So, check that the card supports LVD first (AHC_ULTRA2) before checking the register. Signed-off-by: James Bottomley --- drivers/scsi/aic7xxx/aic7xxx_osm.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c index e5bb4d87b307..0b3c01ac4259 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_osm.c +++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c @@ -2539,15 +2539,23 @@ static void ahc_linux_set_iu(struct scsi_target *starget, int iu) static void ahc_linux_get_signalling(struct Scsi_Host *shost) { struct ahc_softc *ahc = *(struct ahc_softc **)shost->hostdata; - u8 mode = ahc_inb(ahc, SBLKCTL); + u8 mode; - if (mode & ENAB40) - spi_signalling(shost) = SPI_SIGNAL_LVD; - else if (mode & ENAB20) + if (!(ahc->features & AHC_ULTRA2)) { + /* non-LVD chipset, may not have SBLKCTL reg */ spi_signalling(shost) = ahc->features & AHC_HVD ? SPI_SIGNAL_HVD : SPI_SIGNAL_SE; + return; + } + + mode = ahc_inb(ahc, SBLKCTL); + + if (mode & ENAB40) + spi_signalling(shost) = SPI_SIGNAL_LVD; + else if (mode & ENAB20) + spi_signalling(shost) = SPI_SIGNAL_SE; else spi_signalling(shost) = SPI_SIGNAL_UNKNOWN; } From cf2b5d3fcab77a9390293920ec5b49e67eced200 Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Sun, 17 Sep 2006 07:38:15 +0200 Subject: [PATCH 1044/1063] [SCSI] aic7xxx: pause sequencer before touching SBLKCTL Some cards need to pause the sequencer before the SBLKCTL register is touched. This fixes a PCI related oops seen on powerpc macs with this card caused by trying to ascertain the bus signalling before beginning domain validation. Signed-off-by: James Bottomley --- drivers/scsi/aic7xxx/aic7xxx_osm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c index 0b3c01ac4259..64c8b88a429f 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_osm.c +++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c @@ -2539,6 +2539,7 @@ static void ahc_linux_set_iu(struct scsi_target *starget, int iu) static void ahc_linux_get_signalling(struct Scsi_Host *shost) { struct ahc_softc *ahc = *(struct ahc_softc **)shost->hostdata; + unsigned long flags; u8 mode; if (!(ahc->features & AHC_ULTRA2)) { @@ -2550,7 +2551,11 @@ static void ahc_linux_get_signalling(struct Scsi_Host *shost) return; } + ahc_lock(ahc, &flags); + ahc_pause(ahc); mode = ahc_inb(ahc, SBLKCTL); + ahc_unpause(ahc); + ahc_unlock(ahc, &flags); if (mode & ENAB40) spi_signalling(shost) = SPI_SIGNAL_LVD; From d136205182b1ea4897da31e325a296f8831a6796 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 18 Sep 2006 22:28:04 +0200 Subject: [PATCH 1045/1063] [SCSI] zfcp: remove zfcp_ccw_unregister function Remove unused zfcp_ccw_unregister function (leftover from zfcp's module_exit era). Signed-off-by: Heiko Carstens Signed-off-by: Andreas Herrmann Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_ccw.c | 13 ------------- drivers/s390/scsi/zfcp_ext.h | 1 - 2 files changed, 14 deletions(-) diff --git a/drivers/s390/scsi/zfcp_ccw.c b/drivers/s390/scsi/zfcp_ccw.c index fdabadeaa9ee..81680efa1721 100644 --- a/drivers/s390/scsi/zfcp_ccw.c +++ b/drivers/s390/scsi/zfcp_ccw.c @@ -274,19 +274,6 @@ zfcp_ccw_register(void) return retval; } -/** - * zfcp_ccw_unregister - ccw unregister function - * - * Unregisters the driver from common i/o layer. Function will be called at - * module unload/system shutdown. - */ -void __exit -zfcp_ccw_unregister(void) -{ - zfcp_sysfs_driver_remove_files(&zfcp_ccw_driver.driver); - ccw_driver_unregister(&zfcp_ccw_driver); -} - /** * zfcp_ccw_shutdown - gets called on reboot/shutdown * diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index 146d7a2b4c4a..b45d1bf297a8 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -55,7 +55,6 @@ extern void zfcp_unit_dequeue(struct zfcp_unit *); /******************************* S/390 IO ************************************/ extern int zfcp_ccw_register(void); -extern void zfcp_ccw_unregister(void); extern void zfcp_qdio_zero_sbals(struct qdio_buffer **, int, int); extern int zfcp_qdio_allocate(struct zfcp_adapter *); From dd52e0eaf891cd85bf2ca057c15ed6bfd76db4e6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 18 Sep 2006 22:28:49 +0200 Subject: [PATCH 1046/1063] [SCSI] zfcp: create private slab caches to guarantee proper data alignment Create private slab caches in order to guarantee proper alignment of data structures that get passed to hardware. Sidenote: with this patch slab cache debugging will finally work on s390 (at least no known problems left). Furthermore this patch does some minor cleanups: - store ptr for transport template in struct zfcp_data Signed-off-by: Heiko Carstens Signed-off-by: Andreas Herrmann Compile fix ups and Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_aux.c | 76 ++++++++++++++++++++++++++--------- drivers/s390/scsi/zfcp_def.h | 16 +++++--- drivers/s390/scsi/zfcp_ext.h | 1 - drivers/s390/scsi/zfcp_fsf.c | 33 ++++++++++----- drivers/s390/scsi/zfcp_scsi.c | 5 +-- 5 files changed, 92 insertions(+), 39 deletions(-) diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index adc9d8f2c28f..d2b094d9c34f 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -299,11 +299,45 @@ zfcp_init_device_configure(void) return; } +static int calc_alignment(int size) +{ + int align = 1; + + if (!size) + return 0; + + while ((size - align) > 0) + align <<= 1; + + return align; +} + static int __init zfcp_module_init(void) { + int retval = -ENOMEM; + int size, align; - int retval = 0; + size = sizeof(struct zfcp_fsf_req_qtcb); + align = calc_alignment(size); + zfcp_data.fsf_req_qtcb_cache = + kmem_cache_create("zfcp_fsf", size, align, 0, NULL, NULL); + if (!zfcp_data.fsf_req_qtcb_cache) + goto out; + + size = sizeof(struct fsf_status_read_buffer); + align = calc_alignment(size); + zfcp_data.sr_buffer_cache = + kmem_cache_create("zfcp_sr", size, align, 0, NULL, NULL); + if (!zfcp_data.sr_buffer_cache) + goto out_sr_cache; + + size = sizeof(struct zfcp_gid_pn_data); + align = calc_alignment(size); + zfcp_data.gid_pn_cache = + kmem_cache_create("zfcp_gid", size, align, 0, NULL, NULL); + if (!zfcp_data.gid_pn_cache) + goto out_gid_cache; atomic_set(&zfcp_data.loglevel, loglevel); @@ -313,15 +347,16 @@ zfcp_module_init(void) /* initialize adapters to be removed list head */ INIT_LIST_HEAD(&zfcp_data.adapter_remove_lh); - zfcp_transport_template = fc_attach_transport(&zfcp_transport_functions); - if (!zfcp_transport_template) - return -ENODEV; + zfcp_data.scsi_transport_template = + fc_attach_transport(&zfcp_transport_functions); + if (!zfcp_data.scsi_transport_template) + goto out_transport; retval = misc_register(&zfcp_cfdc_misc); if (retval != 0) { ZFCP_LOG_INFO("registration of misc device " "zfcp_cfdc failed\n"); - goto out; + goto out_misc; } ZFCP_LOG_TRACE("major/minor for zfcp_cfdc: %d/%d\n", @@ -333,9 +368,6 @@ zfcp_module_init(void) /* initialise configuration rw lock */ rwlock_init(&zfcp_data.config_lock); - /* save address of data structure managing the driver module */ - zfcp_data.scsi_host_template.module = THIS_MODULE; - /* setup dynamic I/O */ retval = zfcp_ccw_register(); if (retval) { @@ -350,6 +382,14 @@ zfcp_module_init(void) out_ccw_register: misc_deregister(&zfcp_cfdc_misc); + out_misc: + fc_release_transport(zfcp_data.scsi_transport_template); + out_transport: + kmem_cache_destroy(zfcp_data.gid_pn_cache); + out_gid_cache: + kmem_cache_destroy(zfcp_data.sr_buffer_cache); + out_sr_cache: + kmem_cache_destroy(zfcp_data.fsf_req_qtcb_cache); out: return retval; } @@ -935,20 +975,20 @@ static int zfcp_allocate_low_mem_buffers(struct zfcp_adapter *adapter) { adapter->pool.fsf_req_erp = - mempool_create_kmalloc_pool(ZFCP_POOL_FSF_REQ_ERP_NR, - sizeof(struct zfcp_fsf_req_pool_element)); + mempool_create_slab_pool(ZFCP_POOL_FSF_REQ_ERP_NR, + zfcp_data.fsf_req_qtcb_cache); if (!adapter->pool.fsf_req_erp) return -ENOMEM; adapter->pool.fsf_req_scsi = - mempool_create_kmalloc_pool(ZFCP_POOL_FSF_REQ_SCSI_NR, - sizeof(struct zfcp_fsf_req_pool_element)); + mempool_create_slab_pool(ZFCP_POOL_FSF_REQ_SCSI_NR, + zfcp_data.fsf_req_qtcb_cache); if (!adapter->pool.fsf_req_scsi) return -ENOMEM; adapter->pool.fsf_req_abort = - mempool_create_kmalloc_pool(ZFCP_POOL_FSF_REQ_ABORT_NR, - sizeof(struct zfcp_fsf_req_pool_element)); + mempool_create_slab_pool(ZFCP_POOL_FSF_REQ_ABORT_NR, + zfcp_data.fsf_req_qtcb_cache); if (!adapter->pool.fsf_req_abort) return -ENOMEM; @@ -959,14 +999,14 @@ zfcp_allocate_low_mem_buffers(struct zfcp_adapter *adapter) return -ENOMEM; adapter->pool.data_status_read = - mempool_create_kmalloc_pool(ZFCP_POOL_STATUS_READ_NR, - sizeof(struct fsf_status_read_buffer)); + mempool_create_slab_pool(ZFCP_POOL_STATUS_READ_NR, + zfcp_data.sr_buffer_cache); if (!adapter->pool.data_status_read) return -ENOMEM; adapter->pool.data_gid_pn = - mempool_create_kmalloc_pool(ZFCP_POOL_DATA_GID_PN_NR, - sizeof(struct zfcp_gid_pn_data)); + mempool_create_slab_pool(ZFCP_POOL_DATA_GID_PN_NR, + zfcp_data.gid_pn_cache); if (!adapter->pool.data_gid_pn) return -ENOMEM; diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h index 7c84b3d4bd94..ef1cd49184e8 100644 --- a/drivers/s390/scsi/zfcp_def.h +++ b/drivers/s390/scsi/zfcp_def.h @@ -19,7 +19,6 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ - #ifndef ZFCP_DEF_H #define ZFCP_DEF_H @@ -32,6 +31,10 @@ #include #include #include +#include +#include +#include +#include #include #include #include @@ -39,14 +42,11 @@ #include #include #include -#include "zfcp_fsf.h" #include #include #include #include -#include -#include -#include +#include "zfcp_fsf.h" /********************* GENERAL DEFINES *********************************/ @@ -1016,6 +1016,7 @@ typedef void zfcp_fsf_req_handler_t(struct zfcp_fsf_req*); /* driver data */ struct zfcp_data { struct scsi_host_template scsi_host_template; + struct scsi_transport_template *scsi_transport_template; atomic_t status; /* Module status flags */ struct list_head adapter_list_head; /* head of adapter list */ struct list_head adapter_remove_lh; /* head of adapters to be @@ -1031,6 +1032,9 @@ struct zfcp_data { wwn_t init_wwpn; fcp_lun_t init_fcp_lun; char *driver_version; + kmem_cache_t *fsf_req_qtcb_cache; + kmem_cache_t *sr_buffer_cache; + kmem_cache_t *gid_pn_cache; }; /** @@ -1051,7 +1055,7 @@ struct zfcp_sg_list { #define ZFCP_POOL_DATA_GID_PN_NR 1 /* struct used by memory pools for fsf_requests */ -struct zfcp_fsf_req_pool_element { +struct zfcp_fsf_req_qtcb { struct zfcp_fsf_req fsf_req; struct fsf_qtcb qtcb; }; diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index b45d1bf297a8..4f4ef0c4ca7b 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -130,7 +130,6 @@ extern int zfcp_scsi_command_async(struct zfcp_adapter *,struct zfcp_unit *, struct scsi_cmnd *, struct timer_list *); extern int zfcp_scsi_command_sync(struct zfcp_unit *, struct scsi_cmnd *, struct timer_list *); -extern struct scsi_transport_template *zfcp_transport_template; extern struct fc_function_template zfcp_transport_functions; /******************************** ERP ****************************************/ diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index ff2eacf5ec8c..4913ffbb2fc8 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -100,14 +100,19 @@ zfcp_fsf_req_alloc(mempool_t *pool, int req_flags) if (req_flags & ZFCP_REQ_NO_QTCB) size = sizeof(struct zfcp_fsf_req); else - size = sizeof(struct zfcp_fsf_req_pool_element); + size = sizeof(struct zfcp_fsf_req_qtcb); - if (likely(pool != NULL)) + if (likely(pool)) ptr = mempool_alloc(pool, GFP_ATOMIC); - else - ptr = kmalloc(size, GFP_ATOMIC); + else { + if (req_flags & ZFCP_REQ_NO_QTCB) + ptr = kmalloc(size, GFP_ATOMIC); + else + ptr = kmem_cache_alloc(zfcp_data.fsf_req_qtcb_cache, + SLAB_ATOMIC); + } - if (unlikely(NULL == ptr)) + if (unlikely(!ptr)) goto out; memset(ptr, 0, size); @@ -115,9 +120,8 @@ zfcp_fsf_req_alloc(mempool_t *pool, int req_flags) if (req_flags & ZFCP_REQ_NO_QTCB) { fsf_req = (struct zfcp_fsf_req *) ptr; } else { - fsf_req = &((struct zfcp_fsf_req_pool_element *) ptr)->fsf_req; - fsf_req->qtcb = - &((struct zfcp_fsf_req_pool_element *) ptr)->qtcb; + fsf_req = &((struct zfcp_fsf_req_qtcb *) ptr)->fsf_req; + fsf_req->qtcb = &((struct zfcp_fsf_req_qtcb *) ptr)->qtcb; } fsf_req->pool = pool; @@ -139,10 +143,17 @@ zfcp_fsf_req_alloc(mempool_t *pool, int req_flags) void zfcp_fsf_req_free(struct zfcp_fsf_req *fsf_req) { - if (likely(fsf_req->pool != NULL)) + if (likely(fsf_req->pool)) { mempool_free(fsf_req, fsf_req->pool); - else - kfree(fsf_req); + return; + } + + if (fsf_req->qtcb) { + kmem_cache_free(zfcp_data.fsf_req_qtcb_cache, fsf_req); + return; + } + + kfree(fsf_req); } /** diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 1bb55086db9f..4857cccb1d5b 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -39,11 +39,10 @@ static struct zfcp_unit *zfcp_unit_lookup(struct zfcp_adapter *, int, static struct device_attribute *zfcp_sysfs_sdev_attrs[]; -struct scsi_transport_template *zfcp_transport_template; - struct zfcp_data zfcp_data = { .scsi_host_template = { .name = ZFCP_NAME, + .module = THIS_MODULE, .proc_name = "zfcp", .slave_alloc = zfcp_scsi_slave_alloc, .slave_configure = zfcp_scsi_slave_configure, @@ -607,7 +606,7 @@ zfcp_adapter_scsi_register(struct zfcp_adapter *adapter) adapter->scsi_host->max_channel = 0; adapter->scsi_host->unique_id = unique_id++; /* FIXME */ adapter->scsi_host->max_cmd_len = ZFCP_MAX_SCSI_CMND_LENGTH; - adapter->scsi_host->transportt = zfcp_transport_template; + adapter->scsi_host->transportt = zfcp_data.scsi_transport_template; /* * save a pointer to our own adapter data structure within From 4eff4a36516d72e4f6ede901141214a7e05607e7 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Mon, 18 Sep 2006 22:29:20 +0200 Subject: [PATCH 1047/1063] [SCSI] zfcp: fix: use correct req_id in eh_abort_handler zfcp's eh_abort_handler used the wrong request ID to identify the request to be aborted. The bug was introduced with commit fea9d6c7bcd8ff1d60ff74f27ba483b3820b18a3 for improved management of request IDs. The bug is fixed with this patch. Signed-off-by: Andreas Herrmann Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_aux.c | 4 +++ drivers/s390/scsi/zfcp_dbf.c | 13 ++++---- drivers/s390/scsi/zfcp_ext.h | 2 +- drivers/s390/scsi/zfcp_fsf.c | 9 +++-- drivers/s390/scsi/zfcp_scsi.c | 63 +++++++++++++++-------------------- 5 files changed, 42 insertions(+), 49 deletions(-) diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index d2b094d9c34f..504c9219961c 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -189,6 +189,10 @@ struct zfcp_fsf_req *zfcp_reqlist_ismember(struct zfcp_adapter *adapter, struct zfcp_fsf_req *request, *tmp; unsigned int i; + /* 0 is reserved as an invalid req_id */ + if (req_id == 0) + return NULL; + i = req_id % REQUEST_LIST_SIZE; list_for_each_entry_safe(request, tmp, &adapter->req_list[i], list) diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index c033145d0f19..0aa3b1ac76af 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -707,7 +707,7 @@ _zfcp_scsi_dbf_event_common(const char *tag, const char *tag2, int level, struct zfcp_adapter *adapter, struct scsi_cmnd *scsi_cmnd, struct zfcp_fsf_req *fsf_req, - struct zfcp_fsf_req *old_fsf_req) + unsigned long old_req_id) { struct zfcp_scsi_dbf_record *rec = &adapter->scsi_dbf_buf; struct zfcp_dbf_dump *dump = (struct zfcp_dbf_dump *)rec; @@ -768,8 +768,7 @@ _zfcp_scsi_dbf_event_common(const char *tag, const char *tag2, int level, rec->fsf_seqno = fsf_req->seq_no; rec->fsf_issued = fsf_req->issued; } - rec->type.old_fsf_reqid = - (unsigned long) old_fsf_req; + rec->type.old_fsf_reqid = old_req_id; } else { strncpy(dump->tag, "dump", ZFCP_DBF_TAG_SIZE); dump->total_size = buflen; @@ -794,17 +793,17 @@ zfcp_scsi_dbf_event_result(const char *tag, int level, struct zfcp_fsf_req *fsf_req) { _zfcp_scsi_dbf_event_common("rslt", tag, level, - adapter, scsi_cmnd, fsf_req, NULL); + adapter, scsi_cmnd, fsf_req, 0); } inline void zfcp_scsi_dbf_event_abort(const char *tag, struct zfcp_adapter *adapter, struct scsi_cmnd *scsi_cmnd, struct zfcp_fsf_req *new_fsf_req, - struct zfcp_fsf_req *old_fsf_req) + unsigned long old_req_id) { _zfcp_scsi_dbf_event_common("abrt", tag, 1, - adapter, scsi_cmnd, new_fsf_req, old_fsf_req); + adapter, scsi_cmnd, new_fsf_req, old_req_id); } inline void @@ -814,7 +813,7 @@ zfcp_scsi_dbf_event_devreset(const char *tag, u8 flag, struct zfcp_unit *unit, struct zfcp_adapter *adapter = unit->port->adapter; _zfcp_scsi_dbf_event_common(flag == FCP_TARGET_RESET ? "trst" : "lrst", - tag, 1, adapter, scsi_cmnd, NULL, NULL); + tag, 1, adapter, scsi_cmnd, NULL, 0); } static int diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index 4f4ef0c4ca7b..710ebbf89929 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -185,7 +185,7 @@ extern void zfcp_scsi_dbf_event_result(const char *, int, struct zfcp_adapter *, struct zfcp_fsf_req *); extern void zfcp_scsi_dbf_event_abort(const char *, struct zfcp_adapter *, struct scsi_cmnd *, struct zfcp_fsf_req *, - struct zfcp_fsf_req *); + unsigned long); extern void zfcp_scsi_dbf_event_devreset(const char *, u8, struct zfcp_unit *, struct scsi_cmnd *); extern void zfcp_reqlist_add(struct zfcp_adapter *, struct zfcp_fsf_req *); diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 4913ffbb2fc8..a66b5193b70e 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -3527,7 +3527,7 @@ zfcp_fsf_send_fcp_command_task(struct zfcp_adapter *adapter, fsf_req->unit = unit; /* associate FSF request with SCSI request (for look up on abort) */ - scsi_cmnd->host_scribble = (char *) fsf_req; + scsi_cmnd->host_scribble = (unsigned char *) fsf_req->req_id; /* associate SCSI command with FSF request */ fsf_req->data = (unsigned long) scsi_cmnd; @@ -4667,7 +4667,6 @@ zfcp_fsf_req_create(struct zfcp_adapter *adapter, u32 fsf_cmd, int req_flags, { volatile struct qdio_buffer_element *sbale; struct zfcp_fsf_req *fsf_req = NULL; - unsigned long flags; int ret = 0; struct zfcp_qdio_queue *req_queue = &adapter->request_queue; @@ -4684,10 +4683,10 @@ zfcp_fsf_req_create(struct zfcp_adapter *adapter, u32 fsf_cmd, int req_flags, fsf_req->fsf_command = fsf_cmd; INIT_LIST_HEAD(&fsf_req->list); - /* unique request id */ - spin_lock_irqsave(&adapter->req_list_lock, flags); + /* this is serialized (we are holding req_queue-lock of adapter */ + if (adapter->req_no == 0) + adapter->req_no++; fsf_req->req_id = adapter->req_no++; - spin_unlock_irqrestore(&adapter->req_list_lock, flags); zfcp_fsf_req_qtcb_init(fsf_req); diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 4857cccb1d5b..043ed7c0a7ed 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -378,16 +378,15 @@ zfcp_unit_lookup(struct zfcp_adapter *adapter, int channel, unsigned int id, * will handle late commands. (Usually, the normal completion of late * commands is ignored with respect to the running abort operation.) */ -int -zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt) +int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt) { struct Scsi_Host *scsi_host; struct zfcp_adapter *adapter; struct zfcp_unit *unit; - int retval = SUCCESS; - struct zfcp_fsf_req *new_fsf_req = NULL; - struct zfcp_fsf_req *old_fsf_req; + struct zfcp_fsf_req *fsf_req; unsigned long flags; + unsigned long old_req_id; + int retval = SUCCESS; scsi_host = scpnt->device->host; adapter = (struct zfcp_adapter *) scsi_host->hostdata[0]; @@ -399,55 +398,47 @@ zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt) /* avoid race condition between late normal completion and abort */ write_lock_irqsave(&adapter->abort_lock, flags); - /* - * Check whether command has just completed and can not be aborted. - * Even if the command has just been completed late, we can access - * scpnt since the SCSI stack does not release it at least until - * this routine returns. (scpnt is parameter passed to this routine - * and must not disappear during abort even on late completion.) - */ - old_fsf_req = (struct zfcp_fsf_req *) scpnt->host_scribble; - if (!old_fsf_req) { + /* Check whether corresponding fsf_req is still pending */ + spin_lock(&adapter->req_list_lock); + fsf_req = zfcp_reqlist_ismember(adapter, (unsigned long) + scpnt->host_scribble); + spin_unlock(&adapter->req_list_lock); + if (!fsf_req) { write_unlock_irqrestore(&adapter->abort_lock, flags); - zfcp_scsi_dbf_event_abort("lte1", adapter, scpnt, NULL, NULL); + zfcp_scsi_dbf_event_abort("lte1", adapter, scpnt, NULL, 0); retval = SUCCESS; goto out; } - old_fsf_req->data = 0; - old_fsf_req->status |= ZFCP_STATUS_FSFREQ_ABORTING; + fsf_req->data = 0; + fsf_req->status |= ZFCP_STATUS_FSFREQ_ABORTING; + old_req_id = fsf_req->req_id; - /* don't access old_fsf_req after releasing the abort_lock */ + /* don't access old fsf_req after releasing the abort_lock */ write_unlock_irqrestore(&adapter->abort_lock, flags); - /* call FSF routine which does the abort */ - new_fsf_req = zfcp_fsf_abort_fcp_command((unsigned long) old_fsf_req, - adapter, unit, 0); - if (!new_fsf_req) { + + fsf_req = zfcp_fsf_abort_fcp_command(old_req_id, adapter, unit, 0); + if (!fsf_req) { ZFCP_LOG_INFO("error: initiation of Abort FCP Cmnd failed\n"); zfcp_scsi_dbf_event_abort("nres", adapter, scpnt, NULL, - old_fsf_req); + old_req_id); retval = FAILED; goto out; } - /* wait for completion of abort */ - __wait_event(new_fsf_req->completion_wq, - new_fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED); + __wait_event(fsf_req->completion_wq, + fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED); - /* status should be valid since signals were not permitted */ - if (new_fsf_req->status & ZFCP_STATUS_FSFREQ_ABORTSUCCEEDED) { - zfcp_scsi_dbf_event_abort("okay", adapter, scpnt, new_fsf_req, - NULL); + if (fsf_req->status & ZFCP_STATUS_FSFREQ_ABORTSUCCEEDED) { + zfcp_scsi_dbf_event_abort("okay", adapter, scpnt, fsf_req, 0); retval = SUCCESS; - } else if (new_fsf_req->status & ZFCP_STATUS_FSFREQ_ABORTNOTNEEDED) { - zfcp_scsi_dbf_event_abort("lte2", adapter, scpnt, new_fsf_req, - NULL); + } else if (fsf_req->status & ZFCP_STATUS_FSFREQ_ABORTNOTNEEDED) { + zfcp_scsi_dbf_event_abort("lte2", adapter, scpnt, fsf_req, 0); retval = SUCCESS; } else { - zfcp_scsi_dbf_event_abort("fail", adapter, scpnt, new_fsf_req, - NULL); + zfcp_scsi_dbf_event_abort("fail", adapter, scpnt, fsf_req, 0); retval = FAILED; } - zfcp_fsf_req_free(new_fsf_req); + zfcp_fsf_req_free(fsf_req); out: return retval; } From 2abbe866c8eb0296e3f5343bcf73e5371522a738 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Mon, 18 Sep 2006 22:29:56 +0200 Subject: [PATCH 1048/1063] [SCSI] zfcp: introduce struct timer_list in struct zfcp_fsf_req This instance will be used whenever a timer is needed for a request by zfcp. Signed-off-by: Andreas Herrmann Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_aux.c | 4 - drivers/s390/scsi/zfcp_def.h | 8 +- drivers/s390/scsi/zfcp_erp.c | 228 ++++++++++-------------------- drivers/s390/scsi/zfcp_ext.h | 13 +- drivers/s390/scsi/zfcp_fsf.c | 253 ++++++++++++++-------------------- drivers/s390/scsi/zfcp_scsi.c | 25 +--- 6 files changed, 189 insertions(+), 342 deletions(-) diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index 504c9219961c..5d39b2df0cc4 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -1135,9 +1135,6 @@ zfcp_adapter_enqueue(struct ccw_device *ccw_device) /* initialize lock of associated request queue */ rwlock_init(&adapter->request_queue.queue_lock); - /* intitialise SCSI ER timer */ - init_timer(&adapter->scsi_er_timer); - /* mark adapter unusable as long as sysfs registration is not complete */ atomic_set_mask(ZFCP_STATUS_COMMON_REMOVE, &adapter->status); @@ -1653,7 +1650,6 @@ zfcp_ns_gid_pn_request(struct zfcp_erp_action *erp_action) gid_pn->ct.handler = zfcp_ns_gid_pn_handler; gid_pn->ct.handler_data = (unsigned long) gid_pn; gid_pn->ct.timeout = ZFCP_NS_GID_PN_TIMEOUT; - gid_pn->ct.timer = &erp_action->timer; gid_pn->port = erp_action->port; ret = zfcp_fsf_send_ct(&gid_pn->ct, adapter->pool.fsf_req_erp, diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h index ef1cd49184e8..8f882690994d 100644 --- a/drivers/s390/scsi/zfcp_def.h +++ b/drivers/s390/scsi/zfcp_def.h @@ -137,7 +137,7 @@ zfcp_address_to_sg(void *address, struct scatterlist *list) #define ZFCP_EXCHANGE_CONFIG_DATA_RETRIES 7 /* timeout value for "default timer" for fsf requests */ -#define ZFCP_FSF_REQUEST_TIMEOUT (60*HZ); +#define ZFCP_FSF_REQUEST_TIMEOUT (60*HZ) /*************** FIBRE CHANNEL PROTOCOL SPECIFIC DEFINES ********************/ @@ -779,7 +779,6 @@ typedef void (*zfcp_send_ct_handler_t)(unsigned long); * @handler_data: data passed to handler function * @pool: pointer to memory pool for ct request structure * @timeout: FSF timeout for this request - * @timer: timer (e.g. for request initiated by erp) * @completion: completion for synchronization purposes * @status: used to pass error status to calling function */ @@ -793,7 +792,6 @@ struct zfcp_send_ct { unsigned long handler_data; mempool_t *pool; int timeout; - struct timer_list *timer; struct completion *completion; int status; }; @@ -821,7 +819,6 @@ typedef void (*zfcp_send_els_handler_t)(unsigned long); * @resp_count: number of elements in response scatter-gather list * @handler: handler function (called for response to the request) * @handler_data: data passed to handler function - * @timer: timer (e.g. for request initiated by erp) * @completion: completion for synchronization purposes * @ls_code: hex code of ELS command * @status: used to pass error status to calling function @@ -836,7 +833,6 @@ struct zfcp_send_els { unsigned int resp_count; zfcp_send_els_handler_t handler; unsigned long handler_data; - struct timer_list *timer; struct completion *completion; int ls_code; int status; @@ -886,7 +882,6 @@ struct zfcp_adapter { struct list_head port_remove_lh; /* head of ports to be removed */ u32 ports; /* number of remote ports */ - struct timer_list scsi_er_timer; /* SCSI err recovery watch */ atomic_t reqs_active; /* # active FSF reqs */ unsigned long req_no; /* unique FSF req number */ struct list_head *req_list; /* list of pending reqs */ @@ -1003,6 +998,7 @@ struct zfcp_fsf_req { struct fsf_qtcb *qtcb; /* address of associated QTCB */ u32 seq_no; /* Sequence number of request */ unsigned long data; /* private data of request */ + struct timer_list timer; /* used for erp or scsi er */ struct zfcp_erp_action *erp_action; /* used if this request is issued on behalf of erp */ mempool_t *pool; /* used if request was alloacted diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 7f60b6fdf724..af42a0eadf03 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -64,8 +64,6 @@ static int zfcp_erp_strategy_check_action(struct zfcp_erp_action *, int); static int zfcp_erp_adapter_strategy(struct zfcp_erp_action *); static int zfcp_erp_adapter_strategy_generic(struct zfcp_erp_action *, int); static int zfcp_erp_adapter_strategy_close(struct zfcp_erp_action *); -static void zfcp_erp_adapter_strategy_close_qdio(struct zfcp_erp_action *); -static void zfcp_erp_adapter_strategy_close_fsf(struct zfcp_erp_action *); static int zfcp_erp_adapter_strategy_open(struct zfcp_erp_action *); static int zfcp_erp_adapter_strategy_open_qdio(struct zfcp_erp_action *); static int zfcp_erp_adapter_strategy_open_fsf(struct zfcp_erp_action *); @@ -111,8 +109,62 @@ static inline void zfcp_erp_action_to_ready(struct zfcp_erp_action *); static inline void zfcp_erp_action_to_running(struct zfcp_erp_action *); static void zfcp_erp_memwait_handler(unsigned long); -static void zfcp_erp_timeout_handler(unsigned long); -static inline void zfcp_erp_timeout_init(struct zfcp_erp_action *); + +/** + * zfcp_close_qdio - close qdio queues for an adapter + */ +static void zfcp_close_qdio(struct zfcp_adapter *adapter) +{ + struct zfcp_qdio_queue *req_queue; + int first, count; + + if (!atomic_test_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status)) + return; + + /* clear QDIOUP flag, thus do_QDIO is not called during qdio_shutdown */ + req_queue = &adapter->request_queue; + write_lock_irq(&req_queue->queue_lock); + atomic_clear_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status); + write_unlock_irq(&req_queue->queue_lock); + + debug_text_event(adapter->erp_dbf, 3, "qdio_down2a"); + while (qdio_shutdown(adapter->ccw_device, + QDIO_FLAG_CLEANUP_USING_CLEAR) == -EINPROGRESS) + msleep(1000); + debug_text_event(adapter->erp_dbf, 3, "qdio_down2b"); + + /* cleanup used outbound sbals */ + count = atomic_read(&req_queue->free_count); + if (count < QDIO_MAX_BUFFERS_PER_Q) { + first = (req_queue->free_index+count) % QDIO_MAX_BUFFERS_PER_Q; + count = QDIO_MAX_BUFFERS_PER_Q - count; + zfcp_qdio_zero_sbals(req_queue->buffer, first, count); + } + req_queue->free_index = 0; + atomic_set(&req_queue->free_count, 0); + req_queue->distance_from_int = 0; + adapter->response_queue.free_index = 0; + atomic_set(&adapter->response_queue.free_count, 0); +} + +/** + * zfcp_close_fsf - stop FSF operations for an adapter + * + * Dismiss and cleanup all pending fsf_reqs (this wakes up all initiators of + * requests waiting for completion; especially this returns SCSI commands + * with error state). + */ +static void zfcp_close_fsf(struct zfcp_adapter *adapter) +{ + /* close queues to ensure that buffers are not accessed by adapter */ + zfcp_close_qdio(adapter); + zfcp_fsf_req_dismiss_all(adapter); + /* reset FSF request sequence number */ + adapter->fsf_req_seq_no = 0; + /* all ports and units are closed */ + zfcp_erp_modify_adapter_status(adapter, + ZFCP_STATUS_COMMON_OPEN, ZFCP_CLEAR); +} /** * zfcp_fsf_request_timeout_handler - called if a request timed out @@ -121,52 +173,20 @@ static inline void zfcp_erp_timeout_init(struct zfcp_erp_action *); * This function needs to be called if requests (ELS, Generic Service, * or SCSI commands) exceed a certain time limit. The assumption is * that after the time limit the adapter get stuck. So we trigger a reopen of - * the adapter. This should not be used for error recovery, SCSI abort - * commands and SCSI requests from SCSI mid-layer. + * the adapter. */ -void -zfcp_fsf_request_timeout_handler(unsigned long data) +static void zfcp_fsf_request_timeout_handler(unsigned long data) { - struct zfcp_adapter *adapter; - - adapter = (struct zfcp_adapter *) data; - + struct zfcp_adapter *adapter = (struct zfcp_adapter *) data; zfcp_erp_adapter_reopen(adapter, 0); } -/** - * zfcp_fsf_scsi_er_timeout_handler - timeout handler for scsi eh tasks - * - * This function needs to be called whenever a SCSI error recovery - * action (abort/reset) does not return. Re-opening the adapter means - * that the abort/reset command can be returned by zfcp. It won't complete - * via the adapter anymore (because qdio queues are closed). If ERP is - * already running on this adapter it will be stopped. - */ -void zfcp_fsf_scsi_er_timeout_handler(unsigned long data) +void zfcp_fsf_start_timer(struct zfcp_fsf_req *fsf_req, unsigned long timeout) { - struct zfcp_adapter *adapter = (struct zfcp_adapter *) data; - unsigned long flags; - - ZFCP_LOG_NORMAL("warning: SCSI error recovery timed out. " - "Restarting all operations on the adapter %s\n", - zfcp_get_busid_by_adapter(adapter)); - debug_text_event(adapter->erp_dbf, 1, "eh_lmem_tout"); - - write_lock_irqsave(&adapter->erp_lock, flags); - if (atomic_test_mask(ZFCP_STATUS_ADAPTER_ERP_PENDING, - &adapter->status)) { - zfcp_erp_modify_adapter_status(adapter, - ZFCP_STATUS_COMMON_UNBLOCKED|ZFCP_STATUS_COMMON_OPEN, - ZFCP_CLEAR); - zfcp_erp_action_dismiss_adapter(adapter); - write_unlock_irqrestore(&adapter->erp_lock, flags); - /* dismiss all pending requests including requests for ERP */ - zfcp_fsf_req_dismiss_all(adapter); - adapter->fsf_req_seq_no = 0; - } else - write_unlock_irqrestore(&adapter->erp_lock, flags); - zfcp_erp_adapter_reopen(adapter, 0); + fsf_req->timer.function = zfcp_fsf_request_timeout_handler; + fsf_req->timer.data = (unsigned long) fsf_req->adapter; + fsf_req->timer.expires = timeout; + add_timer(&fsf_req->timer); } /* @@ -282,7 +302,6 @@ zfcp_erp_adisc(struct zfcp_port *port) struct zfcp_ls_adisc *adisc; void *address = NULL; int retval = 0; - struct timer_list *timer; send_els = kzalloc(sizeof(struct zfcp_send_els), GFP_ATOMIC); if (send_els == NULL) @@ -329,22 +348,11 @@ zfcp_erp_adisc(struct zfcp_port *port) (wwn_t) adisc->wwnn, adisc->hard_nport_id, adisc->nport_id); - timer = kmalloc(sizeof(struct timer_list), GFP_ATOMIC); - if (!timer) - goto nomem; - - init_timer(timer); - timer->function = zfcp_fsf_request_timeout_handler; - timer->data = (unsigned long) adapter; - timer->expires = ZFCP_FSF_REQUEST_TIMEOUT; - send_els->timer = timer; - retval = zfcp_fsf_send_els(send_els); if (retval != 0) { ZFCP_LOG_NORMAL("error: initiation of Send ELS failed for port " "0x%08x on adapter %s\n", send_els->d_id, zfcp_get_busid_by_adapter(adapter)); - del_timer(send_els->timer); goto freemem; } @@ -356,7 +364,6 @@ zfcp_erp_adisc(struct zfcp_port *port) if (address != NULL) __free_pages(send_els->req->page, 0); if (send_els != NULL) { - kfree(send_els->timer); kfree(send_els->req); kfree(send_els->resp); kfree(send_els); @@ -382,9 +389,6 @@ zfcp_erp_adisc_handler(unsigned long data) struct zfcp_ls_adisc_acc *adisc; send_els = (struct zfcp_send_els *) data; - - del_timer(send_els->timer); - adapter = send_els->adapter; port = send_els->port; d_id = send_els->d_id; @@ -433,7 +437,6 @@ zfcp_erp_adisc_handler(unsigned long data) out: zfcp_port_put(port); __free_pages(send_els->req->page, 0); - kfree(send_els->timer); kfree(send_els->req); kfree(send_els->resp); kfree(send_els); @@ -909,8 +912,6 @@ static void zfcp_erp_async_handler_nolock(struct zfcp_erp_action *erp_action, debug_text_event(adapter->erp_dbf, 2, "a_asyh_ex"); debug_event(adapter->erp_dbf, 2, &erp_action->action, sizeof (int)); - if (!(set_mask & ZFCP_STATUS_ERP_TIMEDOUT)) - del_timer(&erp_action->timer); erp_action->status |= set_mask; zfcp_erp_action_ready(erp_action); } else { @@ -957,8 +958,7 @@ zfcp_erp_memwait_handler(unsigned long data) * action gets an appropriate flag and will be processed * accordingly */ -static void -zfcp_erp_timeout_handler(unsigned long data) +void zfcp_erp_timeout_handler(unsigned long data) { struct zfcp_erp_action *erp_action = (struct zfcp_erp_action *) data; struct zfcp_adapter *adapter = erp_action->adapter; @@ -1934,8 +1934,7 @@ zfcp_erp_adapter_strategy_generic(struct zfcp_erp_action *erp_action, int close) &erp_action->adapter->status); failed_openfcp: - zfcp_erp_adapter_strategy_close_qdio(erp_action); - zfcp_erp_adapter_strategy_close_fsf(erp_action); + zfcp_close_fsf(erp_action->adapter); failed_qdio: out: return retval; @@ -2040,59 +2039,6 @@ zfcp_erp_adapter_strategy_open_qdio(struct zfcp_erp_action *erp_action) return retval; } -/** - * zfcp_erp_adapter_strategy_close_qdio - close qdio queues for an adapter - */ -static void -zfcp_erp_adapter_strategy_close_qdio(struct zfcp_erp_action *erp_action) -{ - int first_used; - int used_count; - struct zfcp_adapter *adapter = erp_action->adapter; - - if (!atomic_test_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status)) { - ZFCP_LOG_DEBUG("error: attempt to shut down inactive QDIO " - "queues on adapter %s\n", - zfcp_get_busid_by_adapter(adapter)); - return; - } - - /* - * Get queue_lock and clear QDIOUP flag. Thus it's guaranteed that - * do_QDIO won't be called while qdio_shutdown is in progress. - */ - write_lock_irq(&adapter->request_queue.queue_lock); - atomic_clear_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status); - write_unlock_irq(&adapter->request_queue.queue_lock); - - debug_text_event(adapter->erp_dbf, 3, "qdio_down2a"); - while (qdio_shutdown(adapter->ccw_device, - QDIO_FLAG_CLEANUP_USING_CLEAR) == -EINPROGRESS) - msleep(1000); - debug_text_event(adapter->erp_dbf, 3, "qdio_down2b"); - - /* - * First we had to stop QDIO operation. - * Now it is safe to take the following actions. - */ - - /* Cleanup only necessary when there are unacknowledged buffers */ - if (atomic_read(&adapter->request_queue.free_count) - < QDIO_MAX_BUFFERS_PER_Q) { - first_used = (adapter->request_queue.free_index + - atomic_read(&adapter->request_queue.free_count)) - % QDIO_MAX_BUFFERS_PER_Q; - used_count = QDIO_MAX_BUFFERS_PER_Q - - atomic_read(&adapter->request_queue.free_count); - zfcp_qdio_zero_sbals(adapter->request_queue.buffer, - first_used, used_count); - } - adapter->response_queue.free_index = 0; - atomic_set(&adapter->response_queue.free_count, 0); - adapter->request_queue.free_index = 0; - atomic_set(&adapter->request_queue.free_count, 0); - adapter->request_queue.distance_from_int = 0; -} static int zfcp_erp_adapter_strategy_open_fsf(struct zfcp_erp_action *erp_action) @@ -2127,7 +2073,6 @@ zfcp_erp_adapter_strategy_open_fsf_xconfig(struct zfcp_erp_action *erp_action) write_lock_irq(&adapter->erp_lock); zfcp_erp_action_to_running(erp_action); write_unlock_irq(&adapter->erp_lock); - zfcp_erp_timeout_init(erp_action); if (zfcp_fsf_exchange_config_data(erp_action)) { retval = ZFCP_ERP_FAILED; debug_text_event(adapter->erp_dbf, 5, "a_fstx_xf"); @@ -2196,7 +2141,6 @@ zfcp_erp_adapter_strategy_open_fsf_xport(struct zfcp_erp_action *erp_action) zfcp_erp_action_to_running(erp_action); write_unlock_irq(&adapter->erp_lock); - zfcp_erp_timeout_init(erp_action); ret = zfcp_fsf_exchange_port_data(erp_action, adapter, NULL); if (ret == -EOPNOTSUPP) { debug_text_event(adapter->erp_dbf, 3, "a_xport_notsupp"); @@ -2248,27 +2192,6 @@ zfcp_erp_adapter_strategy_open_fsf_statusread(struct zfcp_erp_action return retval; } -/** - * zfcp_erp_adapter_strategy_close_fsf - stop FSF operations for an adapter - */ -static void -zfcp_erp_adapter_strategy_close_fsf(struct zfcp_erp_action *erp_action) -{ - struct zfcp_adapter *adapter = erp_action->adapter; - - /* - * wake waiting initiators of requests, - * return SCSI commands (with error status), - * clean up all requests (synchronously) - */ - zfcp_fsf_req_dismiss_all(adapter); - /* reset FSF request sequence number */ - adapter->fsf_req_seq_no = 0; - /* all ports and units are closed */ - zfcp_erp_modify_adapter_status(adapter, - ZFCP_STATUS_COMMON_OPEN, ZFCP_CLEAR); -} - /* * function: * @@ -2605,7 +2528,6 @@ zfcp_erp_port_forced_strategy_close(struct zfcp_erp_action *erp_action) struct zfcp_adapter *adapter = erp_action->adapter; struct zfcp_port *port = erp_action->port; - zfcp_erp_timeout_init(erp_action); retval = zfcp_fsf_close_physical_port(erp_action); if (retval == -ENOMEM) { debug_text_event(adapter->erp_dbf, 5, "o_pfstc_nomem"); @@ -2662,7 +2584,6 @@ zfcp_erp_port_strategy_close(struct zfcp_erp_action *erp_action) struct zfcp_adapter *adapter = erp_action->adapter; struct zfcp_port *port = erp_action->port; - zfcp_erp_timeout_init(erp_action); retval = zfcp_fsf_close_port(erp_action); if (retval == -ENOMEM) { debug_text_event(adapter->erp_dbf, 5, "p_pstc_nomem"); @@ -2700,7 +2621,6 @@ zfcp_erp_port_strategy_open_port(struct zfcp_erp_action *erp_action) struct zfcp_adapter *adapter = erp_action->adapter; struct zfcp_port *port = erp_action->port; - zfcp_erp_timeout_init(erp_action); retval = zfcp_fsf_open_port(erp_action); if (retval == -ENOMEM) { debug_text_event(adapter->erp_dbf, 5, "p_psto_nomem"); @@ -2738,7 +2658,6 @@ zfcp_erp_port_strategy_open_common_lookup(struct zfcp_erp_action *erp_action) struct zfcp_adapter *adapter = erp_action->adapter; struct zfcp_port *port = erp_action->port; - zfcp_erp_timeout_init(erp_action); retval = zfcp_ns_gid_pn_request(erp_action); if (retval == -ENOMEM) { debug_text_event(adapter->erp_dbf, 5, "p_pstn_nomem"); @@ -2864,7 +2783,6 @@ zfcp_erp_unit_strategy_close(struct zfcp_erp_action *erp_action) struct zfcp_adapter *adapter = erp_action->adapter; struct zfcp_unit *unit = erp_action->unit; - zfcp_erp_timeout_init(erp_action); retval = zfcp_fsf_close_unit(erp_action); if (retval == -ENOMEM) { debug_text_event(adapter->erp_dbf, 5, "u_ustc_nomem"); @@ -2905,7 +2823,6 @@ zfcp_erp_unit_strategy_open(struct zfcp_erp_action *erp_action) struct zfcp_adapter *adapter = erp_action->adapter; struct zfcp_unit *unit = erp_action->unit; - zfcp_erp_timeout_init(erp_action); retval = zfcp_fsf_open_unit(erp_action); if (retval == -ENOMEM) { debug_text_event(adapter->erp_dbf, 5, "u_usto_nomem"); @@ -2930,14 +2847,13 @@ zfcp_erp_unit_strategy_open(struct zfcp_erp_action *erp_action) return retval; } -static inline void -zfcp_erp_timeout_init(struct zfcp_erp_action *erp_action) +void zfcp_erp_start_timer(struct zfcp_fsf_req *fsf_req) { - init_timer(&erp_action->timer); - erp_action->timer.function = zfcp_erp_timeout_handler; - erp_action->timer.data = (unsigned long) erp_action; - /* jiffies will be added in zfcp_fsf_req_send */ - erp_action->timer.expires = ZFCP_ERP_FSFREQ_TIMEOUT; + BUG_ON(!fsf_req->erp_action); + fsf_req->timer.function = zfcp_erp_timeout_handler; + fsf_req->timer.data = (unsigned long) fsf_req->erp_action; + fsf_req->timer.expires = jiffies + ZFCP_ERP_FSFREQ_TIMEOUT; + add_timer(&fsf_req->timer); } /* diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index 710ebbf89929..3125a42a6343 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -87,8 +87,8 @@ extern int zfcp_fsf_exchange_port_data(struct zfcp_erp_action *, struct fsf_qtcb_bottom_port *); extern int zfcp_fsf_control_file(struct zfcp_adapter *, struct zfcp_fsf_req **, u32, u32, struct zfcp_sg_list *); -extern void zfcp_fsf_request_timeout_handler(unsigned long); -extern void zfcp_fsf_scsi_er_timeout_handler(unsigned long); +extern void zfcp_fsf_start_timer(struct zfcp_fsf_req *, unsigned long); +extern void zfcp_erp_start_timer(struct zfcp_fsf_req *); extern int zfcp_fsf_req_dismiss_all(struct zfcp_adapter *); extern int zfcp_fsf_status_read(struct zfcp_adapter *, int); extern int zfcp_fsf_req_create(struct zfcp_adapter *, u32, int, mempool_t *, @@ -98,8 +98,7 @@ extern int zfcp_fsf_send_ct(struct zfcp_send_ct *, mempool_t *, extern int zfcp_fsf_send_els(struct zfcp_send_els *); extern int zfcp_fsf_send_fcp_command_task(struct zfcp_adapter *, struct zfcp_unit *, - struct scsi_cmnd *, - struct timer_list*, int); + struct scsi_cmnd *, int, int); extern int zfcp_fsf_req_complete(struct zfcp_fsf_req *); extern void zfcp_fsf_incoming_els(struct zfcp_fsf_req *); extern void zfcp_fsf_req_free(struct zfcp_fsf_req *); @@ -123,13 +122,11 @@ extern char *zfcp_get_fcp_rsp_info_ptr(struct fcp_rsp_iu *); extern void set_host_byte(u32 *, char); extern void set_driver_byte(u32 *, char); extern char *zfcp_get_fcp_sns_info_ptr(struct fcp_rsp_iu *); -extern void zfcp_fsf_start_scsi_er_timer(struct zfcp_adapter *); extern fcp_dl_t zfcp_get_fcp_dl(struct fcp_cmnd_iu *); extern int zfcp_scsi_command_async(struct zfcp_adapter *,struct zfcp_unit *, - struct scsi_cmnd *, struct timer_list *); -extern int zfcp_scsi_command_sync(struct zfcp_unit *, struct scsi_cmnd *, - struct timer_list *); + struct scsi_cmnd *, int); +extern int zfcp_scsi_command_sync(struct zfcp_unit *, struct scsi_cmnd *, int); extern struct fc_function_template zfcp_transport_functions; /******************************** ERP ****************************************/ diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index a66b5193b70e..277826cdd0c8 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -42,7 +42,7 @@ static inline int zfcp_fsf_req_sbal_check( static inline int zfcp_use_one_sbal( struct scatterlist *, int, struct scatterlist *, int); static struct zfcp_fsf_req *zfcp_fsf_req_alloc(mempool_t *, int); -static int zfcp_fsf_req_send(struct zfcp_fsf_req *, struct timer_list *); +static int zfcp_fsf_req_send(struct zfcp_fsf_req *); static int zfcp_fsf_protstatus_eval(struct zfcp_fsf_req *); static int zfcp_fsf_fsfstatus_eval(struct zfcp_fsf_req *); static int zfcp_fsf_fsfstatus_qual_eval(struct zfcp_fsf_req *); @@ -225,8 +225,10 @@ zfcp_fsf_req_complete(struct zfcp_fsf_req *fsf_req) */ zfcp_fsf_status_read_handler(fsf_req); goto out; - } else + } else { + del_timer(&fsf_req->timer); zfcp_fsf_protstatus_eval(fsf_req); + } /* * fsf_req may be deleted due to waking up functions, so @@ -785,8 +787,7 @@ zfcp_fsf_status_read(struct zfcp_adapter *adapter, int req_flags) sbale->addr = (void *) status_buffer; sbale->length = sizeof(struct fsf_status_read_buffer); - /* start QDIO request for this FSF request */ - retval = zfcp_fsf_req_send(fsf_req, NULL); + retval = zfcp_fsf_req_send(fsf_req); if (retval) { ZFCP_LOG_DEBUG("error: Could not set-up unsolicited status " "environment.\n"); @@ -1112,8 +1113,8 @@ zfcp_fsf_abort_fcp_command(unsigned long old_req_id, struct zfcp_unit *unit, int req_flags) { volatile struct qdio_buffer_element *sbale; - unsigned long lock_flags; struct zfcp_fsf_req *fsf_req = NULL; + unsigned long lock_flags; int retval = 0; /* setup new FSF request */ @@ -1143,12 +1144,9 @@ zfcp_fsf_abort_fcp_command(unsigned long old_req_id, /* set handle of request which should be aborted */ fsf_req->qtcb->bottom.support.req_handle = (u64) old_req_id; - /* start QDIO request for this FSF request */ - - zfcp_fsf_start_scsi_er_timer(adapter); - retval = zfcp_fsf_req_send(fsf_req, NULL); + zfcp_fsf_start_timer(fsf_req, ZFCP_SCSI_ER_TIMEOUT); + retval = zfcp_fsf_req_send(fsf_req); if (retval) { - del_timer(&adapter->scsi_er_timer); ZFCP_LOG_INFO("error: Failed to send abort command request " "on adapter %s, port 0x%016Lx, unit 0x%016Lx\n", zfcp_get_busid_by_adapter(adapter), @@ -1184,8 +1182,6 @@ zfcp_fsf_abort_fcp_command_handler(struct zfcp_fsf_req *new_fsf_req) unsigned char status_qual = new_fsf_req->qtcb->header.fsf_status_qual.word[0]; - del_timer(&new_fsf_req->adapter->scsi_er_timer); - if (new_fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR) { /* do not set ZFCP_STATUS_FSFREQ_ABORTSUCCEEDED */ goto skip_fsfstatus; @@ -1391,11 +1387,6 @@ zfcp_fsf_send_ct(struct zfcp_send_ct *ct, mempool_t *pool, goto failed_req; } - if (erp_action != NULL) { - erp_action->fsf_req = fsf_req; - fsf_req->erp_action = erp_action; - } - sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0); if (zfcp_use_one_sbal(ct->req, ct->req_count, ct->resp, ct->resp_count)){ @@ -1462,8 +1453,14 @@ zfcp_fsf_send_ct(struct zfcp_send_ct *ct, mempool_t *pool, zfcp_san_dbf_event_ct_request(fsf_req); - /* start QDIO request for this FSF request */ - ret = zfcp_fsf_req_send(fsf_req, ct->timer); + if (erp_action) { + erp_action->fsf_req = fsf_req; + fsf_req->erp_action = erp_action; + zfcp_erp_start_timer(fsf_req); + } else + zfcp_fsf_start_timer(fsf_req, ZFCP_FSF_REQUEST_TIMEOUT); + + ret = zfcp_fsf_req_send(fsf_req); if (ret) { ZFCP_LOG_DEBUG("error: initiation of CT request failed " "(adapter %s, port 0x%016Lx)\n", @@ -1760,8 +1757,8 @@ zfcp_fsf_send_els(struct zfcp_send_els *els) zfcp_san_dbf_event_els_request(fsf_req); - /* start QDIO request for this FSF request */ - ret = zfcp_fsf_req_send(fsf_req, els->timer); + zfcp_fsf_start_timer(fsf_req, ZFCP_FSF_REQUEST_TIMEOUT); + ret = zfcp_fsf_req_send(fsf_req); if (ret) { ZFCP_LOG_DEBUG("error: initiation of ELS request failed " "(adapter %s, port d_id: 0x%08x)\n", @@ -1958,6 +1955,7 @@ int zfcp_fsf_exchange_config_data(struct zfcp_erp_action *erp_action) { volatile struct qdio_buffer_element *sbale; + struct zfcp_fsf_req *fsf_req; unsigned long lock_flags; int retval = 0; @@ -1966,7 +1964,7 @@ zfcp_fsf_exchange_config_data(struct zfcp_erp_action *erp_action) FSF_QTCB_EXCHANGE_CONFIG_DATA, ZFCP_REQ_AUTO_CLEANUP, erp_action->adapter->pool.fsf_req_erp, - &lock_flags, &(erp_action->fsf_req)); + &lock_flags, &fsf_req); if (retval < 0) { ZFCP_LOG_INFO("error: Could not create exchange configuration " "data request for adapter %s.\n", @@ -1974,26 +1972,26 @@ zfcp_fsf_exchange_config_data(struct zfcp_erp_action *erp_action) goto out; } - sbale = zfcp_qdio_sbale_req(erp_action->fsf_req, - erp_action->fsf_req->sbal_curr, 0); + sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0); sbale[0].flags |= SBAL_FLAGS0_TYPE_READ; sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY; - erp_action->fsf_req->erp_action = erp_action; - erp_action->fsf_req->qtcb->bottom.config.feature_selection = + fsf_req->qtcb->bottom.config.feature_selection = FSF_FEATURE_CFDC | FSF_FEATURE_LUN_SHARING | FSF_FEATURE_NOTIFICATION_LOST | FSF_FEATURE_UPDATE_ALERT; + fsf_req->erp_action = erp_action; + erp_action->fsf_req = fsf_req; - /* start QDIO request for this FSF request */ - retval = zfcp_fsf_req_send(erp_action->fsf_req, &erp_action->timer); + zfcp_erp_start_timer(fsf_req); + retval = zfcp_fsf_req_send(fsf_req); if (retval) { ZFCP_LOG_INFO ("error: Could not send exchange configuration data " "command on the adapter %s\n", zfcp_get_busid_by_adapter(erp_action->adapter)); - zfcp_fsf_req_free(erp_action->fsf_req); + zfcp_fsf_req_free(fsf_req); erp_action->fsf_req = NULL; goto out; } @@ -2223,10 +2221,9 @@ zfcp_fsf_exchange_port_data(struct zfcp_erp_action *erp_action, struct fsf_qtcb_bottom_port *data) { volatile struct qdio_buffer_element *sbale; - int retval = 0; - unsigned long lock_flags; struct zfcp_fsf_req *fsf_req; - struct timer_list *timer; + unsigned long lock_flags; + int retval = 0; if (!(adapter->adapter_features & FSF_FEATURE_HBAAPI_MANAGEMENT)) { ZFCP_LOG_INFO("error: exchange port data " @@ -2259,22 +2256,11 @@ zfcp_fsf_exchange_port_data(struct zfcp_erp_action *erp_action, if (erp_action) { erp_action->fsf_req = fsf_req; fsf_req->erp_action = erp_action; - timer = &erp_action->timer; - } else { - timer = kmalloc(sizeof(struct timer_list), GFP_ATOMIC); - if (!timer) { - write_unlock_irqrestore(&adapter->request_queue.queue_lock, - lock_flags); - zfcp_fsf_req_free(fsf_req); - return -ENOMEM; - } - init_timer(timer); - timer->function = zfcp_fsf_request_timeout_handler; - timer->data = (unsigned long) adapter; - timer->expires = ZFCP_FSF_REQUEST_TIMEOUT; - } + zfcp_erp_start_timer(fsf_req); + } else + zfcp_fsf_start_timer(fsf_req, ZFCP_FSF_REQUEST_TIMEOUT); - retval = zfcp_fsf_req_send(fsf_req, timer); + retval = zfcp_fsf_req_send(fsf_req); if (retval) { ZFCP_LOG_INFO("error: Could not send an exchange port data " "command on the adapter %s\n", @@ -2282,8 +2268,6 @@ zfcp_fsf_exchange_port_data(struct zfcp_erp_action *erp_action, zfcp_fsf_req_free(fsf_req); if (erp_action) erp_action->fsf_req = NULL; - else - kfree(timer); write_unlock_irqrestore(&adapter->request_queue.queue_lock, lock_flags); return retval; @@ -2294,9 +2278,7 @@ zfcp_fsf_exchange_port_data(struct zfcp_erp_action *erp_action, if (!erp_action) { wait_event(fsf_req->completion_wq, fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED); - del_timer_sync(timer); zfcp_fsf_req_free(fsf_req); - kfree(timer); } return retval; } @@ -2378,6 +2360,7 @@ int zfcp_fsf_open_port(struct zfcp_erp_action *erp_action) { volatile struct qdio_buffer_element *sbale; + struct zfcp_fsf_req *fsf_req; unsigned long lock_flags; int retval = 0; @@ -2386,7 +2369,7 @@ zfcp_fsf_open_port(struct zfcp_erp_action *erp_action) FSF_QTCB_OPEN_PORT_WITH_DID, ZFCP_WAIT_FOR_SBAL | ZFCP_REQ_AUTO_CLEANUP, erp_action->adapter->pool.fsf_req_erp, - &lock_flags, &(erp_action->fsf_req)); + &lock_flags, &fsf_req); if (retval < 0) { ZFCP_LOG_INFO("error: Could not create open port request " "for port 0x%016Lx on adapter %s.\n", @@ -2395,24 +2378,24 @@ zfcp_fsf_open_port(struct zfcp_erp_action *erp_action) goto out; } - sbale = zfcp_qdio_sbale_req(erp_action->fsf_req, - erp_action->fsf_req->sbal_curr, 0); + sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0); sbale[0].flags |= SBAL_FLAGS0_TYPE_READ; sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY; - erp_action->fsf_req->qtcb->bottom.support.d_id = erp_action->port->d_id; + fsf_req->qtcb->bottom.support.d_id = erp_action->port->d_id; atomic_set_mask(ZFCP_STATUS_COMMON_OPENING, &erp_action->port->status); - erp_action->fsf_req->data = (unsigned long) erp_action->port; - erp_action->fsf_req->erp_action = erp_action; + fsf_req->data = (unsigned long) erp_action->port; + fsf_req->erp_action = erp_action; + erp_action->fsf_req = fsf_req; - /* start QDIO request for this FSF request */ - retval = zfcp_fsf_req_send(erp_action->fsf_req, &erp_action->timer); + zfcp_erp_start_timer(fsf_req); + retval = zfcp_fsf_req_send(fsf_req); if (retval) { ZFCP_LOG_INFO("error: Could not send open port request for " "port 0x%016Lx on adapter %s.\n", erp_action->port->wwpn, zfcp_get_busid_by_adapter(erp_action->adapter)); - zfcp_fsf_req_free(erp_action->fsf_req); + zfcp_fsf_req_free(fsf_req); erp_action->fsf_req = NULL; goto out; } @@ -2634,6 +2617,7 @@ int zfcp_fsf_close_port(struct zfcp_erp_action *erp_action) { volatile struct qdio_buffer_element *sbale; + struct zfcp_fsf_req *fsf_req; unsigned long lock_flags; int retval = 0; @@ -2642,7 +2626,7 @@ zfcp_fsf_close_port(struct zfcp_erp_action *erp_action) FSF_QTCB_CLOSE_PORT, ZFCP_WAIT_FOR_SBAL | ZFCP_REQ_AUTO_CLEANUP, erp_action->adapter->pool.fsf_req_erp, - &lock_flags, &(erp_action->fsf_req)); + &lock_flags, &fsf_req); if (retval < 0) { ZFCP_LOG_INFO("error: Could not create a close port request " "for port 0x%016Lx on adapter %s.\n", @@ -2651,25 +2635,25 @@ zfcp_fsf_close_port(struct zfcp_erp_action *erp_action) goto out; } - sbale = zfcp_qdio_sbale_req(erp_action->fsf_req, - erp_action->fsf_req->sbal_curr, 0); + sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0); sbale[0].flags |= SBAL_FLAGS0_TYPE_READ; sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY; atomic_set_mask(ZFCP_STATUS_COMMON_CLOSING, &erp_action->port->status); - erp_action->fsf_req->data = (unsigned long) erp_action->port; - erp_action->fsf_req->erp_action = erp_action; - erp_action->fsf_req->qtcb->header.port_handle = - erp_action->port->handle; + fsf_req->data = (unsigned long) erp_action->port; + fsf_req->erp_action = erp_action; + fsf_req->qtcb->header.port_handle = erp_action->port->handle; + fsf_req->erp_action = erp_action; + erp_action->fsf_req = fsf_req; - /* start QDIO request for this FSF request */ - retval = zfcp_fsf_req_send(erp_action->fsf_req, &erp_action->timer); + zfcp_erp_start_timer(fsf_req); + retval = zfcp_fsf_req_send(fsf_req); if (retval) { ZFCP_LOG_INFO("error: Could not send a close port request for " "port 0x%016Lx on adapter %s.\n", erp_action->port->wwpn, zfcp_get_busid_by_adapter(erp_action->adapter)); - zfcp_fsf_req_free(erp_action->fsf_req); + zfcp_fsf_req_free(fsf_req); erp_action->fsf_req = NULL; goto out; } @@ -2766,16 +2750,17 @@ zfcp_fsf_close_port_handler(struct zfcp_fsf_req *fsf_req) int zfcp_fsf_close_physical_port(struct zfcp_erp_action *erp_action) { - int retval = 0; - unsigned long lock_flags; volatile struct qdio_buffer_element *sbale; + struct zfcp_fsf_req *fsf_req; + unsigned long lock_flags; + int retval = 0; /* setup new FSF request */ retval = zfcp_fsf_req_create(erp_action->adapter, FSF_QTCB_CLOSE_PHYSICAL_PORT, ZFCP_WAIT_FOR_SBAL | ZFCP_REQ_AUTO_CLEANUP, erp_action->adapter->pool.fsf_req_erp, - &lock_flags, &erp_action->fsf_req); + &lock_flags, &fsf_req); if (retval < 0) { ZFCP_LOG_INFO("error: Could not create close physical port " "request (adapter %s, port 0x%016Lx)\n", @@ -2785,8 +2770,7 @@ zfcp_fsf_close_physical_port(struct zfcp_erp_action *erp_action) goto out; } - sbale = zfcp_qdio_sbale_req(erp_action->fsf_req, - erp_action->fsf_req->sbal_curr, 0); + sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0); sbale[0].flags |= SBAL_FLAGS0_TYPE_READ; sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY; @@ -2794,20 +2778,19 @@ zfcp_fsf_close_physical_port(struct zfcp_erp_action *erp_action) atomic_set_mask(ZFCP_STATUS_PORT_PHYS_CLOSING, &erp_action->port->status); /* save a pointer to this port */ - erp_action->fsf_req->data = (unsigned long) erp_action->port; - /* port to be closed */ - erp_action->fsf_req->qtcb->header.port_handle = - erp_action->port->handle; - erp_action->fsf_req->erp_action = erp_action; + fsf_req->data = (unsigned long) erp_action->port; + fsf_req->qtcb->header.port_handle = erp_action->port->handle; + fsf_req->erp_action = erp_action; + erp_action->fsf_req = fsf_req; - /* start QDIO request for this FSF request */ - retval = zfcp_fsf_req_send(erp_action->fsf_req, &erp_action->timer); + zfcp_erp_start_timer(fsf_req); + retval = zfcp_fsf_req_send(fsf_req); if (retval) { ZFCP_LOG_INFO("error: Could not send close physical port " "request (adapter %s, port 0x%016Lx)\n", zfcp_get_busid_by_adapter(erp_action->adapter), erp_action->port->wwpn); - zfcp_fsf_req_free(erp_action->fsf_req); + zfcp_fsf_req_free(fsf_req); erp_action->fsf_req = NULL; goto out; } @@ -2972,6 +2955,7 @@ int zfcp_fsf_open_unit(struct zfcp_erp_action *erp_action) { volatile struct qdio_buffer_element *sbale; + struct zfcp_fsf_req *fsf_req; unsigned long lock_flags; int retval = 0; @@ -2980,7 +2964,7 @@ zfcp_fsf_open_unit(struct zfcp_erp_action *erp_action) FSF_QTCB_OPEN_LUN, ZFCP_WAIT_FOR_SBAL | ZFCP_REQ_AUTO_CLEANUP, erp_action->adapter->pool.fsf_req_erp, - &lock_flags, &(erp_action->fsf_req)); + &lock_flags, &fsf_req); if (retval < 0) { ZFCP_LOG_INFO("error: Could not create open unit request for " "unit 0x%016Lx on port 0x%016Lx on adapter %s.\n", @@ -2990,24 +2974,22 @@ zfcp_fsf_open_unit(struct zfcp_erp_action *erp_action) goto out; } - sbale = zfcp_qdio_sbale_req(erp_action->fsf_req, - erp_action->fsf_req->sbal_curr, 0); + sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0); sbale[0].flags |= SBAL_FLAGS0_TYPE_READ; sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY; - erp_action->fsf_req->qtcb->header.port_handle = - erp_action->port->handle; - erp_action->fsf_req->qtcb->bottom.support.fcp_lun = - erp_action->unit->fcp_lun; + fsf_req->qtcb->header.port_handle = erp_action->port->handle; + fsf_req->qtcb->bottom.support.fcp_lun = erp_action->unit->fcp_lun; if (!(erp_action->adapter->connection_features & FSF_FEATURE_NPIV_MODE)) - erp_action->fsf_req->qtcb->bottom.support.option = + fsf_req->qtcb->bottom.support.option = FSF_OPEN_LUN_SUPPRESS_BOXING; atomic_set_mask(ZFCP_STATUS_COMMON_OPENING, &erp_action->unit->status); - erp_action->fsf_req->data = (unsigned long) erp_action->unit; - erp_action->fsf_req->erp_action = erp_action; + fsf_req->data = (unsigned long) erp_action->unit; + fsf_req->erp_action = erp_action; + erp_action->fsf_req = fsf_req; - /* start QDIO request for this FSF request */ - retval = zfcp_fsf_req_send(erp_action->fsf_req, &erp_action->timer); + zfcp_erp_start_timer(fsf_req); + retval = zfcp_fsf_req_send(erp_action->fsf_req); if (retval) { ZFCP_LOG_INFO("error: Could not send an open unit request " "on the adapter %s, port 0x%016Lx for " @@ -3015,7 +2997,7 @@ zfcp_fsf_open_unit(struct zfcp_erp_action *erp_action) zfcp_get_busid_by_adapter(erp_action->adapter), erp_action->port->wwpn, erp_action->unit->fcp_lun); - zfcp_fsf_req_free(erp_action->fsf_req); + zfcp_fsf_req_free(fsf_req); erp_action->fsf_req = NULL; goto out; } @@ -3308,6 +3290,7 @@ int zfcp_fsf_close_unit(struct zfcp_erp_action *erp_action) { volatile struct qdio_buffer_element *sbale; + struct zfcp_fsf_req *fsf_req; unsigned long lock_flags; int retval = 0; @@ -3316,7 +3299,7 @@ zfcp_fsf_close_unit(struct zfcp_erp_action *erp_action) FSF_QTCB_CLOSE_LUN, ZFCP_WAIT_FOR_SBAL | ZFCP_REQ_AUTO_CLEANUP, erp_action->adapter->pool.fsf_req_erp, - &lock_flags, &(erp_action->fsf_req)); + &lock_flags, &fsf_req); if (retval < 0) { ZFCP_LOG_INFO("error: Could not create close unit request for " "unit 0x%016Lx on port 0x%016Lx on adapter %s.\n", @@ -3326,27 +3309,26 @@ zfcp_fsf_close_unit(struct zfcp_erp_action *erp_action) goto out; } - sbale = zfcp_qdio_sbale_req(erp_action->fsf_req, - erp_action->fsf_req->sbal_curr, 0); + sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0); sbale[0].flags |= SBAL_FLAGS0_TYPE_READ; sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY; - erp_action->fsf_req->qtcb->header.port_handle = - erp_action->port->handle; - erp_action->fsf_req->qtcb->header.lun_handle = erp_action->unit->handle; + fsf_req->qtcb->header.port_handle = erp_action->port->handle; + fsf_req->qtcb->header.lun_handle = erp_action->unit->handle; atomic_set_mask(ZFCP_STATUS_COMMON_CLOSING, &erp_action->unit->status); - erp_action->fsf_req->data = (unsigned long) erp_action->unit; - erp_action->fsf_req->erp_action = erp_action; + fsf_req->data = (unsigned long) erp_action->unit; + fsf_req->erp_action = erp_action; + erp_action->fsf_req = fsf_req; - /* start QDIO request for this FSF request */ - retval = zfcp_fsf_req_send(erp_action->fsf_req, &erp_action->timer); + zfcp_erp_start_timer(fsf_req); + retval = zfcp_fsf_req_send(erp_action->fsf_req); if (retval) { ZFCP_LOG_INFO("error: Could not send a close unit request for " "unit 0x%016Lx on port 0x%016Lx onadapter %s.\n", erp_action->unit->fcp_lun, erp_action->port->wwpn, zfcp_get_busid_by_adapter(erp_action->adapter)); - zfcp_fsf_req_free(erp_action->fsf_req); + zfcp_fsf_req_free(fsf_req); erp_action->fsf_req = NULL; goto out; } @@ -3499,7 +3481,7 @@ int zfcp_fsf_send_fcp_command_task(struct zfcp_adapter *adapter, struct zfcp_unit *unit, struct scsi_cmnd * scsi_cmnd, - struct timer_list *timer, int req_flags) + int use_timer, int req_flags) { struct zfcp_fsf_req *fsf_req = NULL; struct fcp_cmnd_iu *fcp_cmnd_iu; @@ -3640,11 +3622,10 @@ zfcp_fsf_send_fcp_command_task(struct zfcp_adapter *adapter, ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_DEBUG, (char *) scsi_cmnd->cmnd, scsi_cmnd->cmd_len); - /* - * start QDIO request for this FSF request - * covered by an SBALE) - */ - retval = zfcp_fsf_req_send(fsf_req, timer); + if (use_timer) + zfcp_fsf_start_timer(fsf_req, ZFCP_FSF_REQUEST_TIMEOUT); + + retval = zfcp_fsf_req_send(fsf_req); if (unlikely(retval < 0)) { ZFCP_LOG_INFO("error: Could not send FCP command request " "on adapter %s, port 0x%016Lx, unit 0x%016Lx\n", @@ -3729,11 +3710,9 @@ zfcp_fsf_send_fcp_command_task_management(struct zfcp_adapter *adapter, fcp_cmnd_iu->fcp_lun = unit->fcp_lun; fcp_cmnd_iu->task_management_flags = tm_flags; - /* start QDIO request for this FSF request */ - zfcp_fsf_start_scsi_er_timer(adapter); - retval = zfcp_fsf_req_send(fsf_req, NULL); + zfcp_fsf_start_timer(fsf_req, ZFCP_SCSI_ER_TIMEOUT); + retval = zfcp_fsf_req_send(fsf_req); if (retval) { - del_timer(&adapter->scsi_er_timer); ZFCP_LOG_INFO("error: Could not send an FCP-command (task " "management) on adapter %s, port 0x%016Lx for " "unit LUN 0x%016Lx\n", @@ -4237,7 +4216,6 @@ zfcp_fsf_send_fcp_command_task_management_handler(struct zfcp_fsf_req *fsf_req) char *fcp_rsp_info = zfcp_get_fcp_rsp_info_ptr(fcp_rsp_iu); struct zfcp_unit *unit = (struct zfcp_unit *) fsf_req->data; - del_timer(&fsf_req->adapter->scsi_er_timer); if (fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR) { fsf_req->status |= ZFCP_STATUS_FSFREQ_TMFUNCFAILED; goto skip_fsfstatus; @@ -4306,7 +4284,6 @@ zfcp_fsf_control_file(struct zfcp_adapter *adapter, struct zfcp_fsf_req *fsf_req; struct fsf_qtcb_bottom_support *bottom; volatile struct qdio_buffer_element *sbale; - struct timer_list *timer; unsigned long lock_flags; int req_flags = 0; int direction; @@ -4338,12 +4315,6 @@ zfcp_fsf_control_file(struct zfcp_adapter *adapter, goto out; } - timer = kmalloc(sizeof(struct timer_list), GFP_KERNEL); - if (!timer) { - retval = -ENOMEM; - goto out; - } - retval = zfcp_fsf_req_create(adapter, fsf_command, req_flags, NULL, &lock_flags, &fsf_req); if (retval < 0) { @@ -4378,12 +4349,8 @@ zfcp_fsf_control_file(struct zfcp_adapter *adapter, } else sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY; - init_timer(timer); - timer->function = zfcp_fsf_request_timeout_handler; - timer->data = (unsigned long) adapter; - timer->expires = ZFCP_FSF_REQUEST_TIMEOUT; - - retval = zfcp_fsf_req_send(fsf_req, timer); + zfcp_fsf_start_timer(fsf_req, ZFCP_FSF_REQUEST_TIMEOUT); + retval = zfcp_fsf_req_send(fsf_req); if (retval < 0) { ZFCP_LOG_INFO("initiation of cfdc up/download failed" "(adapter %s)\n", @@ -4403,15 +4370,12 @@ zfcp_fsf_control_file(struct zfcp_adapter *adapter, fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED); *fsf_req_ptr = fsf_req; - del_timer_sync(timer); - goto free_timer; + goto out; free_fsf_req: zfcp_fsf_req_free(fsf_req); unlock_queue_lock: write_unlock_irqrestore(&adapter->request_queue.queue_lock, lock_flags); - free_timer: - kfree(timer); out: return retval; } @@ -4688,7 +4652,8 @@ zfcp_fsf_req_create(struct zfcp_adapter *adapter, u32 fsf_cmd, int req_flags, adapter->req_no++; fsf_req->req_id = adapter->req_no++; - zfcp_fsf_req_qtcb_init(fsf_req); + init_timer(&fsf_req->timer); + zfcp_fsf_req_qtcb_init(fsf_req); /* initialize waitqueue which may be used to wait on this request completion */ @@ -4758,8 +4723,7 @@ zfcp_fsf_req_create(struct zfcp_adapter *adapter, u32 fsf_cmd, int req_flags, * returns: 0 - request transfer succesfully started * !0 - start of request transfer failed */ -static int -zfcp_fsf_req_send(struct zfcp_fsf_req *fsf_req, struct timer_list *timer) +static int zfcp_fsf_req_send(struct zfcp_fsf_req *fsf_req) { struct zfcp_adapter *adapter; struct zfcp_qdio_queue *req_queue; @@ -4787,12 +4751,6 @@ zfcp_fsf_req_send(struct zfcp_fsf_req *fsf_req, struct timer_list *timer) inc_seq_no = (fsf_req->qtcb != NULL); - /* figure out expiration time of timeout and start timeout */ - if (unlikely(timer)) { - timer->expires += jiffies; - add_timer(timer); - } - ZFCP_LOG_TRACE("request queue of adapter %s: " "next free SBAL is %i, %i free SBALs\n", zfcp_get_busid_by_adapter(adapter), @@ -4829,12 +4787,7 @@ zfcp_fsf_req_send(struct zfcp_fsf_req *fsf_req, struct timer_list *timer) if (unlikely(retval)) { /* Queues are down..... */ retval = -EIO; - /* - * FIXME(potential race): - * timer might be expired (absolutely unlikely) - */ - if (timer) - del_timer(timer); + del_timer(&fsf_req->timer); spin_lock(&adapter->req_list_lock); zfcp_reqlist_remove(adapter, fsf_req->req_id); spin_unlock(&adapter->req_list_lock); diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 043ed7c0a7ed..753bb9b2fe74 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -231,7 +231,7 @@ zfcp_scsi_command_fail(struct scsi_cmnd *scpnt, int result) */ int zfcp_scsi_command_async(struct zfcp_adapter *adapter, struct zfcp_unit *unit, - struct scsi_cmnd *scpnt, struct timer_list *timer) + struct scsi_cmnd *scpnt, int use_timer) { int tmp; int retval; @@ -267,7 +267,7 @@ zfcp_scsi_command_async(struct zfcp_adapter *adapter, struct zfcp_unit *unit, goto out; } - tmp = zfcp_fsf_send_fcp_command_task(adapter, unit, scpnt, timer, + tmp = zfcp_fsf_send_fcp_command_task(adapter, unit, scpnt, use_timer, ZFCP_REQ_AUTO_CLEANUP); if (unlikely(tmp < 0)) { @@ -291,21 +291,22 @@ zfcp_scsi_command_sync_handler(struct scsi_cmnd *scpnt) * zfcp_scsi_command_sync - send a SCSI command and wait for completion * @unit: unit where command is sent to * @scpnt: scsi command to be sent - * @timer: timer to be started if request is successfully initiated + * @use_timer: indicates whether timer should be setup or not * Return: 0 * * Errors are indicated in scpnt->result */ int zfcp_scsi_command_sync(struct zfcp_unit *unit, struct scsi_cmnd *scpnt, - struct timer_list *timer) + int use_timer) { int ret; DECLARE_COMPLETION(wait); scpnt->SCp.ptr = (void *) &wait; /* silent re-use */ scpnt->scsi_done = zfcp_scsi_command_sync_handler; - ret = zfcp_scsi_command_async(unit->port->adapter, unit, scpnt, timer); + ret = zfcp_scsi_command_async(unit->port->adapter, unit, scpnt, + use_timer); if (ret == 0) wait_for_completion(&wait); @@ -341,7 +342,7 @@ zfcp_scsi_queuecommand(struct scsi_cmnd *scpnt, adapter = (struct zfcp_adapter *) scpnt->device->host->hostdata[0]; unit = (struct zfcp_unit *) scpnt->device->hostdata; - return zfcp_scsi_command_async(adapter, unit, scpnt, NULL); + return zfcp_scsi_command_async(adapter, unit, scpnt, 0); } static struct zfcp_unit * @@ -538,8 +539,6 @@ zfcp_task_management_function(struct zfcp_unit *unit, u8 tm_flags, /** * zfcp_scsi_eh_host_reset_handler - handler for host and bus reset - * - * If ERP is already running it will be stopped. */ int zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *scpnt) { @@ -638,16 +637,6 @@ zfcp_adapter_scsi_unregister(struct zfcp_adapter *adapter) return; } - -void -zfcp_fsf_start_scsi_er_timer(struct zfcp_adapter *adapter) -{ - adapter->scsi_er_timer.function = zfcp_fsf_scsi_er_timeout_handler; - adapter->scsi_er_timer.data = (unsigned long) adapter; - adapter->scsi_er_timer.expires = jiffies + ZFCP_SCSI_ER_TIMEOUT; - add_timer(&adapter->scsi_er_timer); -} - /* * Support functions for FC transport class */ From 8165428610446ea9e6aa9dfa5485ab78e58cc9fc Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Mon, 18 Sep 2006 22:30:36 +0200 Subject: [PATCH 1049/1063] [SCSI] zfcp: fix: avoid removal of fsf reqs before qdio queues are down Fix the fix ... One of my previous fixes introduced removal of all fsf requests in zfcp's eh_host_reset_handler. But this must not happen before qdio queues are shut down. So, I revert the changes of zfcp_scsi_eh_host_reset_handler. Signed-off-by: Andreas Herrmann Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_erp.c | 3 ++- drivers/s390/scsi/zfcp_ext.h | 1 - drivers/s390/scsi/zfcp_scsi.c | 19 ++----------------- 3 files changed, 4 insertions(+), 19 deletions(-) diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index af42a0eadf03..862a411a4aa0 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -91,6 +91,7 @@ static int zfcp_erp_unit_strategy_clearstati(struct zfcp_unit *); static int zfcp_erp_unit_strategy_close(struct zfcp_erp_action *); static int zfcp_erp_unit_strategy_open(struct zfcp_erp_action *); +static void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *); static void zfcp_erp_action_dismiss_port(struct zfcp_port *); static void zfcp_erp_action_dismiss_unit(struct zfcp_unit *); static void zfcp_erp_action_dismiss(struct zfcp_erp_action *); @@ -3157,7 +3158,7 @@ zfcp_erp_action_cleanup(int action, struct zfcp_adapter *adapter, } -void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *adapter) +static void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *adapter) { struct zfcp_port *port; diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index 3125a42a6343..b8794d77285d 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -134,7 +134,6 @@ extern void zfcp_erp_modify_adapter_status(struct zfcp_adapter *, u32, int); extern int zfcp_erp_adapter_reopen(struct zfcp_adapter *, int); extern int zfcp_erp_adapter_shutdown(struct zfcp_adapter *, int); extern void zfcp_erp_adapter_failed(struct zfcp_adapter *); -extern void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *); extern void zfcp_erp_modify_port_status(struct zfcp_port *, u32, int); extern int zfcp_erp_port_reopen(struct zfcp_port *, int); diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 753bb9b2fe74..7cafa34e4c7f 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -544,7 +544,6 @@ int zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *scpnt) { struct zfcp_unit *unit; struct zfcp_adapter *adapter; - unsigned long flags; unit = (struct zfcp_unit*) scpnt->device->hostdata; adapter = unit->port->adapter; @@ -552,22 +551,8 @@ int zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *scpnt) ZFCP_LOG_NORMAL("host/bus reset because of problems with " "unit 0x%016Lx\n", unit->fcp_lun); - write_lock_irqsave(&adapter->erp_lock, flags); - if (atomic_test_mask(ZFCP_STATUS_ADAPTER_ERP_PENDING, - &adapter->status)) { - zfcp_erp_modify_adapter_status(adapter, - ZFCP_STATUS_COMMON_UNBLOCKED|ZFCP_STATUS_COMMON_OPEN, - ZFCP_CLEAR); - zfcp_erp_action_dismiss_adapter(adapter); - write_unlock_irqrestore(&adapter->erp_lock, flags); - zfcp_fsf_req_dismiss_all(adapter); - adapter->fsf_req_seq_no = 0; - zfcp_erp_adapter_reopen(adapter, 0); - } else { - write_unlock_irqrestore(&adapter->erp_lock, flags); - zfcp_erp_adapter_reopen(adapter, 0); - zfcp_erp_wait(adapter); - } + zfcp_erp_adapter_reopen(adapter, 0); + zfcp_erp_wait(adapter); return SUCCESS; } From 99005e91eb2289c0ff0875257b5d18d9b7cc4eb7 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Mon, 18 Sep 2006 22:32:15 +0200 Subject: [PATCH 1050/1063] [SCSI] zfcp: update maintainers file Removed myself as maintainer of the s390 zfcp driver -- I will not maintain it any longer. Signed-off-by: Andreas Herrmann Signed-off-by: James Bottomley --- MAINTAINERS | 2 -- 1 file changed, 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index b08c537018de..c6bd9b90757c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2484,8 +2484,6 @@ W: http://www.ibm.com/developerworks/linux/linux390/ S: Supported S390 ZFCP DRIVER -P: Andreas Herrmann -M: aherrman@de.ibm.com M: linux390@de.ibm.com L: linux-390@vm.marist.edu W: http://www.ibm.com/developerworks/linux/linux390/ From 9cbb889786548c1212fb77a9df8d09ed883a3480 Mon Sep 17 00:00:00 2001 From: Swen Schillig Date: Thu, 21 Sep 2006 16:29:31 +0200 Subject: [PATCH 1051/1063] [SCSI] zfcp: update maintainers file As Andreas stated he will not maintain the zfcp driver anymore. Instead I will take over the responsibility. Signed-off-by: Swen Schillig Signed-off-by: James Bottomley --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index c6bd9b90757c..ed9757ee4a67 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2484,6 +2484,8 @@ W: http://www.ibm.com/developerworks/linux/linux390/ S: Supported S390 ZFCP DRIVER +P: Swen Schillig +M: swen@vnet.ibm.com M: linux390@de.ibm.com L: linux-390@vm.marist.edu W: http://www.ibm.com/developerworks/linux/linux390/ From 73af07de3e32b9ac328c3d1417258bb98a9b0a9b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 24 Sep 2006 09:30:19 +1000 Subject: [PATCH 1052/1063] [CRYPTO] hmac: Fix error truncation by unlikely() The error return values are truncated by unlikely so we need to save it first. Thanks to Kyle Moffett for spotting this. Signed-off-by: Herbert Xu Signed-off-by: Linus Torvalds --- crypto/hmac.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/crypto/hmac.c b/crypto/hmac.c index d52b234835cf..b521bcd2b2c6 100644 --- a/crypto/hmac.c +++ b/crypto/hmac.c @@ -92,13 +92,17 @@ static int hmac_init(struct hash_desc *pdesc) struct hmac_ctx *ctx = align_ptr(ipad + bs * 2 + ds, sizeof(void *)); struct hash_desc desc; struct scatterlist tmp; + int err; desc.tfm = ctx->child; desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP; sg_set_buf(&tmp, ipad, bs); - return unlikely(crypto_hash_init(&desc)) ?: - crypto_hash_update(&desc, &tmp, bs); + err = crypto_hash_init(&desc); + if (unlikely(err)) + return err; + + return crypto_hash_update(&desc, &tmp, bs); } static int hmac_update(struct hash_desc *pdesc, @@ -123,13 +127,17 @@ static int hmac_final(struct hash_desc *pdesc, u8 *out) struct hmac_ctx *ctx = align_ptr(digest + ds, sizeof(void *)); struct hash_desc desc; struct scatterlist tmp; + int err; desc.tfm = ctx->child; desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP; sg_set_buf(&tmp, opad, bs + ds); - return unlikely(crypto_hash_final(&desc, digest)) ?: - crypto_hash_digest(&desc, &tmp, bs + ds, out); + err = crypto_hash_final(&desc, digest); + if (unlikely(err)) + return err; + + return crypto_hash_digest(&desc, &tmp, bs + ds, out); } static int hmac_digest(struct hash_desc *pdesc, struct scatterlist *sg, @@ -145,6 +153,7 @@ static int hmac_digest(struct hash_desc *pdesc, struct scatterlist *sg, struct hash_desc desc; struct scatterlist sg1[2]; struct scatterlist sg2[1]; + int err; desc.tfm = ctx->child; desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP; @@ -154,8 +163,11 @@ static int hmac_digest(struct hash_desc *pdesc, struct scatterlist *sg, sg1[1].length = 0; sg_set_buf(sg2, opad, bs + ds); - return unlikely(crypto_hash_digest(&desc, sg1, nbytes + bs, digest)) ?: - crypto_hash_digest(&desc, sg2, bs + ds, out); + err = crypto_hash_digest(&desc, sg1, nbytes + bs, digest); + if (unlikely(err)) + return err; + + return crypto_hash_digest(&desc, sg2, bs + ds, out); } static int hmac_init_tfm(struct crypto_tfm *tfm) From 65101355450df2d935f8d56ac3abef279f28a0e2 Mon Sep 17 00:00:00 2001 From: Mark Haverkamp Date: Tue, 19 Sep 2006 08:59:23 -0700 Subject: [PATCH 1053/1063] [SCSI] aacraid: misc cleanup Received from Mark Salyzyn: Basically cleanup, nothing here will have an affect. Adjusting some error codes, removing superfluous definitions and code fragments. Signed-off-by: Mark Haverkamp Signed-off-by: James Bottomley --- drivers/scsi/aacraid/aachba.c | 8 ++++---- drivers/scsi/aacraid/aacraid.h | 1 - drivers/scsi/aacraid/commsup.c | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index 37c55ddce214..b14f7cac30e9 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -1803,7 +1803,7 @@ static int query_disk(struct aac_dev *dev, void __user *arg) fsa_dev_ptr = dev->fsa_dev; if (!fsa_dev_ptr) - return -ENODEV; + return -EBUSY; if (copy_from_user(&qd, arg, sizeof (struct aac_query_disk))) return -EFAULT; if (qd.cnum == -1) @@ -1842,6 +1842,8 @@ static int force_delete_disk(struct aac_dev *dev, void __user *arg) struct fsa_dev_info *fsa_dev_ptr; fsa_dev_ptr = dev->fsa_dev; + if (!fsa_dev_ptr) + return -EBUSY; if (copy_from_user(&dd, arg, sizeof (struct aac_delete_disk))) return -EFAULT; @@ -1866,9 +1868,7 @@ static int delete_disk(struct aac_dev *dev, void __user *arg) fsa_dev_ptr = dev->fsa_dev; if (!fsa_dev_ptr) - return -ENODEV; - if (!fsa_dev_ptr) - return -ENODEV; + return -EBUSY; if (copy_from_user(&dd, arg, sizeof (struct aac_delete_disk))) return -EFAULT; diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 8924c183d9c3..e5f7be6f3cd6 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -1525,7 +1525,6 @@ struct aac_get_name { __le32 count; /* sizeof(((struct aac_get_name_resp *)NULL)->data) */ }; -#define CT_OK 218 struct aac_get_name_resp { __le32 dummy0; __le32 dummy1; diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 53add53be0bd..907161d6e92c 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -467,7 +467,7 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, dprintk((KERN_DEBUG " fib being sent=%p\n",fibptr)); if (!dev->queues) - return -ENODEV; + return -EBUSY; q = &dev->queues->queue[AdapNormCmdQueue]; if(wait) From 653ba58d55feb708c6f97e6f3e84901b3a03c9c0 Mon Sep 17 00:00:00 2001 From: Mark Haverkamp Date: Tue, 19 Sep 2006 08:59:43 -0700 Subject: [PATCH 1054/1063] [SCSI] aacraid: expose physical devices Received from Mark Salyzyn: I am placing this functionality into an insmod parameter. Normally the physical components are exported to sg, and are blocked from showing up in sd. Note that the pass-through I/O path via the driver through the Firmware to the physical disks is not an optimized path, the card is designed for Hardware RAID, elevator sorting and caching. This should not be used as a means for utilizing the aacraid based controllers as a generic scsi/SATA/SAS controller, performance should suck by a few percentage points, any RAID meta-data on the drives will confuse the controller about who owns the drives and there is a high risk of destroying content in both directions. Unreliable and for experimentation or strange controlled circumstances only. Signed-off-by: Mark Haverkamp Signed-off-by: James Bottomley --- drivers/scsi/aacraid/aachba.c | 6 +++++- drivers/scsi/aacraid/linit.c | 5 ++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index b14f7cac30e9..ac108f9e2674 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -169,6 +169,10 @@ MODULE_PARM_DESC(numacb, "Request a limit to the number of adapter control block int acbsize = -1; module_param(acbsize, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(acbsize, "Request a specific adapter control block (FIB) size. Valid values are 512, 2048, 4096 and 8192. Default is to use suggestion from Firmware."); + +int expose_physicals = 0; +module_param(expose_physicals, int, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(expose_physicals, "Expose physical components of the arrays. 0=off, 1=on"); /** * aac_get_config_status - check the adapter configuration * @common: adapter to query @@ -1535,7 +1539,7 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd) return 0; } } else { /* check for physical non-dasd devices */ - if(dev->nondasd_support == 1){ + if ((dev->nondasd_support == 1) || expose_physicals) { if (dev->in_reset) return -1; return aac_send_srb_fib(scsicmd); diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index d67058f80816..6e4eafa4ecee 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -82,6 +82,8 @@ static LIST_HEAD(aac_devices); static int aac_cfg_major = -1; char aac_driver_version[] = AAC_DRIVER_FULL_VERSION; +extern int expose_physicals; + /* * Because of the way Linux names scsi devices, the order in this table has * become important. Check for on-board Raid first, add-in cards second. @@ -394,6 +396,7 @@ static int aac_slave_configure(struct scsi_device *sdev) sdev->skip_ms_page_3f = 1; } if ((sdev->type == TYPE_DISK) && + !expose_physicals && (sdev_channel(sdev) != CONTAINER_CHANNEL)) { struct aac_dev *aac = (struct aac_dev *)sdev->host->hostdata; if (!aac->raid_scsi_mode || (sdev_channel(sdev) != 2)) @@ -928,7 +931,7 @@ static int __devinit aac_probe_one(struct pci_dev *pdev, * all containers are on the virtual channel 0 (CONTAINER_CHANNEL) * physical channels are address by their actual physical number+1 */ - if (aac->nondasd_support == 1) + if ((aac->nondasd_support == 1) || expose_physicals) shost->max_channel = aac->maximum_num_channels; else shost->max_channel = 0; From 76a7f8fdc0c2381ae1ba55ef71837712223ecb3c Mon Sep 17 00:00:00 2001 From: Mark Haverkamp Date: Tue, 19 Sep 2006 09:00:02 -0700 Subject: [PATCH 1055/1063] [SCSI] aacraid: merge rx and rkt code Received from Mark Salyzyn: The only real difference between the rkt and rx platform modules is the offset of the message registers. This patch recognizes this similarity and simplifies the driver to reduce it's code footprint and to improve maintainability by reducing the code duplication. Visibly, the 'rkt.c' portion of this patch looks more complicated than it really is. View it as retaining the rkt-only specifics of the interface. Signed-off-by: Mark Haverkamp Signed-off-by: James Bottomley --- drivers/scsi/aacraid/aacraid.h | 14 +- drivers/scsi/aacraid/comminit.c | 11 +- drivers/scsi/aacraid/linit.c | 12 +- drivers/scsi/aacraid/rkt.c | 467 ++------------------------------ drivers/scsi/aacraid/rx.c | 90 ++++-- drivers/scsi/aacraid/sa.c | 21 ++ 6 files changed, 115 insertions(+), 500 deletions(-) diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index e5f7be6f3cd6..eb3ed91bac79 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -494,6 +494,7 @@ struct adapter_ops int (*adapter_sync_cmd)(struct aac_dev *dev, u32 command, u32 p1, u32 p2, u32 p3, u32 p4, u32 p5, u32 p6, u32 *status, u32 *r1, u32 *r2, u32 *r3, u32 *r4); int (*adapter_check_health)(struct aac_dev *dev); int (*adapter_send)(struct fib * fib); + int (*adapter_ioremap)(struct aac_dev * dev, u32 size); }; /* @@ -682,14 +683,6 @@ struct rx_inbound { __le32 Mailbox[8]; }; -#define InboundMailbox0 IndexRegs.Mailbox[0] -#define InboundMailbox1 IndexRegs.Mailbox[1] -#define InboundMailbox2 IndexRegs.Mailbox[2] -#define InboundMailbox3 IndexRegs.Mailbox[3] -#define InboundMailbox4 IndexRegs.Mailbox[4] -#define InboundMailbox5 IndexRegs.Mailbox[5] -#define InboundMailbox6 IndexRegs.Mailbox[6] - #define INBOUNDDOORBELL_0 0x00000001 #define INBOUNDDOORBELL_1 0x00000002 #define INBOUNDDOORBELL_2 0x00000004 @@ -1010,6 +1003,8 @@ struct aac_dev struct rx_registers __iomem *rx; struct rkt_registers __iomem *rkt; } regs; + volatile void __iomem *base; + volatile struct rx_inbound __iomem *IndexRegs; u32 OIMR; /* Mask Register Cache */ /* * AIF thread states @@ -1050,6 +1045,9 @@ struct aac_dev #define aac_adapter_send(fib) \ ((fib)->dev)->a_ops.adapter_send(fib) +#define aac_adapter_ioremap(dev, size) \ + (dev)->a_ops.adapter_ioremap(dev, size) + #define FIB_CONTEXT_FLAG_TIMED_OUT (0x00000001) /* diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c index 87a955096761..d5cf8b91a0e7 100644 --- a/drivers/scsi/aacraid/comminit.c +++ b/drivers/scsi/aacraid/comminit.c @@ -307,17 +307,12 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev) if (status[1] & AAC_OPT_NEW_COMM) dev->new_comm_interface = dev->a_ops.adapter_send != 0; if (dev->new_comm_interface && (status[2] > dev->base_size)) { - iounmap(dev->regs.sa); + aac_adapter_ioremap(dev, 0); dev->base_size = status[2]; - dprintk((KERN_DEBUG "ioremap(%lx,%d)\n", - host->base, status[2])); - dev->regs.sa = ioremap(host->base, status[2]); - if (dev->regs.sa == NULL) { + if (aac_adapter_ioremap(dev, status[2])) { /* remap failed, go back ... */ dev->new_comm_interface = 0; - dev->regs.sa = ioremap(host->base, - AAC_MIN_FOOTPRINT_SIZE); - if (dev->regs.sa == NULL) { + if (aac_adapter_ioremap(dev, AAC_MIN_FOOTPRINT_SIZE)) { printk(KERN_WARNING "aacraid: unable to map adapter.\n"); return NULL; diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 6e4eafa4ecee..359e7ddfdb47 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -867,13 +867,6 @@ static int __devinit aac_probe_one(struct pci_dev *pdev, * Map in the registers from the adapter. */ aac->base_size = AAC_MIN_FOOTPRINT_SIZE; - if ((aac->regs.sa = ioremap( - (unsigned long)aac->scsi_host_ptr->base, AAC_MIN_FOOTPRINT_SIZE)) - == NULL) { - printk(KERN_WARNING "%s: unable to map adapter.\n", - AAC_DRIVERNAME); - goto out_free_fibs; - } if ((*aac_drivers[index].init)(aac)) goto out_unmap; @@ -972,8 +965,7 @@ static int __devinit aac_probe_one(struct pci_dev *pdev, aac_fib_map_free(aac); pci_free_consistent(aac->pdev, aac->comm_size, aac->comm_addr, aac->comm_phys); kfree(aac->queues); - iounmap(aac->regs.sa); - out_free_fibs: + aac_adapter_ioremap(aac, 0); kfree(aac->fibs); kfree(aac->fsa_dev); out_free_host: @@ -1008,7 +1000,7 @@ static void __devexit aac_remove_one(struct pci_dev *pdev) kfree(aac->queues); free_irq(pdev->irq, aac); - iounmap(aac->regs.sa); + aac_adapter_ioremap(aac, 0); kfree(aac->fibs); kfree(aac->fsa_dev); diff --git a/drivers/scsi/aacraid/rkt.c b/drivers/scsi/aacraid/rkt.c index f850c3a7cce9..643f23b5ded8 100644 --- a/drivers/scsi/aacraid/rkt.c +++ b/drivers/scsi/aacraid/rkt.c @@ -28,389 +28,27 @@ * */ -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include #include #include "aacraid.h" -static irqreturn_t aac_rkt_intr(int irq, void *dev_id, struct pt_regs *regs) +/** + * aac_rkt_ioremap + * @size: mapping resize request + * + */ +static int aac_rkt_ioremap(struct aac_dev * dev, u32 size) { - struct aac_dev *dev = dev_id; - - if (dev->new_comm_interface) { - u32 Index = rkt_readl(dev, MUnit.OutboundQueue); - if (Index == 0xFFFFFFFFL) - Index = rkt_readl(dev, MUnit.OutboundQueue); - if (Index != 0xFFFFFFFFL) { - do { - if (aac_intr_normal(dev, Index)) { - rkt_writel(dev, MUnit.OutboundQueue, Index); - rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormRespReady); - } - Index = rkt_readl(dev, MUnit.OutboundQueue); - } while (Index != 0xFFFFFFFFL); - return IRQ_HANDLED; - } - } else { - unsigned long bellbits; - u8 intstat; - intstat = rkt_readb(dev, MUnit.OISR); - /* - * Read mask and invert because drawbridge is reversed. - * This allows us to only service interrupts that have - * been enabled. - * Check to see if this is our interrupt. If it isn't just return - */ - if (intstat & ~(dev->OIMR)) - { - bellbits = rkt_readl(dev, OutboundDoorbellReg); - if (bellbits & DoorBellPrintfReady) { - aac_printf(dev, rkt_readl (dev, IndexRegs.Mailbox[5])); - rkt_writel(dev, MUnit.ODR,DoorBellPrintfReady); - rkt_writel(dev, InboundDoorbellReg,DoorBellPrintfDone); - } - else if (bellbits & DoorBellAdapterNormCmdReady) { - rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdReady); - aac_command_normal(&dev->queues->queue[HostNormCmdQueue]); -// rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdReady); - } - else if (bellbits & DoorBellAdapterNormRespReady) { - rkt_writel(dev, MUnit.ODR,DoorBellAdapterNormRespReady); - aac_response_normal(&dev->queues->queue[HostNormRespQueue]); - } - else if (bellbits & DoorBellAdapterNormCmdNotFull) { - rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdNotFull); - } - else if (bellbits & DoorBellAdapterNormRespNotFull) { - rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdNotFull); - rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormRespNotFull); - } - return IRQ_HANDLED; - } + if (!size) { + iounmap(dev->regs.rkt); + return 0; } - return IRQ_NONE; -} - -/** - * aac_rkt_disable_interrupt - Disable interrupts - * @dev: Adapter - */ - -static void aac_rkt_disable_interrupt(struct aac_dev *dev) -{ - rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xff); -} - -/** - * rkt_sync_cmd - send a command and wait - * @dev: Adapter - * @command: Command to execute - * @p1: first parameter - * @ret: adapter status - * - * This routine will send a synchronous command to the adapter and wait - * for its completion. - */ - -static int rkt_sync_cmd(struct aac_dev *dev, u32 command, - u32 p1, u32 p2, u32 p3, u32 p4, u32 p5, u32 p6, - u32 *status, u32 *r1, u32 *r2, u32 *r3, u32 *r4) -{ - unsigned long start; - int ok; - /* - * Write the command into Mailbox 0 - */ - rkt_writel(dev, InboundMailbox0, command); - /* - * Write the parameters into Mailboxes 1 - 6 - */ - rkt_writel(dev, InboundMailbox1, p1); - rkt_writel(dev, InboundMailbox2, p2); - rkt_writel(dev, InboundMailbox3, p3); - rkt_writel(dev, InboundMailbox4, p4); - /* - * Clear the synch command doorbell to start on a clean slate. - */ - rkt_writel(dev, OutboundDoorbellReg, OUTBOUNDDOORBELL_0); - /* - * Disable doorbell interrupts - */ - rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xff); - /* - * Force the completion of the mask register write before issuing - * the interrupt. - */ - rkt_readb (dev, MUnit.OIMR); - /* - * Signal that there is a new synch command - */ - rkt_writel(dev, InboundDoorbellReg, INBOUNDDOORBELL_0); - - ok = 0; - start = jiffies; - - /* - * Wait up to 30 seconds - */ - while (time_before(jiffies, start+30*HZ)) - { - udelay(5); /* Delay 5 microseconds to let Mon960 get info. */ - /* - * Mon960 will set doorbell0 bit when it has completed the command. - */ - if (rkt_readl(dev, OutboundDoorbellReg) & OUTBOUNDDOORBELL_0) { - /* - * Clear the doorbell. - */ - rkt_writel(dev, OutboundDoorbellReg, OUTBOUNDDOORBELL_0); - ok = 1; - break; - } - /* - * Yield the processor in case we are slow - */ - msleep(1); - } - if (ok != 1) { - /* - * Restore interrupt mask even though we timed out - */ - if (dev->new_comm_interface) - rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xf7); - else - rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xfb); - return -ETIMEDOUT; - } - /* - * Pull the synch status from Mailbox 0. - */ - if (status) - *status = rkt_readl(dev, IndexRegs.Mailbox[0]); - if (r1) - *r1 = rkt_readl(dev, IndexRegs.Mailbox[1]); - if (r2) - *r2 = rkt_readl(dev, IndexRegs.Mailbox[2]); - if (r3) - *r3 = rkt_readl(dev, IndexRegs.Mailbox[3]); - if (r4) - *r4 = rkt_readl(dev, IndexRegs.Mailbox[4]); - /* - * Clear the synch command doorbell. - */ - rkt_writel(dev, OutboundDoorbellReg, OUTBOUNDDOORBELL_0); - /* - * Restore interrupt mask - */ - if (dev->new_comm_interface) - rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xf7); - else - rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xfb); - return 0; - -} - -/** - * aac_rkt_interrupt_adapter - interrupt adapter - * @dev: Adapter - * - * Send an interrupt to the i960 and breakpoint it. - */ - -static void aac_rkt_interrupt_adapter(struct aac_dev *dev) -{ - rkt_sync_cmd(dev, BREAKPOINT_REQUEST, 0, 0, 0, 0, 0, 0, - NULL, NULL, NULL, NULL, NULL); -} - -/** - * aac_rkt_notify_adapter - send an event to the adapter - * @dev: Adapter - * @event: Event to send - * - * Notify the i960 that something it probably cares about has - * happened. - */ - -static void aac_rkt_notify_adapter(struct aac_dev *dev, u32 event) -{ - switch (event) { - - case AdapNormCmdQue: - rkt_writel(dev, MUnit.IDR,INBOUNDDOORBELL_1); - break; - case HostNormRespNotFull: - rkt_writel(dev, MUnit.IDR,INBOUNDDOORBELL_4); - break; - case AdapNormRespQue: - rkt_writel(dev, MUnit.IDR,INBOUNDDOORBELL_2); - break; - case HostNormCmdNotFull: - rkt_writel(dev, MUnit.IDR,INBOUNDDOORBELL_3); - break; - case HostShutdown: -// rkt_sync_cmd(dev, HOST_CRASHING, 0, 0, 0, 0, 0, 0, -// NULL, NULL, NULL, NULL, NULL); - break; - case FastIo: - rkt_writel(dev, MUnit.IDR,INBOUNDDOORBELL_6); - break; - case AdapPrintfDone: - rkt_writel(dev, MUnit.IDR,INBOUNDDOORBELL_5); - break; - default: - BUG(); - break; - } -} - -/** - * aac_rkt_start_adapter - activate adapter - * @dev: Adapter - * - * Start up processing on an i960 based AAC adapter - */ - -static void aac_rkt_start_adapter(struct aac_dev *dev) -{ - struct aac_init *init; - - init = dev->init; - init->HostElapsedSeconds = cpu_to_le32(get_seconds()); - // We can only use a 32 bit address here - rkt_sync_cmd(dev, INIT_STRUCT_BASE_ADDRESS, (u32)(ulong)dev->init_pa, - 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL); -} - -/** - * aac_rkt_check_health - * @dev: device to check if healthy - * - * Will attempt to determine if the specified adapter is alive and - * capable of handling requests, returning 0 if alive. - */ -static int aac_rkt_check_health(struct aac_dev *dev) -{ - u32 status = rkt_readl(dev, MUnit.OMRx[0]); - - /* - * Check to see if the board failed any self tests. - */ - if (status & SELF_TEST_FAILED) + dev->base = dev->regs.rkt = ioremap(dev->scsi_host_ptr->base, size); + if (dev->base == NULL) return -1; - /* - * Check to see if the board panic'd. - */ - if (status & KERNEL_PANIC) { - char * buffer; - struct POSTSTATUS { - __le32 Post_Command; - __le32 Post_Address; - } * post; - dma_addr_t paddr, baddr; - int ret; - - if ((status & 0xFF000000L) == 0xBC000000L) - return (status >> 16) & 0xFF; - buffer = pci_alloc_consistent(dev->pdev, 512, &baddr); - ret = -2; - if (buffer == NULL) - return ret; - post = pci_alloc_consistent(dev->pdev, - sizeof(struct POSTSTATUS), &paddr); - if (post == NULL) { - pci_free_consistent(dev->pdev, 512, buffer, baddr); - return ret; - } - memset(buffer, 0, 512); - post->Post_Command = cpu_to_le32(COMMAND_POST_RESULTS); - post->Post_Address = cpu_to_le32(baddr); - rkt_writel(dev, MUnit.IMRx[0], paddr); - rkt_sync_cmd(dev, COMMAND_POST_RESULTS, baddr, 0, 0, 0, 0, 0, - NULL, NULL, NULL, NULL, NULL); - pci_free_consistent(dev->pdev, sizeof(struct POSTSTATUS), - post, paddr); - if ((buffer[0] == '0') && ((buffer[1] == 'x') || (buffer[1] == 'X'))) { - ret = (buffer[2] <= '9') ? (buffer[2] - '0') : (buffer[2] - 'A' + 10); - ret <<= 4; - ret += (buffer[3] <= '9') ? (buffer[3] - '0') : (buffer[3] - 'A' + 10); - } - pci_free_consistent(dev->pdev, 512, buffer, baddr); - return ret; - } - /* - * Wait for the adapter to be up and running. - */ - if (!(status & KERNEL_UP_AND_RUNNING)) - return -3; - /* - * Everything is OK - */ - return 0; -} - -/** - * aac_rkt_send - * @fib: fib to issue - * - * Will send a fib, returning 0 if successful. - */ -static int aac_rkt_send(struct fib * fib) -{ - u64 addr = fib->hw_fib_pa; - struct aac_dev *dev = fib->dev; - volatile void __iomem *device = dev->regs.rkt; - u32 Index; - - dprintk((KERN_DEBUG "%p->aac_rkt_send(%p->%llx)\n", dev, fib, addr)); - Index = rkt_readl(dev, MUnit.InboundQueue); - if (Index == 0xFFFFFFFFL) - Index = rkt_readl(dev, MUnit.InboundQueue); - dprintk((KERN_DEBUG "Index = 0x%x\n", Index)); - if (Index == 0xFFFFFFFFL) - return Index; - device += Index; - dprintk((KERN_DEBUG "entry = %x %x %u\n", (u32)(addr & 0xffffffff), - (u32)(addr >> 32), (u32)le16_to_cpu(fib->hw_fib->header.Size))); - writel((u32)(addr & 0xffffffff), device); - device += sizeof(u32); - writel((u32)(addr >> 32), device); - device += sizeof(u32); - writel(le16_to_cpu(fib->hw_fib->header.Size), device); - rkt_writel(dev, MUnit.InboundQueue, Index); - dprintk((KERN_DEBUG "aac_rkt_send - return 0\n")); - return 0; -} - -static int aac_rkt_restart_adapter(struct aac_dev *dev) -{ - u32 var; - - printk(KERN_ERR "%s%d: adapter kernel panic'd.\n", - dev->name, dev->id); - - if (aac_rkt_check_health(dev) <= 0) - return 1; - if (rkt_sync_cmd(dev, IOP_RESET, 0, 0, 0, 0, 0, 0, - &var, NULL, NULL, NULL, NULL)) - return 1; - if (var != 0x00000001) - return 1; - if (rkt_readl(dev, MUnit.OMRx[0]) & KERNEL_PANIC) - return 1; + dev->IndexRegs = &dev->regs.rkt->IndexRegs; return 0; } @@ -425,74 +63,18 @@ static int aac_rkt_restart_adapter(struct aac_dev *dev) int aac_rkt_init(struct aac_dev *dev) { - unsigned long start; - unsigned long status; - int instance; - const char * name; + int retval; + extern int _aac_rx_init(struct aac_dev *dev); + extern void aac_rx_start_adapter(struct aac_dev *dev); - instance = dev->id; - name = dev->name; - - /* - * Check to see if the board panic'd while booting. - */ - if (rkt_readl(dev, MUnit.OMRx[0]) & KERNEL_PANIC) - if (aac_rkt_restart_adapter(dev)) - goto error_iounmap; - /* - * Check to see if the board failed any self tests. - */ - if (rkt_readl(dev, MUnit.OMRx[0]) & SELF_TEST_FAILED) { - printk(KERN_ERR "%s%d: adapter self-test failed.\n", dev->name, instance); - goto error_iounmap; - } - /* - * Check to see if the monitor panic'd while booting. - */ - if (rkt_readl(dev, MUnit.OMRx[0]) & MONITOR_PANIC) { - printk(KERN_ERR "%s%d: adapter monitor panic.\n", dev->name, instance); - goto error_iounmap; - } - start = jiffies; - /* - * Wait for the adapter to be up and running. Wait up to 3 minutes - */ - while (!(rkt_readl(dev, MUnit.OMRx[0]) & KERNEL_UP_AND_RUNNING)) - { - if(time_after(jiffies, start+startup_timeout*HZ)) - { - status = rkt_readl(dev, MUnit.OMRx[0]); - printk(KERN_ERR "%s%d: adapter kernel failed to start, init status = %lx.\n", - dev->name, instance, status); - goto error_iounmap; - } - msleep(1); - } - if (request_irq(dev->scsi_host_ptr->irq, aac_rkt_intr, IRQF_SHARED|IRQF_DISABLED, "aacraid", (void *)dev)<0) - { - printk(KERN_ERR "%s%d: Interrupt unavailable.\n", name, instance); - goto error_iounmap; - } /* * Fill in the function dispatch table. */ - dev->a_ops.adapter_interrupt = aac_rkt_interrupt_adapter; - dev->a_ops.adapter_disable_int = aac_rkt_disable_interrupt; - dev->a_ops.adapter_notify = aac_rkt_notify_adapter; - dev->a_ops.adapter_sync_cmd = rkt_sync_cmd; - dev->a_ops.adapter_check_health = aac_rkt_check_health; - dev->a_ops.adapter_send = aac_rkt_send; + dev->a_ops.adapter_ioremap = aac_rkt_ioremap; - /* - * First clear out all interrupts. Then enable the one's that we - * can handle. - */ - rkt_writeb(dev, MUnit.OIMR, 0xff); - rkt_writel(dev, MUnit.ODR, 0xffffffff); - rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xfb); - - if (aac_init_adapter(dev) == NULL) - goto error_irq; + retval = _aac_rx_init(dev); + if (retval) + return retval; if (dev->new_comm_interface) { /* * FIB Setup has already been done, but we can minimize the @@ -509,20 +91,11 @@ int aac_rkt_init(struct aac_dev *dev) dev->init->MaxIoCommands = cpu_to_le32(246); dev->scsi_host_ptr->can_queue = 246 - AAC_NUM_MGT_FIB; } - rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xf7); } /* * Tell the adapter that all is configured, and it can start * accepting requests */ - aac_rkt_start_adapter(dev); + aac_rx_start_adapter(dev); return 0; - -error_irq: - rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xff); - free_irq(dev->scsi_host_ptr->irq, (void *)dev); - -error_iounmap: - - return -1; } diff --git a/drivers/scsi/aacraid/rx.c b/drivers/scsi/aacraid/rx.c index c715c4b2442d..a1d214d770eb 100644 --- a/drivers/scsi/aacraid/rx.c +++ b/drivers/scsi/aacraid/rx.c @@ -79,7 +79,7 @@ static irqreturn_t aac_rx_intr(int irq, void *dev_id, struct pt_regs *regs) { bellbits = rx_readl(dev, OutboundDoorbellReg); if (bellbits & DoorBellPrintfReady) { - aac_printf(dev, rx_readl (dev, IndexRegs.Mailbox[5])); + aac_printf(dev, readl (&dev->IndexRegs->Mailbox[5])); rx_writel(dev, MUnit.ODR,DoorBellPrintfReady); rx_writel(dev, InboundDoorbellReg,DoorBellPrintfDone); } @@ -134,14 +134,14 @@ static int rx_sync_cmd(struct aac_dev *dev, u32 command, /* * Write the command into Mailbox 0 */ - rx_writel(dev, InboundMailbox0, command); + writel(command, &dev->IndexRegs->Mailbox[0]); /* * Write the parameters into Mailboxes 1 - 6 */ - rx_writel(dev, InboundMailbox1, p1); - rx_writel(dev, InboundMailbox2, p2); - rx_writel(dev, InboundMailbox3, p3); - rx_writel(dev, InboundMailbox4, p4); + writel(p1, &dev->IndexRegs->Mailbox[1]); + writel(p2, &dev->IndexRegs->Mailbox[2]); + writel(p3, &dev->IndexRegs->Mailbox[3]); + writel(p4, &dev->IndexRegs->Mailbox[4]); /* * Clear the synch command doorbell to start on a clean slate. */ @@ -199,15 +199,15 @@ static int rx_sync_cmd(struct aac_dev *dev, u32 command, * Pull the synch status from Mailbox 0. */ if (status) - *status = rx_readl(dev, IndexRegs.Mailbox[0]); + *status = readl(&dev->IndexRegs->Mailbox[0]); if (r1) - *r1 = rx_readl(dev, IndexRegs.Mailbox[1]); + *r1 = readl(&dev->IndexRegs->Mailbox[1]); if (r2) - *r2 = rx_readl(dev, IndexRegs.Mailbox[2]); + *r2 = readl(&dev->IndexRegs->Mailbox[2]); if (r3) - *r3 = rx_readl(dev, IndexRegs.Mailbox[3]); + *r3 = readl(&dev->IndexRegs->Mailbox[3]); if (r4) - *r4 = rx_readl(dev, IndexRegs.Mailbox[4]); + *r4 = readl(&dev->IndexRegs->Mailbox[4]); /* * Clear the synch command doorbell. */ @@ -261,8 +261,6 @@ static void aac_rx_notify_adapter(struct aac_dev *dev, u32 event) rx_writel(dev, MUnit.IDR,INBOUNDDOORBELL_3); break; case HostShutdown: -// rx_sync_cmd(dev, HOST_CRASHING, 0, 0, 0, 0, 0, 0, -// NULL, NULL, NULL, NULL, NULL); break; case FastIo: rx_writel(dev, MUnit.IDR,INBOUNDDOORBELL_6); @@ -283,7 +281,7 @@ static void aac_rx_notify_adapter(struct aac_dev *dev, u32 event) * Start up processing on an i960 based AAC adapter */ -static void aac_rx_start_adapter(struct aac_dev *dev) +void aac_rx_start_adapter(struct aac_dev *dev) { struct aac_init *init; @@ -381,7 +379,7 @@ static int aac_rx_send(struct fib * fib) dprintk((KERN_DEBUG "Index = 0x%x\n", Index)); if (Index == 0xFFFFFFFFL) return Index; - device += Index; + device = dev->base + Index; dprintk((KERN_DEBUG "entry = %x %x %u\n", (u32)(addr & 0xffffffff), (u32)(addr >> 32), (u32)le16_to_cpu(fib->hw_fib->header.Size))); writel((u32)(addr & 0xffffffff), device); @@ -394,6 +392,24 @@ static int aac_rx_send(struct fib * fib) return 0; } +/** + * aac_rx_ioremap + * @size: mapping resize request + * + */ +static int aac_rx_ioremap(struct aac_dev * dev, u32 size) +{ + if (!size) { + iounmap(dev->regs.rx); + return 0; + } + dev->base = dev->regs.rx = ioremap(dev->scsi_host_ptr->base, size); + if (dev->base == NULL) + return -1; + dev->IndexRegs = &dev->regs.rx->IndexRegs; + return 0; +} + static int aac_rx_restart_adapter(struct aac_dev *dev) { u32 var; @@ -422,7 +438,7 @@ static int aac_rx_restart_adapter(struct aac_dev *dev) * to the comm region. */ -int aac_rx_init(struct aac_dev *dev) +int _aac_rx_init(struct aac_dev *dev) { unsigned long start; unsigned long status; @@ -432,23 +448,30 @@ int aac_rx_init(struct aac_dev *dev) instance = dev->id; name = dev->name; + if (aac_adapter_ioremap(dev, dev->base_size)) { + printk(KERN_WARNING "%s: unable to map adapter.\n", name); + goto error_iounmap; + } + /* * Check to see if the board panic'd while booting. */ - if (rx_readl(dev, MUnit.OMRx[0]) & KERNEL_PANIC) + status = rx_readl(dev, MUnit.OMRx[0]); + if (status & KERNEL_PANIC) if (aac_rx_restart_adapter(dev)) goto error_iounmap; /* * Check to see if the board failed any self tests. */ - if (rx_readl(dev, MUnit.OMRx[0]) & SELF_TEST_FAILED) { + status = rx_readl(dev, MUnit.OMRx[0]); + if (status & SELF_TEST_FAILED) { printk(KERN_ERR "%s%d: adapter self-test failed.\n", dev->name, instance); goto error_iounmap; } /* * Check to see if the monitor panic'd while booting. */ - if (rx_readl(dev, MUnit.OMRx[0]) & MONITOR_PANIC) { + if (status & MONITOR_PANIC) { printk(KERN_ERR "%s%d: adapter monitor panic.\n", dev->name, instance); goto error_iounmap; } @@ -456,12 +479,10 @@ int aac_rx_init(struct aac_dev *dev) /* * Wait for the adapter to be up and running. Wait up to 3 minutes */ - while ((!(rx_readl(dev, IndexRegs.Mailbox[7]) & KERNEL_UP_AND_RUNNING)) - || (!(rx_readl(dev, MUnit.OMRx[0]) & KERNEL_UP_AND_RUNNING))) + while (!((status = rx_readl(dev, MUnit.OMRx[0])) & KERNEL_UP_AND_RUNNING)) { if(time_after(jiffies, start+startup_timeout*HZ)) { - status = rx_readl(dev, IndexRegs.Mailbox[7]); printk(KERN_ERR "%s%d: adapter kernel failed to start, init status = %lx.\n", dev->name, instance, status); goto error_iounmap; @@ -496,11 +517,6 @@ int aac_rx_init(struct aac_dev *dev) if (dev->new_comm_interface) rx_writeb(dev, MUnit.OIMR, dev->OIMR = 0xf7); - /* - * Tell the adapter that all is configured, and it can start - * accepting requests - */ - aac_rx_start_adapter(dev); return 0; error_irq: @@ -511,3 +527,23 @@ error_iounmap: return -1; } + +int aac_rx_init(struct aac_dev *dev) +{ + int retval; + + /* + * Fill in the function dispatch table. + */ + dev->a_ops.adapter_ioremap = aac_rx_ioremap; + + retval = _aac_rx_init(dev); + if (!retval) { + /* + * Tell the adapter that all is configured, and it can + * start accepting requests + */ + aac_rx_start_adapter(dev); + } + return retval; +} diff --git a/drivers/scsi/aacraid/sa.c b/drivers/scsi/aacraid/sa.c index cd586cc8f9be..f906ead239dd 100644 --- a/drivers/scsi/aacraid/sa.c +++ b/drivers/scsi/aacraid/sa.c @@ -280,6 +280,21 @@ static int aac_sa_check_health(struct aac_dev *dev) return 0; } +/** + * aac_sa_ioremap + * @size: mapping resize request + * + */ +static int aac_sa_ioremap(struct aac_dev * dev, u32 size) +{ + if (!size) { + iounmap(dev->regs.sa); + return 0; + } + dev->base = dev->regs.sa = ioremap(dev->scsi_host_ptr->base, size); + return (dev->base == NULL) ? -1 : 0; +} + /** * aac_sa_init - initialize an ARM based AAC card * @dev: device to configure @@ -299,6 +314,11 @@ int aac_sa_init(struct aac_dev *dev) instance = dev->id; name = dev->name; + if (aac_sa_ioremap(dev, dev->base_size)) { + printk(KERN_WARNING "%s: unable to map adapter.\n", name); + goto error_iounmap; + } + /* * Check to see if the board failed any self tests. */ @@ -341,6 +361,7 @@ int aac_sa_init(struct aac_dev *dev) dev->a_ops.adapter_notify = aac_sa_notify_adapter; dev->a_ops.adapter_sync_cmd = sa_sync_cmd; dev->a_ops.adapter_check_health = aac_sa_check_health; + dev->a_ops.adapter_ioremap = aac_sa_ioremap; /* * First clear out all interrupts. Then enable the one's that From ac5826ca91243272f97b3f01e80d71e3618f105f Mon Sep 17 00:00:00 2001 From: Mark Haverkamp Date: Tue, 19 Sep 2006 09:00:18 -0700 Subject: [PATCH 1056/1063] [SCSI] aacraid: remove scsi_remove_device Received from Mark Salyzyn: Until the system is stabilized, I am suggesting the enclosed modification to prevent the driver from tickling the panic. Once sysfs and friends are stabilized, the patch may be backed out. We have yet to evaluate if we really want to relinquish existing Scsi Devices in any case, holding on to them as configuration of arrays comes and goes makes some sense as well. As a result, we have opted to pull the lines rather than comment them in legacy. Signed-off-by: Mark Haverkamp Signed-off-by: James Bottomley --- drivers/scsi/aacraid/commsup.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 907161d6e92c..8734a045558e 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -1033,13 +1033,7 @@ static void aac_handle_aif(struct aac_dev * dev, struct fib * fibptr) if (device) { switch (device_config_needed) { case DELETE: - scsi_remove_device(device); - break; case CHANGE: - if (!dev->fsa_dev[container].valid) { - scsi_remove_device(device); - break; - } scsi_rescan_device(&device->sdev_gendev); default: From 2538363eb51a70948ed36bf3971a728268d10766 Mon Sep 17 00:00:00 2001 From: Mark Haverkamp Date: Tue, 19 Sep 2006 09:00:39 -0700 Subject: [PATCH 1057/1063] [SCSI] aacraid: README update Received from Mark Salyzyn: This patch to the driver's documentation adds a few new product entries, sorts the entries on OEM lines first for easy searching, followed by product id order to make it easier to compare against the open source pci list. The driver has 'family match' so is somewhat future proof, no code changes are required to recognize the new products. Signed-off-by: Mark Haverkamp Signed-off-by: James Bottomley --- Documentation/scsi/aacraid.txt | 77 ++++++++++++++++++---------------- 1 file changed, 42 insertions(+), 35 deletions(-) diff --git a/Documentation/scsi/aacraid.txt b/Documentation/scsi/aacraid.txt index be55670851a4..ee03678c8029 100644 --- a/Documentation/scsi/aacraid.txt +++ b/Documentation/scsi/aacraid.txt @@ -11,38 +11,43 @@ the original). Supported Cards/Chipsets ------------------------- PCI ID (pci.ids) OEM Product - 9005:0285:9005:028a Adaptec 2020ZCR (Skyhawk) - 9005:0285:9005:028e Adaptec 2020SA (Skyhawk) - 9005:0285:9005:028b Adaptec 2025ZCR (Terminator) - 9005:0285:9005:028f Adaptec 2025SA (Terminator) - 9005:0285:9005:0286 Adaptec 2120S (Crusader) - 9005:0286:9005:028d Adaptec 2130S (Lancer) - 9005:0285:9005:0285 Adaptec 2200S (Vulcan) - 9005:0285:9005:0287 Adaptec 2200S (Vulcan-2m) - 9005:0286:9005:028c Adaptec 2230S (Lancer) - 9005:0286:9005:028c Adaptec 2230SLP (Lancer) - 9005:0285:9005:0296 Adaptec 2240S (SabreExpress) - 9005:0285:9005:0290 Adaptec 2410SA (Jaguar) - 9005:0285:9005:0293 Adaptec 21610SA (Corsair-16) - 9005:0285:103c:3227 Adaptec 2610SA (Bearcat HP release) - 9005:0285:9005:0292 Adaptec 2810SA (Corsair-8) - 9005:0285:9005:0294 Adaptec Prowler - 9005:0286:9005:029d Adaptec 2420SA (Intruder HP release) - 9005:0286:9005:029c Adaptec 2620SA (Intruder) - 9005:0286:9005:029b Adaptec 2820SA (Intruder) - 9005:0286:9005:02a7 Adaptec 2830SA (Skyray) - 9005:0286:9005:02a8 Adaptec 2430SA (Skyray) - 9005:0285:9005:0288 Adaptec 3230S (Harrier) - 9005:0285:9005:0289 Adaptec 3240S (Tornado) - 9005:0285:9005:0298 Adaptec 4000SAS (BlackBird) - 9005:0285:9005:0297 Adaptec 4005SAS (AvonPark) - 9005:0285:9005:0299 Adaptec 4800SAS (Marauder-X) - 9005:0285:9005:029a Adaptec 4805SAS (Marauder-E) - 9005:0286:9005:02a2 Adaptec 3800SAS (Hurricane44) - 1011:0046:9005:0364 Adaptec 5400S (Mustang) - 1011:0046:9005:0365 Adaptec 5400S (Mustang) 9005:0283:9005:0283 Adaptec Catapult (3210S with arc firmware) 9005:0284:9005:0284 Adaptec Tomcat (3410S with arc firmware) + 9005:0285:9005:0285 Adaptec 2200S (Vulcan) + 9005:0285:9005:0286 Adaptec 2120S (Crusader) + 9005:0285:9005:0287 Adaptec 2200S (Vulcan-2m) + 9005:0285:9005:0288 Adaptec 3230S (Harrier) + 9005:0285:9005:0289 Adaptec 3240S (Tornado) + 9005:0285:9005:028a Adaptec 2020ZCR (Skyhawk) + 9005:0285:9005:028b Adaptec 2025ZCR (Terminator) + 9005:0286:9005:028c Adaptec 2230S (Lancer) + 9005:0286:9005:028c Adaptec 2230SLP (Lancer) + 9005:0286:9005:028d Adaptec 2130S (Lancer) + 9005:0285:9005:028e Adaptec 2020SA (Skyhawk) + 9005:0285:9005:028f Adaptec 2025SA (Terminator) + 9005:0285:9005:0290 Adaptec 2410SA (Jaguar) + 9005:0285:103c:3227 Adaptec 2610SA (Bearcat HP release) + 9005:0285:9005:0293 Adaptec 21610SA (Corsair-16) + 9005:0285:9005:0296 Adaptec 2240S (SabreExpress) + 9005:0285:9005:0292 Adaptec 2810SA (Corsair-8) + 9005:0285:9005:0294 Adaptec Prowler + 9005:0285:9005:0297 Adaptec 4005SAS (AvonPark) + 9005:0285:9005:0298 Adaptec 4000SAS (BlackBird) + 9005:0285:9005:0299 Adaptec 4800SAS (Marauder-X) + 9005:0285:9005:029a Adaptec 4805SAS (Marauder-E) + 9005:0286:9005:029b Adaptec 2820SA (Intruder) + 9005:0286:9005:029c Adaptec 2620SA (Intruder) + 9005:0286:9005:029d Adaptec 2420SA (Intruder HP release) + 9005:0286:9005:02a2 Adaptec 3800SAS (Hurricane44) + 9005:0286:9005:02a7 Adaptec 3805SAS (Hurricane80) + 9005:0286:9005:02a8 Adaptec 3400SAS (Hurricane40) + 9005:0286:9005:02ac Adaptec 1800SAS (Typhoon44) + 9005:0286:9005:02b3 Adaptec 2400SAS (Hurricane40lm) + 9005:0285:9005:02b5 Adaptec ASR5800 (Voodoo44) + 9005:0285:9005:02b6 Adaptec ASR5805 (Voodoo80) + 9005:0285:9005:02b7 Adaptec ASR5808 (Voodoo08) + 1011:0046:9005:0364 Adaptec 5400S (Mustang) + 1011:0046:9005:0365 Adaptec 5400S (Mustang) 9005:0287:9005:0800 Adaptec Themisto (Jupiter) 9005:0200:9005:0200 Adaptec Themisto (Jupiter) 9005:0286:9005:0800 Adaptec Callisto (Jupiter) @@ -64,18 +69,20 @@ Supported Cards/Chipsets 9005:0285:9005:0290 IBM ServeRAID 7t (Jaguar) 9005:0285:1014:02F2 IBM ServeRAID 8i (AvonPark) 9005:0285:1014:0312 IBM ServeRAID 8i (AvonParkLite) - 9005:0286:1014:9580 IBM ServeRAID 8k/8k-l8 (Aurora) 9005:0286:1014:9540 IBM ServeRAID 8k/8k-l4 (AuroraLite) - 9005:0286:9005:029f ICP ICP9014R0 (Lancer) + 9005:0286:1014:9580 IBM ServeRAID 8k/8k-l8 (Aurora) + 9005:0286:1014:034d IBM ServeRAID 8s (Hurricane) 9005:0286:9005:029e ICP ICP9024R0 (Lancer) + 9005:0286:9005:029f ICP ICP9014R0 (Lancer) 9005:0286:9005:02a0 ICP ICP9047MA (Lancer) 9005:0286:9005:02a1 ICP ICP9087MA (Lancer) + 9005:0286:9005:02a3 ICP ICP5445AU (Hurricane44) 9005:0286:9005:02a4 ICP ICP9085LI (Marauder-X) 9005:0286:9005:02a5 ICP ICP5085BR (Marauder-E) - 9005:0286:9005:02a3 ICP ICP5445AU (Hurricane44) 9005:0286:9005:02a6 ICP ICP9067MA (Intruder-6) - 9005:0286:9005:02a9 ICP ICP5087AU (Skyray) - 9005:0286:9005:02aa ICP ICP5047AU (Skyray) + 9005:0286:9005:02a9 ICP ICP5085AU (Hurricane80) + 9005:0286:9005:02aa ICP ICP5045AU (Hurricane40) + 9005:0286:9005:02b4 ICP ICP5045AL (Hurricane40lm) People ------------------------- From 1a68d41a334a406d4bd35999f0be4d47f193e477 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 18 Sep 2006 19:18:34 +0100 Subject: [PATCH 1058/1063] [SCSI] eata_pio cleanup and PCI fix This started as a PCI reference fixup but to do that I need to build it, to build it I need to fix it and its full of 32bitisms and uglies. It has been resurrected, I'm not sure if this is a thank you for the work on the license stuff or punishment for some unknown misdeed however 8). I've also fixed a memory scribble in the init code. One oddity - the changes from HZ * to constants are deliberate. Whoever originally wrote the code (or cleaned it up) used HZ for a cycle timing loop even though is not HZ related. I've put it back to the counts used in the old days when the driver was most used. Signed-off-by: Alan Cox Signed-off-by: James Bottomley --- drivers/scsi/eata_generic.h | 1 + drivers/scsi/eata_pio.c | 127 +++++++++++++++++++----------------- 2 files changed, 69 insertions(+), 59 deletions(-) diff --git a/drivers/scsi/eata_generic.h b/drivers/scsi/eata_generic.h index 34bce2c9e92e..635c14861f86 100644 --- a/drivers/scsi/eata_generic.h +++ b/drivers/scsi/eata_generic.h @@ -364,6 +364,7 @@ typedef struct hstd { __u8 moresupport; /* HBA supports MORE flag */ struct Scsi_Host *next; struct Scsi_Host *prev; + struct pci_dev *pdev; /* PCI device or NULL for non PCI */ struct eata_sp sp; /* status packet */ struct eata_ccb ccb[0]; /* ccb array begins here */ }hostdata; diff --git a/drivers/scsi/eata_pio.c b/drivers/scsi/eata_pio.c index 771b01984cbc..d312633db92b 100644 --- a/drivers/scsi/eata_pio.c +++ b/drivers/scsi/eata_pio.c @@ -71,11 +71,11 @@ #include "eata_pio.h" -static uint ISAbases[MAXISA] = { +static unsigned int ISAbases[MAXISA] = { 0x1F0, 0x170, 0x330, 0x230 }; -static uint ISAirqs[MAXISA] = { +static unsigned int ISAirqs[MAXISA] = { 14, 12, 15, 11 }; @@ -84,7 +84,7 @@ static unsigned char EISAbases[] = { 1, 1, 1, 1, 1, 1, 1, 1 }; -static uint registered_HBAs; +static unsigned int registered_HBAs; static struct Scsi_Host *last_HBA; static struct Scsi_Host *first_HBA; static unsigned char reg_IRQ[16]; @@ -165,6 +165,7 @@ static int eata_pio_proc_info(struct Scsi_Host *shost, char *buffer, char **star static int eata_pio_release(struct Scsi_Host *sh) { + hostdata *hd = SD(sh); if (sh->irq && reg_IRQ[sh->irq] == 1) free_irq(sh->irq, NULL); else @@ -173,10 +174,13 @@ static int eata_pio_release(struct Scsi_Host *sh) if (sh->io_port && sh->n_io_port) release_region(sh->io_port, sh->n_io_port); } + /* At this point the PCI reference can go */ + if (hd->pdev) + pci_dev_put(hd->pdev); return 1; } -static void IncStat(struct scsi_pointer *SCp, uint Increment) +static void IncStat(struct scsi_pointer *SCp, unsigned int Increment) { SCp->ptr += Increment; if ((SCp->this_residual -= Increment) == 0) { @@ -190,46 +194,49 @@ static void IncStat(struct scsi_pointer *SCp, uint Increment) } } -static void eata_pio_int_handler(int irq, void *dev_id, struct pt_regs *regs); +static irqreturn_t eata_pio_int_handler(int irq, void *dev_id, struct pt_regs *regs); static irqreturn_t do_eata_pio_int_handler(int irq, void *dev_id, struct pt_regs *regs) { unsigned long flags; struct Scsi_Host *dev = dev_id; + irqreturn_t ret; spin_lock_irqsave(dev->host_lock, flags); - eata_pio_int_handler(irq, dev_id, regs); + ret = eata_pio_int_handler(irq, dev_id, regs); spin_unlock_irqrestore(dev->host_lock, flags); - return IRQ_HANDLED; + return ret; } -static void eata_pio_int_handler(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t eata_pio_int_handler(int irq, void *dev_id, struct pt_regs *regs) { - uint eata_stat = 0xfffff; + unsigned int eata_stat = 0xfffff; struct scsi_cmnd *cmd; hostdata *hd; struct eata_ccb *cp; - uint base; - uint x, z; + unsigned long base; + unsigned int x, z; struct Scsi_Host *sh; unsigned short zwickel = 0; unsigned char stat, odd; + irqreturn_t ret = IRQ_NONE; for (x = 1, sh = first_HBA; x <= registered_HBAs; x++, sh = SD(sh)->prev) { if (sh->irq != irq) continue; - if (inb((uint) sh->base + HA_RSTATUS) & HA_SBUSY) + if (inb(sh->base + HA_RSTATUS) & HA_SBUSY) continue; int_counter++; + ret = IRQ_HANDLED; hd = SD(sh); cp = &hd->ccb[0]; cmd = cp->cmd; - base = (uint) cmd->device->host->base; + base = cmd->device->host->base; do { stat = inb(base + HA_RSTATUS); @@ -304,7 +311,7 @@ static void eata_pio_int_handler(int irq, void *dev_id, struct pt_regs *regs) if (!(inb(base + HA_RSTATUS) & HA_SERROR)) { cmd->result = (DID_OK << 16); hd->devflags |= (1 << cp->cp_id); - } else if (hd->devflags & 1 << cp->cp_id) + } else if (hd->devflags & (1 << cp->cp_id)) cmd->result = (DID_OK << 16) + 0x02; else cmd->result = (DID_NO_CONNECT << 16); @@ -313,7 +320,7 @@ static void eata_pio_int_handler(int irq, void *dev_id, struct pt_regs *regs) cp->status = FREE; eata_stat = inb(base + HA_RSTATUS); printk(KERN_CRIT "eata_pio: int_handler, freeing locked " "queueslot\n"); - return; + return ret; } #if DBG_INTR2 if (stat != 0x50) @@ -325,12 +332,12 @@ static void eata_pio_int_handler(int irq, void *dev_id, struct pt_regs *regs) cmd->scsi_done(cmd); } - return; + return ret; } -static inline uint eata_pio_send_command(uint base, unsigned char command) +static inline unsigned int eata_pio_send_command(unsigned long base, unsigned char command) { - uint loop = HZ / 2; + unsigned int loop = 50; while (inb(base + HA_RSTATUS) & HA_SBUSY) if (--loop == 0) @@ -349,8 +356,8 @@ static inline uint eata_pio_send_command(uint base, unsigned char command) static int eata_pio_queue(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)) { - uint x, y; - uint base; + unsigned int x, y; + unsigned long base; hostdata *hd; struct Scsi_Host *sh; @@ -360,7 +367,7 @@ static int eata_pio_queue(struct scsi_cmnd *cmd, hd = HD(cmd); sh = cmd->device->host; - base = (uint) sh->base; + base = sh->base; /* use only slot 0, as 2001 can handle only one cmd at a time */ @@ -395,9 +402,9 @@ static int eata_pio_queue(struct scsi_cmnd *cmd, cp->DataIn = 0; /* Input mode */ cp->Interpret = (cmd->device->id == hd->hostid); - cp->cp_datalen = htonl((unsigned long) cmd->request_bufflen); + cp->cp_datalen = cpu_to_be32(cmd->request_bufflen); cp->Auto_Req_Sen = 0; - cp->cp_reqDMA = htonl(0); + cp->cp_reqDMA = 0; cp->reqlen = 0; cp->cp_id = cmd->device->id; @@ -406,7 +413,7 @@ static int eata_pio_queue(struct scsi_cmnd *cmd, cp->cp_identify = 1; memcpy(cp->cp_cdb, cmd->cmnd, COMMAND_SIZE(*cmd->cmnd)); - cp->cp_statDMA = htonl(0); + cp->cp_statDMA = 0; cp->cp_viraddr = cp; cp->cmd = cmd; @@ -445,14 +452,14 @@ static int eata_pio_queue(struct scsi_cmnd *cmd, DBG(DBG_QUEUE, scmd_printk(KERN_DEBUG, cmd, "Queued base %#.4lx pid: %ld " - "slot %d irq %d\n", (long) sh->base, cmd->pid, y, sh->irq)); + "slot %d irq %d\n", sh->base, cmd->pid, y, sh->irq)); return (0); } static int eata_pio_abort(struct scsi_cmnd *cmd) { - uint loop = HZ; + unsigned int loop = 100; DBG(DBG_ABNORM, scmd_printk(KERN_WARNING, cmd, "eata_pio_abort called pid: %ld\n", @@ -485,7 +492,7 @@ static int eata_pio_abort(struct scsi_cmnd *cmd) static int eata_pio_host_reset(struct scsi_cmnd *cmd) { - uint x, limit = 0; + unsigned int x, limit = 0; unsigned char success = 0; struct scsi_cmnd *sp; struct Scsi_Host *host = cmd->device->host; @@ -518,7 +525,7 @@ static int eata_pio_host_reset(struct scsi_cmnd *cmd) } /* hard reset the HBA */ - outb(EATA_CMD_RESET, (uint) cmd->device->host->base + HA_WCOMMAND); + outb(EATA_CMD_RESET, cmd->device->host->base + HA_WCOMMAND); DBG(DBG_ABNORM, printk(KERN_WARNING "eata_pio_reset: board reset done.\n")); HD(cmd)->state = RESET; @@ -558,7 +565,7 @@ static int eata_pio_host_reset(struct scsi_cmnd *cmd) } } -static char *get_pio_board_data(unsigned long base, uint irq, uint id, unsigned long cplen, unsigned short cppadlen) +static char *get_pio_board_data(unsigned long base, unsigned int irq, unsigned int id, unsigned long cplen, unsigned short cppadlen) { struct eata_ccb cp; static char buff[256]; @@ -570,8 +577,8 @@ static char *get_pio_board_data(unsigned long base, uint irq, uint id, unsigned cp.DataIn = 1; cp.Interpret = 1; /* Interpret command */ - cp.cp_datalen = htonl(254); - cp.cp_dataDMA = htonl(0); + cp.cp_datalen = cpu_to_be32(254); + cp.cp_dataDMA = cpu_to_be32(0); cp.cp_id = id; cp.cp_lun = 0; @@ -583,7 +590,7 @@ static char *get_pio_board_data(unsigned long base, uint irq, uint id, unsigned cp.cp_cdb[4] = 254; cp.cp_cdb[5] = 0; - if (eata_pio_send_command((uint) base, EATA_CMD_PIO_SEND_CP)) + if (eata_pio_send_command(base, EATA_CMD_PIO_SEND_CP)) return (NULL); while (!(inb(base + HA_RSTATUS) & HA_SDRQ)); outsw(base + HA_RDATA, &cp, cplen); @@ -604,7 +611,7 @@ static char *get_pio_board_data(unsigned long base, uint irq, uint id, unsigned } } -static int get_pio_conf_PIO(u32 base, struct get_conf *buf) +static int get_pio_conf_PIO(unsigned long base, struct get_conf *buf) { unsigned long loop = HZ / 2; int z; @@ -619,30 +626,30 @@ static int get_pio_conf_PIO(u32 base, struct get_conf *buf) if (--loop == 0) goto fail; - DBG(DBG_PIO && DBG_PROBE, printk(KERN_DEBUG "Issuing PIO READ CONFIG to HBA at %#x\n", base)); + DBG(DBG_PIO && DBG_PROBE, printk(KERN_DEBUG "Issuing PIO READ CONFIG to HBA at %#lx\n", base)); eata_pio_send_command(base, EATA_CMD_PIO_READ_CONFIG); - loop = HZ / 2; + loop = 50; for (p = (unsigned short *) buf; (long) p <= ((long) buf + (sizeof(struct get_conf) / 2)); p++) { while (!(inb(base + HA_RSTATUS) & HA_SDRQ)) if (--loop == 0) goto fail; - loop = HZ / 2; + loop = 50; *p = inw(base + HA_RDATA); } if (inb(base + HA_RSTATUS) & HA_SERROR) { DBG(DBG_PROBE, printk("eata_dma: get_conf_PIO, error during " - "transfer for HBA at %x\n", base)); + "transfer for HBA at %lx\n", base)); goto fail; } - if (htonl(EATA_SIGNATURE) != buf->signature) + if (cpu_to_be32(EATA_SIGNATURE) != buf->signature) goto fail; DBG(DBG_PIO && DBG_PROBE, printk(KERN_NOTICE "EATA Controller found " - "at %#4x EATA Level: %x\n", - base, (uint) (buf->version))); + "at %#4lx EATA Level: %x\n", + base, (unsigned int) (buf->version))); while (inb(base + HA_RSTATUS) & HA_SDRQ) inw(base + HA_RDATA); @@ -665,12 +672,12 @@ static int get_pio_conf_PIO(u32 base, struct get_conf *buf) static void print_pio_config(struct get_conf *gc) { printk("Please check values: (read config data)\n"); - printk("LEN: %d ver:%d OCS:%d TAR:%d TRNXFR:%d MORES:%d\n", (uint) ntohl(gc->len), gc->version, gc->OCS_enabled, gc->TAR_support, gc->TRNXFR, gc->MORE_support); - printk("HAAV:%d SCSIID0:%d ID1:%d ID2:%d QUEUE:%d SG:%d SEC:%d\n", gc->HAA_valid, gc->scsi_id[3], gc->scsi_id[2], gc->scsi_id[1], ntohs(gc->queuesiz), ntohs(gc->SGsiz), gc->SECOND); + printk("LEN: %d ver:%d OCS:%d TAR:%d TRNXFR:%d MORES:%d\n", be32_to_cpu(gc->len), gc->version, gc->OCS_enabled, gc->TAR_support, gc->TRNXFR, gc->MORE_support); + printk("HAAV:%d SCSIID0:%d ID1:%d ID2:%d QUEUE:%d SG:%d SEC:%d\n", gc->HAA_valid, gc->scsi_id[3], gc->scsi_id[2], gc->scsi_id[1], be16_to_cpu(gc->queuesiz), be16_to_cpu(gc->SGsiz), gc->SECOND); printk("IRQ:%d IRQT:%d FORCADR:%d MCH:%d RIDQ:%d\n", gc->IRQ, gc->IRQ_TR, gc->FORCADR, gc->MAX_CHAN, gc->ID_qest); } -static uint print_selftest(uint base) +static unsigned int print_selftest(unsigned int base) { unsigned char buffer[512]; #ifdef VERBOSE_SETUP @@ -697,7 +704,7 @@ static uint print_selftest(uint base) return (!(inb(base + HA_RSTATUS) & HA_SERROR)); } -static int register_pio_HBA(long base, struct get_conf *gc) +static int register_pio_HBA(long base, struct get_conf *gc, struct pci_dev *pdev) { unsigned long size = 0; char *buff; @@ -714,17 +721,17 @@ static int register_pio_HBA(long base, struct get_conf *gc) return 0; } - if ((buff = get_pio_board_data((uint) base, gc->IRQ, gc->scsi_id[3], cplen = (htonl(gc->cplen) + 1) / 2, cppadlen = (htons(gc->cppadlen) + 1) / 2)) == NULL) { - printk("HBA at %#lx didn't react on INQUIRY. Sorry.\n", (unsigned long) base); + if ((buff = get_pio_board_data(base, gc->IRQ, gc->scsi_id[3], cplen = (cpu_to_be32(gc->cplen) + 1) / 2, cppadlen = (cpu_to_be16(gc->cppadlen) + 1) / 2)) == NULL) { + printk("HBA at %#lx didn't react on INQUIRY. Sorry.\n", base); return 0; } if (!print_selftest(base) && !ALLOW_DMA_BOARDS) { - printk("HBA at %#lx failed while performing self test & setup.\n", (unsigned long) base); + printk("HBA at %#lx failed while performing self test & setup.\n", base); return 0; } - size = sizeof(hostdata) + (sizeof(struct eata_ccb) * ntohs(gc->queuesiz)); + size = sizeof(hostdata) + (sizeof(struct eata_ccb) * be16_to_cpu(gc->queuesiz)); sh = scsi_register(&driver_template, size); if (sh == NULL) @@ -749,8 +756,8 @@ static int register_pio_HBA(long base, struct get_conf *gc) hd = SD(sh); - memset(hd->ccb, 0, (sizeof(struct eata_ccb) * ntohs(gc->queuesiz))); - memset(hd->reads, 0, sizeof(unsigned long) * 26); + memset(hd->ccb, 0, (sizeof(struct eata_ccb) * be16_to_cpu(gc->queuesiz))); + memset(hd->reads, 0, sizeof(hd->reads)); strlcpy(SD(sh)->vendor, &buff[8], sizeof(SD(sh)->vendor)); strlcpy(SD(sh)->name, &buff[16], sizeof(SD(sh)->name)); @@ -761,7 +768,7 @@ static int register_pio_HBA(long base, struct get_conf *gc) SD(sh)->revision[4] = buff[35]; SD(sh)->revision[5] = 0; - switch (ntohl(gc->len)) { + switch (be32_to_cpu(gc->len)) { case 0x1c: SD(sh)->EATA_revision = 'a'; break; @@ -777,7 +784,7 @@ static int register_pio_HBA(long base, struct get_conf *gc) SD(sh)->EATA_revision = '?'; } - if (ntohl(gc->len) >= 0x22) { + if (be32_to_cpu(gc->len) >= 0x22) { if (gc->is_PCI) hd->bustype = IS_PCI; else if (gc->is_EISA) @@ -811,6 +818,8 @@ static int register_pio_HBA(long base, struct get_conf *gc) hd->channel = 0; + hd->pdev = pci_dev_get(pdev); /* Keep a PCI reference */ + sh->max_id = 8; sh->max_lun = 8; @@ -841,7 +850,7 @@ static void find_pio_ISA(struct get_conf *buf) continue; if (!get_pio_conf_PIO(ISAbases[i], buf)) continue; - if (!register_pio_HBA(ISAbases[i], buf)) + if (!register_pio_HBA(ISAbases[i], buf, NULL)) release_region(ISAbases[i], 9); else ISAbases[i] = 0; @@ -873,7 +882,7 @@ static void find_pio_EISA(struct get_conf *buf) if (get_pio_conf_PIO(base, buf)) { DBG(DBG_PROBE && DBG_EISA, print_pio_config(buf)); if (buf->IRQ) { - if (!register_pio_HBA(base, buf)) + if (!register_pio_HBA(base, buf, NULL)) release_region(base, 9); } else { printk(KERN_NOTICE "eata_dma: No valid IRQ. HBA " "removed from list\n"); @@ -896,9 +905,9 @@ static void find_pio_PCI(struct get_conf *buf) printk("eata_dma: kernel PCI support not enabled. Skipping scan for PCI HBAs.\n"); #else struct pci_dev *dev = NULL; - u32 base, x; + unsigned long base, x; - while ((dev = pci_find_device(PCI_VENDOR_ID_DPT, PCI_DEVICE_ID_DPT, dev)) != NULL) { + while ((dev = pci_get_device(PCI_VENDOR_ID_DPT, PCI_DEVICE_ID_DPT, dev)) != NULL) { DBG(DBG_PROBE && DBG_PCI, printk("eata_pio: find_PCI, HBA at %s\n", pci_name(dev))); if (pci_enable_device(dev)) continue; @@ -926,7 +935,7 @@ static void find_pio_PCI(struct get_conf *buf) * eventually remove it from the EISA and ISA list */ - if (!register_pio_HBA(base, buf)) { + if (!register_pio_HBA(base, buf, dev)) { release_region(base, 9); continue; } @@ -976,12 +985,12 @@ static int eata_pio_detect(struct scsi_host_template *tpnt) printk("Registered HBAs:\n"); printk("HBA no. Boardtype: Revis: EATA: Bus: BaseIO: IRQ: Ch: ID: Pr:" " QS: SG: CPL:\n"); for (i = 1; i <= registered_HBAs; i++) { - printk("scsi%-2d: %.10s v%s 2.0%c %s %#.4x %2d %d %d %c" + printk("scsi%-2d: %.10s v%s 2.0%c %s %#.4lx %2d %d %d %c" " %2d %2d %2d\n", HBA_ptr->host_no, SD(HBA_ptr)->name, SD(HBA_ptr)->revision, SD(HBA_ptr)->EATA_revision, (SD(HBA_ptr)->bustype == 'P') ? "PCI " : (SD(HBA_ptr)->bustype == 'E') ? "EISA" : "ISA ", - (uint) HBA_ptr->base, HBA_ptr->irq, SD(HBA_ptr)->channel, HBA_ptr->this_id, + HBA_ptr->base, HBA_ptr->irq, SD(HBA_ptr)->channel, HBA_ptr->this_id, SD(HBA_ptr)->primary ? 'Y' : 'N', HBA_ptr->can_queue, HBA_ptr->sg_tablesize, HBA_ptr->cmd_per_lun); HBA_ptr = SD(HBA_ptr)->next; From a07f353701acae77e023f6270e8af353b37af7c4 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 15 Sep 2006 15:34:32 +0100 Subject: [PATCH 1059/1063] [SCSI] Switch some more scsi drivers to pci_get_device and refcounted pci structures Signed-off-by: Alan Cox Signed-off-by: James Bottomley --- drivers/scsi/BusLogic.c | 15 ++++++++------- drivers/scsi/aic7xxx_old.c | 9 +++++++-- drivers/scsi/dpt_i2o.c | 6 +++++- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c index 59d1adaed73e..4ea49fd7965e 100644 --- a/drivers/scsi/BusLogic.c +++ b/drivers/scsi/BusLogic.c @@ -662,7 +662,7 @@ static int __init BusLogic_InitializeMultiMasterProbeInfo(struct BusLogic_HostAd particular standard ISA I/O Address need not be probed. */ PrimaryProbeInfo->IO_Address = 0; - while ((PCI_Device = pci_find_device(PCI_VENDOR_ID_BUSLOGIC, PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER, PCI_Device)) != NULL) { + while ((PCI_Device = pci_get_device(PCI_VENDOR_ID_BUSLOGIC, PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER, PCI_Device)) != NULL) { struct BusLogic_HostAdapter *HostAdapter = PrototypeHostAdapter; struct BusLogic_PCIHostAdapterInformation PCIHostAdapterInformation; enum BusLogic_ISACompatibleIOPort ModifyIOAddressRequest; @@ -762,7 +762,7 @@ static int __init BusLogic_InitializeMultiMasterProbeInfo(struct BusLogic_HostAd PrimaryProbeInfo->Bus = Bus; PrimaryProbeInfo->Device = Device; PrimaryProbeInfo->IRQ_Channel = IRQ_Channel; - PrimaryProbeInfo->PCI_Device = PCI_Device; + PrimaryProbeInfo->PCI_Device = pci_dev_get(PCI_Device); PCIMultiMasterCount++; } else if (BusLogic_ProbeInfoCount < BusLogic_MaxHostAdapters) { struct BusLogic_ProbeInfo *ProbeInfo = &BusLogic_ProbeInfoList[BusLogic_ProbeInfoCount++]; @@ -773,7 +773,7 @@ static int __init BusLogic_InitializeMultiMasterProbeInfo(struct BusLogic_HostAd ProbeInfo->Bus = Bus; ProbeInfo->Device = Device; ProbeInfo->IRQ_Channel = IRQ_Channel; - ProbeInfo->PCI_Device = PCI_Device; + ProbeInfo->PCI_Device = pci_dev_get(PCI_Device); NonPrimaryPCIMultiMasterCount++; PCIMultiMasterCount++; } else @@ -823,7 +823,7 @@ static int __init BusLogic_InitializeMultiMasterProbeInfo(struct BusLogic_HostAd noting the PCI bus location and assigned IRQ Channel. */ PCI_Device = NULL; - while ((PCI_Device = pci_find_device(PCI_VENDOR_ID_BUSLOGIC, PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER_NC, PCI_Device)) != NULL) { + while ((PCI_Device = pci_get_device(PCI_VENDOR_ID_BUSLOGIC, PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER_NC, PCI_Device)) != NULL) { unsigned char Bus; unsigned char Device; unsigned int IRQ_Channel; @@ -850,7 +850,7 @@ static int __init BusLogic_InitializeMultiMasterProbeInfo(struct BusLogic_HostAd ProbeInfo->Bus = Bus; ProbeInfo->Device = Device; ProbeInfo->IRQ_Channel = IRQ_Channel; - ProbeInfo->PCI_Device = PCI_Device; + ProbeInfo->PCI_Device = pci_dev_get(PCI_Device); break; } } @@ -874,7 +874,7 @@ static int __init BusLogic_InitializeFlashPointProbeInfo(struct BusLogic_HostAda /* Interrogate PCI Configuration Space for any FlashPoint Host Adapters. */ - while ((PCI_Device = pci_find_device(PCI_VENDOR_ID_BUSLOGIC, PCI_DEVICE_ID_BUSLOGIC_FLASHPOINT, PCI_Device)) != NULL) { + while ((PCI_Device = pci_get_device(PCI_VENDOR_ID_BUSLOGIC, PCI_DEVICE_ID_BUSLOGIC_FLASHPOINT, PCI_Device)) != NULL) { unsigned char Bus; unsigned char Device; unsigned int IRQ_Channel; @@ -923,7 +923,7 @@ static int __init BusLogic_InitializeFlashPointProbeInfo(struct BusLogic_HostAda ProbeInfo->Bus = Bus; ProbeInfo->Device = Device; ProbeInfo->IRQ_Channel = IRQ_Channel; - ProbeInfo->PCI_Device = PCI_Device; + ProbeInfo->PCI_Device = pci_dev_get(PCI_Device); FlashPointCount++; } else BusLogic_Warning("BusLogic: Too many Host Adapters " "detected\n", NULL); @@ -1890,6 +1890,7 @@ static void BusLogic_ReleaseResources(struct BusLogic_HostAdapter *HostAdapter) */ if (HostAdapter->MailboxSpace) pci_free_consistent(HostAdapter->PCI_Device, HostAdapter->MailboxSize, HostAdapter->MailboxSpace, HostAdapter->MailboxSpaceHandle); + pci_dev_put(HostAdapter->PCI_Device); HostAdapter->MailboxSpace = NULL; HostAdapter->MailboxSpaceHandle = 0; HostAdapter->MailboxSize = 0; diff --git a/drivers/scsi/aic7xxx_old.c b/drivers/scsi/aic7xxx_old.c index ba3bccafe113..5dcef48d414f 100644 --- a/drivers/scsi/aic7xxx_old.c +++ b/drivers/scsi/aic7xxx_old.c @@ -9194,7 +9194,7 @@ aic7xxx_detect(struct scsi_host_template *template) for (i = 0; i < ARRAY_SIZE(aic_pdevs); i++) { pdev = NULL; - while ((pdev = pci_find_device(aic_pdevs[i].vendor_id, + while ((pdev = pci_get_device(aic_pdevs[i].vendor_id, aic_pdevs[i].device_id, pdev))) { if (pci_enable_device(pdev)) @@ -9651,6 +9651,9 @@ aic7xxx_detect(struct scsi_host_template *template) */ aic7xxx_configure_bugs(temp_p); + /* Hold a pci device reference */ + pci_dev_get(temp_p->pdev); + if ( list_p == NULL ) { list_p = current_p = temp_p; @@ -10987,8 +10990,10 @@ aic7xxx_release(struct Scsi_Host *host) if(!p->pdev) release_region(p->base, MAXREG - MINREG); #ifdef CONFIG_PCI - else + else { pci_release_regions(p->pdev); + pci_dev_put(p->pdev); + } #endif prev = NULL; next = first_aic7xxx; diff --git a/drivers/scsi/dpt_i2o.c b/drivers/scsi/dpt_i2o.c index 45806336ce02..7b3bd34faf47 100644 --- a/drivers/scsi/dpt_i2o.c +++ b/drivers/scsi/dpt_i2o.c @@ -184,7 +184,7 @@ static int adpt_detect(struct scsi_host_template* sht) PINFO("Detecting Adaptec I2O RAID controllers...\n"); /* search for all Adatpec I2O RAID cards */ - while ((pDev = pci_find_device( PCI_DPT_VENDOR_ID, PCI_ANY_ID, pDev))) { + while ((pDev = pci_get_device( PCI_DPT_VENDOR_ID, PCI_ANY_ID, pDev))) { if(pDev->device == PCI_DPT_DEVICE_ID || pDev->device == PCI_DPT_RAPTOR_DEVICE_ID){ if(adpt_install_hba(sht, pDev) ){ @@ -192,8 +192,11 @@ static int adpt_detect(struct scsi_host_template* sht) PERROR("Will not try to detect others.\n"); return hba_count-1; } + pci_dev_get(pDev); } } + if (pDev) + pci_dev_put(pDev); /* In INIT state, Activate IOPs */ for (pHba = hba_chain; pHba; pHba = pHba->next) { @@ -1075,6 +1078,7 @@ static void adpt_i2o_delete_hba(adpt_hba* pHba) } } } + pci_dev_put(pHba->pDev); kfree(pHba); if(hba_count <= 0){ From e17ba8f51f69782abc4575dd30848ba5eafaa797 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 23 Sep 2006 18:31:42 -0700 Subject: [PATCH 1060/1063] [SPARC64]: Fix sched_clock() wrapping every ~17 seconds. Unfortunately, sparc64 doesn't have an easy way to do a "64 X 64 --> 128" bit multiply like PowerPC and IA64 do. We were doing a "64 X 64 --> 64" bit multiple which causes overflow very quickly with a 30-bit quotient shift. So use a quotientshift count of 10 instead of 30, just like x86 and ARM do. This also fixes the wrapping of printk timestamp values every ~17 seconds. Signed-off-by: David S. Miller --- arch/sparc64/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c index 094d3e35be18..b0b4feeec098 100644 --- a/arch/sparc64/kernel/time.c +++ b/arch/sparc64/kernel/time.c @@ -983,7 +983,7 @@ static struct time_interpolator sparc64_cpu_interpolator = { }; /* The quotient formula is taken from the IA64 port. */ -#define SPARC64_NSEC_PER_CYC_SHIFT 30UL +#define SPARC64_NSEC_PER_CYC_SHIFT 10UL void __init time_init(void) { unsigned long clock = sparc64_init_timers(); From 6973dddee264723720e18ad2be5a0a454c0f52d9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 23 Sep 2006 18:32:38 -0700 Subject: [PATCH 1061/1063] [SPARC64]: Update defconfig. Signed-off-by: David S. Miller --- arch/sparc64/defconfig | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/arch/sparc64/defconfig b/arch/sparc64/defconfig index 43d9229fca07..51cf6027b701 100644 --- a/arch/sparc64/defconfig +++ b/arch/sparc64/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.18-rc2 -# Fri Jul 21 14:19:24 2006 +# Linux kernel version: 2.6.18 +# Sat Sep 23 18:32:19 2006 # CONFIG_SPARC=y CONFIG_SPARC64=y @@ -9,6 +9,7 @@ CONFIG_64BIT=y CONFIG_MMU=y CONFIG_TIME_INTERPOLATION=y CONFIG_ARCH_MAY_HAVE_PC_FDC=y +CONFIG_AUDIT_ARCH=y CONFIG_SPARC64_PAGE_SIZE_8KB=y # CONFIG_SPARC64_PAGE_SIZE_64KB is not set # CONFIG_SPARC64_PAGE_SIZE_512KB is not set @@ -37,14 +38,14 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y # CONFIG_BSD_PROCESS_ACCT is not set # CONFIG_TASKSTATS is not set -CONFIG_SYSCTL=y # CONFIG_AUDIT is not set # CONFIG_IKCONFIG is not set CONFIG_RELAY=y CONFIG_INITRAMFS_SOURCE="" -CONFIG_UID16=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_EMBEDDED is not set +CONFIG_UID16=y +CONFIG_SYSCTL=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set @@ -53,12 +54,12 @@ CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y CONFIG_BASE_FULL=y -CONFIG_RT_MUTEXES=y CONFIG_FUTEX=y CONFIG_EPOLL=y CONFIG_SHMEM=y CONFIG_SLAB=y CONFIG_VM_EVENT_COUNTERS=y +CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 # CONFIG_SLOB is not set @@ -169,6 +170,7 @@ CONFIG_PACKET_MMAP=y CONFIG_UNIX=y CONFIG_XFRM=y CONFIG_XFRM_USER=m +# CONFIG_XFRM_SUB_POLICY is not set CONFIG_NET_KEY=m CONFIG_INET=y CONFIG_IP_MULTICAST=y @@ -214,11 +216,15 @@ CONFIG_IPV6_ROUTE_INFO=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +# CONFIG_IPV6_MIP6 is not set CONFIG_INET6_XFRM_TUNNEL=m CONFIG_INET6_TUNNEL=m CONFIG_INET6_XFRM_MODE_TRANSPORT=m CONFIG_INET6_XFRM_MODE_TUNNEL=m +# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set CONFIG_IPV6_TUNNEL=m +# CONFIG_IPV6_SUBTREES is not set +# CONFIG_IPV6_MULTIPLE_TABLES is not set # CONFIG_NETWORK_SECMARK is not set # CONFIG_NETFILTER is not set @@ -233,6 +239,7 @@ CONFIG_IP_DCCP_ACKVEC=y # DCCP CCIDs Configuration (EXPERIMENTAL) # CONFIG_IP_DCCP_CCID2=m +# CONFIG_IP_DCCP_CCID2_DEBUG is not set CONFIG_IP_DCCP_CCID3=m CONFIG_IP_DCCP_TFRC_LIB=m @@ -259,7 +266,6 @@ CONFIG_VLAN_8021Q=m # CONFIG_ATALK is not set # CONFIG_X25 is not set # CONFIG_LAPB is not set -# CONFIG_NET_DIVERT is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set @@ -1386,6 +1392,10 @@ CONFIG_KEYS=y # Cryptographic options # CONFIG_CRYPTO=y +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_MANAGER=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_MD4=y @@ -1395,9 +1405,12 @@ CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_ECB=m +CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_DES=y CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_TWOFISH_COMMON=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_AES=m CONFIG_CRYPTO_CAST5=m From dfdc58ba354adb80d67c99f7be84f95a8e02e466 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 20 Sep 2006 12:00:18 -0400 Subject: [PATCH 1062/1063] [SCSI] SPI transport class: misc DV fixes Key more of the domain validation settings off the inquiry data from the disk (in particular, don't try IU or DT unless the disk claims to support them. Also add a new dv_in_progress flag to prevent recursive DV. Signed-off-by: James Bottomley --- drivers/scsi/scsi_transport_spi.c | 30 +++++++++++++++++++++++------- include/scsi/scsi_transport_spi.h | 3 ++- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c index 29a9a53cdd1a..9f070f0d0f2b 100644 --- a/drivers/scsi/scsi_transport_spi.c +++ b/drivers/scsi/scsi_transport_spi.c @@ -47,6 +47,7 @@ /* Private data accessors (keep these out of the header file) */ #define spi_dv_pending(x) (((struct spi_transport_attrs *)&(x)->starget_data)->dv_pending) +#define spi_dv_in_progress(x) (((struct spi_transport_attrs *)&(x)->starget_data)->dv_in_progress) #define spi_dv_mutex(x) (((struct spi_transport_attrs *)&(x)->starget_data)->dv_mutex) struct spi_internal { @@ -240,6 +241,7 @@ static int spi_setup_transport_attrs(struct transport_container *tc, spi_pcomp_en(starget) = 0; spi_hold_mcs(starget) = 0; spi_dv_pending(starget) = 0; + spi_dv_in_progress(starget) = 0; spi_initial_dv(starget) = 0; mutex_init(&spi_dv_mutex(starget)); @@ -830,28 +832,37 @@ spi_dv_device_internal(struct scsi_device *sdev, u8 *buffer) DV_SET(period, spi_min_period(starget)); /* try QAS requests; this should be harmless to set if the * target supports it */ - if (scsi_device_qas(sdev)) + if (scsi_device_qas(sdev)) { DV_SET(qas, 1); - /* Also try IU transfers */ - if (scsi_device_ius(sdev)) + } else { + DV_SET(qas, 0); + } + + if (scsi_device_ius(sdev) && spi_min_period(starget) < 9) { + /* This u320 (or u640). Set IU transfers */ DV_SET(iu, 1); - if (spi_min_period(starget) < 9) { - /* This u320 (or u640). Ignore the coupled parameters - * like DT and IU, but set the optional ones */ + /* Then set the optional parameters */ DV_SET(rd_strm, 1); DV_SET(wr_flow, 1); DV_SET(rti, 1); if (spi_min_period(starget) == 8) DV_SET(pcomp_en, 1); + } else { + DV_SET(iu, 0); } + /* now that we've done all this, actually check the bus * signal type (if known). Some devices are stupid on * a SE bus and still claim they can try LVD only settings */ if (i->f->get_signalling) i->f->get_signalling(shost); if (spi_signalling(shost) == SPI_SIGNAL_SE || - spi_signalling(shost) == SPI_SIGNAL_HVD) + spi_signalling(shost) == SPI_SIGNAL_HVD || + !scsi_device_dt(sdev)) { DV_SET(dt, 0); + } else { + DV_SET(dt, 1); + } /* Do the read only INQUIRY tests */ spi_dv_retrain(sdev, buffer, buffer + sdev->inquiry_len, spi_dv_device_compare_inquiry); @@ -907,6 +918,10 @@ spi_dv_device(struct scsi_device *sdev) if (unlikely(scsi_device_get(sdev))) return; + if (unlikely(spi_dv_in_progress(starget))) + return; + spi_dv_in_progress(starget) = 1; + buffer = kzalloc(len, GFP_KERNEL); if (unlikely(!buffer)) @@ -938,6 +953,7 @@ spi_dv_device(struct scsi_device *sdev) out_free: kfree(buffer); out_put: + spi_dv_in_progress(starget) = 0; scsi_device_put(sdev); } EXPORT_SYMBOL(spi_dv_device); diff --git a/include/scsi/scsi_transport_spi.h b/include/scsi/scsi_transport_spi.h index 302680c0c0de..da180f738477 100644 --- a/include/scsi/scsi_transport_spi.h +++ b/include/scsi/scsi_transport_spi.h @@ -53,7 +53,8 @@ struct spi_transport_attrs { unsigned int support_ius; /* support Information Units */ unsigned int support_qas; /* supports quick arbitration and selection */ /* Private Fields */ - unsigned int dv_pending:1; /* Internal flag */ + unsigned int dv_pending:1; /* Internal flag: DV Requested */ + unsigned int dv_in_progress:1; /* Internal: DV started */ struct mutex dv_mutex; /* semaphore to serialise dv */ }; From 1b73c4bb063c4aa0cdc25425809bb87f65ee75af Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sat, 23 Sep 2006 22:07:20 -0500 Subject: [PATCH 1063/1063] [SCSI] scsi_transport_fc: fixup netlink arguments nlmsg_multicast now takes an extra allocation flag, so add it to the use in the fibre channel transport class. Signed-off-by: James Bottomley --- drivers/scsi/scsi_transport_fc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 4ab176ed480d..38c215a78f69 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -480,7 +480,8 @@ fc_host_post_event(struct Scsi_Host *shost, u32 event_number, event->event_code = event_code; event->event_data = event_data; - err = nlmsg_multicast(scsi_nl_sock, skb, 0, SCSI_NL_GRP_FC_EVENTS); + err = nlmsg_multicast(scsi_nl_sock, skb, 0, SCSI_NL_GRP_FC_EVENTS, + GFP_KERNEL); if (err && (err != -ESRCH)) /* filter no recipient errors */ /* nlmsg_multicast already kfree_skb'd */ goto send_fail; @@ -554,7 +555,8 @@ fc_host_post_vendor_event(struct Scsi_Host *shost, u32 event_number, event->event_code = FCH_EVT_VENDOR_UNIQUE; memcpy(&event->event_data, data_buf, data_len); - err = nlmsg_multicast(scsi_nl_sock, skb, 0, SCSI_NL_GRP_FC_EVENTS); + err = nlmsg_multicast(scsi_nl_sock, skb, 0, SCSI_NL_GRP_FC_EVENTS, + GFP_KERNEL); if (err && (err != -ESRCH)) /* filter no recipient errors */ /* nlmsg_multicast already kfree_skb'd */ goto send_vendor_fail;