From f322727b92957ccd4b0366dbd42908613222033a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 19 Oct 2007 20:35:02 +0200 Subject: [PATCH 01/33] x86: update .gitignore entries vdso / vsycall create .so.dbg files now. Add *.so.dbg to the main .ignore file Exclude the compile time created boot directory in arch/x86_64 as well Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- .gitignore | 1 + arch/x86_64/.gitignore | 1 + 2 files changed, 2 insertions(+) create mode 100644 arch/x86_64/.gitignore diff --git a/.gitignore b/.gitignore index 22fb8fa9bc3d..8d14531846b9 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ *.s *.ko *.so +*.so.dbg *.mod.c *.i *.lst diff --git a/arch/x86_64/.gitignore b/arch/x86_64/.gitignore new file mode 100644 index 000000000000..36ef4c374d25 --- /dev/null +++ b/arch/x86_64/.gitignore @@ -0,0 +1 @@ +boot From 957ff882f9db86e017f730e77322ec5193178e5e Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Fri, 19 Oct 2007 20:35:02 +0200 Subject: [PATCH 02/33] x86, vsyscall: fix the oops crash with __pa_vsymbol() Appended patch fixes an oops while changing the vsyscall sysctl. I am sure no one tested this code before integrating into mainline :( BTW, using ioremap() in vsyscall_sysctl_change() to get the virtual address of a kernel symbol sounds like an over kill.. I wonder if we can define a simple __va_vsymbol() which will return directly the kernel direct mapping. comments in the code which says gcc has trouble with __va(__pa()) sounds bogus to me. __pa() on a vsyscall address will not work anyhow :( And also, the whole nop out syscall in vsyscall page infrastructure (vsyscall_sysctl_change()) is added to make some attacks difficult, and yet I don't see this nop out being done by default. This area requires more cleanups? Fix an oops with __pa_vsymbol(). VSYSCALL_FIRST_PAGE is a fixmap index. We want the starting virtual address of the vsyscall page and not the index. [ mingo: arch/x86 adaptation ] Reported-by: Yanmin Zhang Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/vsyscall_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 585541ca1a7e..4a2c340ab0f3 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -48,7 +48,7 @@ ({unsigned long v; \ extern char __vsyscall_0; \ asm("" : "=r" (v) : "0" (x)); \ - ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); }) + ((v - VSYSCALL_START) + __pa_symbol(&__vsyscall_0)); }) /* * vsyscall_gtod_data contains data that is : From 3ceba7815cfc0b5d4c2bef5bb58e0c766da63549 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Fri, 19 Oct 2007 20:35:02 +0200 Subject: [PATCH 03/33] x86: use relative symlink for bzImage Use relative symlinks so we can refer bzImage from different tree structures aka via NFS. Moved the creation of directory + symlink after a successful build of the boot parts. Signed-off-by: Sam Ravnborg Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/Makefile | 4 ++-- arch/x86_64/Makefile | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/i386/Makefile b/arch/i386/Makefile index f036d2dee3de..d82f11de60ce 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -131,9 +131,9 @@ all: bzImage zImage zlilo zdisk: KBUILD_IMAGE := arch/x86/boot/zImage zImage bzImage: vmlinux - $(Q)mkdir -p $(objtree)/arch/i386/boot - $(Q)ln -fsn $(objtree)/arch/x86/boot/bzImage $(objtree)/arch/i386/boot/bzImage $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) + $(Q)mkdir -p $(objtree)/arch/i386/boot + $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/i386/boot/bzImage compressed: zImage diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile index 03e1ede27b85..f48d862397fb 100644 --- a/arch/x86_64/Makefile +++ b/arch/x86_64/Makefile @@ -97,9 +97,9 @@ BOOTIMAGE := arch/x86/boot/bzImage KBUILD_IMAGE := $(BOOTIMAGE) bzImage: vmlinux - $(Q)mkdir -p $(objtree)/arch/x86_64/boot - $(Q)ln -fsn $(objtree)/arch/x86/boot/bzImage $(objtree)/arch/x86_64/boot/bzImage $(Q)$(MAKE) $(build)=$(boot) $(BOOTIMAGE) + $(Q)mkdir -p $(objtree)/arch/x86_64/boot + $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/x86_64/boot/bzImage bzlilo: vmlinux $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) zlilo From 54ffaa45c5f572ff6c344ca583137d0edf2d78cc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 19 Oct 2007 20:35:02 +0200 Subject: [PATCH 04/33] x86: fix CONFIG_NUMA and nosmp | maxcpus=0/1 crash x86 NUMA kernels crash in the scheduler setup code if "nosmp" or "maxcpus=0" is passed on the boot command line: | Brought up 1 CPUs | BUG: unable to handle kernel NULL pointer dereference at virtual address 00000000 | printing eip: c011f0b5 *pde = 00000000 | Oops: 0000 [#1] SMP | | Pid: 1, comm: swapper Not tainted (2.6.23 #67) | EIP: 0060:[] EFLAGS: 00010246 CPU: 0 | EIP is at sd_degenerate+0x35/0x40 the reason is sloppy spaghetti code in smpboot_32.c that resulted in a missing map_cpu_to_logical_apicid() call - which also had the side-effect of setting up the cpu_2_node[] entry for the lone CPU. That resulted in node_to_cpumask(0) resulting in 00000000 - confusing the sched-domains setup code. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/smpboot_32.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index be3faac04719..65e5de7d64db 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -1008,6 +1008,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); smpboot_clear_io_apic_irqs(); phys_cpu_present_map = physid_mask_of_physid(0); + map_cpu_to_logical_apicid(); cpu_set(0, per_cpu(cpu_sibling_map, 0)); cpu_set(0, per_cpu(cpu_core_map, 0)); return; @@ -1029,6 +1030,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) } smpboot_clear_io_apic_irqs(); phys_cpu_present_map = physid_mask_of_physid(0); + map_cpu_to_logical_apicid(); cpu_set(0, per_cpu(cpu_sibling_map, 0)); cpu_set(0, per_cpu(cpu_core_map, 0)); return; From 0387f4511e05a5cba8570e47cd1c7b7cc7875787 Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Thu, 18 Oct 2007 15:26:51 -0400 Subject: [PATCH 05/33] GEODE: use symbolic constant in cs5536 reboot fixup Simple cosmetic update for the cs5536 reboot fixup; define the MSR that's used for rebooting in geode.h, and use the define. Signed-off-by: Andres Salomon Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot_fixups_32.c | 8 +++----- include/asm-x86/geode.h | 2 ++ 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c index 8b30b26ad069..1a07bbea7be3 100644 --- a/arch/x86/kernel/reboot_fixups_32.c +++ b/arch/x86/kernel/reboot_fixups_32.c @@ -12,6 +12,7 @@ #include #include #include +#include static void cs5530a_warm_reset(struct pci_dev *dev) { @@ -24,11 +25,8 @@ static void cs5530a_warm_reset(struct pci_dev *dev) static void cs5536_warm_reset(struct pci_dev *dev) { - /* - * 6.6.2.12 Soft Reset (DIVIL_SOFT_RESET) - * writing 1 to the LSB of this MSR causes a hard reset. - */ - wrmsrl(0x51400017, 1ULL); + /* writing 1 to the LSB of this MSR causes a hard reset */ + wrmsrl(MSR_DIVIL_SOFT_RESET, 1ULL); udelay(50); /* shouldn't get here but be safe and spin a while */ } diff --git a/include/asm-x86/geode.h b/include/asm-x86/geode.h index d94898831bac..771af336734f 100644 --- a/include/asm-x86/geode.h +++ b/include/asm-x86/geode.h @@ -38,6 +38,8 @@ extern int geode_get_dev_base(unsigned int dev); #define MSR_LBAR_ACPI 0x5140000E #define MSR_LBAR_PMS 0x5140000F +#define MSR_DIVIL_SOFT_RESET 0x51400017 + #define MSR_PIC_YSEL_LOW 0x51400020 #define MSR_PIC_YSEL_HIGH 0x51400021 #define MSR_PIC_ZSEL_LOW 0x51400022 From 06b4f2a51c02ad6190b569b67ac493659d9df68c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 19 Oct 2007 20:35:02 +0200 Subject: [PATCH 06/33] x86: move cpufreq Kconfigs to the same directory Move the 64bit Kconfig file to arch/x86/kernel/cpu/cpufreq, so we can unify them. Signed-off-by: Thomas Gleixner --- arch/i386/Kconfig | 2 +- arch/x86/kernel/cpu/cpufreq/{Kconfig => Kconfig_32} | 0 arch/x86/kernel/{cpufreq/Kconfig => cpu/cpufreq/Kconfig_64} | 2 +- arch/x86_64/Kconfig | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename arch/x86/kernel/cpu/cpufreq/{Kconfig => Kconfig_32} (100%) rename arch/x86/kernel/{cpufreq/Kconfig => cpu/cpufreq/Kconfig_64} (97%) diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index b84d5050e92e..ffcbbba806e0 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -1080,7 +1080,7 @@ config APM_REAL_MODE_POWER_OFF endif # APM -source "arch/x86/kernel/cpu/cpufreq/Kconfig" +source "arch/x86/kernel/cpu/cpufreq/Kconfig_32" endmenu diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig_32 similarity index 100% rename from arch/x86/kernel/cpu/cpufreq/Kconfig rename to arch/x86/kernel/cpu/cpufreq/Kconfig_32 diff --git a/arch/x86/kernel/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig_64 similarity index 97% rename from arch/x86/kernel/cpufreq/Kconfig rename to arch/x86/kernel/cpu/cpufreq/Kconfig_64 index a3fd51926cbd..9c9699fdcf52 100644 --- a/arch/x86/kernel/cpufreq/Kconfig +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig_64 @@ -19,7 +19,7 @@ config X86_POWERNOW_K8 To compile this driver as a module, choose M here: the module will be called powernow-k8. - For details, take a look at . + For details, take a look at . If in doubt, say N. diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 78cb68f2ebbd..712ebd9c5d6a 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -723,7 +723,7 @@ config ARCH_HIBERNATION_HEADER source "drivers/acpi/Kconfig" -source "arch/x86/kernel/cpufreq/Kconfig" +source "arch/x86/kernel/cpu/cpufreq/Kconfig_64" endmenu From 096708dcf71de6ab5f839ae68ee42275673dc178 Mon Sep 17 00:00:00 2001 From: Adrian Knoth Date: Thu, 18 Oct 2007 16:58:58 +0200 Subject: [PATCH 07/33] Kconfig: Missing line breaks in arch/x86_64/Kconfig The helptext for IA32_EMULATION in arch/x86_64/Kconfig is wider than 80 chars, thus failing to be displayed in 80x24 screens. This patch re-breaks lines. Signed-off-by: Adrian Knoth --- arch/x86_64/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 712ebd9c5d6a..ecfd4b18a200 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -766,9 +766,9 @@ source "fs/Kconfig.binfmt" config IA32_EMULATION bool "IA32 Emulation" help - Include code to run 32-bit programs under a 64-bit kernel. You should likely - turn this on, unless you're 100% sure that you don't have any 32-bit programs - left. + Include code to run 32-bit programs under a 64-bit kernel. You should + likely turn this on, unless you're 100% sure that you don't have any + 32-bit programs left. config IA32_AOUT tristate "IA32 a.out support" From af93ebc0b3ed8cdf93a6ed4bc1fab548f8059d0a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 19 Oct 2007 20:35:02 +0200 Subject: [PATCH 08/33] x86: remove page_fault_trace Old debugging code that is not really needed anymore. If someone wants it it would be better replaced with a systemtap script or kprobe. This avoids a potential cache miss during page fault processing. [ mingo: arch/x86 adaptation ] Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/fault_64.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c index 5e0e54906c48..af79cf60866c 100644 --- a/arch/x86/mm/fault_64.c +++ b/arch/x86/mm/fault_64.c @@ -285,7 +285,6 @@ static int vmalloc_fault(unsigned long address) return 0; } -static int page_fault_trace; int show_unhandled_signals = 1; /* @@ -354,10 +353,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, if (likely(regs->eflags & X86_EFLAGS_IF)) local_irq_enable(); - if (unlikely(page_fault_trace)) - printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n", - regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code); - if (unlikely(error_code & PF_RSVD)) pgtable_bad(address, regs, error_code); @@ -621,10 +616,3 @@ void vmalloc_sync_all(void) BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == (__START_KERNEL & PGDIR_MASK))); } - -static int __init enable_pagefaulttrace(char *str) -{ - page_fault_trace = 1; - return 1; -} -__setup("pagefaulttrace", enable_pagefaulttrace); From 7778887880d278c23dc0975210df0381c878ae1e Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 19 Oct 2007 20:35:02 +0200 Subject: [PATCH 09/33] x86: merge init_task_32/64.c Merge init_task_32/64.c. Move 64bit per cpu data orig_ist to setup64.c. [ mingo: fixed checkpatch trivialities. ] Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/i386/Makefile | 2 +- arch/x86/kernel/Makefile_32 | 2 +- arch/x86/kernel/Makefile_64 | 2 +- .../kernel/{init_task_32.c => init_task.c} | 11 ++-- arch/x86/kernel/init_task_64.c | 54 ------------------- arch/x86/kernel/setup64.c | 6 +++ arch/x86_64/Makefile | 2 +- 7 files changed, 16 insertions(+), 63 deletions(-) rename arch/x86/kernel/{init_task_32.c => init_task.c} (79%) delete mode 100644 arch/x86/kernel/init_task_64.c diff --git a/arch/i386/Makefile b/arch/i386/Makefile index d82f11de60ce..b88e47ca3032 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -102,7 +102,7 @@ core-$(CONFIG_XEN) += arch/x86/xen/ # default subarch .h files mflags-y += -Iinclude/asm-x86/mach-default -head-y := arch/x86/kernel/head_32.o arch/x86/kernel/init_task_32.o +head-y := arch/x86/kernel/head_32.o arch/x86/kernel/init_task.o libs-y += arch/x86/lib/ core-y += arch/x86/kernel/ \ diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32 index a3fa11f8f460..69990f0ba86e 100644 --- a/arch/x86/kernel/Makefile_32 +++ b/arch/x86/kernel/Makefile_32 @@ -2,7 +2,7 @@ # Makefile for the linux kernel. # -extra-y := head_32.o init_task_32.o vmlinux.lds +extra-y := head_32.o init_task.o vmlinux.lds obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \ ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \ diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64 index 43da66213a47..2fc3d3f2c27e 100644 --- a/arch/x86/kernel/Makefile_64 +++ b/arch/x86/kernel/Makefile_64 @@ -2,7 +2,7 @@ # Makefile for the linux kernel. # -extra-y := head_64.o head64.o init_task_64.o vmlinux.lds +extra-y := head_64.o head64.o init_task.o vmlinux.lds EXTRA_AFLAGS := -traditional obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \ ptrace_64.o time_64.o ioport_64.o ldt_64.o setup_64.o i8259_64.o sys_x86_64.o \ diff --git a/arch/x86/kernel/init_task_32.c b/arch/x86/kernel/init_task.c similarity index 79% rename from arch/x86/kernel/init_task_32.c rename to arch/x86/kernel/init_task.c index d26fc063a760..468c9c437842 100644 --- a/arch/x86/kernel/init_task_32.c +++ b/arch/x86/kernel/init_task.c @@ -15,7 +15,6 @@ static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); - EXPORT_SYMBOL(init_mm); /* @@ -25,7 +24,7 @@ EXPORT_SYMBOL(init_mm); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union +union thread_union init_thread_union __attribute__((__section__(".data.init_task"))) = { INIT_THREAD_INFO(init_task) }; @@ -35,12 +34,14 @@ union thread_union init_thread_union * All other task structs will be allocated on slabs in fork.c */ struct task_struct init_task = INIT_TASK(init_task); - EXPORT_SYMBOL(init_task); /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, - * no more per-task TSS's. - */ + * no more per-task TSS's. The TSS size is kept cacheline-aligned + * so they are allowed to end up in the .data.cacheline_aligned + * section. Since TSS's are completely CPU-local, we want them + * on exact cacheline boundaries, to eliminate cacheline ping-pong. + */ DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; diff --git a/arch/x86/kernel/init_task_64.c b/arch/x86/kernel/init_task_64.c deleted file mode 100644 index 4ff33d4f8551..000000000000 --- a/arch/x86/kernel/init_task_64.c +++ /dev/null @@ -1,54 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; -static struct signal_struct init_signals = INIT_SIGNALS(init_signals); -static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); -struct mm_struct init_mm = INIT_MM(init_mm); - -EXPORT_SYMBOL(init_mm); - -/* - * Initial task structure. - * - * We need to make sure that this is 8192-byte aligned due to the - * way process stacks are handled. This is done by having a special - * "init_task" linker map entry.. - */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; - -/* - * Initial task structure. - * - * All other task structs will be allocated on slabs in fork.c - */ -struct task_struct init_task = INIT_TASK(init_task); - -EXPORT_SYMBOL(init_task); -/* - * per-CPU TSS segments. Threads are completely 'soft' on Linux, - * no more per-task TSS's. The TSS size is kept cacheline-aligned - * so they are allowed to end up in the .data.cacheline_aligned - * section. Since TSS's are completely CPU-local, we want them - * on exact cacheline boundaries, to eliminate cacheline ping-pong. - */ -DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; - -/* Copies of the original ist values from the tss are only accessed during - * debugging, no special alignment required. - */ -DEFINE_PER_CPU(struct orig_ist, orig_ist); - -#define ALIGN_TO_4K __attribute__((section(".data.init_task"))) diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c index ba9188235057..e11886e8d8ca 100644 --- a/arch/x86/kernel/setup64.c +++ b/arch/x86/kernel/setup64.c @@ -184,6 +184,12 @@ void __cpuinit check_efer(void) unsigned long kernel_eflags; +/* + * Copies of the original ist values from the tss are only accessed during + * debugging, no special alignment required. + */ +DEFINE_PER_CPU(struct orig_ist, orig_ist); + /* * cpu_init() initializes state that is per-CPU. Some data is already * initialized (naturally) in the bootstrap process, such as the GDT diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile index f48d862397fb..6d89ab762ffc 100644 --- a/arch/x86_64/Makefile +++ b/arch/x86_64/Makefile @@ -74,7 +74,7 @@ KBUILD_CFLAGS += $(cflags-y) CFLAGS_KERNEL += $(cflags-kernel-y) KBUILD_AFLAGS += -m64 -head-y := arch/x86/kernel/head_64.o arch/x86/kernel/head64.o arch/x86/kernel/init_task_64.o +head-y := arch/x86/kernel/head_64.o arch/x86/kernel/head64.o arch/x86/kernel/init_task.o libs-y += arch/x86/lib/ core-y += arch/x86/kernel/ \ From 95d1b8f98138b1300bfecd8fbf3a93e10e74dc5f Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 19 Oct 2007 20:35:02 +0200 Subject: [PATCH 10/33] x86: Use linux/elfcore-compat.h This makes x86-64's ia32 code use the new linux/elfcore-compat.h, reducing some hand-copied duplication. Signed-off-by: Roland McGrath Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Andi Kleen Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/ia32/ia32_binfmt.c | 124 +++++++++++++----------------------- include/asm-x86/compat.h | 6 ++ 2 files changed, 52 insertions(+), 78 deletions(-) diff --git a/arch/x86/ia32/ia32_binfmt.c b/arch/x86/ia32/ia32_binfmt.c index 5027650eb273..55822d2cf053 100644 --- a/arch/x86/ia32/ia32_binfmt.c +++ b/arch/x86/ia32/ia32_binfmt.c @@ -5,10 +5,6 @@ * This tricks binfmt_elf.c into loading 32bit binaries using lots * of ugly preprocessor tricks. Talk about very very poor man's inheritance. */ -#define __ASM_X86_64_ELF_H 1 - -#undef ELF_CLASS -#define ELF_CLASS ELFCLASS32 #include #include @@ -19,6 +15,7 @@ #include #include #include +#include #include #include @@ -31,6 +28,20 @@ #include #include +#undef ELF_ARCH +#undef ELF_CLASS +#define ELF_CLASS ELFCLASS32 +#define ELF_ARCH EM_386 + +#undef elfhdr +#undef elf_phdr +#undef elf_note +#undef elf_addr_t +#define elfhdr elf32_hdr +#define elf_phdr elf32_phdr +#define elf_note elf32_note +#define elf_addr_t Elf32_Off + #define ELF_NAME "elf/i386" #define AT_SYSINFO 32 @@ -48,74 +59,20 @@ int sysctl_vsyscall32 = 1; } while(0) struct file; -struct elf_phdr; #define IA32_EMULATOR 1 +#undef ELF_ET_DYN_BASE + #define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) -#undef ELF_ARCH -#define ELF_ARCH EM_386 - -#define ELF_DATA ELFDATA2LSB - -#define USE_ELF_CORE_DUMP 1 - -/* Override elfcore.h */ -#define _LINUX_ELFCORE_H 1 -typedef unsigned int elf_greg_t; - -#define ELF_NGREG (sizeof (struct user_regs_struct32) / sizeof(elf_greg_t)) -typedef elf_greg_t elf_gregset_t[ELF_NGREG]; - -struct elf_siginfo -{ - int si_signo; /* signal number */ - int si_code; /* extra code */ - int si_errno; /* errno */ -}; - #define jiffies_to_timeval(a,b) do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; }while(0) -struct elf_prstatus -{ - struct elf_siginfo pr_info; /* Info associated with signal */ - short pr_cursig; /* Current signal */ - unsigned int pr_sigpend; /* Set of pending signals */ - unsigned int pr_sighold; /* Set of held signals */ - pid_t pr_pid; - pid_t pr_ppid; - pid_t pr_pgrp; - pid_t pr_sid; - struct compat_timeval pr_utime; /* User time */ - struct compat_timeval pr_stime; /* System time */ - struct compat_timeval pr_cutime; /* Cumulative user time */ - struct compat_timeval pr_cstime; /* Cumulative system time */ - elf_gregset_t pr_reg; /* GP registers */ - int pr_fpvalid; /* True if math co-processor being used. */ -}; - -#define ELF_PRARGSZ (80) /* Number of chars for args */ - -struct elf_prpsinfo -{ - char pr_state; /* numeric process state */ - char pr_sname; /* char for pr_state */ - char pr_zomb; /* zombie */ - char pr_nice; /* nice val */ - unsigned int pr_flag; /* flags */ - __u16 pr_uid; - __u16 pr_gid; - pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid; - /* Lots missing */ - char pr_fname[16]; /* filename of executable */ - char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */ -}; - #define _GET_SEG(x) \ ({ __u32 seg; asm("movl %%" __stringify(x) ",%0" : "=r"(seg)); seg; }) /* Assumes current==process to be dumped */ +#undef ELF_CORE_COPY_REGS #define ELF_CORE_COPY_REGS(pr_reg, regs) \ pr_reg[0] = regs->rbx; \ pr_reg[1] = regs->rcx; \ @@ -135,36 +92,41 @@ struct elf_prpsinfo pr_reg[15] = regs->rsp; \ pr_reg[16] = regs->ss; -#define user user32 + +#define elf_prstatus compat_elf_prstatus +#define elf_prpsinfo compat_elf_prpsinfo +#define elf_fpregset_t struct user_i387_ia32_struct +#define elf_fpxregset_t struct user32_fxsr_struct +#define user user32 #undef elf_read_implies_exec #define elf_read_implies_exec(ex, executable_stack) (executable_stack != EXSTACK_DISABLE_X) -//#include -#include -typedef struct user_i387_ia32_struct elf_fpregset_t; -typedef struct user32_fxsr_struct elf_fpxregset_t; - - -static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *regs) +#define elf_core_copy_regs elf32_core_copy_regs +static inline void elf32_core_copy_regs(compat_elf_gregset_t *elfregs, + struct pt_regs *regs) { - ELF_CORE_COPY_REGS((*elfregs), regs) + ELF_CORE_COPY_REGS((&elfregs->ebx), regs) } -static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs) +#define elf_core_copy_task_regs elf32_core_copy_task_regs +static inline int elf32_core_copy_task_regs(struct task_struct *t, + compat_elf_gregset_t* elfregs) { struct pt_regs *pp = task_pt_regs(t); - ELF_CORE_COPY_REGS((*elfregs), pp); + ELF_CORE_COPY_REGS((&elfregs->ebx), pp); /* fix wrong segments */ - (*elfregs)[7] = t->thread.ds; - (*elfregs)[9] = t->thread.fsindex; - (*elfregs)[10] = t->thread.gsindex; - (*elfregs)[8] = t->thread.es; + elfregs->ds = t->thread.ds; + elfregs->fs = t->thread.fsindex; + elfregs->gs = t->thread.gsindex; + elfregs->es = t->thread.es; return 1; } +#define elf_core_copy_task_fpregs elf32_core_copy_task_fpregs static inline int -elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, elf_fpregset_t *fpu) +elf32_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, + elf_fpregset_t *fpu) { struct _fpstate_ia32 *fpstate = (void*)fpu; mm_segment_t oldfs = get_fs(); @@ -186,8 +148,9 @@ elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, elf_fpr #define ELF_CORE_COPY_XFPREGS 1 #define ELF_CORE_XFPREG_TYPE NT_PRXFPREG +#define elf_core_copy_task_xfpregs elf32_core_copy_task_xfpregs static inline int -elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregset_t *xfpu) +elf32_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregset_t *xfpu) { struct pt_regs *regs = task_pt_regs(t); if (!tsk_used_math(t)) @@ -206,6 +169,10 @@ elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregset_t *xfpu) extern int force_personality32; +#undef ELF_EXEC_PAGESIZE +#undef ELF_HWCAP +#undef ELF_PLATFORM +#undef SET_PERSONALITY #define ELF_EXEC_PAGESIZE PAGE_SIZE #define ELF_HWCAP (boot_cpu_data.x86_capability[0]) #define ELF_PLATFORM ("i686") @@ -231,6 +198,7 @@ do { \ #define load_elf_binary load_elf32_binary +#undef ELF_PLAT_INIT #define ELF_PLAT_INIT(r, load_addr) elf32_init(r) #undef start_thread diff --git a/include/asm-x86/compat.h b/include/asm-x86/compat.h index 53cb96b68a62..66ba7987184a 100644 --- a/include/asm-x86/compat.h +++ b/include/asm-x86/compat.h @@ -6,6 +6,7 @@ */ #include #include +#include #define COMPAT_USER_HZ 100 @@ -180,6 +181,11 @@ struct compat_shmid64_ds { compat_ulong_t __unused5; }; +/* + * The type of struct elf_prstatus.pr_reg in compatible core dumps. + */ +typedef struct user_regs_struct32 compat_elf_gregset_t; + /* * A pointer passed in from user mode. This should not * be used for syscall parameters, just declare them From b3f5dde15ebe984e178d4660b6354285931b20ff Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 19 Oct 2007 20:35:02 +0200 Subject: [PATCH 11/33] x86: don't zero pad addresses in segfault message don't zero pad addresses in segfault message. Matches the other trap messages. This leaves some more space for the new file name. [ mingo: arch/x86 adaptation ] Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/fault_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c index af79cf60866c..a881f0796169 100644 --- a/arch/x86/mm/fault_64.c +++ b/arch/x86/mm/fault_64.c @@ -483,7 +483,7 @@ bad_area_nosemaphore: if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && printk_ratelimit()) { printk( - "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n", + "%s%s[%d]: segfault at %lx rip %lx rsp %lx error %lx\n", tsk->pid > 1 ? KERN_INFO : KERN_EMERG, tsk->comm, tsk->pid, address, regs->rip, regs->rsp, error_code); From 764922376ca39085e217656af6784a3377d98566 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 19 Oct 2007 20:35:02 +0200 Subject: [PATCH 12/33] x86: quirk.c trivial coding style and white space cleanup Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/quirks.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index d769e204f942..1fbc5380e27c 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -45,9 +45,12 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) if (!(config & 0x2)) pci_write_config_byte(dev, 0xf4, config); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, + quirk_intel_irqbalance); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, + quirk_intel_irqbalance); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, + quirk_intel_irqbalance); #endif #if defined(CONFIG_HPET_TIMER) @@ -146,17 +149,17 @@ static void ich_force_enable_hpet(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0, - ich_force_enable_hpet); + ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, - ich_force_enable_hpet); + ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0, - ich_force_enable_hpet); + ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_1, - ich_force_enable_hpet); + ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_31, - ich_force_enable_hpet); + ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1, - ich_force_enable_hpet); + ich_force_enable_hpet); static struct pci_dev *cached_dev; @@ -233,9 +236,9 @@ static void old_ich_force_enable_hpet(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0, - old_ich_force_enable_hpet); + old_ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_12, - old_ich_force_enable_hpet); + old_ich_force_enable_hpet); void force_hpet_resume(void) { From b17530bda22e7ffbf08f7a8a50743256b1672f6a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 19 Oct 2007 20:35:02 +0200 Subject: [PATCH 13/33] x86: add force_hpet boot option add force_hpet boot option. (this will be useful to make the forced-enable quirks depend on.) Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 6 ++++-- arch/x86/kernel/hpet.c | 3 +++ include/asm-x86/hpet.h | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 189df0bcab99..c6859b94aaa7 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -431,8 +431,10 @@ and is between 256 and 4096 characters. It is defined in the file over the 8254 in addition to over the IO-APIC. The kernel tries to set a sensible default. - hpet= [X86-32,HPET] option to disable HPET and use PIT. - Format: disable + hpet= [X86-32,HPET] option to control HPET usage + Format: { enable (default) | disable | force } + disable: disable HPET and use PIT instead + force: allow force enabled of undocumented chips (ICH4, VIA) com20020= [HW,NET] ARCnet - COM20020 chipset Format: diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index f8367074da0d..22d8f00c80dc 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -69,12 +69,15 @@ static inline void hpet_clear_mapping(void) * HPET command line enable / disable */ static int boot_hpet_disable; +int hpet_force_user; static int __init hpet_setup(char* str) { if (str) { if (!strncmp("disable", str, 7)) boot_hpet_disable = 1; + if (!strncmp("force", str, 5)) + hpet_force_user = 1; } return 1; } diff --git a/include/asm-x86/hpet.h b/include/asm-x86/hpet.h index d4ab6db050b6..4f51519fc199 100644 --- a/include/asm-x86/hpet.h +++ b/include/asm-x86/hpet.h @@ -64,6 +64,7 @@ /* hpet memory map physical address */ extern unsigned long hpet_address; extern unsigned long force_hpet_address; +extern int hpet_force_user; extern int is_hpet_enabled(void); extern int hpet_enable(void); extern unsigned long hpet_readl(unsigned long a); From b196884e2f5d45fb505b46011e41ca95e0859e34 Mon Sep 17 00:00:00 2001 From: "Udo A. Steinberg" Date: Fri, 19 Oct 2007 20:35:02 +0200 Subject: [PATCH 14/33] x86: force enable HPET on VT8235/8237 chipsets This patch adds quirks to force enable HPET on Via VT8235 and VT8237 chipsets. The datasheet for 8237 documents HPET functionality (although wrongly) whereas HPET is undocumented for 8235. Tested on A7V880 (8237) and K7VT4A+ (8235) boards. tglx: depends on the force_hept commandline option Signed-off-by: Udo A. Steinberg Cc: Venkatesh Pallipadi Cc: Andi Kleen Cc: john stultz Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/quirks.c | 69 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 1fbc5380e27c..fbe32e7c3f5f 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -59,7 +59,8 @@ unsigned long force_hpet_address; static enum { NONE_FORCE_HPET_RESUME, OLD_ICH_FORCE_HPET_RESUME, - ICH_FORCE_HPET_RESUME + ICH_FORCE_HPET_RESUME, + VT8237_FORCE_HPET_RESUME } force_hpet_resume_type; static void __iomem *rcba_base; @@ -240,6 +241,69 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0, DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_12, old_ich_force_enable_hpet); + +static void vt8237_force_hpet_resume(void) +{ + u32 val; + + if (!force_hpet_address || !cached_dev) + return; + + val = 0xfed00000 | 0x80; + pci_write_config_dword(cached_dev, 0x68, val); + + pci_read_config_dword(cached_dev, 0x68, &val); + if (val & 0x80) + printk(KERN_DEBUG "Force enabled HPET at resume\n"); + else + BUG(); +} + +static void vt8237_force_enable_hpet(struct pci_dev *dev) +{ + u32 uninitialized_var(val); + + if (!hpet_force_user || hpet_address || force_hpet_address) + return; + + pci_read_config_dword(dev, 0x68, &val); + /* + * Bit 7 is HPET enable bit. + * Bit 31:10 is HPET base address (contrary to what datasheet claims) + */ + if (val & 0x80) { + force_hpet_address = (val & ~0x3ff); + printk(KERN_DEBUG "HPET at base address 0x%lx\n", + force_hpet_address); + return; + } + + /* + * HPET is disabled. Trying enabling at FED00000 and check + * whether it sticks + */ + val = 0xfed00000 | 0x80; + pci_write_config_dword(dev, 0x68, val); + + pci_read_config_dword(dev, 0x68, &val); + if (val & 0x80) { + force_hpet_address = (val & ~0x3ff); + printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n", + force_hpet_address); + cached_dev = dev; + force_hpet_resume_type = VT8237_FORCE_HPET_RESUME; + return; + } + + printk(KERN_DEBUG "Failed to force enable HPET\n"); +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, + vt8237_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, + vt8237_force_enable_hpet); + + void force_hpet_resume(void) { switch (force_hpet_resume_type) { @@ -249,6 +313,9 @@ void force_hpet_resume(void) case OLD_ICH_FORCE_HPET_RESUME: return old_ich_force_hpet_resume(); + case VT8237_FORCE_HPET_RESUME: + return vt8237_force_hpet_resume(); + default: break; } From 158ad3260ba3b006e3c6dfad05298e9d7889c5b1 Mon Sep 17 00:00:00 2001 From: "Udo A. Steinberg" Date: Fri, 19 Oct 2007 20:35:02 +0200 Subject: [PATCH 15/33] x86: enable HPET on ICH3 and ICH4 ICH3 and ICH4 have undocumented HPET capabilities. This patch enables HPET for platforms based around these ICHs. Tested on various ICH3 and ICH4 platforms. Because HPET is not officially documented for ICH3/4 and may not have been validated by chipset folks, we're on thin ice here. I'd recommend testing this patch in -hrt or -mm for a while and wait for success/failure reports before feeding it upstream. tglx: depends on the force_hpet command line option ! Signed-off-by: Udo A. Steinberg Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/quirks.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index fbe32e7c3f5f..a4ce1911efdf 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -236,6 +236,24 @@ static void old_ich_force_enable_hpet(struct pci_dev *dev) printk(KERN_DEBUG "Failed to force enable HPET\n"); } +/* + * Undocumented chipset features. Make sure that the user enforced + * this. + */ +static void old_ich_force_enable_hpet_user(struct pci_dev *dev) +{ + if (hpet_force_user) + old_ich_force_enable_hpet(dev); +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, + old_ich_force_enable_hpet_user); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12, + old_ich_force_enable_hpet_user); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0, + old_ich_force_enable_hpet_user); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12, + old_ich_force_enable_hpet_user); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0, old_ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_12, From 54ef34009a69f95c25685247e73673dfeb435c71 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 19 Oct 2007 20:35:03 +0200 Subject: [PATCH 16/33] x86: Unify i386 and x86-64 early quirks They were already very similar; just use the same file now. [ tglx: arch/x86 adaptation ] Cc: lenb@kernel.org Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile_32 | 1 + arch/x86/kernel/Makefile_64 | 2 +- arch/x86/kernel/acpi/Makefile_32 | 3 - arch/x86/kernel/acpi/earlyquirk_32.c | 84 ------------------- .../{early-quirks_64.c => early-quirks.c} | 19 +++-- arch/x86/kernel/setup_32.c | 4 +- include/asm-x86/acpi_32.h | 6 +- include/asm-x86/io_apic_64.h | 2 + include/asm-x86/proto.h | 2 - 9 files changed, 20 insertions(+), 103 deletions(-) delete mode 100644 arch/x86/kernel/acpi/earlyquirk_32.c rename arch/x86/kernel/{early-quirks_64.c => early-quirks.c} (88%) diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32 index 69990f0ba86e..ccea590bbb92 100644 --- a/arch/x86/kernel/Makefile_32 +++ b/arch/x86/kernel/Makefile_32 @@ -17,6 +17,7 @@ obj-$(CONFIG_MCA) += mca_32.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_MICROCODE) += microcode.o +obj-$(CONFIG_PCI) += early-quirks.o obj-$(CONFIG_APM) += apm_32.o obj-$(CONFIG_X86_SMP) += smp_32.o smpboot_32.o tsc_sync.o obj-$(CONFIG_SMP) += smpcommon_32.o diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64 index 2fc3d3f2c27e..dec06e769281 100644 --- a/arch/x86/kernel/Makefile_64 +++ b/arch/x86/kernel/Makefile_64 @@ -39,7 +39,7 @@ obj-$(CONFIG_K8_NB) += k8.o obj-$(CONFIG_AUDIT) += audit_64.o obj-$(CONFIG_MODULES) += module_64.o -obj-$(CONFIG_PCI) += early-quirks_64.o +obj-$(CONFIG_PCI) += early-quirks.o obj-y += topology.o obj-y += intel_cacheinfo.o diff --git a/arch/x86/kernel/acpi/Makefile_32 b/arch/x86/kernel/acpi/Makefile_32 index a4852a2e9190..045dd54b33e0 100644 --- a/arch/x86/kernel/acpi/Makefile_32 +++ b/arch/x86/kernel/acpi/Makefile_32 @@ -1,7 +1,4 @@ obj-$(CONFIG_ACPI) += boot.o -ifneq ($(CONFIG_PCI),) -obj-$(CONFIG_X86_IO_APIC) += earlyquirk_32.o -endif obj-$(CONFIG_ACPI_SLEEP) += sleep_32.o wakeup_32.o ifneq ($(CONFIG_ACPI_PROCESSOR),) diff --git a/arch/x86/kernel/acpi/earlyquirk_32.c b/arch/x86/kernel/acpi/earlyquirk_32.c deleted file mode 100644 index 23f78efc577d..000000000000 --- a/arch/x86/kernel/acpi/earlyquirk_32.c +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Do early PCI probing for bug detection when the main PCI subsystem is - * not up yet. - */ -#include -#include -#include -#include - -#include -#include -#include - -#ifdef CONFIG_ACPI - -static int __init nvidia_hpet_check(struct acpi_table_header *header) -{ - return 0; -} -#endif - -static int __init check_bridge(int vendor, int device) -{ -#ifdef CONFIG_ACPI - static int warned; - /* According to Nvidia all timer overrides are bogus unless HPET - is enabled. */ - if (!acpi_use_timer_override && vendor == PCI_VENDOR_ID_NVIDIA) { - if (!warned && acpi_table_parse(ACPI_SIG_HPET, - nvidia_hpet_check)) { - warned = 1; - acpi_skip_timer_override = 1; - printk(KERN_INFO "Nvidia board " - "detected. Ignoring ACPI " - "timer override.\n"); - printk(KERN_INFO "If you got timer trouble " - "try acpi_use_timer_override\n"); - - } - } -#endif - if (vendor == PCI_VENDOR_ID_ATI && timer_over_8254 == 1) { - timer_over_8254 = 0; - printk(KERN_INFO "ATI board detected. Disabling timer routing " - "over 8254.\n"); - } - return 0; -} - -void __init check_acpi_pci(void) -{ - int num, slot, func; - - /* Assume the machine supports type 1. If not it will - always read ffffffff and should not have any side effect. - Actually a few buggy systems can machine check. Allow the user - to disable it by command line option at least -AK */ - if (!early_pci_allowed()) - return; - - /* Poor man's PCI discovery */ - for (num = 0; num < 32; num++) { - for (slot = 0; slot < 32; slot++) { - for (func = 0; func < 8; func++) { - u32 class; - u32 vendor; - class = read_pci_config(num, slot, func, - PCI_CLASS_REVISION); - if (class == 0xffffffff) - break; - - if ((class >> 16) != PCI_CLASS_BRIDGE_PCI) - continue; - - vendor = read_pci_config(num, slot, func, - PCI_VENDOR_ID); - - if (check_bridge(vendor & 0xffff, vendor >> 16)) - return; - } - - } - } -} diff --git a/arch/x86/kernel/early-quirks_64.c b/arch/x86/kernel/early-quirks.c similarity index 88% rename from arch/x86/kernel/early-quirks_64.c rename to arch/x86/kernel/early-quirks.c index 13aa4fd728f3..dc34acbd54aa 100644 --- a/arch/x86/kernel/early-quirks_64.c +++ b/arch/x86/kernel/early-quirks.c @@ -13,9 +13,13 @@ #include #include #include -#include -#include #include +#include +#include + +#ifdef CONFIG_IOMMU +#include +#endif static void __init via_bugs(void) { @@ -23,7 +27,8 @@ static void __init via_bugs(void) if ((end_pfn > MAX_DMA32_PFN || force_iommu) && !iommu_aperture_allowed) { printk(KERN_INFO - "Looks like a VIA chipset. Disabling IOMMU. Override with iommu=allowed\n"); + "Looks like a VIA chipset. Disabling IOMMU." + " Override with iommu=allowed\n"); iommu_aperture_disabled = 1; } #endif @@ -40,6 +45,7 @@ static int __init nvidia_hpet_check(struct acpi_table_header *header) static void __init nvidia_bugs(void) { #ifdef CONFIG_ACPI +#ifdef CONFIG_X86_IO_APIC /* * All timer overrides on Nvidia are * wrong unless HPET is enabled. @@ -58,6 +64,7 @@ static void __init nvidia_bugs(void) printk(KERN_INFO "If you got timer trouble " "try acpi_use_timer_override\n"); } +#endif #endif /* RED-PEN skip them on mptables too? */ @@ -65,11 +72,13 @@ static void __init nvidia_bugs(void) static void __init ati_bugs(void) { +#ifdef CONFIG_X86_IO_APIC if (timer_over_8254 == 1) { timer_over_8254 = 0; printk(KERN_INFO - "ATI board detected. Disabling timer routing over 8254.\n"); + "ATI board detected. Disabling timer routing over 8254.\n"); } +#endif } struct chipset { @@ -104,7 +113,7 @@ void __init early_quirks(void) if (class == 0xffffffff) break; - if ((class >> 16) != PCI_CLASS_BRIDGE_PCI) + if ((class >> 16) != PCI_CLASS_BRIDGE_PCI) continue; vendor = read_pci_config(num, slot, func, diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index b87a6fd5ba48..d357d60431f0 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -622,9 +622,7 @@ void __init setup_arch(char **cmdline_p) #endif #ifdef CONFIG_PCI -#ifdef CONFIG_X86_IO_APIC - check_acpi_pci(); /* Checks more than just ACPI actually */ -#endif + early_quirks(); #endif #ifdef CONFIG_ACPI diff --git a/include/asm-x86/acpi_32.h b/include/asm-x86/acpi_32.h index 125179adf044..723493e6c851 100644 --- a/include/asm-x86/acpi_32.h +++ b/include/asm-x86/acpi_32.h @@ -81,11 +81,7 @@ int __acpi_release_global_lock(unsigned int *lock); :"=r"(n_hi), "=r"(n_lo) \ :"0"(n_hi), "1"(n_lo)) -#ifdef CONFIG_X86_IO_APIC -extern void check_acpi_pci(void); -#else -static inline void check_acpi_pci(void) { } -#endif +extern void early_quirks(void); #ifdef CONFIG_ACPI extern int acpi_lapic; diff --git a/include/asm-x86/io_apic_64.h b/include/asm-x86/io_apic_64.h index d9f2e54324d5..e2c13675ee4e 100644 --- a/include/asm-x86/io_apic_64.h +++ b/include/asm-x86/io_apic_64.h @@ -133,4 +133,6 @@ void enable_NMI_through_LVT0 (void * dummy); extern spinlock_t i8259A_lock; +extern int timer_over_8254; + #endif diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h index c44a3a93b5a4..dabba55f7ed8 100644 --- a/include/asm-x86/proto.h +++ b/include/asm-x86/proto.h @@ -83,8 +83,6 @@ extern unsigned tsc_khz; extern int reboot_force; extern int notsc_setup(char *); -extern int timer_over_8254; - extern int gsi_irq_sharing(int gsi); extern int force_mwait; From 4f8a6b1ab222d08931a36d6770a60cff405968bd Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Fri, 19 Oct 2007 20:35:03 +0200 Subject: [PATCH 17/33] i386: i386 add AMD64 Barcelona PMU MSR definitions to msr.h [i386] add AMD Barcelona PMU MSR definitions AK: Not used right now, but will presumably at some point. [ tglx: arch/x86 adaptation ] Signed-off-by: Stephane Eranian Signed-off-by: Robert Richter Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/msr-index.h | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/include/asm-x86/msr-index.h b/include/asm-x86/msr-index.h index a02eb2991349..a4944732be04 100644 --- a/include/asm-x86/msr-index.h +++ b/include/asm-x86/msr-index.h @@ -73,8 +73,32 @@ #define MSR_P6_EVNTSEL0 0x00000186 #define MSR_P6_EVNTSEL1 0x00000187 -/* K7/K8 MSRs. Not complete. See the architecture manual for a more +/* AMD64 MSRs. Not complete. See the architecture manual for a more complete list. */ + +#define MSR_AMD64_IBSFETCHCTL 0xc0011030 +#define MSR_AMD64_IBSFETCHLINAD 0xc0011031 +#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 +#define MSR_AMD64_IBSOPCTL 0xc0011033 +#define MSR_AMD64_IBSOPRIP 0xc0011034 +#define MSR_AMD64_IBSOPDATA 0xc0011035 +#define MSR_AMD64_IBSOPDATA2 0xc0011036 +#define MSR_AMD64_IBSOPDATA3 0xc0011037 +#define MSR_AMD64_IBSDCLINAD 0xc0011038 +#define MSR_AMD64_IBSDCPHYSAD 0xc0011039 +#define MSR_AMD64_IBSCTL 0xc001103a + +/* K8 MSRs */ +#define MSR_K8_TOP_MEM1 0xc001001a +#define MSR_K8_TOP_MEM2 0xc001001d +#define MSR_K8_SYSCFG 0xc0010010 +#define MSR_K8_HWCR 0xc0010015 +#define MSR_K8_ENABLE_C1E 0xc0010055 +#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */ +#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */ +#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */ + +/* K7 MSRs */ #define MSR_K7_EVNTSEL0 0xc0010000 #define MSR_K7_PERFCTR0 0xc0010004 #define MSR_K7_EVNTSEL1 0xc0010001 @@ -83,20 +107,10 @@ #define MSR_K7_PERFCTR2 0xc0010006 #define MSR_K7_EVNTSEL3 0xc0010003 #define MSR_K7_PERFCTR3 0xc0010007 -#define MSR_K8_TOP_MEM1 0xc001001a #define MSR_K7_CLK_CTL 0xc001001b -#define MSR_K8_TOP_MEM2 0xc001001d -#define MSR_K8_SYSCFG 0xc0010010 - -#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */ -#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */ -#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */ - #define MSR_K7_HWCR 0xc0010015 -#define MSR_K8_HWCR 0xc0010015 #define MSR_K7_FID_VID_CTL 0xc0010041 #define MSR_K7_FID_VID_STATUS 0xc0010042 -#define MSR_K8_ENABLE_C1E 0xc0010055 /* K6 MSRs */ #define MSR_K6_EFER 0xc0000080 From 948062683004d13ca21c8c05ac052d387978a449 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 19 Oct 2007 20:35:03 +0200 Subject: [PATCH 18/33] x86: enable iommu_merge by default [ tglx: arch/x86 adaptation ] Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index b2b42bdb0a15..afaf9f12c032 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -11,7 +11,7 @@ #include #include -int iommu_merge __read_mostly = 0; +int iommu_merge __read_mostly = 1; EXPORT_SYMBOL(iommu_merge); dma_addr_t bad_dma_address __read_mostly; From 574a60421c8ea5383a54ebee1f37fa871d00e1b9 Mon Sep 17 00:00:00 2001 From: Jan Blunck Date: Fri, 19 Oct 2007 20:35:03 +0200 Subject: [PATCH 19/33] i386: make callgraph use dump_trace() on i386/x86_64 This patch improves oprofile callgraphs for i386/x86_64. The old backtracing code was unable to produce kernel backtraces if the kernel wasn't compiled with framepointers. The code now uses dump_trace(). [ tglx: arch/x86 adaptation ] Signed-off-by: Jan Blunck Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/oprofile/backtrace.c | 108 +++++++++++++--------------------- 1 file changed, 40 insertions(+), 68 deletions(-) diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index c049ce414f01..dc59a808009f 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -13,25 +13,45 @@ #include #include #include +#include + +static void backtrace_warning_symbol(void *data, char *msg, + unsigned long symbol) +{ + /* Ignore warnings */ +} + +static void backtrace_warning(void *data, char *msg) +{ + /* Ignore warnings */ +} + +static int backtrace_stack(void *data, char *name) +{ + /* Yes, we want all stacks */ + return 0; +} + +static void backtrace_address(void *data, unsigned long addr) +{ + unsigned int *depth = data; + + if ((*depth)--) + oprofile_add_trace(addr); +} + +static struct stacktrace_ops backtrace_ops = { + .warning = backtrace_warning, + .warning_symbol = backtrace_warning_symbol, + .stack = backtrace_stack, + .address = backtrace_address, +}; struct frame_head { - struct frame_head * ebp; + struct frame_head *ebp; unsigned long ret; } __attribute__((packed)); -static struct frame_head * -dump_kernel_backtrace(struct frame_head * head) -{ - oprofile_add_trace(head->ret); - - /* frame pointers should strictly progress back up the stack - * (towards higher addresses) */ - if (head >= head->ebp) - return NULL; - - return head->ebp; -} - static struct frame_head * dump_user_backtrace(struct frame_head * head) { @@ -53,72 +73,24 @@ dump_user_backtrace(struct frame_head * head) return bufhead[0].ebp; } -/* - * | | /\ Higher addresses - * | | - * --------------- stack base (address of current_thread_info) - * | thread info | - * . . - * | stack | - * --------------- saved regs->ebp value if valid (frame_head address) - * . . - * --------------- saved regs->rsp value if x86_64 - * | | - * --------------- struct pt_regs * stored on stack if 32-bit - * | | - * . . - * | | - * --------------- %esp - * | | - * | | \/ Lower addresses - * - * Thus, regs (or regs->rsp for x86_64) <-> stack base restricts the - * valid(ish) ebp values. Note: (1) for x86_64, NMI and several other - * exceptions use special stacks, maintained by the interrupt stack table - * (IST). These stacks are set up in trap_init() in - * arch/x86_64/kernel/traps.c. Thus, for x86_64, regs now does not point - * to the kernel stack; instead, it points to some location on the NMI - * stack. On the other hand, regs->rsp is the stack pointer saved when the - * NMI occurred. (2) For 32-bit, regs->esp is not valid because the - * processor does not save %esp on the kernel stack when interrupts occur - * in the kernel mode. - */ -#ifdef CONFIG_FRAME_POINTER -static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs) -{ - unsigned long headaddr = (unsigned long)head; -#ifdef CONFIG_X86_64 - unsigned long stack = (unsigned long)regs->rsp; -#else - unsigned long stack = (unsigned long)regs; -#endif - unsigned long stack_base = (stack & ~(THREAD_SIZE - 1)) + THREAD_SIZE; - - return headaddr > stack && headaddr < stack_base; -} -#else -/* without fp, it's just junk */ -static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs) -{ - return 0; -} -#endif - - void x86_backtrace(struct pt_regs * const regs, unsigned int depth) { struct frame_head *head; + unsigned long stack; #ifdef CONFIG_X86_64 head = (struct frame_head *)regs->rbp; + stack = regs->rsp; #else head = (struct frame_head *)regs->ebp; + stack = regs->esp; #endif if (!user_mode_vm(regs)) { - while (depth-- && valid_kernel_stack(head, regs)) - head = dump_kernel_backtrace(head); + if (depth) + dump_trace(NULL, regs, (unsigned long *)stack, + &backtrace_ops, &depth); return; } From 9d975ebda56699c1b8480e9736caf33a61ccb810 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 19 Oct 2007 20:35:03 +0200 Subject: [PATCH 20/33] i386: consolidate show_regs and show_registers for i386 Both functions printk the same information, except for CRx and debug registers in the show_registers() one and a bit different manner. So move the common code into one place. This is already done for x86_64, so I think it's worth having the same on i386. This saves 100 bytes of .rodata section :) ... but only 8 from .text :( [ tglx: arch/x86 adaptation ] Signed-off-by: Pavel Emelyanov Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_32.c | 56 +++++++++++++++++++++++++----------- arch/x86/kernel/traps_32.c | 30 ++----------------- include/asm-x86/system_32.h | 1 + 3 files changed, 44 insertions(+), 43 deletions(-) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 097aeafce5ff..ba4b64117084 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -295,34 +295,52 @@ static int __init idle_setup(char *str) } early_param("idle", idle_setup); -void show_regs(struct pt_regs * regs) +void __show_registers(struct pt_regs *regs, int all) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; unsigned long d0, d1, d2, d3, d6, d7; + unsigned long esp; + unsigned short ss, gs; + + if (user_mode_vm(regs)) { + esp = regs->esp; + ss = regs->xss & 0xffff; + savesegment(gs, gs); + } else { + esp = (unsigned long) (®s->esp); + savesegment(ss, ss); + savesegment(gs, gs); + } printk("\n"); - printk("Pid: %d, comm: %20s\n", current->pid, current->comm); - printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); + printk("Pid: %d, comm: %.*s %s (%s %.*s)\n", + current->pid, TASK_COMM_LEN, current->comm, + print_tainted(), init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); + + printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", + 0xffff & regs->xcs, regs->eip, regs->eflags, + smp_processor_id()); print_symbol("EIP is at %s\n", regs->eip); - if (user_mode_vm(regs)) - printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); - printk(" EFLAGS: %08lx %s (%s %.*s)\n", - regs->eflags, print_tainted(), init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", - regs->eax,regs->ebx,regs->ecx,regs->edx); - printk("ESI: %08lx EDI: %08lx EBP: %08lx", - regs->esi, regs->edi, regs->ebp); - printk(" DS: %04x ES: %04x FS: %04x\n", - 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs); + regs->eax, regs->ebx, regs->ecx, regs->edx); + printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", + regs->esi, regs->edi, regs->ebp, esp); + printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", + regs->xds & 0xffff, regs->xes & 0xffff, + regs->xfs & 0xffff, gs, ss); + + if (!all) + return; cr0 = read_cr0(); cr2 = read_cr2(); cr3 = read_cr3(); cr4 = read_cr4_safe(); - printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); + printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", + cr0, cr2, cr3, cr4); get_debugreg(d0, 0); get_debugreg(d1, 1); @@ -330,10 +348,16 @@ void show_regs(struct pt_regs * regs) get_debugreg(d3, 3); printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", d0, d1, d2, d3); + get_debugreg(d6, 6); get_debugreg(d7, 7); - printk("DR6: %08lx DR7: %08lx\n", d6, d7); + printk("DR6: %08lx DR7: %08lx\n", + d6, d7); +} +void show_regs(struct pt_regs *regs) +{ + __show_registers(regs, 1); show_trace(NULL, regs, ®s->esp); } diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index b132d3957dfc..746fad2c504f 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -288,33 +288,9 @@ EXPORT_SYMBOL(dump_stack); void show_registers(struct pt_regs *regs) { int i; - int in_kernel = 1; - unsigned long esp; - unsigned short ss, gs; - esp = (unsigned long) (®s->esp); - savesegment(ss, ss); - savesegment(gs, gs); - if (user_mode_vm(regs)) { - in_kernel = 0; - esp = regs->esp; - ss = regs->xss & 0xffff; - } print_modules(); - printk(KERN_EMERG "CPU: %d\n" - KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n" - KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n", - smp_processor_id(), 0xffff & regs->xcs, regs->eip, - print_tainted(), regs->eflags, init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip); - printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", - regs->eax, regs->ebx, regs->ecx, regs->edx); - printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", - regs->esi, regs->edi, regs->ebp, esp); - printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", - regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss); + __show_registers(regs, 0); printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", TASK_COMM_LEN, current->comm, current->pid, current_thread_info(), current, task_thread_info(current)); @@ -322,14 +298,14 @@ void show_registers(struct pt_regs *regs) * When in-kernel, we also print out the stack and code at the * time of the fault.. */ - if (in_kernel) { + if (!user_mode_vm(regs)) { u8 *eip; unsigned int code_prologue = code_bytes * 43 / 64; unsigned int code_len = code_bytes; unsigned char c; printk("\n" KERN_EMERG "Stack: "); - show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG); + show_stack_log_lvl(NULL, regs, ®s->esp, KERN_EMERG); printk(KERN_EMERG "Code: "); diff --git a/include/asm-x86/system_32.h b/include/asm-x86/system_32.h index db6283eb5e46..ef8468883bac 100644 --- a/include/asm-x86/system_32.h +++ b/include/asm-x86/system_32.h @@ -315,5 +315,6 @@ extern unsigned long arch_align_stack(unsigned long sp); extern void free_init_pages(char *what, unsigned long begin, unsigned long end); void default_idle(void); +void __show_registers(struct pt_regs *, int all); #endif From 9d1c6e7c86ddc366d67f0c5fa77be9b93710037a Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Fri, 19 Oct 2007 20:35:03 +0200 Subject: [PATCH 21/33] x86: use descriptor's functions instead of inline assembly This patch provides a new set of functions for managing the descriptor tables that can be used instead of putting the raw assembly in .c files. Remodeling of store_tr() suggested by Frederik Deweerdt. [ tglx: arch/x86 adaptation ] Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/head64.c | 2 +- arch/x86/kernel/reboot_64.c | 3 ++- arch/x86/kernel/setup64.c | 4 ++-- arch/x86/kernel/suspend_64.c | 11 ++++++----- include/asm-x86/desc_64.h | 30 ++++++++++++++++++++++++++++++ 5 files changed, 41 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index a7eee0a4751d..6b3469311e42 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -58,7 +58,7 @@ void __init x86_64_start_kernel(char * real_mode_data) for (i = 0; i < IDT_ENTRIES; i++) set_intr_gate(i, early_idt_handler); - asm volatile("lidt %0" :: "m" (idt_descr)); + load_idt((const struct desc_ptr *)&idt_descr); early_printk("Kernel alive\n"); diff --git a/arch/x86/kernel/reboot_64.c b/arch/x86/kernel/reboot_64.c index 368db2b9c5ac..776eb06b6512 100644 --- a/arch/x86/kernel/reboot_64.c +++ b/arch/x86/kernel/reboot_64.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -136,7 +137,7 @@ void machine_emergency_restart(void) } case BOOT_TRIPLE: - __asm__ __volatile__("lidt (%0)": :"r" (&no_idt)); + load_idt((const struct desc_ptr *)&no_idt); __asm__ __volatile__("int3"); reboot_type = BOOT_KBD; diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c index e11886e8d8ca..3558ac78c926 100644 --- a/arch/x86/kernel/setup64.c +++ b/arch/x86/kernel/setup64.c @@ -230,8 +230,8 @@ void __cpuinit cpu_init (void) memcpy(cpu_gdt(cpu), cpu_gdt_table, GDT_SIZE); cpu_gdt_descr[cpu].size = GDT_SIZE; - asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu])); - asm volatile("lidt %0" :: "m" (idt_descr)); + load_gdt((const struct desc_ptr *)&cpu_gdt_descr[cpu]); + load_idt((const struct desc_ptr *)&idt_descr); memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); syscall_init(); diff --git a/arch/x86/kernel/suspend_64.c b/arch/x86/kernel/suspend_64.c index f8fafe527ff1..622bb0268284 100644 --- a/arch/x86/kernel/suspend_64.c +++ b/arch/x86/kernel/suspend_64.c @@ -32,9 +32,9 @@ void __save_processor_state(struct saved_context *ctxt) /* * descriptor tables */ - asm volatile ("sgdt %0" : "=m" (ctxt->gdt_limit)); - asm volatile ("sidt %0" : "=m" (ctxt->idt_limit)); - asm volatile ("str %0" : "=m" (ctxt->tr)); + store_gdt((struct desc_ptr *)&ctxt->gdt_limit); + store_idt((struct desc_ptr *)&ctxt->idt_limit); + store_tr(ctxt->tr); /* XMM0..XMM15 should be handled by kernel_fpu_begin(). */ /* @@ -91,8 +91,9 @@ void __restore_processor_state(struct saved_context *ctxt) * now restore the descriptor tables to their proper values * ltr is done i fix_processor_context(). */ - asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit)); - asm volatile ("lidt %0" :: "m" (ctxt->idt_limit)); + load_gdt((const struct desc_ptr *)&ctxt->gdt_limit); + load_idt((const struct desc_ptr *)&ctxt->idt_limit); + /* * segment registers diff --git a/include/asm-x86/desc_64.h b/include/asm-x86/desc_64.h index ac991b5ca0fd..7d9c938e69fd 100644 --- a/include/asm-x86/desc_64.h +++ b/include/asm-x86/desc_64.h @@ -20,6 +20,16 @@ extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; #define load_LDT_desc() asm volatile("lldt %w0"::"r" (GDT_ENTRY_LDT*8)) #define clear_LDT() asm volatile("lldt %w0"::"r" (0)) +static inline unsigned long __store_tr(void) +{ + unsigned long tr; + + asm volatile ("str %w0":"=r" (tr)); + return tr; +} + +#define store_tr(tr) (tr) = __store_tr() + /* * This is the ldt that every process will get unless we need * something other than this. @@ -31,6 +41,16 @@ extern struct desc_ptr cpu_gdt_descr[]; /* the cpu gdt accessor */ #define cpu_gdt(_cpu) ((struct desc_struct *)cpu_gdt_descr[_cpu].address) +static inline void load_gdt(const struct desc_ptr *ptr) +{ + asm volatile("lgdt %w0"::"m" (*ptr)); +} + +static inline void store_gdt(struct desc_ptr *ptr) +{ + asm("sgdt %w0":"=m" (*ptr)); +} + static inline void _set_gate(void *adr, unsigned type, unsigned long func, unsigned dpl, unsigned ist) { struct gate_struct s; @@ -71,6 +91,16 @@ static inline void set_system_gate_ist(int nr, void *func, unsigned ist) _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, ist); } +static inline void load_idt(const struct desc_ptr *ptr) +{ + asm volatile("lidt %w0"::"m" (*ptr)); +} + +static inline void store_idt(struct desc_ptr *dtr) +{ + asm("sidt %w0":"=m" (*dtr)); +} + static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, unsigned type, unsigned size) { From 66d16ed45d19600abd72dbd55bd2018437b24b73 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 19 Oct 2007 20:35:03 +0200 Subject: [PATCH 22/33] x86: implement missing x86_64 function smp_call_function_mask() This patch defines the missing function smp_call_function_mask() for x86_64, this is more or less a cut&paste of i386 function. It removes also some duplicate code. This function is needed by KVM to execute a function on some CPUs. AK: Fixed description AK: Moved WARN_ON(irqs_disabled) one level up to not warn in the panic case. [ tglx: arch/x86 adaptation ] Signed-off-by: Laurent Vivier Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/smp_64.c | 119 +++++++++++++++++++++------------------ include/asm-x86/smp_64.h | 2 + 2 files changed, 65 insertions(+), 56 deletions(-) diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c index 5c2964727d19..03fa6ed559c6 100644 --- a/arch/x86/kernel/smp_64.c +++ b/arch/x86/kernel/smp_64.c @@ -322,17 +322,27 @@ void unlock_ipi_call_lock(void) } /* - * this function sends a 'generic call function' IPI to one other CPU - * in the system. - * - * cpu is a standard Linux logical CPU number. + * this function sends a 'generic call function' IPI to all other CPU + * of the system defined in the mask. */ -static void -__smp_call_function_single(int cpu, void (*func) (void *info), void *info, - int nonatomic, int wait) + +static int +__smp_call_function_mask(cpumask_t mask, + void (*func)(void *), void *info, + int wait) { struct call_data_struct data; - int cpus = 1; + cpumask_t allbutself; + int cpus; + + allbutself = cpu_online_map; + cpu_clear(smp_processor_id(), allbutself); + + cpus_and(mask, mask, allbutself); + cpus = cpus_weight(mask); + + if (!cpus) + return 0; data.func = func; data.info = info; @@ -343,19 +353,55 @@ __smp_call_function_single(int cpu, void (*func) (void *info), void *info, call_data = &data; wmb(); - /* Send a message to all other CPUs and wait for them to respond */ - send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR); + + /* Send a message to other CPUs */ + if (cpus_equal(mask, allbutself)) + send_IPI_allbutself(CALL_FUNCTION_VECTOR); + else + send_IPI_mask(mask, CALL_FUNCTION_VECTOR); /* Wait for response */ while (atomic_read(&data.started) != cpus) cpu_relax(); if (!wait) - return; + return 0; while (atomic_read(&data.finished) != cpus) cpu_relax(); + + return 0; } +/** + * smp_call_function_mask(): Run a function on a set of other CPUs. + * @mask: The set of cpus to run on. Must not include the current cpu. + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. + * @wait: If true, wait (atomically) until function has completed on other CPUs. + * + * Returns 0 on success, else a negative status code. + * + * If @wait is true, then returns once @func has returned; otherwise + * it returns just before the target cpu calls @func. + * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler or from a bottom half handler. + */ +int smp_call_function_mask(cpumask_t mask, + void (*func)(void *), void *info, + int wait) +{ + int ret; + + /* Can deadlock when called with interrupts disabled */ + WARN_ON(irqs_disabled()); + + spin_lock(&call_lock); + ret = __smp_call_function_mask(mask, func, info, wait); + spin_unlock(&call_lock); + return ret; +} +EXPORT_SYMBOL(smp_call_function_mask); /* * smp_call_function_single - Run a function on a specific CPU @@ -374,6 +420,7 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info, int nonatomic, int wait) { /* prevent preemption and reschedule on another processor */ + int ret; int me = get_cpu(); /* Can deadlock when called with interrupts disabled */ @@ -387,50 +434,13 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info, return 0; } - spin_lock(&call_lock); - __smp_call_function_single(cpu, func, info, nonatomic, wait); - spin_unlock(&call_lock); + ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait); + put_cpu(); - return 0; + return ret; } EXPORT_SYMBOL(smp_call_function_single); -/* - * this function sends a 'generic call function' IPI to all other CPUs - * in the system. - */ -static void __smp_call_function (void (*func) (void *info), void *info, - int nonatomic, int wait) -{ - struct call_data_struct data; - int cpus = num_online_cpus()-1; - - if (!cpus) - return; - - data.func = func; - data.info = info; - atomic_set(&data.started, 0); - data.wait = wait; - if (wait) - atomic_set(&data.finished, 0); - - call_data = &data; - wmb(); - /* Send a message to all other CPUs and wait for them to respond */ - send_IPI_allbutself(CALL_FUNCTION_VECTOR); - - /* Wait for response */ - while (atomic_read(&data.started) != cpus) - cpu_relax(); - - if (!wait) - return; - - while (atomic_read(&data.finished) != cpus) - cpu_relax(); -} - /* * smp_call_function - run a function on all other CPUs. * @func: The function to run. This must be fast and non-blocking. @@ -449,10 +459,7 @@ static void __smp_call_function (void (*func) (void *info), void *info, int smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait) { - spin_lock(&call_lock); - __smp_call_function(func,info,nonatomic,wait); - spin_unlock(&call_lock); - return 0; + return smp_call_function_mask(cpu_online_map, func, info, wait); } EXPORT_SYMBOL(smp_call_function); @@ -479,7 +486,7 @@ void smp_send_stop(void) /* Don't deadlock on the call lock in panic */ nolock = !spin_trylock(&call_lock); local_irq_save(flags); - __smp_call_function(stop_this_cpu, NULL, 0, 0); + __smp_call_function_mask(cpu_online_map, stop_this_cpu, NULL, 0); if (!nolock) spin_unlock(&call_lock); disable_local_APIC(); diff --git a/include/asm-x86/smp_64.h b/include/asm-x86/smp_64.h index d30e9b684fdd..9d35018e54fe 100644 --- a/include/asm-x86/smp_64.h +++ b/include/asm-x86/smp_64.h @@ -37,6 +37,8 @@ extern void lock_ipi_call_lock(void); extern void unlock_ipi_call_lock(void); extern int smp_num_siblings; extern void smp_send_reschedule(int cpu); +extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *), + void *info, int wait); /* * cpu_sibling_map and cpu_core_map now live From 1f503e7743df91eb6f63af27c310c557edfc3307 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 19 Oct 2007 20:35:03 +0200 Subject: [PATCH 23/33] i386: do cpuid_device_create() in CPU_UP_PREPARE instead of CPU_ONLINE. Do cpuid_device_create() in CPU_UP_PREPARE instead of CPU_ONLINE. [ tglx: arch/x86 adaptation ] Cc: "H. Peter Anvin" Signed-off-by: Akinobu Mita Signed-off-by: Andi Kleen Cc: Gautham R Shenoy Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpuid.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 70dcf912d9fb..2086c727fb09 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -134,15 +134,18 @@ static const struct file_operations cpuid_fops = { .open = cpuid_open, }; -static int __cpuinit cpuid_device_create(int i) +static __cpuinit int cpuid_device_create(int cpu) { - int err = 0; struct device *dev; - dev = device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, i), "cpu%d",i); - if (IS_ERR(dev)) - err = PTR_ERR(dev); - return err; + dev = device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, cpu), + "cpu%d", cpu); + return IS_ERR(dev) ? PTR_ERR(dev) : 0; +} + +static void cpuid_device_destroy(int cpu) +{ + device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); } static int __cpuinit cpuid_class_cpu_callback(struct notifier_block *nfb, @@ -150,18 +153,21 @@ static int __cpuinit cpuid_class_cpu_callback(struct notifier_block *nfb, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; + int err = 0; switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - cpuid_device_create(cpu); + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + err = cpuid_device_create(cpu); break; + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: case CPU_DEAD_FROZEN: - device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); + cpuid_device_destroy(cpu); break; } - return NOTIFY_OK; + return err ? NOTIFY_BAD : NOTIFY_OK; } static struct notifier_block __cpuinitdata cpuid_class_cpu_notifier = @@ -198,7 +204,7 @@ static int __init cpuid_init(void) out_class: i = 0; for_each_online_cpu(i) { - device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, i)); + cpuid_device_destroy(i); } class_destroy(cpuid_class); out_chrdev: @@ -212,7 +218,7 @@ static void __exit cpuid_exit(void) int cpu = 0; for_each_online_cpu(cpu) - device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); + cpuid_device_destroy(cpu); class_destroy(cpuid_class); unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); From b1992df3f070475b243b12ca1241a5938ef5f9bc Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 19 Oct 2007 20:35:03 +0200 Subject: [PATCH 24/33] x86: honor _PAGE_PSE bit on page walks [ tglx: arch/x86 adaptation ] Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/fault_32.c | 3 ++- arch/x86/mm/fault_64.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c index 6555c3d14371..c9a03f73986e 100644 --- a/arch/x86/mm/fault_32.c +++ b/arch/x86/mm/fault_32.c @@ -564,7 +564,8 @@ no_context: * it's allocated already. */ if ((page >> PAGE_SHIFT) < max_low_pfn - && (page & _PAGE_PRESENT)) { + && (page & _PAGE_PRESENT) + && !(page & _PAGE_PSE)) { page &= PAGE_MASK; page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)]; diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c index a881f0796169..1e631745ca41 100644 --- a/arch/x86/mm/fault_64.c +++ b/arch/x86/mm/fault_64.c @@ -169,7 +169,7 @@ void dump_pagetable(unsigned long address) pmd = pmd_offset(pud, address); if (bad_address(pmd)) goto bad; printk("PMD %lx ", pmd_val(*pmd)); - if (!pmd_present(*pmd)) goto ret; + if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret; pte = pte_offset_kernel(pmd, address); if (bad_address(pte)) goto bad; From 39743c9ef717fd4f2b5583f010115c5f2482b8ae Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 19 Oct 2007 20:35:03 +0200 Subject: [PATCH 25/33] x86: use raw locks during oopses Don't want any lockdep or other fragile machinery to run during oopses. Use raw spinlocks directly for oops locking. Also disables irq flag tracing there. [ tglx: arch/x86 adaptation ] Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/traps_32.c | 12 +++++++----- arch/x86/kernel/traps_64.c | 16 +++++++--------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 746fad2c504f..3f02e0f42e6a 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -350,11 +350,11 @@ int is_valid_bugaddr(unsigned long eip) void die(const char * str, struct pt_regs * regs, long err) { static struct { - spinlock_t lock; + raw_spinlock_t lock; u32 lock_owner; int lock_owner_depth; } die = { - .lock = __SPIN_LOCK_UNLOCKED(die.lock), + .lock = __RAW_SPIN_LOCK_UNLOCKED, .lock_owner = -1, .lock_owner_depth = 0 }; @@ -365,13 +365,14 @@ void die(const char * str, struct pt_regs * regs, long err) if (die.lock_owner != raw_smp_processor_id()) { console_verbose(); - spin_lock_irqsave(&die.lock, flags); + __raw_spin_lock(&die.lock); + raw_local_save_flags(flags); die.lock_owner = smp_processor_id(); die.lock_owner_depth = 0; bust_spinlocks(1); } else - local_save_flags(flags); + raw_local_save_flags(flags); if (++die.lock_owner_depth < 3) { unsigned long esp; @@ -415,7 +416,8 @@ void die(const char * str, struct pt_regs * regs, long err) bust_spinlocks(0); die.lock_owner = -1; add_taint(TAINT_DIE); - spin_unlock_irqrestore(&die.lock, flags); + __raw_spin_unlock(&die.lock); + raw_local_irq_restore(flags); if (!regs) return; diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index b4a9b3db1994..df690c3fa458 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -462,7 +462,7 @@ void out_of_line_bug(void) EXPORT_SYMBOL(out_of_line_bug); #endif -static DEFINE_SPINLOCK(die_lock); +static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; static int die_owner = -1; static unsigned int die_nest_count; @@ -474,13 +474,13 @@ unsigned __kprobes long oops_begin(void) oops_enter(); /* racy, but better than risking deadlock. */ - local_irq_save(flags); + raw_local_irq_save(flags); cpu = smp_processor_id(); - if (!spin_trylock(&die_lock)) { + if (!__raw_spin_trylock(&die_lock)) { if (cpu == die_owner) /* nested oops. should stop eventually */; else - spin_lock(&die_lock); + __raw_spin_lock(&die_lock); } die_nest_count++; die_owner = cpu; @@ -494,12 +494,10 @@ void __kprobes oops_end(unsigned long flags) die_owner = -1; bust_spinlocks(0); die_nest_count--; - if (die_nest_count) - /* We still own the lock */ - local_irq_restore(flags); - else + if (!die_nest_count) /* Nest count reaches zero, release the lock. */ - spin_unlock_irqrestore(&die_lock, flags); + __raw_spin_unlock(&die_lock); + raw_local_irq_restore(flags); if (panic_on_oops) panic("Fatal exception"); oops_exit(); From dbeb2be21d678c49a8d8bbf774903df15dd55474 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 19 Oct 2007 20:35:03 +0200 Subject: [PATCH 26/33] i386: introduce "used_vectors" bitmap which can be used to reserve vectors. This simplifies the io_apic.c __assign_irq_vector() logic and removes the explicit SYSCALL_VECTOR check, and also allows for vectors to be reserved by other mechanisms (ie. lguest). [ tglx: arch/x86 adaptation ] Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/i8259_32.c | 3 ++- arch/x86/kernel/io_apic_32.c | 13 ++++++++----- arch/x86/kernel/traps_32.c | 10 ++++++++++ include/asm-x86/irq_32.h | 3 +++ 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259_32.c index d34a10cc13a7..f634fc715c99 100644 --- a/arch/x86/kernel/i8259_32.c +++ b/arch/x86/kernel/i8259_32.c @@ -403,7 +403,8 @@ void __init native_init_IRQ(void) int vector = FIRST_EXTERNAL_VECTOR + i; if (i >= NR_IRQS) break; - if (vector != SYSCALL_VECTOR) + /* SYSCALL_VECTOR was reserved in trap_init. */ + if (!test_bit(vector, used_vectors)) set_intr_gate(vector, interrupt[i]); } diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 5f10c7189534..0c55e9d86f69 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -1198,7 +1198,7 @@ static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 } static int __assign_irq_vector(int irq) { static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; - int vector, offset, i; + int vector, offset; BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); @@ -1215,11 +1215,8 @@ next: } if (vector == current_vector) return -ENOSPC; - if (vector == SYSCALL_VECTOR) + if (test_and_set_bit(vector, used_vectors)) goto next; - for (i = 0; i < NR_IRQ_VECTORS; i++) - if (irq_vector[i] == vector) - goto next; current_vector = vector; current_offset = offset; @@ -2295,6 +2292,12 @@ static inline void __init check_timer(void) void __init setup_IO_APIC(void) { + int i; + + /* Reserve all the system vectors. */ + for (i = FIRST_SYSTEM_VECTOR; i < NR_VECTORS; i++) + set_bit(i, used_vectors); + enable_IO_APIC(); if (acpi_ioapic) diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 3f02e0f42e6a..47e6d10902fe 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -63,6 +63,9 @@ int panic_on_unrecovered_nmi; +DECLARE_BITMAP(used_vectors, NR_VECTORS); +EXPORT_SYMBOL_GPL(used_vectors); + asmlinkage int system_call(void); /* Do we ignore FPU interrupts ? */ @@ -1120,6 +1123,8 @@ static void __init set_task_gate(unsigned int n, unsigned int gdt_entry) void __init trap_init(void) { + int i; + #ifdef CONFIG_EISA void __iomem *p = ioremap(0x0FFFD9, 4); if (readl(p) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) { @@ -1179,6 +1184,11 @@ void __init trap_init(void) set_system_gate(SYSCALL_VECTOR,&system_call); + /* Reserve all the builtin and the syscall vector. */ + for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) + set_bit(i, used_vectors); + set_bit(SYSCALL_VECTOR, used_vectors); + /* * Should be a barrier for any external CPU state. */ diff --git a/include/asm-x86/irq_32.h b/include/asm-x86/irq_32.h index 36f310632c49..aca9c96e8e6b 100644 --- a/include/asm-x86/irq_32.h +++ b/include/asm-x86/irq_32.h @@ -45,4 +45,7 @@ unsigned int do_IRQ(struct pt_regs *regs); void init_IRQ(void); void __init native_init_IRQ(void); +/* Interrupt vector management */ +extern DECLARE_BITMAP(used_vectors, NR_VECTORS); + #endif /* _ASM_IRQ_H */ From 71fff5e6ca1b738ac4742580e4c0ff79d906f6c8 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 19 Oct 2007 20:35:03 +0200 Subject: [PATCH 27/33] x86: convert cpu_to_apicid to be a per cpu variable This patch converts the x86_cpu_to_apicid array to be a per cpu variable. This saves sizeof(apicid) * NR unused cpus. Access is mostly from startup and CPU HOTPLUG functions. MP_processor_info() is one of the functions that require access to the x86_cpu_to_apicid array before the per_cpu data area is setup. For this case, a pointer to the __initdata array is initialized in setup_arch() and removed in smp_prepare_cpus() after the per_cpu data area is initialized. A second change is included to change the initial array value of ARCH i386 from 0xff to BAD_APICID to be consistent with ARCH x86_64. Signed-off-by: Mike Travis Cc: Andi Kleen Cc: Christoph Lameter Cc: "Siddha, Suresh B" Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/genapic_64.c | 15 ++++++++++++--- arch/x86/kernel/genapic_flat_64.c | 2 +- arch/x86/kernel/mpparse_64.c | 15 +++++++++++++-- arch/x86/kernel/setup_64.c | 5 +++++ arch/x86/kernel/smp_32.c | 2 +- arch/x86/kernel/smpboot_32.c | 22 +++++++++++++++------- arch/x86/kernel/smpboot_64.c | 23 ++++++++++++++++++++++- arch/x86/mm/numa_64.c | 2 +- include/asm-x86/ipi.h | 2 +- include/asm-x86/smp_32.h | 6 ++++-- include/asm-x86/smp_64.h | 6 ++++-- 12 files changed, 80 insertions(+), 22 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index afd2afe9102d..f28b2e251b1d 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -555,7 +555,7 @@ EXPORT_SYMBOL(acpi_map_lsapic); int acpi_unmap_lsapic(int cpu) { - x86_cpu_to_apicid[cpu] = -1; + per_cpu(x86_cpu_to_apicid, cpu) = -1; cpu_clear(cpu, cpu_present_map); num_processors--; diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 4ae03e3e8294..ce703e21c912 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -24,10 +24,19 @@ #include #endif -/* which logical CPU number maps to which CPU (physical APIC ID) */ -u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly +/* + * which logical CPU number maps to which CPU (physical APIC ID) + * + * The following static array is used during kernel startup + * and the x86_cpu_to_apicid_ptr contains the address of the + * array during this time. Is it zeroed when the per_cpu + * data area is removed. + */ +u8 x86_cpu_to_apicid_init[NR_CPUS] __initdata = { [0 ... NR_CPUS-1] = BAD_APICID }; -EXPORT_SYMBOL(x86_cpu_to_apicid); +void *x86_cpu_to_apicid_ptr; +DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID; +EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); struct genapic __read_mostly *genapic = &apic_flat; diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 91c7526768ee..07352b74bda6 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -172,7 +172,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) */ cpu = first_cpu(cpumask); if ((unsigned)cpu < NR_CPUS) - return x86_cpu_to_apicid[cpu]; + return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 8bf0ca03ac8e..4336c0fc3b81 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -86,7 +86,7 @@ static int __init mpf_checksum(unsigned char *mp, int len) return sum & 0xFF; } -static void __cpuinit MP_processor_info (struct mpc_config_processor *m) +static void __cpuinit MP_processor_info(struct mpc_config_processor *m) { int cpu; cpumask_t tmp_map; @@ -123,7 +123,18 @@ static void __cpuinit MP_processor_info (struct mpc_config_processor *m) cpu = 0; } bios_cpu_apicid[cpu] = m->mpc_apicid; - x86_cpu_to_apicid[cpu] = m->mpc_apicid; + /* + * We get called early in the the start_kernel initialization + * process when the per_cpu data area is not yet setup, so we + * use a static array that is removed after the per_cpu data + * area is created. + */ + if (x86_cpu_to_apicid_ptr) { + u8 *x86_cpu_to_apicid = (u8 *)x86_cpu_to_apicid_ptr; + x86_cpu_to_apicid[cpu] = m->mpc_apicid; + } else { + per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid; + } cpu_set(cpu, cpu_possible_map); cpu_set(cpu, cpu_present_map); diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 5a19f0cc5b67..94630c66470e 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -271,6 +271,11 @@ void __init setup_arch(char **cmdline_p) dmi_scan_machine(); +#ifdef CONFIG_SMP + /* setup to use the static apicid table during kernel startup */ + x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init; +#endif + #ifdef CONFIG_ACPI /* * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). diff --git a/arch/x86/kernel/smp_32.c b/arch/x86/kernel/smp_32.c index 791d9f8036ae..4974c3d2a0a6 100644 --- a/arch/x86/kernel/smp_32.c +++ b/arch/x86/kernel/smp_32.c @@ -676,7 +676,7 @@ static int convert_apicid_to_cpu(int apic_id) int i; for (i = 0; i < NR_CPUS; i++) { - if (x86_cpu_to_apicid[i] == apic_id) + if (per_cpu(x86_cpu_to_apicid, i) == apic_id) return i; } return -1; diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 65e5de7d64db..631be36ec2a9 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -92,9 +92,17 @@ static cpumask_t smp_commenced_mask; struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; EXPORT_SYMBOL(cpu_data); -u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = - { [0 ... NR_CPUS-1] = 0xff }; -EXPORT_SYMBOL(x86_cpu_to_apicid); +/* + * The following static array is used during kernel startup + * and the x86_cpu_to_apicid_ptr contains the address of the + * array during this time. Is it zeroed when the per_cpu + * data area is removed. + */ +u8 x86_cpu_to_apicid_init[NR_CPUS] __initdata = + { [0 ... NR_CPUS-1] = BAD_APICID }; +void *x86_cpu_to_apicid_ptr; +DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID; +EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); u8 apicid_2_node[MAX_APICID]; @@ -804,7 +812,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) irq_ctx_init(cpu); - x86_cpu_to_apicid[cpu] = apicid; + per_cpu(x86_cpu_to_apicid, cpu) = apicid; /* * This grunge runs the startup process for * the targeted processor. @@ -866,7 +874,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ cpucount--; } else { - x86_cpu_to_apicid[cpu] = apicid; + per_cpu(x86_cpu_to_apicid, cpu) = apicid; cpu_set(cpu, cpu_present_map); } @@ -915,7 +923,7 @@ static int __cpuinit __smp_prepare_cpu(int cpu) struct warm_boot_cpu_info info; int apicid, ret; - apicid = x86_cpu_to_apicid[cpu]; + apicid = per_cpu(x86_cpu_to_apicid, cpu); if (apicid == BAD_APICID) { ret = -ENODEV; goto exit; @@ -965,7 +973,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); boot_cpu_logical_apicid = logical_smp_processor_id(); - x86_cpu_to_apicid[0] = boot_cpu_physical_apicid; + per_cpu(x86_cpu_to_apicid, 0) = boot_cpu_physical_apicid; current_thread_info()->cpu = 0; diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index e351ac4ab5b1..c3e8668c5278 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -694,7 +694,7 @@ do_rest: clear_node_cpumask(cpu); /* was set by numa_add_cpu */ cpu_clear(cpu, cpu_present_map); cpu_clear(cpu, cpu_possible_map); - x86_cpu_to_apicid[cpu] = BAD_APICID; + per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; return -EIO; } @@ -840,6 +840,26 @@ static int __init smp_sanity_check(unsigned max_cpus) return 0; } +/* + * Copy apicid's found by MP_processor_info from initial array to the per cpu + * data area. The x86_cpu_to_apicid_init array is then expendable and the + * x86_cpu_to_apicid_ptr is zeroed indicating that the static array is no + * longer available. + */ +void __init smp_set_apicids(void) +{ + int cpu; + + for_each_cpu_mask(cpu, cpu_possible_map) { + if (per_cpu_offset(cpu)) + per_cpu(x86_cpu_to_apicid, cpu) = + x86_cpu_to_apicid_init[cpu]; + } + + /* indicate the static array will be going away soon */ + x86_cpu_to_apicid_ptr = NULL; +} + /* * Prepare for SMP bootup. The MP table or ACPI has been read * earlier. Just do some sanity checking here and enable APIC mode. @@ -849,6 +869,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) nmi_watchdog_default(); current_cpu_data = boot_cpu_data; current_thread_info()->cpu = 0; /* needed? */ + smp_set_apicids(); set_cpu_sibling_map(0); if (smp_sanity_check(max_cpus) < 0) { diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 5eec5e56d07f..3d6926ba8995 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -612,7 +612,7 @@ void __init init_cpu_to_node(void) { int i; for (i = 0; i < NR_CPUS; i++) { - u8 apicid = x86_cpu_to_apicid[i]; + u8 apicid = x86_cpu_to_apicid_init[i]; if (apicid == BAD_APICID) continue; if (apicid_to_node[apicid] == NUMA_NO_NODE) diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h index a7c75ea408a8..6d011bd6067d 100644 --- a/include/asm-x86/ipi.h +++ b/include/asm-x86/ipi.h @@ -119,7 +119,7 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) */ local_irq_save(flags); for_each_cpu_mask(query_cpu, mask) { - __send_IPI_dest_field(x86_cpu_to_apicid[query_cpu], + __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), vector, APIC_DEST_PHYSICAL); } local_irq_restore(flags); diff --git a/include/asm-x86/smp_32.h b/include/asm-x86/smp_32.h index ee46038d126c..9006f6041bf8 100644 --- a/include/asm-x86/smp_32.h +++ b/include/asm-x86/smp_32.h @@ -39,9 +39,11 @@ extern void lock_ipi_call_lock(void); extern void unlock_ipi_call_lock(void); #define MAX_APICID 256 -extern u8 x86_cpu_to_apicid[]; +extern u8 __initdata x86_cpu_to_apicid_init[]; +extern void *x86_cpu_to_apicid_ptr; +DECLARE_PER_CPU(u8, x86_cpu_to_apicid); -#define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] +#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) extern void set_cpu_sibling_map(int cpu); diff --git a/include/asm-x86/smp_64.h b/include/asm-x86/smp_64.h index 9d35018e54fe..f1545704e24e 100644 --- a/include/asm-x86/smp_64.h +++ b/include/asm-x86/smp_64.h @@ -86,7 +86,9 @@ static inline int hard_smp_processor_id(void) * Some lowlevel functions might want to know about * the real APIC ID <-> CPU # mapping. */ -extern u8 x86_cpu_to_apicid[NR_CPUS]; /* physical ID */ +extern u8 __initdata x86_cpu_to_apicid_init[]; +extern void *x86_cpu_to_apicid_ptr; +DECLARE_PER_CPU(u8, x86_cpu_to_apicid); /* physical ID */ extern u8 bios_cpu_apicid[]; static inline int cpu_present_to_apicid(int mps_cpu) @@ -117,7 +119,7 @@ static __inline int logical_smp_processor_id(void) } #ifdef CONFIG_SMP -#define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] +#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) #else #define cpu_physical_id(cpu) boot_cpu_id #endif /* !CONFIG_SMP */ From b6278470b7cdbf2bf6ba7345130d83924451a51a Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 19 Oct 2007 20:35:03 +0200 Subject: [PATCH 28/33] x86: convert cpu_llc_id to be a per cpu variable Convert cpu_llc_id from a static array sized by NR_CPUS to a per_cpu variable. This saves sizeof(cpu_llc_id) * NR unused cpus. Access is mostly from startup and CPU HOTPLUG functions. Note there's an additional change of the type of cpu_llc_id from int to u8 for ARCH i386 to correspond with the same type in ARCH x86_64. Signed-off-by: Mike Travis Cc: Andi Kleen Cc: Christoph Lameter Cc: "Siddha, Suresh B" Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/intel_cacheinfo.c | 4 ++-- arch/x86/kernel/smpboot_32.c | 6 +++--- arch/x86/kernel/smpboot_64.c | 6 +++--- include/asm-x86/processor_32.h | 6 +++++- include/asm-x86/smp_64.h | 3 ++- 5 files changed, 15 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 297a24116949..35c7ebb57423 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -417,14 +417,14 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) if (new_l2) { l2 = new_l2; #ifdef CONFIG_X86_HT - cpu_llc_id[cpu] = l2_id; + per_cpu(cpu_llc_id, cpu) = l2_id; #endif } if (new_l3) { l3 = new_l3; #ifdef CONFIG_X86_HT - cpu_llc_id[cpu] = l3_id; + per_cpu(cpu_llc_id, cpu) = l3_id; #endif } diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 631be36ec2a9..47bccfc283d9 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -67,7 +67,7 @@ int smp_num_siblings = 1; EXPORT_SYMBOL(smp_num_siblings); /* Last level cache ID of each logical CPU */ -int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; +DEFINE_PER_CPU(u8, cpu_llc_id) = BAD_APICID; /* representing HT siblings of each logical CPU */ DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); @@ -348,8 +348,8 @@ void __cpuinit set_cpu_sibling_map(int cpu) } for_each_cpu_mask(i, cpu_sibling_setup_map) { - if (cpu_llc_id[cpu] != BAD_APICID && - cpu_llc_id[cpu] == cpu_llc_id[i]) { + if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && + per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { cpu_set(i, c[cpu].llc_shared_map); cpu_set(cpu, c[i].llc_shared_map); } diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index c3e8668c5278..7c8f58643186 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -65,7 +65,7 @@ int smp_num_siblings = 1; EXPORT_SYMBOL(smp_num_siblings); /* Last level cache ID of each logical CPU */ -u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; +DEFINE_PER_CPU(u8, cpu_llc_id) = BAD_APICID; /* Bitmask of currently online CPUs */ cpumask_t cpu_online_map __read_mostly; @@ -283,8 +283,8 @@ static inline void set_cpu_sibling_map(int cpu) } for_each_cpu_mask(i, cpu_sibling_setup_map) { - if (cpu_llc_id[cpu] != BAD_APICID && - cpu_llc_id[cpu] == cpu_llc_id[i]) { + if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && + per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { cpu_set(i, c[cpu].llc_shared_map); cpu_set(cpu, c[i].llc_shared_map); } diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h index 83800e7496ee..565b6fc5f5b4 100644 --- a/include/asm-x86/processor_32.h +++ b/include/asm-x86/processor_32.h @@ -110,7 +110,11 @@ extern struct cpuinfo_x86 cpu_data[]; #define current_cpu_data boot_cpu_data #endif -extern int cpu_llc_id[NR_CPUS]; +/* + * the following now lives in the per cpu area: + * extern int cpu_llc_id[NR_CPUS]; + */ +DECLARE_PER_CPU(u8, cpu_llc_id); extern char ignore_fpu_irq; void __init cpu_detect(struct cpuinfo_x86 *c); diff --git a/include/asm-x86/smp_64.h b/include/asm-x86/smp_64.h index f1545704e24e..6f0e0273b646 100644 --- a/include/asm-x86/smp_64.h +++ b/include/asm-x86/smp_64.h @@ -49,7 +49,7 @@ extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *), */ DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); DECLARE_PER_CPU(cpumask_t, cpu_core_map); -extern u8 cpu_llc_id[NR_CPUS]; +DECLARE_PER_CPU(u8, cpu_llc_id); #define SMP_TRAMPOLINE_BASE 0x6000 @@ -121,6 +121,7 @@ static __inline int logical_smp_processor_id(void) #ifdef CONFIG_SMP #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) #else +extern unsigned int boot_cpu_id; #define cpu_physical_id(cpu) boot_cpu_id #endif /* !CONFIG_SMP */ #endif From 71b31233a215be27e2efbcc0e739cbebb0bde078 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 19 Oct 2007 20:35:03 +0200 Subject: [PATCH 29/33] x86: acpi use cpu_physical_id This is from an earlier message from Christoph Lameter: processor_core.c currently tries to determine the apicid by special casing for IA64 and x86. The desired information is readily available via cpu_physical_id() on IA64, i386 and x86_64. Signed-off-by: Christoph Lameter Additionally, boot_cpu_id needed to be exported to fix compile errors in dma code when !CONFIG_SMP. Signed-off-by: Mike Travis Cc: Andi Kleen Cc: Christoph Lameter Cc: "Siddha, Suresh B" Cc: Len Brown Signed-off-by: Andrew Morton --- arch/x86/kernel/mpparse_64.c | 2 ++ drivers/acpi/processor_core.c | 8 +------- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 4336c0fc3b81..ef4aab123581 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -57,6 +57,8 @@ unsigned long mp_lapic_addr = 0; /* Processor that is doing the boot up */ unsigned int boot_cpu_id = -1U; +EXPORT_SYMBOL(boot_cpu_id); + /* Internal processor count */ unsigned int num_processors __cpuinitdata = 0; diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 9f11dc296cdd..fd48110f084e 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -421,12 +421,6 @@ static int map_lsapic_id(struct acpi_subtable_header *entry, return 0; } -#ifdef CONFIG_IA64 -#define arch_cpu_to_apicid ia64_cpu_to_sapicid -#else -#define arch_cpu_to_apicid x86_cpu_to_apicid -#endif - static int map_madt_entry(u32 acpi_id) { unsigned long madt_end, entry; @@ -500,7 +494,7 @@ static int get_cpu_id(acpi_handle handle, u32 acpi_id) return apic_id; for (i = 0; i < NR_CPUS; ++i) { - if (arch_cpu_to_apicid[i] == apic_id) + if (cpu_physical_id(i) == apic_id) return i; } return -1; From 124d395fd05efb65d00ca23c7bcc86c272bd8813 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Fri, 19 Oct 2007 20:35:04 +0200 Subject: [PATCH 30/33] i386: do not BUG_ON() when MSR is unknown Here is a small patch to change the behavior of the PMU msr allocator to avoid BUG_ON() when the MSR is unknwon. Instead, it now returns ok, which means "I do not manage". The current allocator is not yet managing the full set of PMU registers (e.g., GLOBAL_* on Core 2). [watchdog] do not BUG_ON() in the MSR allocator if MSR is unknown, return ok instead Signed-off-by: Stephane Eranian Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/perfctr-watchdog.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 54cdbf1a40f1..c02541e6e653 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -120,7 +120,9 @@ int reserve_perfctr_nmi(unsigned int msr) unsigned int counter; counter = nmi_perfctr_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); + /* register not managed by the allocator? */ + if (counter > NMI_MAX_COUNTER_BITS) + return 1; if (!test_and_set_bit(counter, perfctr_nmi_owner)) return 1; @@ -132,7 +134,9 @@ void release_perfctr_nmi(unsigned int msr) unsigned int counter; counter = nmi_perfctr_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); + /* register not managed by the allocator? */ + if (counter > NMI_MAX_COUNTER_BITS) + return; clear_bit(counter, perfctr_nmi_owner); } @@ -142,7 +146,9 @@ int reserve_evntsel_nmi(unsigned int msr) unsigned int counter; counter = nmi_evntsel_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); + /* register not managed by the allocator? */ + if (counter > NMI_MAX_COUNTER_BITS) + return 1; if (!test_and_set_bit(counter, evntsel_nmi_owner)) return 1; @@ -154,7 +160,9 @@ void release_evntsel_nmi(unsigned int msr) unsigned int counter; counter = nmi_evntsel_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); + /* register not managed by the allocator? */ + if (counter > NMI_MAX_COUNTER_BITS) + return; clear_bit(counter, evntsel_nmi_owner); } From ab483570a13be2a34c0502b166df8f8b26802103 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 19 Oct 2007 20:35:04 +0200 Subject: [PATCH 31/33] x86 & generic: change to __builtin_prefetch() gcc 3.2+ supports __builtin_prefetch, so it's possible to use it on all architectures. Change the generic fallback in linux/prefetch.h to use it instead of noping it out. gcc should do the right thing when the architecture doesn't support prefetching Undefine the x86-64 inline assembler version and use the fallback. Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/processor_64.h | 6 ------ include/linux/prefetch.h | 9 ++------- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h index f422becbddd9..398c39160fce 100644 --- a/include/asm-x86/processor_64.h +++ b/include/asm-x86/processor_64.h @@ -390,12 +390,6 @@ static inline void sync_core(void) asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory"); } -#define ARCH_HAS_PREFETCH -static inline void prefetch(void *x) -{ - asm volatile("prefetcht0 (%0)" :: "r" (x)); -} - #define ARCH_HAS_PREFETCHW 1 static inline void prefetchw(void *x) { diff --git a/include/linux/prefetch.h b/include/linux/prefetch.h index 1adfe668d031..af7c36a5a521 100644 --- a/include/linux/prefetch.h +++ b/include/linux/prefetch.h @@ -34,17 +34,12 @@ */ -/* - * These cannot be do{}while(0) macros. See the mental gymnastics in - * the loop macro. - */ - #ifndef ARCH_HAS_PREFETCH -static inline void prefetch(const void *x) {;} +#define prefetch(x) __builtin_prefetch(x) #endif #ifndef ARCH_HAS_PREFETCHW -static inline void prefetchw(const void *x) {;} +#define prefetchw(x) __builtin_prefetch(x,1) #endif #ifndef ARCH_HAS_SPINLOCK_PREFETCH From f1df280f53d7c3ce8613a3b25d1efe009b9860dd Mon Sep 17 00:00:00 2001 From: Jan Blunck Date: Fri, 19 Oct 2007 20:35:04 +0200 Subject: [PATCH 32/33] x86: introduce frame_pointer() and stack_pointer() This patch defines frame_pointer() and stack_pointer() similar to the already defined instruction_pointer(). Thus the oprofile code can be written in a more readable fashion. [ tglx: arch/x86 adaptation ] Signed-off-by: Jan Blunck Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/oprofile/backtrace.c | 12 ++---------- include/asm-x86/ptrace_32.h | 2 ++ include/asm-x86/ptrace_64.h | 2 ++ 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index dc59a808009f..0ed046a187f7 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -76,16 +76,8 @@ dump_user_backtrace(struct frame_head * head) void x86_backtrace(struct pt_regs * const regs, unsigned int depth) { - struct frame_head *head; - unsigned long stack; - -#ifdef CONFIG_X86_64 - head = (struct frame_head *)regs->rbp; - stack = regs->rsp; -#else - head = (struct frame_head *)regs->ebp; - stack = regs->esp; -#endif + struct frame_head *head = (struct frame_head *)frame_pointer(regs); + unsigned long stack = stack_pointer(regs); if (!user_mode_vm(regs)) { if (depth) diff --git a/include/asm-x86/ptrace_32.h b/include/asm-x86/ptrace_32.h index 6002597b9e12..78d063dabe0a 100644 --- a/include/asm-x86/ptrace_32.h +++ b/include/asm-x86/ptrace_32.h @@ -55,6 +55,8 @@ static inline int v8086_mode(struct pt_regs *regs) } #define instruction_pointer(regs) ((regs)->eip) +#define frame_pointer(regs) ((regs)->ebp) +#define stack_pointer(regs) ((regs)->esp) #define regs_return_value(regs) ((regs)->eax) extern unsigned long profile_pc(struct pt_regs *regs); diff --git a/include/asm-x86/ptrace_64.h b/include/asm-x86/ptrace_64.h index 7f166ccb0606..7bfe61e1b705 100644 --- a/include/asm-x86/ptrace_64.h +++ b/include/asm-x86/ptrace_64.h @@ -40,6 +40,8 @@ struct pt_regs { #define user_mode(regs) (!!((regs)->cs & 3)) #define user_mode_vm(regs) user_mode(regs) #define instruction_pointer(regs) ((regs)->rip) +#define frame_pointer(regs) ((regs)->rbp) +#define stack_pointer(regs) ((regs)->rsp) #define regs_return_value(regs) ((regs)->rax) extern unsigned long profile_pc(struct pt_regs *regs); From 92cb7612aee39642d109b8d935ad265e602c0563 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 19 Oct 2007 20:35:04 +0200 Subject: [PATCH 33/33] x86: convert cpuinfo_x86 array to a per_cpu array cpu_data is currently an array defined using NR_CPUS. This means that we overallocate since we will rarely really use maximum configured cpus. When NR_CPU count is raised to 4096 the size of cpu_data becomes 3,145,728 bytes. These changes were adopted from the sparc64 (and ia64) code. An additional field was added to cpuinfo_x86 to be a non-ambiguous cpu index. This corresponds to the index into a cpumask_t as well as the per_cpu index. It's used in various places like show_cpuinfo(). cpu_data is defined to be the boot_cpu_data structure for the NON-SMP case. Signed-off-by: Mike Travis Acked-by: Christoph Lameter Cc: Andi Kleen Cc: James Bottomley Cc: Dmitry Torokhov Cc: "Antonino A. Daplas" Cc: Mark M. Hoffman Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/cstate.c | 4 +- arch/x86/kernel/acpi/processor.c | 2 +- arch/x86/kernel/alternative.c | 6 +-- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 4 +- arch/x86/kernel/cpu/cpufreq/e_powersaver.c | 2 +- arch/x86/kernel/cpu/cpufreq/elanfreq.c | 4 +- arch/x86/kernel/cpu/cpufreq/longhaul.c | 4 +- arch/x86/kernel/cpu/cpufreq/longrun.c | 4 +- arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 4 +- arch/x86/kernel/cpu/cpufreq/powernow-k6.c | 2 +- arch/x86/kernel/cpu/cpufreq/powernow-k7.c | 2 +- arch/x86/kernel/cpu/cpufreq/sc520_freq.c | 4 +- .../kernel/cpu/cpufreq/speedstep-centrino.c | 6 +-- arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | 2 +- arch/x86/kernel/cpu/intel_cacheinfo.c | 8 +-- arch/x86/kernel/cpu/proc.c | 11 ++-- arch/x86/kernel/cpuid.c | 2 +- arch/x86/kernel/mce_64.c | 3 +- arch/x86/kernel/mce_amd_64.c | 4 +- arch/x86/kernel/microcode.c | 6 +-- arch/x86/kernel/msr.c | 2 +- arch/x86/kernel/setup_64.c | 20 +++++--- arch/x86/kernel/smp_32.c | 2 +- arch/x86/kernel/smpboot_32.c | 51 ++++++++++--------- arch/x86/kernel/smpboot_64.c | 45 ++++++++-------- arch/x86/kernel/tsc_32.c | 8 +-- arch/x86/kernel/tsc_64.c | 4 +- arch/x86/kernel/vsyscall_64.c | 2 +- arch/x86/lib/delay_32.c | 2 +- arch/x86/lib/delay_64.c | 3 +- arch/x86/mach-voyager/voyager_smp.c | 12 ++--- drivers/hwmon/coretemp.c | 6 +-- drivers/hwmon/hwmon-vid.c | 2 +- drivers/input/gameport/gameport.c | 3 +- drivers/video/geode/video_gx.c | 2 +- include/asm-x86/processor_32.h | 10 ++-- include/asm-x86/processor_64.h | 10 ++-- include/asm-x86/topology_32.h | 4 +- include/asm-x86/topology_64.h | 4 +- 39 files changed, 148 insertions(+), 128 deletions(-) diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 2d39f55d29a8..10b67170b133 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -29,7 +29,7 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, unsigned int cpu) { - struct cpuinfo_x86 *c = cpu_data + cpu; + struct cpuinfo_x86 *c = &cpu_data(cpu); flags->bm_check = 0; if (num_online_cpus() == 1) @@ -72,7 +72,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, struct acpi_processor_cx *cx, struct acpi_power_register *reg) { struct cstate_entry *percpu_entry; - struct cpuinfo_x86 *c = cpu_data + cpu; + struct cpuinfo_x86 *c = &cpu_data(cpu); cpumask_t saved_mask; int retval; diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index b54fded49834..2ed0a4ce62f0 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c @@ -63,7 +63,7 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) void arch_acpi_processor_init_pdc(struct acpi_processor *pr) { unsigned int cpu = pr->id; - struct cpuinfo_x86 *c = cpu_data + cpu; + struct cpuinfo_x86 *c = &cpu_data(cpu); pr->pdc = NULL; if (c->x86_vendor == X86_VENDOR_INTEL) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 3bd2688bd443..d6405e0842b5 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -357,14 +357,14 @@ void alternatives_smp_switch(int smp) if (smp) { printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); - clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); + clear_bit(X86_FEATURE_UP, cpu_data(0).x86_capability); list_for_each_entry(mod, &smp_alt_modules, next) alternatives_smp_lock(mod->locks, mod->locks_end, mod->text, mod->text_end); } else { printk(KERN_INFO "SMP alternatives: switching to UP code\n"); set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); - set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); + set_bit(X86_FEATURE_UP, cpu_data(0).x86_capability); list_for_each_entry(mod, &smp_alt_modules, next) alternatives_smp_unlock(mod->locks, mod->locks_end, mod->text, mod->text_end); @@ -432,7 +432,7 @@ void __init alternative_instructions(void) if (1 == num_possible_cpus()) { printk(KERN_INFO "SMP alternatives: switching to UP code\n"); set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); - set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); + set_bit(X86_FEATURE_UP, cpu_data(0).x86_capability); alternatives_smp_unlock(__smp_locks, __smp_locks_end, _text, _etext); } diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 2ca43ba32bc0..fea0af0476b9 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -77,7 +77,7 @@ static unsigned int acpi_pstate_strict; static int check_est_cpu(unsigned int cpuid) { - struct cpuinfo_x86 *cpu = &cpu_data[cpuid]; + struct cpuinfo_x86 *cpu = &cpu_data(cpuid); if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST)) @@ -560,7 +560,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) unsigned int cpu = policy->cpu; struct acpi_cpufreq_data *data; unsigned int result = 0; - struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; + struct cpuinfo_x86 *c = &cpu_data(policy->cpu); struct acpi_processor_performance *perf; dprintk("acpi_cpufreq_cpu_init\n"); diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c index c11baaf9f2b4..326a4c81f684 100644 --- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c +++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c @@ -305,7 +305,7 @@ static struct cpufreq_driver eps_driver = { static int __init eps_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); /* This driver will work only on Centaur C7 processors with * Enhanced SpeedStep/PowerSaver registers */ diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c index 1e7ae7dafcf6..94619c22f563 100644 --- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c +++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c @@ -199,7 +199,7 @@ static int elanfreq_target (struct cpufreq_policy *policy, static int elanfreq_cpu_init(struct cpufreq_policy *policy) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); unsigned int i; int result; @@ -280,7 +280,7 @@ static struct cpufreq_driver elanfreq_driver = { static int __init elanfreq_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); /* Test if we have the right hardware */ if ((c->x86_vendor != X86_VENDOR_AMD) || diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index 5045f5d583c8..749d00cb2ebd 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -780,7 +780,7 @@ static int longhaul_setup_southbridge(void) static int __init longhaul_cpu_init(struct cpufreq_policy *policy) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); char *cpuname=NULL; int ret; u32 lo, hi; @@ -959,7 +959,7 @@ static struct cpufreq_driver longhaul_driver = { static int __init longhaul_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6) return -ENODEV; diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c index b2689514295a..af4a867a097c 100644 --- a/arch/x86/kernel/cpu/cpufreq/longrun.c +++ b/arch/x86/kernel/cpu/cpufreq/longrun.c @@ -172,7 +172,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, u32 save_lo, save_hi; u32 eax, ebx, ecx, edx; u32 try_hi; - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); if (!low_freq || !high_freq) return -EINVAL; @@ -298,7 +298,7 @@ static struct cpufreq_driver longrun_driver = { */ static int __init longrun_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); if (c->x86_vendor != X86_VENDOR_TRANSMETA || !cpu_has(c, X86_FEATURE_LONGRUN)) diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 793eae854f4f..14791ec55cfd 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -195,7 +195,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) { - struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; + struct cpuinfo_x86 *c = &cpu_data(policy->cpu); int cpuid = 0; unsigned int i; @@ -279,7 +279,7 @@ static struct cpufreq_driver p4clockmod_driver = { static int __init cpufreq_p4_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); int ret; /* diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index 6d0285339317..42405b4e34ed 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c @@ -215,7 +215,7 @@ static struct cpufreq_driver powernow_k6_driver = { */ static int __init powernow_k6_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) || ((c->x86_model != 12) && (c->x86_model != 13))) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index f3686a5f2308..b5a9863d6cdc 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -114,7 +114,7 @@ static int check_fsb(unsigned int fsbspeed) static int check_powernow(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); unsigned int maxei, eax, ebx, ecx, edx; if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 !=6)) { diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c index d9f3e90a7ae0..42da9bd677d6 100644 --- a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c +++ b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c @@ -102,7 +102,7 @@ static int sc520_freq_target (struct cpufreq_policy *policy, static int sc520_freq_cpu_init(struct cpufreq_policy *policy) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); int result; /* capability check */ @@ -151,7 +151,7 @@ static struct cpufreq_driver sc520_freq_driver = { static int __init sc520_freq_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); int err; /* Test if we have the right hardware */ diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 811d47438546..3031f1196192 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -230,7 +230,7 @@ static struct cpu_model models[] = static int centrino_cpu_init_table(struct cpufreq_policy *policy) { - struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu]; + struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu); struct cpu_model *model; for(model = models; model->cpu_id != NULL; model++) @@ -340,7 +340,7 @@ static unsigned int get_cur_freq(unsigned int cpu) static int centrino_cpu_init(struct cpufreq_policy *policy) { - struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu]; + struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu); unsigned freq; unsigned l, h; int ret; @@ -612,7 +612,7 @@ static struct cpufreq_driver centrino_driver = { */ static int __init centrino_init(void) { - struct cpuinfo_x86 *cpu = cpu_data; + struct cpuinfo_x86 *cpu = &cpu_data(0); if (!cpu_has(cpu, X86_FEATURE_EST)) return -ENODEV; diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index b1acc8ce3167..76c3ab0da468 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c @@ -228,7 +228,7 @@ EXPORT_SYMBOL_GPL(speedstep_get_processor_frequency); unsigned int speedstep_detect_processor (void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); u32 ebx, msr_lo, msr_hi; dprintk("x86: %x, model: %x\n", c->x86, c->x86_model); diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 35c7ebb57423..9921b01fe199 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -295,7 +295,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; #ifdef CONFIG_X86_HT - unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data); + unsigned int cpu = c->cpu_index; #endif if (c->cpuid_level > 3) { @@ -459,7 +459,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) struct _cpuid4_info *this_leaf, *sibling_leaf; unsigned long num_threads_sharing; int index_msb, i; - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(cpu); this_leaf = CPUID4_INFO_IDX(cpu, index); num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; @@ -470,8 +470,8 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) index_msb = get_count_order(num_threads_sharing); for_each_online_cpu(i) { - if (c[i].apicid >> index_msb == - c[cpu].apicid >> index_msb) { + if (cpu_data(i).apicid >> index_msb == + c->apicid >> index_msb) { cpu_set(i, this_leaf->shared_cpu_map); if (i != cpu && cpuid4_info[i]) { sibling_leaf = CPUID4_INFO_IDX(i, index); diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 879a0f789b1e..2d42b414b777 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -85,12 +85,13 @@ static int show_cpuinfo(struct seq_file *m, void *v) /* nothing */ }; struct cpuinfo_x86 *c = v; - int i, n = c - cpu_data; + int i, n = 0; int fpu_exception; #ifdef CONFIG_SMP if (!cpu_online(n)) return 0; + n = c->cpu_index; #endif seq_printf(m, "processor\t: %d\n" "vendor_id\t: %s\n" @@ -175,11 +176,15 @@ static int show_cpuinfo(struct seq_file *m, void *v) static void *c_start(struct seq_file *m, loff_t *pos) { - return *pos < NR_CPUS ? cpu_data + *pos : NULL; + if (*pos == 0) /* just in case, cpu 0 is not the first */ + *pos = first_cpu(cpu_possible_map); + if ((*pos) < NR_CPUS && cpu_possible(*pos)) + return &cpu_data(*pos); + return NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) { - ++*pos; + *pos = next_cpu(*pos, cpu_possible_map); return c_start(m, pos); } static void c_stop(struct seq_file *m, void *v) diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 2086c727fb09..05c9936a16cc 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -114,7 +114,7 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, static int cpuid_open(struct inode *inode, struct file *file) { unsigned int cpu = iminor(file->f_path.dentry->d_inode); - struct cpuinfo_x86 *c = &(cpu_data)[cpu]; + struct cpuinfo_x86 *c = &cpu_data(cpu); if (cpu >= NR_CPUS || !cpu_online(cpu)) return -ENXIO; /* No such CPU */ diff --git a/arch/x86/kernel/mce_64.c b/arch/x86/kernel/mce_64.c index 66e6b797b2cb..2cf20de5beca 100644 --- a/arch/x86/kernel/mce_64.c +++ b/arch/x86/kernel/mce_64.c @@ -799,7 +799,8 @@ static __cpuinit int mce_create_device(unsigned int cpu) { int err; int i; - if (!mce_available(&cpu_data[cpu])) + + if (!mce_available(&cpu_data(cpu))) return -EIO; memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); diff --git a/arch/x86/kernel/mce_amd_64.c b/arch/x86/kernel/mce_amd_64.c index 0d2afd96aca4..752fb16a817d 100644 --- a/arch/x86/kernel/mce_amd_64.c +++ b/arch/x86/kernel/mce_amd_64.c @@ -472,11 +472,11 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) sprintf(name, "threshold_bank%i", bank); #ifdef CONFIG_SMP - if (cpu_data[cpu].cpu_core_id && shared_bank[bank]) { /* symlink */ + if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ i = first_cpu(per_cpu(cpu_core_map, cpu)); /* first core not up yet */ - if (cpu_data[i].cpu_core_id) + if (cpu_data(i).cpu_core_id) goto out; /* already linked */ diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 09cf78110358..09c315214a5e 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -132,7 +132,7 @@ static struct ucode_cpu_info { static void collect_cpu_info(int cpu_num) { - struct cpuinfo_x86 *c = cpu_data + cpu_num; + struct cpuinfo_x86 *c = &cpu_data(cpu_num); struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; unsigned int val[2]; @@ -522,7 +522,7 @@ static struct platform_device *microcode_pdev; static int cpu_request_microcode(int cpu) { char name[30]; - struct cpuinfo_x86 *c = cpu_data + cpu; + struct cpuinfo_x86 *c = &cpu_data(cpu); const struct firmware *firmware; void *buf; unsigned long size; @@ -570,7 +570,7 @@ static int cpu_request_microcode(int cpu) static int apply_microcode_check_cpu(int cpu) { - struct cpuinfo_x86 *c = cpu_data + cpu; + struct cpuinfo_x86 *c = &cpu_data(cpu); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; cpumask_t old; unsigned int val[2]; diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index e18e516cf549..ee6eba4ecfea 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -112,7 +112,7 @@ static ssize_t msr_write(struct file *file, const char __user *buf, static int msr_open(struct inode *inode, struct file *file) { unsigned int cpu = iminor(file->f_path.dentry->d_inode); - struct cpuinfo_x86 *c = &(cpu_data)[cpu]; + struct cpuinfo_x86 *c = &cpu_data(cpu); if (cpu >= NR_CPUS || !cpu_online(cpu)) return -ENXIO; /* No such CPU */ diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 94630c66470e..0f0d9d4a6650 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -534,7 +534,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) but in the same order as the HT nodeids. If that doesn't result in a usable node fall back to the path for the previous case. */ - int ht_nodeid = apicid - (cpu_data[0].phys_proc_id << bits); + int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits); if (ht_nodeid >= 0 && apicid_to_node[ht_nodeid] != NUMA_NO_NODE) node = apicid_to_node[ht_nodeid]; @@ -858,6 +858,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_SMP c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; + c->cpu_index = 0; #endif } @@ -964,6 +965,7 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) static int show_cpuinfo(struct seq_file *m, void *v) { struct cpuinfo_x86 *c = v; + int cpu = 0; /* * These flag bits must match the definitions in . @@ -1042,8 +1044,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) #ifdef CONFIG_SMP - if (!cpu_online(c-cpu_data)) + if (!cpu_online(c->cpu_index)) return 0; + cpu = c->cpu_index; #endif seq_printf(m,"processor\t: %u\n" @@ -1051,7 +1054,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) "cpu family\t: %d\n" "model\t\t: %d\n" "model name\t: %s\n", - (unsigned)(c-cpu_data), + (unsigned)cpu, c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", c->x86, (int)c->x86_model, @@ -1063,7 +1066,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "stepping\t: unknown\n"); if (cpu_has(c,X86_FEATURE_TSC)) { - unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data)); + unsigned int freq = cpufreq_quick_get((unsigned)cpu); if (!freq) freq = cpu_khz; seq_printf(m, "cpu MHz\t\t: %u.%03u\n", @@ -1076,7 +1079,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) #ifdef CONFIG_SMP if (smp_num_siblings * c->x86_max_cores > 1) { - int cpu = c - cpu_data; seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); seq_printf(m, "siblings\t: %d\n", cpus_weight(per_cpu(cpu_core_map, cpu))); @@ -1134,12 +1136,16 @@ static int show_cpuinfo(struct seq_file *m, void *v) static void *c_start(struct seq_file *m, loff_t *pos) { - return *pos < NR_CPUS ? cpu_data + *pos : NULL; + if (*pos == 0) /* just in case, cpu 0 is not the first */ + *pos = first_cpu(cpu_possible_map); + if ((*pos) < NR_CPUS && cpu_possible(*pos)) + return &cpu_data(*pos); + return NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) { - ++*pos; + *pos = next_cpu(*pos, cpu_possible_map); return c_start(m, pos); } diff --git a/arch/x86/kernel/smp_32.c b/arch/x86/kernel/smp_32.c index 4974c3d2a0a6..2621ca3b2e4d 100644 --- a/arch/x86/kernel/smp_32.c +++ b/arch/x86/kernel/smp_32.c @@ -610,7 +610,7 @@ static void stop_this_cpu (void * dummy) */ cpu_clear(smp_processor_id(), cpu_online_map); disable_local_APIC(); - if (cpu_data[smp_processor_id()].hlt_works_ok) + if (cpu_data(smp_processor_id()).hlt_works_ok) for(;;) halt(); for (;;); } diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index 47bccfc283d9..7b8fdfa169dd 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -89,8 +89,8 @@ EXPORT_SYMBOL(cpu_possible_map); static cpumask_t smp_commenced_mask; /* Per CPU bogomips and other parameters */ -struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; -EXPORT_SYMBOL(cpu_data); +DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); +EXPORT_PER_CPU_SYMBOL(cpu_info); /* * The following static array is used during kernel startup @@ -158,9 +158,10 @@ void __init smp_alloc_memory(void) void __cpuinit smp_store_cpu_info(int id) { - struct cpuinfo_x86 *c = cpu_data + id; + struct cpuinfo_x86 *c = &cpu_data(id); *c = boot_cpu_data; + c->cpu_index = id; if (id!=0) identify_secondary_cpu(c); /* @@ -302,7 +303,7 @@ static int cpucount; /* maps the cpu to the sched domain representing multi-core */ cpumask_t cpu_coregroup_map(int cpu) { - struct cpuinfo_x86 *c = cpu_data + cpu; + struct cpuinfo_x86 *c = &cpu_data(cpu); /* * For perf, we return last level cache shared map. * And for power savings, we return cpu_core_map @@ -319,41 +320,41 @@ static cpumask_t cpu_sibling_setup_map; void __cpuinit set_cpu_sibling_map(int cpu) { int i; - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(cpu); cpu_set(cpu, cpu_sibling_setup_map); if (smp_num_siblings > 1) { for_each_cpu_mask(i, cpu_sibling_setup_map) { - if (c[cpu].phys_proc_id == c[i].phys_proc_id && - c[cpu].cpu_core_id == c[i].cpu_core_id) { + if (c->phys_proc_id == cpu_data(i).phys_proc_id && + c->cpu_core_id == cpu_data(i).cpu_core_id) { cpu_set(i, per_cpu(cpu_sibling_map, cpu)); cpu_set(cpu, per_cpu(cpu_sibling_map, i)); cpu_set(i, per_cpu(cpu_core_map, cpu)); cpu_set(cpu, per_cpu(cpu_core_map, i)); - cpu_set(i, c[cpu].llc_shared_map); - cpu_set(cpu, c[i].llc_shared_map); + cpu_set(i, c->llc_shared_map); + cpu_set(cpu, cpu_data(i).llc_shared_map); } } } else { cpu_set(cpu, per_cpu(cpu_sibling_map, cpu)); } - cpu_set(cpu, c[cpu].llc_shared_map); + cpu_set(cpu, c->llc_shared_map); if (current_cpu_data.x86_max_cores == 1) { per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu); - c[cpu].booted_cores = 1; + c->booted_cores = 1; return; } for_each_cpu_mask(i, cpu_sibling_setup_map) { if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { - cpu_set(i, c[cpu].llc_shared_map); - cpu_set(cpu, c[i].llc_shared_map); + cpu_set(i, c->llc_shared_map); + cpu_set(cpu, cpu_data(i).llc_shared_map); } - if (c[cpu].phys_proc_id == c[i].phys_proc_id) { + if (c->phys_proc_id == cpu_data(i).phys_proc_id) { cpu_set(i, per_cpu(cpu_core_map, cpu)); cpu_set(cpu, per_cpu(cpu_core_map, i)); /* @@ -365,15 +366,15 @@ void __cpuinit set_cpu_sibling_map(int cpu) * the booted_cores for this new cpu */ if (first_cpu(per_cpu(cpu_sibling_map, i)) == i) - c[cpu].booted_cores++; + c->booted_cores++; /* * increment the core count for all * the other cpus in this package */ if (i != cpu) - c[i].booted_cores++; - } else if (i != cpu && !c[cpu].booted_cores) - c[cpu].booted_cores = c[i].booted_cores; + cpu_data(i).booted_cores++; + } else if (i != cpu && !c->booted_cores) + c->booted_cores = cpu_data(i).booted_cores; } } } @@ -852,7 +853,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) /* number CPUs logically, starting from 1 (BSP is 0) */ Dprintk("OK.\n"); printk("CPU%d: ", cpu); - print_cpu_info(&cpu_data[cpu]); + print_cpu_info(&cpu_data(cpu)); Dprintk("CPU has booted.\n"); } else { boot_error= 1; @@ -969,7 +970,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) */ smp_store_cpu_info(0); /* Final full version of the data */ printk("CPU%d: ", 0); - print_cpu_info(&cpu_data[0]); + print_cpu_info(&cpu_data(0)); boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); boot_cpu_logical_apicid = logical_smp_processor_id(); @@ -1092,7 +1093,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) Dprintk("Before bogomips.\n"); for (cpu = 0; cpu < NR_CPUS; cpu++) if (cpu_isset(cpu, cpu_callout_map)) - bogosum += cpu_data[cpu].loops_per_jiffy; + bogosum += cpu_data(cpu).loops_per_jiffy; printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", cpucount+1, @@ -1162,7 +1163,7 @@ void __init native_smp_prepare_boot_cpu(void) void remove_siblinginfo(int cpu) { int sibling; - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(cpu); for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) { cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); @@ -1170,15 +1171,15 @@ void remove_siblinginfo(int cpu) * last thread sibling in this cpu core going down */ if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) - c[sibling].booted_cores--; + cpu_data(sibling).booted_cores--; } for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu)) cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); cpus_clear(per_cpu(cpu_sibling_map, cpu)); cpus_clear(per_cpu(cpu_core_map, cpu)); - c[cpu].phys_proc_id = 0; - c[cpu].cpu_core_id = 0; + c->phys_proc_id = 0; + c->cpu_core_id = 0; cpu_clear(cpu, cpu_sibling_setup_map); } diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 7c8f58643186..fd1fff6a35a2 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -84,8 +84,8 @@ cpumask_t cpu_possible_map; EXPORT_SYMBOL(cpu_possible_map); /* Per CPU bogomips and other parameters */ -struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; -EXPORT_SYMBOL(cpu_data); +DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); +EXPORT_PER_CPU_SYMBOL(cpu_info); /* Set when the idlers are all forked */ int smp_threads_ready; @@ -138,9 +138,10 @@ static unsigned long __cpuinit setup_trampoline(void) static void __cpuinit smp_store_cpu_info(int id) { - struct cpuinfo_x86 *c = cpu_data + id; + struct cpuinfo_x86 *c = &cpu_data(id); *c = boot_cpu_data; + c->cpu_index = id; identify_cpu(c); print_cpu_info(c); } @@ -237,7 +238,7 @@ void __cpuinit smp_callin(void) /* maps the cpu to the sched domain representing multi-core */ cpumask_t cpu_coregroup_map(int cpu) { - struct cpuinfo_x86 *c = cpu_data + cpu; + struct cpuinfo_x86 *c = &cpu_data(cpu); /* * For perf, we return last level cache shared map. * And for power savings, we return cpu_core_map @@ -254,41 +255,41 @@ static cpumask_t cpu_sibling_setup_map; static inline void set_cpu_sibling_map(int cpu) { int i; - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(cpu); cpu_set(cpu, cpu_sibling_setup_map); if (smp_num_siblings > 1) { for_each_cpu_mask(i, cpu_sibling_setup_map) { - if (c[cpu].phys_proc_id == c[i].phys_proc_id && - c[cpu].cpu_core_id == c[i].cpu_core_id) { + if (c->phys_proc_id == cpu_data(i).phys_proc_id && + c->cpu_core_id == cpu_data(i).cpu_core_id) { cpu_set(i, per_cpu(cpu_sibling_map, cpu)); cpu_set(cpu, per_cpu(cpu_sibling_map, i)); cpu_set(i, per_cpu(cpu_core_map, cpu)); cpu_set(cpu, per_cpu(cpu_core_map, i)); - cpu_set(i, c[cpu].llc_shared_map); - cpu_set(cpu, c[i].llc_shared_map); + cpu_set(i, c->llc_shared_map); + cpu_set(cpu, cpu_data(i).llc_shared_map); } } } else { cpu_set(cpu, per_cpu(cpu_sibling_map, cpu)); } - cpu_set(cpu, c[cpu].llc_shared_map); + cpu_set(cpu, c->llc_shared_map); if (current_cpu_data.x86_max_cores == 1) { per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu); - c[cpu].booted_cores = 1; + c->booted_cores = 1; return; } for_each_cpu_mask(i, cpu_sibling_setup_map) { if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { - cpu_set(i, c[cpu].llc_shared_map); - cpu_set(cpu, c[i].llc_shared_map); + cpu_set(i, c->llc_shared_map); + cpu_set(cpu, cpu_data(i).llc_shared_map); } - if (c[cpu].phys_proc_id == c[i].phys_proc_id) { + if (c->phys_proc_id == cpu_data(i).phys_proc_id) { cpu_set(i, per_cpu(cpu_core_map, cpu)); cpu_set(cpu, per_cpu(cpu_core_map, i)); /* @@ -300,15 +301,15 @@ static inline void set_cpu_sibling_map(int cpu) * the booted_cores for this new cpu */ if (first_cpu(per_cpu(cpu_sibling_map, i)) == i) - c[cpu].booted_cores++; + c->booted_cores++; /* * increment the core count for all * the other cpus in this package */ if (i != cpu) - c[i].booted_cores++; - } else if (i != cpu && !c[cpu].booted_cores) - c[cpu].booted_cores = c[i].booted_cores; + cpu_data(i).booted_cores++; + } else if (i != cpu && !c->booted_cores) + c->booted_cores = cpu_data(i).booted_cores; } } } @@ -989,7 +990,7 @@ void __init smp_cpus_done(unsigned int max_cpus) static void remove_siblinginfo(int cpu) { int sibling; - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(cpu); for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) { cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); @@ -997,15 +998,15 @@ static void remove_siblinginfo(int cpu) * last thread sibling in this cpu core going down */ if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) - c[sibling].booted_cores--; + cpu_data(sibling).booted_cores--; } for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu)) cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); cpus_clear(per_cpu(cpu_sibling_map, cpu)); cpus_clear(per_cpu(cpu_core_map, cpu)); - c[cpu].phys_proc_id = 0; - c[cpu].cpu_core_id = 0; + c->phys_proc_id = 0; + c->cpu_core_id = 0; cpu_clear(cpu, cpu_sibling_setup_map); } diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index e87a3939ed40..b8a7cf671432 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -181,8 +181,8 @@ int recalibrate_cpu_khz(void) if (cpu_has_tsc) { cpu_khz = calculate_cpu_khz(); tsc_khz = cpu_khz; - cpu_data[0].loops_per_jiffy = - cpufreq_scale(cpu_data[0].loops_per_jiffy, + cpu_data(0).loops_per_jiffy = + cpufreq_scale(cpu_data(0).loops_per_jiffy, cpu_khz_old, cpu_khz); return 0; } else @@ -215,7 +215,7 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) return 0; } ref_freq = freq->old; - loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy; + loops_per_jiffy_ref = cpu_data(freq->cpu).loops_per_jiffy; cpu_khz_ref = cpu_khz; } @@ -223,7 +223,7 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || (val == CPUFREQ_RESUMECHANGE)) { if (!(freq->flags & CPUFREQ_CONST_LOOPS)) - cpu_data[freq->cpu].loops_per_jiffy = + cpu_data(freq->cpu).loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c index 9f22e542c374..9c70af45b42b 100644 --- a/arch/x86/kernel/tsc_64.c +++ b/arch/x86/kernel/tsc_64.c @@ -73,13 +73,13 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, struct cpufreq_freqs *freq = data; unsigned long *lpj, dummy; - if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC)) + if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) return 0; lpj = &dummy; if (!(freq->flags & CPUFREQ_CONST_LOOPS)) #ifdef CONFIG_SMP - lpj = &cpu_data[freq->cpu].loops_per_jiffy; + lpj = &cpu_data(freq->cpu).loops_per_jiffy; #else lpj = &boot_cpu_data.loops_per_jiffy; #endif diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 4a2c340ab0f3..78f2250963ae 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -291,7 +291,7 @@ static void __cpuinit vsyscall_set_cpu(int cpu) #ifdef CONFIG_NUMA node = cpu_to_node(cpu); #endif - if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) + if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP)) write_rdtscp_aux((node << 12) | cpu); /* Store cpu number in limit so that it can be loaded quickly diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c index f6edb11364df..952e7a89c2ac 100644 --- a/arch/x86/lib/delay_32.c +++ b/arch/x86/lib/delay_32.c @@ -82,7 +82,7 @@ inline void __const_udelay(unsigned long xloops) __asm__("mull %0" :"=d" (xloops), "=&a" (d0) :"1" (xloops), "0" - (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4))); + (cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4))); __delay(++xloops); } diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c index 2dbebd308347..0ebbfb9e7c7f 100644 --- a/arch/x86/lib/delay_64.c +++ b/arch/x86/lib/delay_64.c @@ -40,7 +40,8 @@ EXPORT_SYMBOL(__delay); inline void __const_udelay(unsigned long xloops) { - __delay(((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32) + 1); + __delay(((xloops * HZ * + cpu_data(raw_smp_processor_id()).loops_per_jiffy) >> 32) + 1); } EXPORT_SYMBOL(__const_udelay); diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index e4928aa6bdfb..f93a730b44d0 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -36,8 +36,8 @@ static unsigned long cpu_irq_affinity[NR_CPUS] __cacheline_aligned = { [0 ... NR /* per CPU data structure (for /proc/cpuinfo et al), visible externally * indexed physically */ -struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; -EXPORT_SYMBOL(cpu_data); +DEFINE_PER_CPU(cpuinfo_x86, cpu_info) __cacheline_aligned; +EXPORT_PER_CPU_SYMBOL(cpu_info); /* physical ID of the CPU used to boot the system */ unsigned char boot_cpu_id; @@ -430,7 +430,7 @@ find_smp_config(void) void __init smp_store_cpu_info(int id) { - struct cpuinfo_x86 *c=&cpu_data[id]; + struct cpuinfo_x86 *c = &cpu_data(id); *c = boot_cpu_data; @@ -634,7 +634,7 @@ do_boot_cpu(__u8 cpu) cpu, smp_processor_id())); printk("CPU%d: ", cpu); - print_cpu_info(&cpu_data[cpu]); + print_cpu_info(&cpu_data(cpu)); wmb(); cpu_set(cpu, cpu_callout_map); cpu_set(cpu, cpu_present_map); @@ -683,7 +683,7 @@ smp_boot_cpus(void) */ smp_store_cpu_info(boot_cpu_id); printk("CPU%d: ", boot_cpu_id); - print_cpu_info(&cpu_data[boot_cpu_id]); + print_cpu_info(&cpu_data(boot_cpu_id)); if(is_cpu_quad()) { /* booting on a Quad CPU */ @@ -714,7 +714,7 @@ smp_boot_cpus(void) unsigned long bogosum = 0; for (i = 0; i < NR_CPUS; i++) if (cpu_isset(i, cpu_online_map)) - bogosum += cpu_data[i].loops_per_jiffy; + bogosum += cpu_data(i).loops_per_jiffy; printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", cpucount+1, bogosum/(500000/HZ), diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 6f66551d9e51..5c82ec7f8bbd 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -150,7 +150,7 @@ static struct coretemp_data *coretemp_update_device(struct device *dev) static int __devinit coretemp_probe(struct platform_device *pdev) { struct coretemp_data *data; - struct cpuinfo_x86 *c = &(cpu_data)[pdev->id]; + struct cpuinfo_x86 *c = &cpu_data(pdev->id); int err; u32 eax, edx; @@ -359,7 +359,7 @@ static int __init coretemp_init(void) struct pdev_entry *p, *n; /* quick check if we run Intel */ - if (cpu_data[0].x86_vendor != X86_VENDOR_INTEL) + if (cpu_data(0).x86_vendor != X86_VENDOR_INTEL) goto exit; err = platform_driver_register(&coretemp_driver); @@ -367,7 +367,7 @@ static int __init coretemp_init(void) goto exit; for_each_online_cpu(i) { - struct cpuinfo_x86 *c = &(cpu_data)[i]; + struct cpuinfo_x86 *c = &cpu_data(i); /* check if family 6, models e, f, 16 */ if ((c->cpuid_level < 0) || (c->x86 != 0x6) || diff --git a/drivers/hwmon/hwmon-vid.c b/drivers/hwmon/hwmon-vid.c index f17e771e42f8..3330667280b9 100644 --- a/drivers/hwmon/hwmon-vid.c +++ b/drivers/hwmon/hwmon-vid.c @@ -200,7 +200,7 @@ static u8 find_vrm(u8 eff_family, u8 eff_model, u8 eff_stepping, u8 vendor) u8 vid_which_vrm(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); u32 eax; u8 eff_family, eff_model, eff_stepping, vrm_ret; diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c index ec1b6cfefcd3..bfc6061f1554 100644 --- a/drivers/input/gameport/gameport.c +++ b/drivers/input/gameport/gameport.c @@ -136,7 +136,8 @@ static int gameport_measure_speed(struct gameport *gameport) } gameport_close(gameport); - return (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (unsigned long)HZ / (1000 / 50)) / (tx < 1 ? 1 : tx); + return (cpu_data(raw_smp_processor_id()).loops_per_jiffy * + (unsigned long)HZ / (1000 / 50)) / (tx < 1 ? 1 : tx); #else diff --git a/drivers/video/geode/video_gx.c b/drivers/video/geode/video_gx.c index 7f3f18d06718..febf09c63492 100644 --- a/drivers/video/geode/video_gx.c +++ b/drivers/video/geode/video_gx.c @@ -127,7 +127,7 @@ static void gx_set_dclk_frequency(struct fb_info *info) int timeout = 1000; /* Rev. 1 Geode GXs use a 14 MHz reference clock instead of 48 MHz. */ - if (cpu_data->x86_mask == 1) { + if (cpu_data(0).x86_mask == 1) { pll_table = gx_pll_table_14MHz; pll_table_len = ARRAY_SIZE(gx_pll_table_14MHz); } else { diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h index 565b6fc5f5b4..13976b086837 100644 --- a/include/asm-x86/processor_32.h +++ b/include/asm-x86/processor_32.h @@ -79,6 +79,7 @@ struct cpuinfo_x86 { unsigned char booted_cores; /* number of cores as seen by OS */ __u8 phys_proc_id; /* Physical processor id. */ __u8 cpu_core_id; /* Core id */ + __u8 cpu_index; /* index into per_cpu list */ #endif } __attribute__((__aligned__(SMP_CACHE_BYTES))); @@ -103,11 +104,12 @@ extern struct tss_struct doublefault_tss; DECLARE_PER_CPU(struct tss_struct, init_tss); #ifdef CONFIG_SMP -extern struct cpuinfo_x86 cpu_data[]; -#define current_cpu_data cpu_data[smp_processor_id()] +DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info); +#define cpu_data(cpu) per_cpu(cpu_info, cpu) +#define current_cpu_data cpu_data(smp_processor_id()) #else -#define cpu_data (&boot_cpu_data) -#define current_cpu_data boot_cpu_data +#define cpu_data(cpu) boot_cpu_data +#define current_cpu_data boot_cpu_data #endif /* diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h index 398c39160fce..e4f19970a82b 100644 --- a/include/asm-x86/processor_64.h +++ b/include/asm-x86/processor_64.h @@ -74,6 +74,7 @@ struct cpuinfo_x86 { __u8 booted_cores; /* number of cores as seen by OS */ __u8 phys_proc_id; /* Physical Processor id. */ __u8 cpu_core_id; /* Core id. */ + __u8 cpu_index; /* index into per_cpu list */ #endif } ____cacheline_aligned; @@ -88,11 +89,12 @@ struct cpuinfo_x86 { #define X86_VENDOR_UNKNOWN 0xff #ifdef CONFIG_SMP -extern struct cpuinfo_x86 cpu_data[]; -#define current_cpu_data cpu_data[smp_processor_id()] +DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info); +#define cpu_data(cpu) per_cpu(cpu_info, cpu) +#define current_cpu_data cpu_data(smp_processor_id()) #else -#define cpu_data (&boot_cpu_data) -#define current_cpu_data boot_cpu_data +#define cpu_data(cpu) boot_cpu_data +#define current_cpu_data boot_cpu_data #endif extern char ignore_irq13; diff --git a/include/asm-x86/topology_32.h b/include/asm-x86/topology_32.h index ae1074603c4b..9040f5a61278 100644 --- a/include/asm-x86/topology_32.h +++ b/include/asm-x86/topology_32.h @@ -28,8 +28,8 @@ #define _ASM_I386_TOPOLOGY_H #ifdef CONFIG_X86_HT -#define topology_physical_package_id(cpu) (cpu_data[cpu].phys_proc_id) -#define topology_core_id(cpu) (cpu_data[cpu].cpu_core_id) +#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) +#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) #define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) #endif diff --git a/include/asm-x86/topology_64.h b/include/asm-x86/topology_64.h index 848c17f92226..74c4e184c0d8 100644 --- a/include/asm-x86/topology_64.h +++ b/include/asm-x86/topology_64.h @@ -56,8 +56,8 @@ extern int __node_distance(int, int); #endif #ifdef CONFIG_SMP -#define topology_physical_package_id(cpu) (cpu_data[cpu].phys_proc_id) -#define topology_core_id(cpu) (cpu_data[cpu].cpu_core_id) +#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) +#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) #define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) #define mc_capable() (boot_cpu_data.x86_max_cores > 1)