linux/arch/x86/include/asm/irq_vectors.h

181 lines
5.0 KiB
C
Raw Normal View History

#ifndef _ASM_X86_IRQ_VECTORS_H
#define _ASM_X86_IRQ_VECTORS_H
#include <linux/threads.h>
/*
* Linux IRQ vector layout.
*
* There are 256 IDT entries (per CPU - each entry is 8 bytes) which can
* be defined by Linux. They are used as a jump table by the CPU when a
* given vector is triggered - by a CPU-external, CPU-internal or
* software-triggered event.
*
* Linux sets the kernel code address each entry jumps to early during
* bootup, and never changes them. This is the general layout of the
* IDT entries:
*
* Vectors 0 ... 31 : system traps and exceptions - hardcoded events
* Vectors 32 ... 127 : device interrupts
* Vector 128 : legacy int80 syscall interface
x86-64: Emulate legacy vsyscalls There's a fair amount of code in the vsyscall page. It contains a syscall instruction (in the gettimeofday fallback) and who knows what will happen if an exploit jumps into the middle of some other code. Reduce the risk by replacing the vsyscalls with short magic incantations that cause the kernel to emulate the real vsyscalls. These incantations are useless if entered in the middle. This causes vsyscalls to be a little more expensive than real syscalls. Fortunately sensible programs don't use them. The only exception is time() which is still called by glibc through the vsyscall - but calling time() millions of times per second is not sensible. glibc has this fixed in the development tree. This patch is not perfect: the vread_tsc and vread_hpet functions are still at a fixed address. Fixing that might involve making alternative patching work in the vDSO. Signed-off-by: Andy Lutomirski <luto@mit.edu> Acked-by: Linus Torvalds <torvalds@linux-foundation.org> Cc: Jesper Juhl <jj@chaosbits.net> Cc: Borislav Petkov <bp@alien8.de> Cc: Arjan van de Ven <arjan@infradead.org> Cc: Jan Beulich <JBeulich@novell.com> Cc: richard -rw- weinberger <richard.weinberger@gmail.com> Cc: Mikael Pettersson <mikpe@it.uu.se> Cc: Andi Kleen <andi@firstfloor.org> Cc: Brian Gerst <brgerst@gmail.com> Cc: Louis Rilling <Louis.Rilling@kerlabs.com> Cc: Valdis.Kletnieks@vt.edu Cc: pageexec@freemail.hu Link: http://lkml.kernel.org/r/e64e1b3c64858820d12c48fa739efbd1485e79d5.1307292171.git.luto@mit.edu [ Removed the CONFIG option - it's simpler to just do it unconditionally. Tidied up the code as well. ] Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-06-06 01:50:24 +08:00
* Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts
x86: Scale up the number of TLB invalidate vectors with NR_CPUs, up to 32 Make the maxium TLB invalidate vectors depend on NR_CPUS linearly, with a maximum of 32 vectors. We currently only have 8 vectors for TLB invalidation and that is clearly inadequate. If we have a lot of CPUs, the CPUs need share the 8 vectors and tlbstate_lock is used to protect them. flush_tlb_page() is heavily used in page reclaim, which will cause a lot of lock contention for tlbstate_lock. Andi Kleen suggested increasing the vectors number to 32, which should be good for current typical systems to reduce the tlbstate_lock contention. My test system has 4 sockets and 64G memory, and 64 CPUs. My workload creates 64 processes. Each process mmap reads a big empty sparse file. The total size of the files are 2*total_mem, so this will cause a lot of page reclaim. Below is the result I get from perf call-graph profiling: without the patch: ------------------ 24.25% usemem [kernel] [k] _raw_spin_lock | --- _raw_spin_lock | |--42.15%-- native_flush_tlb_others with the patch: ------------------ 14.96% usemem [kernel] [k] _raw_spin_lock | --- _raw_spin_lock |--13.89%-- native_flush_tlb_others So this heavily reduces the tlbstate_lock contention. Suggested-by: Andi Kleen <andi@firstfloor.org> Signed-off-by: Shaohua Li <shaohua.li@intel.com> Cc: Eric Dumazet <eric.dumazet@gmail.com> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <1295232727.1949.709.camel@sli10-conroe> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-01-17 10:52:07 +08:00
* Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
*
* 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
*
* This file enumerates the exact layout of them:
*/
#define NMI_VECTOR 0x02
#define MCE_VECTOR 0x12
/*
* IDT vectors usable for external interrupt sources start at 0x20.
* (0x80 is the syscall vector, 0x30-0x3f are for ISA)
*/
#define FIRST_EXTERNAL_VECTOR 0x20
/*
* We start allocating at 0x21 to spread out vectors evenly between
* priority levels. (0x80 is the syscall vector)
*/
#define VECTOR_OFFSET_START 1
/*
* Reserve the lowest usable vector (and hence lowest priority) 0x20 for
* triggering cleanup after irq migration. 0x21-0x2f will still be used
* for device interrupts.
*/
#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR
#define IA32_SYSCALL_VECTOR 0x80
#ifdef CONFIG_X86_32
# define SYSCALL_VECTOR 0x80
#endif
/*
* Vectors 0x30-0x3f are used for ISA interrupts.
* round up to the next 16-vector boundary
*/
#define IRQ0_VECTOR ((FIRST_EXTERNAL_VECTOR + 16) & ~15)
#define IRQ1_VECTOR (IRQ0_VECTOR + 1)
#define IRQ2_VECTOR (IRQ0_VECTOR + 2)
#define IRQ3_VECTOR (IRQ0_VECTOR + 3)
#define IRQ4_VECTOR (IRQ0_VECTOR + 4)
#define IRQ5_VECTOR (IRQ0_VECTOR + 5)
#define IRQ6_VECTOR (IRQ0_VECTOR + 6)
#define IRQ7_VECTOR (IRQ0_VECTOR + 7)
#define IRQ8_VECTOR (IRQ0_VECTOR + 8)
#define IRQ9_VECTOR (IRQ0_VECTOR + 9)
#define IRQ10_VECTOR (IRQ0_VECTOR + 10)
#define IRQ11_VECTOR (IRQ0_VECTOR + 11)
#define IRQ12_VECTOR (IRQ0_VECTOR + 12)
#define IRQ13_VECTOR (IRQ0_VECTOR + 13)
#define IRQ14_VECTOR (IRQ0_VECTOR + 14)
#define IRQ15_VECTOR (IRQ0_VECTOR + 15)
/*
* Special IRQ vectors used by the SMP architecture, 0xf0-0xff
*
* some of the following vectors are 'rare', they are merged
* into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
* TLB, reschedule and local APIC vectors are performance-critical.
*/
#define SPURIOUS_APIC_VECTOR 0xff
/*
* Sanity check
*/
#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F)
# error SPURIOUS_APIC_VECTOR definition error
#endif
#define ERROR_APIC_VECTOR 0xfe
#define RESCHEDULE_VECTOR 0xfd
#define CALL_FUNCTION_VECTOR 0xfc
#define CALL_FUNCTION_SINGLE_VECTOR 0xfb
#define THERMAL_APIC_VECTOR 0xfa
#define THRESHOLD_APIC_VECTOR 0xf9
x86: fix panic with interrupts off (needed for MCE) For some time each panic() called with interrupts disabled triggered the !irqs_disabled() WARN_ON in smp_call_function(), producing ugly backtraces and confusing users. This is a common situation with machine checks for example which tend to call panic with interrupts disabled, but will also hit in other situations e.g. panic during early boot. In fact it means that panic cannot be called in many circumstances, which would be bad. This all started with the new fancy queued smp_call_function, which is then used by the shutdown path to shut down the other CPUs. On closer examination it turned out that the fancy RCU smp_call_function() does lots of things not suitable in a panic situation anyways, like allocating memory and relying on complex system state. I originally tried to patch this over by checking for panic there, but it was quite complicated and the original patch was also not very popular. This also didn't fix some of the underlying complexity problems. The new code in post 2.6.29 tries to patch around this by checking for oops_in_progress, but that is not enough to make this fully safe and I don't think that's a real solution because panic has to be reliable. So instead use an own vector to reboot. This makes the reboot code extremly straight forward, which is definitely a big plus in a panic situation where it is important to avoid relying on too much kernel state. The new simple code is also safe to be called from interupts off region because it is very very simple. There can be situations where it is important that panic is reliable. For example on a fatal machine check the panic is needed to get the system up again and running as quickly as possible. So it's important that panic is reliable and all function it calls simple. This is why I came up with this simple vector scheme. It's very hard to beat in simplicity. Vectors are not particularly precious anymore since all big systems are using per CPU vectors. Another possibility would have been to use an NMI similar to kdump, but there is still the problem that NMIs don't work reliably on some systems due to BIOS issues. NMIs would have been able to stop CPUs running with interrupts off too. In the sake of universal reliability I opted for using a non NMI vector for now. I put the reboot vector into the highest priority bucket of the APIC vectors and moved the 64bit UV_BAU message down instead into the next lower priority. [ Impact: bug fix, fixes an old regression ] Signed-off-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2009-05-28 03:56:52 +08:00
#define REBOOT_VECTOR 0xf8
/*
* Generic system vector for platform specific use
*/
#define X86_PLATFORM_IPI_VECTOR 0xf7
/*
* IRQ work vector:
*/
#define IRQ_WORK_VECTOR 0xf6
#define UV_BAU_MESSAGE 0xf5
x86: fix panic with interrupts off (needed for MCE) For some time each panic() called with interrupts disabled triggered the !irqs_disabled() WARN_ON in smp_call_function(), producing ugly backtraces and confusing users. This is a common situation with machine checks for example which tend to call panic with interrupts disabled, but will also hit in other situations e.g. panic during early boot. In fact it means that panic cannot be called in many circumstances, which would be bad. This all started with the new fancy queued smp_call_function, which is then used by the shutdown path to shut down the other CPUs. On closer examination it turned out that the fancy RCU smp_call_function() does lots of things not suitable in a panic situation anyways, like allocating memory and relying on complex system state. I originally tried to patch this over by checking for panic there, but it was quite complicated and the original patch was also not very popular. This also didn't fix some of the underlying complexity problems. The new code in post 2.6.29 tries to patch around this by checking for oops_in_progress, but that is not enough to make this fully safe and I don't think that's a real solution because panic has to be reliable. So instead use an own vector to reboot. This makes the reboot code extremly straight forward, which is definitely a big plus in a panic situation where it is important to avoid relying on too much kernel state. The new simple code is also safe to be called from interupts off region because it is very very simple. There can be situations where it is important that panic is reliable. For example on a fatal machine check the panic is needed to get the system up again and running as quickly as possible. So it's important that panic is reliable and all function it calls simple. This is why I came up with this simple vector scheme. It's very hard to beat in simplicity. Vectors are not particularly precious anymore since all big systems are using per CPU vectors. Another possibility would have been to use an NMI similar to kdump, but there is still the problem that NMIs don't work reliably on some systems due to BIOS issues. NMIs would have been able to stop CPUs running with interrupts off too. In the sake of universal reliability I opted for using a non NMI vector for now. I put the reboot vector into the highest priority bucket of the APIC vectors and moved the 64bit UV_BAU message down instead into the next lower priority. [ Impact: bug fix, fixes an old regression ] Signed-off-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2009-05-28 03:56:52 +08:00
/* Xen vector callback to receive events in a HVM domain */
#define XEN_HVM_EVTCHN_CALLBACK 0xf3
/*
* Local APIC timer IRQ vector is on a different priority level,
* to work around the 'lost local interrupt if more than 2 IRQ
* sources per level' errata.
*/
#define LOCAL_TIMER_VECTOR 0xef
x86: Scale up the number of TLB invalidate vectors with NR_CPUs, up to 32 Make the maxium TLB invalidate vectors depend on NR_CPUS linearly, with a maximum of 32 vectors. We currently only have 8 vectors for TLB invalidation and that is clearly inadequate. If we have a lot of CPUs, the CPUs need share the 8 vectors and tlbstate_lock is used to protect them. flush_tlb_page() is heavily used in page reclaim, which will cause a lot of lock contention for tlbstate_lock. Andi Kleen suggested increasing the vectors number to 32, which should be good for current typical systems to reduce the tlbstate_lock contention. My test system has 4 sockets and 64G memory, and 64 CPUs. My workload creates 64 processes. Each process mmap reads a big empty sparse file. The total size of the files are 2*total_mem, so this will cause a lot of page reclaim. Below is the result I get from perf call-graph profiling: without the patch: ------------------ 24.25% usemem [kernel] [k] _raw_spin_lock | --- _raw_spin_lock | |--42.15%-- native_flush_tlb_others with the patch: ------------------ 14.96% usemem [kernel] [k] _raw_spin_lock | --- _raw_spin_lock |--13.89%-- native_flush_tlb_others So this heavily reduces the tlbstate_lock contention. Suggested-by: Andi Kleen <andi@firstfloor.org> Signed-off-by: Shaohua Li <shaohua.li@intel.com> Cc: Eric Dumazet <eric.dumazet@gmail.com> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <1295232727.1949.709.camel@sli10-conroe> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-01-17 10:52:07 +08:00
/* up to 32 vectors used for spreading out TLB flushes: */
#if NR_CPUS <= 32
# define NUM_INVALIDATE_TLB_VECTORS (NR_CPUS)
x86: Scale up the number of TLB invalidate vectors with NR_CPUs, up to 32 Make the maxium TLB invalidate vectors depend on NR_CPUS linearly, with a maximum of 32 vectors. We currently only have 8 vectors for TLB invalidation and that is clearly inadequate. If we have a lot of CPUs, the CPUs need share the 8 vectors and tlbstate_lock is used to protect them. flush_tlb_page() is heavily used in page reclaim, which will cause a lot of lock contention for tlbstate_lock. Andi Kleen suggested increasing the vectors number to 32, which should be good for current typical systems to reduce the tlbstate_lock contention. My test system has 4 sockets and 64G memory, and 64 CPUs. My workload creates 64 processes. Each process mmap reads a big empty sparse file. The total size of the files are 2*total_mem, so this will cause a lot of page reclaim. Below is the result I get from perf call-graph profiling: without the patch: ------------------ 24.25% usemem [kernel] [k] _raw_spin_lock | --- _raw_spin_lock | |--42.15%-- native_flush_tlb_others with the patch: ------------------ 14.96% usemem [kernel] [k] _raw_spin_lock | --- _raw_spin_lock |--13.89%-- native_flush_tlb_others So this heavily reduces the tlbstate_lock contention. Suggested-by: Andi Kleen <andi@firstfloor.org> Signed-off-by: Shaohua Li <shaohua.li@intel.com> Cc: Eric Dumazet <eric.dumazet@gmail.com> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <1295232727.1949.709.camel@sli10-conroe> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-01-17 10:52:07 +08:00
#else
# define NUM_INVALIDATE_TLB_VECTORS (32)
x86: Scale up the number of TLB invalidate vectors with NR_CPUs, up to 32 Make the maxium TLB invalidate vectors depend on NR_CPUS linearly, with a maximum of 32 vectors. We currently only have 8 vectors for TLB invalidation and that is clearly inadequate. If we have a lot of CPUs, the CPUs need share the 8 vectors and tlbstate_lock is used to protect them. flush_tlb_page() is heavily used in page reclaim, which will cause a lot of lock contention for tlbstate_lock. Andi Kleen suggested increasing the vectors number to 32, which should be good for current typical systems to reduce the tlbstate_lock contention. My test system has 4 sockets and 64G memory, and 64 CPUs. My workload creates 64 processes. Each process mmap reads a big empty sparse file. The total size of the files are 2*total_mem, so this will cause a lot of page reclaim. Below is the result I get from perf call-graph profiling: without the patch: ------------------ 24.25% usemem [kernel] [k] _raw_spin_lock | --- _raw_spin_lock | |--42.15%-- native_flush_tlb_others with the patch: ------------------ 14.96% usemem [kernel] [k] _raw_spin_lock | --- _raw_spin_lock |--13.89%-- native_flush_tlb_others So this heavily reduces the tlbstate_lock contention. Suggested-by: Andi Kleen <andi@firstfloor.org> Signed-off-by: Shaohua Li <shaohua.li@intel.com> Cc: Eric Dumazet <eric.dumazet@gmail.com> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <1295232727.1949.709.camel@sli10-conroe> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-01-17 10:52:07 +08:00
#endif
#define INVALIDATE_TLB_VECTOR_END (0xee)
#define INVALIDATE_TLB_VECTOR_START \
(INVALIDATE_TLB_VECTOR_END-NUM_INVALIDATE_TLB_VECTORS+1)
#define NR_VECTORS 256
#define FPU_IRQ 13
#define FIRST_VM86_IRQ 3
#define LAST_VM86_IRQ 15
#ifndef __ASSEMBLY__
static inline int invalid_vm86_irq(int irq)
{
return irq < FIRST_VM86_IRQ || irq > LAST_VM86_IRQ;
}
#endif
/*
* Size the maximum number of interrupts.
*
* If the irq_desc[] array has a sparse layout, we can size things
* generously - it scales up linearly with the maximum number of CPUs,
* and the maximum number of IO-APICs, whichever is higher.
*
* In other cases we size more conservatively, to not create too large
* static arrays.
*/
#define NR_IRQS_LEGACY 16
#define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS )
#ifdef CONFIG_X86_IO_APIC
# ifdef CONFIG_SPARSE_IRQ
# define CPU_VECTOR_LIMIT (64 * NR_CPUS)
# define NR_IRQS \
(CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \
(NR_VECTORS + CPU_VECTOR_LIMIT) : \
(NR_VECTORS + IO_APIC_VECTOR_LIMIT))
# else
# define CPU_VECTOR_LIMIT (32 * NR_CPUS)
# define NR_IRQS \
(CPU_VECTOR_LIMIT < IO_APIC_VECTOR_LIMIT ? \
(NR_VECTORS + CPU_VECTOR_LIMIT) : \
(NR_VECTORS + IO_APIC_VECTOR_LIMIT))
# endif
#else /* !CONFIG_X86_IO_APIC: */
# define NR_IRQS NR_IRQS_LEGACY
#endif
#endif /* _ASM_X86_IRQ_VECTORS_H */