mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2025-01-21 21:34:58 +08:00
2cffad7bad
Before a CPU is taken offline the number of active interrupt vectors on the outgoing CPU and the number of vectors which are available on the other online CPUs are counted and compared. If the active vectors are more than the available vectors on the other CPUs then the CPU hot-unplug operation is aborted. This again uses loop based search and is inaccurate. The bitmap matrix allocator has accurate accounting information and can tell exactly whether the vector space is sufficient or not. Emit a message when the number of globaly reserved (unallocated) vectors is larger than the number of available vectors after offlining a CPU because after that point request_irq() might fail. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Juergen Gross <jgross@suse.com> Tested-by: Yu Chen <yu.c.chen@intel.com> Acked-by: Juergen Gross <jgross@suse.com> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Tony Luck <tony.luck@intel.com> Cc: Marc Zyngier <marc.zyngier@arm.com> Cc: Alok Kataria <akataria@vmware.com> Cc: Joerg Roedel <joro@8bytes.org> Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Rui Zhang <rui.zhang@intel.com> Cc: "K. Y. Srinivasan" <kys@microsoft.com> Cc: Arjan van de Ven <arjan@linux.intel.com> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Len Brown <lenb@kernel.org> Link: https://lkml.kernel.org/r/20170913213156.351193962@linutronix.de
384 lines
11 KiB
C
384 lines
11 KiB
C
/*
|
|
* Common interrupt code for 32 and 64 bit
|
|
*/
|
|
#include <linux/cpu.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/kernel_stat.h>
|
|
#include <linux/of.h>
|
|
#include <linux/seq_file.h>
|
|
#include <linux/smp.h>
|
|
#include <linux/ftrace.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/export.h>
|
|
|
|
#include <asm/apic.h>
|
|
#include <asm/io_apic.h>
|
|
#include <asm/irq.h>
|
|
#include <asm/mce.h>
|
|
#include <asm/hw_irq.h>
|
|
#include <asm/desc.h>
|
|
|
|
#define CREATE_TRACE_POINTS
|
|
#include <asm/trace/irq_vectors.h>
|
|
|
|
DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
|
|
EXPORT_PER_CPU_SYMBOL(irq_stat);
|
|
|
|
DEFINE_PER_CPU(struct pt_regs *, irq_regs);
|
|
EXPORT_PER_CPU_SYMBOL(irq_regs);
|
|
|
|
atomic_t irq_err_count;
|
|
|
|
/*
|
|
* 'what should we do if we get a hw irq event on an illegal vector'.
|
|
* each architecture has to answer this themselves.
|
|
*/
|
|
void ack_bad_irq(unsigned int irq)
|
|
{
|
|
if (printk_ratelimit())
|
|
pr_err("unexpected IRQ trap at vector %02x\n", irq);
|
|
|
|
/*
|
|
* Currently unexpected vectors happen only on SMP and APIC.
|
|
* We _must_ ack these because every local APIC has only N
|
|
* irq slots per priority level, and a 'hanging, unacked' IRQ
|
|
* holds up an irq slot - in excessive cases (when multiple
|
|
* unexpected vectors occur) that might lock up the APIC
|
|
* completely.
|
|
* But only ack when the APIC is enabled -AK
|
|
*/
|
|
ack_APIC_irq();
|
|
}
|
|
|
|
#define irq_stats(x) (&per_cpu(irq_stat, x))
|
|
/*
|
|
* /proc/interrupts printing for arch specific interrupts
|
|
*/
|
|
int arch_show_interrupts(struct seq_file *p, int prec)
|
|
{
|
|
int j;
|
|
|
|
seq_printf(p, "%*s: ", prec, "NMI");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
|
|
seq_puts(p, " Non-maskable interrupts\n");
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
seq_printf(p, "%*s: ", prec, "LOC");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
|
|
seq_puts(p, " Local timer interrupts\n");
|
|
|
|
seq_printf(p, "%*s: ", prec, "SPU");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
|
|
seq_puts(p, " Spurious interrupts\n");
|
|
seq_printf(p, "%*s: ", prec, "PMI");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
|
|
seq_puts(p, " Performance monitoring interrupts\n");
|
|
seq_printf(p, "%*s: ", prec, "IWI");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
|
|
seq_puts(p, " IRQ work interrupts\n");
|
|
seq_printf(p, "%*s: ", prec, "RTR");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count);
|
|
seq_puts(p, " APIC ICR read retries\n");
|
|
if (x86_platform_ipi_callback) {
|
|
seq_printf(p, "%*s: ", prec, "PLT");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis);
|
|
seq_puts(p, " Platform interrupts\n");
|
|
}
|
|
#endif
|
|
#ifdef CONFIG_SMP
|
|
seq_printf(p, "%*s: ", prec, "RES");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
|
|
seq_puts(p, " Rescheduling interrupts\n");
|
|
seq_printf(p, "%*s: ", prec, "CAL");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
|
|
seq_puts(p, " Function call interrupts\n");
|
|
seq_printf(p, "%*s: ", prec, "TLB");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
|
|
seq_puts(p, " TLB shootdowns\n");
|
|
#endif
|
|
#ifdef CONFIG_X86_THERMAL_VECTOR
|
|
seq_printf(p, "%*s: ", prec, "TRM");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
|
|
seq_puts(p, " Thermal event interrupts\n");
|
|
#endif
|
|
#ifdef CONFIG_X86_MCE_THRESHOLD
|
|
seq_printf(p, "%*s: ", prec, "THR");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
|
|
seq_puts(p, " Threshold APIC interrupts\n");
|
|
#endif
|
|
#ifdef CONFIG_X86_MCE_AMD
|
|
seq_printf(p, "%*s: ", prec, "DFR");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", irq_stats(j)->irq_deferred_error_count);
|
|
seq_puts(p, " Deferred Error APIC interrupts\n");
|
|
#endif
|
|
#ifdef CONFIG_X86_MCE
|
|
seq_printf(p, "%*s: ", prec, "MCE");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
|
|
seq_puts(p, " Machine check exceptions\n");
|
|
seq_printf(p, "%*s: ", prec, "MCP");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
|
|
seq_puts(p, " Machine check polls\n");
|
|
#endif
|
|
#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
|
|
if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) {
|
|
seq_printf(p, "%*s: ", prec, "HYP");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ",
|
|
irq_stats(j)->irq_hv_callback_count);
|
|
seq_puts(p, " Hypervisor callback interrupts\n");
|
|
}
|
|
#endif
|
|
seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
|
|
#if defined(CONFIG_X86_IO_APIC)
|
|
seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
|
|
#endif
|
|
#ifdef CONFIG_HAVE_KVM
|
|
seq_printf(p, "%*s: ", prec, "PIN");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis);
|
|
seq_puts(p, " Posted-interrupt notification event\n");
|
|
|
|
seq_printf(p, "%*s: ", prec, "NPI");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ",
|
|
irq_stats(j)->kvm_posted_intr_nested_ipis);
|
|
seq_puts(p, " Nested posted-interrupt event\n");
|
|
|
|
seq_printf(p, "%*s: ", prec, "PIW");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ",
|
|
irq_stats(j)->kvm_posted_intr_wakeup_ipis);
|
|
seq_puts(p, " Posted-interrupt wakeup event\n");
|
|
#endif
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* /proc/stat helpers
|
|
*/
|
|
u64 arch_irq_stat_cpu(unsigned int cpu)
|
|
{
|
|
u64 sum = irq_stats(cpu)->__nmi_count;
|
|
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
sum += irq_stats(cpu)->apic_timer_irqs;
|
|
sum += irq_stats(cpu)->irq_spurious_count;
|
|
sum += irq_stats(cpu)->apic_perf_irqs;
|
|
sum += irq_stats(cpu)->apic_irq_work_irqs;
|
|
sum += irq_stats(cpu)->icr_read_retry_count;
|
|
if (x86_platform_ipi_callback)
|
|
sum += irq_stats(cpu)->x86_platform_ipis;
|
|
#endif
|
|
#ifdef CONFIG_SMP
|
|
sum += irq_stats(cpu)->irq_resched_count;
|
|
sum += irq_stats(cpu)->irq_call_count;
|
|
#endif
|
|
#ifdef CONFIG_X86_THERMAL_VECTOR
|
|
sum += irq_stats(cpu)->irq_thermal_count;
|
|
#endif
|
|
#ifdef CONFIG_X86_MCE_THRESHOLD
|
|
sum += irq_stats(cpu)->irq_threshold_count;
|
|
#endif
|
|
#ifdef CONFIG_X86_MCE
|
|
sum += per_cpu(mce_exception_count, cpu);
|
|
sum += per_cpu(mce_poll_count, cpu);
|
|
#endif
|
|
return sum;
|
|
}
|
|
|
|
u64 arch_irq_stat(void)
|
|
{
|
|
u64 sum = atomic_read(&irq_err_count);
|
|
return sum;
|
|
}
|
|
|
|
|
|
/*
|
|
* do_IRQ handles all normal device IRQ's (the special
|
|
* SMP cross-CPU interrupts have their own specific
|
|
* handlers).
|
|
*/
|
|
__visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
|
|
{
|
|
struct pt_regs *old_regs = set_irq_regs(regs);
|
|
struct irq_desc * desc;
|
|
/* high bit used in ret_from_ code */
|
|
unsigned vector = ~regs->orig_ax;
|
|
|
|
/*
|
|
* NB: Unlike exception entries, IRQ entries do not reliably
|
|
* handle context tracking in the low-level entry code. This is
|
|
* because syscall entries execute briefly with IRQs on before
|
|
* updating context tracking state, so we can take an IRQ from
|
|
* kernel mode with CONTEXT_USER. The low-level entry code only
|
|
* updates the context if we came from user mode, so we won't
|
|
* switch to CONTEXT_KERNEL. We'll fix that once the syscall
|
|
* code is cleaned up enough that we can cleanly defer enabling
|
|
* IRQs.
|
|
*/
|
|
|
|
entering_irq();
|
|
|
|
/* entering_irq() tells RCU that we're not quiescent. Check it. */
|
|
RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU");
|
|
|
|
desc = __this_cpu_read(vector_irq[vector]);
|
|
|
|
if (!handle_irq(desc, regs)) {
|
|
ack_APIC_irq();
|
|
|
|
if (desc != VECTOR_RETRIGGERED) {
|
|
pr_emerg_ratelimited("%s: %d.%d No irq handler for vector\n",
|
|
__func__, smp_processor_id(),
|
|
vector);
|
|
} else {
|
|
__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
|
|
}
|
|
}
|
|
|
|
exiting_irq();
|
|
|
|
set_irq_regs(old_regs);
|
|
return 1;
|
|
}
|
|
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
/* Function pointer for generic interrupt vector handling */
|
|
void (*x86_platform_ipi_callback)(void) = NULL;
|
|
/*
|
|
* Handler for X86_PLATFORM_IPI_VECTOR.
|
|
*/
|
|
__visible void __irq_entry smp_x86_platform_ipi(struct pt_regs *regs)
|
|
{
|
|
struct pt_regs *old_regs = set_irq_regs(regs);
|
|
|
|
entering_ack_irq();
|
|
trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR);
|
|
inc_irq_stat(x86_platform_ipis);
|
|
if (x86_platform_ipi_callback)
|
|
x86_platform_ipi_callback();
|
|
trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR);
|
|
exiting_irq();
|
|
set_irq_regs(old_regs);
|
|
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_HAVE_KVM
|
|
static void dummy_handler(void) {}
|
|
static void (*kvm_posted_intr_wakeup_handler)(void) = dummy_handler;
|
|
|
|
void kvm_set_posted_intr_wakeup_handler(void (*handler)(void))
|
|
{
|
|
if (handler)
|
|
kvm_posted_intr_wakeup_handler = handler;
|
|
else
|
|
kvm_posted_intr_wakeup_handler = dummy_handler;
|
|
}
|
|
EXPORT_SYMBOL_GPL(kvm_set_posted_intr_wakeup_handler);
|
|
|
|
/*
|
|
* Handler for POSTED_INTERRUPT_VECTOR.
|
|
*/
|
|
__visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
|
|
{
|
|
struct pt_regs *old_regs = set_irq_regs(regs);
|
|
|
|
entering_ack_irq();
|
|
inc_irq_stat(kvm_posted_intr_ipis);
|
|
exiting_irq();
|
|
set_irq_regs(old_regs);
|
|
}
|
|
|
|
/*
|
|
* Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
|
|
*/
|
|
__visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs)
|
|
{
|
|
struct pt_regs *old_regs = set_irq_regs(regs);
|
|
|
|
entering_ack_irq();
|
|
inc_irq_stat(kvm_posted_intr_wakeup_ipis);
|
|
kvm_posted_intr_wakeup_handler();
|
|
exiting_irq();
|
|
set_irq_regs(old_regs);
|
|
}
|
|
|
|
/*
|
|
* Handler for POSTED_INTERRUPT_NESTED_VECTOR.
|
|
*/
|
|
__visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs)
|
|
{
|
|
struct pt_regs *old_regs = set_irq_regs(regs);
|
|
|
|
entering_ack_irq();
|
|
inc_irq_stat(kvm_posted_intr_nested_ipis);
|
|
exiting_irq();
|
|
set_irq_regs(old_regs);
|
|
}
|
|
#endif
|
|
|
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
|
|
void fixup_irqs(void)
|
|
{
|
|
unsigned int irr, vector;
|
|
struct irq_desc *desc;
|
|
struct irq_data *data;
|
|
struct irq_chip *chip;
|
|
|
|
irq_migrate_all_off_this_cpu();
|
|
|
|
/*
|
|
* We can remove mdelay() and then send spuriuous interrupts to
|
|
* new cpu targets for all the irqs that were handled previously by
|
|
* this cpu. While it works, I have seen spurious interrupt messages
|
|
* (nothing wrong but still...).
|
|
*
|
|
* So for now, retain mdelay(1) and check the IRR and then send those
|
|
* interrupts to new targets as this cpu is already offlined...
|
|
*/
|
|
mdelay(1);
|
|
|
|
/*
|
|
* We can walk the vector array of this cpu without holding
|
|
* vector_lock because the cpu is already marked !online, so
|
|
* nothing else will touch it.
|
|
*/
|
|
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
|
|
if (IS_ERR_OR_NULL(__this_cpu_read(vector_irq[vector])))
|
|
continue;
|
|
|
|
irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
|
|
if (irr & (1 << (vector % 32))) {
|
|
desc = __this_cpu_read(vector_irq[vector]);
|
|
|
|
raw_spin_lock(&desc->lock);
|
|
data = irq_desc_get_irq_data(desc);
|
|
chip = irq_data_get_irq_chip(data);
|
|
if (chip->irq_retrigger) {
|
|
chip->irq_retrigger(data);
|
|
__this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED);
|
|
}
|
|
raw_spin_unlock(&desc->lock);
|
|
}
|
|
if (__this_cpu_read(vector_irq[vector]) != VECTOR_RETRIGGERED)
|
|
__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
|
|
}
|
|
}
|
|
#endif
|