irq_remapping/vt-d: Enhance Intel IR driver to support hierarchical irqdomains

Enhance Intel interrupt remapping driver to support hierarchical
irqdomains. Implement intel_ir_chip to support stacked irq_chip.

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Acked-by: Joerg Roedel <jroedel@suse.de>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: David Cohen <david.a.cohen@linux.intel.com>
Cc: Sander Eikelenboom <linux@eikelenboom.it>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: iommu@lists.linux-foundation.org
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Link: http://lkml.kernel.org/r/1428905519-23704-11-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
This commit is contained in:
Jiang Liu 2015-04-13 14:11:32 +08:00 committed by Thomas Gleixner
parent 8dedf4cf5a
commit b106ee63ab
2 changed files with 333 additions and 8 deletions

View File

@ -8,6 +8,7 @@
#include <linux/irq.h>
#include <linux/intel-iommu.h>
#include <linux/acpi.h>
#include <linux/irqdomain.h>
#include <asm/io_apic.h>
#include <asm/smp.h>
#include <asm/cpu.h>
@ -31,6 +32,14 @@ struct hpet_scope {
unsigned int devfn;
};
struct intel_ir_data {
struct irq_2_iommu irq_2_iommu;
struct irte irte_entry;
union {
struct msi_msg msi_entry;
};
};
#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0)
#define IRTE_DEST(dest) ((eim_mode) ? dest : dest << 8)
@ -50,6 +59,7 @@ static struct hpet_scope ir_hpet[MAX_HPET_TBS];
* the dmar_global_lock.
*/
static DEFINE_RAW_SPINLOCK(irq_2_ir_lock);
static struct irq_domain_ops intel_ir_domain_ops;
static int __init parse_ioapics_under_ir(void);
@ -263,7 +273,7 @@ static int free_irte(int irq)
unsigned long flags;
int rc;
if (!irq_iommu)
if (!irq_iommu || irq_iommu->iommu == NULL)
return -1;
raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
@ -488,7 +498,6 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO,
INTR_REMAP_PAGE_ORDER);
if (!pages) {
pr_err("IR%d: failed to allocate pages of order %d\n",
iommu->seq_id, INTR_REMAP_PAGE_ORDER);
@ -502,11 +511,23 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
goto out_free_pages;
}
iommu->ir_domain = irq_domain_add_hierarchy(arch_get_ir_parent_domain(),
0, INTR_REMAP_TABLE_ENTRIES,
NULL, &intel_ir_domain_ops,
iommu);
if (!iommu->ir_domain) {
pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id);
goto out_free_bitmap;
}
iommu->ir_msi_domain = arch_create_msi_irq_domain(iommu->ir_domain);
ir_table->base = page_address(pages);
ir_table->bitmap = bitmap;
iommu->ir_table = ir_table;
return 0;
out_free_bitmap:
kfree(bitmap);
out_free_pages:
__free_pages(pages, INTR_REMAP_PAGE_ORDER);
out_free_table:
@ -517,6 +538,14 @@ out_free_table:
static void intel_teardown_irq_remapping(struct intel_iommu *iommu)
{
if (iommu && iommu->ir_table) {
if (iommu->ir_msi_domain) {
irq_domain_remove(iommu->ir_msi_domain);
iommu->ir_msi_domain = NULL;
}
if (iommu->ir_domain) {
irq_domain_remove(iommu->ir_domain);
iommu->ir_domain = NULL;
}
free_pages((unsigned long)iommu->ir_table->base,
INTR_REMAP_PAGE_ORDER);
kfree(iommu->ir_table->bitmap);
@ -1062,12 +1091,6 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
struct irte irte;
int err;
if (!config_enabled(CONFIG_SMP))
return -EINVAL;
if (!cpumask_intersects(mask, cpu_online_mask))
return -EINVAL;
if (get_irte(irq, &irte))
return -EBUSY;
@ -1100,6 +1123,7 @@ intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
send_cleanup_vector(cfg);
cpumask_copy(data->affinity, mask);
return 0;
}
@ -1205,6 +1229,53 @@ static int intel_alloc_hpet_msi(unsigned int irq, unsigned int id)
return ret;
}
static struct irq_domain *intel_get_ir_irq_domain(struct irq_alloc_info *info)
{
struct intel_iommu *iommu = NULL;
if (!info)
return NULL;
switch (info->type) {
case X86_IRQ_ALLOC_TYPE_IOAPIC:
iommu = map_ioapic_to_ir(info->ioapic_id);
break;
case X86_IRQ_ALLOC_TYPE_HPET:
iommu = map_hpet_to_ir(info->hpet_id);
break;
case X86_IRQ_ALLOC_TYPE_MSI:
case X86_IRQ_ALLOC_TYPE_MSIX:
iommu = map_dev_to_ir(info->msi_dev);
break;
default:
BUG_ON(1);
break;
}
return iommu ? iommu->ir_domain : NULL;
}
static struct irq_domain *intel_get_irq_domain(struct irq_alloc_info *info)
{
struct intel_iommu *iommu;
if (!info)
return NULL;
switch (info->type) {
case X86_IRQ_ALLOC_TYPE_MSI:
case X86_IRQ_ALLOC_TYPE_MSIX:
iommu = map_dev_to_ir(info->msi_dev);
if (iommu)
return iommu->ir_msi_domain;
break;
default:
break;
}
return NULL;
}
struct irq_remap_ops intel_irq_remap_ops = {
.prepare = intel_prepare_irq_remapping,
.enable = intel_enable_irq_remapping,
@ -1218,6 +1289,256 @@ struct irq_remap_ops intel_irq_remap_ops = {
.msi_alloc_irq = intel_msi_alloc_irq,
.msi_setup_irq = intel_msi_setup_irq,
.alloc_hpet_msi = intel_alloc_hpet_msi,
.get_ir_irq_domain = intel_get_ir_irq_domain,
.get_irq_domain = intel_get_irq_domain,
};
/*
* Migrate the IO-APIC irq in the presence of intr-remapping.
*
* For both level and edge triggered, irq migration is a simple atomic
* update(of vector and cpu destination) of IRTE and flush the hardware cache.
*
* For level triggered, we eliminate the io-apic RTE modification (with the
* updated vector information), by using a virtual vector (io-apic pin number).
* Real vector that is used for interrupting cpu will be coming from
* the interrupt-remapping table entry.
*
* As the migration is a simple atomic update of IRTE, the same mechanism
* is used to migrate MSI irq's in the presence of interrupt-remapping.
*/
static int
intel_ir_set_affinity(struct irq_data *data, const struct cpumask *mask,
bool force)
{
struct intel_ir_data *ir_data = data->chip_data;
struct irte *irte = &ir_data->irte_entry;
struct irq_cfg *cfg = irqd_cfg(data);
struct irq_data *parent = data->parent_data;
int ret;
ret = parent->chip->irq_set_affinity(parent, mask, force);
if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
return ret;
/*
* Atomically updates the IRTE with the new destination, vector
* and flushes the interrupt entry cache.
*/
irte->vector = cfg->vector;
irte->dest_id = IRTE_DEST(cfg->dest_apicid);
modify_irte(&ir_data->irq_2_iommu, irte);
/*
* After this point, all the interrupts will start arriving
* at the new destination. So, time to cleanup the previous
* vector allocation.
*/
if (cfg->move_in_progress)
send_cleanup_vector(cfg);
return IRQ_SET_MASK_OK_DONE;
}
static void intel_ir_compose_msi_msg(struct irq_data *irq_data,
struct msi_msg *msg)
{
struct intel_ir_data *ir_data = irq_data->chip_data;
*msg = ir_data->msi_entry;
}
static struct irq_chip intel_ir_chip = {
.irq_ack = ir_ack_apic_edge,
.irq_set_affinity = intel_ir_set_affinity,
.irq_compose_msi_msg = intel_ir_compose_msi_msg,
};
static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data,
struct irq_cfg *irq_cfg,
struct irq_alloc_info *info,
int index, int sub_handle)
{
struct IR_IO_APIC_route_entry *entry;
struct irte *irte = &data->irte_entry;
struct msi_msg *msg = &data->msi_entry;
prepare_irte(irte, irq_cfg->vector, irq_cfg->dest_apicid);
switch (info->type) {
case X86_IRQ_ALLOC_TYPE_IOAPIC:
/* Set source-id of interrupt request */
set_ioapic_sid(irte, info->ioapic_id);
apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: Set IRTE entry (P:%d FPD:%d Dst_Mode:%d Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X Avail:%X Vector:%02X Dest:%08X SID:%04X SQ:%X SVT:%X)\n",
info->ioapic_id, irte->present, irte->fpd,
irte->dst_mode, irte->redir_hint,
irte->trigger_mode, irte->dlvry_mode,
irte->avail, irte->vector, irte->dest_id,
irte->sid, irte->sq, irte->svt);
entry = (struct IR_IO_APIC_route_entry *)info->ioapic_entry;
info->ioapic_entry = NULL;
memset(entry, 0, sizeof(*entry));
entry->index2 = (index >> 15) & 0x1;
entry->zero = 0;
entry->format = 1;
entry->index = (index & 0x7fff);
/*
* IO-APIC RTE will be configured with virtual vector.
* irq handler will do the explicit EOI to the io-apic.
*/
entry->vector = info->ioapic_pin;
entry->mask = 0; /* enable IRQ */
entry->trigger = info->ioapic_trigger;
entry->polarity = info->ioapic_polarity;
if (info->ioapic_trigger)
entry->mask = 1; /* Mask level triggered irqs. */
break;
case X86_IRQ_ALLOC_TYPE_HPET:
case X86_IRQ_ALLOC_TYPE_MSI:
case X86_IRQ_ALLOC_TYPE_MSIX:
if (info->type == X86_IRQ_ALLOC_TYPE_HPET)
set_hpet_sid(irte, info->hpet_id);
else
set_msi_sid(irte, info->msi_dev);
msg->address_hi = MSI_ADDR_BASE_HI;
msg->data = sub_handle;
msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
MSI_ADDR_IR_SHV |
MSI_ADDR_IR_INDEX1(index) |
MSI_ADDR_IR_INDEX2(index);
break;
default:
BUG_ON(1);
break;
}
}
static void intel_free_irq_resources(struct irq_domain *domain,
unsigned int virq, unsigned int nr_irqs)
{
struct irq_data *irq_data;
struct intel_ir_data *data;
struct irq_2_iommu *irq_iommu;
unsigned long flags;
int i;
for (i = 0; i < nr_irqs; i++) {
irq_data = irq_domain_get_irq_data(domain, virq + i);
if (irq_data && irq_data->chip_data) {
data = irq_data->chip_data;
irq_iommu = &data->irq_2_iommu;
raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
clear_entries(irq_iommu);
raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
irq_domain_reset_irq_data(irq_data);
kfree(data);
}
}
}
static int intel_irq_remapping_alloc(struct irq_domain *domain,
unsigned int virq, unsigned int nr_irqs,
void *arg)
{
struct intel_iommu *iommu = domain->host_data;
struct irq_alloc_info *info = arg;
struct intel_ir_data *data;
struct irq_data *irq_data;
struct irq_cfg *irq_cfg;
int i, ret, index;
if (!info || !iommu)
return -EINVAL;
if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_MSI &&
info->type != X86_IRQ_ALLOC_TYPE_MSIX)
return -EINVAL;
/*
* With IRQ remapping enabled, don't need contiguous CPU vectors
* to support multiple MSI interrupts.
*/
if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS;
ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
if (ret < 0)
return ret;
ret = -ENOMEM;
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
goto out_free_parent;
down_read(&dmar_global_lock);
index = alloc_irte(iommu, virq, &data->irq_2_iommu, nr_irqs);
up_read(&dmar_global_lock);
if (index < 0) {
pr_warn("Failed to allocate IRTE\n");
kfree(data);
goto out_free_parent;
}
for (i = 0; i < nr_irqs; i++) {
irq_data = irq_domain_get_irq_data(domain, virq + i);
irq_cfg = irqd_cfg(irq_data);
if (!irq_data || !irq_cfg) {
ret = -EINVAL;
goto out_free_data;
}
if (i > 0) {
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
goto out_free_data;
}
irq_data->hwirq = (index << 16) + i;
irq_data->chip_data = data;
irq_data->chip = &intel_ir_chip;
intel_irq_remapping_prepare_irte(data, irq_cfg, info, index, i);
irq_set_status_flags(virq + i, IRQ_MOVE_PCNTXT);
}
return 0;
out_free_data:
intel_free_irq_resources(domain, virq, i);
out_free_parent:
irq_domain_free_irqs_common(domain, virq, nr_irqs);
return ret;
}
static void intel_irq_remapping_free(struct irq_domain *domain,
unsigned int virq, unsigned int nr_irqs)
{
intel_free_irq_resources(domain, virq, nr_irqs);
irq_domain_free_irqs_common(domain, virq, nr_irqs);
}
static void intel_irq_remapping_activate(struct irq_domain *domain,
struct irq_data *irq_data)
{
struct intel_ir_data *data = irq_data->chip_data;
modify_irte(&data->irq_2_iommu, &data->irte_entry);
}
static void intel_irq_remapping_deactivate(struct irq_domain *domain,
struct irq_data *irq_data)
{
struct intel_ir_data *data = irq_data->chip_data;
struct irte entry;
memset(&entry, 0, sizeof(entry));
modify_irte(&data->irq_2_iommu, &entry);
}
static struct irq_domain_ops intel_ir_domain_ops = {
.alloc = intel_irq_remapping_alloc,
.free = intel_irq_remapping_free,
.activate = intel_irq_remapping_activate,
.deactivate = intel_irq_remapping_deactivate,
};
/*

View File

@ -286,6 +286,8 @@ struct q_inval {
#define INTR_REMAP_TABLE_ENTRIES 65536
struct irq_domain;
struct ir_table {
struct irte *base;
unsigned long *bitmap;
@ -335,6 +337,8 @@ struct intel_iommu {
#ifdef CONFIG_IRQ_REMAP
struct ir_table *ir_table; /* Interrupt remapping info */
struct irq_domain *ir_domain;
struct irq_domain *ir_msi_domain;
#endif
struct device *iommu_dev; /* IOMMU-sysfs device */
int node;