From d87beb749281404b4b4919930b1cc6352e3746f2 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 31 May 2017 18:52:29 +0100 Subject: [PATCH 01/87] iommu/of: Handle PCI aliases properly When a PCI device has DMA quirks, we need to ensure that an upstream IOMMU knows about all possible aliases, since the presence of a DMA quirk does not preclude the device still also emitting transactions (e.g. MSIs) on its 'real' RID. Similarly, the rules for bridge aliasing are relatively complex, and some bridges may only take ownership of transactions under particular transient circumstances, leading again to multiple RIDs potentially being seen at the IOMMU for the given device. Take all this into account in the OF code by translating every RID produced by the alias walk, not just whichever one comes out last. Happily, this also makes things tidy enough that we can reduce the number of both total lines of code, and confusing levels of indirection, by pulling the "iommus"/"iommu-map" parsing helpers back in-line again. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/of_iommu.c | 104 ++++++++++++++++++--------------------- 1 file changed, 47 insertions(+), 57 deletions(-) diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 8cb60829a7a1..be8ac1ddec06 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -140,75 +140,39 @@ static const struct iommu_ops return ops; } -static int __get_pci_rid(struct pci_dev *pdev, u16 alias, void *data) -{ - struct of_phandle_args *iommu_spec = data; +struct of_pci_iommu_alias_info { + struct device *dev; + struct device_node *np; +}; - iommu_spec->args[0] = alias; - return iommu_spec->np == pdev->bus->dev.of_node; -} - -static const struct iommu_ops -*of_pci_iommu_init(struct pci_dev *pdev, struct device_node *bridge_np) +static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data) { + struct of_pci_iommu_alias_info *info = data; const struct iommu_ops *ops; - struct of_phandle_args iommu_spec; + struct of_phandle_args iommu_spec = { .args_count = 1 }; int err; - /* - * Start by tracing the RID alias down the PCI topology as - * far as the host bridge whose OF node we have... - * (we're not even attempting to handle multi-alias devices yet) - */ - iommu_spec.args_count = 1; - iommu_spec.np = bridge_np; - pci_for_each_dma_alias(pdev, __get_pci_rid, &iommu_spec); - /* - * ...then find out what that becomes once it escapes the PCI - * bus into the system beyond, and which IOMMU it ends up at. - */ - iommu_spec.np = NULL; - err = of_pci_map_rid(bridge_np, iommu_spec.args[0], "iommu-map", + err = of_pci_map_rid(info->np, alias, "iommu-map", "iommu-map-mask", &iommu_spec.np, iommu_spec.args); if (err) - return err == -ENODEV ? NULL : ERR_PTR(err); - - ops = of_iommu_xlate(&pdev->dev, &iommu_spec); + return err == -ENODEV ? 1 : err; + ops = of_iommu_xlate(info->dev, &iommu_spec); of_node_put(iommu_spec.np); - return ops; -} -static const struct iommu_ops -*of_platform_iommu_init(struct device *dev, struct device_node *np) -{ - struct of_phandle_args iommu_spec; - const struct iommu_ops *ops = NULL; - int idx = 0; + if (IS_ERR(ops)) + return PTR_ERR(ops); - /* - * We don't currently walk up the tree looking for a parent IOMMU. - * See the `Notes:' section of - * Documentation/devicetree/bindings/iommu/iommu.txt - */ - while (!of_parse_phandle_with_args(np, "iommus", "#iommu-cells", - idx, &iommu_spec)) { - ops = of_iommu_xlate(dev, &iommu_spec); - of_node_put(iommu_spec.np); - idx++; - if (IS_ERR_OR_NULL(ops)) - break; - } - - return ops; + return info->np == pdev->bus->dev.of_node; } const struct iommu_ops *of_iommu_configure(struct device *dev, struct device_node *master_np) { - const struct iommu_ops *ops; + const struct iommu_ops *ops = NULL; struct iommu_fwspec *fwspec = dev->iommu_fwspec; + int err; if (!master_np) return NULL; @@ -221,18 +185,44 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, iommu_fwspec_free(dev); } - if (dev_is_pci(dev)) - ops = of_pci_iommu_init(to_pci_dev(dev), master_np); - else - ops = of_platform_iommu_init(dev, master_np); + /* + * We don't currently walk up the tree looking for a parent IOMMU. + * See the `Notes:' section of + * Documentation/devicetree/bindings/iommu/iommu.txt + */ + if (dev_is_pci(dev)) { + struct of_pci_iommu_alias_info info = { + .dev = dev, + .np = master_np, + }; + + err = pci_for_each_dma_alias(to_pci_dev(dev), + of_pci_iommu_init, &info); + if (err) /* err > 0 means the walk stopped, but non-fatally */ + ops = ERR_PTR(min(err, 0)); + else /* success implies both fwspec and ops are now valid */ + ops = dev->iommu_fwspec->ops; + } else { + struct of_phandle_args iommu_spec; + int idx = 0; + + while (!of_parse_phandle_with_args(master_np, "iommus", + "#iommu-cells", + idx, &iommu_spec)) { + ops = of_iommu_xlate(dev, &iommu_spec); + of_node_put(iommu_spec.np); + idx++; + if (IS_ERR_OR_NULL(ops)) + break; + } + } /* * If we have reason to believe the IOMMU driver missed the initial * add_device callback for dev, replay it to get things in order. */ if (!IS_ERR_OR_NULL(ops) && ops->add_device && dev->bus && !dev->iommu_group) { - int err = ops->add_device(dev); - + err = ops->add_device(dev); if (err) ops = ERR_PTR(err); } From bc24c57159fac5f7e8df22ce3bc8069578684763 Mon Sep 17 00:00:00 2001 From: David Dillow Date: Wed, 28 Jun 2017 19:42:23 -0700 Subject: [PATCH 02/87] iommu/vt-d: Don't free parent pagetable of the PTE we're adding When adding a large scatterlist entry that covers more than the L3 superpage size (1GB) but has an alignment such that we must use L2 superpages (2MB) , we give dma_pte_free_level() a range that causes it to free the L3 pagetable we're about to populate. We fix this by telling dma_pte_free_pagetable() about the pagetable level we're about to populate to prevent freeing it. For example, mapping a scatterlist with entry lengths 854MB and 1194MB at IOVA 0xffff80000000 would, when processing the 2MB-aligned second entry, cause pfn_to_dma_pte() to create a L3 directory to hold L2 superpages for the mapping at IOVA 0xffffc0000000. We would previously call dma_pte_free_pagetable(domain, 0xffffc0000, 0xfffffffff), which would free the L3 directory pfn_to_dma_pte() just created for IO PFN 0xffffc0000. Telling dma_pte_free_pagetable() to retain the L3 directories while using L2 superpages avoids the erroneous free. Signed-off-by: David Dillow Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 38 +++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 1a79a4ec6f09..bffc880f3fef 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1137,8 +1137,9 @@ static void dma_pte_clear_range(struct dmar_domain *domain, } static void dma_pte_free_level(struct dmar_domain *domain, int level, - struct dma_pte *pte, unsigned long pfn, - unsigned long start_pfn, unsigned long last_pfn) + int retain_level, struct dma_pte *pte, + unsigned long pfn, unsigned long start_pfn, + unsigned long last_pfn) { pfn = max(start_pfn, pfn); pte = &pte[pfn_level_offset(pfn, level)]; @@ -1153,12 +1154,17 @@ static void dma_pte_free_level(struct dmar_domain *domain, int level, level_pfn = pfn & level_mask(level); level_pte = phys_to_virt(dma_pte_addr(pte)); - if (level > 2) - dma_pte_free_level(domain, level - 1, level_pte, - level_pfn, start_pfn, last_pfn); + if (level > 2) { + dma_pte_free_level(domain, level - 1, retain_level, + level_pte, level_pfn, start_pfn, + last_pfn); + } - /* If range covers entire pagetable, free it */ - if (!(start_pfn > level_pfn || + /* + * Free the page table if we're below the level we want to + * retain and the range covers the entire table. + */ + if (level < retain_level && !(start_pfn > level_pfn || last_pfn < level_pfn + level_size(level) - 1)) { dma_clear_pte(pte); domain_flush_cache(domain, pte, sizeof(*pte)); @@ -1169,10 +1175,14 @@ next: } while (!first_pte_in_page(++pte) && pfn <= last_pfn); } -/* clear last level (leaf) ptes and free page table pages. */ +/* + * clear last level (leaf) ptes and free page table pages below the + * level we wish to keep intact. + */ static void dma_pte_free_pagetable(struct dmar_domain *domain, unsigned long start_pfn, - unsigned long last_pfn) + unsigned long last_pfn, + int retain_level) { BUG_ON(!domain_pfn_supported(domain, start_pfn)); BUG_ON(!domain_pfn_supported(domain, last_pfn)); @@ -1181,7 +1191,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, dma_pte_clear_range(domain, start_pfn, last_pfn); /* We don't need lock here; nobody else touches the iova range */ - dma_pte_free_level(domain, agaw_to_level(domain->agaw), + dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level, domain->pgd, 0, start_pfn, last_pfn); /* free pgd */ @@ -2274,8 +2284,11 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, /* * Ensure that old small page tables are * removed to make room for superpage(s). + * We're adding new large pages, so make sure + * we don't remove their parent tables. */ - dma_pte_free_pagetable(domain, iov_pfn, end_pfn); + dma_pte_free_pagetable(domain, iov_pfn, end_pfn, + largepage_lvl + 1); } else { pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE; } @@ -3935,7 +3948,8 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot); if (unlikely(ret)) { dma_pte_free_pagetable(domain, start_vpfn, - start_vpfn + size - 1); + start_vpfn + size - 1, + agaw_to_level(domain->agaw) + 1); free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size)); return 0; } From ebae3e830a991116526646f09dd67f68f332b330 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 5 Jul 2017 20:27:53 +0300 Subject: [PATCH 03/87] iommu: Correct iommu_map / iommu_unmap prototypes Commit 7d3002cc8c16 ("iommu/core: split mapping to page sizes as supported by the hardware") replaced 'int gfp_order' with a 'size_t size' of iommu_map / iommu_unmap function arguments, but missed the function prototypes for the disabled CONFIG_IOMMU_API case, let's correct them for consistency. Signed-off-by: Dmitry Osipenko Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2cb54adc4a33..f1ce8e517d8d 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -425,13 +425,13 @@ static inline struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) } static inline int iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, int gfp_order, int prot) + phys_addr_t paddr, size_t size, int prot) { return -ENODEV; } static inline int iommu_unmap(struct iommu_domain *domain, unsigned long iova, - int gfp_order) + size_t size) { return -ENODEV; } From 1f59adb1766d0261085327d97ec185f502d359ab Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 17 Jul 2017 07:47:02 +0200 Subject: [PATCH 04/87] iommu/exynos: Replace non-existing big-endian Kconfig option Wrong Kconfig option was used when adding warning for untested big-endian capabilities. There is no CONFIG_BIG_ENDIAN option. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 2395478dde75..b7aebaf28b82 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -54,7 +54,7 @@ typedef u32 sysmmu_pte_t; #define lv2ent_small(pent) ((*(pent) & 2) == 2) #define lv2ent_large(pent) ((*(pent) & 3) == 1) -#ifdef CONFIG_BIG_ENDIAN +#ifdef CONFIG_CPU_BIG_ENDIAN #warning "revisit driver if we can enable big-endian ptes" #endif From 01da21e5624f9e880e6983273091cc9b68a925d9 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Mon, 17 Jul 2017 22:05:10 +0900 Subject: [PATCH 05/87] iommu/ipmmu-vmsa: Use iommu_device_register()/unregister() Extend the driver to make use of iommu_device_register()/unregister() functions together with iommu_device_set_ops() and iommu_set_fwnode(). These used to be part of the earlier posted 64-bit ARM (r8a7795) series but it turns out that these days they are required on 32-bit ARM as well. Signed-off-by: Magnus Damm Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 2a38aa15be17..b87cafd77de0 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -35,6 +35,7 @@ struct ipmmu_vmsa_device { struct device *dev; void __iomem *base; + struct iommu_device iommu; struct list_head list; unsigned int num_utlbs; @@ -1054,6 +1055,13 @@ static int ipmmu_probe(struct platform_device *pdev) ipmmu_device_reset(mmu); + iommu_device_set_ops(&mmu->iommu, &ipmmu_ops); + iommu_device_set_fwnode(&mmu->iommu, &pdev->dev.of_node->fwnode); + + ret = iommu_device_register(&mmu->iommu); + if (ret) + return ret; + /* * We can't create the ARM mapping here as it requires the bus to have * an IOMMU, which only happens when bus_set_iommu() is called in @@ -1077,6 +1085,8 @@ static int ipmmu_remove(struct platform_device *pdev) list_del(&mmu->list); spin_unlock(&ipmmu_devices_lock); + iommu_device_unregister(&mmu->iommu); + #if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) arm_iommu_release_mapping(mmu->mapping); #endif From 49558da030d5559b6d80a052f4c51a69f89c31ac Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Mon, 17 Jul 2017 22:05:20 +0900 Subject: [PATCH 06/87] iommu/ipmmu-vmsa: Consistent ->of_xlate() handling The 32-bit ARM code gets updated to make use of ->of_xlate() and the code is shared between 64-bit and 32-bit ARM. The of_device_is_available() check gets dropped since it is included in of_iommu_xlate(). Suggested-by: Robin Murphy Signed-off-by: Magnus Damm Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 51 +++++++++++++------------------------- 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index b87cafd77de0..a0d752cc0470 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -734,6 +734,16 @@ error: return ret; } +static int ipmmu_of_xlate(struct device *dev, + struct of_phandle_args *spec) +{ + /* Initialize once - xlate() will call multiple times */ + if (to_priv(dev)) + return 0; + + return ipmmu_init_platform_device(dev); +} + #if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) static struct iommu_domain *ipmmu_domain_alloc(unsigned type) @@ -750,11 +760,11 @@ static int ipmmu_add_device(struct device *dev) struct iommu_group *group; int ret; - if (to_priv(dev)) { - dev_warn(dev, "IOMMU driver already assigned to device %s\n", - dev_name(dev)); - return -EINVAL; - } + /* + * Only let through devices that have been verified in xlate() + */ + if (!to_priv(dev)) + return -ENODEV; /* Create a device group and add the device to it. */ group = iommu_group_alloc(); @@ -773,10 +783,6 @@ static int ipmmu_add_device(struct device *dev) goto error; } - ret = ipmmu_init_platform_device(dev); - if (ret < 0) - goto error; - /* * Create the ARM mapping, used by the ARM DMA mapping core to allocate * VAs. This will allocate a corresponding IOMMU domain. @@ -817,24 +823,13 @@ error: if (!IS_ERR_OR_NULL(group)) iommu_group_remove_device(dev); - kfree(to_priv(dev)->utlbs); - kfree(to_priv(dev)); - set_priv(dev, NULL); - return ret; } static void ipmmu_remove_device(struct device *dev) { - struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); - arm_iommu_detach_device(dev); iommu_group_remove_device(dev); - - kfree(priv->utlbs); - kfree(priv); - - set_priv(dev, NULL); } static const struct iommu_ops ipmmu_ops = { @@ -849,6 +844,7 @@ static const struct iommu_ops ipmmu_ops = { .add_device = ipmmu_add_device, .remove_device = ipmmu_remove_device, .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, + .of_xlate = ipmmu_of_xlate, }; #endif /* !CONFIG_ARM && CONFIG_IOMMU_DMA */ @@ -958,19 +954,6 @@ static struct iommu_group *ipmmu_find_group_dma(struct device *dev) return group; } -static int ipmmu_of_xlate_dma(struct device *dev, - struct of_phandle_args *spec) -{ - /* If the IPMMU device is disabled in DT then return error - * to make sure the of_iommu code does not install ops - * even though the iommu device is disabled - */ - if (!of_device_is_available(spec->np)) - return -ENODEV; - - return ipmmu_init_platform_device(dev); -} - static const struct iommu_ops ipmmu_ops = { .domain_alloc = ipmmu_domain_alloc_dma, .domain_free = ipmmu_domain_free_dma, @@ -984,7 +967,7 @@ static const struct iommu_ops ipmmu_ops = { .remove_device = ipmmu_remove_device_dma, .device_group = ipmmu_find_group_dma, .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, - .of_xlate = ipmmu_of_xlate_dma, + .of_xlate = ipmmu_of_xlate, }; #endif /* CONFIG_IOMMU_DMA */ From 3c49ed322b10a0a57a4695e2762ddd0efaf9ca91 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 17 Jul 2017 22:05:31 +0900 Subject: [PATCH 07/87] iommu/ipmmu-vmsa: Use fwspec on both 32 and 64-bit ARM Consolidate the 32-bit and 64-bit code to make use of fwspec instead of archdata for the 32-bit ARM case. This is a simplified version of the fwspec handling code from Robin posted as [PATCH] iommu/ipmmu-vmsa: Convert to iommu_fwspec Signed-off-by: Robin Murphy Signed-off-by: Magnus Damm Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index a0d752cc0470..6bad9e019dc3 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -73,22 +73,9 @@ static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom) return container_of(dom, struct ipmmu_vmsa_domain, io_domain); } - static struct ipmmu_vmsa_iommu_priv *to_priv(struct device *dev) { -#if defined(CONFIG_ARM) - return dev->archdata.iommu; -#else - return dev->iommu_fwspec->iommu_priv; -#endif -} -static void set_priv(struct device *dev, struct ipmmu_vmsa_iommu_priv *p) -{ -#if defined(CONFIG_ARM) - dev->archdata.iommu = p; -#else - dev->iommu_fwspec->iommu_priv = p; -#endif + return dev->iommu_fwspec ? dev->iommu_fwspec->iommu_priv : NULL; } #define TLB_LOOP_TIMEOUT 100 /* 100us */ @@ -726,7 +713,7 @@ static int ipmmu_init_platform_device(struct device *dev) priv->utlbs = utlbs; priv->num_utlbs = num_utlbs; priv->dev = dev; - set_priv(dev, priv); + dev->iommu_fwspec->iommu_priv = priv; return 0; error: @@ -887,14 +874,12 @@ static void ipmmu_domain_free_dma(struct iommu_domain *io_domain) static int ipmmu_add_device_dma(struct device *dev) { - struct iommu_fwspec *fwspec = dev->iommu_fwspec; struct iommu_group *group; /* * Only let through devices that have been verified in xlate() - * We may get called with dev->iommu_fwspec set to NULL. */ - if (!fwspec || !fwspec->iommu_priv) + if (!to_priv(dev)) return -ENODEV; group = iommu_group_get_for_dev(dev); From 7b2d59611fef21211f4988b8f34b3c80d033f1e5 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Mon, 17 Jul 2017 22:05:41 +0900 Subject: [PATCH 08/87] iommu/ipmmu-vmsa: Replace local utlb code with fwspec ids Now when both 32-bit and 64-bit code inside the driver is using fwspec it is possible to replace the utlb handling with fwspec ids that get populated from ->of_xlate(). Suggested-by: Robin Murphy Signed-off-by: Magnus Damm Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 104 +++++++------------------------------ 1 file changed, 19 insertions(+), 85 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 6bad9e019dc3..47cbf70e4e19 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -59,8 +60,6 @@ struct ipmmu_vmsa_domain { struct ipmmu_vmsa_iommu_priv { struct ipmmu_vmsa_device *mmu; - unsigned int *utlbs; - unsigned int num_utlbs; struct device *dev; struct list_head list; }; @@ -550,13 +549,14 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, struct device *dev) { struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); + struct iommu_fwspec *fwspec = dev->iommu_fwspec; struct ipmmu_vmsa_device *mmu = priv->mmu; struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); unsigned long flags; unsigned int i; int ret = 0; - if (!mmu) { + if (!priv || !priv->mmu) { dev_err(dev, "Cannot attach to IPMMU\n"); return -ENXIO; } @@ -583,8 +583,8 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, if (ret < 0) return ret; - for (i = 0; i < priv->num_utlbs; ++i) - ipmmu_utlb_enable(domain, priv->utlbs[i]); + for (i = 0; i < fwspec->num_ids; ++i) + ipmmu_utlb_enable(domain, fwspec->ids[i]); return 0; } @@ -592,12 +592,12 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, static void ipmmu_detach_device(struct iommu_domain *io_domain, struct device *dev) { - struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); + struct iommu_fwspec *fwspec = dev->iommu_fwspec; struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); unsigned int i; - for (i = 0; i < priv->num_utlbs; ++i) - ipmmu_utlb_disable(domain, priv->utlbs[i]); + for (i = 0; i < fwspec->num_ids; ++i) + ipmmu_utlb_disable(domain, fwspec->ids[i]); /* * TODO: Optimize by disabling the context when no device is attached. @@ -633,102 +633,36 @@ static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain, return domain->iop->iova_to_phys(domain->iop, iova); } -static int ipmmu_find_utlbs(struct ipmmu_vmsa_device *mmu, struct device *dev, - unsigned int *utlbs, unsigned int num_utlbs) -{ - unsigned int i; - - for (i = 0; i < num_utlbs; ++i) { - struct of_phandle_args args; - int ret; - - ret = of_parse_phandle_with_args(dev->of_node, "iommus", - "#iommu-cells", i, &args); - if (ret < 0) - return ret; - - of_node_put(args.np); - - if (args.np != mmu->dev->of_node || args.args_count != 1) - return -EINVAL; - - utlbs[i] = args.args[0]; - } - - return 0; -} - -static int ipmmu_init_platform_device(struct device *dev) +static int ipmmu_init_platform_device(struct device *dev, + struct of_phandle_args *args) { + struct platform_device *ipmmu_pdev; struct ipmmu_vmsa_iommu_priv *priv; - struct ipmmu_vmsa_device *mmu; - unsigned int *utlbs; - unsigned int i; - int num_utlbs; - int ret = -ENODEV; - /* Find the master corresponding to the device. */ - - num_utlbs = of_count_phandle_with_args(dev->of_node, "iommus", - "#iommu-cells"); - if (num_utlbs < 0) + ipmmu_pdev = of_find_device_by_node(args->np); + if (!ipmmu_pdev) return -ENODEV; - utlbs = kcalloc(num_utlbs, sizeof(*utlbs), GFP_KERNEL); - if (!utlbs) + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) return -ENOMEM; - spin_lock(&ipmmu_devices_lock); - - list_for_each_entry(mmu, &ipmmu_devices, list) { - ret = ipmmu_find_utlbs(mmu, dev, utlbs, num_utlbs); - if (!ret) { - /* - * TODO Take a reference to the MMU to protect - * against device removal. - */ - break; - } - } - - spin_unlock(&ipmmu_devices_lock); - - if (ret < 0) - goto error; - - for (i = 0; i < num_utlbs; ++i) { - if (utlbs[i] >= mmu->num_utlbs) { - ret = -EINVAL; - goto error; - } - } - - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) { - ret = -ENOMEM; - goto error; - } - - priv->mmu = mmu; - priv->utlbs = utlbs; - priv->num_utlbs = num_utlbs; + priv->mmu = platform_get_drvdata(ipmmu_pdev); priv->dev = dev; dev->iommu_fwspec->iommu_priv = priv; return 0; - -error: - kfree(utlbs); - return ret; } static int ipmmu_of_xlate(struct device *dev, struct of_phandle_args *spec) { + iommu_fwspec_add_ids(dev, spec->args, 1); + /* Initialize once - xlate() will call multiple times */ if (to_priv(dev)) return 0; - return ipmmu_init_platform_device(dev); + return ipmmu_init_platform_device(dev, spec); } #if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) From 02dd44caecbeba8e622c332a55c10682bf143d1a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 17 Jul 2017 22:05:51 +0900 Subject: [PATCH 09/87] iommu/ipmmu-vmsa: Clean up device tracking Get rid of now unused device tracking code. Future code should instead be able to use driver_for_each_device() for this purpose. This is a simplified version of the following patch from Robin [PATCH] iommu/ipmmu-vmsa: Clean up group allocation Signed-off-by: Robin Murphy Signed-off-by: Magnus Damm Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 47cbf70e4e19..5093d1c4f46d 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -37,7 +37,6 @@ struct ipmmu_vmsa_device { struct device *dev; void __iomem *base; struct iommu_device iommu; - struct list_head list; unsigned int num_utlbs; spinlock_t lock; /* Protects ctx and domains[] */ @@ -64,9 +63,6 @@ struct ipmmu_vmsa_iommu_priv { struct list_head list; }; -static DEFINE_SPINLOCK(ipmmu_devices_lock); -static LIST_HEAD(ipmmu_devices); - static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom) { return container_of(dom, struct ipmmu_vmsa_domain, io_domain); @@ -970,10 +966,6 @@ static int ipmmu_probe(struct platform_device *pdev) * ipmmu_init() after the probe function returns. */ - spin_lock(&ipmmu_devices_lock); - list_add(&mmu->list, &ipmmu_devices); - spin_unlock(&ipmmu_devices_lock); - platform_set_drvdata(pdev, mmu); return 0; @@ -983,10 +975,6 @@ static int ipmmu_remove(struct platform_device *pdev) { struct ipmmu_vmsa_device *mmu = platform_get_drvdata(pdev); - spin_lock(&ipmmu_devices_lock); - list_del(&mmu->list); - spin_unlock(&ipmmu_devices_lock); - iommu_device_unregister(&mmu->iommu); #if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) From 6bd4f1c754b2fafac403073b0d8469bed1d37e2d Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 18 Jul 2017 16:43:09 -0500 Subject: [PATCH 10/87] iommu: Convert to using %pOF instead of full_name Now that we have a custom printf format specifier, convert users of full_name to use %pOF instead. This is preparation to remove storing of the full path string for each node. Signed-off-by: Rob Herring Cc: Joerg Roedel Cc: Heiko Stuebner Cc: iommu@lists.linux-foundation.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-rockchip@lists.infradead.org Reviewed-by: Heiko Stuebner Signed-off-by: Joerg Roedel --- drivers/iommu/fsl_pamu.c | 20 ++++++++------------ drivers/iommu/fsl_pamu_domain.c | 10 ++++------ drivers/iommu/of_iommu.c | 3 +-- drivers/iommu/rockchip-iommu.c | 10 +++++----- 4 files changed, 18 insertions(+), 25 deletions(-) diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index a34355fca37a..919ad9045ac4 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -530,8 +530,8 @@ u32 get_stash_id(u32 stash_dest_hint, u32 vcpu) if (node) { prop = of_get_property(node, "cache-stash-id", NULL); if (!prop) { - pr_debug("missing cache-stash-id at %s\n", - node->full_name); + pr_debug("missing cache-stash-id at %pOF\n", + node); of_node_put(node); return ~(u32)0; } @@ -557,8 +557,8 @@ found_cpu_node: if (stash_dest_hint == cache_level) { prop = of_get_property(node, "cache-stash-id", NULL); if (!prop) { - pr_debug("missing cache-stash-id at %s\n", - node->full_name); + pr_debug("missing cache-stash-id at %pOF\n", + node); of_node_put(node); return ~(u32)0; } @@ -568,8 +568,7 @@ found_cpu_node: prop = of_get_property(node, "next-level-cache", NULL); if (!prop) { - pr_debug("can't find next-level-cache at %s\n", - node->full_name); + pr_debug("can't find next-level-cache at %pOF\n", node); of_node_put(node); return ~(u32)0; /* can't traverse any further */ } @@ -1063,8 +1062,7 @@ static int fsl_pamu_probe(struct platform_device *pdev) guts_node = of_find_matching_node(NULL, guts_device_ids); if (!guts_node) { - dev_err(dev, "could not find GUTS node %s\n", - dev->of_node->full_name); + dev_err(dev, "could not find GUTS node %pOF\n", dev->of_node); ret = -ENODEV; goto error; } @@ -1246,8 +1244,7 @@ static __init int fsl_pamu_init(void) pdev = platform_device_alloc("fsl-of-pamu", 0); if (!pdev) { - pr_err("could not allocate device %s\n", - np->full_name); + pr_err("could not allocate device %pOF\n", np); ret = -ENOMEM; goto error_device_alloc; } @@ -1259,8 +1256,7 @@ static __init int fsl_pamu_init(void) ret = platform_device_add(pdev); if (ret) { - pr_err("could not add device %s (err=%i)\n", - np->full_name, ret); + pr_err("could not add device %pOF (err=%i)\n", np, ret); goto error_device_add; } diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index da0e1e30ef37..01c73479345d 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -619,8 +619,8 @@ static int handle_attach_device(struct fsl_dma_domain *dma_domain, for (i = 0; i < num; i++) { /* Ensure that LIODN value is valid */ if (liodn[i] >= PAACE_NUMBER_ENTRIES) { - pr_debug("Invalid liodn %d, attach device failed for %s\n", - liodn[i], dev->of_node->full_name); + pr_debug("Invalid liodn %d, attach device failed for %pOF\n", + liodn[i], dev->of_node); ret = -EINVAL; break; } @@ -684,8 +684,7 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain, liodn_cnt = len / sizeof(u32); ret = handle_attach_device(dma_domain, dev, liodn, liodn_cnt); } else { - pr_debug("missing fsl,liodn property at %s\n", - dev->of_node->full_name); + pr_debug("missing fsl,liodn property at %pOF\n", dev->of_node); ret = -EINVAL; } @@ -720,8 +719,7 @@ static void fsl_pamu_detach_device(struct iommu_domain *domain, if (prop) detach_device(dev, dma_domain); else - pr_debug("missing fsl,liodn property at %s\n", - dev->of_node->full_name); + pr_debug("missing fsl,liodn property at %pOF\n", dev->of_node); } static int configure_domain_geometry(struct iommu_domain *domain, void *data) diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index be8ac1ddec06..34160e7a8dd7 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -245,8 +245,7 @@ static int __init of_iommu_init(void) const of_iommu_init_fn init_fn = match->data; if (init_fn && init_fn(np)) - pr_err("Failed to initialise IOMMU %s\n", - of_node_full_name(np)); + pr_err("Failed to initialise IOMMU %pOF\n", np); } return 0; diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 4ba48a26b389..1b8155dada26 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -1008,20 +1008,20 @@ static int rk_iommu_group_set_iommudata(struct iommu_group *group, ret = of_parse_phandle_with_args(np, "iommus", "#iommu-cells", 0, &args); if (ret) { - dev_err(dev, "of_parse_phandle_with_args(%s) => %d\n", - np->full_name, ret); + dev_err(dev, "of_parse_phandle_with_args(%pOF) => %d\n", + np, ret); return ret; } if (args.args_count != 0) { - dev_err(dev, "incorrect number of iommu params found for %s (found %d, expected 0)\n", - args.np->full_name, args.args_count); + dev_err(dev, "incorrect number of iommu params found for %pOF (found %d, expected 0)\n", + args.np, args.args_count); return -EINVAL; } pd = of_find_device_by_node(args.np); of_node_put(args.np); if (!pd) { - dev_err(dev, "iommu %s not found\n", args.np->full_name); + dev_err(dev, "iommu %pOF not found\n", args.np); return -EPROBE_DEFER; } From 07218a4fc59f6ceca950994e20ab766adbeebafb Mon Sep 17 00:00:00 2001 From: Simon Xue Date: Mon, 24 Jul 2017 10:37:13 +0800 Subject: [PATCH 11/87] Docs: dt: rockchip: add rockchip,disable-mmu-reset property Add rockchip,disable-mmu-reset property to disable some mmu reset operation Signed-off-by: Simon Xue Signed-off-by: Joerg Roedel --- Documentation/devicetree/bindings/iommu/rockchip,iommu.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt index 9a55ac3735e5..2098f7732264 100644 --- a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt +++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt @@ -15,6 +15,11 @@ Required properties: to associate with its master device. See: Documentation/devicetree/bindings/iommu/iommu.txt +Optional properties: +- rockchip,disable-mmu-reset : Don't use the mmu reset operation. + Some mmu instances may produce unexpected results + when the reset operation is used. + Example: vopl_mmu: iommu@ff940300 { From 03f732f89034b3f5fbe7ef34cd3482f2e9c335cf Mon Sep 17 00:00:00 2001 From: Simon Xue Date: Mon, 24 Jul 2017 10:37:14 +0800 Subject: [PATCH 12/87] iommu/rockchip: add multi irqs support RK3368 vpu mmu have two irqs, this patch support multi irqs Signed-off-by: Simon Xue Signed-off-by: Joerg Roedel --- drivers/iommu/rockchip-iommu.c | 35 +++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 4ba48a26b389..e2852b041231 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -90,7 +90,8 @@ struct rk_iommu { struct device *dev; void __iomem **bases; int num_mmu; - int irq; + int *irq; + int num_irq; struct iommu_device iommu; struct list_head node; /* entry in rk_iommu_domain.iommus */ struct iommu_domain *domain; /* domain to which iommu is attached */ @@ -825,10 +826,12 @@ static int rk_iommu_attach_device(struct iommu_domain *domain, iommu->domain = domain; - ret = devm_request_irq(iommu->dev, iommu->irq, rk_iommu_irq, - IRQF_SHARED, dev_name(dev), iommu); - if (ret) - return ret; + for (i = 0; i < iommu->num_irq; i++) { + ret = devm_request_irq(iommu->dev, iommu->irq[i], rk_iommu_irq, + IRQF_SHARED, dev_name(dev), iommu); + if (ret) + return ret; + } for (i = 0; i < iommu->num_mmu; i++) { rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, @@ -878,7 +881,8 @@ static void rk_iommu_detach_device(struct iommu_domain *domain, } rk_iommu_disable_stall(iommu); - devm_free_irq(iommu->dev, iommu->irq, iommu); + for (i = 0; i < iommu->num_irq; i++) + devm_free_irq(iommu->dev, iommu->irq[i], iommu); iommu->domain = NULL; @@ -1157,10 +1161,23 @@ static int rk_iommu_probe(struct platform_device *pdev) if (iommu->num_mmu == 0) return PTR_ERR(iommu->bases[0]); - iommu->irq = platform_get_irq(pdev, 0); - if (iommu->irq < 0) { - dev_err(dev, "Failed to get IRQ, %d\n", iommu->irq); + iommu->num_irq = platform_irq_count(pdev); + if (iommu->num_irq < 0) + return iommu->num_irq; + if (iommu->num_irq == 0) return -ENXIO; + + iommu->irq = devm_kcalloc(dev, iommu->num_irq, sizeof(*iommu->irq), + GFP_KERNEL); + if (!iommu->irq) + return -ENOMEM; + + for (i = 0; i < iommu->num_irq; i++) { + iommu->irq[i] = platform_get_irq(pdev, i); + if (iommu->irq[i] < 0) { + dev_err(dev, "Failed to get IRQ, %d\n", iommu->irq[i]); + return -ENXIO; + } } err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev)); From c3aa47424918acdfed8982d5a3588351ebefdfc1 Mon Sep 17 00:00:00 2001 From: Simon Xue Date: Mon, 24 Jul 2017 10:37:15 +0800 Subject: [PATCH 13/87] iommu/rockchip: ignore isp mmu reset operation ISP mmu can't support reset operation, it won't get the expected result when reset, but rest functions work normally. Add this patch as a WA for this issue. Signed-off-by: Simon Xue Signed-off-by: Joerg Roedel --- drivers/iommu/rockchip-iommu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index e2852b041231..78ea341c7c75 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -92,6 +92,7 @@ struct rk_iommu { int num_mmu; int *irq; int num_irq; + bool reset_disabled; struct iommu_device iommu; struct list_head node; /* entry in rk_iommu_domain.iommus */ struct iommu_domain *domain; /* domain to which iommu is attached */ @@ -415,6 +416,9 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu) int ret, i; u32 dte_addr; + if (iommu->reset_disabled) + return 0; + /* * Check if register DTE_ADDR is working by writing DTE_ADDR_DUMMY * and verifying that upper 5 nybbles are read back. @@ -1180,6 +1184,9 @@ static int rk_iommu_probe(struct platform_device *pdev) } } + iommu->reset_disabled = device_property_read_bool(dev, + "rockchip,disable-mmu-reset"); + err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev)); if (err) return err; From db3a7fd7a9027efac3e4f7cb2582e142cd6b9d47 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 28 Jul 2017 15:19:19 +0200 Subject: [PATCH 14/87] iommu/exynos: prevent building on big-endian kernels Since we print the correct warning, an allmodconfig build is no longer clean but always prints it, which defeats compile-testing: drivers/iommu/exynos-iommu.c:58:2: error: #warning "revisit driver if we can enable big-endian ptes" [-Werror=cpp] This replaces the #warning with a dependency, moving warning text into a comment. Fixes: 1f59adb1766d ("iommu/exynos: Replace non-existing big-endian Kconfig option") Signed-off-by: Arnd Bergmann Reviewed-by: Krzysztof Kozlowski Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 1 + drivers/iommu/exynos-iommu.c | 4 ---- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index f73ff28f77e2..c5d80fe3dab9 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -253,6 +253,7 @@ config TEGRA_IOMMU_SMMU config EXYNOS_IOMMU bool "Exynos IOMMU Support" depends on ARCH_EXYNOS && MMU + depends on !CPU_BIG_ENDIAN # revisit driver if we can enable big-endian ptes select IOMMU_API select ARM_DMA_USE_IOMMU help diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index b7aebaf28b82..c6b69e91132b 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -54,10 +54,6 @@ typedef u32 sysmmu_pte_t; #define lv2ent_small(pent) ((*(pent) & 2) == 2) #define lv2ent_large(pent) ((*(pent) & 3) == 1) -#ifdef CONFIG_CPU_BIG_ENDIAN -#warning "revisit driver if we can enable big-endian ptes" -#endif - /* * v1.x - v3.x SYSMMU supports 32bit physical and 32bit virtual address spaces * v5.0 introduced support for 36bit physical address space by shifting From 159d3e35da3b6ff36b9077a8719bcac215155771 Mon Sep 17 00:00:00 2001 From: Fernando Guzman Lugo Date: Fri, 28 Jul 2017 15:49:13 -0500 Subject: [PATCH 15/87] iommu/omap: Fix disabling of MMU upon a fault The IOMMU framework lets its client users be notified on a MMU fault and allows them to either handle the interrupt by dynamic reloading of an appropriate TLB/PTE for the offending fault address or to completely restart/recovery the device and its IOMMU. The OMAP remoteproc driver performs the latter option, and does so after unwinding the previous mappings. The OMAP IOMMU fault handler however disables the MMU and cuts off the clock upon a MMU fault at present, resulting in an interconnect abort during any subsequent operation that touches the MMU registers. So, disable the IP-level fault interrupts instead of disabling the MMU, to allow continued MMU register operations as well as to avoid getting interrupted again. Signed-off-by: Fernando Guzman Lugo [s-anna@ti.com: add commit description] Signed-off-by: Suman Anna Signed-off-by: Josue Albarran Acked-by: Suman Anna Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 641e035cf866..10c9de8de45d 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -786,7 +786,7 @@ static irqreturn_t iommu_fault_handler(int irq, void *data) if (!report_iommu_fault(domain, obj->dev, da, 0)) return IRQ_HANDLED; - iommu_disable(obj); + iommu_write_reg(obj, 0, MMU_IRQENABLE); iopgd = iopgd_offset(obj, da); From bfee0cf0ee1da0010b97ba88ed241d6f7bed3e7a Mon Sep 17 00:00:00 2001 From: Josue Albarran Date: Fri, 28 Jul 2017 15:49:14 -0500 Subject: [PATCH 16/87] iommu/omap: Use DMA-API for performing cache flushes The OMAP IOMMU driver was using ARM assembly code directly for flushing the MMU page table entries from the caches. This caused MMU faults on OMAP4 (Cortex-A9 based SoCs) as L2 caches were not handled due to the presence of a PL310 L2 Cache Controller. These faults were however not seen on OMAP5/DRA7 SoCs (Cortex-A15 based SoCs). The OMAP IOMMU driver is adapted to use the DMA Streaming API instead now to flush the page table/directory table entries from the CPU caches. This ensures that the devices always see the updated page table entries. The outer caches are now addressed automatically with the usage of the DMA API. Signed-off-by: Josue Albarran Acked-by: Suman Anna Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 125 ++++++++++++++++++++++++------------- drivers/iommu/omap-iommu.h | 1 + 2 files changed, 81 insertions(+), 45 deletions(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 10c9de8de45d..bd67e1b2c64e 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -11,6 +11,7 @@ * published by the Free Software Foundation. */ +#include #include #include #include @@ -29,8 +30,6 @@ #include #include -#include - #include #include "omap-iopgtable.h" @@ -454,36 +453,35 @@ static void flush_iotlb_all(struct omap_iommu *obj) /* * H/W pagetable operations */ -static void flush_iopgd_range(u32 *first, u32 *last) +static void flush_iopte_range(struct device *dev, dma_addr_t dma, + unsigned long offset, int num_entries) { - /* FIXME: L2 cache should be taken care of if it exists */ - do { - asm("mcr p15, 0, %0, c7, c10, 1 @ flush_pgd" - : : "r" (first)); - first += L1_CACHE_BYTES / sizeof(*first); - } while (first <= last); + size_t size = num_entries * sizeof(u32); + + dma_sync_single_range_for_device(dev, dma, offset, size, DMA_TO_DEVICE); } -static void flush_iopte_range(u32 *first, u32 *last) +static void iopte_free(struct omap_iommu *obj, u32 *iopte, bool dma_valid) { - /* FIXME: L2 cache should be taken care of if it exists */ - do { - asm("mcr p15, 0, %0, c7, c10, 1 @ flush_pte" - : : "r" (first)); - first += L1_CACHE_BYTES / sizeof(*first); - } while (first <= last); -} + dma_addr_t pt_dma; -static void iopte_free(u32 *iopte) -{ /* Note: freed iopte's must be clean ready for re-use */ - if (iopte) + if (iopte) { + if (dma_valid) { + pt_dma = virt_to_phys(iopte); + dma_unmap_single(obj->dev, pt_dma, IOPTE_TABLE_SIZE, + DMA_TO_DEVICE); + } + kmem_cache_free(iopte_cachep, iopte); + } } -static u32 *iopte_alloc(struct omap_iommu *obj, u32 *iopgd, u32 da) +static u32 *iopte_alloc(struct omap_iommu *obj, u32 *iopgd, + dma_addr_t *pt_dma, u32 da) { u32 *iopte; + unsigned long offset = iopgd_index(da) * sizeof(da); /* a table has already existed */ if (*iopgd) @@ -500,18 +498,38 @@ static u32 *iopte_alloc(struct omap_iommu *obj, u32 *iopgd, u32 da) if (!iopte) return ERR_PTR(-ENOMEM); - *iopgd = virt_to_phys(iopte) | IOPGD_TABLE; - flush_iopgd_range(iopgd, iopgd); + *pt_dma = dma_map_single(obj->dev, iopte, IOPTE_TABLE_SIZE, + DMA_TO_DEVICE); + if (dma_mapping_error(obj->dev, *pt_dma)) { + dev_err(obj->dev, "DMA map error for L2 table\n"); + iopte_free(obj, iopte, false); + return ERR_PTR(-ENOMEM); + } + /* + * we rely on dma address and the physical address to be + * the same for mapping the L2 table + */ + if (WARN_ON(*pt_dma != virt_to_phys(iopte))) { + dev_err(obj->dev, "DMA translation error for L2 table\n"); + dma_unmap_single(obj->dev, *pt_dma, IOPTE_TABLE_SIZE, + DMA_TO_DEVICE); + iopte_free(obj, iopte, false); + return ERR_PTR(-ENOMEM); + } + + *iopgd = virt_to_phys(iopte) | IOPGD_TABLE; + + flush_iopte_range(obj->dev, obj->pd_dma, offset, 1); dev_vdbg(obj->dev, "%s: a new pte:%p\n", __func__, iopte); } else { /* We raced, free the reduniovant table */ - iopte_free(iopte); + iopte_free(obj, iopte, false); } pte_ready: iopte = iopte_offset(iopgd, da); - + *pt_dma = virt_to_phys(iopte); dev_vdbg(obj->dev, "%s: da:%08x pgd:%p *pgd:%08x pte:%p *pte:%08x\n", __func__, da, iopgd, *iopgd, iopte, *iopte); @@ -522,6 +540,7 @@ pte_ready: static int iopgd_alloc_section(struct omap_iommu *obj, u32 da, u32 pa, u32 prot) { u32 *iopgd = iopgd_offset(obj, da); + unsigned long offset = iopgd_index(da) * sizeof(da); if ((da | pa) & ~IOSECTION_MASK) { dev_err(obj->dev, "%s: %08x:%08x should aligned on %08lx\n", @@ -530,13 +549,14 @@ static int iopgd_alloc_section(struct omap_iommu *obj, u32 da, u32 pa, u32 prot) } *iopgd = (pa & IOSECTION_MASK) | prot | IOPGD_SECTION; - flush_iopgd_range(iopgd, iopgd); + flush_iopte_range(obj->dev, obj->pd_dma, offset, 1); return 0; } static int iopgd_alloc_super(struct omap_iommu *obj, u32 da, u32 pa, u32 prot) { u32 *iopgd = iopgd_offset(obj, da); + unsigned long offset = iopgd_index(da) * sizeof(da); int i; if ((da | pa) & ~IOSUPER_MASK) { @@ -547,20 +567,22 @@ static int iopgd_alloc_super(struct omap_iommu *obj, u32 da, u32 pa, u32 prot) for (i = 0; i < 16; i++) *(iopgd + i) = (pa & IOSUPER_MASK) | prot | IOPGD_SUPER; - flush_iopgd_range(iopgd, iopgd + 15); + flush_iopte_range(obj->dev, obj->pd_dma, offset, 16); return 0; } static int iopte_alloc_page(struct omap_iommu *obj, u32 da, u32 pa, u32 prot) { u32 *iopgd = iopgd_offset(obj, da); - u32 *iopte = iopte_alloc(obj, iopgd, da); + dma_addr_t pt_dma; + u32 *iopte = iopte_alloc(obj, iopgd, &pt_dma, da); + unsigned long offset = iopte_index(da) * sizeof(da); if (IS_ERR(iopte)) return PTR_ERR(iopte); *iopte = (pa & IOPAGE_MASK) | prot | IOPTE_SMALL; - flush_iopte_range(iopte, iopte); + flush_iopte_range(obj->dev, pt_dma, offset, 1); dev_vdbg(obj->dev, "%s: da:%08x pa:%08x pte:%p *pte:%08x\n", __func__, da, pa, iopte, *iopte); @@ -571,7 +593,9 @@ static int iopte_alloc_page(struct omap_iommu *obj, u32 da, u32 pa, u32 prot) static int iopte_alloc_large(struct omap_iommu *obj, u32 da, u32 pa, u32 prot) { u32 *iopgd = iopgd_offset(obj, da); - u32 *iopte = iopte_alloc(obj, iopgd, da); + dma_addr_t pt_dma; + u32 *iopte = iopte_alloc(obj, iopgd, &pt_dma, da); + unsigned long offset = iopte_index(da) * sizeof(da); int i; if ((da | pa) & ~IOLARGE_MASK) { @@ -585,7 +609,7 @@ static int iopte_alloc_large(struct omap_iommu *obj, u32 da, u32 pa, u32 prot) for (i = 0; i < 16; i++) *(iopte + i) = (pa & IOLARGE_MASK) | prot | IOPTE_LARGE; - flush_iopte_range(iopte, iopte + 15); + flush_iopte_range(obj->dev, pt_dma, offset, 16); return 0; } @@ -674,6 +698,9 @@ static size_t iopgtable_clear_entry_core(struct omap_iommu *obj, u32 da) size_t bytes; u32 *iopgd = iopgd_offset(obj, da); int nent = 1; + dma_addr_t pt_dma; + unsigned long pd_offset = iopgd_index(da) * sizeof(da); + unsigned long pt_offset = iopte_index(da) * sizeof(da); if (!*iopgd) return 0; @@ -690,7 +717,8 @@ static size_t iopgtable_clear_entry_core(struct omap_iommu *obj, u32 da) } bytes *= nent; memset(iopte, 0, nent * sizeof(*iopte)); - flush_iopte_range(iopte, iopte + (nent - 1) * sizeof(*iopte)); + pt_dma = virt_to_phys(iopte); + flush_iopte_range(obj->dev, pt_dma, pt_offset, nent); /* * do table walk to check if this table is necessary or not @@ -700,7 +728,7 @@ static size_t iopgtable_clear_entry_core(struct omap_iommu *obj, u32 da) if (iopte[i]) goto out; - iopte_free(iopte); + iopte_free(obj, iopte, true); nent = 1; /* for the next L1 entry */ } else { bytes = IOPGD_SIZE; @@ -712,7 +740,7 @@ static size_t iopgtable_clear_entry_core(struct omap_iommu *obj, u32 da) bytes *= nent; } memset(iopgd, 0, nent * sizeof(*iopgd)); - flush_iopgd_range(iopgd, iopgd + (nent - 1) * sizeof(*iopgd)); + flush_iopte_range(obj->dev, obj->pd_dma, pd_offset, nent); out: return bytes; } @@ -738,6 +766,7 @@ static size_t iopgtable_clear_entry(struct omap_iommu *obj, u32 da) static void iopgtable_clear_entry_all(struct omap_iommu *obj) { + unsigned long offset; int i; spin_lock(&obj->page_table_lock); @@ -748,15 +777,16 @@ static void iopgtable_clear_entry_all(struct omap_iommu *obj) da = i << IOPGD_SHIFT; iopgd = iopgd_offset(obj, da); + offset = iopgd_index(da) * sizeof(da); if (!*iopgd) continue; if (iopgd_is_table(*iopgd)) - iopte_free(iopte_offset(iopgd, 0)); + iopte_free(obj, iopte_offset(iopgd, 0), true); *iopgd = 0; - flush_iopgd_range(iopgd, iopgd); + flush_iopte_range(obj->dev, obj->pd_dma, offset, 1); } flush_iotlb_all(obj); @@ -815,10 +845,18 @@ static int omap_iommu_attach(struct omap_iommu *obj, u32 *iopgd) spin_lock(&obj->iommu_lock); + obj->pd_dma = dma_map_single(obj->dev, iopgd, IOPGD_TABLE_SIZE, + DMA_TO_DEVICE); + if (dma_mapping_error(obj->dev, obj->pd_dma)) { + dev_err(obj->dev, "DMA map error for L1 table\n"); + err = -ENOMEM; + goto out_err; + } + obj->iopgd = iopgd; err = iommu_enable(obj); if (err) - goto err_enable; + goto out_err; flush_iotlb_all(obj); spin_unlock(&obj->iommu_lock); @@ -827,7 +865,7 @@ static int omap_iommu_attach(struct omap_iommu *obj, u32 *iopgd) return 0; -err_enable: +out_err: spin_unlock(&obj->iommu_lock); return err; @@ -844,7 +882,10 @@ static void omap_iommu_detach(struct omap_iommu *obj) spin_lock(&obj->iommu_lock); + dma_unmap_single(obj->dev, obj->pd_dma, IOPGD_TABLE_SIZE, + DMA_TO_DEVICE); iommu_disable(obj); + obj->pd_dma = 0; obj->iopgd = NULL; spin_unlock(&obj->iommu_lock); @@ -1008,11 +1049,6 @@ static struct platform_driver omap_iommu_driver = { }, }; -static void iopte_cachep_ctor(void *iopte) -{ - clean_dcache_area(iopte, IOPTE_TABLE_SIZE); -} - static u32 iotlb_init_entry(struct iotlb_entry *e, u32 da, u32 pa, int pgsz) { memset(e, 0, sizeof(*e)); @@ -1159,7 +1195,6 @@ static struct iommu_domain *omap_iommu_domain_alloc(unsigned type) if (WARN_ON(!IS_ALIGNED((long)omap_domain->pgtable, IOPGD_TABLE_SIZE))) goto fail_align; - clean_dcache_area(omap_domain->pgtable, IOPGD_TABLE_SIZE); spin_lock_init(&omap_domain->lock); omap_domain->domain.geometry.aperture_start = 0; @@ -1347,7 +1382,7 @@ static int __init omap_iommu_init(void) of_node_put(np); p = kmem_cache_create("iopte_cache", IOPTE_TABLE_SIZE, align, flags, - iopte_cachep_ctor); + NULL); if (!p) return -ENOMEM; iopte_cachep = p; diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h index 6e70515e6038..a675af29a6ec 100644 --- a/drivers/iommu/omap-iommu.h +++ b/drivers/iommu/omap-iommu.h @@ -61,6 +61,7 @@ struct omap_iommu { */ u32 *iopgd; spinlock_t page_table_lock; /* protect iopgd */ + dma_addr_t pd_dma; int nr_tlb_entries; From 75487860a874a79aaa286c600989249aa9fe8fdc Mon Sep 17 00:00:00 2001 From: Honghui Zhang Date: Fri, 4 Aug 2017 09:32:25 +0800 Subject: [PATCH 17/87] memory: mtk-smi: Use of_device_get_match_data helper Replace custom code with generic helper to retrieve driver data. Signed-off-by: Honghui Zhang Signed-off-by: Joerg Roedel --- drivers/memory/mtk-smi.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c index 4afbc412f959..2b798bb48b74 100644 --- a/drivers/memory/mtk-smi.c +++ b/drivers/memory/mtk-smi.c @@ -240,20 +240,15 @@ static int mtk_smi_larb_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct device_node *smi_node; struct platform_device *smi_pdev; - const struct of_device_id *of_id; if (!dev->pm_domain) return -EPROBE_DEFER; - of_id = of_match_node(mtk_smi_larb_of_ids, pdev->dev.of_node); - if (!of_id) - return -EINVAL; - larb = devm_kzalloc(dev, sizeof(*larb), GFP_KERNEL); if (!larb) return -ENOMEM; - larb->larb_gen = of_id->data; + larb->larb_gen = of_device_get_match_data(dev); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); larb->base = devm_ioremap_resource(dev, res); if (IS_ERR(larb->base)) @@ -319,7 +314,6 @@ static int mtk_smi_common_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct mtk_smi *common; struct resource *res; - const struct of_device_id *of_id; enum mtk_smi_gen smi_gen; if (!dev->pm_domain) @@ -338,17 +332,13 @@ static int mtk_smi_common_probe(struct platform_device *pdev) if (IS_ERR(common->clk_smi)) return PTR_ERR(common->clk_smi); - of_id = of_match_node(mtk_smi_common_of_ids, pdev->dev.of_node); - if (!of_id) - return -EINVAL; - /* * for mtk smi gen 1, we need to get the ao(always on) base to config * m4u port, and we need to enable the aync clock for transform the smi * clock into emi clock domain, but for mtk smi gen2, there's no smi ao * base. */ - smi_gen = (enum mtk_smi_gen)of_id->data; + smi_gen = (enum mtk_smi_gen)of_device_get_match_data(dev); if (smi_gen == MTK_SMI_GEN1) { res = platform_get_resource(pdev, IORESOURCE_MEM, 0); common->smi_ao_base = devm_ioremap_resource(dev, res); From 363932cdef4ba4e359ffd544d5b31d3eb31b358a Mon Sep 17 00:00:00 2001 From: Honghui Zhang Date: Fri, 4 Aug 2017 09:32:26 +0800 Subject: [PATCH 18/87] memory: mtk-smi: add larbid handle routine In the commit 3c8f4ad85c4b ("memory/mediatek: add support for mt2701"), the larb->larbid was added but not initialized. Mediatek's gen1 smi need this hardware larbid information to get the register offset which controls whether enable iommu for this larb. This patch add the initialize routine for larbid. Reviewed-by: Matthias Brugger Signed-off-by: Honghui Zhang Signed-off-by: Joerg Roedel --- drivers/memory/mtk-smi.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c index 2b798bb48b74..a3ff19aed087 100644 --- a/drivers/memory/mtk-smi.c +++ b/drivers/memory/mtk-smi.c @@ -42,6 +42,7 @@ #define SMI_SECUR_CON_VAL_DOMAIN(id) (0x3 << ((((id) & 0x7) << 2) + 1)) struct mtk_smi_larb_gen { + bool need_larbid; int port_in_larb[MTK_LARB_NR_MAX + 1]; void (*config_port)(struct device *); }; @@ -214,6 +215,7 @@ static const struct mtk_smi_larb_gen mtk_smi_larb_mt8173 = { }; static const struct mtk_smi_larb_gen mtk_smi_larb_mt2701 = { + .need_larbid = true, .port_in_larb = { LARB0_PORT_OFFSET, LARB1_PORT_OFFSET, LARB2_PORT_OFFSET, LARB3_PORT_OFFSET @@ -240,6 +242,7 @@ static int mtk_smi_larb_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct device_node *smi_node; struct platform_device *smi_pdev; + int err; if (!dev->pm_domain) return -EPROBE_DEFER; @@ -263,6 +266,15 @@ static int mtk_smi_larb_probe(struct platform_device *pdev) return PTR_ERR(larb->smi.clk_smi); larb->smi.dev = dev; + if (larb->larb_gen->need_larbid) { + err = of_property_read_u32(dev->of_node, "mediatek,larb-id", + &larb->larbid); + if (err) { + dev_err(dev, "missing larbid property\n"); + return err; + } + } + smi_node = of_parse_phandle(dev->of_node, "mediatek,smi", 0); if (!smi_node) return -EINVAL; From 611de8fcf7f62718bcf9d0469ac84d66e6d89348 Mon Sep 17 00:00:00 2001 From: Honghui Zhang Date: Fri, 4 Aug 2017 09:32:27 +0800 Subject: [PATCH 19/87] dt-bindings: mediatek: add descriptions for larbid This patch add larbid descritptions for mediatek's gen1 smi larb hardware. Acked-by: Rob Herring Signed-off-by: Honghui Zhang Signed-off-by: Joerg Roedel --- .../memory-controllers/mediatek,smi-larb.txt | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt index 21277a56e94c..ddf46b8856a5 100644 --- a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt +++ b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt @@ -15,6 +15,9 @@ Required properties: the register. - "smi" : It's the clock for transfer data and command. +Required property for mt2701: +- mediatek,larb-id :the hardware id of this larb. + Example: larb1: larb@16010000 { compatible = "mediatek,mt8173-smi-larb"; @@ -25,3 +28,15 @@ Example: <&vdecsys CLK_VDEC_LARB_CKEN>; clock-names = "apb", "smi"; }; + +Example for mt2701: + larb0: larb@14010000 { + compatible = "mediatek,mt2701-smi-larb"; + reg = <0 0x14010000 0 0x1000>; + mediatek,smi = <&smi_common>; + mediatek,larb-id = <0>; + clocks = <&mmsys CLK_MM_SMI_LARB0>, + <&mmsys CLK_MM_SMI_LARB0>; + clock-names = "apb", "smi"; + power-domains = <&scpsys MT2701_POWER_DOMAIN_DISP>; + }; From 928055a01b3f9d8d50dea522227b3e7b65409fed Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 4 Aug 2017 12:28:33 +0200 Subject: [PATCH 20/87] iommu/exynos: Remove custom platform device registration code Commit 09515ef5ddad ("of/acpi: Configure dma operations at probe time for platform/amba/pci bus devices") postponed the moment of attaching IOMMU controller to its device, so there is no need to register IOMMU controllers very early, before all other devices in the system. This change gives us an opportunity to use standard platform device registration method also for Exynos SYSMMU controllers. Signed-off-by: Marek Szyprowski Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 36 +++++++++--------------------------- 1 file changed, 9 insertions(+), 27 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index c6b69e91132b..622e44662ea1 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -655,6 +655,13 @@ static int __init exynos_sysmmu_probe(struct platform_device *pdev) } } + /* + * use the first registered sysmmu device for performing + * dma mapping operations on iommu page tables (cpu cache flush) + */ + if (!dma_dev) + dma_dev = &pdev->dev; + pm_runtime_enable(dev); return 0; @@ -1335,8 +1342,6 @@ static struct iommu_ops exynos_iommu_ops = { .of_xlate = exynos_iommu_of_xlate, }; -static bool init_done; - static int __init exynos_iommu_init(void) { int ret; @@ -1369,8 +1374,6 @@ static int __init exynos_iommu_init(void) goto err_set_iommu; } - init_done = true; - return 0; err_set_iommu: kmem_cache_free(lv2table_kmem_cache, zero_lv2_table); @@ -1380,27 +1383,6 @@ err_reg_driver: kmem_cache_destroy(lv2table_kmem_cache); return ret; } +core_initcall(exynos_iommu_init); -static int __init exynos_iommu_of_setup(struct device_node *np) -{ - struct platform_device *pdev; - - if (!init_done) - exynos_iommu_init(); - - pdev = of_platform_device_create(np, NULL, platform_bus_type.dev_root); - if (!pdev) - return -ENODEV; - - /* - * use the first registered sysmmu device for performing - * dma mapping operations on iommu page tables (cpu cache flush) - */ - if (!dma_dev) - dma_dev = &pdev->dev; - - return 0; -} - -IOMMU_OF_DECLARE(exynos_iommu_of, "samsung,exynos-sysmmu", - exynos_iommu_of_setup); +IOMMU_OF_DECLARE(exynos_iommu_of, "samsung,exynos-sysmmu", NULL); From ce2eb8f44e60c748fac56ede46b526fdac773e1b Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 21 Jul 2017 13:12:35 +0100 Subject: [PATCH 21/87] iommu/msm: Add iommu_group support As the last step to making groups mandatory, clean up the remaining drivers by adding basic support. Whilst it may not perfectly reflect the isolation capabilities of the hardware, using generic_device_group() should at least maintain existing behaviour with respect to the API. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/msm_iommu.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index d0448353d501..04f4d51ffacb 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -393,6 +393,7 @@ static struct msm_iommu_dev *find_iommu_for_dev(struct device *dev) static int msm_iommu_add_device(struct device *dev) { struct msm_iommu_dev *iommu; + struct iommu_group *group; unsigned long flags; int ret = 0; @@ -406,7 +407,16 @@ static int msm_iommu_add_device(struct device *dev) spin_unlock_irqrestore(&msm_iommu_lock, flags); - return ret; + if (ret) + return ret; + + group = iommu_group_get_for_dev(dev); + if (IS_ERR(group)) + return PTR_ERR(group); + + iommu_group_put(group); + + return 0; } static void msm_iommu_remove_device(struct device *dev) @@ -421,6 +431,8 @@ static void msm_iommu_remove_device(struct device *dev) iommu_device_unlink(&iommu->iommu, dev); spin_unlock_irqrestore(&msm_iommu_lock, flags); + + iommu_group_remove_device(dev); } static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) @@ -700,6 +712,7 @@ static struct iommu_ops msm_iommu_ops = { .iova_to_phys = msm_iommu_iova_to_phys, .add_device = msm_iommu_add_device, .remove_device = msm_iommu_remove_device, + .device_group = generic_device_group, .pgsize_bitmap = MSM_IOMMU_PGSIZES, .of_xlate = qcom_iommu_of_xlate, }; From d92e1f849830fc78c50a00b953361fc1449aa1e2 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 21 Jul 2017 13:12:36 +0100 Subject: [PATCH 22/87] iommu/tegra-smmu: Add iommu_group support As the last step to making groups mandatory, clean up the remaining drivers by adding basic support. Whilst it may not perfectly reflect the isolation capabilities of the hardware (tegra_smmu_swgroup sounds suspiciously like something that might warrant representing at the iommu_group level), using generic_device_group() should at least maintain existing behaviour with respect to the API. Signed-off-by: Robin Murphy Tested-by: Mikko Perttunen Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-smmu.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index eeb19f560a05..faa9c1e70482 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -704,6 +704,7 @@ static struct tegra_smmu *tegra_smmu_find(struct device_node *np) static int tegra_smmu_add_device(struct device *dev) { struct device_node *np = dev->of_node; + struct iommu_group *group; struct of_phandle_args args; unsigned int index = 0; @@ -725,12 +726,19 @@ static int tegra_smmu_add_device(struct device *dev) index++; } + group = iommu_group_get_for_dev(dev); + if (IS_ERR(group)) + return PTR_ERR(group); + + iommu_group_put(group); + return 0; } static void tegra_smmu_remove_device(struct device *dev) { dev->archdata.iommu = NULL; + iommu_group_remove_device(dev); } static const struct iommu_ops tegra_smmu_ops = { @@ -741,6 +749,7 @@ static const struct iommu_ops tegra_smmu_ops = { .detach_dev = tegra_smmu_detach_dev, .add_device = tegra_smmu_add_device, .remove_device = tegra_smmu_remove_device, + .device_group = generic_device_group, .map = tegra_smmu_map, .unmap = tegra_smmu_unmap, .map_sg = default_iommu_map_sg, From 15f9a3104b80a83e33ec04609aa61ac7e045fa2c Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 21 Jul 2017 13:12:37 +0100 Subject: [PATCH 23/87] iommu/tegra-gart: Add iommu_group support As the last step to making groups mandatory, clean up the remaining drivers by adding basic support. Whilst it may not perfectly reflect the isolation capabilities of the hardware, using generic_device_group() should at least maintain existing behaviour with respect to the API. Signed-off-by: Robin Murphy Tested-by: Dmitry Osipenko Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-gart.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index 37e708fdbb5a..29bafc6e82ae 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -334,12 +334,31 @@ static bool gart_iommu_capable(enum iommu_cap cap) return false; } +static int gart_iommu_add_device(struct device *dev) +{ + struct iommu_group *group = iommu_group_get_for_dev(dev); + + if (IS_ERR(group)) + return PTR_ERR(group); + + iommu_group_put(group); + return 0; +} + +static void gart_iommu_remove_device(struct device *dev) +{ + iommu_group_remove_device(dev); +} + static const struct iommu_ops gart_iommu_ops = { .capable = gart_iommu_capable, .domain_alloc = gart_iommu_domain_alloc, .domain_free = gart_iommu_domain_free, .attach_dev = gart_iommu_attach_dev, .detach_dev = gart_iommu_detach_dev, + .add_device = gart_iommu_add_device, + .remove_device = gart_iommu_remove_device, + .device_group = generic_device_group, .map = gart_iommu_map, .map_sg = default_iommu_map_sg, .unmap = gart_iommu_unmap, From 05f80300dc8bcfe8566b36256d01482cae5afa02 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 21 Jul 2017 13:12:38 +0100 Subject: [PATCH 24/87] iommu: Finish making iommu_group support mandatory Now that all the drivers properly implementing the IOMMU API support groups (I'm ignoring the etnaviv GPU MMUs which seemingly only do just enough to convince the ARM DMA mapping ops), we can remove the FIXME workarounds from the core code. In the process, it also seems logical to make the .device_group callback non-optional for drivers calling iommu_group_get_for_dev() - the current callers all implement it anyway, and it doesn't make sense for any future callers not to either. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3f6ea160afed..af69bf7e035a 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1005,11 +1005,10 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev) if (group) return group; - group = ERR_PTR(-EINVAL); - - if (ops && ops->device_group) - group = ops->device_group(dev); + if (!ops) + return ERR_PTR(-EINVAL); + group = ops->device_group(dev); if (WARN_ON_ONCE(group == NULL)) return ERR_PTR(-EINVAL); @@ -1298,12 +1297,8 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev) int ret; group = iommu_group_get(dev); - /* FIXME: Remove this when groups a mandatory for iommu drivers */ - if (group == NULL) - return __iommu_attach_device(domain, dev); - /* - * We have a group - lock it to make sure the device-count doesn't + * Lock the group to make sure the device-count doesn't * change while we are attaching */ mutex_lock(&group->mutex); @@ -1336,9 +1331,6 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev) struct iommu_group *group; group = iommu_group_get(dev); - /* FIXME: Remove this when groups a mandatory for iommu drivers */ - if (group == NULL) - return __iommu_detach_device(domain, dev); mutex_lock(&group->mutex); if (iommu_group_device_count(group) != 1) { @@ -1360,9 +1352,6 @@ struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) struct iommu_group *group; group = iommu_group_get(dev); - /* FIXME: Remove this when groups a mandatory for iommu drivers */ - if (group == NULL) - return NULL; domain = group->domain; From a4d98fb3069090a856933a1416f449caf18ef334 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 9 Aug 2017 15:00:36 +0200 Subject: [PATCH 25/87] iommu/pamu: Let PAMU depend on PCI The driver does not compile when PCI is not selected, so make it depend on it. Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index f73ff28f77e2..e73b7c50a130 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -76,6 +76,7 @@ config IOMMU_DMA config FSL_PAMU bool "Freescale IOMMU support" + depends on PCI depends on PPC_E500MC || (COMPILE_TEST && PPC) select IOMMU_API select GENERIC_ALLOCATOR From af29d9fa41b4b427830aea3911224aef6d61ee50 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 9 Aug 2017 15:04:47 +0200 Subject: [PATCH 26/87] iommu/pamu: Make driver depend on CONFIG_PHYS_64BIT Certain address calculations in the driver make the assumption that phys_addr_t and dma_addr_t are 64 bit wide. Force this by depending on CONFIG_PHYS_64BIT to be set. Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index e73b7c50a130..e163b0223dec 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -77,6 +77,7 @@ config IOMMU_DMA config FSL_PAMU bool "Freescale IOMMU support" depends on PCI + depends on PHYS_64BIT depends on PPC_E500MC || (COMPILE_TEST && PPC) select IOMMU_API select GENERIC_ALLOCATOR From 07eb6fdf4933336098c3e955961071e85d3dff9f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 9 Aug 2017 16:15:43 +0200 Subject: [PATCH 27/87] iommu/pamu: WARN when fsl_pamu_probe() is called more than once The function probes the PAMU hardware from device-tree specifications. It initializes global variables and can thus be only safely called once. Add a check that that prints a warning when its called more than once. Signed-off-by: Joerg Roedel --- drivers/iommu/fsl_pamu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index a34355fca37a..9ee8e9e161f5 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -42,6 +42,8 @@ struct pamu_isr_data { static struct paace *ppaact; static struct paace *spaact; +static bool probed; /* Has PAMU been probed? */ + /* * Table for matching compatible strings, for device tree * guts node, for QorIQ SOCs. @@ -1033,6 +1035,9 @@ static int fsl_pamu_probe(struct platform_device *pdev) * NOTE : All PAMUs share the same LIODN tables. */ + if (WARN_ON(probed)) + return -EBUSY; + pamu_regs = of_iomap(dev->of_node, 0); if (!pamu_regs) { dev_err(dev, "ioremap of PAMU node failed\n"); @@ -1172,6 +1177,8 @@ static int fsl_pamu_probe(struct platform_device *pdev) setup_liodns(); + probed = true; + return 0; error_genpool: From 68a17f0be6feb8de1f5e26b93f49791031374c4c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 9 Aug 2017 16:36:00 +0200 Subject: [PATCH 28/87] iommu/pamu: Add support for generic iommu-device This patch adds a global iommu-handle to the pamu driver and initializes it at probe time. Also link devices added to the iommu to this handle. Signed-off-by: Joerg Roedel --- drivers/iommu/fsl_pamu.c | 17 +++++++++++++++++ drivers/iommu/fsl_pamu.h | 3 +++ drivers/iommu/fsl_pamu_domain.c | 5 ++++- drivers/iommu/fsl_pamu_domain.h | 2 ++ 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index 9ee8e9e161f5..9238a85de53e 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -44,6 +44,8 @@ static struct paace *spaact; static bool probed; /* Has PAMU been probed? */ +struct iommu_device pamu_iommu; /* IOMMU core code handle */ + /* * Table for matching compatible strings, for device tree * guts node, for QorIQ SOCs. @@ -1154,6 +1156,18 @@ static int fsl_pamu_probe(struct platform_device *pdev) if (ret) goto error_genpool; + ret = iommu_device_sysfs_add(&pamu_iommu, dev, NULL, "iommu0"); + if (ret) + goto error_genpool; + + iommu_device_set_ops(&pamu_iommu, &fsl_pamu_ops); + + ret = iommu_device_register(&pamu_iommu); + if (ret) { + dev_err(dev, "Can't register iommu device\n"); + goto error_sysfs; + } + pamubypenr = in_be32(&guts_regs->pamubypenr); for (pamu_reg_off = 0, pamu_counter = 0x80000000; pamu_reg_off < size; @@ -1181,6 +1195,9 @@ static int fsl_pamu_probe(struct platform_device *pdev) return 0; +error_sysfs: + iommu_device_sysfs_remove(&pamu_iommu); + error_genpool: gen_pool_destroy(spaace_pool); diff --git a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h index c3434f29c967..fa48222f3421 100644 --- a/drivers/iommu/fsl_pamu.h +++ b/drivers/iommu/fsl_pamu.h @@ -391,6 +391,9 @@ struct ome { #define EOE_WWSAOL 0x1e /* Write with stash allocate only and lock */ #define EOE_VALID 0x80 +extern const struct iommu_ops fsl_pamu_ops; +extern struct iommu_device pamu_iommu; /* IOMMU core code handle */ + /* Function prototypes */ int pamu_domain_init(void); int pamu_enable_liodn(int liodn); diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index da0e1e30ef37..914953b87bf1 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -983,11 +983,14 @@ static int fsl_pamu_add_device(struct device *dev) iommu_group_put(group); + iommu_device_link(&pamu_iommu, dev); + return 0; } static void fsl_pamu_remove_device(struct device *dev) { + iommu_device_unlink(&pamu_iommu, dev); iommu_group_remove_device(dev); } @@ -1047,7 +1050,7 @@ static u32 fsl_pamu_get_windows(struct iommu_domain *domain) return dma_domain->win_cnt; } -static const struct iommu_ops fsl_pamu_ops = { +const struct iommu_ops fsl_pamu_ops = { .capable = fsl_pamu_capable, .domain_alloc = fsl_pamu_domain_alloc, .domain_free = fsl_pamu_domain_free, diff --git a/drivers/iommu/fsl_pamu_domain.h b/drivers/iommu/fsl_pamu_domain.h index f2b0f741d3de..6d8661e488fb 100644 --- a/drivers/iommu/fsl_pamu_domain.h +++ b/drivers/iommu/fsl_pamu_domain.h @@ -21,6 +21,8 @@ #include "fsl_pamu.h" +const struct iommu_ops fsl_pamu_ops; + struct dma_window { phys_addr_t paddr; u64 size; From a883a9443045346ef4a636a40ecf68cb6e5a3db2 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 9 Aug 2017 10:43:02 -0400 Subject: [PATCH 29/87] Docs: dt: document qcom iommu bindings Cc: devicetree@vger.kernel.org Signed-off-by: Rob Clark Reviewed-by: Rob Herring Signed-off-by: Joerg Roedel --- .../devicetree/bindings/iommu/qcom,iommu.txt | 121 ++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 Documentation/devicetree/bindings/iommu/qcom,iommu.txt diff --git a/Documentation/devicetree/bindings/iommu/qcom,iommu.txt b/Documentation/devicetree/bindings/iommu/qcom,iommu.txt new file mode 100644 index 000000000000..b2641ceb2b40 --- /dev/null +++ b/Documentation/devicetree/bindings/iommu/qcom,iommu.txt @@ -0,0 +1,121 @@ +* QCOM IOMMU v1 Implementation + +Qualcomm "B" family devices which are not compatible with arm-smmu have +a similar looking IOMMU but without access to the global register space, +and optionally requiring additional configuration to route context irqs +to non-secure vs secure interrupt line. + +** Required properties: + +- compatible : Should be one of: + + "qcom,msm8916-iommu" + + Followed by "qcom,msm-iommu-v1". + +- clock-names : Should be a pair of "iface" (required for IOMMUs + register group access) and "bus" (required for + the IOMMUs underlying bus access). + +- clocks : Phandles for respective clocks described by + clock-names. + +- #address-cells : must be 1. + +- #size-cells : must be 1. + +- #iommu-cells : Must be 1. Index identifies the context-bank #. + +- ranges : Base address and size of the iommu context banks. + +- qcom,iommu-secure-id : secure-id. + +- List of sub-nodes, one per translation context bank. Each sub-node + has the following required properties: + + - compatible : Should be one of: + - "qcom,msm-iommu-v1-ns" : non-secure context bank + - "qcom,msm-iommu-v1-sec" : secure context bank + - reg : Base address and size of context bank within the iommu + - interrupts : The context fault irq. + +** Optional properties: + +- reg : Base address and size of the SMMU local base, should + be only specified if the iommu requires configuration + for routing of context bank irq's to secure vs non- + secure lines. (Ie. if the iommu contains secure + context banks) + + +** Examples: + + apps_iommu: iommu@1e20000 { + #address-cells = <1>; + #size-cells = <1>; + #iommu-cells = <1>; + compatible = "qcom,msm8916-iommu", "qcom,msm-iommu-v1"; + ranges = <0 0x1e20000 0x40000>; + reg = <0x1ef0000 0x3000>; + clocks = <&gcc GCC_SMMU_CFG_CLK>, + <&gcc GCC_APSS_TCU_CLK>; + clock-names = "iface", "bus"; + qcom,iommu-secure-id = <17>; + + // mdp_0: + iommu-ctx@4000 { + compatible = "qcom,msm-iommu-v1-ns"; + reg = <0x4000 0x1000>; + interrupts = ; + }; + + // venus_ns: + iommu-ctx@5000 { + compatible = "qcom,msm-iommu-v1-sec"; + reg = <0x5000 0x1000>; + interrupts = ; + }; + }; + + gpu_iommu: iommu@1f08000 { + #address-cells = <1>; + #size-cells = <1>; + #iommu-cells = <1>; + compatible = "qcom,msm8916-iommu", "qcom,msm-iommu-v1"; + ranges = <0 0x1f08000 0x10000>; + clocks = <&gcc GCC_SMMU_CFG_CLK>, + <&gcc GCC_GFX_TCU_CLK>; + clock-names = "iface", "bus"; + qcom,iommu-secure-id = <18>; + + // gfx3d_user: + iommu-ctx@1000 { + compatible = "qcom,msm-iommu-v1-ns"; + reg = <0x1000 0x1000>; + interrupts = ; + }; + + // gfx3d_priv: + iommu-ctx@2000 { + compatible = "qcom,msm-iommu-v1-ns"; + reg = <0x2000 0x1000>; + interrupts = ; + }; + }; + + ... + + venus: video-codec@1d00000 { + ... + iommus = <&apps_iommu 5>; + }; + + mdp: mdp@1a01000 { + ... + iommus = <&apps_iommu 4>; + }; + + gpu@01c00000 { + ... + iommus = <&gpu_iommu 1>, <&gpu_iommu 2>; + }; From 2b03774bae5f95091dac125a492caad71644b6db Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 9 Aug 2017 10:43:03 -0400 Subject: [PATCH 30/87] iommu/arm-smmu: Split out register defines I want to re-use some of these for qcom_iommu, which has (roughly) the same context-bank registers. Signed-off-by: Rob Clark Acked-by: Will Deacon Signed-off-by: Joerg Roedel --- drivers/iommu/arm-smmu-regs.h | 220 ++++++++++++++++++++++++++++++++++ drivers/iommu/arm-smmu.c | 211 ++------------------------------ 2 files changed, 229 insertions(+), 202 deletions(-) create mode 100644 drivers/iommu/arm-smmu-regs.h diff --git a/drivers/iommu/arm-smmu-regs.h b/drivers/iommu/arm-smmu-regs.h new file mode 100644 index 000000000000..a1226e4ab5f8 --- /dev/null +++ b/drivers/iommu/arm-smmu-regs.h @@ -0,0 +1,220 @@ +/* + * IOMMU API for ARM architected SMMU implementations. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) 2013 ARM Limited + * + * Author: Will Deacon + */ + +#ifndef _ARM_SMMU_REGS_H +#define _ARM_SMMU_REGS_H + +/* Configuration registers */ +#define ARM_SMMU_GR0_sCR0 0x0 +#define sCR0_CLIENTPD (1 << 0) +#define sCR0_GFRE (1 << 1) +#define sCR0_GFIE (1 << 2) +#define sCR0_EXIDENABLE (1 << 3) +#define sCR0_GCFGFRE (1 << 4) +#define sCR0_GCFGFIE (1 << 5) +#define sCR0_USFCFG (1 << 10) +#define sCR0_VMIDPNE (1 << 11) +#define sCR0_PTM (1 << 12) +#define sCR0_FB (1 << 13) +#define sCR0_VMID16EN (1 << 31) +#define sCR0_BSU_SHIFT 14 +#define sCR0_BSU_MASK 0x3 + +/* Auxiliary Configuration register */ +#define ARM_SMMU_GR0_sACR 0x10 + +/* Identification registers */ +#define ARM_SMMU_GR0_ID0 0x20 +#define ARM_SMMU_GR0_ID1 0x24 +#define ARM_SMMU_GR0_ID2 0x28 +#define ARM_SMMU_GR0_ID3 0x2c +#define ARM_SMMU_GR0_ID4 0x30 +#define ARM_SMMU_GR0_ID5 0x34 +#define ARM_SMMU_GR0_ID6 0x38 +#define ARM_SMMU_GR0_ID7 0x3c +#define ARM_SMMU_GR0_sGFSR 0x48 +#define ARM_SMMU_GR0_sGFSYNR0 0x50 +#define ARM_SMMU_GR0_sGFSYNR1 0x54 +#define ARM_SMMU_GR0_sGFSYNR2 0x58 + +#define ID0_S1TS (1 << 30) +#define ID0_S2TS (1 << 29) +#define ID0_NTS (1 << 28) +#define ID0_SMS (1 << 27) +#define ID0_ATOSNS (1 << 26) +#define ID0_PTFS_NO_AARCH32 (1 << 25) +#define ID0_PTFS_NO_AARCH32S (1 << 24) +#define ID0_CTTW (1 << 14) +#define ID0_NUMIRPT_SHIFT 16 +#define ID0_NUMIRPT_MASK 0xff +#define ID0_NUMSIDB_SHIFT 9 +#define ID0_NUMSIDB_MASK 0xf +#define ID0_EXIDS (1 << 8) +#define ID0_NUMSMRG_SHIFT 0 +#define ID0_NUMSMRG_MASK 0xff + +#define ID1_PAGESIZE (1 << 31) +#define ID1_NUMPAGENDXB_SHIFT 28 +#define ID1_NUMPAGENDXB_MASK 7 +#define ID1_NUMS2CB_SHIFT 16 +#define ID1_NUMS2CB_MASK 0xff +#define ID1_NUMCB_SHIFT 0 +#define ID1_NUMCB_MASK 0xff + +#define ID2_OAS_SHIFT 4 +#define ID2_OAS_MASK 0xf +#define ID2_IAS_SHIFT 0 +#define ID2_IAS_MASK 0xf +#define ID2_UBS_SHIFT 8 +#define ID2_UBS_MASK 0xf +#define ID2_PTFS_4K (1 << 12) +#define ID2_PTFS_16K (1 << 13) +#define ID2_PTFS_64K (1 << 14) +#define ID2_VMID16 (1 << 15) + +#define ID7_MAJOR_SHIFT 4 +#define ID7_MAJOR_MASK 0xf + +/* Global TLB invalidation */ +#define ARM_SMMU_GR0_TLBIVMID 0x64 +#define ARM_SMMU_GR0_TLBIALLNSNH 0x68 +#define ARM_SMMU_GR0_TLBIALLH 0x6c +#define ARM_SMMU_GR0_sTLBGSYNC 0x70 +#define ARM_SMMU_GR0_sTLBGSTATUS 0x74 +#define sTLBGSTATUS_GSACTIVE (1 << 0) + +/* Stream mapping registers */ +#define ARM_SMMU_GR0_SMR(n) (0x800 + ((n) << 2)) +#define SMR_VALID (1 << 31) +#define SMR_MASK_SHIFT 16 +#define SMR_ID_SHIFT 0 + +#define ARM_SMMU_GR0_S2CR(n) (0xc00 + ((n) << 2)) +#define S2CR_CBNDX_SHIFT 0 +#define S2CR_CBNDX_MASK 0xff +#define S2CR_EXIDVALID (1 << 10) +#define S2CR_TYPE_SHIFT 16 +#define S2CR_TYPE_MASK 0x3 +enum arm_smmu_s2cr_type { + S2CR_TYPE_TRANS, + S2CR_TYPE_BYPASS, + S2CR_TYPE_FAULT, +}; + +#define S2CR_PRIVCFG_SHIFT 24 +#define S2CR_PRIVCFG_MASK 0x3 +enum arm_smmu_s2cr_privcfg { + S2CR_PRIVCFG_DEFAULT, + S2CR_PRIVCFG_DIPAN, + S2CR_PRIVCFG_UNPRIV, + S2CR_PRIVCFG_PRIV, +}; + +/* Context bank attribute registers */ +#define ARM_SMMU_GR1_CBAR(n) (0x0 + ((n) << 2)) +#define CBAR_VMID_SHIFT 0 +#define CBAR_VMID_MASK 0xff +#define CBAR_S1_BPSHCFG_SHIFT 8 +#define CBAR_S1_BPSHCFG_MASK 3 +#define CBAR_S1_BPSHCFG_NSH 3 +#define CBAR_S1_MEMATTR_SHIFT 12 +#define CBAR_S1_MEMATTR_MASK 0xf +#define CBAR_S1_MEMATTR_WB 0xf +#define CBAR_TYPE_SHIFT 16 +#define CBAR_TYPE_MASK 0x3 +#define CBAR_TYPE_S2_TRANS (0 << CBAR_TYPE_SHIFT) +#define CBAR_TYPE_S1_TRANS_S2_BYPASS (1 << CBAR_TYPE_SHIFT) +#define CBAR_TYPE_S1_TRANS_S2_FAULT (2 << CBAR_TYPE_SHIFT) +#define CBAR_TYPE_S1_TRANS_S2_TRANS (3 << CBAR_TYPE_SHIFT) +#define CBAR_IRPTNDX_SHIFT 24 +#define CBAR_IRPTNDX_MASK 0xff + +#define ARM_SMMU_GR1_CBA2R(n) (0x800 + ((n) << 2)) +#define CBA2R_RW64_32BIT (0 << 0) +#define CBA2R_RW64_64BIT (1 << 0) +#define CBA2R_VMID_SHIFT 16 +#define CBA2R_VMID_MASK 0xffff + +#define ARM_SMMU_CB_SCTLR 0x0 +#define ARM_SMMU_CB_ACTLR 0x4 +#define ARM_SMMU_CB_RESUME 0x8 +#define ARM_SMMU_CB_TTBCR2 0x10 +#define ARM_SMMU_CB_TTBR0 0x20 +#define ARM_SMMU_CB_TTBR1 0x28 +#define ARM_SMMU_CB_TTBCR 0x30 +#define ARM_SMMU_CB_CONTEXTIDR 0x34 +#define ARM_SMMU_CB_S1_MAIR0 0x38 +#define ARM_SMMU_CB_S1_MAIR1 0x3c +#define ARM_SMMU_CB_PAR 0x50 +#define ARM_SMMU_CB_FSR 0x58 +#define ARM_SMMU_CB_FAR 0x60 +#define ARM_SMMU_CB_FSYNR0 0x68 +#define ARM_SMMU_CB_S1_TLBIVA 0x600 +#define ARM_SMMU_CB_S1_TLBIASID 0x610 +#define ARM_SMMU_CB_S1_TLBIVAL 0x620 +#define ARM_SMMU_CB_S2_TLBIIPAS2 0x630 +#define ARM_SMMU_CB_S2_TLBIIPAS2L 0x638 +#define ARM_SMMU_CB_TLBSYNC 0x7f0 +#define ARM_SMMU_CB_TLBSTATUS 0x7f4 +#define ARM_SMMU_CB_ATS1PR 0x800 +#define ARM_SMMU_CB_ATSR 0x8f0 + +#define SCTLR_S1_ASIDPNE (1 << 12) +#define SCTLR_CFCFG (1 << 7) +#define SCTLR_CFIE (1 << 6) +#define SCTLR_CFRE (1 << 5) +#define SCTLR_E (1 << 4) +#define SCTLR_AFE (1 << 2) +#define SCTLR_TRE (1 << 1) +#define SCTLR_M (1 << 0) + +#define CB_PAR_F (1 << 0) + +#define ATSR_ACTIVE (1 << 0) + +#define RESUME_RETRY (0 << 0) +#define RESUME_TERMINATE (1 << 0) + +#define TTBCR2_SEP_SHIFT 15 +#define TTBCR2_SEP_UPSTREAM (0x7 << TTBCR2_SEP_SHIFT) +#define TTBCR2_AS (1 << 4) + +#define TTBRn_ASID_SHIFT 48 + +#define FSR_MULTI (1 << 31) +#define FSR_SS (1 << 30) +#define FSR_UUT (1 << 8) +#define FSR_ASF (1 << 7) +#define FSR_TLBLKF (1 << 6) +#define FSR_TLBMCF (1 << 5) +#define FSR_EF (1 << 4) +#define FSR_PF (1 << 3) +#define FSR_AFF (1 << 2) +#define FSR_TF (1 << 1) + +#define FSR_IGN (FSR_AFF | FSR_ASF | \ + FSR_TLBMCF | FSR_TLBLKF) +#define FSR_FAULT (FSR_MULTI | FSR_SS | FSR_UUT | \ + FSR_EF | FSR_PF | FSR_TF | FSR_IGN) + +#define FSYNR0_WNR (1 << 4) + +#endif /* _ARM_SMMU_REGS_H */ diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 2d80fa8a0634..262e1a3bbeeb 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -54,6 +54,15 @@ #include #include "io-pgtable.h" +#include "arm-smmu-regs.h" + +#define ARM_MMU500_ACTLR_CPRE (1 << 1) + +#define ARM_MMU500_ACR_CACHE_LOCK (1 << 26) +#define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8) + +#define TLB_LOOP_TIMEOUT 1000000 /* 1s! */ +#define TLB_SPIN_COUNT 10 /* Maximum number of context banks per SMMU */ #define ARM_SMMU_MAX_CBS 128 @@ -83,211 +92,9 @@ #define smmu_write_atomic_lq writel_relaxed #endif -/* Configuration registers */ -#define ARM_SMMU_GR0_sCR0 0x0 -#define sCR0_CLIENTPD (1 << 0) -#define sCR0_GFRE (1 << 1) -#define sCR0_GFIE (1 << 2) -#define sCR0_EXIDENABLE (1 << 3) -#define sCR0_GCFGFRE (1 << 4) -#define sCR0_GCFGFIE (1 << 5) -#define sCR0_USFCFG (1 << 10) -#define sCR0_VMIDPNE (1 << 11) -#define sCR0_PTM (1 << 12) -#define sCR0_FB (1 << 13) -#define sCR0_VMID16EN (1 << 31) -#define sCR0_BSU_SHIFT 14 -#define sCR0_BSU_MASK 0x3 - -/* Auxiliary Configuration register */ -#define ARM_SMMU_GR0_sACR 0x10 - -/* Identification registers */ -#define ARM_SMMU_GR0_ID0 0x20 -#define ARM_SMMU_GR0_ID1 0x24 -#define ARM_SMMU_GR0_ID2 0x28 -#define ARM_SMMU_GR0_ID3 0x2c -#define ARM_SMMU_GR0_ID4 0x30 -#define ARM_SMMU_GR0_ID5 0x34 -#define ARM_SMMU_GR0_ID6 0x38 -#define ARM_SMMU_GR0_ID7 0x3c -#define ARM_SMMU_GR0_sGFSR 0x48 -#define ARM_SMMU_GR0_sGFSYNR0 0x50 -#define ARM_SMMU_GR0_sGFSYNR1 0x54 -#define ARM_SMMU_GR0_sGFSYNR2 0x58 - -#define ID0_S1TS (1 << 30) -#define ID0_S2TS (1 << 29) -#define ID0_NTS (1 << 28) -#define ID0_SMS (1 << 27) -#define ID0_ATOSNS (1 << 26) -#define ID0_PTFS_NO_AARCH32 (1 << 25) -#define ID0_PTFS_NO_AARCH32S (1 << 24) -#define ID0_CTTW (1 << 14) -#define ID0_NUMIRPT_SHIFT 16 -#define ID0_NUMIRPT_MASK 0xff -#define ID0_NUMSIDB_SHIFT 9 -#define ID0_NUMSIDB_MASK 0xf -#define ID0_EXIDS (1 << 8) -#define ID0_NUMSMRG_SHIFT 0 -#define ID0_NUMSMRG_MASK 0xff - -#define ID1_PAGESIZE (1 << 31) -#define ID1_NUMPAGENDXB_SHIFT 28 -#define ID1_NUMPAGENDXB_MASK 7 -#define ID1_NUMS2CB_SHIFT 16 -#define ID1_NUMS2CB_MASK 0xff -#define ID1_NUMCB_SHIFT 0 -#define ID1_NUMCB_MASK 0xff - -#define ID2_OAS_SHIFT 4 -#define ID2_OAS_MASK 0xf -#define ID2_IAS_SHIFT 0 -#define ID2_IAS_MASK 0xf -#define ID2_UBS_SHIFT 8 -#define ID2_UBS_MASK 0xf -#define ID2_PTFS_4K (1 << 12) -#define ID2_PTFS_16K (1 << 13) -#define ID2_PTFS_64K (1 << 14) -#define ID2_VMID16 (1 << 15) - -#define ID7_MAJOR_SHIFT 4 -#define ID7_MAJOR_MASK 0xf - -/* Global TLB invalidation */ -#define ARM_SMMU_GR0_TLBIVMID 0x64 -#define ARM_SMMU_GR0_TLBIALLNSNH 0x68 -#define ARM_SMMU_GR0_TLBIALLH 0x6c -#define ARM_SMMU_GR0_sTLBGSYNC 0x70 -#define ARM_SMMU_GR0_sTLBGSTATUS 0x74 -#define sTLBGSTATUS_GSACTIVE (1 << 0) -#define TLB_LOOP_TIMEOUT 1000000 /* 1s! */ -#define TLB_SPIN_COUNT 10 - -/* Stream mapping registers */ -#define ARM_SMMU_GR0_SMR(n) (0x800 + ((n) << 2)) -#define SMR_VALID (1 << 31) -#define SMR_MASK_SHIFT 16 -#define SMR_ID_SHIFT 0 - -#define ARM_SMMU_GR0_S2CR(n) (0xc00 + ((n) << 2)) -#define S2CR_CBNDX_SHIFT 0 -#define S2CR_CBNDX_MASK 0xff -#define S2CR_EXIDVALID (1 << 10) -#define S2CR_TYPE_SHIFT 16 -#define S2CR_TYPE_MASK 0x3 -enum arm_smmu_s2cr_type { - S2CR_TYPE_TRANS, - S2CR_TYPE_BYPASS, - S2CR_TYPE_FAULT, -}; - -#define S2CR_PRIVCFG_SHIFT 24 -#define S2CR_PRIVCFG_MASK 0x3 -enum arm_smmu_s2cr_privcfg { - S2CR_PRIVCFG_DEFAULT, - S2CR_PRIVCFG_DIPAN, - S2CR_PRIVCFG_UNPRIV, - S2CR_PRIVCFG_PRIV, -}; - -/* Context bank attribute registers */ -#define ARM_SMMU_GR1_CBAR(n) (0x0 + ((n) << 2)) -#define CBAR_VMID_SHIFT 0 -#define CBAR_VMID_MASK 0xff -#define CBAR_S1_BPSHCFG_SHIFT 8 -#define CBAR_S1_BPSHCFG_MASK 3 -#define CBAR_S1_BPSHCFG_NSH 3 -#define CBAR_S1_MEMATTR_SHIFT 12 -#define CBAR_S1_MEMATTR_MASK 0xf -#define CBAR_S1_MEMATTR_WB 0xf -#define CBAR_TYPE_SHIFT 16 -#define CBAR_TYPE_MASK 0x3 -#define CBAR_TYPE_S2_TRANS (0 << CBAR_TYPE_SHIFT) -#define CBAR_TYPE_S1_TRANS_S2_BYPASS (1 << CBAR_TYPE_SHIFT) -#define CBAR_TYPE_S1_TRANS_S2_FAULT (2 << CBAR_TYPE_SHIFT) -#define CBAR_TYPE_S1_TRANS_S2_TRANS (3 << CBAR_TYPE_SHIFT) -#define CBAR_IRPTNDX_SHIFT 24 -#define CBAR_IRPTNDX_MASK 0xff - -#define ARM_SMMU_GR1_CBA2R(n) (0x800 + ((n) << 2)) -#define CBA2R_RW64_32BIT (0 << 0) -#define CBA2R_RW64_64BIT (1 << 0) -#define CBA2R_VMID_SHIFT 16 -#define CBA2R_VMID_MASK 0xffff - /* Translation context bank */ #define ARM_SMMU_CB(smmu, n) ((smmu)->cb_base + ((n) << (smmu)->pgshift)) -#define ARM_SMMU_CB_SCTLR 0x0 -#define ARM_SMMU_CB_ACTLR 0x4 -#define ARM_SMMU_CB_RESUME 0x8 -#define ARM_SMMU_CB_TTBCR2 0x10 -#define ARM_SMMU_CB_TTBR0 0x20 -#define ARM_SMMU_CB_TTBR1 0x28 -#define ARM_SMMU_CB_TTBCR 0x30 -#define ARM_SMMU_CB_CONTEXTIDR 0x34 -#define ARM_SMMU_CB_S1_MAIR0 0x38 -#define ARM_SMMU_CB_S1_MAIR1 0x3c -#define ARM_SMMU_CB_PAR 0x50 -#define ARM_SMMU_CB_FSR 0x58 -#define ARM_SMMU_CB_FAR 0x60 -#define ARM_SMMU_CB_FSYNR0 0x68 -#define ARM_SMMU_CB_S1_TLBIVA 0x600 -#define ARM_SMMU_CB_S1_TLBIASID 0x610 -#define ARM_SMMU_CB_S1_TLBIVAL 0x620 -#define ARM_SMMU_CB_S2_TLBIIPAS2 0x630 -#define ARM_SMMU_CB_S2_TLBIIPAS2L 0x638 -#define ARM_SMMU_CB_TLBSYNC 0x7f0 -#define ARM_SMMU_CB_TLBSTATUS 0x7f4 -#define ARM_SMMU_CB_ATS1PR 0x800 -#define ARM_SMMU_CB_ATSR 0x8f0 - -#define SCTLR_S1_ASIDPNE (1 << 12) -#define SCTLR_CFCFG (1 << 7) -#define SCTLR_CFIE (1 << 6) -#define SCTLR_CFRE (1 << 5) -#define SCTLR_E (1 << 4) -#define SCTLR_AFE (1 << 2) -#define SCTLR_TRE (1 << 1) -#define SCTLR_M (1 << 0) - -#define ARM_MMU500_ACTLR_CPRE (1 << 1) - -#define ARM_MMU500_ACR_CACHE_LOCK (1 << 26) -#define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8) - -#define CB_PAR_F (1 << 0) - -#define ATSR_ACTIVE (1 << 0) - -#define RESUME_RETRY (0 << 0) -#define RESUME_TERMINATE (1 << 0) - -#define TTBCR2_SEP_SHIFT 15 -#define TTBCR2_SEP_UPSTREAM (0x7 << TTBCR2_SEP_SHIFT) -#define TTBCR2_AS (1 << 4) - -#define TTBRn_ASID_SHIFT 48 - -#define FSR_MULTI (1 << 31) -#define FSR_SS (1 << 30) -#define FSR_UUT (1 << 8) -#define FSR_ASF (1 << 7) -#define FSR_TLBLKF (1 << 6) -#define FSR_TLBMCF (1 << 5) -#define FSR_EF (1 << 4) -#define FSR_PF (1 << 3) -#define FSR_AFF (1 << 2) -#define FSR_TF (1 << 1) - -#define FSR_IGN (FSR_AFF | FSR_ASF | \ - FSR_TLBMCF | FSR_TLBLKF) -#define FSR_FAULT (FSR_MULTI | FSR_SS | FSR_UUT | \ - FSR_EF | FSR_PF | FSR_TF | FSR_IGN) - -#define FSYNR0_WNR (1 << 4) - #define MSI_IOVA_BASE 0x8000000 #define MSI_IOVA_LENGTH 0x100000 From 0ae349a0f33fb040a2bc228fdc6d60111455feab Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 9 Aug 2017 10:43:04 -0400 Subject: [PATCH 31/87] iommu/qcom: Add qcom_iommu An iommu driver for Qualcomm "B" family devices which do implement the ARM SMMU spec, but not in a way that is compatible with how the arm-smmu driver is designed. It seems SMMU_SCR1.GASRAE=1 so the global register space is not accessible. This means it needs to get configuration from devicetree instead of setting it up dynamically. In the end, other than register definitions, there is not much code to share with arm-smmu (other than what has already been refactored out into the pgtable helpers). Signed-off-by: Rob Clark Tested-by: Riku Voipio Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 10 + drivers/iommu/Makefile | 1 + drivers/iommu/qcom_iommu.c | 868 +++++++++++++++++++++++++++++++++++++ 3 files changed, 879 insertions(+) create mode 100644 drivers/iommu/qcom_iommu.c diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index f73ff28f77e2..92f5fd2e0e4b 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -367,4 +367,14 @@ config MTK_IOMMU_V1 if unsure, say N here. +config QCOM_IOMMU + # Note: iommu drivers cannot (yet?) be built as modules + bool "Qualcomm IOMMU Support" + depends on ARCH_QCOM || COMPILE_TEST + select IOMMU_API + select IOMMU_IO_PGTABLE_LPAE + select ARM_DMA_USE_IOMMU + help + Support for IOMMU on certain Qualcomm SoCs. + endif # IOMMU_SUPPORT diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 195f7b997d8e..b910aea813a1 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -27,3 +27,4 @@ obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o obj-$(CONFIG_S390_IOMMU) += s390-iommu.o +obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c new file mode 100644 index 000000000000..860cad1cb167 --- /dev/null +++ b/drivers/iommu/qcom_iommu.c @@ -0,0 +1,868 @@ +/* + * IOMMU API for QCOM secure IOMMUs. Somewhat based on arm-smmu.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Copyright (C) 2013 ARM Limited + * Copyright (C) 2017 Red Hat + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "io-pgtable.h" +#include "arm-smmu-regs.h" + +#define SMMU_INTR_SEL_NS 0x2000 + +struct qcom_iommu_ctx; + +struct qcom_iommu_dev { + /* IOMMU core code handle */ + struct iommu_device iommu; + struct device *dev; + struct clk *iface_clk; + struct clk *bus_clk; + void __iomem *local_base; + u32 sec_id; + u8 num_ctxs; + struct qcom_iommu_ctx *ctxs[0]; /* indexed by asid-1 */ +}; + +struct qcom_iommu_ctx { + struct device *dev; + void __iomem *base; + bool secure_init; + u8 asid; /* asid and ctx bank # are 1:1 */ +}; + +struct qcom_iommu_domain { + struct io_pgtable_ops *pgtbl_ops; + spinlock_t pgtbl_lock; + struct mutex init_mutex; /* Protects iommu pointer */ + struct iommu_domain domain; + struct qcom_iommu_dev *iommu; +}; + +static struct qcom_iommu_domain *to_qcom_iommu_domain(struct iommu_domain *dom) +{ + return container_of(dom, struct qcom_iommu_domain, domain); +} + +static const struct iommu_ops qcom_iommu_ops; + +static struct qcom_iommu_dev * to_iommu(struct iommu_fwspec *fwspec) +{ + if (!fwspec || fwspec->ops != &qcom_iommu_ops) + return NULL; + return fwspec->iommu_priv; +} + +static struct qcom_iommu_ctx * to_ctx(struct iommu_fwspec *fwspec, unsigned asid) +{ + struct qcom_iommu_dev *qcom_iommu = to_iommu(fwspec); + if (!qcom_iommu) + return NULL; + return qcom_iommu->ctxs[asid - 1]; +} + +static inline void +iommu_writel(struct qcom_iommu_ctx *ctx, unsigned reg, u32 val) +{ + writel_relaxed(val, ctx->base + reg); +} + +static inline void +iommu_writeq(struct qcom_iommu_ctx *ctx, unsigned reg, u64 val) +{ + writeq_relaxed(val, ctx->base + reg); +} + +static inline u32 +iommu_readl(struct qcom_iommu_ctx *ctx, unsigned reg) +{ + return readl_relaxed(ctx->base + reg); +} + +static inline u64 +iommu_readq(struct qcom_iommu_ctx *ctx, unsigned reg) +{ + return readq_relaxed(ctx->base + reg); +} + +static void qcom_iommu_tlb_sync(void *cookie) +{ + struct iommu_fwspec *fwspec = cookie; + unsigned i; + + for (i = 0; i < fwspec->num_ids; i++) { + struct qcom_iommu_ctx *ctx = to_ctx(fwspec, fwspec->ids[i]); + unsigned int val, ret; + + iommu_writel(ctx, ARM_SMMU_CB_TLBSYNC, 0); + + ret = readl_poll_timeout(ctx->base + ARM_SMMU_CB_TLBSTATUS, val, + (val & 0x1) == 0, 0, 5000000); + if (ret) + dev_err(ctx->dev, "timeout waiting for TLB SYNC\n"); + } +} + +static void qcom_iommu_tlb_inv_context(void *cookie) +{ + struct iommu_fwspec *fwspec = cookie; + unsigned i; + + for (i = 0; i < fwspec->num_ids; i++) { + struct qcom_iommu_ctx *ctx = to_ctx(fwspec, fwspec->ids[i]); + iommu_writel(ctx, ARM_SMMU_CB_S1_TLBIASID, ctx->asid); + } + + qcom_iommu_tlb_sync(cookie); +} + +static void qcom_iommu_tlb_inv_range_nosync(unsigned long iova, size_t size, + size_t granule, bool leaf, void *cookie) +{ + struct iommu_fwspec *fwspec = cookie; + unsigned i, reg; + + reg = leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA; + + for (i = 0; i < fwspec->num_ids; i++) { + struct qcom_iommu_ctx *ctx = to_ctx(fwspec, fwspec->ids[i]); + size_t s = size; + + iova &= ~12UL; + iova |= ctx->asid; + do { + iommu_writel(ctx, reg, iova); + iova += granule; + } while (s -= granule); + } +} + +static const struct iommu_gather_ops qcom_gather_ops = { + .tlb_flush_all = qcom_iommu_tlb_inv_context, + .tlb_add_flush = qcom_iommu_tlb_inv_range_nosync, + .tlb_sync = qcom_iommu_tlb_sync, +}; + +static irqreturn_t qcom_iommu_fault(int irq, void *dev) +{ + struct qcom_iommu_ctx *ctx = dev; + u32 fsr, fsynr; + u64 iova; + + fsr = iommu_readl(ctx, ARM_SMMU_CB_FSR); + + if (!(fsr & FSR_FAULT)) + return IRQ_NONE; + + fsynr = iommu_readl(ctx, ARM_SMMU_CB_FSYNR0); + iova = iommu_readq(ctx, ARM_SMMU_CB_FAR); + + dev_err_ratelimited(ctx->dev, + "Unhandled context fault: fsr=0x%x, " + "iova=0x%016llx, fsynr=0x%x, cb=%d\n", + fsr, iova, fsynr, ctx->asid); + + iommu_writel(ctx, ARM_SMMU_CB_FSR, fsr); + + return IRQ_HANDLED; +} + +static int qcom_iommu_init_domain(struct iommu_domain *domain, + struct qcom_iommu_dev *qcom_iommu, + struct iommu_fwspec *fwspec) +{ + struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); + struct io_pgtable_ops *pgtbl_ops; + struct io_pgtable_cfg pgtbl_cfg; + int i, ret = 0; + u32 reg; + + mutex_lock(&qcom_domain->init_mutex); + if (qcom_domain->iommu) + goto out_unlock; + + pgtbl_cfg = (struct io_pgtable_cfg) { + .pgsize_bitmap = qcom_iommu_ops.pgsize_bitmap, + .ias = 32, + .oas = 40, + .tlb = &qcom_gather_ops, + .iommu_dev = qcom_iommu->dev, + }; + + qcom_domain->iommu = qcom_iommu; + pgtbl_ops = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &pgtbl_cfg, fwspec); + if (!pgtbl_ops) { + dev_err(qcom_iommu->dev, "failed to allocate pagetable ops\n"); + ret = -ENOMEM; + goto out_clear_iommu; + } + + /* Update the domain's page sizes to reflect the page table format */ + domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; + domain->geometry.aperture_end = (1ULL << pgtbl_cfg.ias) - 1; + domain->geometry.force_aperture = true; + + for (i = 0; i < fwspec->num_ids; i++) { + struct qcom_iommu_ctx *ctx = to_ctx(fwspec, fwspec->ids[i]); + + if (!ctx->secure_init) { + ret = qcom_scm_restore_sec_cfg(qcom_iommu->sec_id, ctx->asid); + if (ret) { + dev_err(qcom_iommu->dev, "secure init failed: %d\n", ret); + goto out_clear_iommu; + } + ctx->secure_init = true; + } + + /* TTBRs */ + iommu_writeq(ctx, ARM_SMMU_CB_TTBR0, + pgtbl_cfg.arm_lpae_s1_cfg.ttbr[0] | + ((u64)ctx->asid << TTBRn_ASID_SHIFT)); + iommu_writeq(ctx, ARM_SMMU_CB_TTBR1, + pgtbl_cfg.arm_lpae_s1_cfg.ttbr[1] | + ((u64)ctx->asid << TTBRn_ASID_SHIFT)); + + /* TTBCR */ + iommu_writel(ctx, ARM_SMMU_CB_TTBCR2, + (pgtbl_cfg.arm_lpae_s1_cfg.tcr >> 32) | + TTBCR2_SEP_UPSTREAM); + iommu_writel(ctx, ARM_SMMU_CB_TTBCR, + pgtbl_cfg.arm_lpae_s1_cfg.tcr); + + /* MAIRs (stage-1 only) */ + iommu_writel(ctx, ARM_SMMU_CB_S1_MAIR0, + pgtbl_cfg.arm_lpae_s1_cfg.mair[0]); + iommu_writel(ctx, ARM_SMMU_CB_S1_MAIR1, + pgtbl_cfg.arm_lpae_s1_cfg.mair[1]); + + /* SCTLR */ + reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | + SCTLR_M | SCTLR_S1_ASIDPNE; + + if (IS_ENABLED(CONFIG_BIG_ENDIAN)) + reg |= SCTLR_E; + + iommu_writel(ctx, ARM_SMMU_CB_SCTLR, reg); + } + + mutex_unlock(&qcom_domain->init_mutex); + + /* Publish page table ops for map/unmap */ + qcom_domain->pgtbl_ops = pgtbl_ops; + + return 0; + +out_clear_iommu: + qcom_domain->iommu = NULL; +out_unlock: + mutex_unlock(&qcom_domain->init_mutex); + return ret; +} + +static struct iommu_domain *qcom_iommu_domain_alloc(unsigned type) +{ + struct qcom_iommu_domain *qcom_domain; + + if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA) + return NULL; + /* + * Allocate the domain and initialise some of its data structures. + * We can't really do anything meaningful until we've added a + * master. + */ + qcom_domain = kzalloc(sizeof(*qcom_domain), GFP_KERNEL); + if (!qcom_domain) + return NULL; + + if (type == IOMMU_DOMAIN_DMA && + iommu_get_dma_cookie(&qcom_domain->domain)) { + kfree(qcom_domain); + return NULL; + } + + mutex_init(&qcom_domain->init_mutex); + spin_lock_init(&qcom_domain->pgtbl_lock); + + return &qcom_domain->domain; +} + +static void qcom_iommu_domain_free(struct iommu_domain *domain) +{ + struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); + + if (WARN_ON(qcom_domain->iommu)) /* forgot to detach? */ + return; + + iommu_put_dma_cookie(domain); + + /* NOTE: unmap can be called after client device is powered off, + * for example, with GPUs or anything involving dma-buf. So we + * cannot rely on the device_link. Make sure the IOMMU is on to + * avoid unclocked accesses in the TLB inv path: + */ + pm_runtime_get_sync(qcom_domain->iommu->dev); + + free_io_pgtable_ops(qcom_domain->pgtbl_ops); + + pm_runtime_put_sync(qcom_domain->iommu->dev); + + kfree(qcom_domain); +} + +static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) +{ + struct qcom_iommu_dev *qcom_iommu = to_iommu(dev->iommu_fwspec); + struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); + int ret; + + if (!qcom_iommu) { + dev_err(dev, "cannot attach to IOMMU, is it on the same bus?\n"); + return -ENXIO; + } + + /* Ensure that the domain is finalized */ + pm_runtime_get_sync(qcom_iommu->dev); + ret = qcom_iommu_init_domain(domain, qcom_iommu, dev->iommu_fwspec); + pm_runtime_put_sync(qcom_iommu->dev); + if (ret < 0) + return ret; + + /* + * Sanity check the domain. We don't support domains across + * different IOMMUs. + */ + if (qcom_domain->iommu != qcom_iommu) { + dev_err(dev, "cannot attach to IOMMU %s while already " + "attached to domain on IOMMU %s\n", + dev_name(qcom_domain->iommu->dev), + dev_name(qcom_iommu->dev)); + return -EINVAL; + } + + return 0; +} + +static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *dev) +{ + struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct qcom_iommu_dev *qcom_iommu = to_iommu(fwspec); + struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); + unsigned i; + + if (!qcom_domain->iommu) + return; + + pm_runtime_get_sync(qcom_iommu->dev); + for (i = 0; i < fwspec->num_ids; i++) { + struct qcom_iommu_ctx *ctx = to_ctx(fwspec, fwspec->ids[i]); + + /* Disable the context bank: */ + iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0); + } + pm_runtime_put_sync(qcom_iommu->dev); + + qcom_domain->iommu = NULL; +} + +static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot) +{ + int ret; + unsigned long flags; + struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); + struct io_pgtable_ops *ops = qcom_domain->pgtbl_ops; + + if (!ops) + return -ENODEV; + + spin_lock_irqsave(&qcom_domain->pgtbl_lock, flags); + ret = ops->map(ops, iova, paddr, size, prot); + spin_unlock_irqrestore(&qcom_domain->pgtbl_lock, flags); + return ret; +} + +static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova, + size_t size) +{ + size_t ret; + unsigned long flags; + struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); + struct io_pgtable_ops *ops = qcom_domain->pgtbl_ops; + + if (!ops) + return 0; + + /* NOTE: unmap can be called after client device is powered off, + * for example, with GPUs or anything involving dma-buf. So we + * cannot rely on the device_link. Make sure the IOMMU is on to + * avoid unclocked accesses in the TLB inv path: + */ + pm_runtime_get_sync(qcom_domain->iommu->dev); + spin_lock_irqsave(&qcom_domain->pgtbl_lock, flags); + ret = ops->unmap(ops, iova, size); + spin_unlock_irqrestore(&qcom_domain->pgtbl_lock, flags); + pm_runtime_put_sync(qcom_domain->iommu->dev); + + return ret; +} + +static phys_addr_t qcom_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + phys_addr_t ret; + unsigned long flags; + struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); + struct io_pgtable_ops *ops = qcom_domain->pgtbl_ops; + + if (!ops) + return 0; + + spin_lock_irqsave(&qcom_domain->pgtbl_lock, flags); + ret = ops->iova_to_phys(ops, iova); + spin_unlock_irqrestore(&qcom_domain->pgtbl_lock, flags); + + return ret; +} + +static bool qcom_iommu_capable(enum iommu_cap cap) +{ + switch (cap) { + case IOMMU_CAP_CACHE_COHERENCY: + /* + * Return true here as the SMMU can always send out coherent + * requests. + */ + return true; + case IOMMU_CAP_NOEXEC: + return true; + default: + return false; + } +} + +static int qcom_iommu_add_device(struct device *dev) +{ + struct qcom_iommu_dev *qcom_iommu = to_iommu(dev->iommu_fwspec); + struct iommu_group *group; + struct device_link *link; + + if (!qcom_iommu) + return -ENODEV; + + /* + * Establish the link between iommu and master, so that the + * iommu gets runtime enabled/disabled as per the master's + * needs. + */ + link = device_link_add(dev, qcom_iommu->dev, DL_FLAG_PM_RUNTIME); + if (!link) { + dev_err(qcom_iommu->dev, "Unable to create device link between %s and %s\n", + dev_name(qcom_iommu->dev), dev_name(dev)); + return -ENODEV; + } + + group = iommu_group_get_for_dev(dev); + if (IS_ERR_OR_NULL(group)) + return PTR_ERR_OR_ZERO(group); + + iommu_group_put(group); + iommu_device_link(&qcom_iommu->iommu, dev); + + return 0; +} + +static void qcom_iommu_remove_device(struct device *dev) +{ + struct qcom_iommu_dev *qcom_iommu = to_iommu(dev->iommu_fwspec); + + if (!qcom_iommu) + return; + + iommu_device_unlink(&qcom_iommu->iommu, dev); + iommu_group_remove_device(dev); + iommu_fwspec_free(dev); +} + +static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) +{ + struct qcom_iommu_dev *qcom_iommu; + struct platform_device *iommu_pdev; + unsigned asid = args->args[0]; + + if (args->args_count != 1) { + dev_err(dev, "incorrect number of iommu params found for %s " + "(found %d, expected 1)\n", + args->np->full_name, args->args_count); + return -EINVAL; + } + + iommu_pdev = of_find_device_by_node(args->np); + if (WARN_ON(!iommu_pdev)) + return -EINVAL; + + qcom_iommu = platform_get_drvdata(iommu_pdev); + + /* make sure the asid specified in dt is valid, so we don't have + * to sanity check this elsewhere, since 'asid - 1' is used to + * index into qcom_iommu->ctxs: + */ + if (WARN_ON(asid < 1) || + WARN_ON(asid > qcom_iommu->num_ctxs)) + return -EINVAL; + + if (!dev->iommu_fwspec->iommu_priv) { + dev->iommu_fwspec->iommu_priv = qcom_iommu; + } else { + /* make sure devices iommus dt node isn't referring to + * multiple different iommu devices. Multiple context + * banks are ok, but multiple devices are not: + */ + if (WARN_ON(qcom_iommu != dev->iommu_fwspec->iommu_priv)) + return -EINVAL; + } + + return iommu_fwspec_add_ids(dev, &asid, 1); +} + +static const struct iommu_ops qcom_iommu_ops = { + .capable = qcom_iommu_capable, + .domain_alloc = qcom_iommu_domain_alloc, + .domain_free = qcom_iommu_domain_free, + .attach_dev = qcom_iommu_attach_dev, + .detach_dev = qcom_iommu_detach_dev, + .map = qcom_iommu_map, + .unmap = qcom_iommu_unmap, + .map_sg = default_iommu_map_sg, + .iova_to_phys = qcom_iommu_iova_to_phys, + .add_device = qcom_iommu_add_device, + .remove_device = qcom_iommu_remove_device, + .device_group = generic_device_group, + .of_xlate = qcom_iommu_of_xlate, + .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, +}; + +static int qcom_iommu_enable_clocks(struct qcom_iommu_dev *qcom_iommu) +{ + int ret; + + ret = clk_prepare_enable(qcom_iommu->iface_clk); + if (ret) { + dev_err(qcom_iommu->dev, "Couldn't enable iface_clk\n"); + return ret; + } + + ret = clk_prepare_enable(qcom_iommu->bus_clk); + if (ret) { + dev_err(qcom_iommu->dev, "Couldn't enable bus_clk\n"); + clk_disable_unprepare(qcom_iommu->iface_clk); + return ret; + } + + return 0; +} + +static void qcom_iommu_disable_clocks(struct qcom_iommu_dev *qcom_iommu) +{ + clk_disable_unprepare(qcom_iommu->bus_clk); + clk_disable_unprepare(qcom_iommu->iface_clk); +} + +static int get_asid(const struct device_node *np) +{ + u32 reg; + + /* read the "reg" property directly to get the relative address + * of the context bank, and calculate the asid from that: + */ + if (of_property_read_u32_index(np, "reg", 0, ®)) + return -ENODEV; + + return reg / 0x1000; /* context banks are 0x1000 apart */ +} + +static int qcom_iommu_ctx_probe(struct platform_device *pdev) +{ + struct qcom_iommu_ctx *ctx; + struct device *dev = &pdev->dev; + struct qcom_iommu_dev *qcom_iommu = dev_get_drvdata(dev->parent); + struct resource *res; + int ret, irq; + + ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->dev = dev; + platform_set_drvdata(pdev, ctx); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + ctx->base = devm_ioremap_resource(dev, res); + if (IS_ERR(ctx->base)) + return PTR_ERR(ctx->base); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(dev, "failed to get irq\n"); + return -ENODEV; + } + + /* clear IRQs before registering fault handler, just in case the + * boot-loader left us a surprise: + */ + iommu_writel(ctx, ARM_SMMU_CB_FSR, iommu_readl(ctx, ARM_SMMU_CB_FSR)); + + ret = devm_request_irq(dev, irq, + qcom_iommu_fault, + IRQF_SHARED, + "qcom-iommu-fault", + ctx); + if (ret) { + dev_err(dev, "failed to request IRQ %u\n", irq); + return ret; + } + + ret = get_asid(dev->of_node); + if (ret < 0) { + dev_err(dev, "missing reg property\n"); + return ret; + } + + ctx->asid = ret; + + dev_dbg(dev, "found asid %u\n", ctx->asid); + + qcom_iommu->ctxs[ctx->asid - 1] = ctx; + + return 0; +} + +static int qcom_iommu_ctx_remove(struct platform_device *pdev) +{ + struct qcom_iommu_dev *qcom_iommu = dev_get_drvdata(pdev->dev.parent); + struct qcom_iommu_ctx *ctx = platform_get_drvdata(pdev); + + platform_set_drvdata(pdev, NULL); + + qcom_iommu->ctxs[ctx->asid - 1] = NULL; + + return 0; +} + +static const struct of_device_id ctx_of_match[] = { + { .compatible = "qcom,msm-iommu-v1-ns" }, + { .compatible = "qcom,msm-iommu-v1-sec" }, + { /* sentinel */ } +}; + +static struct platform_driver qcom_iommu_ctx_driver = { + .driver = { + .name = "qcom-iommu-ctx", + .of_match_table = of_match_ptr(ctx_of_match), + }, + .probe = qcom_iommu_ctx_probe, + .remove = qcom_iommu_ctx_remove, +}; + +static int qcom_iommu_device_probe(struct platform_device *pdev) +{ + struct device_node *child; + struct qcom_iommu_dev *qcom_iommu; + struct device *dev = &pdev->dev; + struct resource *res; + int ret, sz, max_asid = 0; + + /* find the max asid (which is 1:1 to ctx bank idx), so we know how + * many child ctx devices we have: + */ + for_each_child_of_node(dev->of_node, child) + max_asid = max(max_asid, get_asid(child)); + + sz = sizeof(*qcom_iommu) + (max_asid * sizeof(qcom_iommu->ctxs[0])); + + qcom_iommu = devm_kzalloc(dev, sz, GFP_KERNEL); + if (!qcom_iommu) + return -ENOMEM; + qcom_iommu->num_ctxs = max_asid; + qcom_iommu->dev = dev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (res) + qcom_iommu->local_base = devm_ioremap_resource(dev, res); + + qcom_iommu->iface_clk = devm_clk_get(dev, "iface"); + if (IS_ERR(qcom_iommu->iface_clk)) { + dev_err(dev, "failed to get iface clock\n"); + return PTR_ERR(qcom_iommu->iface_clk); + } + + qcom_iommu->bus_clk = devm_clk_get(dev, "bus"); + if (IS_ERR(qcom_iommu->bus_clk)) { + dev_err(dev, "failed to get bus clock\n"); + return PTR_ERR(qcom_iommu->bus_clk); + } + + if (of_property_read_u32(dev->of_node, "qcom,iommu-secure-id", + &qcom_iommu->sec_id)) { + dev_err(dev, "missing qcom,iommu-secure-id property\n"); + return -ENODEV; + } + + platform_set_drvdata(pdev, qcom_iommu); + + pm_runtime_enable(dev); + + /* register context bank devices, which are child nodes: */ + ret = devm_of_platform_populate(dev); + if (ret) { + dev_err(dev, "Failed to populate iommu contexts\n"); + return ret; + } + + ret = iommu_device_sysfs_add(&qcom_iommu->iommu, dev, NULL, + dev_name(dev)); + if (ret) { + dev_err(dev, "Failed to register iommu in sysfs\n"); + return ret; + } + + iommu_device_set_ops(&qcom_iommu->iommu, &qcom_iommu_ops); + iommu_device_set_fwnode(&qcom_iommu->iommu, dev->fwnode); + + ret = iommu_device_register(&qcom_iommu->iommu); + if (ret) { + dev_err(dev, "Failed to register iommu\n"); + return ret; + } + + bus_set_iommu(&platform_bus_type, &qcom_iommu_ops); + + if (qcom_iommu->local_base) { + pm_runtime_get_sync(dev); + writel_relaxed(0xffffffff, qcom_iommu->local_base + SMMU_INTR_SEL_NS); + pm_runtime_put_sync(dev); + } + + return 0; +} + +static int qcom_iommu_device_remove(struct platform_device *pdev) +{ + struct qcom_iommu_dev *qcom_iommu = platform_get_drvdata(pdev); + + bus_set_iommu(&platform_bus_type, NULL); + + pm_runtime_force_suspend(&pdev->dev); + platform_set_drvdata(pdev, NULL); + iommu_device_sysfs_remove(&qcom_iommu->iommu); + iommu_device_unregister(&qcom_iommu->iommu); + + return 0; +} + +#ifdef CONFIG_PM +static int qcom_iommu_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct qcom_iommu_dev *qcom_iommu = platform_get_drvdata(pdev); + + return qcom_iommu_enable_clocks(qcom_iommu); +} + +static int qcom_iommu_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct qcom_iommu_dev *qcom_iommu = platform_get_drvdata(pdev); + + qcom_iommu_disable_clocks(qcom_iommu); + + return 0; +} +#endif + +static const struct dev_pm_ops qcom_iommu_pm_ops = { + SET_RUNTIME_PM_OPS(qcom_iommu_suspend, qcom_iommu_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) +}; + +static const struct of_device_id qcom_iommu_of_match[] = { + { .compatible = "qcom,msm-iommu-v1" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, qcom_iommu_of_match); + +static struct platform_driver qcom_iommu_driver = { + .driver = { + .name = "qcom-iommu", + .of_match_table = of_match_ptr(qcom_iommu_of_match), + .pm = &qcom_iommu_pm_ops, + }, + .probe = qcom_iommu_device_probe, + .remove = qcom_iommu_device_remove, +}; + +static int __init qcom_iommu_init(void) +{ + int ret; + + ret = platform_driver_register(&qcom_iommu_ctx_driver); + if (ret) + return ret; + + ret = platform_driver_register(&qcom_iommu_driver); + if (ret) + platform_driver_unregister(&qcom_iommu_ctx_driver); + + return ret; +} + +static void __exit qcom_iommu_exit(void) +{ + platform_driver_unregister(&qcom_iommu_driver); + platform_driver_unregister(&qcom_iommu_ctx_driver); +} + +module_init(qcom_iommu_init); +module_exit(qcom_iommu_exit); + +IOMMU_OF_DECLARE(qcom_iommu_dev, "qcom,msm-iommu-v1", NULL); + +MODULE_DESCRIPTION("IOMMU API for QCOM IOMMU v1 implementations"); +MODULE_LICENSE("GPL v2"); From d051f28c880712639aadd981bfed7dcd4dd0e159 Mon Sep 17 00:00:00 2001 From: Stanimir Varbanov Date: Wed, 9 Aug 2017 10:43:05 -0400 Subject: [PATCH 32/87] iommu/qcom: Initialize secure page table This basically gets the secure page table size, allocates memory for secure pagetables and passes the physical address to the trusted zone. Signed-off-by: Stanimir Varbanov Signed-off-by: Rob Clark Signed-off-by: Joerg Roedel --- drivers/iommu/qcom_iommu.c | 64 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index 860cad1cb167..48b62aa52787 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -604,6 +604,51 @@ static void qcom_iommu_disable_clocks(struct qcom_iommu_dev *qcom_iommu) clk_disable_unprepare(qcom_iommu->iface_clk); } +static int qcom_iommu_sec_ptbl_init(struct device *dev) +{ + size_t psize = 0; + unsigned int spare = 0; + void *cpu_addr; + dma_addr_t paddr; + unsigned long attrs; + static bool allocated = false; + int ret; + + if (allocated) + return 0; + + ret = qcom_scm_iommu_secure_ptbl_size(spare, &psize); + if (ret) { + dev_err(dev, "failed to get iommu secure pgtable size (%d)\n", + ret); + return ret; + } + + dev_info(dev, "iommu sec: pgtable size: %zu\n", psize); + + attrs = DMA_ATTR_NO_KERNEL_MAPPING; + + cpu_addr = dma_alloc_attrs(dev, psize, &paddr, GFP_KERNEL, attrs); + if (!cpu_addr) { + dev_err(dev, "failed to allocate %zu bytes for pgtable\n", + psize); + return -ENOMEM; + } + + ret = qcom_scm_iommu_secure_ptbl_init(paddr, psize, spare); + if (ret) { + dev_err(dev, "failed to init iommu pgtable (%d)\n", ret); + goto free_mem; + } + + allocated = true; + return 0; + +free_mem: + dma_free_attrs(dev, psize, cpu_addr, paddr, attrs); + return ret; +} + static int get_asid(const struct device_node *np) { u32 reg; @@ -700,6 +745,17 @@ static struct platform_driver qcom_iommu_ctx_driver = { .remove = qcom_iommu_ctx_remove, }; +static bool qcom_iommu_has_secure_context(struct qcom_iommu_dev *qcom_iommu) +{ + struct device_node *child; + + for_each_child_of_node(qcom_iommu->dev->of_node, child) + if (of_device_is_compatible(child, "qcom,msm-iommu-v1-sec")) + return true; + + return false; +} + static int qcom_iommu_device_probe(struct platform_device *pdev) { struct device_node *child; @@ -744,6 +800,14 @@ static int qcom_iommu_device_probe(struct platform_device *pdev) return -ENODEV; } + if (qcom_iommu_has_secure_context(qcom_iommu)) { + ret = qcom_iommu_sec_ptbl_init(dev); + if (ret) { + dev_err(dev, "cannot init secure pg table(%d)\n", ret); + return ret; + } + } + platform_set_drvdata(pdev, qcom_iommu); pm_runtime_enable(dev); From 46cc815d6d8c3c9dd18ffae1e3e818eb3ce2c788 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Thu, 10 Aug 2017 10:47:32 +0530 Subject: [PATCH 33/87] memory: mtk-smi: Handle return value of clk_prepare_enable clk_prepare_enable() can fail here and we must check its return value. Signed-off-by: Arvind Yadav Signed-off-by: Joerg Roedel --- drivers/memory/mtk-smi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c index a3ff19aed087..13f8c45dbf0d 100644 --- a/drivers/memory/mtk-smi.c +++ b/drivers/memory/mtk-smi.c @@ -327,6 +327,7 @@ static int mtk_smi_common_probe(struct platform_device *pdev) struct mtk_smi *common; struct resource *res; enum mtk_smi_gen smi_gen; + int ret; if (!dev->pm_domain) return -EPROBE_DEFER; @@ -361,7 +362,9 @@ static int mtk_smi_common_probe(struct platform_device *pdev) if (IS_ERR(common->clk_async)) return PTR_ERR(common->clk_async); - clk_prepare_enable(common->clk_async); + ret = clk_prepare_enable(common->clk_async); + if (ret) + return ret; } pm_runtime_enable(dev); platform_set_drvdata(pdev, common); From 4c232a708be1047fe26b7c75ceaa5d73deb6a798 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 9 Aug 2017 16:33:33 +0800 Subject: [PATCH 34/87] iommu/amd: Detect pre enabled translation Add functions to check whether translation is already enabled in IOMMU. Signed-off-by: Baoquan He Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 24 ++++++++++++++++++++++++ drivers/iommu/amd_iommu_proto.h | 1 + drivers/iommu/amd_iommu_types.h | 3 +++ 3 files changed, 28 insertions(+) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 372303700566..3f72f44fa2df 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -258,6 +258,25 @@ static int amd_iommu_enable_interrupts(void); static int __init iommu_go_to_state(enum iommu_init_state state); static void init_device_table_dma(void); +bool translation_pre_enabled(struct amd_iommu *iommu) +{ + return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED); +} + +static void clear_translation_pre_enabled(struct amd_iommu *iommu) +{ + iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED; +} + +static void init_translation_status(struct amd_iommu *iommu) +{ + u32 ctrl; + + ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); + if (ctrl & (1<flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED; +} + static inline void update_last_devid(u16 devid) { if (devid > amd_iommu_last_bdf) @@ -1399,6 +1418,11 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) iommu->int_enabled = false; + init_translation_status(iommu); + + if (translation_pre_enabled(iommu)) + pr_warn("Translation is already enabled - trying to copy translation structures\n"); + ret = init_iommu_from_acpi(iommu, h); if (ret) return ret; diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h index 466260f8a1df..a9666d2005bb 100644 --- a/drivers/iommu/amd_iommu_proto.h +++ b/drivers/iommu/amd_iommu_proto.h @@ -87,4 +87,5 @@ static inline bool iommu_feature(struct amd_iommu *iommu, u64 f) return !!(iommu->features & f); } +extern bool translation_pre_enabled(struct amd_iommu *iommu); #endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 294a409e283b..0c98b2cf04cc 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -435,6 +435,8 @@ struct iommu_domain; struct irq_domain; struct amd_irte_ops; +#define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED (1 << 0) + /* * This structure contains generic data for IOMMU protection domains * independent of their use. @@ -569,6 +571,7 @@ struct amd_iommu { struct amd_irte_ops *irte_ops; #endif + u32 flags; volatile u64 __aligned(8) cmd_sem; }; From 78d313c611adcd354d35295e30f1495e02e005f9 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 9 Aug 2017 16:33:34 +0800 Subject: [PATCH 35/87] iommu/amd: Add several helper functions Move single iommu enabling codes into a wrapper function early_enable_iommu(). This can make later kdump change easier. And also add iommu_disable_command_buffer and iommu_disable_event_buffer for later usage. Signed-off-by: Baoquan He Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 42 +++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 3f72f44fa2df..277838dbc3a6 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -634,6 +634,14 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu) amd_iommu_reset_cmd_buffer(iommu); } +/* + * This function disables the command buffer + */ +static void iommu_disable_command_buffer(struct amd_iommu *iommu) +{ + iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); +} + static void __init free_command_buffer(struct amd_iommu *iommu) { free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); @@ -666,6 +674,14 @@ static void iommu_enable_event_buffer(struct amd_iommu *iommu) iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); } +/* + * This function disables the event log buffer + */ +static void iommu_disable_event_buffer(struct amd_iommu *iommu) +{ + iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); +} + static void __init free_event_buffer(struct amd_iommu *iommu) { free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); @@ -2046,6 +2062,19 @@ static void iommu_enable_ga(struct amd_iommu *iommu) #endif } +static void early_enable_iommu(struct amd_iommu *iommu) +{ + iommu_disable(iommu); + iommu_init_flags(iommu); + iommu_set_device_table(iommu); + iommu_enable_command_buffer(iommu); + iommu_enable_event_buffer(iommu); + iommu_set_exclusion_range(iommu); + iommu_enable_ga(iommu); + iommu_enable(iommu); + iommu_flush_all_caches(iommu); +} + /* * This function finally enables all IOMMUs found in the system after * they have been initialized @@ -2054,17 +2083,8 @@ static void early_enable_iommus(void) { struct amd_iommu *iommu; - for_each_iommu(iommu) { - iommu_disable(iommu); - iommu_init_flags(iommu); - iommu_set_device_table(iommu); - iommu_enable_command_buffer(iommu); - iommu_enable_event_buffer(iommu); - iommu_set_exclusion_range(iommu); - iommu_enable_ga(iommu); - iommu_enable(iommu); - iommu_flush_all_caches(iommu); - } + for_each_iommu(iommu) + early_enable_iommu(iommu); #ifdef CONFIG_IRQ_REMAP if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) From 9494ea90a56d013f4257686c8daf49203cd900c0 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 9 Aug 2017 16:33:35 +0800 Subject: [PATCH 36/87] Revert "iommu/amd: Suppress IO_PAGE_FAULTs in kdump kernel" This reverts commit 54bd63570484167cb13edf81e31fff107b879981. We still need the IO_PAGE_FAULT message to warn error after the issue of on-flight dma in kdump kernel is fixed. Signed-off-by: Baoquan He Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 3 +-- drivers/iommu/amd_iommu_init.c | 9 --------- drivers/iommu/amd_iommu_types.h | 1 - 3 files changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 354cbd6392cd..6d2fc40a086d 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2086,8 +2086,7 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) flags |= tmp; } - - flags &= ~(DTE_FLAG_SA | 0xffffULL); + flags &= ~(0xffffUL); flags |= domain->id; amd_iommu_dev_table[devid].data[1] = flags; diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 277838dbc3a6..7044510654fe 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include @@ -1942,14 +1941,6 @@ static void init_device_table_dma(void) for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { set_dev_entry_bit(devid, DEV_ENTRY_VALID); set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); - /* - * In kdump kernels in-flight DMA from the old kernel might - * cause IO_PAGE_FAULTs. There are no reports that a kdump - * actually failed because of that, so just disable fault - * reporting in the hardware to get rid of the messages - */ - if (is_kdump_kernel()) - set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT); } } diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 0c98b2cf04cc..db7ceb4d0957 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -322,7 +322,6 @@ #define IOMMU_PTE_IW (1ULL << 62) #define DTE_FLAG_IOTLB (1ULL << 32) -#define DTE_FLAG_SA (1ULL << 34) #define DTE_FLAG_GV (1ULL << 55) #define DTE_FLAG_MASK (0x3ffULL << 32) #define DTE_GLX_SHIFT (56) From 07a80a6b5920873a8b161ac49c5c12db7af30c0f Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 9 Aug 2017 16:33:36 +0800 Subject: [PATCH 37/87] iommu/amd: Define bit fields for DTE particularly In AMD-Vi spec several bits of IO PTE fields and DTE fields are similar so that both of them can share the same MACRO definition. However defining them respectively can make code more read-able. Do it now. Signed-off-by: Baoquan He Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 8 ++++---- drivers/iommu/amd_iommu_types.h | 18 ++++++++++++++---- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 6d2fc40a086d..097db07354b4 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1537,9 +1537,9 @@ static int iommu_map_page(struct protection_domain *dom, if (count > 1) { __pte = PAGE_SIZE_PTE(phys_addr, page_size); - __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC; + __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC; } else - __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC; + __pte = phys_addr | IOMMU_PTE_PR | IOMMU_PTE_FC; if (prot & IOMMU_PROT_IR) __pte |= IOMMU_PTE_IR; @@ -2053,7 +2053,7 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; - pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; + pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV; flags = amd_iommu_dev_table[devid].data[1]; @@ -2096,7 +2096,7 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) static void clear_dte_entry(u16 devid) { /* remove entry from the device table seen by the hardware */ - amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; + amd_iommu_dev_table[devid].data[0] = DTE_FLAG_V | DTE_FLAG_TV; amd_iommu_dev_table[devid].data[1] &= DTE_FLAG_MASK; amd_iommu_apply_erratum_63(devid); diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index db7ceb4d0957..f88e802481a3 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -265,7 +265,7 @@ #define PM_LEVEL_INDEX(x, a) (((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL) #define PM_LEVEL_ENC(x) (((x) << 9) & 0xe00ULL) #define PM_LEVEL_PDE(x, a) ((a) | PM_LEVEL_ENC((x)) | \ - IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) + IOMMU_PTE_PR | IOMMU_PTE_IR | IOMMU_PTE_IW) #define PM_PTE_LEVEL(pte) (((pte) >> 9) & 0x7ULL) #define PM_MAP_4k 0 @@ -314,13 +314,23 @@ #define PTE_LEVEL_PAGE_SIZE(level) \ (1ULL << (12 + (9 * (level)))) -#define IOMMU_PTE_P (1ULL << 0) -#define IOMMU_PTE_TV (1ULL << 1) +/* + * Bit value definition for I/O PTE fields + */ +#define IOMMU_PTE_PR (1ULL << 0) #define IOMMU_PTE_U (1ULL << 59) #define IOMMU_PTE_FC (1ULL << 60) #define IOMMU_PTE_IR (1ULL << 61) #define IOMMU_PTE_IW (1ULL << 62) +/* + * Bit value definition for DTE fields + */ +#define DTE_FLAG_V (1ULL << 0) +#define DTE_FLAG_TV (1ULL << 1) +#define DTE_FLAG_IR (1ULL << 61) +#define DTE_FLAG_IW (1ULL << 62) + #define DTE_FLAG_IOTLB (1ULL << 32) #define DTE_FLAG_GV (1ULL << 55) #define DTE_FLAG_MASK (0x3ffULL << 32) @@ -342,7 +352,7 @@ #define GCR3_VALID 0x01ULL #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) -#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) +#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_PR) #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK)) #define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07) From 45a01c42933b93e59811099f97aa4179d499a42c Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 9 Aug 2017 16:33:37 +0800 Subject: [PATCH 38/87] iommu/amd: Add function copy_dev_tables() Add function copy_dev_tables to copy the old DEV table entries of the panicked kernel to the new allocated device table. Since all iommus share the same device table the copy only need be done one time. Here add a new global old_dev_tbl_cpy to point to the newly allocated device table which the content of old device table will be copied to. Besides, we also need to: - Check whether all IOMMUs actually use the same device table with the same size - Verify that the size of the old device table is the expected size. - Reserve the old domain id occupied in 1st kernel to avoid touching the old io-page tables. Then on-flight DMA can continue looking it up. And also define MACRO DEV_DOMID_MASK to replace magic number 0xffffULL, it can be reused in copy_dev_tables(). Signed-off-by: Baoquan He Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 2 +- drivers/iommu/amd_iommu_init.c | 62 +++++++++++++++++++++++++++++++++ drivers/iommu/amd_iommu_types.h | 1 + 3 files changed, 64 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 097db07354b4..b22b58b33400 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2086,7 +2086,7 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) flags |= tmp; } - flags &= ~(0xffffUL); + flags &= ~DEV_DOMID_MASK; flags |= domain->id; amd_iommu_dev_table[devid].data[1] = flags; diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 7044510654fe..e2857204d32a 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -195,6 +195,11 @@ spinlock_t amd_iommu_pd_lock; * page table root pointer. */ struct dev_table_entry *amd_iommu_dev_table; +/* + * Pointer to a device table which the content of old device table + * will be copied to. It's only be used in kdump kernel. + */ +static struct dev_table_entry *old_dev_tbl_cpy; /* * The alias table is a driver specific data structure which contains the @@ -842,6 +847,63 @@ static int get_dev_entry_bit(u16 devid, u8 bit) } +static bool copy_device_table(void) +{ + struct dev_table_entry *old_devtb = NULL; + u32 lo, hi, devid, old_devtb_size; + phys_addr_t old_devtb_phys; + u64 entry, last_entry = 0; + struct amd_iommu *iommu; + u16 dom_id, dte_v; + gfp_t gfp_flag; + + + pr_warn("Translation is already enabled - trying to copy translation structures\n"); + for_each_iommu(iommu) { + /* All IOMMUs should use the same device table with the same size */ + lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET); + hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4); + entry = (((u64) hi) << 32) + lo; + if (last_entry && last_entry != entry) { + pr_err("IOMMU:%d should use the same dev table as others!/n", + iommu->index); + return false; + } + last_entry = entry; + + old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12; + if (old_devtb_size != dev_table_size) { + pr_err("The device table size of IOMMU:%d is not expected!/n", + iommu->index); + return false; + } + } + + old_devtb_phys = entry & PAGE_MASK; + old_devtb = memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB); + if (!old_devtb) + return false; + + gfp_flag = GFP_KERNEL | __GFP_ZERO; + old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag, + get_order(dev_table_size)); + if (old_dev_tbl_cpy == NULL) { + pr_err("Failed to allocate memory for copying old device table!/n"); + return false; + } + + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { + old_dev_tbl_cpy[devid] = old_devtb[devid]; + dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK; + dte_v = old_devtb[devid].data[0] & DTE_FLAG_V; + if (dte_v && dom_id) + __set_bit(dom_id, amd_iommu_pd_alloc_bitmap); + } + memunmap(old_devtb); + + return true; +} + void amd_iommu_apply_erratum_63(u16 devid) { int sysmgt; diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index f88e802481a3..a7f6cf8c841e 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -336,6 +336,7 @@ #define DTE_FLAG_MASK (0x3ffULL << 32) #define DTE_GLX_SHIFT (56) #define DTE_GLX_MASK (3) +#define DEV_DOMID_MASK 0xffffULL #define DTE_GCR3_VAL_A(x) (((x) >> 12) & 0x00007ULL) #define DTE_GCR3_VAL_B(x) (((x) >> 15) & 0x0ffffULL) From 3ac3e5ee5ed56b07448f295902e44916eb6979fc Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 9 Aug 2017 16:33:38 +0800 Subject: [PATCH 39/87] iommu/amd: Copy old trans table from old kernel Here several things need be done: - If iommu is pre-enabled in a normal kernel, just disable it and print warning. - If any one of IOMMUs is not pre-enabled in kdump kernel, just continue as it does in normal kernel. - If failed to copy dev table of old kernel, continue to proceed as it does in normal kernel. - Only if all IOMMUs are pre-enabled and copy dev table is done well, free the dev table allocated in early_amd_iommu_init() and make amd_iommu_dev_table point to the copied one. - Disable and Re-enable event/cmd buffer, install the copied DTE table to reg, and detect and enable guest vapic. - Flush all caches Signed-off-by: Baoquan He Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 59 +++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index e2857204d32a..959c25d997e1 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -37,6 +37,7 @@ #include #include +#include #include "amd_iommu_proto.h" #include "amd_iommu_types.h" #include "irq_remapping.h" @@ -262,6 +263,8 @@ static int amd_iommu_enable_interrupts(void); static int __init iommu_go_to_state(enum iommu_init_state state); static void init_device_table_dma(void); +static bool __initdata amd_iommu_pre_enabled = true; + bool translation_pre_enabled(struct amd_iommu *iommu) { return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED); @@ -857,6 +860,8 @@ static bool copy_device_table(void) u16 dom_id, dte_v; gfp_t gfp_flag; + if (!amd_iommu_pre_enabled) + return false; pr_warn("Translation is already enabled - trying to copy translation structures\n"); for_each_iommu(iommu) { @@ -1496,9 +1501,14 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) iommu->int_enabled = false; init_translation_status(iommu); - - if (translation_pre_enabled(iommu)) - pr_warn("Translation is already enabled - trying to copy translation structures\n"); + if (translation_pre_enabled(iommu) && !is_kdump_kernel()) { + iommu_disable(iommu); + clear_translation_pre_enabled(iommu); + pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n", + iommu->index); + } + if (amd_iommu_pre_enabled) + amd_iommu_pre_enabled = translation_pre_enabled(iommu); ret = init_iommu_from_acpi(iommu, h); if (ret) @@ -1993,8 +2003,7 @@ static int __init init_memory_definitions(struct acpi_table_header *table) } /* - * Init the device table to not allow DMA access for devices and - * suppress all page faults + * Init the device table to not allow DMA access for devices */ static void init_device_table_dma(void) { @@ -2130,14 +2139,48 @@ static void early_enable_iommu(struct amd_iommu *iommu) /* * This function finally enables all IOMMUs found in the system after - * they have been initialized + * they have been initialized. + * + * Or if in kdump kernel and IOMMUs are all pre-enabled, try to copy + * the old content of device table entries. Not this case or copy failed, + * just continue as normal kernel does. */ static void early_enable_iommus(void) { struct amd_iommu *iommu; - for_each_iommu(iommu) - early_enable_iommu(iommu); + + if (!copy_device_table()) { + /* + * If come here because of failure in copying device table from old + * kernel with all IOMMUs enabled, print error message and try to + * free allocated old_dev_tbl_cpy. + */ + if (amd_iommu_pre_enabled) + pr_err("Failed to copy DEV table from previous kernel.\n"); + if (old_dev_tbl_cpy != NULL) + free_pages((unsigned long)old_dev_tbl_cpy, + get_order(dev_table_size)); + + for_each_iommu(iommu) { + clear_translation_pre_enabled(iommu); + early_enable_iommu(iommu); + } + } else { + pr_info("Copied DEV table from previous kernel.\n"); + free_pages((unsigned long)amd_iommu_dev_table, + get_order(dev_table_size)); + amd_iommu_dev_table = old_dev_tbl_cpy; + for_each_iommu(iommu) { + iommu_disable_command_buffer(iommu); + iommu_disable_event_buffer(iommu); + iommu_enable_command_buffer(iommu); + iommu_enable_event_buffer(iommu); + iommu_enable_ga(iommu); + iommu_set_device_table(iommu); + iommu_flush_all_caches(iommu); + } + } #ifdef CONFIG_IRQ_REMAP if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) From 53019a9e88cc14bae2780ba807faba87a5829891 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 9 Aug 2017 16:33:39 +0800 Subject: [PATCH 40/87] iommu/amd: Do sanity check for address translation and irq remap of old dev table entry Firstly split the dev table entry copy into address translation part and irq remapping part. Because these two parts could be enabled independently. Secondly do sanity check for address translation and irq remap of old dev table entry separately. Signed-off-by: Baoquan He Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 5 ----- drivers/iommu/amd_iommu_init.c | 23 ++++++++++++++++++++--- drivers/iommu/amd_iommu_types.h | 8 ++++++++ 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index b22b58b33400..dab901b4f0f9 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3776,11 +3776,6 @@ EXPORT_SYMBOL(amd_iommu_device_info); static struct irq_chip amd_ir_chip; -#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) -#define DTE_IRQ_REMAP_INTCTL (2ULL << 60) -#define DTE_IRQ_TABLE_LEN (8ULL << 1) -#define DTE_IRQ_REMAP_ENABLE 1ULL - static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table) { u64 dte; diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 959c25d997e1..d08ad74b0928 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -852,12 +852,12 @@ static int get_dev_entry_bit(u16 devid, u8 bit) static bool copy_device_table(void) { + u64 int_ctl, int_tab_len, entry, last_entry = 0; struct dev_table_entry *old_devtb = NULL; u32 lo, hi, devid, old_devtb_size; phys_addr_t old_devtb_phys; - u64 entry, last_entry = 0; struct amd_iommu *iommu; - u16 dom_id, dte_v; + u16 dom_id, dte_v, irq_v; gfp_t gfp_flag; if (!amd_iommu_pre_enabled) @@ -901,8 +901,25 @@ static bool copy_device_table(void) old_dev_tbl_cpy[devid] = old_devtb[devid]; dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK; dte_v = old_devtb[devid].data[0] & DTE_FLAG_V; - if (dte_v && dom_id) + + if (dte_v && dom_id) { + old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0]; + old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1]; __set_bit(dom_id, amd_iommu_pd_alloc_bitmap); + } + + irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE; + int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK; + int_tab_len = old_devtb[devid].data[2] & DTE_IRQ_TABLE_LEN_MASK; + if (irq_v && (int_ctl || int_tab_len)) { + if ((int_ctl != DTE_IRQ_REMAP_INTCTL) || + (int_tab_len != DTE_IRQ_TABLE_LEN)) { + pr_err("Wrong old irq remapping flag: %#x\n", devid); + return false; + } + + old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2]; + } } memunmap(old_devtb); diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index a7f6cf8c841e..f0979183ec9b 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -250,6 +250,14 @@ #define GA_GUEST_NR 0x1 +/* Bit value definition for dte irq remapping fields*/ +#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) +#define DTE_IRQ_REMAP_INTCTL_MASK (0x3ULL << 60) +#define DTE_IRQ_TABLE_LEN_MASK (0xfULL << 1) +#define DTE_IRQ_REMAP_INTCTL (2ULL << 60) +#define DTE_IRQ_TABLE_LEN (8ULL << 1) +#define DTE_IRQ_REMAP_ENABLE 1ULL + #define PAGE_MODE_NONE 0x00 #define PAGE_MODE_1_LEVEL 0x01 #define PAGE_MODE_2_LEVEL 0x02 From e01d1913b0d0817191418381a6fcebaa01abde2a Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 9 Aug 2017 16:33:40 +0800 Subject: [PATCH 41/87] iommu: Add is_attach_deferred call-back to iommu-ops This new call-back will be used to check if the domain attach need be deferred for now. If yes, the domain attach/detach will return directly. Signed-off-by: Baoquan He Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 8 ++++++++ include/linux/iommu.h | 1 + 2 files changed, 9 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3f6ea160afed..86581b115b92 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1283,6 +1283,10 @@ static int __iommu_attach_device(struct iommu_domain *domain, struct device *dev) { int ret; + if ((domain->ops->is_attach_deferred != NULL) && + domain->ops->is_attach_deferred(domain, dev)) + return 0; + if (unlikely(domain->ops->attach_dev == NULL)) return -ENODEV; @@ -1324,6 +1328,10 @@ EXPORT_SYMBOL_GPL(iommu_attach_device); static void __iommu_detach_device(struct iommu_domain *domain, struct device *dev) { + if ((domain->ops->is_attach_deferred != NULL) && + domain->ops->is_attach_deferred(domain, dev)) + return; + if (unlikely(domain->ops->detach_dev == NULL)) return; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2cb54adc4a33..63983c9e6c3a 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -225,6 +225,7 @@ struct iommu_ops { u32 (*domain_get_windows)(struct iommu_domain *domain); int (*of_xlate)(struct device *dev, struct of_phandle_args *args); + bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev); unsigned long pgsize_bitmap; }; From df3f7a6e8e855e4ff533508807cd7c3723faa51f Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 9 Aug 2017 16:33:41 +0800 Subject: [PATCH 42/87] iommu/amd: Use is_attach_deferred call-back Implement call-back is_attach_deferred and use it to defer the domain attach from iommu driver init to device driver init when iommu is pre-enabled in kdump kernel. Signed-off-by: Baoquan He Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index dab901b4f0f9..eebf4590cef9 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -121,6 +121,7 @@ struct iommu_dev_data { PPR completions */ u32 errata; /* Bitmap for errata to apply */ bool use_vapic; /* Enable device to use vapic mode */ + bool defer_attach; struct ratelimit_state rs; /* Ratelimit IOPF messages */ }; @@ -371,12 +372,17 @@ static u16 get_alias(struct device *dev) static struct iommu_dev_data *find_dev_data(u16 devid) { struct iommu_dev_data *dev_data; + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; dev_data = search_dev_data(devid); - if (dev_data == NULL) + if (dev_data == NULL) { dev_data = alloc_dev_data(devid); + if (translation_pre_enabled(iommu)) + dev_data->defer_attach = true; + } + return dev_data; } @@ -2477,11 +2483,18 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev) static struct protection_domain *get_domain(struct device *dev) { struct protection_domain *domain; + struct iommu_domain *io_domain; if (!check_device(dev)) return ERR_PTR(-EINVAL); domain = get_dev_data(dev)->domain; + if (domain == NULL && get_dev_data(dev)->defer_attach) { + get_dev_data(dev)->defer_attach = false; + io_domain = iommu_get_domain_for_dev(dev); + domain = to_pdomain(io_domain); + attach_device(dev, domain); + } if (!dma_ops_domain(domain)) return ERR_PTR(-EBUSY); @@ -3372,6 +3385,13 @@ static void amd_iommu_apply_resv_region(struct device *dev, WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL); } +static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, + struct device *dev) +{ + struct iommu_dev_data *dev_data = dev->archdata.iommu; + return dev_data->defer_attach; +} + const struct iommu_ops amd_iommu_ops = { .capable = amd_iommu_capable, .domain_alloc = amd_iommu_domain_alloc, @@ -3388,6 +3408,7 @@ const struct iommu_ops amd_iommu_ops = { .get_resv_regions = amd_iommu_get_resv_regions, .put_resv_regions = amd_iommu_put_resv_regions, .apply_resv_region = amd_iommu_apply_resv_region, + .is_attach_deferred = amd_iommu_is_attach_deferred, .pgsize_bitmap = AMD_IOMMU_PGSIZES, }; From b336781b82cc12c7940a2b1c806fbe7f78ecd72a Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 9 Aug 2017 16:33:42 +0800 Subject: [PATCH 43/87] iommu/amd: Allocate memory below 4G for dev table if translation pre-enabled AMD pointed out it's unsafe to update the device-table while iommu is enabled. It turns out that device-table pointer update is split up into two 32bit writes in the IOMMU hardware. So updating it while the IOMMU is enabled could have some nasty side effects. The safe way to work around this is to always allocate the device-table below 4G, including the old device-table in normal kernel and the device-table used for copying the content of the old device-table in kdump kernel. Meanwhile we need check if the address of old device-table is above 4G because it might has been touched accidentally in corrupted 1st kernel. Signed-off-by: Baoquan He Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index d08ad74b0928..c348732f27d7 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -885,11 +885,15 @@ static bool copy_device_table(void) } old_devtb_phys = entry & PAGE_MASK; + if (old_devtb_phys >= 0x100000000ULL) { + pr_err("The address of old device table is above 4G, not trustworthy!/n"); + return false; + } old_devtb = memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB); if (!old_devtb) return false; - gfp_flag = GFP_KERNEL | __GFP_ZERO; + gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32; old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag, get_order(dev_table_size)); if (old_dev_tbl_cpy == NULL) { @@ -2432,7 +2436,8 @@ static int __init early_amd_iommu_init(void) /* Device table - directly used by all IOMMUs */ ret = -ENOMEM; - amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + amd_iommu_dev_table = (void *)__get_free_pages( + GFP_KERNEL | __GFP_ZERO | GFP_DMA32, get_order(dev_table_size)); if (amd_iommu_dev_table == NULL) goto out; From daae2d25a4779b272a66ddd01f5810bcee822b9e Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 9 Aug 2017 16:33:43 +0800 Subject: [PATCH 44/87] iommu/amd: Don't copy GCR3 table root pointer When iommu is pre_enabled in kdump kernel, if a device is set up with guest translations (DTE.GV=1), then don't copy GCR3 table root pointer but move the device over to an empty guest-cr3 table and handle the faults in the PPR log (which answer them with INVALID). After all these PPR faults are recoverable for the device and we should not allow the device to change old-kernels data when we don't have to. Signed-off-by: Baoquan He Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 28 +++------------------------- drivers/iommu/amd_iommu_init.c | 12 ++++++++++++ drivers/iommu/amd_iommu_proto.h | 1 + drivers/iommu/amd_iommu_types.h | 24 ++++++++++++++++++++++++ drivers/iommu/amd_iommu_v2.c | 18 +++++++++++++++++- 5 files changed, 57 insertions(+), 26 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index eebf4590cef9..9e8ea1907796 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -102,30 +102,6 @@ int amd_iommu_max_glx_val = -1; static const struct dma_map_ops amd_iommu_dma_ops; -/* - * This struct contains device specific data for the IOMMU - */ -struct iommu_dev_data { - struct list_head list; /* For domain->dev_list */ - struct list_head dev_data_list; /* For global dev_data_list */ - struct protection_domain *domain; /* Domain the device is bound to */ - u16 devid; /* PCI Device ID */ - u16 alias; /* Alias Device ID */ - bool iommu_v2; /* Device can make use of IOMMUv2 */ - bool passthrough; /* Device is identity mapped */ - struct { - bool enabled; - int qdep; - } ats; /* ATS state */ - bool pri_tlp; /* PASID TLB required for - PPR completions */ - u32 errata; /* Bitmap for errata to apply */ - bool use_vapic; /* Enable device to use vapic mode */ - bool defer_attach; - - struct ratelimit_state rs; /* Ratelimit IOPF messages */ -}; - /* * general struct to manage commands send to an IOMMU */ @@ -386,10 +362,11 @@ static struct iommu_dev_data *find_dev_data(u16 devid) return dev_data; } -static struct iommu_dev_data *get_dev_data(struct device *dev) +struct iommu_dev_data *get_dev_data(struct device *dev) { return dev->archdata.iommu; } +EXPORT_SYMBOL(get_dev_data); /* * Find or create an IOMMU group for a acpihid device. @@ -2540,6 +2517,7 @@ static int dir2prot(enum dma_data_direction direction) else return 0; } + /* * This function contains common code for mapping of a physically * contiguous memory region into DMA address space. It is used by all diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index c348732f27d7..88e7a6e950ae 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -214,6 +214,7 @@ u16 *amd_iommu_alias_table; * for a specific device. It is also indexed by the PCI device id. */ struct amd_iommu **amd_iommu_rlookup_table; +EXPORT_SYMBOL(amd_iommu_rlookup_table); /* * This table is used to find the irq remapping table for a given device id @@ -269,6 +270,7 @@ bool translation_pre_enabled(struct amd_iommu *iommu) { return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED); } +EXPORT_SYMBOL(translation_pre_enabled); static void clear_translation_pre_enabled(struct amd_iommu *iommu) { @@ -859,6 +861,7 @@ static bool copy_device_table(void) struct amd_iommu *iommu; u16 dom_id, dte_v, irq_v; gfp_t gfp_flag; + u64 tmp; if (!amd_iommu_pre_enabled) return false; @@ -910,6 +913,15 @@ static bool copy_device_table(void) old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0]; old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1]; __set_bit(dom_id, amd_iommu_pd_alloc_bitmap); + /* If gcr3 table existed, mask it out */ + if (old_devtb[devid].data[0] & DTE_FLAG_GV) { + tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B; + tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C; + old_dev_tbl_cpy[devid].data[1] &= ~tmp; + tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A; + tmp |= DTE_FLAG_GV; + old_dev_tbl_cpy[devid].data[0] &= ~tmp; + } } irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE; diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h index a9666d2005bb..90e62e9b01c5 100644 --- a/drivers/iommu/amd_iommu_proto.h +++ b/drivers/iommu/amd_iommu_proto.h @@ -88,4 +88,5 @@ static inline bool iommu_feature(struct amd_iommu *iommu, u64 f) } extern bool translation_pre_enabled(struct amd_iommu *iommu); +extern struct iommu_dev_data *get_dev_data(struct device *dev); #endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index f0979183ec9b..9e5af13be7c5 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -618,6 +618,30 @@ struct devid_map { bool cmd_line; }; +/* + * This struct contains device specific data for the IOMMU + */ +struct iommu_dev_data { + struct list_head list; /* For domain->dev_list */ + struct list_head dev_data_list; /* For global dev_data_list */ + struct protection_domain *domain; /* Domain the device is bound to */ + u16 devid; /* PCI Device ID */ + u16 alias; /* Alias Device ID */ + bool iommu_v2; /* Device can make use of IOMMUv2 */ + bool passthrough; /* Device is identity mapped */ + struct { + bool enabled; + int qdep; + } ats; /* ATS state */ + bool pri_tlp; /* PASID TLB required for + PPR completions */ + u32 errata; /* Bitmap for errata to apply */ + bool use_vapic; /* Enable device to use vapic mode */ + bool defer_attach; + + struct ratelimit_state rs; /* Ratelimit IOPF messages */ +}; + /* Map HPET and IOAPIC ids to the devid used by the IOMMU */ extern struct list_head ioapic_map; extern struct list_head hpet_map; diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 6629c472eafd..e705fac89cb4 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -562,14 +562,30 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) unsigned long flags; struct fault *fault; bool finish; - u16 tag; + u16 tag, devid; int ret; + struct iommu_dev_data *dev_data; + struct pci_dev *pdev = NULL; iommu_fault = data; tag = iommu_fault->tag & 0x1ff; finish = (iommu_fault->tag >> 9) & 1; + devid = iommu_fault->device_id; + pdev = pci_get_bus_and_slot(PCI_BUS_NUM(devid), devid & 0xff); + if (!pdev) + return -ENODEV; + dev_data = get_dev_data(&pdev->dev); + + /* In kdump kernel pci dev is not initialized yet -> send INVALID */ ret = NOTIFY_DONE; + if (translation_pre_enabled(amd_iommu_rlookup_table[devid]) + && dev_data->defer_attach) { + amd_iommu_complete_ppr(pdev, iommu_fault->pasid, + PPR_INVALID, tag); + goto out; + } + dev_state = get_device_state(iommu_fault->device_id); if (dev_state == NULL) goto out; From 20b46dff13bd1d250dc968b318f0fac3c996b3fa Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 9 Aug 2017 16:33:44 +0800 Subject: [PATCH 45/87] iommu/amd: Disable iommu only if amd_iommu=off is specified It's ok to disable iommu early in normal kernel or in kdump kernel when amd_iommu=off is specified. While we should not disable it in kdump kernel when on-flight dma is still on-going. Signed-off-by: Baoquan He Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 88e7a6e950ae..c7d03251c80a 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -2499,7 +2499,8 @@ static int __init early_amd_iommu_init(void) goto out; /* Disable any previously enabled IOMMUs */ - disable_iommus(); + if (!is_kdump_kernel() || amd_iommu_disabled) + disable_iommus(); if (amd_iommu_irq_remap) amd_iommu_irq_remap = check_ioapic_information(); From f42c22351455bcc8b9eecea19b48bdb499a50a65 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 27 Apr 2017 14:44:06 +0200 Subject: [PATCH 46/87] iommu/s390: Add support for iommu_device handling Add support for the iommu_device_register interface to make the s390 hardware iommus visible to the iommu core and in sysfs. Acked-by: Sebastian Ott Signed-off-by: Joerg Roedel --- arch/s390/include/asm/pci.h | 7 +++++++ arch/s390/pci/pci.c | 9 ++++++++- drivers/iommu/s390-iommu.c | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index f36b4b726057..386df9adef0a 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -122,6 +123,8 @@ struct zpci_dev { unsigned long iommu_pages; unsigned int next_bit; + struct iommu_device iommu_dev; /* IOMMU core handle */ + char res_name[16]; struct zpci_bar_struct bars[PCI_BAR_COUNT]; @@ -174,6 +177,10 @@ int clp_enable_fh(struct zpci_dev *, u8); int clp_disable_fh(struct zpci_dev *); int clp_get_state(u32 fid, enum zpci_state *state); +/* IOMMU Interface */ +int zpci_init_iommu(struct zpci_dev *zdev); +void zpci_destroy_iommu(struct zpci_dev *zdev); + #ifdef CONFIG_PCI /* Error handling and recovery */ void zpci_event_error(void *); diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 7b30af5da222..001ca80fa2fe 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -776,6 +776,7 @@ void pcibios_remove_bus(struct pci_bus *bus) zpci_exit_slot(zdev); zpci_cleanup_bus_resources(zdev); + zpci_destroy_iommu(zdev); zpci_free_domain(zdev); spin_lock(&zpci_list_lock); @@ -848,11 +849,15 @@ int zpci_create_device(struct zpci_dev *zdev) if (rc) goto out; + rc = zpci_init_iommu(zdev); + if (rc) + goto out_free; + mutex_init(&zdev->lock); if (zdev->state == ZPCI_FN_STATE_CONFIGURED) { rc = zpci_enable_device(zdev); if (rc) - goto out_free; + goto out_destroy_iommu; } rc = zpci_scan_bus(zdev); if (rc) @@ -869,6 +874,8 @@ int zpci_create_device(struct zpci_dev *zdev) out_disable: if (zdev->state == ZPCI_FN_STATE_ONLINE) zpci_disable_device(zdev); +out_destroy_iommu: + zpci_destroy_iommu(zdev); out_free: zpci_free_domain(zdev); out: diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 8788640756a7..85f3bc52efc2 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -18,6 +18,8 @@ */ #define S390_IOMMU_PGSIZES (~0xFFFUL) +static struct iommu_ops s390_iommu_ops; + struct s390_domain { struct iommu_domain domain; struct list_head devices; @@ -166,11 +168,13 @@ static void s390_iommu_detach_device(struct iommu_domain *domain, static int s390_iommu_add_device(struct device *dev) { struct iommu_group *group = iommu_group_get_for_dev(dev); + struct zpci_dev *zdev = to_pci_dev(dev)->sysdata; if (IS_ERR(group)) return PTR_ERR(group); iommu_group_put(group); + iommu_device_link(&zdev->iommu_dev, dev); return 0; } @@ -197,6 +201,7 @@ static void s390_iommu_remove_device(struct device *dev) s390_iommu_detach_device(domain, dev); } + iommu_device_unlink(&zdev->iommu_dev, dev); iommu_group_remove_device(dev); } @@ -327,6 +332,36 @@ static size_t s390_iommu_unmap(struct iommu_domain *domain, return size; } +int zpci_init_iommu(struct zpci_dev *zdev) +{ + int rc = 0; + + rc = iommu_device_sysfs_add(&zdev->iommu_dev, NULL, NULL, + "s390-iommu.%08x", zdev->fid); + if (rc) + goto out_err; + + iommu_device_set_ops(&zdev->iommu_dev, &s390_iommu_ops); + + rc = iommu_device_register(&zdev->iommu_dev); + if (rc) + goto out_sysfs; + + return 0; + +out_sysfs: + iommu_device_sysfs_remove(&zdev->iommu_dev); + +out_err: + return rc; +} + +void zpci_destroy_iommu(struct zpci_dev *zdev) +{ + iommu_device_unregister(&zdev->iommu_dev); + iommu_device_sysfs_remove(&zdev->iommu_dev); +} + static struct iommu_ops s390_iommu_ops = { .capable = s390_iommu_capable, .domain_alloc = s390_domain_alloc, From da4b02750a9fe1d1c4d047d14e69ec7542dddeb3 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 4 Aug 2017 17:29:06 +0100 Subject: [PATCH 47/87] iommu/of: Fix of_iommu_configure() for disabled IOMMUs Sudeep reports that the logic got slightly broken when a PCI iommu-map entry targets an IOMMU marked as disabled in DT, since of_pci_map_rid() succeeds in following a phandle, and of_iommu_xlate() doesn't return an error value, but we miss checking whether ops was actually non-NULL. Whilst this could be solved with a point fix in of_pci_iommu_init(), it suggests that all the juggling of ERR_PTR values through the ops pointer is proving rather too complicated for its own good, so let's instead simplify the whole flow (with a side-effect of eliminating the cause of the bug). The fact that we now rely on iommu_fwspec means that we no longer need to pass around an iommu_ops pointer at all - we can simply propagate a regular int return value until we know whether we have a viable IOMMU, then retrieve the ops from the fwspec if and when we actually need them. This makes everything a bit more uniform and certainly easier to follow. Fixes: d87beb749281 ("iommu/of: Handle PCI aliases properly") Reported-by: Sudeep Holla Tested-by: Sudeep Holla Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/of_iommu.c | 59 ++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 34160e7a8dd7..e60e3dba85a0 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -25,6 +25,8 @@ #include #include +#define NO_IOMMU 1 + static const struct of_device_id __iommu_of_table_sentinel __used __section(__iommu_of_table_end); @@ -109,8 +111,8 @@ static bool of_iommu_driver_present(struct device_node *np) return of_match_node(&__iommu_of_table, np); } -static const struct iommu_ops -*of_iommu_xlate(struct device *dev, struct of_phandle_args *iommu_spec) +static int of_iommu_xlate(struct device *dev, + struct of_phandle_args *iommu_spec) { const struct iommu_ops *ops; struct fwnode_handle *fwnode = &iommu_spec->np->fwnode; @@ -120,24 +122,20 @@ static const struct iommu_ops if ((ops && !ops->of_xlate) || !of_device_is_available(iommu_spec->np) || (!ops && !of_iommu_driver_present(iommu_spec->np))) - return NULL; + return NO_IOMMU; err = iommu_fwspec_init(dev, &iommu_spec->np->fwnode, ops); if (err) - return ERR_PTR(err); + return err; /* * The otherwise-empty fwspec handily serves to indicate the specific * IOMMU device we're waiting for, which will be useful if we ever get * a proper probe-ordering dependency mechanism in future. */ if (!ops) - return ERR_PTR(-EPROBE_DEFER); + return -EPROBE_DEFER; - err = ops->of_xlate(dev, iommu_spec); - if (err) - return ERR_PTR(err); - - return ops; + return ops->of_xlate(dev, iommu_spec); } struct of_pci_iommu_alias_info { @@ -148,7 +146,6 @@ struct of_pci_iommu_alias_info { static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data) { struct of_pci_iommu_alias_info *info = data; - const struct iommu_ops *ops; struct of_phandle_args iommu_spec = { .args_count = 1 }; int err; @@ -156,13 +153,12 @@ static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data) "iommu-map-mask", &iommu_spec.np, iommu_spec.args); if (err) - return err == -ENODEV ? 1 : err; + return err == -ENODEV ? NO_IOMMU : err; - ops = of_iommu_xlate(info->dev, &iommu_spec); + err = of_iommu_xlate(info->dev, &iommu_spec); of_node_put(iommu_spec.np); - - if (IS_ERR(ops)) - return PTR_ERR(ops); + if (err) + return err; return info->np == pdev->bus->dev.of_node; } @@ -172,7 +168,7 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, { const struct iommu_ops *ops = NULL; struct iommu_fwspec *fwspec = dev->iommu_fwspec; - int err; + int err = NO_IOMMU; if (!master_np) return NULL; @@ -198,10 +194,6 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, err = pci_for_each_dma_alias(to_pci_dev(dev), of_pci_iommu_init, &info); - if (err) /* err > 0 means the walk stopped, but non-fatally */ - ops = ERR_PTR(min(err, 0)); - else /* success implies both fwspec and ops are now valid */ - ops = dev->iommu_fwspec->ops; } else { struct of_phandle_args iommu_spec; int idx = 0; @@ -209,27 +201,34 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, while (!of_parse_phandle_with_args(master_np, "iommus", "#iommu-cells", idx, &iommu_spec)) { - ops = of_iommu_xlate(dev, &iommu_spec); + err = of_iommu_xlate(dev, &iommu_spec); of_node_put(iommu_spec.np); idx++; - if (IS_ERR_OR_NULL(ops)) + if (err) break; } } + + /* + * Two success conditions can be represented by non-negative err here: + * >0 : there is no IOMMU, or one was unavailable for non-fatal reasons + * 0 : we found an IOMMU, and dev->fwspec is initialised appropriately + * <0 : any actual error + */ + if (!err) + ops = dev->iommu_fwspec->ops; /* * If we have reason to believe the IOMMU driver missed the initial * add_device callback for dev, replay it to get things in order. */ - if (!IS_ERR_OR_NULL(ops) && ops->add_device && - dev->bus && !dev->iommu_group) { + if (ops && ops->add_device && dev->bus && !dev->iommu_group) err = ops->add_device(dev); - if (err) - ops = ERR_PTR(err); - } /* Ignore all other errors apart from EPROBE_DEFER */ - if (IS_ERR(ops) && (PTR_ERR(ops) != -EPROBE_DEFER)) { - dev_dbg(dev, "Adding to IOMMU failed: %ld\n", PTR_ERR(ops)); + if (err == -EPROBE_DEFER) { + ops = ERR_PTR(err); + } else if (err < 0) { + dev_dbg(dev, "Adding to IOMMU failed: %d\n", err); ops = NULL; } From 42f87e71c3df12d8f29ec1bb7b47772ffaeaf1ee Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 10 Aug 2017 14:44:28 +0200 Subject: [PATCH 48/87] iommu/iova: Add flush-queue data structures This patch adds the basic data-structures to implement flush-queues in the generic IOVA code. It also adds the initialization and destroy routines for these data structures. The initialization routine is designed so that the use of this feature is optional for the users of IOVA code. Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 39 +++++++++++++++++++++++++++++++++++++++ include/linux/iova.h | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 246f14c83944..b9f6ce02a1e1 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -50,10 +50,48 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, iovad->granule = granule; iovad->start_pfn = start_pfn; iovad->dma_32bit_pfn = pfn_32bit + 1; + iovad->flush_cb = NULL; + iovad->fq = NULL; init_iova_rcaches(iovad); } EXPORT_SYMBOL_GPL(init_iova_domain); +static void free_iova_flush_queue(struct iova_domain *iovad) +{ + if (!iovad->fq) + return; + + free_percpu(iovad->fq); + + iovad->fq = NULL; + iovad->flush_cb = NULL; + iovad->entry_dtor = NULL; +} + +int init_iova_flush_queue(struct iova_domain *iovad, + iova_flush_cb flush_cb, iova_entry_dtor entry_dtor) +{ + int cpu; + + iovad->fq = alloc_percpu(struct iova_fq); + if (!iovad->fq) + return -ENOMEM; + + iovad->flush_cb = flush_cb; + iovad->entry_dtor = entry_dtor; + + for_each_possible_cpu(cpu) { + struct iova_fq *fq; + + fq = per_cpu_ptr(iovad->fq, cpu); + fq->head = 0; + fq->tail = 0; + } + + return 0; +} +EXPORT_SYMBOL_GPL(init_iova_flush_queue); + static struct rb_node * __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) { @@ -433,6 +471,7 @@ void put_iova_domain(struct iova_domain *iovad) struct rb_node *node; unsigned long flags; + free_iova_flush_queue(iovad); free_iova_rcaches(iovad); spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); node = rb_first(&iovad->rbroot); diff --git a/include/linux/iova.h b/include/linux/iova.h index e0a892ae45c0..8aa10896150e 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -36,6 +36,30 @@ struct iova_rcache { struct iova_cpu_rcache __percpu *cpu_rcaches; }; +struct iova_domain; + +/* Call-Back from IOVA code into IOMMU drivers */ +typedef void (* iova_flush_cb)(struct iova_domain *domain); + +/* Destructor for per-entry data */ +typedef void (* iova_entry_dtor)(unsigned long data); + +/* Number of entries per Flush Queue */ +#define IOVA_FQ_SIZE 256 + +/* Flush Queue entry for defered flushing */ +struct iova_fq_entry { + unsigned long iova_pfn; + unsigned long pages; + unsigned long data; +}; + +/* Per-CPU Flush Queue structure */ +struct iova_fq { + struct iova_fq_entry entries[IOVA_FQ_SIZE]; + unsigned head, tail; +}; + /* holds all the iova translations for a domain */ struct iova_domain { spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ @@ -45,6 +69,14 @@ struct iova_domain { unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ + + iova_flush_cb flush_cb; /* Call-Back function to flush IOMMU + TLBs */ + + iova_entry_dtor entry_dtor; /* IOMMU driver specific destructor for + iova entry */ + + struct iova_fq __percpu *fq; /* Flush Queue */ }; static inline unsigned long iova_size(struct iova *iova) @@ -102,6 +134,8 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn, unsigned long pfn_32bit); +int init_iova_flush_queue(struct iova_domain *iovad, + iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); struct iova *split_and_remove_iova(struct iova_domain *iovad, @@ -174,6 +208,13 @@ static inline void init_iova_domain(struct iova_domain *iovad, { } +static inline int init_iova_flush_queue(struct iova_domain *iovad, + iova_flush_cb flush_cb, + iova_entry_dtor entry_dtor) +{ + return -ENODEV; +} + static inline struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn) { From 1928210107edd4fa786199fef6b875d3af3bef88 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 10 Aug 2017 15:49:44 +0200 Subject: [PATCH 49/87] iommu/iova: Implement Flush-Queue ring buffer Add a function to add entries to the Flush-Queue ring buffer. If the buffer is full, call the flush-callback and free the entries. Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 80 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/iova.h | 9 +++++ 2 files changed, 89 insertions(+) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index b9f6ce02a1e1..e5c9a7ae6088 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -32,6 +32,7 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad, unsigned long limit_pfn); static void init_iova_rcaches(struct iova_domain *iovad); static void free_iova_rcaches(struct iova_domain *iovad); +static void fq_destroy_all_entries(struct iova_domain *iovad); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, @@ -61,6 +62,7 @@ static void free_iova_flush_queue(struct iova_domain *iovad) if (!iovad->fq) return; + fq_destroy_all_entries(iovad); free_percpu(iovad->fq); iovad->fq = NULL; @@ -461,6 +463,84 @@ free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size) } EXPORT_SYMBOL_GPL(free_iova_fast); +#define fq_ring_for_each(i, fq) \ + for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE) + +static inline bool fq_full(struct iova_fq *fq) +{ + return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head); +} + +static inline unsigned fq_ring_add(struct iova_fq *fq) +{ + unsigned idx = fq->tail; + + fq->tail = (idx + 1) % IOVA_FQ_SIZE; + + return idx; +} + +static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq) +{ + unsigned idx; + + fq_ring_for_each(idx, fq) { + + if (iovad->entry_dtor) + iovad->entry_dtor(fq->entries[idx].data); + + free_iova_fast(iovad, + fq->entries[idx].iova_pfn, + fq->entries[idx].pages); + } + + fq->head = 0; + fq->tail = 0; +} + +static void fq_destroy_all_entries(struct iova_domain *iovad) +{ + int cpu; + + /* + * This code runs when the iova_domain is being detroyed, so don't + * bother to free iovas, just call the entry_dtor on all remaining + * entries. + */ + if (!iovad->entry_dtor) + return; + + for_each_possible_cpu(cpu) { + struct iova_fq *fq = per_cpu_ptr(iovad->fq, cpu); + int idx; + + fq_ring_for_each(idx, fq) + iovad->entry_dtor(fq->entries[idx].data); + } +} + +void queue_iova(struct iova_domain *iovad, + unsigned long pfn, unsigned long pages, + unsigned long data) +{ + struct iova_fq *fq = get_cpu_ptr(iovad->fq); + unsigned idx; + + if (fq_full(fq)) { + iovad->flush_cb(iovad); + fq_ring_free(iovad, fq); + } + + idx = fq_ring_add(fq); + + fq->entries[idx].iova_pfn = pfn; + fq->entries[idx].pages = pages; + fq->entries[idx].data = data; + + put_cpu_ptr(iovad->fq); +} +EXPORT_SYMBOL_GPL(queue_iova); + /** * put_iova_domain - destroys the iova doamin * @iovad: - iova domain in question. diff --git a/include/linux/iova.h b/include/linux/iova.h index 8aa10896150e..1ae85248ec50 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -127,6 +127,9 @@ struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, bool size_aligned); void free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size); +void queue_iova(struct iova_domain *iovad, + unsigned long pfn, unsigned long pages, + unsigned long data); unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn); struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, @@ -182,6 +185,12 @@ static inline void free_iova_fast(struct iova_domain *iovad, { } +static inline void queue_iova(struct iova_domain *iovad, + unsigned long pfn, unsigned long pages, + unsigned long data) +{ +} + static inline unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn) From fb418dab8a4f01dde0c025d15145c589ec02796b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 10 Aug 2017 16:14:59 +0200 Subject: [PATCH 50/87] iommu/iova: Add flush counters to Flush-Queue implementation There are two counters: * fq_flush_start_cnt - Increased when a TLB flush is started. * fq_flush_finish_cnt - Increased when a TLB flush is finished. The fq_flush_start_cnt is assigned to every Flush-Queue entry on its creation. When freeing entries from the Flush-Queue, the value in the entry is compared to the fq_flush_finish_cnt. The entry can only be freed when its value is less than the value of fq_flush_finish_cnt. The reason for these counters it to take advantage of IOMMU TLB flushes that happened on other CPUs. These already flushed the TLB for Flush-Queue entries on other CPUs so that they can already be freed without flushing the TLB again. This makes it less likely that the Flush-Queue is full and saves IOMMU TLB flushes. Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 29 +++++++++++++++++++++++++---- include/linux/iova.h | 8 ++++++++ 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index e5c9a7ae6088..47b144e417ad 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -75,6 +75,9 @@ int init_iova_flush_queue(struct iova_domain *iovad, { int cpu; + atomic64_set(&iovad->fq_flush_start_cnt, 0); + atomic64_set(&iovad->fq_flush_finish_cnt, 0); + iovad->fq = alloc_percpu(struct iova_fq); if (!iovad->fq) return -ENOMEM; @@ -482,20 +485,30 @@ static inline unsigned fq_ring_add(struct iova_fq *fq) static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq) { + u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt); unsigned idx; fq_ring_for_each(idx, fq) { + if (fq->entries[idx].counter >= counter) + break; + if (iovad->entry_dtor) iovad->entry_dtor(fq->entries[idx].data); free_iova_fast(iovad, fq->entries[idx].iova_pfn, fq->entries[idx].pages); - } - fq->head = 0; - fq->tail = 0; + fq->head = (fq->head + 1) % IOVA_FQ_SIZE; + } +} + +static void iova_domain_flush(struct iova_domain *iovad) +{ + atomic64_inc(&iovad->fq_flush_start_cnt); + iovad->flush_cb(iovad); + atomic64_inc(&iovad->fq_flush_finish_cnt); } static void fq_destroy_all_entries(struct iova_domain *iovad) @@ -526,8 +539,15 @@ void queue_iova(struct iova_domain *iovad, struct iova_fq *fq = get_cpu_ptr(iovad->fq); unsigned idx; + /* + * First remove all entries from the flush queue that have already been + * flushed out on another CPU. This makes the fq_full() check below less + * likely to be true. + */ + fq_ring_free(iovad, fq); + if (fq_full(fq)) { - iovad->flush_cb(iovad); + iova_domain_flush(iovad); fq_ring_free(iovad, fq); } @@ -536,6 +556,7 @@ void queue_iova(struct iova_domain *iovad, fq->entries[idx].iova_pfn = pfn; fq->entries[idx].pages = pages; fq->entries[idx].data = data; + fq->entries[idx].counter = atomic64_read(&iovad->fq_flush_start_cnt); put_cpu_ptr(iovad->fq); } diff --git a/include/linux/iova.h b/include/linux/iova.h index 1ae85248ec50..985b8008999e 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -14,6 +14,7 @@ #include #include #include +#include #include /* iova structure */ @@ -52,6 +53,7 @@ struct iova_fq_entry { unsigned long iova_pfn; unsigned long pages; unsigned long data; + u64 counter; /* Flush counter when this entrie was added */ }; /* Per-CPU Flush Queue structure */ @@ -77,6 +79,12 @@ struct iova_domain { iova entry */ struct iova_fq __percpu *fq; /* Flush Queue */ + + atomic64_t fq_flush_start_cnt; /* Number of TLB flushes that + have been started */ + + atomic64_t fq_flush_finish_cnt; /* Number of TLB flushes that + have been finished */ }; static inline unsigned long iova_size(struct iova *iova) From 8109c2a2f8463852dddd6a1c3fcf262047c0c124 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 10 Aug 2017 16:31:17 +0200 Subject: [PATCH 51/87] iommu/iova: Add locking to Flush-Queues The lock is taken from the same CPU most of the time. But having it allows to flush the queue also from another CPU if necessary. This will be used by a timer to regularily flush any pending IOVAs from the Flush-Queues. Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 11 +++++++++++ include/linux/iova.h | 1 + 2 files changed, 12 insertions(+) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 47b144e417ad..749d39533e0b 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -91,6 +91,8 @@ int init_iova_flush_queue(struct iova_domain *iovad, fq = per_cpu_ptr(iovad->fq, cpu); fq->head = 0; fq->tail = 0; + + spin_lock_init(&fq->lock); } return 0; @@ -471,6 +473,7 @@ EXPORT_SYMBOL_GPL(free_iova_fast); static inline bool fq_full(struct iova_fq *fq) { + assert_spin_locked(&fq->lock); return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head); } @@ -478,6 +481,8 @@ static inline unsigned fq_ring_add(struct iova_fq *fq) { unsigned idx = fq->tail; + assert_spin_locked(&fq->lock); + fq->tail = (idx + 1) % IOVA_FQ_SIZE; return idx; @@ -488,6 +493,8 @@ static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq) u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt); unsigned idx; + assert_spin_locked(&fq->lock); + fq_ring_for_each(idx, fq) { if (fq->entries[idx].counter >= counter) @@ -537,8 +544,11 @@ void queue_iova(struct iova_domain *iovad, unsigned long data) { struct iova_fq *fq = get_cpu_ptr(iovad->fq); + unsigned long flags; unsigned idx; + spin_lock_irqsave(&fq->lock, flags); + /* * First remove all entries from the flush queue that have already been * flushed out on another CPU. This makes the fq_full() check below less @@ -558,6 +568,7 @@ void queue_iova(struct iova_domain *iovad, fq->entries[idx].data = data; fq->entries[idx].counter = atomic64_read(&iovad->fq_flush_start_cnt); + spin_unlock_irqrestore(&fq->lock, flags); put_cpu_ptr(iovad->fq); } EXPORT_SYMBOL_GPL(queue_iova); diff --git a/include/linux/iova.h b/include/linux/iova.h index 985b8008999e..913a690cd4b0 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -60,6 +60,7 @@ struct iova_fq_entry { struct iova_fq { struct iova_fq_entry entries[IOVA_FQ_SIZE]; unsigned head, tail; + spinlock_t lock; }; /* holds all the iova translations for a domain */ From 9a005a800ae817c2c90ef117d7cd77614d866777 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 10 Aug 2017 16:58:18 +0200 Subject: [PATCH 52/87] iommu/iova: Add flush timer Add a timer to flush entries from the Flush-Queues every 10ms. This makes sure that no stale TLB entries remain for too long after an IOVA has been unmapped. Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 32 ++++++++++++++++++++++++++++++++ include/linux/iova.h | 8 ++++++++ 2 files changed, 40 insertions(+) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 749d39533e0b..33edfa794ae9 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -33,6 +33,7 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad, static void init_iova_rcaches(struct iova_domain *iovad); static void free_iova_rcaches(struct iova_domain *iovad); static void fq_destroy_all_entries(struct iova_domain *iovad); +static void fq_flush_timeout(unsigned long data); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, @@ -62,7 +63,11 @@ static void free_iova_flush_queue(struct iova_domain *iovad) if (!iovad->fq) return; + if (timer_pending(&iovad->fq_timer)) + del_timer(&iovad->fq_timer); + fq_destroy_all_entries(iovad); + free_percpu(iovad->fq); iovad->fq = NULL; @@ -95,6 +100,9 @@ int init_iova_flush_queue(struct iova_domain *iovad, spin_lock_init(&fq->lock); } + setup_timer(&iovad->fq_timer, fq_flush_timeout, (unsigned long)iovad); + atomic_set(&iovad->fq_timer_on, 0); + return 0; } EXPORT_SYMBOL_GPL(init_iova_flush_queue); @@ -539,6 +547,25 @@ static void fq_destroy_all_entries(struct iova_domain *iovad) } } +static void fq_flush_timeout(unsigned long data) +{ + struct iova_domain *iovad = (struct iova_domain *)data; + int cpu; + + atomic_set(&iovad->fq_timer_on, 0); + iova_domain_flush(iovad); + + for_each_possible_cpu(cpu) { + unsigned long flags; + struct iova_fq *fq; + + fq = per_cpu_ptr(iovad->fq, cpu); + spin_lock_irqsave(&fq->lock, flags); + fq_ring_free(iovad, fq); + spin_unlock_irqrestore(&fq->lock, flags); + } +} + void queue_iova(struct iova_domain *iovad, unsigned long pfn, unsigned long pages, unsigned long data) @@ -569,6 +596,11 @@ void queue_iova(struct iova_domain *iovad, fq->entries[idx].counter = atomic64_read(&iovad->fq_flush_start_cnt); spin_unlock_irqrestore(&fq->lock, flags); + + if (atomic_cmpxchg(&iovad->fq_timer_on, 0, 1) == 0) + mod_timer(&iovad->fq_timer, + jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT)); + put_cpu_ptr(iovad->fq); } EXPORT_SYMBOL_GPL(queue_iova); diff --git a/include/linux/iova.h b/include/linux/iova.h index 913a690cd4b0..d179b9bf7814 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -48,6 +48,9 @@ typedef void (* iova_entry_dtor)(unsigned long data); /* Number of entries per Flush Queue */ #define IOVA_FQ_SIZE 256 +/* Timeout (in ms) after which entries are flushed from the Flush-Queue */ +#define IOVA_FQ_TIMEOUT 10 + /* Flush Queue entry for defered flushing */ struct iova_fq_entry { unsigned long iova_pfn; @@ -86,6 +89,11 @@ struct iova_domain { atomic64_t fq_flush_finish_cnt; /* Number of TLB flushes that have been finished */ + + struct timer_list fq_timer; /* Timer to regularily empty the + flush-queues */ + atomic_t fq_timer_on; /* 1 when timer is active, 0 + when not */ }; static inline unsigned long iova_size(struct iova *iova) From 9003d6186321e22b19125721b6fb2aa390ff8be6 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 10 Aug 2017 17:19:13 +0200 Subject: [PATCH 53/87] iommu/amd: Make use of iova queue flushing Rip out the implementation in the AMD IOMMU driver and use the one in the common iova code instead. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 229 ++------------------------------------ 1 file changed, 9 insertions(+), 220 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 688e77576e5a..cabcaa506ed6 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -137,20 +137,7 @@ struct kmem_cache *amd_iommu_irq_cache; static void update_domain(struct protection_domain *domain); static int protection_domain_init(struct protection_domain *domain); static void detach_device(struct device *dev); - -#define FLUSH_QUEUE_SIZE 256 - -struct flush_queue_entry { - unsigned long iova_pfn; - unsigned long pages; - u64 counter; /* Flush counter when this entry was added to the queue */ -}; - -struct flush_queue { - struct flush_queue_entry *entries; - unsigned head, tail; - spinlock_t lock; -}; +static void iova_domain_flush_tlb(struct iova_domain *iovad); /* * Data container for a dma_ops specific protection domain @@ -161,36 +148,6 @@ struct dma_ops_domain { /* IOVA RB-Tree */ struct iova_domain iovad; - - struct flush_queue __percpu *flush_queue; - - /* - * We need two counter here to be race-free wrt. IOTLB flushing and - * adding entries to the flush queue. - * - * The flush_start_cnt is incremented _before_ the IOTLB flush starts. - * New entries added to the flush ring-buffer get their 'counter' value - * from here. This way we can make sure that entries added to the queue - * (or other per-cpu queues of the same domain) while the TLB is about - * to be flushed are not considered to be flushed already. - */ - atomic64_t flush_start_cnt; - - /* - * The flush_finish_cnt is incremented when an IOTLB flush is complete. - * This value is always smaller than flush_start_cnt. The queue_add - * function frees all IOVAs that have a counter value smaller than - * flush_finish_cnt. This makes sure that we only free IOVAs that are - * flushed out of the IOTLB of the domain. - */ - atomic64_t flush_finish_cnt; - - /* - * Timer to make sure we don't keep IOVAs around unflushed - * for too long - */ - struct timer_list flush_timer; - atomic_t flush_timer_on; }; static struct iova_domain reserved_iova_ranges; @@ -1788,178 +1745,19 @@ static void free_gcr3_table(struct protection_domain *domain) free_page((unsigned long)domain->gcr3_tbl); } -static void dma_ops_domain_free_flush_queue(struct dma_ops_domain *dom) -{ - int cpu; - - for_each_possible_cpu(cpu) { - struct flush_queue *queue; - - queue = per_cpu_ptr(dom->flush_queue, cpu); - kfree(queue->entries); - } - - free_percpu(dom->flush_queue); - - dom->flush_queue = NULL; -} - -static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom) -{ - int cpu; - - atomic64_set(&dom->flush_start_cnt, 0); - atomic64_set(&dom->flush_finish_cnt, 0); - - dom->flush_queue = alloc_percpu(struct flush_queue); - if (!dom->flush_queue) - return -ENOMEM; - - /* First make sure everything is cleared */ - for_each_possible_cpu(cpu) { - struct flush_queue *queue; - - queue = per_cpu_ptr(dom->flush_queue, cpu); - queue->head = 0; - queue->tail = 0; - queue->entries = NULL; - } - - /* Now start doing the allocation */ - for_each_possible_cpu(cpu) { - struct flush_queue *queue; - - queue = per_cpu_ptr(dom->flush_queue, cpu); - queue->entries = kzalloc(FLUSH_QUEUE_SIZE * sizeof(*queue->entries), - GFP_KERNEL); - if (!queue->entries) { - dma_ops_domain_free_flush_queue(dom); - return -ENOMEM; - } - - spin_lock_init(&queue->lock); - } - - return 0; -} - static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom) { - atomic64_inc(&dom->flush_start_cnt); domain_flush_tlb(&dom->domain); domain_flush_complete(&dom->domain); - atomic64_inc(&dom->flush_finish_cnt); } -static inline bool queue_ring_full(struct flush_queue *queue) +static void iova_domain_flush_tlb(struct iova_domain *iovad) { - assert_spin_locked(&queue->lock); + struct dma_ops_domain *dom; - return (((queue->tail + 1) % FLUSH_QUEUE_SIZE) == queue->head); -} - -#define queue_ring_for_each(i, q) \ - for (i = (q)->head; i != (q)->tail; i = (i + 1) % FLUSH_QUEUE_SIZE) - -static inline unsigned queue_ring_add(struct flush_queue *queue) -{ - unsigned idx = queue->tail; - - assert_spin_locked(&queue->lock); - queue->tail = (idx + 1) % FLUSH_QUEUE_SIZE; - - return idx; -} - -static inline void queue_ring_remove_head(struct flush_queue *queue) -{ - assert_spin_locked(&queue->lock); - queue->head = (queue->head + 1) % FLUSH_QUEUE_SIZE; -} - -static void queue_ring_free_flushed(struct dma_ops_domain *dom, - struct flush_queue *queue) -{ - u64 counter = atomic64_read(&dom->flush_finish_cnt); - int idx; - - queue_ring_for_each(idx, queue) { - /* - * This assumes that counter values in the ring-buffer are - * monotonously rising. - */ - if (queue->entries[idx].counter >= counter) - break; - - free_iova_fast(&dom->iovad, - queue->entries[idx].iova_pfn, - queue->entries[idx].pages); - - queue_ring_remove_head(queue); - } -} - -static void queue_add(struct dma_ops_domain *dom, - unsigned long address, unsigned long pages) -{ - struct flush_queue *queue; - unsigned long flags; - int idx; - - pages = __roundup_pow_of_two(pages); - address >>= PAGE_SHIFT; - - queue = get_cpu_ptr(dom->flush_queue); - spin_lock_irqsave(&queue->lock, flags); - - /* - * First remove the enries from the ring-buffer that are already - * flushed to make the below queue_ring_full() check less likely - */ - queue_ring_free_flushed(dom, queue); - - /* - * When ring-queue is full, flush the entries from the IOTLB so - * that we can free all entries with queue_ring_free_flushed() - * below. - */ - if (queue_ring_full(queue)) { - dma_ops_domain_flush_tlb(dom); - queue_ring_free_flushed(dom, queue); - } - - idx = queue_ring_add(queue); - - queue->entries[idx].iova_pfn = address; - queue->entries[idx].pages = pages; - queue->entries[idx].counter = atomic64_read(&dom->flush_start_cnt); - - spin_unlock_irqrestore(&queue->lock, flags); - - if (atomic_cmpxchg(&dom->flush_timer_on, 0, 1) == 0) - mod_timer(&dom->flush_timer, jiffies + msecs_to_jiffies(10)); - - put_cpu_ptr(dom->flush_queue); -} - -static void queue_flush_timeout(unsigned long data) -{ - struct dma_ops_domain *dom = (struct dma_ops_domain *)data; - int cpu; - - atomic_set(&dom->flush_timer_on, 0); + dom = container_of(iovad, struct dma_ops_domain, iovad); dma_ops_domain_flush_tlb(dom); - - for_each_possible_cpu(cpu) { - struct flush_queue *queue; - unsigned long flags; - - queue = per_cpu_ptr(dom->flush_queue, cpu); - spin_lock_irqsave(&queue->lock, flags); - queue_ring_free_flushed(dom, queue); - spin_unlock_irqrestore(&queue->lock, flags); - } } /* @@ -1973,11 +1771,6 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) del_domain_from_list(&dom->domain); - if (timer_pending(&dom->flush_timer)) - del_timer(&dom->flush_timer); - - dma_ops_domain_free_flush_queue(dom); - put_iova_domain(&dom->iovad); free_pagetable(&dom->domain); @@ -2013,16 +1806,11 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) init_iova_domain(&dma_dom->iovad, PAGE_SIZE, IOVA_START_PFN, DMA_32BIT_PFN); - /* Initialize reserved ranges */ - copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad); - - if (dma_ops_domain_alloc_flush_queue(dma_dom)) + if (init_iova_flush_queue(&dma_dom->iovad, iova_domain_flush_tlb, NULL)) goto free_dma_dom; - setup_timer(&dma_dom->flush_timer, queue_flush_timeout, - (unsigned long)dma_dom); - - atomic_set(&dma_dom->flush_timer_on, 0); + /* Initialize reserved ranges */ + copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad); add_domain_to_list(&dma_dom->domain); @@ -2619,7 +2407,8 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, domain_flush_tlb(&dma_dom->domain); domain_flush_complete(&dma_dom->domain); } else { - queue_add(dma_dom, dma_addr, pages); + pages = __roundup_pow_of_two(pages); + queue_iova(&dma_dom->iovad, dma_addr >> PAGE_SHIFT, pages, 0); } } From c8acb28b331364b32a5c81dbfbdfc8475b2f1f27 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 11 Aug 2017 11:42:46 +0200 Subject: [PATCH 54/87] iommu/vt-d: Allow to flush more than 4GB of device TLBs The shift qi_flush_dev_iotlb() is done on an int, which limits the mask to 32 bits. Make the mask 64 bits wide so that more than 4GB of address range can be flushed at once. Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index c8b0329c85d2..ca5ebaeafd6a 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1343,7 +1343,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep, if (mask) { BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1)); - addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1; + addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1; desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; } else desc.high = QI_DEV_IOTLB_ADDR(addr); From 13cf01744608e1dc3f13dd316c95cb7a1fdaf740 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 11 Aug 2017 11:40:10 +0200 Subject: [PATCH 55/87] iommu/vt-d: Make use of iova deferred flushing Remove the deferred flushing implementation in the Intel VT-d driver and use the one from the common iova code instead. Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 197 +++++++----------------------------- 1 file changed, 38 insertions(+), 159 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 687f18f65cea..d5e8b8628a1a 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -458,31 +458,6 @@ static LIST_HEAD(dmar_rmrr_units); #define for_each_rmrr_units(rmrr) \ list_for_each_entry(rmrr, &dmar_rmrr_units, list) -static void flush_unmaps_timeout(unsigned long data); - -struct deferred_flush_entry { - unsigned long iova_pfn; - unsigned long nrpages; - struct dmar_domain *domain; - struct page *freelist; -}; - -#define HIGH_WATER_MARK 250 -struct deferred_flush_table { - int next; - struct deferred_flush_entry entries[HIGH_WATER_MARK]; -}; - -struct deferred_flush_data { - spinlock_t lock; - int timer_on; - struct timer_list timer; - long size; - struct deferred_flush_table *tables; -}; - -static DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush); - /* bitmap for indexing intel_iommus */ static int g_num_of_iommus; @@ -1309,6 +1284,13 @@ static void dma_free_pagelist(struct page *freelist) } } +static void iova_entry_free(unsigned long data) +{ + struct page *freelist = (struct page *)data; + + dma_free_pagelist(freelist); +} + /* iommu handling */ static int iommu_alloc_root_entry(struct intel_iommu *iommu) { @@ -1622,6 +1604,25 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, addr, mask); } +static void iommu_flush_iova(struct iova_domain *iovad) +{ + struct dmar_domain *domain; + int idx; + + domain = container_of(iovad, struct dmar_domain, iovad); + + for_each_domain_iommu(idx, domain) { + struct intel_iommu *iommu = g_iommus[idx]; + u16 did = domain->iommu_did[iommu->seq_id]; + + iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); + + if (!cap_caching_mode(iommu->cap)) + iommu_flush_dev_iotlb(get_iommu_domain(iommu, did), + 0, MAX_AGAW_PFN_WIDTH); + } +} + static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) { u32 pmen; @@ -1932,9 +1933,16 @@ static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu, { int adjust_width, agaw; unsigned long sagaw; + int err; init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN, DMA_32BIT_PFN); + + err = init_iova_flush_queue(&domain->iovad, + iommu_flush_iova, iova_entry_free); + if (err) + return err; + domain_reserve_special_ranges(domain); /* calculate AGAW */ @@ -1986,14 +1994,6 @@ static void domain_exit(struct dmar_domain *domain) if (!domain) return; - /* Flush any lazy unmaps that may reference this domain */ - if (!intel_iommu_strict) { - int cpu; - - for_each_possible_cpu(cpu) - flush_unmaps_timeout(cpu); - } - /* Remove associated devices and clear attached or cached domains */ rcu_read_lock(); domain_remove_dev_info(domain); @@ -3206,7 +3206,7 @@ static int __init init_dmars(void) bool copied_tables = false; struct device *dev; struct intel_iommu *iommu; - int i, ret, cpu; + int i, ret; /* * for each drhd @@ -3239,22 +3239,6 @@ static int __init init_dmars(void) goto error; } - for_each_possible_cpu(cpu) { - struct deferred_flush_data *dfd = per_cpu_ptr(&deferred_flush, - cpu); - - dfd->tables = kzalloc(g_num_of_iommus * - sizeof(struct deferred_flush_table), - GFP_KERNEL); - if (!dfd->tables) { - ret = -ENOMEM; - goto free_g_iommus; - } - - spin_lock_init(&dfd->lock); - setup_timer(&dfd->timer, flush_unmaps_timeout, cpu); - } - for_each_active_iommu(iommu, drhd) { g_iommus[iommu->seq_id] = iommu; @@ -3437,10 +3421,9 @@ free_iommu: disable_dmar_iommu(iommu); free_dmar_iommu(iommu); } -free_g_iommus: - for_each_possible_cpu(cpu) - kfree(per_cpu_ptr(&deferred_flush, cpu)->tables); + kfree(g_iommus); + error: return ret; } @@ -3645,110 +3628,6 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page, dir, *dev->dma_mask); } -static void flush_unmaps(struct deferred_flush_data *flush_data) -{ - int i, j; - - flush_data->timer_on = 0; - - /* just flush them all */ - for (i = 0; i < g_num_of_iommus; i++) { - struct intel_iommu *iommu = g_iommus[i]; - struct deferred_flush_table *flush_table = - &flush_data->tables[i]; - if (!iommu) - continue; - - if (!flush_table->next) - continue; - - /* In caching mode, global flushes turn emulation expensive */ - if (!cap_caching_mode(iommu->cap)) - iommu->flush.flush_iotlb(iommu, 0, 0, 0, - DMA_TLB_GLOBAL_FLUSH); - for (j = 0; j < flush_table->next; j++) { - unsigned long mask; - struct deferred_flush_entry *entry = - &flush_table->entries[j]; - unsigned long iova_pfn = entry->iova_pfn; - unsigned long nrpages = entry->nrpages; - struct dmar_domain *domain = entry->domain; - struct page *freelist = entry->freelist; - - /* On real hardware multiple invalidations are expensive */ - if (cap_caching_mode(iommu->cap)) - iommu_flush_iotlb_psi(iommu, domain, - mm_to_dma_pfn(iova_pfn), - nrpages, !freelist, 0); - else { - mask = ilog2(nrpages); - iommu_flush_dev_iotlb(domain, - (uint64_t)iova_pfn << PAGE_SHIFT, mask); - } - free_iova_fast(&domain->iovad, iova_pfn, nrpages); - if (freelist) - dma_free_pagelist(freelist); - } - flush_table->next = 0; - } - - flush_data->size = 0; -} - -static void flush_unmaps_timeout(unsigned long cpuid) -{ - struct deferred_flush_data *flush_data = per_cpu_ptr(&deferred_flush, cpuid); - unsigned long flags; - - spin_lock_irqsave(&flush_data->lock, flags); - flush_unmaps(flush_data); - spin_unlock_irqrestore(&flush_data->lock, flags); -} - -static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn, - unsigned long nrpages, struct page *freelist) -{ - unsigned long flags; - int entry_id, iommu_id; - struct intel_iommu *iommu; - struct deferred_flush_entry *entry; - struct deferred_flush_data *flush_data; - - flush_data = raw_cpu_ptr(&deferred_flush); - - /* Flush all CPUs' entries to avoid deferring too much. If - * this becomes a bottleneck, can just flush us, and rely on - * flush timer for the rest. - */ - if (flush_data->size == HIGH_WATER_MARK) { - int cpu; - - for_each_online_cpu(cpu) - flush_unmaps_timeout(cpu); - } - - spin_lock_irqsave(&flush_data->lock, flags); - - iommu = domain_get_iommu(dom); - iommu_id = iommu->seq_id; - - entry_id = flush_data->tables[iommu_id].next; - ++(flush_data->tables[iommu_id].next); - - entry = &flush_data->tables[iommu_id].entries[entry_id]; - entry->domain = dom; - entry->iova_pfn = iova_pfn; - entry->nrpages = nrpages; - entry->freelist = freelist; - - if (!flush_data->timer_on) { - mod_timer(&flush_data->timer, jiffies + msecs_to_jiffies(10)); - flush_data->timer_on = 1; - } - flush_data->size++; - spin_unlock_irqrestore(&flush_data->lock, flags); -} - static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) { struct dmar_domain *domain; @@ -3784,7 +3663,8 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages)); dma_free_pagelist(freelist); } else { - add_unmap(domain, iova_pfn, nrpages, freelist); + queue_iova(&domain->iovad, iova_pfn, nrpages, + (unsigned long)freelist); /* * queue up the release of the unmap to save the 1/6th of the * cpu used up by the iotlb flush operation... @@ -4721,7 +4601,6 @@ static void free_all_cpu_cached_iovas(unsigned int cpu) static int intel_iommu_cpu_dead(unsigned int cpu) { free_all_cpu_cached_iovas(cpu); - flush_unmaps_timeout(cpu); return 0; } From d43ecff3efbe0dcff4c239a65b17e68134388ed6 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 16 Aug 2017 09:27:02 -0400 Subject: [PATCH 56/87] MAINTAINERS: Add entry for qcom_iommu Add maintainer entry for qcom_iommu. Signed-off-by: Rob Clark Signed-off-by: Joerg Roedel --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 6f7721d1634c..c7a6ac0996da 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10941,6 +10941,13 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/rkuo/linux-hexagon-kernel.g S: Supported F: arch/hexagon/ +QUALCOMM IOMMU +M: Rob Clark +L: iommu@lists.linux-foundation.org +L: linux-arm-msm@vger.kernel.org +S: Maintained +F: drivers/iommu/qcom_iommu.c + QUALCOMM VENUS VIDEO ACCELERATOR DRIVER M: Stanimir Varbanov L: linux-media@vger.kernel.org From 7aa8619a66aea52b145e04cbab4f8d6a4e5f3f3b Mon Sep 17 00:00:00 2001 From: Nate Watterson Date: Thu, 29 Jun 2017 18:18:15 -0400 Subject: [PATCH 57/87] iommu/arm-smmu-v3: Implement shutdown method The shutdown method disables the SMMU to avoid corrupting a new kernel started with kexec. Signed-off-by: Nate Watterson Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 7 +++++++ drivers/iommu/arm-smmu.c | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 568c400eeaed..e67ba6c40faf 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -2852,9 +2852,15 @@ static int arm_smmu_device_remove(struct platform_device *pdev) struct arm_smmu_device *smmu = platform_get_drvdata(pdev); arm_smmu_device_disable(smmu); + return 0; } +static void arm_smmu_device_shutdown(struct platform_device *pdev) +{ + arm_smmu_device_remove(pdev); +} + static const struct of_device_id arm_smmu_of_match[] = { { .compatible = "arm,smmu-v3", }, { }, @@ -2868,6 +2874,7 @@ static struct platform_driver arm_smmu_driver = { }, .probe = arm_smmu_device_probe, .remove = arm_smmu_device_remove, + .shutdown = arm_smmu_device_shutdown, }; module_platform_driver(arm_smmu_driver); diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index b97188acc4f1..b0b126ea9d85 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -2329,6 +2329,11 @@ static int arm_smmu_device_remove(struct platform_device *pdev) return 0; } +static void arm_smmu_device_shutdown(struct platform_device *pdev) +{ + arm_smmu_device_remove(pdev); +} + static struct platform_driver arm_smmu_driver = { .driver = { .name = "arm-smmu", @@ -2336,6 +2341,7 @@ static struct platform_driver arm_smmu_driver = { }, .probe = arm_smmu_device_probe, .remove = arm_smmu_device_remove, + .shutdown = arm_smmu_device_shutdown, }; module_platform_driver(arm_smmu_driver); From 90df373cc62e527b010025249d11d10d19b086bd Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 8 Aug 2017 14:56:14 +0100 Subject: [PATCH 58/87] iommu/arm-smmu: Track context bank state Echoing what we do for Stream Map Entries, maintain a software shadow state for context bank configuration. With this in place, we are mere moments away from blissfully easy suspend/resume support. Reviewed-by: Sricharan R Signed-off-by: Robin Murphy [will: fix sparse warning by only clearing .cfg during domain destruction] Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 156 ++++++++++++++++++++++++--------------- 1 file changed, 97 insertions(+), 59 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index b0b126ea9d85..0a5fa126f379 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -338,6 +338,13 @@ struct arm_smmu_smr { bool valid; }; +struct arm_smmu_cb { + u64 ttbr[2]; + u32 tcr[2]; + u32 mair[2]; + struct arm_smmu_cfg *cfg; +}; + struct arm_smmu_master_cfg { struct arm_smmu_device *smmu; s16 smendx[]; @@ -380,6 +387,7 @@ struct arm_smmu_device { u32 num_context_banks; u32 num_s2_context_banks; DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS); + struct arm_smmu_cb *cbs; atomic_t irptndx; u32 num_mapping_groups; @@ -776,17 +784,74 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev) static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, struct io_pgtable_cfg *pgtbl_cfg) { - u32 reg, reg2; - u64 reg64; - bool stage1; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx]; + bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; + + cb->cfg = cfg; + + /* TTBCR */ + if (stage1) { + if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { + cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr; + } else { + cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr; + cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32; + cb->tcr[1] |= TTBCR2_SEP_UPSTREAM; + if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) + cb->tcr[1] |= TTBCR2_AS; + } + } else { + cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr; + } + + /* TTBRs */ + if (stage1) { + if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { + cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0]; + cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1]; + } else { + cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; + cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT; + cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1]; + cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT; + } + } else { + cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; + } + + /* MAIRs (stage-1 only) */ + if (stage1) { + if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { + cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr; + cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr; + } else { + cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0]; + cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1]; + } + } +} + +static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx) +{ + u32 reg; + bool stage1; + struct arm_smmu_cb *cb = &smmu->cbs[idx]; + struct arm_smmu_cfg *cfg = cb->cfg; void __iomem *cb_base, *gr1_base; + cb_base = ARM_SMMU_CB(smmu, idx); + + /* Unassigned context banks only need disabling */ + if (!cfg) { + writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR); + return; + } + gr1_base = ARM_SMMU_GR1(smmu); stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; - cb_base = ARM_SMMU_CB(smmu, cfg->cbndx); + /* CBA2R */ if (smmu->version > ARM_SMMU_V1) { if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) reg = CBA2R_RW64_64BIT; @@ -796,7 +861,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, if (smmu->features & ARM_SMMU_FEAT_VMID16) reg |= cfg->vmid << CBA2R_VMID_SHIFT; - writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(cfg->cbndx)); + writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx)); } /* CBAR */ @@ -815,72 +880,41 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, /* 8-bit VMIDs live in CBAR */ reg |= cfg->vmid << CBAR_VMID_SHIFT; } - writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(cfg->cbndx)); + writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx)); /* * TTBCR * We must write this before the TTBRs, since it determines the * access behaviour of some fields (in particular, ASID[15:8]). */ - if (stage1) { - if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { - reg = pgtbl_cfg->arm_v7s_cfg.tcr; - reg2 = 0; - } else { - reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr; - reg2 = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32; - reg2 |= TTBCR2_SEP_UPSTREAM; - if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) - reg2 |= TTBCR2_AS; - } - if (smmu->version > ARM_SMMU_V1) - writel_relaxed(reg2, cb_base + ARM_SMMU_CB_TTBCR2); - } else { - reg = pgtbl_cfg->arm_lpae_s2_cfg.vtcr; - } - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR); + if (stage1 && smmu->version > ARM_SMMU_V1) + writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2); + writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR); /* TTBRs */ - if (stage1) { - if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { - reg = pgtbl_cfg->arm_v7s_cfg.ttbr[0]; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0); - reg = pgtbl_cfg->arm_v7s_cfg.ttbr[1]; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1); - writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR); - } else { - reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; - reg64 |= (u64)cfg->asid << TTBRn_ASID_SHIFT; - writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0); - reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1]; - reg64 |= (u64)cfg->asid << TTBRn_ASID_SHIFT; - writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR1); - } + if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { + writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR); + writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0); + writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1); } else { - reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; - writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0); + writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0); + if (stage1) + writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1); } /* MAIRs (stage-1 only) */ if (stage1) { - if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { - reg = pgtbl_cfg->arm_v7s_cfg.prrr; - reg2 = pgtbl_cfg->arm_v7s_cfg.nmrr; - } else { - reg = pgtbl_cfg->arm_lpae_s1_cfg.mair[0]; - reg2 = pgtbl_cfg->arm_lpae_s1_cfg.mair[1]; - } - writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR0); - writel_relaxed(reg2, cb_base + ARM_SMMU_CB_S1_MAIR1); + writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0); + writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1); } /* SCTLR */ reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M; if (stage1) reg |= SCTLR_S1_ASIDPNE; -#ifdef __BIG_ENDIAN - reg |= SCTLR_E; -#endif + if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) + reg |= SCTLR_E; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR); } @@ -1043,6 +1077,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, /* Initialise the context bank with our page table cfg */ arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg); + arm_smmu_write_context_bank(smmu, cfg->cbndx); /* * Request context fault interrupt. Do this last to avoid the @@ -1075,7 +1110,6 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - void __iomem *cb_base; int irq; if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY) @@ -1085,8 +1119,8 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) * Disable the context bank and free the page tables before freeing * it. */ - cb_base = ARM_SMMU_CB(smmu, cfg->cbndx); - writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR); + smmu->cbs[cfg->cbndx].cfg = NULL; + arm_smmu_write_context_bank(smmu, cfg->cbndx); if (cfg->irptndx != INVALID_IRPTNDX) { irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx]; @@ -1729,7 +1763,6 @@ static struct iommu_ops arm_smmu_ops = { static void arm_smmu_device_reset(struct arm_smmu_device *smmu) { void __iomem *gr0_base = ARM_SMMU_GR0(smmu); - void __iomem *cb_base; int i; u32 reg, major; @@ -1765,8 +1798,9 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) /* Make sure all context banks are disabled and clear CB_FSR */ for (i = 0; i < smmu->num_context_banks; ++i) { - cb_base = ARM_SMMU_CB(smmu, i); - writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR); + void __iomem *cb_base = ARM_SMMU_CB(smmu, i); + + arm_smmu_write_context_bank(smmu, i); writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR); /* * Disable MMU-500's not-particularly-beneficial next-page @@ -1972,6 +2006,10 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) smmu->cavium_id_base -= smmu->num_context_banks; dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n"); } + smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks, + sizeof(*smmu->cbs), GFP_KERNEL); + if (!smmu->cbs) + return -ENOMEM; /* ID2 */ id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2); From a2d866f7d66574d980a2e4544bc626e29bb33365 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 8 Aug 2017 14:56:15 +0100 Subject: [PATCH 59/87] iommu/arm-smmu: Add system PM support With all our hardware state tracked in such a way that we can naturally restore it as part of the necessary reset, resuming is trivial, and there's nothing to do on suspend at all. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 0a5fa126f379..445f0ea38272 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -2372,10 +2372,21 @@ static void arm_smmu_device_shutdown(struct platform_device *pdev) arm_smmu_device_remove(pdev); } +static int __maybe_unused arm_smmu_pm_resume(struct device *dev) +{ + struct arm_smmu_device *smmu = dev_get_drvdata(dev); + + arm_smmu_device_reset(smmu); + return 0; +} + +static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume); + static struct platform_driver arm_smmu_driver = { .driver = { .name = "arm-smmu", .of_match_table = of_match_ptr(arm_smmu_of_match), + .pm = &arm_smmu_pm_ops, }, .probe = arm_smmu_device_probe, .remove = arm_smmu_device_remove, From 0b480e447006144ffcfaf178574b9a13344588a6 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 9 Aug 2017 17:41:52 +0200 Subject: [PATCH 60/87] iommu/tegra: Add support for struct iommu_device Add a struct iommu_device to each tegra-smmu and register it with the iommu-core. Also link devices added to the driver to their respective hardware iommus. Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-smmu.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index faa9c1e70482..2802e12e6a54 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -36,6 +36,8 @@ struct tegra_smmu { struct list_head list; struct dentry *debugfs; + + struct iommu_device iommu; /* IOMMU Core code handle */ }; struct tegra_smmu_as { @@ -720,6 +722,9 @@ static int tegra_smmu_add_device(struct device *dev) * first match. */ dev->archdata.iommu = smmu; + + iommu_device_link(&smmu->iommu, dev); + break; } @@ -737,6 +742,11 @@ static int tegra_smmu_add_device(struct device *dev) static void tegra_smmu_remove_device(struct device *dev) { + struct tegra_smmu *smmu = dev->archdata.iommu; + + if (smmu) + iommu_device_unlink(&smmu->iommu, dev); + dev->archdata.iommu = NULL; iommu_group_remove_device(dev); } @@ -943,6 +953,18 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev, if (err < 0) return ERR_PTR(err); + err = iommu_device_sysfs_add(&smmu->iommu, dev, NULL, dev_name(dev)); + if (err) + return ERR_PTR(err); + + iommu_device_set_ops(&smmu->iommu, &tegra_smmu_ops); + + err = iommu_device_register(&smmu->iommu); + if (err) { + iommu_device_sysfs_remove(&smmu->iommu); + return ERR_PTR(err); + } + if (IS_ENABLED(CONFIG_DEBUG_FS)) tegra_smmu_debugfs_init(smmu); @@ -951,6 +973,9 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev, void tegra_smmu_remove(struct tegra_smmu *smmu) { + iommu_device_unregister(&smmu->iommu); + iommu_device_sysfs_remove(&smmu->iommu); + if (IS_ENABLED(CONFIG_DEBUG_FS)) tegra_smmu_debugfs_exit(smmu); } From c184ae83c8337d8ee0e5e9de9e7ce2fc652137aa Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 10 Aug 2017 00:17:28 +0200 Subject: [PATCH 61/87] iommu/tegra-gart: Add support for struct iommu_device Add a struct iommu_device to each tegra-gart and register it with the iommu-core. Also link devices added to the driver to their respective hardware iommus. Reviewed-by: Dmitry Osipenko Tested-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-gart.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index 29bafc6e82ae..b62f790ad1ba 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -61,6 +61,8 @@ struct gart_device { struct list_head client; spinlock_t client_lock; /* for client list */ struct device *dev; + + struct iommu_device iommu; /* IOMMU Core handle */ }; struct gart_domain { @@ -342,12 +344,16 @@ static int gart_iommu_add_device(struct device *dev) return PTR_ERR(group); iommu_group_put(group); + + iommu_device_link(&gart_handle->iommu, dev); + return 0; } static void gart_iommu_remove_device(struct device *dev) { iommu_group_remove_device(dev); + iommu_device_unlink(&gart_handle->iommu, dev); } static const struct iommu_ops gart_iommu_ops = { @@ -397,6 +403,7 @@ static int tegra_gart_probe(struct platform_device *pdev) struct resource *res, *res_remap; void __iomem *gart_regs; struct device *dev = &pdev->dev; + int ret; if (gart_handle) return -EIO; @@ -423,6 +430,22 @@ static int tegra_gart_probe(struct platform_device *pdev) return -ENXIO; } + ret = iommu_device_sysfs_add(&gart->iommu, &pdev->dev, NULL, + dev_name(&pdev->dev)); + if (ret) { + dev_err(dev, "Failed to register IOMMU in sysfs\n"); + return ret; + } + + iommu_device_set_ops(&gart->iommu, &gart_iommu_ops); + + ret = iommu_device_register(&gart->iommu); + if (ret) { + dev_err(dev, "Failed to register IOMMU\n"); + iommu_device_sysfs_remove(&gart->iommu); + return ret; + } + gart->dev = &pdev->dev; spin_lock_init(&gart->pte_lock); spin_lock_init(&gart->client_lock); @@ -449,6 +472,9 @@ static int tegra_gart_remove(struct platform_device *pdev) { struct gart_device *gart = platform_get_drvdata(pdev); + iommu_device_unregister(&gart->iommu); + iommu_device_sysfs_remove(&gart->iommu); + writel(0, gart->regs + GART_CONFIG); if (gart->savedata) vfree(gart->savedata); From 1464d0b1defe421aef8c8877e19c7ae011e32eb9 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 17 Aug 2017 11:40:08 +0100 Subject: [PATCH 62/87] iommu: Avoid NULL group dereference The recently-removed FIXME in iommu_get_domain_for_dev() turns out to have been a little misleading, since that check is still worthwhile even when groups *are* universal. We have a few IOMMU-aware drivers which only care whether their device is already attached to an existing domain or not, for which the previous behaviour of iommu_get_domain_for_dev() was ideal, and who now crash if their device does not have an IOMMU. With IOMMU groups now serving as a reliable indicator of whether a device has an IOMMU or not (barring false-positives from VFIO no-IOMMU mode), drivers could arguably do this: group = iommu_group_get(dev); if (group) { domain = iommu_get_domain_for_dev(dev); iommu_group_put(group); } However, rather than duplicate that code across multiple callsites, particularly when it's still only the domain they care about, let's skip straight to the next step and factor out the check into the common place it applies - in iommu_get_domain_for_dev() itself. Sure, it ends up looking rather familiar, but now it's backed by the reasoning of having a robust API able to do the expected thing for all devices regardless. Fixes: 05f80300dc8b ("iommu: Finish making iommu_group support mandatory") Reported-by: Shawn Lin Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index af69bf7e035a..5499a0387349 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1352,6 +1352,8 @@ struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) struct iommu_group *group; group = iommu_group_get(dev); + if (!group) + return NULL; domain = group->domain; From ae162efbf2870c326e06b4f905423fb888f9cd2a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Sat, 19 Aug 2017 00:28:02 +0200 Subject: [PATCH 63/87] iommu/amd: Fix compiler warning in copy_device_table() This was reported by the kbuild bot. The condition in which entry would be used uninitialized can not happen, because when there is no iommu this function would never be called. But its no fast-path, so fix the warning anyway. Reported-by: kbuild test robot Acked-by: Baoquan He Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index c7d03251c80a..f2023cd08ef7 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -854,7 +854,7 @@ static int get_dev_entry_bit(u16 devid, u8 bit) static bool copy_device_table(void) { - u64 int_ctl, int_tab_len, entry, last_entry = 0; + u64 int_ctl, int_tab_len, entry = 0, last_entry = 0; struct dev_table_entry *old_devtb = NULL; u32 lo, hi, devid, old_devtb_size; phys_addr_t old_devtb_phys; From 2479c631d16c53b01f897fe6a4666c04f71075fb Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Sat, 19 Aug 2017 00:35:40 +0200 Subject: [PATCH 64/87] iommu/amd: Fix section mismatch warning The variable amd_iommu_pre_enabled is used in non-init code-paths, so remove the __initdata annotation. Reported-by: kbuild test robot Fixes: 3ac3e5ee5ed56 ('iommu/amd: Copy old trans table from old kernel') Acked-by: Baoquan He Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index f2023cd08ef7..ff8887ac5555 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -264,7 +264,7 @@ static int amd_iommu_enable_interrupts(void); static int __init iommu_go_to_state(enum iommu_init_state state); static void init_device_table_dma(void); -static bool __initdata amd_iommu_pre_enabled = true; +static bool amd_iommu_pre_enabled = true; bool translation_pre_enabled(struct amd_iommu *iommu) { From 7af9a5fdb9e0ca33c9c18d5a9b1512c98a03120f Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Mon, 21 Aug 2017 14:53:35 +0900 Subject: [PATCH 65/87] iommu/ipmmu-vmsa: Use iommu_device_sysfs_add()/remove() Extend the driver to make use of iommu_device_sysfs_add()/remove() functions to hook up initial sysfs support. Suggested-by: Joerg Roedel Signed-off-by: Magnus Damm Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 5093d1c4f46d..5a350582f359 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -953,6 +953,11 @@ static int ipmmu_probe(struct platform_device *pdev) ipmmu_device_reset(mmu); + ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, + dev_name(&pdev->dev)); + if (ret) + return ret; + iommu_device_set_ops(&mmu->iommu, &ipmmu_ops); iommu_device_set_fwnode(&mmu->iommu, &pdev->dev.of_node->fwnode); @@ -975,6 +980,7 @@ static int ipmmu_remove(struct platform_device *pdev) { struct ipmmu_vmsa_device *mmu = platform_get_drvdata(pdev); + iommu_device_sysfs_remove(&mmu->iommu); iommu_device_unregister(&mmu->iommu); #if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) From a9467d954226f1a513cfe789a3a39d8fc73b5d16 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 21 Aug 2017 19:00:15 +0800 Subject: [PATCH 66/87] iommu/mediatek: Move MTK_M4U_TO_LARB/PORT into mtk_iommu.c The definition of MTK_M4U_TO_LARB and MTK_M4U_TO_PORT are shared by all the gen2 M4U HWs. Thus, Move them out from mt8173-larb-port.h, and put them into the c file. Suggested-by: Honghui Zhang Signed-off-by: Yong Wu Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 8 +++++++- include/dt-bindings/memory/mt8173-larb-port.h | 4 ---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 91c6d367ab35..4db6c8f66b0c 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include "mtk_iommu.h" @@ -93,6 +92,13 @@ #define MTK_PROTECT_PA_ALIGN 128 +/* + * Get the local arbiter ID and the portid within the larb arbiter + * from mtk_m4u_id which is defined by MTK_M4U_ID. + */ +#define MTK_M4U_TO_LARB(id) (((id) >> 5) & 0x7) +#define MTK_M4U_TO_PORT(id) ((id) & 0x1f) + struct mtk_iommu_domain { spinlock_t pgtlock; /* lock for page table */ diff --git a/include/dt-bindings/memory/mt8173-larb-port.h b/include/dt-bindings/memory/mt8173-larb-port.h index 5fef5d1f8f82..111b4b0ec85a 100644 --- a/include/dt-bindings/memory/mt8173-larb-port.h +++ b/include/dt-bindings/memory/mt8173-larb-port.h @@ -15,10 +15,6 @@ #define __DTS_IOMMU_PORT_MT8173_H #define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) -/* Local arbiter ID */ -#define MTK_M4U_TO_LARB(id) (((id) >> 5) & 0x7) -/* PortID within the local arbiter */ -#define MTK_M4U_TO_PORT(id) ((id) & 0x1f) #define M4U_LARB0_ID 0 #define M4U_LARB1_ID 1 From e6dec92308628cff5f1f8bd1bcdf87581c9dc676 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 21 Aug 2017 19:00:16 +0800 Subject: [PATCH 67/87] iommu/mediatek: Add mt2712 IOMMU support The M4U IP blocks in mt2712 is MTK's generation2 M4U which use the ARM Short-descriptor like mt8173, and most of the HW registers are the same. The difference is that there are 2 M4U HWs in mt2712 while there's only one in mt8173. The purpose of 2 M4U HWs is for balance the bandwidth. Normally if there are 2 M4U HWs, there should be 2 iommu domains, each M4U has a iommu domain. Signed-off-by: Yong Wu Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 71 ++++++++++++++++++-------------------- drivers/iommu/mtk_iommu.h | 7 ++++ drivers/memory/mtk-smi.c | 54 +++++++++++++++++++++++++++-- include/soc/mediatek/smi.h | 2 +- 4 files changed, 93 insertions(+), 41 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 4db6c8f66b0c..df23e0201336 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -53,7 +53,11 @@ #define REG_MMU_CTRL_REG 0x110 #define F_MMU_PREFETCH_RT_REPLACE_MOD BIT(4) -#define F_MMU_TF_PROTECT_SEL(prot) (((prot) & 0x3) << 5) +#define F_MMU_TF_PROTECT_SEL_SHIFT(data) \ + ((data)->m4u_plat == M4U_MT2712 ? 4 : 5) +/* It's named by F_MMU_TF_PROT_SEL in mt2712. */ +#define F_MMU_TF_PROTECT_SEL(prot, data) \ + (((prot) & 0x3) << F_MMU_TF_PROTECT_SEL_SHIFT(data)) #define REG_MMU_IVRP_PADDR 0x114 #define F_MMU_IVRP_PA_SET(pa, ext) (((pa) >> 1) | ((!!(ext)) << 31)) @@ -96,7 +100,7 @@ * Get the local arbiter ID and the portid within the larb arbiter * from mtk_m4u_id which is defined by MTK_M4U_ID. */ -#define MTK_M4U_TO_LARB(id) (((id) >> 5) & 0x7) +#define MTK_M4U_TO_LARB(id) (((id) >> 5) & 0xf) #define MTK_M4U_TO_PORT(id) ((id) & 0x1f) struct mtk_iommu_domain { @@ -307,10 +311,6 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, data->m4u_dom = NULL; return ret; } - } else if (data->m4u_dom != dom) { - /* All the client devices should be in the same m4u domain */ - dev_err(dev, "try to attach into the error iommu domain\n"); - return -EPERM; } mtk_iommu_config(data, dev, true); @@ -470,8 +470,9 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) return ret; } - regval = F_MMU_PREFETCH_RT_REPLACE_MOD | - F_MMU_TF_PROTECT_SEL(2); + regval = F_MMU_TF_PROTECT_SEL(2, data); + if (data->m4u_plat == M4U_MT8173) + regval |= F_MMU_PREFETCH_RT_REPLACE_MOD; writel_relaxed(regval, data->base + REG_MMU_CTRL_REG); regval = F_L2_MULIT_HIT_EN | @@ -493,9 +494,11 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base, data->enable_4GB), data->base + REG_MMU_IVRP_PADDR); - writel_relaxed(0, data->base + REG_MMU_DCM_DIS); - writel_relaxed(0, data->base + REG_MMU_STANDARD_AXI_MODE); + + /* It's MISC control register whose default value is ok except mt8173.*/ + if (data->m4u_plat == M4U_MT8173) + writel_relaxed(0, data->base + REG_MMU_STANDARD_AXI_MODE); if (devm_request_irq(data->dev, data->irq, mtk_iommu_isr, 0, dev_name(data->dev), (void *)data)) { @@ -527,6 +530,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) if (!data) return -ENOMEM; data->dev = dev; + data->m4u_plat = (enum mtk_iommu_plat)of_device_get_match_data(dev); /* Protect memory. HW will access here while translation fault.*/ protect = devm_kzalloc(dev, MTK_PROTECT_PA_ALIGN * 2, GFP_KERNEL); @@ -560,6 +564,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) for (i = 0; i < larb_nr; i++) { struct device_node *larbnode; struct platform_device *plarbdev; + u32 id; larbnode = of_parse_phandle(dev->of_node, "mediatek,larbs", i); if (!larbnode) @@ -568,17 +573,14 @@ static int mtk_iommu_probe(struct platform_device *pdev) if (!of_device_is_available(larbnode)) continue; + ret = of_property_read_u32(larbnode, "mediatek,larb-id", &id); + if (ret)/* The id is consecutive if there is no this property */ + id = i; + plarbdev = of_find_device_by_node(larbnode); - if (!plarbdev) { - plarbdev = of_platform_device_create( - larbnode, NULL, - platform_bus_type.dev_root); - if (!plarbdev) { - of_node_put(larbnode); - return -EPROBE_DEFER; - } - } - data->smi_imu.larb_imu[i].dev = &plarbdev->dev; + if (!plarbdev) + return -EPROBE_DEFER; + data->smi_imu.larb_imu[id].dev = &plarbdev->dev; component_match_add_release(dev, &match, release_of, compare_of, larbnode); @@ -646,8 +648,6 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev) struct mtk_iommu_suspend_reg *reg = &data->reg; void __iomem *base = data->base; - writel_relaxed(data->m4u_dom->cfg.arm_v7s_cfg.ttbr[0], - base + REG_MMU_PT_BASE_ADDR); writel_relaxed(reg->standard_axi_mode, base + REG_MMU_STANDARD_AXI_MODE); writel_relaxed(reg->dcm_dis, base + REG_MMU_DCM_DIS); @@ -656,15 +656,19 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev) writel_relaxed(reg->int_main_control, base + REG_MMU_INT_MAIN_CONTROL); writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base, data->enable_4GB), base + REG_MMU_IVRP_PADDR); + if (data->m4u_dom) + writel(data->m4u_dom->cfg.arm_v7s_cfg.ttbr[0], + base + REG_MMU_PT_BASE_ADDR); return 0; } -const struct dev_pm_ops mtk_iommu_pm_ops = { +static const struct dev_pm_ops mtk_iommu_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(mtk_iommu_suspend, mtk_iommu_resume) }; static const struct of_device_id mtk_iommu_of_ids[] = { - { .compatible = "mediatek,mt8173-m4u", }, + { .compatible = "mediatek,mt2712-m4u", .data = (void *)M4U_MT2712}, + { .compatible = "mediatek,mt8173-m4u", .data = (void *)M4U_MT8173}, {} }; @@ -673,27 +677,20 @@ static struct platform_driver mtk_iommu_driver = { .remove = mtk_iommu_remove, .driver = { .name = "mtk-iommu", - .of_match_table = mtk_iommu_of_ids, + .of_match_table = of_match_ptr(mtk_iommu_of_ids), .pm = &mtk_iommu_pm_ops, } }; -static int mtk_iommu_init_fn(struct device_node *np) +static int __init mtk_iommu_init(void) { int ret; - struct platform_device *pdev; - - pdev = of_platform_device_create(np, NULL, platform_bus_type.dev_root); - if (!pdev) - return -ENOMEM; ret = platform_driver_register(&mtk_iommu_driver); - if (ret) { - pr_err("%s: Failed to register driver\n", __func__); - return ret; - } + if (ret != 0) + pr_err("Failed to register MTK IOMMU driver\n"); - return 0; + return ret; } -IOMMU_OF_DECLARE(mtkm4u, "mediatek,mt8173-m4u", mtk_iommu_init_fn); +subsys_initcall(mtk_iommu_init) diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index c06cc91b5d9a..462e593b7d71 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -34,6 +34,12 @@ struct mtk_iommu_suspend_reg { u32 int_main_control; }; +enum mtk_iommu_plat { + M4U_MT2701, + M4U_MT2712, + M4U_MT8173, +}; + struct mtk_iommu_domain; struct mtk_iommu_data { @@ -50,6 +56,7 @@ struct mtk_iommu_data { bool tlb_flush_active; struct iommu_device iommu; + enum mtk_iommu_plat m4u_plat; }; static inline int compare_of(struct device *dev, void *data) diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c index 13f8c45dbf0d..8ffe3216d092 100644 --- a/drivers/memory/mtk-smi.c +++ b/drivers/memory/mtk-smi.c @@ -23,7 +23,10 @@ #include #include +/* mt8173 */ #define SMI_LARB_MMU_EN 0xf00 + +/* mt2701 */ #define REG_SMI_SECUR_CON_BASE 0x5c0 /* every register control 8 port, register offset 0x4 */ @@ -41,6 +44,10 @@ /* mt2701 domain should be set to 3 */ #define SMI_SECUR_CON_VAL_DOMAIN(id) (0x3 << ((((id) & 0x7) << 2) + 1)) +/* mt2712 */ +#define SMI_LARB_NONSEC_CON(id) (0x380 + ((id) * 4)) +#define F_MMU_EN BIT(0) + struct mtk_smi_larb_gen { bool need_larbid; int port_in_larb[MTK_LARB_NR_MAX + 1]; @@ -149,6 +156,15 @@ mtk_smi_larb_bind(struct device *dev, struct device *master, void *data) struct mtk_smi_iommu *smi_iommu = data; unsigned int i; + if (larb->larb_gen->need_larbid) { + larb->mmu = &smi_iommu->larb_imu[larb->larbid].mmu; + return 0; + } + + /* + * If there is no larbid property, Loop to find the corresponding + * iommu information. + */ for (i = 0; i < smi_iommu->larb_nr; i++) { if (dev == smi_iommu->larb_imu[i].dev) { /* The 'mmu' may be updated in iommu-attach/detach. */ @@ -159,14 +175,33 @@ mtk_smi_larb_bind(struct device *dev, struct device *master, void *data) return -ENODEV; } -static void mtk_smi_larb_config_port(struct device *dev) +static void mtk_smi_larb_config_port_mt2712(struct device *dev) +{ + struct mtk_smi_larb *larb = dev_get_drvdata(dev); + u32 reg; + int i; + + /* + * larb 8/9 is the bdpsys larb, the iommu_en is enabled defaultly. + * Don't need to set it again. + */ + if (larb->larbid == 8 || larb->larbid == 9) + return; + + for_each_set_bit(i, (unsigned long *)larb->mmu, 32) { + reg = readl_relaxed(larb->base + SMI_LARB_NONSEC_CON(i)); + reg |= F_MMU_EN; + writel(reg, larb->base + SMI_LARB_NONSEC_CON(i)); + } +} + +static void mtk_smi_larb_config_port_mt8173(struct device *dev) { struct mtk_smi_larb *larb = dev_get_drvdata(dev); writel(*larb->mmu, larb->base + SMI_LARB_MMU_EN); } - static void mtk_smi_larb_config_port_gen1(struct device *dev) { struct mtk_smi_larb *larb = dev_get_drvdata(dev); @@ -211,7 +246,7 @@ static const struct component_ops mtk_smi_larb_component_ops = { static const struct mtk_smi_larb_gen mtk_smi_larb_mt8173 = { /* mt8173 do not need the port in larb */ - .config_port = mtk_smi_larb_config_port, + .config_port = mtk_smi_larb_config_port_mt8173, }; static const struct mtk_smi_larb_gen mtk_smi_larb_mt2701 = { @@ -223,6 +258,11 @@ static const struct mtk_smi_larb_gen mtk_smi_larb_mt2701 = { .config_port = mtk_smi_larb_config_port_gen1, }; +static const struct mtk_smi_larb_gen mtk_smi_larb_mt2712 = { + .need_larbid = true, + .config_port = mtk_smi_larb_config_port_mt2712, +}; + static const struct of_device_id mtk_smi_larb_of_ids[] = { { .compatible = "mediatek,mt8173-smi-larb", @@ -232,6 +272,10 @@ static const struct of_device_id mtk_smi_larb_of_ids[] = { .compatible = "mediatek,mt2701-smi-larb", .data = &mtk_smi_larb_mt2701 }, + { + .compatible = "mediatek,mt2712-smi-larb", + .data = &mtk_smi_larb_mt2712 + }, {} }; @@ -318,6 +362,10 @@ static const struct of_device_id mtk_smi_common_of_ids[] = { .compatible = "mediatek,mt2701-smi-common", .data = (void *)MTK_SMI_GEN1 }, + { + .compatible = "mediatek,mt2712-smi-common", + .data = (void *)MTK_SMI_GEN2 + }, {} }; diff --git a/include/soc/mediatek/smi.h b/include/soc/mediatek/smi.h index 8893c5eacd07..5201e9022c86 100644 --- a/include/soc/mediatek/smi.h +++ b/include/soc/mediatek/smi.h @@ -19,7 +19,7 @@ #ifdef CONFIG_MTK_SMI -#define MTK_LARB_NR_MAX 8 +#define MTK_LARB_NR_MAX 16 #define MTK_SMI_MMU_EN(port) BIT(port) From 7c3a2ec02806a6f83270f34f8ab4e501e7d8ea69 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 21 Aug 2017 19:00:17 +0800 Subject: [PATCH 68/87] iommu/mediatek: Merge 2 M4U HWs into one iommu domain In theory, If there are 2 M4U HWs, there should be 2 IOMMU domains. But one IOMMU domain(4GB iova range) is enough for us currently, It's unnecessary to maintain 2 pagetables. Besides, This patch can simplify our consumer code largely. They don't need map a iova range from one domain into another, They can share the iova address easily. Signed-off-by: Yong Wu Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 92 +++++++++++++++++++++++++++++---------- drivers/iommu/mtk_iommu.h | 2 + 2 files changed, 70 insertions(+), 24 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index df23e0201336..1503dfaa0a69 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -114,6 +114,27 @@ struct mtk_iommu_domain { static struct iommu_ops mtk_iommu_ops; +static LIST_HEAD(m4ulist); /* List all the M4U HWs */ + +#define for_each_m4u(data) list_for_each_entry(data, &m4ulist, list) + +/* + * There may be 1 or 2 M4U HWs, But we always expect they are in the same domain + * for the performance. + * + * Here always return the mtk_iommu_data of the first probed M4U where the + * iommu domain information is recorded. + */ +static struct mtk_iommu_data *mtk_iommu_get_m4u_data(void) +{ + struct mtk_iommu_data *data; + + for_each_m4u(data) + return data; + + return NULL; +} + static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom) { return container_of(dom, struct mtk_iommu_domain, domain); @@ -123,9 +144,12 @@ static void mtk_iommu_tlb_flush_all(void *cookie) { struct mtk_iommu_data *data = cookie; - writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, data->base + REG_MMU_INV_SEL); - writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE); - wmb(); /* Make sure the tlb flush all done */ + for_each_m4u(data) { + writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, + data->base + REG_MMU_INV_SEL); + writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE); + wmb(); /* Make sure the tlb flush all done */ + } } static void mtk_iommu_tlb_add_flush_nosync(unsigned long iova, size_t size, @@ -134,12 +158,17 @@ static void mtk_iommu_tlb_add_flush_nosync(unsigned long iova, size_t size, { struct mtk_iommu_data *data = cookie; - writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, data->base + REG_MMU_INV_SEL); + for_each_m4u(data) { + writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, + data->base + REG_MMU_INV_SEL); - writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A); - writel_relaxed(iova + size - 1, data->base + REG_MMU_INVLD_END_A); - writel_relaxed(F_MMU_INV_RANGE, data->base + REG_MMU_INVALIDATE); - data->tlb_flush_active = true; + writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A); + writel_relaxed(iova + size - 1, + data->base + REG_MMU_INVLD_END_A); + writel_relaxed(F_MMU_INV_RANGE, + data->base + REG_MMU_INVALIDATE); + data->tlb_flush_active = true; + } } static void mtk_iommu_tlb_sync(void *cookie) @@ -148,20 +177,22 @@ static void mtk_iommu_tlb_sync(void *cookie) int ret; u32 tmp; - /* Avoid timing out if there's nothing to wait for */ - if (!data->tlb_flush_active) - return; + for_each_m4u(data) { + /* Avoid timing out if there's nothing to wait for */ + if (!data->tlb_flush_active) + return; - ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE, tmp, - tmp != 0, 10, 100000); - if (ret) { - dev_warn(data->dev, - "Partial TLB flush timed out, falling back to full flush\n"); - mtk_iommu_tlb_flush_all(cookie); + ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE, + tmp, tmp != 0, 10, 100000); + if (ret) { + dev_warn(data->dev, + "Partial TLB flush timed out, falling back to full flush\n"); + mtk_iommu_tlb_flush_all(cookie); + } + /* Clear the CPE status */ + writel_relaxed(0, data->base + REG_MMU_CPE_DONE); + data->tlb_flush_active = false; } - /* Clear the CPE status */ - writel_relaxed(0, data->base + REG_MMU_CPE_DONE); - data->tlb_flush_active = false; } static const struct iommu_gather_ops mtk_iommu_gather_ops = { @@ -298,10 +329,11 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); - struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv; + struct mtk_iommu_data *curdata = dev->iommu_fwspec->iommu_priv; + struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); int ret; - if (!data) + if (!data || !curdata) return -ENODEV; if (!data->m4u_dom) { @@ -313,7 +345,17 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, } } - mtk_iommu_config(data, dev, true); + /* + * Update the pgtable base address register of another M4U HW with the + * existed pgtable if there are more than one M4U HW. + */ + if (!curdata->m4u_dom) { + curdata->m4u_dom = data->m4u_dom; + writel(data->m4u_dom->cfg.arm_v7s_cfg.ttbr[0], + curdata->base + REG_MMU_PT_BASE_ADDR); + } + + mtk_iommu_config(curdata, dev, true); return 0; } @@ -405,7 +447,7 @@ static void mtk_iommu_remove_device(struct device *dev) static struct iommu_group *mtk_iommu_device_group(struct device *dev) { - struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv; + struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); if (!data) return ERR_PTR(-ENODEV); @@ -604,6 +646,8 @@ static int mtk_iommu_probe(struct platform_device *pdev) if (ret) return ret; + list_add_tail(&data->list, &m4ulist); + if (!iommu_present(&platform_bus_type)) bus_set_iommu(&platform_bus_type, &mtk_iommu_ops); diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index 462e593b7d71..b4451a1c7c2f 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -57,6 +57,8 @@ struct mtk_iommu_data { struct iommu_device iommu; enum mtk_iommu_plat m4u_plat; + + struct list_head list; }; static inline int compare_of(struct device *dev, void *data) From 4b00f5ac12fcee634c41b81444d981e1217ef618 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 21 Aug 2017 19:00:18 +0800 Subject: [PATCH 69/87] iommu/mediatek: Move pgtable allocation into domain_alloc After adding the global list for M4U HW, We get a chance to move the pagetable allocation into the mtk_iommu_domain_alloc. Let the domain_alloc do the right thing. This patch is for fixing this problem[1]. [1]: https://patchwork.codeaurora.org/patch/53987/ Signed-off-by: Yong Wu Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 52 +++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 30 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 1503dfaa0a69..a82196c0c5df 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -262,9 +262,9 @@ static void mtk_iommu_config(struct mtk_iommu_data *data, } } -static int mtk_iommu_domain_finalise(struct mtk_iommu_data *data) +static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom) { - struct mtk_iommu_domain *dom = data->m4u_dom; + struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); spin_lock_init(&dom->pgtlock); @@ -290,9 +290,6 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_data *data) /* Update our support page sizes bitmap */ dom->domain.pgsize_bitmap = dom->cfg.pgsize_bitmap; - - writel(data->m4u_dom->cfg.arm_v7s_cfg.ttbr[0], - data->base + REG_MMU_PT_BASE_ADDR); return 0; } @@ -307,20 +304,30 @@ static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type) if (!dom) return NULL; - if (iommu_get_dma_cookie(&dom->domain)) { - kfree(dom); - return NULL; - } + if (iommu_get_dma_cookie(&dom->domain)) + goto free_dom; + + if (mtk_iommu_domain_finalise(dom)) + goto put_dma_cookie; dom->domain.geometry.aperture_start = 0; dom->domain.geometry.aperture_end = DMA_BIT_MASK(32); dom->domain.geometry.force_aperture = true; return &dom->domain; + +put_dma_cookie: + iommu_put_dma_cookie(&dom->domain); +free_dom: + kfree(dom); + return NULL; } static void mtk_iommu_domain_free(struct iommu_domain *domain) { + struct mtk_iommu_domain *dom = to_mtk_domain(domain); + + free_io_pgtable_ops(dom->iop); iommu_put_dma_cookie(domain); kfree(to_mtk_domain(domain)); } @@ -329,33 +336,19 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); - struct mtk_iommu_data *curdata = dev->iommu_fwspec->iommu_priv; - struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); - int ret; + struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv; - if (!data || !curdata) + if (!data) return -ENODEV; + /* Update the pgtable base address register of the M4U HW */ if (!data->m4u_dom) { data->m4u_dom = dom; - ret = mtk_iommu_domain_finalise(data); - if (ret) { - data->m4u_dom = NULL; - return ret; - } + writel(dom->cfg.arm_v7s_cfg.ttbr[0], + data->base + REG_MMU_PT_BASE_ADDR); } - /* - * Update the pgtable base address register of another M4U HW with the - * existed pgtable if there are more than one M4U HW. - */ - if (!curdata->m4u_dom) { - curdata->m4u_dom = data->m4u_dom; - writel(data->m4u_dom->cfg.arm_v7s_cfg.ttbr[0], - curdata->base + REG_MMU_PT_BASE_ADDR); - } - - mtk_iommu_config(curdata, dev, true); + mtk_iommu_config(data, dev, true); return 0; } @@ -664,7 +657,6 @@ static int mtk_iommu_remove(struct platform_device *pdev) if (iommu_present(&platform_bus_type)) bus_set_iommu(&platform_bus_type, NULL); - free_io_pgtable_ops(data->m4u_dom->iop); clk_disable_unprepare(data->bclk); devm_free_irq(&pdev->dev, data->irq, data); component_master_del(&pdev->dev, &mtk_iommu_com_ops); From 6254b64f570622d4ebb7af00d46229bfc7346212 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 21 Aug 2017 19:00:19 +0800 Subject: [PATCH 70/87] iommu/mediatek: Disable iommu clock when system suspend When system suspend, infra power domain may be off, and the iommu's clock must be disabled when system off, or the iommu's bclk clock maybe disabled after system resume. Signed-off-by: Honghui Zhang Signed-off-by: Yong Wu Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index a82196c0c5df..e21de8634cba 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -675,6 +675,7 @@ static int __maybe_unused mtk_iommu_suspend(struct device *dev) reg->ctrl_reg = readl_relaxed(base + REG_MMU_CTRL_REG); reg->int_control0 = readl_relaxed(base + REG_MMU_INT_CONTROL0); reg->int_main_control = readl_relaxed(base + REG_MMU_INT_MAIN_CONTROL); + clk_disable_unprepare(data->bclk); return 0; } @@ -683,7 +684,13 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev) struct mtk_iommu_data *data = dev_get_drvdata(dev); struct mtk_iommu_suspend_reg *reg = &data->reg; void __iomem *base = data->base; + int ret; + ret = clk_prepare_enable(data->bclk); + if (ret) { + dev_err(data->dev, "Failed to enable clk(%d) in resume\n", ret); + return ret; + } writel_relaxed(reg->standard_axi_mode, base + REG_MMU_STANDARD_AXI_MODE); writel_relaxed(reg->dcm_dis, base + REG_MMU_DCM_DIS); @@ -699,7 +706,7 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev) } static const struct dev_pm_ops mtk_iommu_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(mtk_iommu_suspend, mtk_iommu_resume) + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(mtk_iommu_suspend, mtk_iommu_resume) }; static const struct of_device_id mtk_iommu_of_ids[] = { From 30e2fccf9512380b5cdceffe04d776f5e6de6b49 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 21 Aug 2017 19:00:20 +0800 Subject: [PATCH 71/87] iommu/mediatek: Enlarge the validate PA range for 4GB mode This patch is for 4GB mode, mainly for 4 issues: 1) Fix a 4GB bug: if the dram base is 0x4000_0000, the dram size is 0xc000_0000. then the code just meet a corner case because max_pfn is 0x10_0000. data->enable_4GB = !!(max_pfn > (0xffffffffUL >> PAGE_SHIFT)); It's true at the case above. That is unexpected. 2) In mt2712, there is a new register for the 4GB PA range(0x118) we should enlarge the max PA range, or the HW will report error. The dram range is from 0x1_0000_0000 to 0x1_ffff_ffff in the 4GB mode, we cut out the bit[32:30] of the SA(Start address) and EA(End address) into this REG_MMU_VLD_PA_RNG(0x118). 3) In mt2712, the register(0x13c) is extended for 4GB mode. bit[7:6] indicate the valid PA[32:33]. Thus, we don't mask the value and print it directly for debug. 4) if 4GB is enabled, the dram PA range is from 0x1_0000_0000 to 0x1_ffff_ffff. Thus, the PA from iova_to_pa should also '|' BIT(32) Signed-off-by: Honghui Zhang Signed-off-by: Yong Wu Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index e21de8634cba..4f233e13c28a 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -61,6 +61,8 @@ #define REG_MMU_IVRP_PADDR 0x114 #define F_MMU_IVRP_PA_SET(pa, ext) (((pa) >> 1) | ((!!(ext)) << 31)) +#define REG_MMU_VLD_PA_RNG 0x118 +#define F_MMU_VLD_PA_RNG(EA, SA) (((EA) << 8) | (SA)) #define REG_MMU_INT_CONTROL0 0x120 #define F_L2_MULIT_HIT_EN BIT(0) @@ -85,7 +87,6 @@ #define REG_MMU_FAULT_ST1 0x134 #define REG_MMU_FAULT_VA 0x13c -#define F_MMU_FAULT_VA_MSK 0xfffff000 #define F_MMU_FAULT_VA_WRITE_BIT BIT(1) #define F_MMU_FAULT_VA_LAYER_BIT BIT(0) @@ -214,7 +215,6 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) fault_iova = readl_relaxed(data->base + REG_MMU_FAULT_VA); layer = fault_iova & F_MMU_FAULT_VA_LAYER_BIT; write = fault_iova & F_MMU_FAULT_VA_WRITE_BIT; - fault_iova &= F_MMU_FAULT_VA_MSK; fault_pa = readl_relaxed(data->base + REG_MMU_INVLD_PA); regval = readl_relaxed(data->base + REG_MMU_INT_ID); fault_larb = F_MMU0_INT_ID_LARB_ID(regval); @@ -395,6 +395,7 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); + struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); unsigned long flags; phys_addr_t pa; @@ -402,6 +403,9 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, pa = dom->iop->iova_to_phys(dom->iop, iova); spin_unlock_irqrestore(&dom->pgtlock, flags); + if (data->enable_4GB) + pa |= BIT(32); + return pa; } @@ -529,6 +533,14 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base, data->enable_4GB), data->base + REG_MMU_IVRP_PADDR); + if (data->enable_4GB && data->m4u_type != M4U_MT8173) { + /* + * If 4GB mode is enabled, the validate PA range is from + * 0x1_0000_0000 to 0x1_ffff_ffff. here record bit[32:30]. + */ + regval = F_MMU_VLD_PA_RNG(7, 4); + writel_relaxed(regval, data->base + REG_MMU_VLD_PA_RNG); + } writel_relaxed(0, data->base + REG_MMU_DCM_DIS); /* It's MISC control register whose default value is ok except mt8173.*/ @@ -574,7 +586,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) data->protect_base = ALIGN(virt_to_phys(protect), MTK_PROTECT_PA_ALIGN); /* Whether the current dram is over 4GB */ - data->enable_4GB = !!(max_pfn > (0xffffffffUL >> PAGE_SHIFT)); + data->enable_4GB = !!(max_pfn > (BIT(32) >> PAGE_SHIFT)); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); data->base = devm_ioremap_resource(dev, res); From 4f608d382e6fa982655e37d0e1b6c134fdc0e4c3 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 21 Aug 2017 19:00:21 +0800 Subject: [PATCH 72/87] memory: mtk-smi: Degrade SMI init to module_init The initialization of MediaTek power manager(SCPSYS) is builtin_platform_driver, and SMI must depend on power-domain. Thus, currently subsys_initcall for SMI is unnecessary, SMI will be always probe defered by power-domain. Degrade it to module_init. In addition, there are two small changes about the probe sequence: 1) Delete this two lines. if (!dev->pm_domain) return -EPROBE_DEFER; This is not helpful. the platform driver framework guarantee this. The "dev_pm_domain_attach" in the "platform_drv_probe" will return EPROBE_DEFER if its powerdomain is not ready. 2) Add the probe-defer for the smi-larb device should waiting for smi-common. In mt2712, there are 2 smi-commons, 10 smi-larbs. All will be probe-defered by the power-domain, there is seldom case that smi-larb probe done before smi-common. then it will hang like this: Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = ffffff800a4e0000 [00000000] *pgd=00000000beffe003[ 17.610026] , *pud=00000000beffe003 ... [] mtk_smi_enable+0x1c/0xd0 [] mtk_smi_larb_get+0x30/0x98 [] mtk_mipicsi0_resume+0x38/0x1b8 [] pm_generic_runtime_resume+0x3c/0x58 [] __genpd_runtime_resume+0x38/0x98 [] genpd_runtime_resume+0x164/0x220 [] __rpm_callback+0x78/0xa0 [] rpm_callback+0x38/0xa0 [] rpm_resume+0x4a4/0x6f8 [] __pm_runtime_resume+0x64/0xa0 [] mtk_mipicsi0_probe+0x40c/0xb70 [] platform_drv_probe+0x58/0xc0 [] driver_probe_device+0x284/0x438 [] __device_attach_driver+0xb4/0x160 [] bus_for_each_drv+0x68/0xa8 [] __device_attach+0xd4/0x168 [] device_initial_probe+0x24/0x30 [] bus_probe_device+0xa0/0xa8 [] deferred_probe_work_func+0x94/0xf0 [] process_one_work+0x1d8/0x6e0 Signed-off-by: Yong Wu Signed-off-by: Joerg Roedel --- drivers/memory/mtk-smi.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c index 8ffe3216d092..8f2d152a78b8 100644 --- a/drivers/memory/mtk-smi.c +++ b/drivers/memory/mtk-smi.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -288,9 +289,6 @@ static int mtk_smi_larb_probe(struct platform_device *pdev) struct platform_device *smi_pdev; int err; - if (!dev->pm_domain) - return -EPROBE_DEFER; - larb = devm_kzalloc(dev, sizeof(*larb), GFP_KERNEL); if (!larb) return -ENOMEM; @@ -326,6 +324,8 @@ static int mtk_smi_larb_probe(struct platform_device *pdev) smi_pdev = of_find_device_by_node(smi_node); of_node_put(smi_node); if (smi_pdev) { + if (!platform_get_drvdata(smi_pdev)) + return -EPROBE_DEFER; larb->smi_common_dev = &smi_pdev->dev; } else { dev_err(dev, "Failed to get the smi_common device\n"); @@ -377,9 +377,6 @@ static int mtk_smi_common_probe(struct platform_device *pdev) enum mtk_smi_gen smi_gen; int ret; - if (!dev->pm_domain) - return -EPROBE_DEFER; - common = devm_kzalloc(dev, sizeof(*common), GFP_KERNEL); if (!common) return -ENOMEM; @@ -456,4 +453,4 @@ err_unreg_smi: return ret; } -subsys_initcall(mtk_smi_init); +module_init(mtk_smi_init); From 3ff2dcc058946c48afd3f2c8cd9e3a880b466c5b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 23 Aug 2017 16:28:09 +0200 Subject: [PATCH 73/87] iommu/pamu: Fix PAMU boot crash Commit 68a17f0be6fe introduced an initialization order problem, where devices are linked against an iommu which is not yet initialized. Fix it by initializing the iommu-device before the iommu-ops are registered against the bus. Reported-by: Michael Ellerman Fixes: 68a17f0be6fe ('iommu/pamu: Add support for generic iommu-device') Signed-off-by: Joerg Roedel --- drivers/iommu/fsl_pamu.c | 17 ----------------- drivers/iommu/fsl_pamu.h | 3 --- drivers/iommu/fsl_pamu_domain.c | 17 ++++++++++++++++- drivers/iommu/fsl_pamu_domain.h | 2 -- 4 files changed, 16 insertions(+), 23 deletions(-) diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index 9238a85de53e..9ee8e9e161f5 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -44,8 +44,6 @@ static struct paace *spaact; static bool probed; /* Has PAMU been probed? */ -struct iommu_device pamu_iommu; /* IOMMU core code handle */ - /* * Table for matching compatible strings, for device tree * guts node, for QorIQ SOCs. @@ -1156,18 +1154,6 @@ static int fsl_pamu_probe(struct platform_device *pdev) if (ret) goto error_genpool; - ret = iommu_device_sysfs_add(&pamu_iommu, dev, NULL, "iommu0"); - if (ret) - goto error_genpool; - - iommu_device_set_ops(&pamu_iommu, &fsl_pamu_ops); - - ret = iommu_device_register(&pamu_iommu); - if (ret) { - dev_err(dev, "Can't register iommu device\n"); - goto error_sysfs; - } - pamubypenr = in_be32(&guts_regs->pamubypenr); for (pamu_reg_off = 0, pamu_counter = 0x80000000; pamu_reg_off < size; @@ -1195,9 +1181,6 @@ static int fsl_pamu_probe(struct platform_device *pdev) return 0; -error_sysfs: - iommu_device_sysfs_remove(&pamu_iommu); - error_genpool: gen_pool_destroy(spaace_pool); diff --git a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h index fa48222f3421..c3434f29c967 100644 --- a/drivers/iommu/fsl_pamu.h +++ b/drivers/iommu/fsl_pamu.h @@ -391,9 +391,6 @@ struct ome { #define EOE_WWSAOL 0x1e /* Write with stash allocate only and lock */ #define EOE_VALID 0x80 -extern const struct iommu_ops fsl_pamu_ops; -extern struct iommu_device pamu_iommu; /* IOMMU core code handle */ - /* Function prototypes */ int pamu_domain_init(void); int pamu_enable_liodn(int liodn); diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index 914953b87bf1..e0fcd079cca9 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -33,6 +33,8 @@ static struct kmem_cache *fsl_pamu_domain_cache; static struct kmem_cache *iommu_devinfo_cache; static DEFINE_SPINLOCK(device_domain_lock); +struct iommu_device pamu_iommu; /* IOMMU core code handle */ + static struct fsl_dma_domain *to_fsl_dma_domain(struct iommu_domain *dom) { return container_of(dom, struct fsl_dma_domain, iommu_domain); @@ -1050,7 +1052,7 @@ static u32 fsl_pamu_get_windows(struct iommu_domain *domain) return dma_domain->win_cnt; } -const struct iommu_ops fsl_pamu_ops = { +static const struct iommu_ops fsl_pamu_ops = { .capable = fsl_pamu_capable, .domain_alloc = fsl_pamu_domain_alloc, .domain_free = fsl_pamu_domain_free, @@ -1076,6 +1078,19 @@ int __init pamu_domain_init(void) if (ret) return ret; + ret = iommu_device_sysfs_add(&pamu_iommu, NULL, NULL, "iommu0"); + if (ret) + return ret; + + iommu_device_set_ops(&pamu_iommu, &fsl_pamu_ops); + + ret = iommu_device_register(&pamu_iommu); + if (ret) { + iommu_device_sysfs_remove(&pamu_iommu); + pr_err("Can't register iommu device\n"); + return ret; + } + bus_set_iommu(&platform_bus_type, &fsl_pamu_ops); bus_set_iommu(&pci_bus_type, &fsl_pamu_ops); diff --git a/drivers/iommu/fsl_pamu_domain.h b/drivers/iommu/fsl_pamu_domain.h index 6d8661e488fb..f2b0f741d3de 100644 --- a/drivers/iommu/fsl_pamu_domain.h +++ b/drivers/iommu/fsl_pamu_domain.h @@ -21,8 +21,6 @@ #include "fsl_pamu.h" -const struct iommu_ops fsl_pamu_ops; - struct dma_window { phys_addr_t paddr; u64 size; From 6ce5b0f22d6061b33aaf302f73302f402fea0c17 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 23 Aug 2017 15:42:45 +0200 Subject: [PATCH 74/87] iommu: qcom: annotate PM functions as __maybe_unused The qcom_iommu_disable_clocks() function is only called from PM code that is hidden in an #ifdef, causing a harmless warning without CONFIG_PM: drivers/iommu/qcom_iommu.c:601:13: error: 'qcom_iommu_disable_clocks' defined but not used [-Werror=unused-function] static void qcom_iommu_disable_clocks(struct qcom_iommu_dev *qcom_iommu) drivers/iommu/qcom_iommu.c:581:12: error: 'qcom_iommu_enable_clocks' defined but not used [-Werror=unused-function] static int qcom_iommu_enable_clocks(struct qcom_iommu_dev *qcom_iommu) Replacing that #ifdef with __maybe_unused annotations lets the compiler drop the functions silently instead. Fixes: 0ae349a0f33f ("iommu/qcom: Add qcom_iommu") Acked-by: Rob Clark Signed-off-by: Arnd Bergmann Signed-off-by: Joerg Roedel --- drivers/iommu/qcom_iommu.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index 48b62aa52787..c8a587d034b0 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -860,8 +860,7 @@ static int qcom_iommu_device_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int qcom_iommu_resume(struct device *dev) +static int __maybe_unused qcom_iommu_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct qcom_iommu_dev *qcom_iommu = platform_get_drvdata(pdev); @@ -869,7 +868,7 @@ static int qcom_iommu_resume(struct device *dev) return qcom_iommu_enable_clocks(qcom_iommu); } -static int qcom_iommu_suspend(struct device *dev) +static int __maybe_unused qcom_iommu_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct qcom_iommu_dev *qcom_iommu = platform_get_drvdata(pdev); @@ -878,7 +877,6 @@ static int qcom_iommu_suspend(struct device *dev) return 0; } -#endif static const struct dev_pm_ops qcom_iommu_pm_ops = { SET_RUNTIME_PM_OPS(qcom_iommu_suspend, qcom_iommu_resume, NULL) From 4f1c8ea16b643be339f0e80b3535bc5b5fe8f9a3 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Thu, 24 Aug 2017 15:42:11 +0800 Subject: [PATCH 75/87] iommu/mediatek: Fix a build fail of m4u_type The commit ("iommu/mediatek: Enlarge the validate PA range for 4GB mode") introduce the following build error: drivers/iommu/mtk_iommu.c: In function 'mtk_iommu_hw_init': >> drivers/iommu/mtk_iommu.c:536:30: error: 'const struct mtk_iommu_data' has no member named 'm4u_type'; did you mean 'm4u_dom'? if (data->enable_4GB && data->m4u_type != M4U_MT8173) { This patch fix it, use "m4u_plat" instead of "m4u_type". Reported-by: kernel test robot Signed-off-by: Yong Wu Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 4f233e13c28a..bc00e404514c 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -533,7 +533,7 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base, data->enable_4GB), data->base + REG_MMU_IVRP_PADDR); - if (data->enable_4GB && data->m4u_type != M4U_MT8173) { + if (data->enable_4GB && data->m4u_plat != M4U_MT8173) { /* * If 4GB mode is enabled, the validate PA range is from * 0x1_0000_0000 to 0x1_ffff_ffff. here record bit[32:30]. From 419399804382d1aa7a3fc652915e8f3b61aff941 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Thu, 24 Aug 2017 15:42:12 +0800 Subject: [PATCH 76/87] iommu/mediatek: Fix a build warning of BIT(32) in ARM The commit ("iommu/mediatek: Enlarge the validate PA range for 4GB mode") introduce the following build warning while ARCH=arm: drivers/iommu/mtk_iommu.c: In function 'mtk_iommu_iova_to_phys': include/linux/bitops.h:6:24: warning: left shift count >= width of type [-Wshift-count-overflow] #define BIT(nr) (1UL << (nr)) ^ >> drivers/iommu/mtk_iommu.c:407:9: note: in expansion of macro 'BIT' pa |= BIT(32); drivers/iommu/mtk_iommu.c: In function 'mtk_iommu_probe': include/linux/bitops.h:6:24: warning: left shift count >= width of type [-Wshift-count-overflow] #define BIT(nr) (1UL << (nr)) ^ drivers/iommu/mtk_iommu.c:589:35: note: in expansion of macro 'BIT' data->enable_4GB = !!(max_pfn > (BIT(32) >> PAGE_SHIFT)); Use BIT_ULL instead of BIT. Reported-by: kernel test robot Signed-off-by: Yong Wu Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index bc00e404514c..bd515be5b380 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -404,7 +404,7 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, spin_unlock_irqrestore(&dom->pgtlock, flags); if (data->enable_4GB) - pa |= BIT(32); + pa |= BIT_ULL(32); return pa; } @@ -586,7 +586,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) data->protect_base = ALIGN(virt_to_phys(protect), MTK_PROTECT_PA_ALIGN); /* Whether the current dram is over 4GB */ - data->enable_4GB = !!(max_pfn > (BIT(32) >> PAGE_SHIFT)); + data->enable_4GB = !!(max_pfn > (BIT_ULL(32) >> PAGE_SHIFT)); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); data->base = devm_ioremap_resource(dev, res); From ec62b1ab0f4ccbc48aa8b9852cc25b38a1f12d1e Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Thu, 24 Aug 2017 21:13:57 +0800 Subject: [PATCH 77/87] iommu/amd: Check if domain is NULL in get_domain() and return -EBUSY In get_domain(), 'domain' could be NULL before it's passed to dma_ops_domain() to dereference. And the current code calling get_domain() can't deal with the returned 'domain' well if its value is NULL. So before dma_ops_domain() calling, check if 'domain' is NULL, If yes just return ERR_PTR(-EBUSY) directly. Reported-by: Dan Carpenter Fixes: df3f7a6e8e85 ('iommu/amd: Use is_attach_deferred call-back') Signed-off-by: Baoquan He Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 9e8ea1907796..b531307a9360 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2472,6 +2472,9 @@ static struct protection_domain *get_domain(struct device *dev) domain = to_pdomain(io_domain); attach_device(dev, domain); } + if (domain == NULL) + return ERR_PTR(-EBUSY); + if (!dma_ops_domain(domain)) return ERR_PTR(-EBUSY); From 0688a09990986cd8c2fda26afb04ce0a599ced3f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 23 Aug 2017 15:50:03 +0200 Subject: [PATCH 78/87] iommu/amd: Rename a few flush functions Rename a few iommu cache-flush functions that start with iommu_ to start with amd_iommu now. This is to prevent name collisions with generic iommu code later on. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index cabcaa506ed6..7798fcf41b9b 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1122,7 +1122,7 @@ static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid) return iommu_queue_command(iommu, &cmd); } -static void iommu_flush_dte_all(struct amd_iommu *iommu) +static void amd_iommu_flush_dte_all(struct amd_iommu *iommu) { u32 devid; @@ -1136,7 +1136,7 @@ static void iommu_flush_dte_all(struct amd_iommu *iommu) * This function uses heavy locking and may disable irqs for some time. But * this is no issue because it is only called during resume. */ -static void iommu_flush_tlb_all(struct amd_iommu *iommu) +static void amd_iommu_flush_tlb_all(struct amd_iommu *iommu) { u32 dom_id; @@ -1150,7 +1150,7 @@ static void iommu_flush_tlb_all(struct amd_iommu *iommu) iommu_completion_wait(iommu); } -static void iommu_flush_all(struct amd_iommu *iommu) +static void amd_iommu_flush_all(struct amd_iommu *iommu) { struct iommu_cmd cmd; @@ -1169,7 +1169,7 @@ static void iommu_flush_irt(struct amd_iommu *iommu, u16 devid) iommu_queue_command(iommu, &cmd); } -static void iommu_flush_irt_all(struct amd_iommu *iommu) +static void amd_iommu_flush_irt_all(struct amd_iommu *iommu) { u32 devid; @@ -1182,11 +1182,11 @@ static void iommu_flush_irt_all(struct amd_iommu *iommu) void iommu_flush_all_caches(struct amd_iommu *iommu) { if (iommu_feature(iommu, FEATURE_IA)) { - iommu_flush_all(iommu); + amd_iommu_flush_all(iommu); } else { - iommu_flush_dte_all(iommu); - iommu_flush_irt_all(iommu); - iommu_flush_tlb_all(iommu); + amd_iommu_flush_dte_all(iommu); + amd_iommu_flush_irt_all(iommu); + amd_iommu_flush_tlb_all(iommu); } } From a175a67d306ab3fd0e140595f49290b80c909ae8 Mon Sep 17 00:00:00 2001 From: Oleksandr Tyshchenko Date: Wed, 23 Aug 2017 17:31:42 +0300 Subject: [PATCH 79/87] iommu/ipmmu-vmsa: Rereserving a free context before setting up a pagetable Reserving a free context is both quicker and more likely to fail (due to limited hardware resources) than setting up a pagetable. What is more the pagetable init/cleanup code could require the context to be set up. Signed-off-by: Oleksandr Tyshchenko CC: Robin Murphy CC: Laurent Pinchart Reviewed-by: Robin Murphy Reviewed-by: Laurent Pinchart CC: Joerg Roedel Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 42 +++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 5a350582f359..1711fd306d33 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -324,6 +324,19 @@ static int ipmmu_domain_allocate_context(struct ipmmu_vmsa_device *mmu, return ret; } +static void ipmmu_domain_free_context(struct ipmmu_vmsa_device *mmu, + unsigned int context_id) +{ + unsigned long flags; + + spin_lock_irqsave(&mmu->lock, flags); + + clear_bit(context_id, mmu->ctx); + mmu->domains[context_id] = NULL; + + spin_unlock_irqrestore(&mmu->lock, flags); +} + static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) { u64 ttbr; @@ -353,22 +366,22 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) */ domain->cfg.iommu_dev = domain->mmu->dev; - domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &domain->cfg, - domain); - if (!domain->iop) - return -EINVAL; - /* * Find an unused context. */ ret = ipmmu_domain_allocate_context(domain->mmu, domain); - if (ret == IPMMU_CTX_MAX) { - free_io_pgtable_ops(domain->iop); + if (ret == IPMMU_CTX_MAX) return -EBUSY; - } domain->context_id = ret; + domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &domain->cfg, + domain); + if (!domain->iop) { + ipmmu_domain_free_context(domain->mmu, domain->context_id); + return -EINVAL; + } + /* TTBR0 */ ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0]; ipmmu_ctx_write(domain, IMTTLBR0, ttbr); @@ -409,19 +422,6 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) return 0; } -static void ipmmu_domain_free_context(struct ipmmu_vmsa_device *mmu, - unsigned int context_id) -{ - unsigned long flags; - - spin_lock_irqsave(&mmu->lock, flags); - - clear_bit(context_id, mmu->ctx); - mmu->domains[context_id] = NULL; - - spin_unlock_irqrestore(&mmu->lock, flags); -} - static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain) { /* From 8da4af95867e339d4aa61f9a1814bbfb2a55468e Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Mon, 28 Aug 2017 23:47:27 +0530 Subject: [PATCH 80/87] iommu/ipmmu-vmsa: Make ipmmu_gather_ops const Make these const as they are not modified anywhere. Signed-off-by: Bhumika Goyal Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 1711fd306d33..195d6e93ac71 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -295,7 +295,7 @@ static void ipmmu_tlb_add_flush(unsigned long iova, size_t size, /* The hardware doesn't support selective TLB flush. */ } -static struct iommu_gather_ops ipmmu_gather_ops = { +static const struct iommu_gather_ops ipmmu_gather_ops = { .tlb_flush_all = ipmmu_tlb_flush_all, .tlb_add_flush = ipmmu_tlb_add_flush, .tlb_sync = ipmmu_tlb_flush_all, From 0b9a36947c6bfa4b63224e0906c743aa3314a2d3 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Mon, 28 Aug 2017 17:42:05 +0530 Subject: [PATCH 81/87] iommu/exynos: Constify iommu_ops iommu_ops are not supposed to change at runtime. Functions 'iommu_device_set_ops' and 'bus_set_iommu' working with const iommu_ops provided by . So mark the non-const structs as const. Signed-off-by: Arvind Yadav Acked-by: Marek Szyprowski Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 622e44662ea1..f596fcc32898 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -565,7 +565,7 @@ static void sysmmu_tlb_invalidate_entry(struct sysmmu_drvdata *data, spin_unlock_irqrestore(&data->lock, flags); } -static struct iommu_ops exynos_iommu_ops; +static const struct iommu_ops exynos_iommu_ops; static int __init exynos_sysmmu_probe(struct platform_device *pdev) { @@ -1326,7 +1326,7 @@ static int exynos_iommu_of_xlate(struct device *dev, return 0; } -static struct iommu_ops exynos_iommu_ops = { +static const struct iommu_ops exynos_iommu_ops = { .domain_alloc = exynos_iommu_domain_alloc, .domain_free = exynos_iommu_domain_free, .attach_dev = exynos_iommu_attach_device, From 96302d89a03524e04d46ec82c6730881bb755923 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 30 Aug 2017 15:06:43 +0200 Subject: [PATCH 82/87] arm/tegra: Call bus_set_iommu() after iommu_device_register() The bus_set_iommu() function will call the add_device() call-back which needs the iommu to be registered. Reported-by: Jon Hunter Fixes: 0b480e447006 ('iommu/tegra: Add support for struct iommu_device') Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-smmu.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 2802e12e6a54..3b6449e2cbf1 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -949,10 +949,6 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev, tegra_smmu_ahb_enable(); - err = bus_set_iommu(&platform_bus_type, &tegra_smmu_ops); - if (err < 0) - return ERR_PTR(err); - err = iommu_device_sysfs_add(&smmu->iommu, dev, NULL, dev_name(dev)); if (err) return ERR_PTR(err); @@ -965,6 +961,13 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev, return ERR_PTR(err); } + err = bus_set_iommu(&platform_bus_type, &tegra_smmu_ops); + if (err < 0) { + iommu_device_unregister(&smmu->iommu); + iommu_device_sysfs_remove(&smmu->iommu); + return ERR_PTR(err); + } + if (IS_ENABLED(CONFIG_DEBUG_FS)) tegra_smmu_debugfs_init(smmu); From 9d8c3af31607819a61011d746e861b8096ac9761 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Tue, 8 Aug 2017 13:29:27 -0700 Subject: [PATCH 83/87] iommu/vt-d: IOMMU Page Request needs to check if address is canonical. Page Request from devices that support device-tlb would request translation to pre-cache them in device to avoid overhead of IOMMU lookups. IOMMU needs to check for canonicallity of the address before performing page-fault processing. To: Joerg Roedel To: linux-kernel@vger.kernel.org> Cc: iommu@lists.linux-foundation.org Cc: David Woodhouse Cc: Jacob Pan Cc: Ashok Raj Signed-off-by: Ashok Raj Reported-by: Sudeep Dutt Acked-by: David Woodhouse Signed-off-by: Joerg Roedel --- drivers/iommu/intel-svm.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index f167c0d84ebf..0c9f0773601d 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -24,6 +24,7 @@ #include #include #include +#include static irqreturn_t prq_event_thread(int irq, void *d); @@ -555,6 +556,14 @@ static bool access_error(struct vm_area_struct *vma, struct page_req_dsc *req) return (requested & ~vma->vm_flags) != 0; } +static bool is_canonical_address(u64 addr) +{ + int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1); + long saddr = (long) addr; + + return (((saddr << shift) >> shift) == saddr); +} + static irqreturn_t prq_event_thread(int irq, void *d) { struct intel_iommu *iommu = d; @@ -612,6 +621,11 @@ static irqreturn_t prq_event_thread(int irq, void *d) /* If the mm is already defunct, don't handle faults. */ if (!mmget_not_zero(svm->mm)) goto bad_req; + + /* If address is not canonical, return invalid response */ + if (!is_canonical_address(address)) + goto bad_req; + down_read(&svm->mm->mmap_sem); vma = find_extend_vma(svm->mm, address); if (!vma || address < vma->vm_start) From 11b93ebfa03e1cf45d9ad508eaf2c4d3547e06ca Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Tue, 8 Aug 2017 13:29:28 -0700 Subject: [PATCH 84/87] iommu/vt-d: Avoid calling virt_to_phys() on null pointer New kernels with debug show panic() from __phys_addr() checks. Avoid calling virt_to_phys() when pasid_state_tbl pointer is null To: Joerg Roedel To: linux-kernel@vger.kernel.org> Cc: iommu@lists.linux-foundation.org Cc: David Woodhouse Cc: Jacob Pan Cc: Ashok Raj Fixes: 2f26e0a9c9860 ('iommu/vt-d: Add basic SVM PASID support') Signed-off-by: Ashok Raj Acked-by: David Woodhouse Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index bffc880f3fef..695f54ad07cc 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5349,7 +5349,8 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd sdev->sid = PCI_DEVID(info->bus, info->devfn); if (!(ctx_lo & CONTEXT_PASIDE)) { - context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table); + if (iommu->pasid_state_table) + context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table); context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | intel_iommu_get_pts(iommu); From cceb84519520c775d2660ea6b878215cd116af75 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Mon, 28 Aug 2017 17:42:50 +0530 Subject: [PATCH 85/87] iommu/s390: Constify iommu_ops iommu_ops are not supposed to change at runtime. Functions 'bus_set_iommu' working with const iommu_ops provided by . So mark the non-const structs as const. Signed-off-by: Arvind Yadav Acked-by: Gerald Schaefer Signed-off-by: Joerg Roedel --- drivers/iommu/s390-iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 85f3bc52efc2..0e2f31f9032b 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -18,7 +18,7 @@ */ #define S390_IOMMU_PGSIZES (~0xFFFUL) -static struct iommu_ops s390_iommu_ops; +static const struct iommu_ops s390_iommu_ops; struct s390_domain { struct iommu_domain domain; @@ -362,7 +362,7 @@ void zpci_destroy_iommu(struct zpci_dev *zdev) iommu_device_sysfs_remove(&zdev->iommu_dev); } -static struct iommu_ops s390_iommu_ops = { +static const struct iommu_ops s390_iommu_ops = { .capable = s390_iommu_capable, .domain_alloc = s390_domain_alloc, .domain_free = s390_domain_free, From add02cfdc9bc2987b0121861d5bb0c7392865be9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 23 Aug 2017 15:50:04 +0200 Subject: [PATCH 86/87] iommu: Introduce Interface for IOMMU TLB Flushing With the current IOMMU-API the hardware TLBs have to be flushed in every iommu_ops->unmap() call-back. For unmapping large amounts of address space, like it happens when a KVM domain with assigned devices is destroyed, this causes thousands of unnecessary TLB flushes in the IOMMU hardware because the unmap call-back runs for every unmapped physical page. With the TLB Flush Interface and the new iommu_unmap_fast() function introduced here the need to clean the hardware TLBs is removed from the unmapping code-path. Users of iommu_unmap_fast() have to explicitly call the TLB-Flush functions to sync the page-table changes to the hardware. Three functions for TLB-Flushes are introduced: * iommu_flush_tlb_all() - Flushes all TLB entries associated with that domain. TLBs entries are flushed when this function returns. * iommu_tlb_range_add() - This will add a given range to the flush queue for this domain. * iommu_tlb_sync() - Flushes all queued ranges from the hardware TLBs. Returns when the flush is finished. The semantic of this interface is intentionally similar to the iommu_gather_ops from the io-pgtable code. Cc: Alex Williamson Cc: Will Deacon Cc: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 32 +++++++++++++++++++++++---- include/linux/iommu.h | 50 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 77 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 5499a0387349..31c2b1dc8cfd 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -527,6 +527,8 @@ static int iommu_group_create_direct_mappings(struct iommu_group *group, } + iommu_flush_tlb_all(domain); + out: iommu_put_resv_regions(dev, &mappings); @@ -1547,13 +1549,16 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova, } EXPORT_SYMBOL_GPL(iommu_map); -size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) +static size_t __iommu_unmap(struct iommu_domain *domain, + unsigned long iova, size_t size, + bool sync) { + const struct iommu_ops *ops = domain->ops; size_t unmapped_page, unmapped = 0; - unsigned int min_pagesz; unsigned long orig_iova = iova; + unsigned int min_pagesz; - if (unlikely(domain->ops->unmap == NULL || + if (unlikely(ops->unmap == NULL || domain->pgsize_bitmap == 0UL)) return -ENODEV; @@ -1583,10 +1588,13 @@ size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) while (unmapped < size) { size_t pgsize = iommu_pgsize(domain, iova, size - unmapped); - unmapped_page = domain->ops->unmap(domain, iova, pgsize); + unmapped_page = ops->unmap(domain, iova, pgsize); if (!unmapped_page) break; + if (sync && ops->iotlb_range_add) + ops->iotlb_range_add(domain, iova, pgsize); + pr_debug("unmapped: iova 0x%lx size 0x%zx\n", iova, unmapped_page); @@ -1594,11 +1602,27 @@ size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) unmapped += unmapped_page; } + if (sync && ops->iotlb_sync) + ops->iotlb_sync(domain); + trace_unmap(orig_iova, size, unmapped); return unmapped; } + +size_t iommu_unmap(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + return __iommu_unmap(domain, iova, size, true); +} EXPORT_SYMBOL_GPL(iommu_unmap); +size_t iommu_unmap_fast(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + return __iommu_unmap(domain, iova, size, false); +} +EXPORT_SYMBOL_GPL(iommu_unmap_fast); + size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, unsigned int nents, int prot) { diff --git a/include/linux/iommu.h b/include/linux/iommu.h index f1ce8e517d8d..50be4fd338e4 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -167,6 +167,10 @@ struct iommu_resv_region { * @map: map a physically contiguous memory region to an iommu domain * @unmap: unmap a physically contiguous memory region from an iommu domain * @map_sg: map a scatter-gather list of physically contiguous memory chunks + * @flush_tlb_all: Synchronously flush all hardware TLBs for this domain + * @tlb_range_add: Add a given iova range to the flush queue for this domain + * @tlb_sync: Flush all queued ranges from the hardware TLBs and empty flush + * queue * to an iommu domain * @iova_to_phys: translate iova to physical address * @add_device: add device to iommu grouping @@ -199,6 +203,10 @@ struct iommu_ops { size_t size); size_t (*map_sg)(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, unsigned int nents, int prot); + void (*flush_iotlb_all)(struct iommu_domain *domain); + void (*iotlb_range_add)(struct iommu_domain *domain, + unsigned long iova, size_t size); + void (*iotlb_sync)(struct iommu_domain *domain); phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); int (*add_device)(struct device *dev); void (*remove_device)(struct device *dev); @@ -286,7 +294,9 @@ extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot); extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t size); + size_t size); +extern size_t iommu_unmap_fast(struct iommu_domain *domain, + unsigned long iova, size_t size); extern size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg,unsigned int nents, int prot); @@ -343,6 +353,25 @@ extern void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr) extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev, unsigned long iova, int flags); +static inline void iommu_flush_tlb_all(struct iommu_domain *domain) +{ + if (domain->ops->flush_iotlb_all) + domain->ops->flush_iotlb_all(domain); +} + +static inline void iommu_tlb_range_add(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + if (domain->ops->iotlb_range_add) + domain->ops->iotlb_range_add(domain, iova, size); +} + +static inline void iommu_tlb_sync(struct iommu_domain *domain) +{ + if (domain->ops->iotlb_sync) + domain->ops->iotlb_sync(domain); +} + static inline size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, unsigned int nents, int prot) @@ -436,6 +465,12 @@ static inline int iommu_unmap(struct iommu_domain *domain, unsigned long iova, return -ENODEV; } +static inline int iommu_unmap_fast(struct iommu_domain *domain, unsigned long iova, + int gfp_order) +{ + return -ENODEV; +} + static inline size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, unsigned int nents, int prot) @@ -443,6 +478,19 @@ static inline size_t iommu_map_sg(struct iommu_domain *domain, return -ENODEV; } +static inline void iommu_flush_tlb_all(struct iommu_domain *domain) +{ +} + +static inline void iommu_tlb_range_add(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ +} + +static inline void iommu_tlb_sync(struct iommu_domain *domain) +{ +} + static inline int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, phys_addr_t paddr, u64 size, int prot) From 5082219b6a61cab585765a2ce4cd2a1f2f15dcdc Mon Sep 17 00:00:00 2001 From: Filippo Sironi Date: Thu, 31 Aug 2017 10:58:11 +0200 Subject: [PATCH 87/87] iommu/vt-d: Don't be too aggressive when clearing one context entry Previously, we were invalidating context cache and IOTLB globally when clearing one context entry. This is a tad too aggressive. Invalidate the context cache and IOTLB for the interested device only. Signed-off-by: Filippo Sironi Cc: David Woodhouse Cc: David Woodhouse Cc: Joerg Roedel Cc: Jacob Pan Cc: iommu@lists.linux-foundation.org Cc: linux-kernel@vger.kernel.org Acked-by: David Woodhouse Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 42 +++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 695f54ad07cc..5b112b6a2c9c 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -974,20 +974,6 @@ static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn) return ret; } -static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn) -{ - struct context_entry *context; - unsigned long flags; - - spin_lock_irqsave(&iommu->lock, flags); - context = iommu_context_addr(iommu, bus, devfn, 0); - if (context) { - context_clear_entry(context); - __iommu_flush_cache(iommu, context, sizeof(*context)); - } - spin_unlock_irqrestore(&iommu->lock, flags); -} - static void free_context_table(struct intel_iommu *iommu) { int i; @@ -2361,13 +2347,33 @@ static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long i static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn) { + unsigned long flags; + struct context_entry *context; + u16 did_old; + if (!iommu) return; - clear_context_table(iommu, bus, devfn); - iommu->flush.flush_context(iommu, 0, 0, 0, - DMA_CCMD_GLOBAL_INVL); - iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); + spin_lock_irqsave(&iommu->lock, flags); + context = iommu_context_addr(iommu, bus, devfn, 0); + if (!context) { + spin_unlock_irqrestore(&iommu->lock, flags); + return; + } + did_old = context_domain_id(context); + context_clear_entry(context); + __iommu_flush_cache(iommu, context, sizeof(*context)); + spin_unlock_irqrestore(&iommu->lock, flags); + iommu->flush.flush_context(iommu, + did_old, + (((u16)bus) << 8) | devfn, + DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL); + iommu->flush.flush_iotlb(iommu, + did_old, + 0, + 0, + DMA_TLB_DSI_FLUSH); } static inline void unlink_domain_info(struct device_domain_info *info)