diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 9557808e8937..f8567e95f98b 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -8,6 +8,7 @@ config ARM select ARCH_HAS_CPU_FINALIZE_INIT if MMU select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL if MMU + select ARCH_HAS_DMA_ALLOC if MMU select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 3e96486d9528..4b3e93cac723 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -6,15 +6,15 @@ config M68K select ARCH_HAS_BINFMT_FLAT select ARCH_HAS_CPU_FINALIZE_INIT if MMU select ARCH_HAS_CURRENT_STACK_POINTER - select ARCH_HAS_DMA_PREP_COHERENT if HAS_DMA && MMU && !COLDFIRE - select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA + select ARCH_HAS_DMA_PREP_COHERENT if M68K_NONCOHERENT_DMA && !COLDFIRE + select ARCH_HAS_SYNC_DMA_FOR_DEVICE if M68K_NONCOHERENT_DMA select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS select ARCH_MIGHT_HAVE_PC_PARPORT if ISA select ARCH_NO_PREEMPT if !COLDFIRE select ARCH_USE_MEMTEST if MMU_MOTOROLA select ARCH_WANT_IPC_PARSE_VERSION select BINFMT_FLAT_ARGVP_ENVP_ON_STACK - select DMA_DIRECT_REMAP if HAS_DMA && MMU && !COLDFIRE + select DMA_DIRECT_REMAP if M68K_NONCOHERENT_DMA && !COLDFIRE select GENERIC_ATOMIC64 select GENERIC_CPU_DEVICES select GENERIC_IOMAP diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index b826e9c677b2..ad69b466a08b 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -535,3 +535,15 @@ config CACHE_COPYBACK The ColdFire CPU cache is set into Copy-back mode. endchoice endif # HAVE_CACHE_CB + +# Coldfire cores that do not have a data cache configured can do coherent DMA. 
+config COLDFIRE_COHERENT_DMA + bool + default y + depends on COLDFIRE + depends on !HAVE_CACHE_CB && !CACHE_D && !CACHE_BOTH + +config M68K_NONCOHERENT_DMA + bool + default y + depends on HAS_DMA && !COLDFIRE_COHERENT_DMA diff --git a/arch/m68k/kernel/Makefile b/arch/m68k/kernel/Makefile index af015447dfb4..01fb69a5095f 100644 --- a/arch/m68k/kernel/Makefile +++ b/arch/m68k/kernel/Makefile @@ -23,7 +23,7 @@ obj-$(CONFIG_MMU_MOTOROLA) += ints.o vectors.o obj-$(CONFIG_MMU_SUN3) += ints.o vectors.o obj-$(CONFIG_PCI) += pcibios.o -obj-$(CONFIG_HAS_DMA) += dma.o +obj-$(CONFIG_M68K_NONCOHERENT_DMA) += dma.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_BOOTINFO_PROC) += bootinfo_proc.o diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c index 2e192a5df949..16063783aa80 100644 --- a/arch/m68k/kernel/dma.c +++ b/arch/m68k/kernel/dma.c @@ -4,20 +4,11 @@ * for more details. */ -#undef DEBUG - #include -#include #include -#include -#include -#include -#include -#include - #include -#if defined(CONFIG_MMU) && !defined(CONFIG_COLDFIRE) +#ifndef CONFIG_COLDFIRE void arch_dma_prep_coherent(struct page *page, size_t size) { cache_push(page_to_phys(page), size); @@ -33,29 +24,6 @@ pgprot_t pgprot_dmacoherent(pgprot_t prot) } return prot; } -#else -void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs) -{ - void *ret; - - if (dev == NULL || (*dev->dma_mask < 0xffffffff)) - gfp |= GFP_DMA; - ret = (void *)__get_free_pages(gfp, get_order(size)); - - if (ret != NULL) { - memset(ret, 0, size); - *dma_handle = virt_to_phys(ret); - } - return ret; -} - -void arch_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, unsigned long attrs) -{ - free_pages((unsigned long)vaddr, get_order(size)); -} - #endif /* CONFIG_MMU && !CONFIG_COLDFIRE */ void arch_sync_dma_for_device(phys_addr_t handle, size_t size, diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 
9288c39dbf39..fd69dfa0cdab 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -8,6 +8,7 @@ config PARISC select HAVE_FUNCTION_GRAPH_TRACER select HAVE_SYSCALL_TRACEPOINTS select ARCH_WANT_FRAME_POINTERS + select ARCH_HAS_DMA_ALLOC if PA11 select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 032c15b541ff..c3b7694a7485 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -363,6 +363,70 @@ static void fec_dump(struct net_device *ndev) } while (bdp != txq->bd.base); } +/* + * Coldfire does not support DMA coherent allocations, and has historically used + * a band-aid with a manual flush in fec_enet_rx_queue. + */ +#if defined(CONFIG_COLDFIRE) && !defined(CONFIG_COLDFIRE_COHERENT_DMA) +static void *fec_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t gfp) +{ + return dma_alloc_noncoherent(dev, size, handle, DMA_BIDIRECTIONAL, gfp); +} + +static void fec_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle) +{ + dma_free_noncoherent(dev, size, cpu_addr, handle, DMA_BIDIRECTIONAL); +} +#else /* !CONFIG_COLDFIRE || CONFIG_COLDFIRE_COHERENT_DMA */ +static void *fec_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t gfp) +{ + return dma_alloc_coherent(dev, size, handle, gfp); +} + +static void fec_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle) +{ + dma_free_coherent(dev, size, cpu_addr, handle); +} +#endif /* !CONFIG_COLDFIRE || CONFIG_COLDFIRE_COHERENT_DMA */ + +struct fec_dma_devres { + size_t size; + void *vaddr; + dma_addr_t dma_handle; +}; + +static void fec_dmam_release(struct device *dev, void *res) +{ + struct fec_dma_devres *this = res; + + fec_dma_free(dev, this->size, this->vaddr, this->dma_handle); +} + +static void *fec_dmam_alloc(struct device *dev, 
size_t size, dma_addr_t *handle, + gfp_t gfp) +{ + struct fec_dma_devres *dr; + void *vaddr; + + dr = devres_alloc(fec_dmam_release, sizeof(*dr), gfp); + if (!dr) + return NULL; + vaddr = fec_dma_alloc(dev, size, handle, gfp); + if (!vaddr) { + devres_free(dr); + return NULL; + } + dr->vaddr = vaddr; + dr->dma_handle = *handle; + dr->size = size; + devres_add(dev, dr); + return vaddr; +} + static inline bool is_ipv4_pkt(struct sk_buff *skb) { return skb->protocol == htons(ETH_P_IP) && ip_hdr(skb)->version == 4; @@ -1617,7 +1681,11 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) } #endif -#ifdef CONFIG_M532x +#if defined(CONFIG_COLDFIRE) && !defined(CONFIG_COLDFIRE_COHERENT_DMA) + /* + * Hacky flush of all caches instead of using the DMA API for the TSO + * headers. + */ flush_cache_all(); #endif rxq = fep->rx_queue[queue_id]; @@ -3243,10 +3311,9 @@ static void fec_enet_free_queue(struct net_device *ndev) for (i = 0; i < fep->num_tx_queues; i++) if (fep->tx_queue[i] && fep->tx_queue[i]->tso_hdrs) { txq = fep->tx_queue[i]; - dma_free_coherent(&fep->pdev->dev, - txq->bd.ring_size * TSO_HEADER_SIZE, - txq->tso_hdrs, - txq->tso_hdrs_dma); + fec_dma_free(&fep->pdev->dev, + txq->bd.ring_size * TSO_HEADER_SIZE, + txq->tso_hdrs, txq->tso_hdrs_dma); } for (i = 0; i < fep->num_rx_queues; i++) @@ -3276,10 +3343,9 @@ static int fec_enet_alloc_queue(struct net_device *ndev) txq->tx_stop_threshold = FEC_MAX_SKB_DESCS; txq->tx_wake_threshold = FEC_MAX_SKB_DESCS + 2 * MAX_SKB_FRAGS; - txq->tso_hdrs = dma_alloc_coherent(&fep->pdev->dev, + txq->tso_hdrs = fec_dma_alloc(&fep->pdev->dev, txq->bd.ring_size * TSO_HEADER_SIZE, - &txq->tso_hdrs_dma, - GFP_KERNEL); + &txq->tso_hdrs_dma, GFP_KERNEL); if (!txq->tso_hdrs) { ret = -ENOMEM; goto alloc_failed; @@ -3998,8 +4064,8 @@ static int fec_enet_init(struct net_device *ndev) bd_size = (fep->total_tx_ring_size + fep->total_rx_ring_size) * dsize; /* Allocate memory for buffer descriptors. 
*/ - cbd_base = dmam_alloc_coherent(&fep->pdev->dev, bd_size, &bd_dma, - GFP_KERNEL); + cbd_base = fec_dmam_alloc(&fep->pdev->dev, bd_size, &bd_dma, + GFP_KERNEL); if (!cbd_base) { ret = -ENOMEM; goto free_queue_mem; diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index f488997b0717..d62f5957f36b 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -135,6 +135,8 @@ config DMA_COHERENT_POOL config DMA_GLOBAL_POOL select DMA_DECLARE_COHERENT + depends on !ARCH_HAS_DMA_SET_UNCACHED + depends on !DMA_DIRECT_REMAP bool config DMA_DIRECT_REMAP @@ -142,6 +144,15 @@ config DMA_DIRECT_REMAP select DMA_COHERENT_POOL select DMA_NONCOHERENT_MMAP +# +# Fallback to arch code for DMA allocations. This should eventually go away. +# +config ARCH_HAS_DMA_ALLOC + depends on !ARCH_HAS_DMA_SET_UNCACHED + depends on !DMA_DIRECT_REMAP + depends on !DMA_GLOBAL_POOL + bool + config DMA_CMA bool "DMA Contiguous Memory Allocator" depends on HAVE_DMA_CONTIGUOUS && CMA diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 06366acd27b0..3de494375b7b 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -139,7 +139,7 @@ static const char *const maperr2str[] = { static const char *type2name[] = { [dma_debug_single] = "single", - [dma_debug_sg] = "scather-gather", + [dma_debug_sg] = "scatter-gather", [dma_debug_coherent] = "coherent", [dma_debug_resource] = "resource", }; diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 9596ae1aa0da..ed3056eb20b8 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -220,13 +220,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, return dma_direct_alloc_no_mapping(dev, size, dma_handle, gfp); if (!dev_is_dma_coherent(dev)) { - /* - * Fallback to the arch handler if it exists. This should - * eventually go away. 
- */ - if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && - !IS_ENABLED(CONFIG_DMA_GLOBAL_POOL) && + if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_ALLOC) && !is_swiotlb_for_alloc(dev)) return arch_dma_alloc(dev, size, dma_handle, gfp, attrs); @@ -240,27 +234,24 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_handle); /* - * Otherwise remap if the architecture is asking for it. But - * given that remapping memory is a blocking operation we'll - * instead have to dip into the atomic pools. + * Otherwise we require the architecture to either be able to + * mark arbitrary parts of the kernel direct mapping uncached, + * or remap it uncached. */ + set_uncached = IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED); remap = IS_ENABLED(CONFIG_DMA_DIRECT_REMAP); - if (remap) { - if (dma_direct_use_pool(dev, gfp)) - return dma_direct_alloc_from_pool(dev, size, - dma_handle, gfp); - } else { - if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED)) - return NULL; - set_uncached = true; + if (!set_uncached && !remap) { + pr_warn_once("coherent DMA allocations not supported on this platform.\n"); + return NULL; } } /* - * Decrypting memory may block, so allocate the memory from the atomic - * pools if we can't block. + * Remapping or decrypting memory may block, allocate the memory from + * the atomic pools instead if we aren't allowed to block. 
*/ - if (force_dma_unencrypted(dev) && dma_direct_use_pool(dev, gfp)) + if ((remap || force_dma_unencrypted(dev)) && + dma_direct_use_pool(dev, gfp)) return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp); /* we always manually zero the memory once we are done */ @@ -330,9 +321,7 @@ void dma_direct_free(struct device *dev, size_t size, return; } - if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && - !IS_ENABLED(CONFIG_DMA_GLOBAL_POOL) && + if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_ALLOC) && !dev_is_dma_coherent(dev) && !is_swiotlb_for_alloc(dev)) { arch_dma_free(dev, size, cpu_addr, dma_addr, attrs); diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index dff067bd56b1..26202274784f 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -1301,11 +1301,13 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, pool->slots[index + i].orig_addr = slot_addr(orig_addr, i); tlb_addr = slot_addr(pool->start, index) + offset; /* - * When dir == DMA_FROM_DEVICE we could omit the copy from the orig - * to the tlb buffer, if we knew for sure the device will - * overwrite the entire current content. But we don't. Thus - * unconditional bounce may prevent leaking swiotlb content (i.e. - * kernel memory) to user-space. + * When the device is writing memory, i.e. dir == DMA_FROM_DEVICE, copy + * the original buffer to the TLB buffer before initiating DMA in order + * to preserve the original's data if the device does a partial write, + * i.e. if the device doesn't overwrite the entire buffer. Preserving + * the original data, even if it's garbage, is necessary to match + * hardware behavior. Use of swiotlb is supposed to be transparent, + * i.e. swiotlb must not corrupt memory by clobbering unwritten bytes. */ swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE); return tlb_addr;