From 7543c3e3b9b88212fcd0aaf5cab5588797bdc7de Mon Sep 17 00:00:00 2001 From: Levi Yun Date: Fri, 25 Oct 2024 11:06:00 +0100 Subject: [PATCH 01/11] dma-debug: fix a possible deadlock on radix_lock radix_lock() shouldn't be held while holding dma_hash_entry[idx].lock otherwise, there's a possible deadlock scenario when dma debug API is called holding rq_lock(): CPU0 CPU1 CPU2 dma_free_attrs() check_unmap() add_dma_entry() __schedule() //out (A) rq_lock() get_hash_bucket() (A) dma_entry_hash check_sync() (A) radix_lock() (W) dma_entry_hash dma_entry_free() (W) radix_lock() // CPU2's one (W) rq_lock() CPU1 situation can happen when it extending radix tree and it tries to wake up kswapd via wake_all_kswapd(). CPU2 situation can happen while perf_event_task_sched_out() (i.e. dma sync operation is called while deleting perf_event using etm and etr tmc which are Arm Coresight hwtracing driver backends). To remove this possible situation, call dma_entry_free() after put_hash_bucket() in check_unmap(). Reported-by: Denis Nikitin Closes: https://lists.linaro.org/archives/list/coresight@lists.linaro.org/thread/2WMS7BBSF5OZYB63VT44U5YWLFP5HL6U/#RWM6MLQX5ANBTEQ2PRM7OXCBGCE6NPWU Signed-off-by: Levi Yun Signed-off-by: Christoph Hellwig --- kernel/dma/debug.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index d570535342cb..f6f0387761d0 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -1052,9 +1052,13 @@ static void check_unmap(struct dma_debug_entry *ref) } hash_bucket_del(entry); - dma_entry_free(entry); - put_hash_bucket(bucket, flags); + + /* + * Free the entry outside of bucket_lock to avoid ABBA deadlocks + * between that and radix_lock. + */ + dma_entry_free(entry); } static void check_for_stack(struct device *dev, From 9d4f645a1fd49eea70a21e8671d358ebe1c08d02 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 6 Oct 2024 09:20:01 +0200 Subject: [PATCH 02/11] dma-debug: store a phys_addr_t in struct dma_debug_entry dma-debug goes to great length to split incoming physical addresses into a PFN and offset to store them in struct dma_debug_entry, just to recombine those for all meaningful uses. Just store a phys_addr_t instead. Signed-off-by: Christoph Hellwig --- kernel/dma/debug.c | 79 ++++++++++++++++------------------------------ 1 file changed, 28 insertions(+), 51 deletions(-) diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index f6f0387761d0..4e3692afdf0d 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -59,8 +59,7 @@ enum map_err_types { * @direction: enum dma_data_direction * @sg_call_ents: 'nents' from dma_map_sg * @sg_mapped_ents: 'mapped_ents' from dma_map_sg - * @pfn: page frame of the start address - * @offset: offset of mapping relative to pfn + * @paddr: physical start address of the mapping * @map_err_type: track whether dma_mapping_error() was checked * @stack_len: number of backtrace entries in @stack_entries * @stack_entries: stack of backtrace history @@ -74,8 +73,7 @@ struct dma_debug_entry { int direction; int sg_call_ents; int sg_mapped_ents; - unsigned long pfn; - size_t offset; + phys_addr_t paddr; enum map_err_types map_err_type; #ifdef CONFIG_STACKTRACE unsigned int stack_len; @@ -389,14 +387,6 @@ static void hash_bucket_del(struct dma_debug_entry *entry) list_del(&entry->list); } -static unsigned long long phys_addr(struct dma_debug_entry *entry) -{ - if (entry->type == dma_debug_resource) - return __pfn_to_phys(entry->pfn) + entry->offset; - - return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset; -} - /* * For each mapping (initial cacheline in the case of * dma_alloc_coherent/dma_map_page, initial cacheline in each page of a @@ -428,8 +418,8 @@ static DEFINE_SPINLOCK(radix_lock); static phys_addr_t to_cacheline_number(struct dma_debug_entry *entry) { - return (entry->pfn << CACHELINE_PER_PAGE_SHIFT) + - (entry->offset >> L1_CACHE_SHIFT); + return ((entry->paddr >> PAGE_SHIFT) << CACHELINE_PER_PAGE_SHIFT) + + (offset_in_page(entry->paddr) >> L1_CACHE_SHIFT); } static int active_cacheline_read_overlap(phys_addr_t cln) @@ -538,11 +528,11 @@ void debug_dma_dump_mappings(struct device *dev) if (!dev || dev == entry->dev) { cln = to_cacheline_number(entry); dev_info(entry->dev, - "%s idx %d P=%llx N=%lx D=%llx L=%llx cln=%pa %s %s\n", + "%s idx %d P=%pa D=%llx L=%llx cln=%pa %s %s\n", type2name[entry->type], idx, - phys_addr(entry), entry->pfn, - entry->dev_addr, entry->size, - &cln, dir2name[entry->direction], + &entry->paddr, entry->dev_addr, + entry->size, &cln, + dir2name[entry->direction], maperr2str[entry->map_err_type]); } } @@ -569,13 +559,13 @@ static int dump_show(struct seq_file *seq, void *v) list_for_each_entry(entry, &bucket->list, list) { cln = to_cacheline_number(entry); seq_printf(seq, - "%s %s %s idx %d P=%llx N=%lx D=%llx L=%llx cln=%pa %s %s\n", + "%s %s %s idx %d P=%pa D=%llx L=%llx cln=%pa %s %s\n", dev_driver_string(entry->dev), dev_name(entry->dev), type2name[entry->type], idx, - phys_addr(entry), entry->pfn, - entry->dev_addr, entry->size, - &cln, dir2name[entry->direction], + &entry->paddr, entry->dev_addr, + entry->size, &cln, + dir2name[entry->direction], maperr2str[entry->map_err_type]); } spin_unlock_irqrestore(&bucket->lock, flags); @@ -1003,16 +993,16 @@ static void check_unmap(struct dma_debug_entry *ref) "[mapped as %s] [unmapped as %s]\n", ref->dev_addr, ref->size, type2name[entry->type], type2name[ref->type]); - } else if ((entry->type == dma_debug_coherent) && - (phys_addr(ref) != phys_addr(entry))) { + } else if (entry->type == dma_debug_coherent && + ref->paddr != entry->paddr) { err_printk(ref->dev, entry, "device driver frees " "DMA memory with different CPU address " "[device address=0x%016llx] [size=%llu bytes] " - "[cpu alloc address=0x%016llx] " - "[cpu free address=0x%016llx]", + "[cpu alloc address=0x%pa] " + "[cpu free address=0x%pa]", ref->dev_addr, ref->size, - phys_addr(entry), - phys_addr(ref)); + &entry->paddr, + &ref->paddr); } if (ref->sg_call_ents && ref->type == dma_debug_sg && @@ -1231,8 +1221,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, entry->dev = dev; entry->type = dma_debug_single; - entry->pfn = page_to_pfn(page); - entry->offset = offset; + entry->paddr = page_to_phys(page); entry->dev_addr = dma_addr; entry->size = size; entry->direction = direction; @@ -1327,8 +1316,7 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, entry->type = dma_debug_sg; entry->dev = dev; - entry->pfn = page_to_pfn(sg_page(s)); - entry->offset = s->offset; + entry->paddr = sg_phys(s); entry->size = sg_dma_len(s); entry->dev_addr = sg_dma_address(s); entry->direction = direction; @@ -1374,8 +1362,7 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, struct dma_debug_entry ref = { .type = dma_debug_sg, .dev = dev, - .pfn = page_to_pfn(sg_page(s)), - .offset = s->offset, + .paddr = sg_phys(s), .dev_addr = sg_dma_address(s), .size = sg_dma_len(s), .direction = dir, @@ -1414,16 +1401,12 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size, entry->type = dma_debug_coherent; entry->dev = dev; - entry->offset = offset_in_page(virt); + entry->paddr = page_to_phys((is_vmalloc_addr(virt) ? + vmalloc_to_page(virt) : virt_to_page(virt))); entry->size = size; entry->dev_addr = dma_addr; entry->direction = DMA_BIDIRECTIONAL; - if (is_vmalloc_addr(virt)) - entry->pfn = vmalloc_to_pfn(virt); - else - entry->pfn = page_to_pfn(virt_to_page(virt)); - add_dma_entry(entry, attrs); } @@ -1433,7 +1416,6 @@ void debug_dma_free_coherent(struct device *dev, size_t size, struct dma_debug_entry ref = { .type = dma_debug_coherent, .dev = dev, - .offset = offset_in_page(virt), .dev_addr = dma_addr, .size = size, .direction = DMA_BIDIRECTIONAL, @@ -1443,10 +1425,8 @@ void debug_dma_free_coherent(struct device *dev, size_t size, if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt)) return; - if (is_vmalloc_addr(virt)) - ref.pfn = vmalloc_to_pfn(virt); - else - ref.pfn = page_to_pfn(virt_to_page(virt)); + ref.paddr = page_to_phys((is_vmalloc_addr(virt) ? + vmalloc_to_page(virt) : virt_to_page(virt))); if (unlikely(dma_debug_disabled())) return; @@ -1469,8 +1449,7 @@ void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size, entry->type = dma_debug_resource; entry->dev = dev; - entry->pfn = PHYS_PFN(addr); - entry->offset = offset_in_page(addr); + entry->paddr = addr; entry->size = size; entry->dev_addr = dma_addr; entry->direction = direction; @@ -1547,8 +1526,7 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, struct dma_debug_entry ref = { .type = dma_debug_sg, .dev = dev, - .pfn = page_to_pfn(sg_page(s)), - .offset = s->offset, + .paddr = sg_phys(s), .dev_addr = sg_dma_address(s), .size = sg_dma_len(s), .direction = direction, @@ -1579,8 +1557,7 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, struct dma_debug_entry ref = { .type = dma_debug_sg, .dev = dev, - .pfn = page_to_pfn(sg_page(s)), - .offset = s->offset, + .paddr = sg_phys(sg), .dev_addr = sg_dma_address(s), .size = sg_dma_len(s), .direction = direction, From 150745b49aca4dec8057e8908d5ce5383e036a4f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Oct 2024 10:29:20 +0200 Subject: [PATCH 03/11] dma-debug: remove DMA_API_DEBUG_SG The scatterlist validity checks are pretty simple and cheap, perform them unconditionally. Signed-off-by: Christoph Hellwig --- kernel/dma/Kconfig | 17 ----------------- kernel/dma/debug.c | 2 -- 2 files changed, 19 deletions(-) diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 4c0dcd909121..31cfdb6b4bc3 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -260,23 +260,6 @@ config DMA_API_DEBUG If unsure, say N. -config DMA_API_DEBUG_SG - bool "Debug DMA scatter-gather usage" - default y - depends on DMA_API_DEBUG - help - Perform extra checking that callers of dma_map_sg() have respected the - appropriate segment length/boundary limits for the given device when - preparing DMA scatterlists. - - This is particularly likely to have been overlooked in cases where the - dma_map_sg() API is used for general bulk mapping of pages rather than - preparing literal scatter-gather descriptors, where there is a risk of - unexpected behaviour from DMA API implementations if the scatterlist - is technically out-of-spec. - - If unsure, say N. - config DMA_MAP_BENCHMARK bool "Enable benchmarking of streaming DMA mapping" depends on DEBUG_FS diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 4e3692afdf0d..295396226f31 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -1163,7 +1163,6 @@ out: static void check_sg_segment(struct device *dev, struct scatterlist *sg) { -#ifdef CONFIG_DMA_API_DEBUG_SG unsigned int max_seg = dma_get_max_seg_size(dev); u64 start, end, boundary = dma_get_seg_boundary(dev); @@ -1184,7 +1183,6 @@ static void check_sg_segment(struct device *dev, struct scatterlist *sg) if ((start ^ end) & ~boundary) err_printk(dev, NULL, "mapping sg segment across boundary [start=0x%016llx] [end=0x%016llx] [boundary=0x%016llx]\n", start, end, boundary); -#endif } void debug_dma_map_single(struct device *dev, const void *addr, From 5935b8377a0f3b2401e4e487336ed90fe6b9254d Mon Sep 17 00:00:00 2001 From: Sui Jingfeng Date: Wed, 23 Oct 2024 11:25:44 +0800 Subject: [PATCH 04/11] dma-mapping: remove an outdated comment from dma-map-ops.h The "/* CONFIG_ARCH_HAS_DMA_COHERENCE_H */" was an description about the ARCH_HAS_DMA_COHERENCE_H configure option, but it has been removed since the dma_default_coherent variable was lifted from the mips architecture to the driver core. Therefore it doesn't match any compile guard now. Just remove it. Fixes: 6d4e9a8efe3d ("driver core: lift dma_default_coherent into common code") Signed-off-by: Sui Jingfeng Signed-off-by: Christoph Hellwig --- include/linux/dma-map-ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index b7773201414c..e172522cd936 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -242,7 +242,7 @@ static inline bool dev_is_dma_coherent(struct device *dev) { return true; } -#endif /* CONFIG_ARCH_HAS_DMA_COHERENCE_H */ +#endif static inline void dma_reset_need_sync(struct device *dev) { From 5af5fc895fb9deec3d7b225f5bc2bd4949f8bbd5 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 18 Oct 2024 11:00:34 -0400 Subject: [PATCH 05/11] dma-mapping: use macros to define events in a class Use a macro to avoid repeating the parameters and arguments for each event in a class. Signed-off-by: Sean Anderson Reviewed-by: Steven Rostedt (Google) Signed-off-by: Christoph Hellwig --- include/trace/events/dma.h | 60 ++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h index b0f41265191c..3d348cea4d7c 100644 --- a/include/trace/events/dma.h +++ b/include/trace/events/dma.h @@ -65,15 +65,14 @@ DECLARE_EVENT_CLASS(dma_map, decode_dma_attrs(__entry->attrs)) ); -DEFINE_EVENT(dma_map, dma_map_page, - TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, - size_t size, enum dma_data_direction dir, unsigned long attrs), - TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs)); +#define DEFINE_MAP_EVENT(name) \ +DEFINE_EVENT(dma_map, name, \ + TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, \ + size_t size, enum dma_data_direction dir, unsigned long attrs), \ + TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs)) -DEFINE_EVENT(dma_map, dma_map_resource, - TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, - size_t size, enum dma_data_direction dir, unsigned long attrs), - TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs)); +DEFINE_MAP_EVENT(dma_map_page); +DEFINE_MAP_EVENT(dma_map_resource); DECLARE_EVENT_CLASS(dma_unmap, TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, @@ -104,15 +103,14 @@ DECLARE_EVENT_CLASS(dma_unmap, decode_dma_attrs(__entry->attrs)) ); -DEFINE_EVENT(dma_unmap, dma_unmap_page, - TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, - enum dma_data_direction dir, unsigned long attrs), - TP_ARGS(dev, addr, size, dir, attrs)); +#define DEFINE_UNMAP_EVENT(name) \ +DEFINE_EVENT(dma_unmap, name, \ + TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, \ + enum dma_data_direction dir, unsigned long attrs), \ + TP_ARGS(dev, addr, size, dir, attrs)) -DEFINE_EVENT(dma_unmap, dma_unmap_resource, - TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, - enum dma_data_direction dir, unsigned long attrs), - TP_ARGS(dev, addr, size, dir, attrs)); +DEFINE_UNMAP_EVENT(dma_unmap_page); +DEFINE_UNMAP_EVENT(dma_unmap_resource); TRACE_EVENT(dma_alloc, TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, @@ -279,15 +277,14 @@ DECLARE_EVENT_CLASS(dma_sync_single, __entry->size) ); -DEFINE_EVENT(dma_sync_single, dma_sync_single_for_cpu, - TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction dir), - TP_ARGS(dev, dma_addr, size, dir)); +#define DEFINE_SYNC_SINGLE_EVENT(name) \ +DEFINE_EVENT(dma_sync_single, name, \ + TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size, \ + enum dma_data_direction dir), \ + TP_ARGS(dev, dma_addr, size, dir)) -DEFINE_EVENT(dma_sync_single, dma_sync_single_for_device, - TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction dir), - TP_ARGS(dev, dma_addr, size, dir)); +DEFINE_SYNC_SINGLE_EVENT(dma_sync_single_for_cpu); +DEFINE_SYNC_SINGLE_EVENT(dma_sync_single_for_device); DECLARE_EVENT_CLASS(dma_sync_sg, TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents, @@ -326,15 +323,14 @@ DECLARE_EVENT_CLASS(dma_sync_sg, sizeof(unsigned int), sizeof(unsigned int))) ); -DEFINE_EVENT(dma_sync_sg, dma_sync_sg_for_cpu, - TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir), - TP_ARGS(dev, sg, nents, dir)); +#define DEFINE_SYNC_SG_EVENT(name) \ +DEFINE_EVENT(dma_sync_sg, name, \ + TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, \ + enum dma_data_direction dir), \ + TP_ARGS(dev, sg, nents, dir)) -DEFINE_EVENT(dma_sync_sg, dma_sync_sg_for_device, - TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir), - TP_ARGS(dev, sg, nents, dir)); +DEFINE_SYNC_SG_EVENT(dma_sync_sg_for_cpu); +DEFINE_SYNC_SG_EVENT(dma_sync_sg_for_device); #endif /* _TRACE_DMA_H */ From 3afff779a725cba914e6caba360b696ae6f90249 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 18 Oct 2024 11:00:35 -0400 Subject: [PATCH 06/11] dma-mapping: trace dma_alloc/free direction In preparation for using these tracepoints in a few more places, trace the DMA direction as well. For coherent allocations this is always bidirectional. Signed-off-by: Sean Anderson Reviewed-by: Steven Rostedt (Google) Signed-off-by: Christoph Hellwig --- include/trace/events/dma.h | 18 ++++++++++++------ kernel/dma/mapping.c | 6 ++++-- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h index 3d348cea4d7c..267cfa49d9d5 100644 --- a/include/trace/events/dma.h +++ b/include/trace/events/dma.h @@ -114,8 +114,9 @@ DEFINE_UNMAP_EVENT(dma_unmap_resource); TRACE_EVENT(dma_alloc, TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, - size_t size, gfp_t flags, unsigned long attrs), - TP_ARGS(dev, virt_addr, dma_addr, size, flags, attrs), + size_t size, enum dma_data_direction dir, gfp_t flags, + unsigned long attrs), + TP_ARGS(dev, virt_addr, dma_addr, size, dir, flags, attrs), TP_STRUCT__entry( __string(device, dev_name(dev)) @@ -123,6 +124,7 @@ TRACE_EVENT(dma_alloc, __field(u64, dma_addr) __field(size_t, size) __field(gfp_t, flags) + __field(enum dma_data_direction, dir) __field(unsigned long, attrs) ), @@ -135,8 +137,9 @@ TRACE_EVENT(dma_alloc, __entry->attrs = attrs; ), - TP_printk("%s dma_addr=%llx size=%zu virt_addr=%p flags=%s attrs=%s", + TP_printk("%s dir=%s dma_addr=%llx size=%zu virt_addr=%p flags=%s attrs=%s", __get_str(device), + decode_dma_data_direction(__entry->dir), __entry->dma_addr, __entry->size, __entry->virt_addr, @@ -146,14 +149,15 @@ TRACE_EVENT(dma_alloc, TRACE_EVENT(dma_free, TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, - size_t size, unsigned long attrs), - TP_ARGS(dev, virt_addr, dma_addr, size, attrs), + size_t size, enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, virt_addr, dma_addr, size, dir, attrs), TP_STRUCT__entry( __string(device, dev_name(dev)) __field(void *, virt_addr) __field(u64, dma_addr) __field(size_t, size) + __field(enum dma_data_direction, dir) __field(unsigned long, attrs) ), @@ -162,11 +166,13 @@ TRACE_EVENT(dma_free, __entry->virt_addr = virt_addr; __entry->dma_addr = dma_addr; __entry->size = size; + __entry->dir = dir; __entry->attrs = attrs; ), - TP_printk("%s dma_addr=%llx size=%zu virt_addr=%p attrs=%s", + TP_printk("%s dir=%s dma_addr=%llx size=%zu virt_addr=%p attrs=%s", __get_str(device), + decode_dma_data_direction(__entry->dir), __entry->dma_addr, __entry->size, __entry->virt_addr, diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 864a1121bf08..944ac835030a 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -619,7 +619,8 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, else return NULL; - trace_dma_alloc(dev, cpu_addr, *dma_handle, size, flag, attrs); + trace_dma_alloc(dev, cpu_addr, *dma_handle, size, DMA_BIDIRECTIONAL, + flag, attrs); debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr, attrs); return cpu_addr; } @@ -644,7 +645,8 @@ void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, if (!cpu_addr) return; - trace_dma_free(dev, cpu_addr, dma_handle, size, attrs); + trace_dma_free(dev, cpu_addr, dma_handle, size, DMA_BIDIRECTIONAL, + attrs); debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); if (dma_alloc_direct(dev, ops)) dma_direct_free(dev, size, cpu_addr, dma_handle, attrs); From c4484ab86ee00f2d9236e2851621ea02c105f4cc Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 18 Oct 2024 11:00:36 -0400 Subject: [PATCH 07/11] dma-mapping: use trace_dma_alloc for dma_alloc* instead of using trace_dma_map In some cases, we use trace_dma_map to trace dma_alloc* functions. This generally follows dma_debug. However, this does not record all of the relevant information for allocations, such as GFP flags. Create new dma_alloc tracepoints for these functions. Note that while dma_alloc_noncontiguous may allocate discontiguous pages (from the CPU's point of view), the device will only see one contiguous mapping. Therefore, we just need to trace dma_addr and size. Signed-off-by: Sean Anderson Reviewed-by: Steven Rostedt (Google) Signed-off-by: Christoph Hellwig --- include/trace/events/dma.h | 99 +++++++++++++++++++++++++++++++++++++- kernel/dma/mapping.c | 10 ++-- 2 files changed, 102 insertions(+), 7 deletions(-) diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h index 267cfa49d9d5..7a9606b8934e 100644 --- a/include/trace/events/dma.h +++ b/include/trace/events/dma.h @@ -112,7 +112,7 @@ DEFINE_EVENT(dma_unmap, name, \ DEFINE_UNMAP_EVENT(dma_unmap_page); DEFINE_UNMAP_EVENT(dma_unmap_resource); -TRACE_EVENT(dma_alloc, +DECLARE_EVENT_CLASS(dma_alloc_class, TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, gfp_t flags, unsigned long attrs), @@ -147,7 +147,58 @@ TRACE_EVENT(dma_alloc, decode_dma_attrs(__entry->attrs)) ); -TRACE_EVENT(dma_free, +#define DEFINE_ALLOC_EVENT(name) \ +DEFINE_EVENT(dma_alloc_class, name, \ + TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, \ + size_t size, enum dma_data_direction dir, gfp_t flags, \ + unsigned long attrs), \ + TP_ARGS(dev, virt_addr, dma_addr, size, dir, flags, attrs)) + +DEFINE_ALLOC_EVENT(dma_alloc); +DEFINE_ALLOC_EVENT(dma_alloc_pages); + +TRACE_EVENT(dma_alloc_sgt, + TP_PROTO(struct device *dev, struct sg_table *sgt, size_t size, + enum dma_data_direction dir, gfp_t flags, unsigned long attrs), + TP_ARGS(dev, sgt, size, dir, flags, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __dynamic_array(u64, phys_addrs, sgt->orig_nents) + __field(u64, dma_addr) + __field(size_t, size) + __field(enum dma_data_direction, dir) + __field(gfp_t, flags) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + struct scatterlist *sg; + int i; + + __assign_str(device); + for_each_sg(sgt->sgl, sg, sgt->orig_nents, i) + ((u64 *)__get_dynamic_array(phys_addrs))[i] = sg_phys(sg); + __entry->dma_addr = sg_dma_address(sgt->sgl); + __entry->size = size; + __entry->dir = dir; + __entry->flags = flags; + __entry->attrs = attrs; + ), + + TP_printk("%s dir=%s dma_addr=%llx size=%zu phys_addrs=%s flags=%s attrs=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __entry->dma_addr, + __entry->size, + __print_array(__get_dynamic_array(phys_addrs), + __get_dynamic_array_len(phys_addrs) / + sizeof(u64), sizeof(u64)), + show_gfp_flags(__entry->flags), + decode_dma_attrs(__entry->attrs)) +); + +DECLARE_EVENT_CLASS(dma_free_class, TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, unsigned long attrs), TP_ARGS(dev, virt_addr, dma_addr, size, dir, attrs), @@ -179,6 +230,50 @@ TRACE_EVENT(dma_free, decode_dma_attrs(__entry->attrs)) ); +#define DEFINE_FREE_EVENT(name) \ +DEFINE_EVENT(dma_free_class, name, \ + TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, \ + size_t size, enum dma_data_direction dir, unsigned long attrs), \ + TP_ARGS(dev, virt_addr, dma_addr, size, dir, attrs)) + +DEFINE_FREE_EVENT(dma_free); +DEFINE_FREE_EVENT(dma_free_pages); + +TRACE_EVENT(dma_free_sgt, + TP_PROTO(struct device *dev, struct sg_table *sgt, size_t size, + enum dma_data_direction dir), + TP_ARGS(dev, sgt, size, dir), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __dynamic_array(u64, phys_addrs, sgt->orig_nents) + __field(u64, dma_addr) + __field(size_t, size) + __field(enum dma_data_direction, dir) + ), + + TP_fast_assign( + struct scatterlist *sg; + int i; + + __assign_str(device); + for_each_sg(sgt->sgl, sg, sgt->orig_nents, i) + ((u64 *)__get_dynamic_array(phys_addrs))[i] = sg_phys(sg); + __entry->dma_addr = sg_dma_address(sgt->sgl); + __entry->size = size; + __entry->dir = dir; + ), + + TP_printk("%s dir=%s dma_addr=%llx size=%zu phys_addrs=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __entry->dma_addr, + __entry->size, + __print_array(__get_dynamic_array(phys_addrs), + __get_dynamic_array_len(phys_addrs) / + sizeof(u64), sizeof(u64))) +); + TRACE_EVENT(dma_map_sg, TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents, int ents, enum dma_data_direction dir, unsigned long attrs), diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 944ac835030a..b8a6bc492fae 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -685,8 +685,8 @@ struct page *dma_alloc_pages(struct device *dev, size_t size, struct page *page = __dma_alloc_pages(dev, size, dma_handle, dir, gfp); if (page) { - trace_dma_map_page(dev, page_to_phys(page), *dma_handle, size, - dir, 0); + trace_dma_alloc_pages(dev, page_to_virt(page), *dma_handle, + size, dir, gfp, 0); debug_dma_map_page(dev, page, 0, size, dir, *dma_handle, 0); } return page; @@ -710,7 +710,7 @@ static void __dma_free_pages(struct device *dev, size_t size, struct page *page, void dma_free_pages(struct device *dev, size_t size, struct page *page, dma_addr_t dma_handle, enum dma_data_direction dir) { - trace_dma_unmap_page(dev, dma_handle, size, dir, 0); + trace_dma_free_pages(dev, page_to_virt(page), dma_handle, size, dir, 0); debug_dma_unmap_page(dev, dma_handle, size, dir); __dma_free_pages(dev, size, page, dma_handle, dir); } @@ -770,7 +770,7 @@ struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, if (sgt) { sgt->nents = 1; - trace_dma_map_sg(dev, sgt->sgl, sgt->orig_nents, 1, dir, attrs); + trace_dma_alloc_sgt(dev, sgt, size, dir, gfp, attrs); debug_dma_map_sg(dev, sgt->sgl, sgt->orig_nents, 1, dir, attrs); } return sgt; @@ -789,7 +789,7 @@ static void free_single_sgt(struct device *dev, size_t size, void dma_free_noncontiguous(struct device *dev, size_t size, struct sg_table *sgt, enum dma_data_direction dir) { - trace_dma_unmap_sg(dev, sgt->sgl, sgt->orig_nents, dir, 0); + trace_dma_free_sgt(dev, sgt, size, dir); debug_dma_unmap_sg(dev, sgt->sgl, sgt->orig_nents, dir); if (use_dma_iommu(dev)) From 68b6dbf1f441c4eba3b8511728a41cf9b01dca35 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 18 Oct 2024 11:00:37 -0400 Subject: [PATCH 08/11] dma-mapping: trace more error paths It can be surprising to the user if DMA functions are only traced on success. On failure, it can be unclear what the source of the problem is. Fix this by tracing all functions even when they fail. Cases where we BUG/WARN are skipped, since those should be sufficiently noisy already. Signed-off-by: Sean Anderson Reviewed-by: Steven Rostedt (Google) Signed-off-by: Christoph Hellwig --- include/trace/events/dma.h | 36 ++++++++++++++++++++++++++++++++++++ kernel/dma/mapping.c | 25 ++++++++++++++++++------- 2 files changed, 54 insertions(+), 7 deletions(-) diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h index 7a9606b8934e..d8ddc27b6a7c 100644 --- a/include/trace/events/dma.h +++ b/include/trace/events/dma.h @@ -156,6 +156,7 @@ DEFINE_EVENT(dma_alloc_class, name, \ DEFINE_ALLOC_EVENT(dma_alloc); DEFINE_ALLOC_EVENT(dma_alloc_pages); +DEFINE_ALLOC_EVENT(dma_alloc_sgt_err); TRACE_EVENT(dma_alloc_sgt, TP_PROTO(struct device *dev, struct sg_table *sgt, size_t size, @@ -320,6 +321,41 @@ TRACE_EVENT(dma_map_sg, decode_dma_attrs(__entry->attrs)) ); +TRACE_EVENT(dma_map_sg_err, + TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents, + int err, enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, sgl, nents, err, dir, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __dynamic_array(u64, phys_addrs, nents) + __field(int, err) + __field(enum dma_data_direction, dir) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + struct scatterlist *sg; + int i; + + __assign_str(device); + for_each_sg(sgl, sg, nents, i) + ((u64 *)__get_dynamic_array(phys_addrs))[i] = sg_phys(sg); + __entry->err = err; + __entry->dir = dir; + __entry->attrs = attrs; + ), + + TP_printk("%s dir=%s dma_addrs=%s err=%d attrs=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __print_array(__get_dynamic_array(phys_addrs), + __get_dynamic_array_len(phys_addrs) / + sizeof(u64), sizeof(u64)), + __entry->err, + decode_dma_attrs(__entry->attrs)) +); + TRACE_EVENT(dma_unmap_sg, TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs), diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index b8a6bc492fae..636dbb0629a4 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -223,6 +223,7 @@ static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, debug_dma_map_sg(dev, sg, nents, ents, dir, attrs); } else if (WARN_ON_ONCE(ents != -EINVAL && ents != -ENOMEM && ents != -EIO && ents != -EREMOTEIO)) { + trace_dma_map_sg_err(dev, sg, nents, ents, dir, attrs); return -EIO; } @@ -604,20 +605,26 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, if (WARN_ON_ONCE(flag & __GFP_COMP)) return NULL; - if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr)) + if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr)) { + trace_dma_alloc(dev, cpu_addr, *dma_handle, size, + DMA_BIDIRECTIONAL, flag, attrs); return cpu_addr; + } /* let the implementation decide on the zone to allocate from: */ flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); - if (dma_alloc_direct(dev, ops)) + if (dma_alloc_direct(dev, ops)) { cpu_addr = dma_direct_alloc(dev, size, dma_handle, flag, attrs); - else if (use_dma_iommu(dev)) + } else if (use_dma_iommu(dev)) { cpu_addr = iommu_dma_alloc(dev, size, dma_handle, flag, attrs); - else if (ops->alloc) + } else if (ops->alloc) { cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs); - else + } else { + trace_dma_alloc(dev, NULL, 0, size, DMA_BIDIRECTIONAL, flag, + attrs); return NULL; + } trace_dma_alloc(dev, cpu_addr, *dma_handle, size, DMA_BIDIRECTIONAL, flag, attrs); @@ -642,11 +649,11 @@ void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, */ WARN_ON(irqs_disabled()); + trace_dma_free(dev, cpu_addr, dma_handle, size, DMA_BIDIRECTIONAL, + attrs); if (!cpu_addr) return; - trace_dma_free(dev, cpu_addr, dma_handle, size, DMA_BIDIRECTIONAL, - attrs); debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); if (dma_alloc_direct(dev, ops)) dma_direct_free(dev, size, cpu_addr, dma_handle, attrs); @@ -688,6 +695,8 @@ struct page *dma_alloc_pages(struct device *dev, size_t size, trace_dma_alloc_pages(dev, page_to_virt(page), *dma_handle, size, dir, gfp, 0); debug_dma_map_page(dev, page, 0, size, dir, *dma_handle, 0); + } else { + trace_dma_alloc_pages(dev, NULL, 0, size, dir, gfp, 0); } return page; } @@ -772,6 +781,8 @@ struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, sgt->nents = 1; trace_dma_alloc_sgt(dev, sgt, size, dir, gfp, attrs); debug_dma_map_sg(dev, sgt->sgl, sgt->orig_nents, 1, dir, attrs); + } else { + trace_dma_alloc_sgt_err(dev, NULL, 0, size, gfp, dir, attrs); } return sgt; } From be164349e173a8e71cd76f17c7ed720813b8d69b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 13 Oct 2024 07:19:48 +0200 Subject: [PATCH 09/11] dma-mapping: drop unneeded includes from dma-mapping.h Back in the day a lot of logic was implemented inline in dma-mapping.h and needed various includes. Move of this has long been moved out of line, so we can drop various includes to improve kernel rebuild times. Signed-off-by: Christoph Hellwig --- arch/powerpc/platforms/pseries/svm.c | 1 + include/linux/dma-mapping.h | 4 ---- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c index 3b4045d508ec..384c9dc1899a 100644 --- a/arch/powerpc/platforms/pseries/svm.c +++ b/arch/powerpc/platforms/pseries/svm.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 1524da363734..b79925b1c433 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -2,15 +2,11 @@ #ifndef _LINUX_DMA_MAPPING_H #define _LINUX_DMA_MAPPING_H -#include -#include -#include #include #include #include #include #include -#include /** * List of possible attributes associated with a DMA mapping. The semantics From d5bbfbad58ec0ccd187282f0e171bc763efa6828 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Thu, 31 Oct 2024 11:45:14 -0400 Subject: [PATCH 10/11] dma-mapping: fix swapped dir/flags arguments to trace_dma_alloc_sgt_err trace_dma_alloc_sgt_err was called with the dir and flags arguments swapped. Fix this. Fixes: 68b6dbf1f441 ("dma-mapping: trace more error paths") Signed-off-by: Sean Anderson Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202410302243.1wnTlPk3-lkp@intel.com/ Signed-off-by: Christoph Hellwig --- kernel/dma/mapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 636dbb0629a4..cda127027e48 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -782,7 +782,7 @@ struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, trace_dma_alloc_sgt(dev, sgt, size, dir, gfp, attrs); debug_dma_map_sg(dev, sgt->sgl, sgt->orig_nents, 1, dir, attrs); } else { - trace_dma_alloc_sgt_err(dev, NULL, 0, size, gfp, dir, attrs); + trace_dma_alloc_sgt_err(dev, NULL, 0, size, dir, gfp, attrs); } return sgt; } From 22293c33738c14bb84b9d3e771bc37150e7cf8e7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 12 Nov 2024 19:39:37 +0100 Subject: [PATCH 11/11] dma-mapping: save base/size instead of pointer to shared DMA pool On RZ/Five, which is non-coherent, and uses CONFIG_DMA_GLOBAL_POOL=y: Oops - store (or AMO) access fault [#1] CPU: 0 UID: 0 PID: 1 Comm: swapper Not tainted 6.12.0-rc1-00015-g8a6e02d0c00e #201 Hardware name: Renesas SMARC EVK based on r9a07g043f01 (DT) epc : __memset+0x60/0x100 ra : __dma_alloc_from_coherent+0x150/0x17a epc : ffffffff8062d2bc ra : ffffffff80053a94 sp : ffffffc60000ba20 gp : ffffffff812e9938 tp : ffffffd601920000 t0 : ffffffc6000d0000 t1 : 0000000000000000 t2 : ffffffffe9600000 s0 : ffffffc60000baa0 s1 : ffffffc6000d0000 a0 : ffffffc6000d0000 a1 : 0000000000000000 a2 : 0000000000001000 a3 : ffffffc6000d1000 a4 : 0000000000000000 a5 : 0000000000000000 a6 : ffffffd601adacc0 a7 : ffffffd601a841a8 s2 : ffffffd6018573c0 s3 : 0000000000001000 s4 : ffffffd6019541e0 s5 : 0000000200000022 s6 : ffffffd6018f8410 s7 : ffffffd6018573e8 s8 : 0000000000000001 s9 : 0000000000000001 s10: 0000000000000010 s11: 0000000000000000 t3 : 0000000000000000 t4 : ffffffffdefe62d1 t5 : 000000001cd6a3a9 t6 : ffffffd601b2aad6 status: 0000000200000120 badaddr: ffffffc6000d0000 cause: 0000000000000007 [] __memset+0x60/0x100 [] dma_alloc_from_global_coherent+0x1c/0x28 [] dma_direct_alloc+0x98/0x112 [] dma_alloc_attrs+0x78/0x86 [] rz_dmac_probe+0x3f6/0x50a [] platform_probe+0x4c/0x8a If CONFIG_DMA_GLOBAL_POOL=y, the reserved_mem structure passed to rmem_dma_setup() is saved for later use, by saving the passed pointer. However, when dma_init_reserved_memory() is called later, the pointer has become stale, causing a crash. E.g. in the RZ/Five case, the referenced memory now contains the reserved_mem structure for the "mmode_resv0@30000" node (with base 0x30000 and size 0x10000), instead of the correct "pma_resv0@58000000" node (with base 0x58000000 and size 0x8000000). Fix this by saving the needed reserved_mem structure's contents instead. Fixes: 8a6e02d0c00e7b62 ("of: reserved_mem: Restructure how the reserved memory regions are processed") Signed-off-by: Geert Uytterhoeven Reviewed-by: Oreoluwa Babatunde Tested-by: Lad Prabhakar Signed-off-by: Christoph Hellwig --- kernel/dma/coherent.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c index ff5683a57f77..3b2bdca9f1d4 100644 --- a/kernel/dma/coherent.c +++ b/kernel/dma/coherent.c @@ -330,7 +330,8 @@ int dma_init_global_coherent(phys_addr_t phys_addr, size_t size) #include #ifdef CONFIG_DMA_GLOBAL_POOL -static struct reserved_mem *dma_reserved_default_memory __initdata; +static phys_addr_t dma_reserved_default_memory_base __initdata; +static phys_addr_t dma_reserved_default_memory_size __initdata; #endif static int rmem_dma_device_init(struct reserved_mem *rmem, struct device *dev) @@ -376,9 +377,10 @@ static int __init rmem_dma_setup(struct reserved_mem *rmem) #ifdef CONFIG_DMA_GLOBAL_POOL if (of_get_flat_dt_prop(node, "linux,dma-default", NULL)) { - WARN(dma_reserved_default_memory, + WARN(dma_reserved_default_memory_size, "Reserved memory: region for default DMA coherent area is redefined\n"); - dma_reserved_default_memory = rmem; + dma_reserved_default_memory_base = rmem->base; + dma_reserved_default_memory_size = rmem->size; } #endif @@ -391,10 +393,10 @@ static int __init rmem_dma_setup(struct reserved_mem *rmem) #ifdef CONFIG_DMA_GLOBAL_POOL static int __init dma_init_reserved_memory(void) { - if (!dma_reserved_default_memory) + if (!dma_reserved_default_memory_size) return -ENOMEM; - return dma_init_global_coherent(dma_reserved_default_memory->base, - dma_reserved_default_memory->size); + return dma_init_global_coherent(dma_reserved_default_memory_base, + dma_reserved_default_memory_size); } core_initcall(dma_init_reserved_memory); #endif /* CONFIG_DMA_GLOBAL_POOL */